From 92fcbf6e7bc622dcace226bb70ff6d5cdbdbaecb Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 15 Feb 2008 20:07:18 +0900 Subject: Code reorganization: s/aux/auxiliary/. "aux" is a reserved name on Windows (X_X) --- src/gallium/auxiliary/Makefile | 24 + src/gallium/auxiliary/cso_cache/cso_cache.c | 181 ++ src/gallium/auxiliary/cso_cache/cso_cache.h | 107 + src/gallium/auxiliary/cso_cache/cso_hash.c | 388 +++ src/gallium/auxiliary/cso_cache/cso_hash.h | 62 + src/gallium/auxiliary/draw/Makefile | 2 + src/gallium/auxiliary/draw/draw_clip.c | 488 ++++ src/gallium/auxiliary/draw/draw_context.c | 293 +++ src/gallium/auxiliary/draw/draw_context.h | 142 ++ src/gallium/auxiliary/draw/draw_cull.c | 150 ++ src/gallium/auxiliary/draw/draw_debug.c | 113 + src/gallium/auxiliary/draw/draw_flatshade.c | 205 ++ src/gallium/auxiliary/draw/draw_offset.c | 186 ++ src/gallium/auxiliary/draw/draw_prim.c | 482 ++++ src/gallium/auxiliary/draw/draw_private.h | 346 +++ src/gallium/auxiliary/draw/draw_stipple.c | 239 ++ src/gallium/auxiliary/draw/draw_twoside.c | 203 ++ src/gallium/auxiliary/draw/draw_unfilled.c | 206 ++ src/gallium/auxiliary/draw/draw_validate.c | 185 ++ src/gallium/auxiliary/draw/draw_vbuf.c | 570 +++++ src/gallium/auxiliary/draw/draw_vbuf.h | 106 + src/gallium/auxiliary/draw/draw_vertex.c | 79 + src/gallium/auxiliary/draw/draw_vertex.h | 111 + src/gallium/auxiliary/draw/draw_vertex_cache.c | 196 ++ src/gallium/auxiliary/draw/draw_vertex_fetch.c | 510 ++++ src/gallium/auxiliary/draw/draw_vertex_shader.c | 325 +++ src/gallium/auxiliary/draw/draw_vf.c | 428 ++++ src/gallium/auxiliary/draw/draw_vf.h | 236 ++ src/gallium/auxiliary/draw/draw_vf_generic.c | 585 +++++ src/gallium/auxiliary/draw/draw_vf_sse.c | 614 +++++ src/gallium/auxiliary/draw/draw_wide_prims.c | 432 ++++ src/gallium/auxiliary/llvm/Makefile | 85 + src/gallium/auxiliary/llvm/gallivm.cpp | 327 +++ src/gallium/auxiliary/llvm/gallivm.h | 103 + src/gallium/auxiliary/llvm/gallivm_builtins.cpp | 567 +++++ src/gallium/auxiliary/llvm/gallivm_cpu.cpp | 202 ++ src/gallium/auxiliary/llvm/gallivm_p.h | 110 + src/gallium/auxiliary/llvm/instructions.cpp | 889 +++++++ src/gallium/auxiliary/llvm/instructions.h | 152 ++ src/gallium/auxiliary/llvm/instructionssoa.cpp | 121 + src/gallium/auxiliary/llvm/instructionssoa.h | 74 + src/gallium/auxiliary/llvm/llvm_builtins.c | 115 + src/gallium/auxiliary/llvm/loweringpass.cpp | 17 + src/gallium/auxiliary/llvm/loweringpass.h | 15 + src/gallium/auxiliary/llvm/storage.cpp | 364 +++ src/gallium/auxiliary/llvm/storage.h | 133 ++ src/gallium/auxiliary/llvm/storagesoa.cpp | 389 +++ src/gallium/auxiliary/llvm/storagesoa.h | 111 + src/gallium/auxiliary/llvm/tgsitollvm.cpp | 1221 ++++++++++ src/gallium/auxiliary/llvm/tgsitollvm.h | 20 + src/gallium/auxiliary/pipebuffer/Makefile | 23 + src/gallium/auxiliary/pipebuffer/linked_list.h | 91 + src/gallium/auxiliary/pipebuffer/pb_buffer.h | 202 ++ .../auxiliary/pipebuffer/pb_buffer_fenced.c | 299 +++ .../auxiliary/pipebuffer/pb_buffer_fenced.h | 117 + .../auxiliary/pipebuffer/pb_buffer_malloc.c | 127 + src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 126 + .../auxiliary/pipebuffer/pb_bufmgr_fenced.c | 131 ++ src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 593 +++++ src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c | 288 +++ src/gallium/auxiliary/pipebuffer/pb_winsys.c | 170 ++ src/gallium/auxiliary/tgsi/Makefile | 3 + src/gallium/auxiliary/tgsi/exec/Makefile | 3 + src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 2485 ++++++++++++++++++++ src/gallium/auxiliary/tgsi/exec/tgsi_exec.h | 239 ++ src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 2378 +++++++++++++++++++ src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h | 26 + src/gallium/auxiliary/tgsi/util/tgsi_build.c | 1371 +++++++++++ src/gallium/auxiliary/tgsi/util/tgsi_build.h | 320 +++ src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 1581 +++++++++++++ src/gallium/auxiliary/tgsi/util/tgsi_dump.h | 28 + src/gallium/auxiliary/tgsi/util/tgsi_parse.c | 319 +++ src/gallium/auxiliary/tgsi/util/tgsi_parse.h | 121 + src/gallium/auxiliary/tgsi/util/tgsi_transform.c | 199 ++ src/gallium/auxiliary/tgsi/util/tgsi_transform.h | 93 + src/gallium/auxiliary/tgsi/util/tgsi_util.c | 274 +++ src/gallium/auxiliary/tgsi/util/tgsi_util.h | 70 + src/gallium/auxiliary/util/p_debug.c | 76 + src/gallium/auxiliary/util/p_tile.c | 699 ++++++ src/gallium/auxiliary/util/p_tile.h | 81 + src/gallium/auxiliary/util/p_util.c | 73 + 81 files changed, 25815 insertions(+) create mode 100644 src/gallium/auxiliary/Makefile create mode 100644 src/gallium/auxiliary/cso_cache/cso_cache.c create mode 100644 src/gallium/auxiliary/cso_cache/cso_cache.h create mode 100644 src/gallium/auxiliary/cso_cache/cso_hash.c create mode 100644 src/gallium/auxiliary/cso_cache/cso_hash.h create mode 100644 src/gallium/auxiliary/draw/Makefile create mode 100644 src/gallium/auxiliary/draw/draw_clip.c create mode 100644 src/gallium/auxiliary/draw/draw_context.c create mode 100644 src/gallium/auxiliary/draw/draw_context.h create mode 100644 src/gallium/auxiliary/draw/draw_cull.c create mode 100644 src/gallium/auxiliary/draw/draw_debug.c create mode 100644 src/gallium/auxiliary/draw/draw_flatshade.c create mode 100644 src/gallium/auxiliary/draw/draw_offset.c create mode 100644 src/gallium/auxiliary/draw/draw_prim.c create mode 100644 src/gallium/auxiliary/draw/draw_private.h create mode 100644 src/gallium/auxiliary/draw/draw_stipple.c create mode 100644 src/gallium/auxiliary/draw/draw_twoside.c create mode 100644 src/gallium/auxiliary/draw/draw_unfilled.c create mode 100644 src/gallium/auxiliary/draw/draw_validate.c create mode 100644 src/gallium/auxiliary/draw/draw_vbuf.c create mode 100644 src/gallium/auxiliary/draw/draw_vbuf.h create mode 100644 src/gallium/auxiliary/draw/draw_vertex.c create mode 100644 src/gallium/auxiliary/draw/draw_vertex.h create mode 100644 src/gallium/auxiliary/draw/draw_vertex_cache.c create mode 100644 src/gallium/auxiliary/draw/draw_vertex_fetch.c create mode 100644 src/gallium/auxiliary/draw/draw_vertex_shader.c create mode 100644 src/gallium/auxiliary/draw/draw_vf.c create mode 100644 src/gallium/auxiliary/draw/draw_vf.h create mode 100644 src/gallium/auxiliary/draw/draw_vf_generic.c create mode 100644 src/gallium/auxiliary/draw/draw_vf_sse.c create mode 100644 src/gallium/auxiliary/draw/draw_wide_prims.c create mode 100644 src/gallium/auxiliary/llvm/Makefile create mode 100644 src/gallium/auxiliary/llvm/gallivm.cpp create mode 100644 src/gallium/auxiliary/llvm/gallivm.h create mode 100644 src/gallium/auxiliary/llvm/gallivm_builtins.cpp create mode 100644 src/gallium/auxiliary/llvm/gallivm_cpu.cpp create mode 100644 src/gallium/auxiliary/llvm/gallivm_p.h create mode 100644 src/gallium/auxiliary/llvm/instructions.cpp create mode 100644 src/gallium/auxiliary/llvm/instructions.h create mode 100644 src/gallium/auxiliary/llvm/instructionssoa.cpp create mode 100644 src/gallium/auxiliary/llvm/instructionssoa.h create mode 100644 src/gallium/auxiliary/llvm/llvm_builtins.c create mode 100644 src/gallium/auxiliary/llvm/loweringpass.cpp create mode 100644 src/gallium/auxiliary/llvm/loweringpass.h create mode 100644 src/gallium/auxiliary/llvm/storage.cpp create mode 100644 src/gallium/auxiliary/llvm/storage.h create mode 100644 src/gallium/auxiliary/llvm/storagesoa.cpp create mode 100644 src/gallium/auxiliary/llvm/storagesoa.h create mode 100644 src/gallium/auxiliary/llvm/tgsitollvm.cpp create mode 100644 src/gallium/auxiliary/llvm/tgsitollvm.h create mode 100644 src/gallium/auxiliary/pipebuffer/Makefile create mode 100644 src/gallium/auxiliary/pipebuffer/linked_list.h create mode 100644 src/gallium/auxiliary/pipebuffer/pb_buffer.h create mode 100644 src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c create mode 100644 src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h create mode 100644 src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c create mode 100644 src/gallium/auxiliary/pipebuffer/pb_bufmgr.h create mode 100644 src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c create mode 100644 src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c create mode 100644 src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c create mode 100644 src/gallium/auxiliary/pipebuffer/pb_winsys.c create mode 100644 src/gallium/auxiliary/tgsi/Makefile create mode 100644 src/gallium/auxiliary/tgsi/exec/Makefile create mode 100644 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c create mode 100644 src/gallium/auxiliary/tgsi/exec/tgsi_exec.h create mode 100755 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c create mode 100755 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_build.c create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_build.h create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_dump.c create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_dump.h create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_parse.c create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_parse.h create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_transform.c create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_transform.h create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_util.c create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_util.h create mode 100644 src/gallium/auxiliary/util/p_debug.c create mode 100644 src/gallium/auxiliary/util/p_tile.c create mode 100644 src/gallium/auxiliary/util/p_tile.h create mode 100644 src/gallium/auxiliary/util/p_util.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile new file mode 100644 index 0000000000..da68498aa1 --- /dev/null +++ b/src/gallium/auxiliary/Makefile @@ -0,0 +1,24 @@ +TOP = ../../.. +include $(TOP)/configs/current + + +ifeq ($(CONFIG_NAME), linux-llvm) +LLVM_DIR = llvm +endif + +SUBDIRS = pipebuffer $(LLVM_DIR) + + +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + rm -f `find . -name \*.[oa]` diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c new file mode 100644 index 0000000000..9e77e0774d --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -0,0 +1,181 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Zack Rusin + */ + +#include "cso_cache.h" +#include "cso_hash.h" + +#if 1 +static unsigned hash_key(const void *key, unsigned key_size) +{ + unsigned *ikey = (unsigned *)key; + unsigned hash = 0, i; + + assert(key_size % 4 == 0); + + /* I'm sure this can be improved on: + */ + for (i = 0; i < key_size/4; i++) + hash ^= ikey[i]; + + return hash; +} +#else +static unsigned hash_key(const unsigned char *p, int n) +{ + unsigned h = 0; + unsigned g; + + while (n--) { + h = (h << 4) + *p++; + if ((g = (h & 0xf0000000)) != 0) + h ^= g >> 23; + h &= ~g; + } + return h; +} +#endif + +unsigned cso_construct_key(void *item, int item_size) +{ + return hash_key((item), item_size); +} + +static struct cso_hash *_cso_hash_for_type(struct cso_cache *sc, enum cso_cache_type type) +{ + struct cso_hash *hash = 0; + + switch(type) { + case CSO_BLEND: + hash = sc->blend_hash; + break; + case CSO_SAMPLER: + hash = sc->sampler_hash; + break; + case CSO_DEPTH_STENCIL_ALPHA: + hash = sc->depth_stencil_hash; + break; + case CSO_RASTERIZER: + hash = sc->rasterizer_hash; + break; + case CSO_FRAGMENT_SHADER: + hash = sc->fs_hash; + break; + case CSO_VERTEX_SHADER: + hash = sc->vs_hash; + break; + } + + return hash; +} + +static int _cso_size_for_type(enum cso_cache_type type) +{ + switch(type) { + case CSO_BLEND: + return sizeof(struct pipe_blend_state); + case CSO_SAMPLER: + return sizeof(struct pipe_sampler_state); + case CSO_DEPTH_STENCIL_ALPHA: + return sizeof(struct pipe_depth_stencil_alpha_state); + case CSO_RASTERIZER: + return sizeof(struct pipe_rasterizer_state); + case CSO_FRAGMENT_SHADER: + return sizeof(struct pipe_shader_state); + case CSO_VERTEX_SHADER: + return sizeof(struct pipe_shader_state); + } + return 0; +} + +struct cso_hash_iter +cso_insert_state(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type, + void *state) +{ + struct cso_hash *hash = _cso_hash_for_type(sc, type); + return cso_hash_insert(hash, hash_key, state); +} + +struct cso_hash_iter +cso_find_state(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type) +{ + struct cso_hash *hash = _cso_hash_for_type(sc, type); + + return cso_hash_find(hash, hash_key); +} + +struct cso_hash_iter cso_find_state_template(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type, + void *templ) +{ + struct cso_hash_iter iter = cso_find_state(sc, hash_key, type); + int size = _cso_size_for_type(type); + while (!cso_hash_iter_is_null(iter)) { + void *iter_data = cso_hash_iter_data(iter); + if (!memcmp(iter_data, templ, size)) + return iter; + iter = cso_hash_iter_next(iter); + } + return iter; +} + +void * cso_take_state(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type) +{ + struct cso_hash *hash = _cso_hash_for_type(sc, type); + return cso_hash_take(hash, hash_key); +} + +struct cso_cache *cso_cache_create(void) +{ + struct cso_cache *sc = malloc(sizeof(struct cso_cache)); + + sc->blend_hash = cso_hash_create(); + sc->sampler_hash = cso_hash_create(); + sc->depth_stencil_hash = cso_hash_create(); + sc->rasterizer_hash = cso_hash_create(); + sc->fs_hash = cso_hash_create(); + sc->vs_hash = cso_hash_create(); + + return sc; +} + +void cso_cache_delete(struct cso_cache *sc) +{ + assert(sc); + cso_hash_delete(sc->blend_hash); + cso_hash_delete(sc->sampler_hash); + cso_hash_delete(sc->depth_stencil_hash); + cso_hash_delete(sc->rasterizer_hash); + cso_hash_delete(sc->fs_hash); + cso_hash_delete(sc->vs_hash); + free(sc); +} diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h new file mode 100644 index 0000000000..116e2eaa2c --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -0,0 +1,107 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin + */ + +#ifndef CSO_CACHE_H +#define CSO_CACHE_H + +#include "pipe/p_context.h" +#include "pipe/p_state.h" + + +struct cso_hash; + +struct cso_cache { + struct cso_hash *blend_hash; + struct cso_hash *depth_stencil_hash; + struct cso_hash *fs_hash; + struct cso_hash *vs_hash; + struct cso_hash *rasterizer_hash; + struct cso_hash *sampler_hash; +}; + +struct cso_blend { + struct pipe_blend_state state; + void *data; +}; + +struct cso_depth_stencil_alpha { + struct pipe_depth_stencil_alpha_state state; + void *data; +}; + +struct cso_rasterizer { + struct pipe_rasterizer_state state; + void *data; +}; + +struct cso_fragment_shader { + struct pipe_shader_state state; + void *data; +}; + +struct cso_vertex_shader { + struct pipe_shader_state state; + void *data; +}; + +struct cso_sampler { + struct pipe_sampler_state state; + void *data; +}; + + +enum cso_cache_type { + CSO_BLEND, + CSO_SAMPLER, + CSO_DEPTH_STENCIL_ALPHA, + CSO_RASTERIZER, + CSO_FRAGMENT_SHADER, + CSO_VERTEX_SHADER +}; + +unsigned cso_construct_key(void *item, int item_size); + +struct cso_cache *cso_cache_create(void); +void cso_cache_delete(struct cso_cache *sc); + +struct cso_hash_iter cso_insert_state(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type, + void *state); +struct cso_hash_iter cso_find_state(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type); +struct cso_hash_iter cso_find_state_template(struct cso_cache *sc, + unsigned hash_key, enum cso_cache_type type, + void *templ); +void * cso_take_state(struct cso_cache *sc, unsigned hash_key, + enum cso_cache_type type); + +#endif diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c new file mode 100644 index 0000000000..0338cb3b47 --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -0,0 +1,388 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin + */ + +#include "cso_hash.h" + +#include +#include +#include +#include + +#define MAX(a, b) ((a > b) ? (a) : (b)) + +static const int MinNumBits = 4; + +static const unsigned char prime_deltas[] = { + 0, 0, 1, 3, 1, 5, 3, 3, 1, 9, 7, 5, 3, 9, 25, 3, + 1, 21, 3, 21, 7, 15, 9, 5, 3, 29, 15, 0, 0, 0, 0, 0 +}; + +static int primeForNumBits(int numBits) +{ + return (1 << numBits) + prime_deltas[numBits]; +} + +/* + Returns the smallest integer n such that + primeForNumBits(n) >= hint. +*/ +static int countBits(int hint) +{ + int numBits = 0; + int bits = hint; + + while (bits > 1) { + bits >>= 1; + numBits++; + } + + if (numBits >= (int)sizeof(prime_deltas)) { + numBits = sizeof(prime_deltas) - 1; + } else if (primeForNumBits(numBits) < hint) { + ++numBits; + } + return numBits; +} + +struct cso_node { + struct cso_node *next; + unsigned key; + void *value; +}; + +struct cso_hash_data { + struct cso_node *fakeNext; + struct cso_node **buckets; + int size; + int nodeSize; + short userNumBits; + short numBits; + int numBuckets; +}; + +struct cso_hash { + union { + struct cso_hash_data *d; + struct cso_node *e; + } data; +}; + +static void *cso_data_allocate_node(struct cso_hash_data *hash) +{ + return malloc(hash->nodeSize); +} + +static void cso_data_free_node(struct cso_node *node) +{ + /* XXX still a leak here. + * Need to cast value ptr to original cso type, then free the + * driver-specific data hanging off of it. For example: + struct cso_sampler *csamp = (struct cso_sampler *) node->value; + free(csamp->data); + */ + free(node->value); + free(node); +} + +static struct cso_node * +cso_hash_create_node(struct cso_hash *hash, + unsigned akey, void *avalue, + struct cso_node **anextNode) +{ + struct cso_node *node = cso_data_allocate_node(hash->data.d); + node->key = akey; + node->value = avalue; + + node->next = (struct cso_node*)(*anextNode); + *anextNode = node; + ++hash->data.d->size; + return node; +} + +static void cso_data_rehash(struct cso_hash_data *hash, int hint) +{ + if (hint < 0) { + hint = countBits(-hint); + if (hint < MinNumBits) + hint = MinNumBits; + hash->userNumBits = hint; + while (primeForNumBits(hint) < (hash->size >> 1)) + ++hint; + } else if (hint < MinNumBits) { + hint = MinNumBits; + } + + if (hash->numBits != hint) { + struct cso_node *e = (struct cso_node *)(hash); + struct cso_node **oldBuckets = hash->buckets; + int oldNumBuckets = hash->numBuckets; + int i = 0; + + hash->numBits = hint; + hash->numBuckets = primeForNumBits(hint); + hash->buckets = malloc(sizeof(struct cso_node*) * hash->numBuckets); + for (i = 0; i < hash->numBuckets; ++i) + hash->buckets[i] = e; + + for (i = 0; i < oldNumBuckets; ++i) { + struct cso_node *firstNode = oldBuckets[i]; + while (firstNode != e) { + unsigned h = firstNode->key; + struct cso_node *lastNode = firstNode; + while (lastNode->next != e && lastNode->next->key == h) + lastNode = lastNode->next; + + struct cso_node *afterLastNode = lastNode->next; + struct cso_node **beforeFirstNode = &hash->buckets[h % hash->numBuckets]; + while (*beforeFirstNode != e) + beforeFirstNode = &(*beforeFirstNode)->next; + lastNode->next = *beforeFirstNode; + *beforeFirstNode = firstNode; + firstNode = afterLastNode; + } + } + free(oldBuckets); + } +} + +static void cso_data_might_grow(struct cso_hash_data *hash) +{ + if (hash->size >= hash->numBuckets) + cso_data_rehash(hash, hash->numBits + 1); +} + +static void cso_data_has_shrunk(struct cso_hash_data *hash) +{ + if (hash->size <= (hash->numBuckets >> 3) && + hash->numBits > hash->userNumBits) { + int max = MAX(hash->numBits-2, hash->userNumBits); + cso_data_rehash(hash, max); + } +} + +static struct cso_node *cso_data_first_node(struct cso_hash_data *hash) +{ + struct cso_node *e = (struct cso_node *)(hash); + struct cso_node **bucket = hash->buckets; + int n = hash->numBuckets; + while (n--) { + if (*bucket != e) + return *bucket; + ++bucket; + } + return e; +} + +static struct cso_node **cso_hash_find_node(struct cso_hash *hash, unsigned akey) +{ + struct cso_node **node; + + if (hash->data.d->numBuckets) { + node = (struct cso_node **)(&hash->data.d->buckets[akey % hash->data.d->numBuckets]); + assert(*node == hash->data.e || (*node)->next); + while (*node != hash->data.e && (*node)->key != akey) + node = &(*node)->next; + } else { + node = (struct cso_node **)((const struct cso_node * const *)(&hash->data.e)); + } + return node; +} + +struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, + unsigned key, void *data) +{ + cso_data_might_grow(hash->data.d); + + struct cso_node **nextNode = cso_hash_find_node(hash, key); + struct cso_node *node = cso_hash_create_node(hash, key, data, nextNode); + struct cso_hash_iter iter = {hash, node}; + return iter; +} + +struct cso_hash * cso_hash_create(void) +{ + struct cso_hash *hash = malloc(sizeof(struct cso_hash)); + hash->data.d = malloc(sizeof(struct cso_hash_data)); + hash->data.d->fakeNext = 0; + hash->data.d->buckets = 0; + hash->data.d->size = 0; + hash->data.d->nodeSize = sizeof(struct cso_node); + hash->data.d->userNumBits = MinNumBits; + hash->data.d->numBits = 0; + hash->data.d->numBuckets = 0; + + return hash; +} + +void cso_hash_delete(struct cso_hash *hash) +{ + struct cso_node *e_for_x = (struct cso_node *)(hash->data.d); + struct cso_node **bucket = (struct cso_node **)(hash->data.d->buckets); + int n = hash->data.d->numBuckets; + while (n--) { + struct cso_node *cur = *bucket++; + while (cur != e_for_x) { + struct cso_node *next = cur->next; + cso_data_free_node(cur); + cur = next; + } + } + free(hash->data.d->buckets); + free(hash->data.d); + free(hash); +} + +struct cso_hash_iter cso_hash_find(struct cso_hash *hash, + unsigned key) +{ + struct cso_node **nextNode = cso_hash_find_node(hash, key); + struct cso_hash_iter iter = {hash, *nextNode}; + return iter; +} + +unsigned cso_hash_iter_key(struct cso_hash_iter iter) +{ + if (!iter.node || iter.hash->data.e == iter.node) + return 0; + return iter.node->key; +} + +void * cso_hash_iter_data(struct cso_hash_iter iter) +{ + if (!iter.node || iter.hash->data.e == iter.node) + return 0; + return iter.node->value; +} + +static struct cso_node *cso_hash_data_next(struct cso_node *node) +{ + union { + struct cso_node *next; + struct cso_node *e; + struct cso_hash_data *d; + } a; + a.next = node->next; + if (!a.next) { + fprintf(stderr, "iterating beyond the last element\n"); + return 0; + } + if (a.next->next) + return a.next; + + int start = (node->key % a.d->numBuckets) + 1; + struct cso_node **bucket = a.d->buckets + start; + int n = a.d->numBuckets - start; + while (n--) { + if (*bucket != a.e) + return *bucket; + ++bucket; + } + return a.e; +} + + +static struct cso_node *cso_hash_data_prev(struct cso_node *node) +{ + union { + struct cso_node *e; + struct cso_hash_data *d; + } a; + + a.e = node; + while (a.e->next) + a.e = a.e->next; + + int start; + if (node == a.e) + start = a.d->numBuckets - 1; + else + start = node->key % a.d->numBuckets; + + struct cso_node *sentinel = node; + struct cso_node **bucket = a.d->buckets + start; + while (start >= 0) { + if (*bucket != sentinel) { + struct cso_node *prev = *bucket; + while (prev->next != sentinel) + prev = prev->next; + return prev; + } + + sentinel = a.e; + --bucket; + --start; + } + fprintf(stderr, "iterating backward beyond first element\n"); + return a.e; +} + +struct cso_hash_iter cso_hash_iter_next(struct cso_hash_iter iter) +{ + struct cso_hash_iter next = {iter.hash, cso_hash_data_next(iter.node)}; + return next; +} + +int cso_hash_iter_is_null(struct cso_hash_iter iter) +{ + if (!iter.node || iter.node == iter.hash->data.e) + return 1; + return 0; +} + +void * cso_hash_take(struct cso_hash *hash, + unsigned akey) +{ + struct cso_node **node = cso_hash_find_node(hash, akey); + if (*node != hash->data.e) { + void *t = (*node)->value; + struct cso_node *next = (*node)->next; + cso_data_free_node(*node); + *node = next; + --hash->data.d->size; + cso_data_has_shrunk(hash->data.d); + return t; + } + return 0; +} + +struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter) +{ + struct cso_hash_iter prev = {iter.hash, + cso_hash_data_prev(iter.node)}; + return prev; +} + +struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash) +{ + struct cso_hash_iter iter = {hash, cso_data_first_node(hash->data.d)}; + return iter; +} diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h new file mode 100644 index 0000000000..b4aa111860 --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/cso_hash.h @@ -0,0 +1,62 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin + */ + +#ifndef CSO_HASH_H +#define CSO_HASH_H + +struct cso_hash; +struct cso_node; + +struct cso_hash_iter { + struct cso_hash *hash; + struct cso_node *node; +}; + +struct cso_hash *cso_hash_create(void); +void cso_hash_delete(struct cso_hash *hash); + +struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, unsigned key, + void *data); +void *cso_hash_take(struct cso_hash *hash, unsigned key); + +struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash); +struct cso_hash_iter cso_hash_find(struct cso_hash *hash, unsigned key); + + +int cso_hash_iter_is_null(struct cso_hash_iter iter); +unsigned cso_hash_iter_key(struct cso_hash_iter iter); +void *cso_hash_iter_data(struct cso_hash_iter iter); + +struct cso_hash_iter cso_hash_iter_next(struct cso_hash_iter iter); +struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter); + +#endif diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile new file mode 100644 index 0000000000..451911a354 --- /dev/null +++ b/src/gallium/auxiliary/draw/Makefile @@ -0,0 +1,2 @@ +default: + cd .. ; make diff --git a/src/gallium/auxiliary/draw/draw_clip.c b/src/gallium/auxiliary/draw/draw_clip.c new file mode 100644 index 0000000000..e3051507ea --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_clip.c @@ -0,0 +1,488 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Clipping stage + * + * \author Keith Whitwell + */ + + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" + +#include "draw_context.h" +#include "draw_private.h" + + +#ifndef IS_NEGATIVE +#define IS_NEGATIVE(X) ((X) < 0.0) +#endif + +#ifndef DIFFERENT_SIGNS +#define DIFFERENT_SIGNS(x, y) ((x) * (y) <= 0.0F && (x) - (y) != 0.0F) +#endif + +#ifndef MAX_CLIPPED_VERTICES +#define MAX_CLIPPED_VERTICES ((2 * (6 + PIPE_MAX_CLIP_PLANES))+1) +#endif + + + +struct clipper { + struct draw_stage stage; /**< base class */ + + /* Basically duplicate some of the flatshading logic here: + */ + boolean flat; + uint num_color_attribs; + uint color_attribs[4]; /* front/back primary/secondary colors */ + + float (*plane)[4]; +}; + + +/* This is a bit confusing: + */ +static INLINE struct clipper *clipper_stage( struct draw_stage *stage ) +{ + return (struct clipper *)stage; +} + + +#define LINTERP(T, OUT, IN) ((OUT) + (T) * ((IN) - (OUT))) + + +/* All attributes are float[4], so this is easy: + */ +static void interp_attr( float *fdst, + float t, + const float *fin, + const float *fout ) +{ + fdst[0] = LINTERP( t, fout[0], fin[0] ); + fdst[1] = LINTERP( t, fout[1], fin[1] ); + fdst[2] = LINTERP( t, fout[2], fin[2] ); + fdst[3] = LINTERP( t, fout[3], fin[3] ); +} + +static void copy_colors( struct draw_stage *stage, + struct vertex_header *dst, + const struct vertex_header *src ) +{ + const struct clipper *clipper = clipper_stage(stage); + uint i; + for (i = 0; i < clipper->num_color_attribs; i++) { + const uint attr = clipper->color_attribs[i]; + COPY_4FV(dst->data[attr], src->data[attr]); + } +} + + + +/* Interpolate between two vertices to produce a third. + */ +static void interp( const struct clipper *clip, + struct vertex_header *dst, + float t, + const struct vertex_header *out, + const struct vertex_header *in ) +{ + const unsigned nr_attrs = clip->stage.draw->num_vs_outputs; + unsigned j; + + /* Vertex header. + */ + { + dst->clipmask = 0; + dst->edgeflag = 0; + dst->pad = 0; + dst->vertex_id = UNDEFINED_VERTEX_ID; + } + + /* Clip coordinates: interpolate normally + */ + { + interp_attr(dst->clip, t, in->clip, out->clip); + } + + /* Do the projective divide and insert window coordinates: + */ + { + const float *pos = dst->clip; + const float *scale = clip->stage.draw->viewport.scale; + const float *trans = clip->stage.draw->viewport.translate; + const float oow = 1.0f / pos[3]; + + dst->data[0][0] = pos[0] * oow * scale[0] + trans[0]; + dst->data[0][1] = pos[1] * oow * scale[1] + trans[1]; + dst->data[0][2] = pos[2] * oow * scale[2] + trans[2]; + dst->data[0][3] = oow; + } + + /* Other attributes + * Note: start at 1 to skip winpos (data[0]) since we just computed + * it above. + */ + for (j = 1; j < nr_attrs; j++) { + interp_attr(dst->data[j], t, in->data[j], out->data[j]); + } +} + + +static void emit_poly( struct draw_stage *stage, + struct vertex_header **inlist, + unsigned n, + const struct prim_header *origPrim) +{ + struct prim_header header; + unsigned i; + + /* later stages may need the determinant, but only the sign matters */ + header.det = origPrim->det; + + for (i = 2; i < n; i++) { + header.v[0] = inlist[i-1]; + header.v[1] = inlist[i]; + header.v[2] = inlist[0]; /* keep in v[2] for flatshading */ + + { + unsigned tmp1 = header.v[1]->edgeflag; + unsigned tmp2 = header.v[2]->edgeflag; + + if (i != n-1) header.v[1]->edgeflag = 0; + if (i != 2) header.v[2]->edgeflag = 0; + + header.edgeflags = ((header.v[0]->edgeflag << 0) | + (header.v[1]->edgeflag << 1) | + (header.v[2]->edgeflag << 2)); + + stage->next->tri( stage->next, &header ); + + header.v[1]->edgeflag = tmp1; + header.v[2]->edgeflag = tmp2; + } + } +} + + + + +/* Clip a triangle against the viewport and user clip planes. + */ +static void +do_clip_tri( struct draw_stage *stage, + struct prim_header *header, + unsigned clipmask ) +{ + struct clipper *clipper = clipper_stage( stage ); + struct vertex_header *a[MAX_CLIPPED_VERTICES]; + struct vertex_header *b[MAX_CLIPPED_VERTICES]; + struct vertex_header **inlist = a; + struct vertex_header **outlist = b; + unsigned tmpnr = 0; + unsigned n = 3; + unsigned i; + + inlist[0] = header->v[0]; + inlist[1] = header->v[1]; + inlist[2] = header->v[2]; + + while (clipmask && n >= 3) { + const unsigned plane_idx = ffs(clipmask)-1; + const float *plane = clipper->plane[plane_idx]; + struct vertex_header *vert_prev = inlist[0]; + float dp_prev = dot4( vert_prev->clip, plane ); + unsigned outcount = 0; + + clipmask &= ~(1<clip, plane ); + + if (!IS_NEGATIVE(dp_prev)) { + outlist[outcount++] = vert_prev; + } + + if (DIFFERENT_SIGNS(dp, dp_prev)) { + struct vertex_header *new_vert = clipper->stage.tmp[tmpnr++]; + outlist[outcount++] = new_vert; + + if (IS_NEGATIVE(dp)) { + /* Going out of bounds. Avoid division by zero as we + * know dp != dp_prev from DIFFERENT_SIGNS, above. + */ + float t = dp / (dp - dp_prev); + interp( clipper, new_vert, t, vert, vert_prev ); + + /* Force edgeflag true in this case: + */ + new_vert->edgeflag = 1; + } else { + /* Coming back in. + */ + float t = dp_prev / (dp_prev - dp); + interp( clipper, new_vert, t, vert_prev, vert ); + + /* Copy starting vert's edgeflag: + */ + new_vert->edgeflag = vert_prev->edgeflag; + } + } + + vert_prev = vert; + dp_prev = dp; + } + + { + struct vertex_header **tmp = inlist; + inlist = outlist; + outlist = tmp; + n = outcount; + } + } + + /* If flat-shading, copy color to new provoking vertex. + */ + if (clipper->flat && inlist[0] != header->v[2]) { + if (1) { + inlist[0] = dup_vert(stage, inlist[0], tmpnr++); + } + + copy_colors(stage, inlist[0], header->v[2]); + } + + + + /* Emit the polygon as triangles to the setup stage: + */ + if (n >= 3) + emit_poly( stage, inlist, n, header ); +} + + +/* Clip a line against the viewport and user clip planes. + */ +static void +do_clip_line( struct draw_stage *stage, + struct prim_header *header, + unsigned clipmask ) +{ + const struct clipper *clipper = clipper_stage( stage ); + struct vertex_header *v0 = header->v[0]; + struct vertex_header *v1 = header->v[1]; + const float *pos0 = v0->clip; + const float *pos1 = v1->clip; + float t0 = 0.0F; + float t1 = 0.0F; + struct prim_header newprim; + + while (clipmask) { + const unsigned plane_idx = ffs(clipmask)-1; + const float *plane = clipper->plane[plane_idx]; + const float dp0 = dot4( pos0, plane ); + const float dp1 = dot4( pos1, plane ); + + if (dp1 < 0.0F) { + float t = dp1 / (dp1 - dp0); + t1 = MAX2(t1, t); + } + + if (dp0 < 0.0F) { + float t = dp0 / (dp0 - dp1); + t0 = MAX2(t0, t); + } + + if (t0 + t1 >= 1.0F) + return; /* discard */ + + clipmask &= ~(1 << plane_idx); /* turn off this plane's bit */ + } + + if (v0->clipmask) { + interp( clipper, stage->tmp[0], t0, v0, v1 ); + + if (clipper->flat) + copy_colors(stage, stage->tmp[0], v0); + + newprim.v[0] = stage->tmp[0]; + } + else { + newprim.v[0] = v0; + } + + if (v1->clipmask) { + interp( clipper, stage->tmp[1], t1, v1, v0 ); + newprim.v[1] = stage->tmp[1]; + } + else { + newprim.v[1] = v1; + } + + stage->next->line( stage->next, &newprim ); +} + + +static void +clip_point( struct draw_stage *stage, + struct prim_header *header ) +{ + if (header->v[0]->clipmask == 0) + stage->next->point( stage->next, header ); +} + + +static void +clip_line( struct draw_stage *stage, + struct prim_header *header ) +{ + unsigned clipmask = (header->v[0]->clipmask | + header->v[1]->clipmask); + + if (clipmask == 0) { + /* no clipping needed */ + stage->next->line( stage->next, header ); + } + else if ((header->v[0]->clipmask & + header->v[1]->clipmask) == 0) { + do_clip_line(stage, header, clipmask); + } + /* else, totally clipped */ +} + + +static void +clip_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + unsigned clipmask = (header->v[0]->clipmask | + header->v[1]->clipmask | + header->v[2]->clipmask); + + if (clipmask == 0) { + /* no clipping needed */ + stage->next->tri( stage->next, header ); + } + else if ((header->v[0]->clipmask & + header->v[1]->clipmask & + header->v[2]->clipmask) == 0) { + do_clip_tri(stage, header, clipmask); + } +} + +/* Update state. Could further delay this until we hit the first + * primitive that really requires clipping. + */ +static void +clip_init_state( struct draw_stage *stage ) +{ + struct clipper *clipper = clipper_stage( stage ); + + clipper->flat = stage->draw->rasterizer->flatshade ? TRUE : FALSE; + + if (clipper->flat) { + const struct pipe_shader_state *vs = stage->draw->vertex_shader->state; + uint i; + + clipper->num_color_attribs = 0; + for (i = 0; i < vs->num_outputs; i++) { + if (vs->output_semantic_name[i] == TGSI_SEMANTIC_COLOR || + vs->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { + clipper->color_attribs[clipper->num_color_attribs++] = i; + } + } + } + + stage->tri = clip_tri; + stage->line = clip_line; +} + + + +static void clip_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + clip_init_state( stage ); + stage->tri( stage, header ); +} + +static void clip_first_line( struct draw_stage *stage, + struct prim_header *header ) +{ + clip_init_state( stage ); + stage->line( stage, header ); +} + + +static void clip_flush( struct draw_stage *stage, + unsigned flags ) +{ + stage->tri = clip_first_tri; + stage->line = clip_first_line; + stage->next->flush( stage->next, flags ); +} + + +static void clip_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void clip_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Allocate a new clipper stage. + * \return pointer to new stage object + */ +struct draw_stage *draw_clip_stage( struct draw_context *draw ) +{ + struct clipper *clipper = CALLOC_STRUCT(clipper); + + draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 ); + + clipper->stage.draw = draw; + clipper->stage.point = clip_point; + clipper->stage.line = clip_first_line; + clipper->stage.tri = clip_first_tri; + clipper->stage.flush = clip_flush; + clipper->stage.reset_stipple_counter = clip_reset_stipple_counter; + clipper->stage.destroy = clip_destroy; + + clipper->plane = draw->plane; + + return &clipper->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c new file mode 100644 index 0000000000..4be3830316 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -0,0 +1,293 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + + +#include "pipe/p_util.h" +#include "draw_context.h" +#include "draw_private.h" + + + +struct draw_context *draw_create( void ) +{ + struct draw_context *draw = CALLOC_STRUCT( draw_context ); + +#if defined(__i386__) || defined(__386__) + draw->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL; +#else + draw->use_sse = FALSE; +#endif + + /* create pipeline stages */ + draw->pipeline.wide = draw_wide_stage( draw ); + draw->pipeline.stipple = draw_stipple_stage( draw ); + draw->pipeline.unfilled = draw_unfilled_stage( draw ); + draw->pipeline.twoside = draw_twoside_stage( draw ); + draw->pipeline.offset = draw_offset_stage( draw ); + draw->pipeline.clip = draw_clip_stage( draw ); + draw->pipeline.flatshade = draw_flatshade_stage( draw ); + draw->pipeline.cull = draw_cull_stage( draw ); + draw->pipeline.validate = draw_validate_stage( draw ); + draw->pipeline.first = draw->pipeline.validate; + + ASSIGN_4V( draw->plane[0], -1, 0, 0, 1 ); + ASSIGN_4V( draw->plane[1], 1, 0, 0, 1 ); + ASSIGN_4V( draw->plane[2], 0, -1, 0, 1 ); + ASSIGN_4V( draw->plane[3], 0, 1, 0, 1 ); + ASSIGN_4V( draw->plane[4], 0, 0, 1, 1 ); /* yes these are correct */ + ASSIGN_4V( draw->plane[5], 0, 0, -1, 1 ); /* mesa's a bit wonky */ + draw->nr_planes = 6; + + /* Statically allocate maximum sized vertices for the cache - could be cleverer... + */ + { + uint i; + const unsigned size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f; + char *tmp = align_malloc(Elements(draw->vcache.vertex) * size, 16); + + for (i = 0; i < Elements(draw->vcache.vertex); i++) + draw->vcache.vertex[i] = (struct vertex_header *)(tmp + i * size); + } + + draw->shader_queue_flush = draw_vertex_shader_queue_flush; + + draw->convert_wide_points = TRUE; + draw->convert_wide_lines = TRUE; + + draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ + + draw_vertex_cache_invalidate( draw ); + draw_set_mapped_element_buffer( draw, 0, NULL ); + + return draw; +} + + +void draw_destroy( struct draw_context *draw ) +{ + draw->pipeline.wide->destroy( draw->pipeline.wide ); + draw->pipeline.stipple->destroy( draw->pipeline.stipple ); + draw->pipeline.unfilled->destroy( draw->pipeline.unfilled ); + draw->pipeline.twoside->destroy( draw->pipeline.twoside ); + draw->pipeline.offset->destroy( draw->pipeline.offset ); + draw->pipeline.clip->destroy( draw->pipeline.clip ); + draw->pipeline.flatshade->destroy( draw->pipeline.flatshade ); + draw->pipeline.cull->destroy( draw->pipeline.cull ); + draw->pipeline.validate->destroy( draw->pipeline.validate ); + if (draw->pipeline.rasterize) + draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); + tgsi_exec_machine_free_data(&draw->machine); + align_free( draw->vcache.vertex[0] ); /* Frees all the vertices. */ + FREE( draw ); +} + + + +void draw_flush( struct draw_context *draw ) +{ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); +} + + + +/** + * Register new primitive rasterization/rendering state. + * This causes the drawing pipeline to be rebuilt. + */ +void draw_set_rasterizer_state( struct draw_context *draw, + const struct pipe_rasterizer_state *raster ) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + + draw->rasterizer = raster; +} + + +/** + * Plug in the primitive rendering/rasterization stage (which is the last + * stage in the drawing pipeline). + * This is provided by the device driver. + */ +void draw_set_rasterize_stage( struct draw_context *draw, + struct draw_stage *stage ) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + + draw->pipeline.rasterize = stage; +} + + +/** + * Set the draw module's clipping state. + */ +void draw_set_clip_state( struct draw_context *draw, + const struct pipe_clip_state *clip ) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + + assert(clip->nr <= PIPE_MAX_CLIP_PLANES); + memcpy(&draw->plane[6], clip->ucp, clip->nr * sizeof(clip->ucp[0])); + draw->nr_planes = 6 + clip->nr; +} + + +/** + * Set the draw module's viewport state. + */ +void draw_set_viewport_state( struct draw_context *draw, + const struct pipe_viewport_state *viewport ) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->viewport = *viewport; /* struct copy */ +} + + + +void +draw_set_vertex_buffer(struct draw_context *draw, + unsigned attr, + const struct pipe_vertex_buffer *buffer) +{ + draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); + assert(attr < PIPE_ATTRIB_MAX); + draw->vertex_buffer[attr] = *buffer; +} + + +void +draw_set_vertex_element(struct draw_context *draw, + unsigned attr, + const struct pipe_vertex_element *element) +{ + draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); + assert(attr < PIPE_ATTRIB_MAX); + draw->vertex_element[attr] = *element; +} + + +/** + * Tell drawing context where to find mapped vertex buffers. + */ +void +draw_set_mapped_vertex_buffer(struct draw_context *draw, + unsigned attr, const void *buffer) +{ + draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); + draw->user.vbuffer[attr] = buffer; +} + + +void +draw_set_mapped_constant_buffer(struct draw_context *draw, + const void *buffer) +{ + draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); + draw->user.constants = buffer; +} + + +/** + * Tells the draw module whether to convert wide points (size != 1) + * into triangles. + */ +void +draw_convert_wide_points(struct draw_context *draw, boolean enable) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->convert_wide_points = enable; +} + + +/** + * Tells the draw module whether to convert wide lines (width != 1) + * into triangles. + */ +void +draw_convert_wide_lines(struct draw_context *draw, boolean enable) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->convert_wide_lines = enable; +} + + +/** + * Allocate space for temporary post-transform vertices, such as for clipping. + */ +void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ) +{ + assert(!stage->tmp); + + stage->nr_tmps = nr; + + if (nr) { + ubyte *store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr ); + unsigned i; + + stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr ); + + for (i = 0; i < nr; i++) + stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE); + } +} + + +void draw_free_temp_verts( struct draw_stage *stage ) +{ + if (stage->tmp) { + FREE( stage->tmp[0] ); + FREE( stage->tmp ); + stage->tmp = NULL; + } +} + + +boolean draw_use_sse(struct draw_context *draw) +{ + return (boolean) draw->use_sse; +} + + +void draw_reset_vertex_ids(struct draw_context *draw) +{ + struct draw_stage *stage = draw->pipeline.first; + + while (stage) { + unsigned i; + + for (i = 0; i < stage->nr_tmps; i++) + stage->tmp[i]->vertex_id = UNDEFINED_VERTEX_ID; + + stage = stage->next; + } + + draw_vertex_cache_reset_vertex_ids(draw); +} diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h new file mode 100644 index 0000000000..ddeb184497 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -0,0 +1,142 @@ + +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Public interface into the drawing module. + */ + +/* Authors: Keith Whitwell + */ + + +#ifndef DRAW_CONTEXT_H +#define DRAW_CONTEXT_H + + +#include "pipe/p_state.h" + + +struct vertex_buffer; +struct vertex_info; +struct draw_context; +struct draw_stage; +struct draw_vertex_shader; + + +/** + * Clipmask flags + */ +/*@{*/ +#define CLIP_RIGHT_BIT 0x01 +#define CLIP_LEFT_BIT 0x02 +#define CLIP_TOP_BIT 0x04 +#define CLIP_BOTTOM_BIT 0x08 +#define CLIP_NEAR_BIT 0x10 +#define CLIP_FAR_BIT 0x20 +/*@}*/ + +/** + * Bitshift for each clip flag + */ +/*@{*/ +#define CLIP_RIGHT_SHIFT 0 +#define CLIP_LEFT_SHIFT 1 +#define CLIP_TOP_SHIFT 2 +#define CLIP_BOTTOM_SHIFT 3 +#define CLIP_NEAR_SHIFT 4 +#define CLIP_FAR_SHIFT 5 +/*@}*/ + + +struct draw_context *draw_create( void ); + +void draw_destroy( struct draw_context *draw ); + +void draw_set_viewport_state( struct draw_context *draw, + const struct pipe_viewport_state *viewport ); + +void draw_set_clip_state( struct draw_context *pipe, + const struct pipe_clip_state *clip ); + +void draw_set_rasterizer_state( struct draw_context *draw, + const struct pipe_rasterizer_state *raster ); + +void draw_set_rasterize_stage( struct draw_context *draw, + struct draw_stage *stage ); + +void draw_convert_wide_points(struct draw_context *draw, boolean enable); + +void draw_convert_wide_lines(struct draw_context *draw, boolean enable); + + +struct draw_vertex_shader * +draw_create_vertex_shader(struct draw_context *draw, + const struct pipe_shader_state *shader); +void draw_bind_vertex_shader(struct draw_context *draw, + struct draw_vertex_shader *dvs); +void draw_delete_vertex_shader(struct draw_context *draw, + struct draw_vertex_shader *dvs); + +boolean draw_use_sse(struct draw_context *draw); + +void draw_set_vertex_buffer(struct draw_context *draw, + unsigned attr, + const struct pipe_vertex_buffer *buffer); + +void draw_set_vertex_element(struct draw_context *draw, + unsigned attr, + const struct pipe_vertex_element *element); + +void draw_set_mapped_element_buffer( struct draw_context *draw, + unsigned eltSize, void *elements ); + +void draw_set_mapped_vertex_buffer(struct draw_context *draw, + unsigned attr, const void *buffer); + +void draw_set_mapped_constant_buffer(struct draw_context *draw, + const void *buffer); + + +/*********************************************************************** + * draw_prim.c + */ + +void draw_arrays(struct draw_context *draw, unsigned prim, + unsigned start, unsigned count); + +void draw_flush(struct draw_context *draw); + +/*********************************************************************** + * draw_debug.c + */ +boolean draw_validate_prim( unsigned prim, unsigned length ); +unsigned draw_trim_prim( unsigned mode, unsigned count ); + + + +#endif /* DRAW_CONTEXT_H */ diff --git a/src/gallium/auxiliary/draw/draw_cull.c b/src/gallium/auxiliary/draw/draw_cull.c new file mode 100644 index 0000000000..8177b0ac86 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_cull.c @@ -0,0 +1,150 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Drawing stage for polygon culling + */ + +/* Authors: Keith Whitwell + */ + + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "draw_private.h" + + +struct cull_stage { + struct draw_stage stage; + unsigned winding; /**< which winding(s) to cull (one of PIPE_WINDING_x) */ +}; + + +static INLINE struct cull_stage *cull_stage( struct draw_stage *stage ) +{ + return (struct cull_stage *)stage; +} + + + + +static void cull_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + /* Window coords: */ + const float *v0 = header->v[0]->data[0]; + const float *v1 = header->v[1]->data[0]; + const float *v2 = header->v[2]->data[0]; + + /* edge vectors e = v0 - v2, f = v1 - v2 */ + const float ex = v0[0] - v2[0]; + const float ey = v0[1] - v2[1]; + const float fx = v1[0] - v2[0]; + const float fy = v1[1] - v2[1]; + + /* det = cross(e,f).z */ + header->det = ex * fy - ey * fx; + + if (header->det != 0) { + /* if (det < 0 then Z points toward camera and triangle is + * counter-clockwise winding. + */ + unsigned winding = (header->det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW; + + if ((winding & cull_stage(stage)->winding) == 0) { + /* triangle is not culled, pass to next stage */ + stage->next->tri( stage->next, header ); + } + } +} + +static void cull_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct cull_stage *cull = cull_stage(stage); + + cull->winding = stage->draw->rasterizer->cull_mode; + + stage->tri = cull_tri; + stage->tri( stage, header ); +} + + + +static void cull_line( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->line( stage->next, header ); +} + + +static void cull_point( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->point( stage->next, header ); +} + + +static void cull_flush( struct draw_stage *stage, unsigned flags ) +{ + stage->tri = cull_first_tri; + stage->next->flush( stage->next, flags ); +} + +static void cull_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void cull_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Create a new polygon culling stage. + */ +struct draw_stage *draw_cull_stage( struct draw_context *draw ) +{ + struct cull_stage *cull = CALLOC_STRUCT(cull_stage); + + draw_alloc_temp_verts( &cull->stage, 0 ); + + cull->stage.draw = draw; + cull->stage.next = NULL; + cull->stage.point = cull_point; + cull->stage.line = cull_line; + cull->stage.tri = cull_first_tri; + cull->stage.flush = cull_flush; + cull->stage.reset_stipple_counter = cull_reset_stipple_counter; + cull->stage.destroy = cull_destroy; + + return &cull->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_debug.c b/src/gallium/auxiliary/draw/draw_debug.c new file mode 100644 index 0000000000..d6220b5f62 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_debug.c @@ -0,0 +1,113 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "draw_private.h" +#include "draw_context.h" + + + +static void +draw_prim_info(unsigned prim, unsigned *first, unsigned *incr) +{ + assert(prim >= PIPE_PRIM_POINTS); + assert(prim <= PIPE_PRIM_POLYGON); + + switch (prim) { + case PIPE_PRIM_POINTS: + *first = 1; + *incr = 1; + break; + case PIPE_PRIM_LINES: + *first = 2; + *incr = 2; + break; + case PIPE_PRIM_LINE_STRIP: + *first = 2; + *incr = 1; + break; + case PIPE_PRIM_LINE_LOOP: + *first = 2; + *incr = 1; + break; + case PIPE_PRIM_TRIANGLES: + *first = 3; + *incr = 3; + break; + case PIPE_PRIM_TRIANGLE_STRIP: + *first = 3; + *incr = 1; + break; + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + *first = 3; + *incr = 1; + break; + case PIPE_PRIM_QUADS: + *first = 4; + *incr = 4; + break; + case PIPE_PRIM_QUAD_STRIP: + *first = 4; + *incr = 2; + break; + default: + assert(0); + *first = 1; + *incr = 1; + break; + } +} + + +unsigned +draw_trim_prim( unsigned mode, unsigned count ) +{ + unsigned length, first, incr; + + draw_prim_info( mode, &first, &incr ); + + if (count < first) + length = 0; + else + length = count - (count - first) % incr; + + return length; +} + + +boolean +draw_validate_prim( unsigned mode, unsigned count ) +{ + return (count > 0 && + count == draw_trim_prim( mode, count )); +} + diff --git a/src/gallium/auxiliary/draw/draw_flatshade.c b/src/gallium/auxiliary/draw/draw_flatshade.c new file mode 100644 index 0000000000..4398abbc60 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_flatshade.c @@ -0,0 +1,205 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "draw_private.h" + + +/** subclass of draw_stage */ +struct flat_stage +{ + struct draw_stage stage; + + uint num_color_attribs; + uint color_attribs[4]; /* front/back primary/secondary colors */ +}; + + +static INLINE struct flat_stage * +flat_stage(struct draw_stage *stage) +{ + return (struct flat_stage *) stage; +} + + +/** Copy all the color attributes from 'src' vertex to 'dst' vertex */ +static INLINE void copy_colors( struct draw_stage *stage, + struct vertex_header *dst, + const struct vertex_header *src ) +{ + const struct flat_stage *flat = flat_stage(stage); + uint i; + for (i = 0; i < flat->num_color_attribs; i++) { + const uint attr = flat->color_attribs[i]; + COPY_4FV(dst->data[attr], src->data[attr]); + } +} + + +/** Copy all the color attributes from src vertex to dst0 & dst1 vertices */ +static INLINE void copy_colors2( struct draw_stage *stage, + struct vertex_header *dst0, + struct vertex_header *dst1, + const struct vertex_header *src ) +{ + const struct flat_stage *flat = flat_stage(stage); + uint i; + for (i = 0; i < flat->num_color_attribs; i++) { + const uint attr = flat->color_attribs[i]; + COPY_4FV(dst0->data[attr], src->data[attr]); + COPY_4FV(dst1->data[attr], src->data[attr]); + } +} + + +/** + * Flatshade tri. Required for clipping and when unfilled tris are + * active, otherwise handled by hardware. + */ +static void flatshade_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct prim_header tmp; + + tmp.det = header->det; + tmp.edgeflags = header->edgeflags; + tmp.v[0] = dup_vert(stage, header->v[0], 0); + tmp.v[1] = dup_vert(stage, header->v[1], 1); + tmp.v[2] = header->v[2]; + + copy_colors2(stage, tmp.v[0], tmp.v[1], tmp.v[2]); + + stage->next->tri( stage->next, &tmp ); +} + + +/** + * Flatshade line. Required for clipping. + */ +static void flatshade_line( struct draw_stage *stage, + struct prim_header *header ) +{ + struct prim_header tmp; + + tmp.v[0] = dup_vert(stage, header->v[0], 0); + tmp.v[1] = header->v[1]; + + copy_colors(stage, tmp.v[0], tmp.v[1]); + + stage->next->line( stage->next, &tmp ); +} + + +static void flatshade_point( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->point( stage->next, header ); +} + + +static void flatshade_init_state( struct draw_stage *stage ) +{ + struct flat_stage *flat = flat_stage(stage); + const struct pipe_shader_state *vs = stage->draw->vertex_shader->state; + uint i; + + /* Find which vertex shader outputs are colors, make a list */ + flat->num_color_attribs = 0; + for (i = 0; i < vs->num_outputs; i++) { + if (vs->output_semantic_name[i] == TGSI_SEMANTIC_COLOR || + vs->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { + flat->color_attribs[flat->num_color_attribs++] = i; + } + } + + stage->line = flatshade_line; + stage->tri = flatshade_tri; +} + +static void flatshade_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + flatshade_init_state( stage ); + stage->tri( stage, header ); +} + +static void flatshade_first_line( struct draw_stage *stage, + struct prim_header *header ) +{ + flatshade_init_state( stage ); + stage->line( stage, header ); +} + + +static void flatshade_flush( struct draw_stage *stage, + unsigned flags ) +{ + stage->tri = flatshade_first_tri; + stage->line = flatshade_first_line; + stage->next->flush( stage->next, flags ); +} + + +static void flatshade_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void flatshade_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Create flatshading drawing stage. + */ +struct draw_stage *draw_flatshade_stage( struct draw_context *draw ) +{ + struct flat_stage *flatshade = CALLOC_STRUCT(flat_stage); + + draw_alloc_temp_verts( &flatshade->stage, 2 ); + + flatshade->stage.draw = draw; + flatshade->stage.next = NULL; + flatshade->stage.point = flatshade_point; + flatshade->stage.line = flatshade_first_line; + flatshade->stage.tri = flatshade_first_tri; + flatshade->stage.flush = flatshade_flush; + flatshade->stage.reset_stipple_counter = flatshade_reset_stipple_counter; + flatshade->stage.destroy = flatshade_destroy; + + return &flatshade->stage; +} + + diff --git a/src/gallium/auxiliary/draw/draw_offset.c b/src/gallium/auxiliary/draw/draw_offset.c new file mode 100644 index 0000000000..dbc676deae --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_offset.c @@ -0,0 +1,186 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief polygon offset state + * + * \author Keith Whitwell + * \author Brian Paul + */ + +#include "pipe/p_util.h" +#include "draw_private.h" + + + +struct offset_stage { + struct draw_stage stage; + + float scale; + float units; +}; + + + +static INLINE struct offset_stage *offset_stage( struct draw_stage *stage ) +{ + return (struct offset_stage *) stage; +} + + + + + +/** + * Offset tri Z. Some hardware can handle this, but not usually when + * doing unfilled rendering. + */ +static void do_offset_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct offset_stage *offset = offset_stage(stage); + float inv_det = 1.0f / header->det; + + /* Window coords: + */ + float *v0 = header->v[0]->data[0]; + float *v1 = header->v[1]->data[0]; + float *v2 = header->v[2]->data[0]; + + /* edge vectors e = v0 - v2, f = v1 - v2 */ + float ex = v0[0] - v2[0]; + float ey = v0[1] - v2[1]; + float ez = v0[2] - v2[2]; + float fx = v1[0] - v2[0]; + float fy = v1[1] - v2[1]; + float fz = v1[2] - v2[2]; + + /* (a,b) = cross(e,f).xy */ + float a = ey*fz - ez*fy; + float b = ez*fx - ex*fz; + + float dzdx = FABSF(a * inv_det); + float dzdy = FABSF(b * inv_det); + + float zoffset = offset->units + MAX2(dzdx, dzdy) * offset->scale; + + /* + * Note: we're applying the offset and clamping per-vertex. + * Ideally, the offset is applied per-fragment prior to fragment shading. + */ + v0[2] = CLAMP(v0[2] + zoffset, 0.0f, 1.0f); + v1[2] = CLAMP(v1[2] + zoffset, 0.0f, 1.0f); + v2[2] = CLAMP(v2[2] + zoffset, 0.0f, 1.0f); + + stage->next->tri( stage->next, header ); +} + + +static void offset_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct prim_header tmp; + + tmp.det = header->det; + tmp.edgeflags = header->edgeflags; + tmp.v[0] = dup_vert(stage, header->v[0], 0); + tmp.v[1] = dup_vert(stage, header->v[1], 1); + tmp.v[2] = dup_vert(stage, header->v[2], 2); + + do_offset_tri( stage, &tmp ); +} + + +static void offset_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct offset_stage *offset = offset_stage(stage); + float mrd = 1.0f / 65535.0f; /* XXX this depends on depthbuffer bits! */ + + offset->units = stage->draw->rasterizer->offset_units * mrd; + offset->scale = stage->draw->rasterizer->offset_scale; + + stage->tri = offset_tri; + stage->tri( stage, header ); +} + + +static void offset_line( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->line( stage->next, header ); +} + + +static void offset_point( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->point( stage->next, header ); +} + + +static void offset_flush( struct draw_stage *stage, + unsigned flags ) +{ + stage->tri = offset_first_tri; + stage->next->flush( stage->next, flags ); +} + + +static void offset_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void offset_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Create polygon offset drawing stage. + */ +struct draw_stage *draw_offset_stage( struct draw_context *draw ) +{ + struct offset_stage *offset = CALLOC_STRUCT(offset_stage); + + draw_alloc_temp_verts( &offset->stage, 3 ); + + offset->stage.draw = draw; + offset->stage.next = NULL; + offset->stage.point = offset_point; + offset->stage.line = offset_line; + offset->stage.tri = offset_first_tri; + offset->stage.flush = offset_flush; + offset->stage.reset_stipple_counter = offset_reset_stipple_counter; + offset->stage.destroy = offset_destroy; + + return &offset->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c new file mode 100644 index 0000000000..51e2242719 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -0,0 +1,482 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "pipe/p_debug.h" + +#include "draw_private.h" +#include "draw_context.h" + + + +#define RP_NONE 0 +#define RP_POINT 1 +#define RP_LINE 2 +#define RP_TRI 3 + + +static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { + RP_POINT, + RP_LINE, + RP_LINE, + RP_LINE, + RP_TRI, + RP_TRI, + RP_TRI, + RP_TRI, + RP_TRI, + RP_TRI +}; + + +static void draw_prim_queue_flush( struct draw_context *draw ) +{ + unsigned i; + + if (0) + debug_printf("Flushing with %d prims, %d verts\n", + draw->pq.queue_nr, draw->vs.queue_nr); + + assert (draw->pq.queue_nr != 0); + + /* NOTE: we cannot save draw->pipeline->first in a local var because + * draw->pipeline->first is often changed by the first call to tri(), + * line(), etc. + */ + if (draw->rasterizer->line_stipple_enable) { + switch (draw->reduced_prim) { + case RP_TRI: + for (i = 0; i < draw->pq.queue_nr; i++) { + if (draw->pq.queue[i].reset_line_stipple) + draw->pipeline.first->reset_stipple_counter( draw->pipeline.first ); + + draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] ); + } + break; + case RP_LINE: + for (i = 0; i < draw->pq.queue_nr; i++) { + if (draw->pq.queue[i].reset_line_stipple) + draw->pipeline.first->reset_stipple_counter( draw->pipeline.first ); + + draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] ); + } + break; + case RP_POINT: + draw->pipeline.first->reset_stipple_counter( draw->pipeline.first ); + for (i = 0; i < draw->pq.queue_nr; i++) + draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] ); + break; + } + } + else { + switch (draw->reduced_prim) { + case RP_TRI: + for (i = 0; i < draw->pq.queue_nr; i++) + draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] ); + break; + case RP_LINE: + for (i = 0; i < draw->pq.queue_nr; i++) + draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] ); + break; + case RP_POINT: + for (i = 0; i < draw->pq.queue_nr; i++) + draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] ); + break; + } + } + + draw->pq.queue_nr = 0; + draw_vertex_cache_unreference( draw ); +} + + + +void draw_do_flush( struct draw_context *draw, unsigned flags ) +{ + if (0) + debug_printf("Flushing with %d verts, %d prims\n", + draw->vs.queue_nr, + draw->pq.queue_nr ); + + + if (flags >= DRAW_FLUSH_SHADER_QUEUE) { + if (draw->vs.queue_nr) + (*draw->shader_queue_flush)(draw); + + if (flags >= DRAW_FLUSH_PRIM_QUEUE) { + if (draw->pq.queue_nr) + draw_prim_queue_flush(draw); + + if (flags >= DRAW_FLUSH_VERTEX_CACHE) { + draw_vertex_cache_invalidate(draw); + + if (flags >= DRAW_FLUSH_STATE_CHANGE) { + draw->pipeline.first->flush( draw->pipeline.first, flags ); + draw->pipeline.first = draw->pipeline.validate; + draw->reduced_prim = ~0; + } + } + } + } +} + + + +/* Return a pointer to a freshly queued primitive header. Ensure that + * there is room in the vertex cache for a maximum of "nr_verts" new + * vertices. Flush primitive and/or vertex queues if necessary to + * make space. + */ +static struct prim_header *get_queued_prim( struct draw_context *draw, + unsigned nr_verts ) +{ + if (!draw_vertex_cache_check_space( draw, nr_verts )) { +// debug_printf("v"); + draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE ); + } + else if (draw->pq.queue_nr == PRIM_QUEUE_LENGTH) { +// debug_printf("p"); + draw_do_flush( draw, DRAW_FLUSH_PRIM_QUEUE ); + } + + assert(draw->pq.queue_nr < PRIM_QUEUE_LENGTH); + + return &draw->pq.queue[draw->pq.queue_nr++]; +} + + + +/** + * Add a point to the primitive queue. + * \param i0 index into user's vertex arrays + */ +static void do_point( struct draw_context *draw, + unsigned i0 ) +{ + struct prim_header *prim = get_queued_prim( draw, 1 ); + + prim->reset_line_stipple = 0; + prim->edgeflags = 1; + prim->pad = 0; + prim->v[0] = draw->vcache.get_vertex( draw, i0 ); +} + + +/** + * Add a line to the primitive queue. + * \param i0 index into user's vertex arrays + * \param i1 index into user's vertex arrays + */ +static void do_line( struct draw_context *draw, + boolean reset_stipple, + unsigned i0, + unsigned i1 ) +{ + struct prim_header *prim = get_queued_prim( draw, 2 ); + + prim->reset_line_stipple = reset_stipple; + prim->edgeflags = 1; + prim->pad = 0; + prim->v[0] = draw->vcache.get_vertex( draw, i0 ); + prim->v[1] = draw->vcache.get_vertex( draw, i1 ); +} + +/** + * Add a triangle to the primitive queue. + */ +static void do_triangle( struct draw_context *draw, + unsigned i0, + unsigned i1, + unsigned i2 ) +{ + struct prim_header *prim = get_queued_prim( draw, 3 ); + + prim->reset_line_stipple = 1; + prim->edgeflags = ~0; + prim->pad = 0; + prim->v[0] = draw->vcache.get_vertex( draw, i0 ); + prim->v[1] = draw->vcache.get_vertex( draw, i1 ); + prim->v[2] = draw->vcache.get_vertex( draw, i2 ); +} + +static void do_ef_triangle( struct draw_context *draw, + boolean reset_stipple, + unsigned ef_mask, + unsigned i0, + unsigned i1, + unsigned i2 ) +{ + struct prim_header *prim = get_queued_prim( draw, 3 ); + struct vertex_header *v0 = draw->vcache.get_vertex( draw, i0 ); + struct vertex_header *v1 = draw->vcache.get_vertex( draw, i1 ); + struct vertex_header *v2 = draw->vcache.get_vertex( draw, i2 ); + + prim->reset_line_stipple = reset_stipple; + + prim->edgeflags = ef_mask & ((v0->edgeflag << 0) | + (v1->edgeflag << 1) | + (v2->edgeflag << 2)); + prim->pad = 0; + prim->v[0] = v0; + prim->v[1] = v1; + prim->v[2] = v2; +} + + +static void do_ef_quad( struct draw_context *draw, + unsigned v0, + unsigned v1, + unsigned v2, + unsigned v3 ) +{ + const unsigned omitEdge2 = ~(1 << 1); + const unsigned omitEdge3 = ~(1 << 2); + do_ef_triangle( draw, 1, omitEdge2, v0, v1, v3 ); + do_ef_triangle( draw, 0, omitEdge3, v1, v2, v3 ); +} + +static void do_quad( struct draw_context *draw, + unsigned v0, + unsigned v1, + unsigned v2, + unsigned v3 ) +{ + do_triangle( draw, v0, v1, v3 ); + do_triangle( draw, v1, v2, v3 ); +} + + +/** + * Main entrypoint to draw some number of points/lines/triangles + */ +static void +draw_prim( struct draw_context *draw, + unsigned prim, unsigned start, unsigned count ) +{ + unsigned i; + boolean unfilled = (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || + draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL); + +// debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count ); + + switch (prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i ++) { + do_point( draw, + start + i ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 0; i+1 < count; i += 2) { + do_line( draw, + TRUE, + start + i + 0, + start + i + 1); + } + break; + + case PIPE_PRIM_LINE_LOOP: + if (count >= 2) { + for (i = 1; i < count; i++) { + do_line( draw, + i == 1, /* XXX: only if vb not split */ + start + i - 1, + start + i ); + } + + do_line( draw, + 0, + start + count - 1, + start + 0 ); + } + break; + + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < count; i++) { + do_line( draw, + i == 1, + start + i - 1, + start + i ); + } + break; + + case PIPE_PRIM_TRIANGLES: + if (unfilled) { + for (i = 0; i+2 < count; i += 3) { + do_ef_triangle( draw, + 1, + ~0, + start + i + 0, + start + i + 1, + start + i + 2 ); + } + } + else { + for (i = 0; i+2 < count; i += 3) { + do_triangle( draw, + start + i + 0, + start + i + 1, + start + i + 2 ); + } + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + for (i = 0; i+2 < count; i++) { + if (i & 1) { + do_triangle( draw, + start + i + 1, + start + i + 0, + start + i + 2 ); + } + else { + do_triangle( draw, + start + i + 0, + start + i + 1, + start + i + 2 ); + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (count >= 3) { + for (i = 0; i+2 < count; i++) { + do_triangle( draw, + start + 0, + start + i + 1, + start + i + 2 ); + } + } + break; + + + case PIPE_PRIM_QUADS: + if (unfilled) { + for (i = 0; i+3 < count; i += 4) { + do_ef_quad( draw, + start + i + 0, + start + i + 1, + start + i + 2, + start + i + 3); + } + } + else { + for (i = 0; i+3 < count; i += 4) { + do_quad( draw, + start + i + 0, + start + i + 1, + start + i + 2, + start + i + 3); + } + } + break; + + case PIPE_PRIM_QUAD_STRIP: + if (unfilled) { + for (i = 0; i+3 < count; i += 2) { + do_ef_quad( draw, + start + i + 2, + start + i + 0, + start + i + 1, + start + i + 3); + } + } + else { + for (i = 0; i+3 < count; i += 2) { + do_quad( draw, + start + i + 2, + start + i + 0, + start + i + 1, + start + i + 3); + } + } + break; + + case PIPE_PRIM_POLYGON: + if (unfilled) { + unsigned ef_mask = (1<<2) | (1<<0); + + for (i = 0; i+2 < count; i++) { + + if (i + 3 >= count) + ef_mask |= (1<<1); + + do_ef_triangle( draw, + i == 0, + ef_mask, + start + i + 1, + start + i + 2, + start + 0); + + ef_mask &= ~(1<<2); + } + } + else { + for (i = 0; i+2 < count; i++) { + do_triangle( draw, + start + i + 1, + start + i + 2, + start + 0); + } + } + break; + + default: + assert(0); + break; + } +} + + + + +/** + * Draw vertex arrays + * This is the main entrypoint into the drawing module. + * \param prim one of PIPE_PRIM_x + * \param start index of first vertex to draw + * \param count number of vertices to draw + */ +void +draw_arrays(struct draw_context *draw, unsigned prim, + unsigned start, unsigned count) +{ + if (reduced_prim[prim] != draw->reduced_prim) { + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->reduced_prim = reduced_prim[prim]; + } + + /* drawing done here: */ + draw_prim(draw, prim, start, count); +} + + diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h new file mode 100644 index 0000000000..3d09aef87c --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -0,0 +1,346 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Private data structures, etc for the draw module. + */ + + +/** + * Authors: + * Keith Whitwell + * Brian Paul + */ + + +#ifndef DRAW_PRIVATE_H +#define DRAW_PRIVATE_H + + +#include "pipe/p_state.h" +#include "pipe/p_defines.h" + +#include "x86/rtasm/x86sse.h" +#include "tgsi/exec/tgsi_exec.h" + + +struct gallivm_prog; +struct gallivm_cpu_engine; + +/** + * Basic vertex info. + * Carry some useful information around with the vertices in the prim pipe. + */ +struct vertex_header { + unsigned clipmask:12; + unsigned edgeflag:1; + unsigned pad:3; + unsigned vertex_id:16; + + float clip[4]; + + float data[][4]; /* Note variable size */ +}; + +/* NOTE: It should match vertex_id size above */ +#define UNDEFINED_VERTEX_ID 0xffff + +/* XXX This is too large */ +#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) + + + +/** + * Basic info for a point/line/triangle primitive. + */ +struct prim_header { + float det; /**< front/back face determinant */ + unsigned reset_line_stipple:1; + unsigned edgeflags:3; + unsigned pad:28; + struct vertex_header *v[3]; /**< 1 to 3 vertex pointers */ +}; + + + +struct draw_context; + +/** + * Base class for all primitive drawing stages. + */ +struct draw_stage +{ + struct draw_context *draw; /**< parent context */ + + struct draw_stage *next; /**< next stage in pipeline */ + + struct vertex_header **tmp; /**< temp vert storage, such as for clipping */ + unsigned nr_tmps; + + void (*point)( struct draw_stage *, + struct prim_header * ); + + void (*line)( struct draw_stage *, + struct prim_header * ); + + void (*tri)( struct draw_stage *, + struct prim_header * ); + + void (*flush)( struct draw_stage *, + unsigned flags ); + + void (*reset_stipple_counter)( struct draw_stage * ); + + void (*destroy)( struct draw_stage * ); +}; + + +#define PRIM_QUEUE_LENGTH 16 +#define VCACHE_SIZE 32 +#define VCACHE_OVERFLOW 4 +#define VS_QUEUE_LENGTH (VCACHE_SIZE + VCACHE_OVERFLOW + 1) /* can never fill up */ + +/** + * Private version of the compiled vertex_shader + */ +struct draw_vertex_shader { + const struct pipe_shader_state *state; +#if defined(__i386__) || defined(__386__) + struct x86_function sse2_program; +#endif +#ifdef MESA_LLVM + struct gallivm_prog *llvm_prog; +#endif +}; + + +/* Internal function for vertex fetch. + */ +typedef void (*fetch_func)(const void *ptr, float *attrib); +typedef void (*full_fetch_func)( struct draw_context *draw, + struct tgsi_exec_machine *machine, + const unsigned *elts, + unsigned count ); + + + +/** + * Private context for the drawing module. + */ +struct draw_context +{ + /** Drawing/primitive pipeline stages */ + struct { + struct draw_stage *first; /**< one of the following */ + + struct draw_stage *validate; + + /* stages (in logical order) */ + struct draw_stage *flatshade; + struct draw_stage *clip; + struct draw_stage *cull; + struct draw_stage *twoside; + struct draw_stage *offset; + struct draw_stage *unfilled; + struct draw_stage *stipple; + struct draw_stage *wide; + struct draw_stage *rasterize; + } pipeline; + + /* pipe state that we need: */ + const struct pipe_rasterizer_state *rasterizer; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; + struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; + const struct draw_vertex_shader *vertex_shader; + + uint num_vs_outputs; /**< convenience, from vertex_shader */ + + /* user-space vertex data, buffers */ + struct { + /** vertex element/index buffer (ex: glDrawElements) */ + const void *elts; + /** bytes per index (0, 1, 2 or 4) */ + unsigned eltSize; + + /** vertex arrays */ + const void *vbuffer[PIPE_ATTRIB_MAX]; + + /** constant buffer (for vertex shader) */ + const void *constants; + } user; + + /* Clip derived state: + */ + float plane[12][4]; + unsigned nr_planes; + + boolean convert_wide_points; /**< convert wide points to tris? */ + boolean convert_wide_lines; /**< convert side lines to tris? */ + + unsigned reduced_prim; + + /** TGSI program interpreter runtime state */ + struct tgsi_exec_machine machine; + + /* Vertex fetch internal state + */ + struct { + const ubyte *src_ptr[PIPE_ATTRIB_MAX]; + unsigned pitch[PIPE_ATTRIB_MAX]; + fetch_func fetch[PIPE_ATTRIB_MAX]; + unsigned nr_attrs; + full_fetch_func fetch_func; + } vertex_fetch; + + /* Post-tnl vertex cache: + */ + struct { + unsigned referenced; /**< bitfield */ + unsigned idx[VCACHE_SIZE + VCACHE_OVERFLOW]; + struct vertex_header *vertex[VCACHE_SIZE + VCACHE_OVERFLOW]; + unsigned overflow; + + /** To find space in the vertex cache: */ + struct vertex_header *(*get_vertex)( struct draw_context *draw, + unsigned i ); + } vcache; + + /* Vertex shader queue: + */ + struct { + struct { + unsigned elt; /**< index into the user's vertex arrays */ + struct vertex_header *dest; /**< points into vcache.vertex[] array */ + } queue[VS_QUEUE_LENGTH]; + unsigned queue_nr; + } vs; + + /** + * Run the vertex shader on all vertices in the vertex queue. + */ + void (*shader_queue_flush)(struct draw_context *draw); + + /* Prim pipeline queue: + */ + struct { + /* Need to queue up primitives until their vertices have been + * transformed by a vs queue flush. + */ + struct prim_header queue[PRIM_QUEUE_LENGTH]; + unsigned queue_nr; + } pq; + + int use_sse : 1; +#ifdef MESA_LLVM + struct gallivm_cpu_engine *engine; +#endif + + void *driver_private; +}; + + + +extern struct draw_stage *draw_unfilled_stage( struct draw_context *context ); +extern struct draw_stage *draw_twoside_stage( struct draw_context *context ); +extern struct draw_stage *draw_offset_stage( struct draw_context *context ); +extern struct draw_stage *draw_clip_stage( struct draw_context *context ); +extern struct draw_stage *draw_flatshade_stage( struct draw_context *context ); +extern struct draw_stage *draw_cull_stage( struct draw_context *context ); +extern struct draw_stage *draw_stipple_stage( struct draw_context *context ); +extern struct draw_stage *draw_wide_stage( struct draw_context *context ); +extern struct draw_stage *draw_validate_stage( struct draw_context *context ); + + +extern void draw_free_temp_verts( struct draw_stage *stage ); + +extern void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ); + +extern void draw_reset_vertex_ids( struct draw_context *draw ); + + +extern int draw_vertex_cache_check_space( struct draw_context *draw, + unsigned nr_verts ); + +extern void draw_vertex_cache_invalidate( struct draw_context *draw ); +extern void draw_vertex_cache_unreference( struct draw_context *draw ); +extern void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw ); + + +extern void draw_vertex_shader_queue_flush( struct draw_context *draw ); +#ifdef MESA_LLVM +extern void draw_vertex_shader_queue_flush_llvm( struct draw_context *draw ); +#endif + +struct tgsi_exec_machine; + +extern void draw_update_vertex_fetch( struct draw_context *draw ); + + +#define DRAW_FLUSH_SHADER_QUEUE 0x1 /* sized not to overflow, never raised */ +#define DRAW_FLUSH_PRIM_QUEUE 0x2 +#define DRAW_FLUSH_VERTEX_CACHE 0x4 +#define DRAW_FLUSH_STATE_CHANGE 0x8 +#define DRAW_FLUSH_BACKEND 0x10 + + +void draw_do_flush( struct draw_context *draw, unsigned flags ); + + + +/** + * Get a writeable copy of a vertex. + * \param stage drawing stage info + * \param vert the vertex to copy (source) + * \param idx index into stage's tmp[] array to put the copy (dest) + * \return pointer to the copied vertex + */ +static INLINE struct vertex_header * +dup_vert( struct draw_stage *stage, + const struct vertex_header *vert, + unsigned idx ) +{ + struct vertex_header *tmp = stage->tmp[idx]; + const uint vsize = sizeof(struct vertex_header) + + stage->draw->num_vs_outputs * 4 * sizeof(float); + memcpy(tmp, vert, vsize); + tmp->vertex_id = UNDEFINED_VERTEX_ID; + return tmp; +} + +static INLINE float +dot4(const float *a, const float *b) +{ + float result = (a[0]*b[0] + + a[1]*b[1] + + a[2]*b[2] + + a[3]*b[3]); + + return result; +} + +#endif /* DRAW_PRIVATE_H */ diff --git a/src/gallium/auxiliary/draw/draw_stipple.c b/src/gallium/auxiliary/draw/draw_stipple.c new file mode 100644 index 0000000000..506f33512c --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_stipple.c @@ -0,0 +1,239 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +/* Implement line stipple by cutting lines up into smaller lines. + * There are hundreds of ways to implement line stipple, this is one + * choice that should work in all situations, requires no state + * manipulations, but with a penalty in terms of large amounts of + * generated geometry. + */ + + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "draw_private.h" + + +/** Subclass of draw_stage */ +struct stipple_stage { + struct draw_stage stage; + float counter; + uint pattern; + uint factor; +}; + + +static INLINE struct stipple_stage * +stipple_stage(struct draw_stage *stage) +{ + return (struct stipple_stage *) stage; +} + + +/** + * Compute interpolated vertex attributes for 'dst' at position 't' + * between 'v0' and 'v1'. + * XXX using linear interpolation for all attribs at this time. + */ +static void +screen_interp( struct draw_context *draw, + struct vertex_header *dst, + float t, + const struct vertex_header *v0, + const struct vertex_header *v1 ) +{ + uint attr; + for (attr = 0; attr < draw->num_vs_outputs; attr++) { + const float *val0 = v0->data[attr]; + const float *val1 = v1->data[attr]; + float *newv = dst->data[attr]; + uint i; + for (i = 0; i < 4; i++) { + newv[i] = val0[i] + t * (val1[i] - val0[i]); + } + } +} + + +static void +emit_segment(struct draw_stage *stage, struct prim_header *header, + float t0, float t1) +{ + struct vertex_header *v0new = dup_vert(stage, header->v[0], 0); + struct vertex_header *v1new = dup_vert(stage, header->v[1], 1); + struct prim_header newprim = *header; + + if (t0 > 0.0) { + screen_interp( stage->draw, v0new, t0, header->v[0], header->v[1] ); + newprim.v[0] = v0new; + } + + if (t1 < 1.0) { + screen_interp( stage->draw, v1new, t1, header->v[0], header->v[1] ); + newprim.v[1] = v1new; + } + + stage->next->line( stage->next, &newprim ); +} + + +static INLINE unsigned +stipple_test(int counter, ushort pattern, int factor) +{ + int b = (counter / factor) & 0xf; + return (1 << b) & pattern; +} + + +static void +stipple_line(struct draw_stage *stage, struct prim_header *header) +{ + struct stipple_stage *stipple = stipple_stage(stage); + struct vertex_header *v0 = header->v[0]; + struct vertex_header *v1 = header->v[1]; + const float *pos0 = v0->data[0]; + const float *pos1 = v1->data[0]; + float start = 0; + int state = 0; + + float x0 = pos0[0]; + float x1 = pos1[0]; + float y0 = pos0[1]; + float y1 = pos1[1]; + + float dx = x0 > x1 ? x0 - x1 : x1 - x0; + float dy = y0 > y1 ? y0 - y1 : y1 - y0; + + float length = MAX2(dx, dy); + int i; + + /* XXX ToDo: intead of iterating pixel-by-pixel, use a look-up table. + */ + for (i = 0; i < length; i++) { + int result = stipple_test( (int) stipple->counter+i, + (ushort) stipple->pattern, stipple->factor ); + if (result != state) { + /* changing from "off" to "on" or vice versa */ + if (state) { + if (start != i) { + /* finishing an "on" segment */ + emit_segment( stage, header, start / length, i / length ); + } + } + else { + /* starting an "on" segment */ + start = (float) i; + } + state = result; + } + } + + if (state && start < length) + emit_segment( stage, header, start / length, 1.0 ); + + stipple->counter += length; +} + + +static void +reset_stipple_counter(struct draw_stage *stage) +{ + struct stipple_stage *stipple = stipple_stage(stage); + stipple->counter = 0; + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +stipple_first_line(struct draw_stage *stage, + struct prim_header *header) +{ + struct stipple_stage *stipple = stipple_stage(stage); + struct draw_context *draw = stage->draw; + + stipple->pattern = draw->rasterizer->line_stipple_pattern; + stipple->factor = draw->rasterizer->line_stipple_factor + 1; + + stage->line = stipple_line; + stage->line( stage, header ); +} + + +static void +stipple_flush(struct draw_stage *stage, unsigned flags) +{ + stage->line = stipple_first_line; + stage->next->flush( stage->next, flags ); +} + + +static void +passthrough_point(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->point( stage->next, header ); +} + + +static void +passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + +static void +stipple_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Create line stippler stage + */ +struct draw_stage *draw_stipple_stage( struct draw_context *draw ) +{ + struct stipple_stage *stipple = CALLOC_STRUCT(stipple_stage); + + draw_alloc_temp_verts( &stipple->stage, 2 ); + + stipple->stage.draw = draw; + stipple->stage.next = NULL; + stipple->stage.point = passthrough_point; + stipple->stage.line = stipple_first_line; + stipple->stage.tri = passthrough_tri; + stipple->stage.reset_stipple_counter = reset_stipple_counter; + stipple->stage.flush = stipple_flush; + stipple->stage.destroy = stipple_destroy; + + return &stipple->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_twoside.c b/src/gallium/auxiliary/draw/draw_twoside.c new file mode 100644 index 0000000000..1c38957987 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_twoside.c @@ -0,0 +1,203 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "draw_private.h" + + +struct twoside_stage { + struct draw_stage stage; + float sign; /**< +1 or -1 */ + uint attrib_front0, attrib_back0; + uint attrib_front1, attrib_back1; +}; + + +static INLINE struct twoside_stage *twoside_stage( struct draw_stage *stage ) +{ + return (struct twoside_stage *)stage; +} + + + + +/** + * Copy back color(s) to front color(s). + */ +static INLINE struct vertex_header * +copy_bfc( struct twoside_stage *twoside, + const struct vertex_header *v, + unsigned idx ) +{ + struct vertex_header *tmp = dup_vert( &twoside->stage, v, idx ); + + if (twoside->attrib_back0) { + COPY_4FV(tmp->data[twoside->attrib_front0], + tmp->data[twoside->attrib_back0]); + } + if (twoside->attrib_back1) { + COPY_4FV(tmp->data[twoside->attrib_front1], + tmp->data[twoside->attrib_back1]); + } + + return tmp; +} + + +/* Twoside tri: + */ +static void twoside_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct twoside_stage *twoside = twoside_stage(stage); + + if (header->det * twoside->sign < 0.0) { + /* this is a back-facing triangle */ + struct prim_header tmp; + + tmp.det = header->det; + tmp.edgeflags = header->edgeflags; + /* copy back attribs to front attribs */ + tmp.v[0] = copy_bfc(twoside, header->v[0], 0); + tmp.v[1] = copy_bfc(twoside, header->v[1], 1); + tmp.v[2] = copy_bfc(twoside, header->v[2], 2); + + stage->next->tri( stage->next, &tmp ); + } + else { + stage->next->tri( stage->next, header ); + } +} + + +static void twoside_line( struct draw_stage *stage, + struct prim_header *header ) +{ + /* pass-through */ + stage->next->line( stage->next, header ); +} + + +static void twoside_point( struct draw_stage *stage, + struct prim_header *header ) +{ + /* pass-through */ + stage->next->point( stage->next, header ); +} + + +static void twoside_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct twoside_stage *twoside = twoside_stage(stage); + const struct pipe_shader_state *vs = stage->draw->vertex_shader->state; + uint i; + + twoside->attrib_front0 = 0; + twoside->attrib_front1 = 0; + twoside->attrib_back0 = 0; + twoside->attrib_back1 = 0; + + /* Find which vertex shader outputs are front/back colors */ + for (i = 0; i < vs->num_outputs; i++) { + if (vs->output_semantic_name[i] == TGSI_SEMANTIC_COLOR) { + if (vs->output_semantic_index[i] == 0) + twoside->attrib_front0 = i; + else + twoside->attrib_front1 = i; + } + if (vs->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { + if (vs->output_semantic_index[i] == 0) + twoside->attrib_back0 = i; + else + twoside->attrib_back1 = i; + } + } + + if (!twoside->attrib_back0) + twoside->attrib_front0 = 0; + + if (!twoside->attrib_back1) + twoside->attrib_front1 = 0; + + /* + * We'll multiply the primitive's determinant by this sign to determine + * if the triangle is back-facing (negative). + * sign = -1 for CCW, +1 for CW + */ + twoside->sign = (stage->draw->rasterizer->front_winding == PIPE_WINDING_CCW) ? -1.0f : 1.0f; + + stage->tri = twoside_tri; + stage->tri( stage, header ); +} + + +static void twoside_flush( struct draw_stage *stage, unsigned flags ) +{ + stage->tri = twoside_first_tri; + stage->next->flush( stage->next, flags ); +} + + +static void twoside_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void twoside_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Create twoside pipeline stage. + */ +struct draw_stage *draw_twoside_stage( struct draw_context *draw ) +{ + struct twoside_stage *twoside = CALLOC_STRUCT(twoside_stage); + + draw_alloc_temp_verts( &twoside->stage, 3 ); + + twoside->stage.draw = draw; + twoside->stage.next = NULL; + twoside->stage.point = twoside_point; + twoside->stage.line = twoside_line; + twoside->stage.tri = twoside_first_tri; + twoside->stage.flush = twoside_flush; + twoside->stage.reset_stipple_counter = twoside_reset_stipple_counter; + twoside->stage.destroy = twoside_destroy; + + return &twoside->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_unfilled.c b/src/gallium/auxiliary/draw/draw_unfilled.c new file mode 100644 index 0000000000..8777cfdfc8 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_unfilled.c @@ -0,0 +1,206 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Drawing stage for handling glPolygonMode(line/point). + * Convert triangles to points or lines as needed. + */ + +/* Authors: Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "draw_private.h" + + +struct unfilled_stage { + struct draw_stage stage; + + /** [0] = front face, [1] = back face. + * legal values: PIPE_POLYGON_MODE_FILL, PIPE_POLYGON_MODE_LINE, + * and PIPE_POLYGON_MODE_POINT, + */ + unsigned mode[2]; +}; + + +static INLINE struct unfilled_stage *unfilled_stage( struct draw_stage *stage ) +{ + return (struct unfilled_stage *)stage; +} + + + +static void point( struct draw_stage *stage, + struct vertex_header *v0 ) +{ + struct prim_header tmp; + tmp.v[0] = v0; + stage->next->point( stage->next, &tmp ); +} + +static void line( struct draw_stage *stage, + struct vertex_header *v0, + struct vertex_header *v1 ) +{ + struct prim_header tmp; + tmp.v[0] = v0; + tmp.v[1] = v1; + stage->next->line( stage->next, &tmp ); +} + + +static void points( struct draw_stage *stage, + struct prim_header *header ) +{ + struct vertex_header *v0 = header->v[0]; + struct vertex_header *v1 = header->v[1]; + struct vertex_header *v2 = header->v[2]; + + if (header->edgeflags & 0x1) point( stage, v0 ); + if (header->edgeflags & 0x2) point( stage, v1 ); + if (header->edgeflags & 0x4) point( stage, v2 ); +} + + +static void lines( struct draw_stage *stage, + struct prim_header *header ) +{ + struct vertex_header *v0 = header->v[0]; + struct vertex_header *v1 = header->v[1]; + struct vertex_header *v2 = header->v[2]; + +#if 0 + assert(((header->edgeflags & 0x1) >> 0) == header->v[0]->edgeflag); + assert(((header->edgeflags & 0x2) >> 1) == header->v[1]->edgeflag); + assert(((header->edgeflags & 0x4) >> 2) == header->v[2]->edgeflag); +#endif + + if (header->edgeflags & 0x1) line( stage, v0, v1 ); + if (header->edgeflags & 0x2) line( stage, v1, v2 ); + if (header->edgeflags & 0x4) line( stage, v2, v0 ); +} + + +/* Unfilled tri: + * + * Note edgeflags in the vertex struct is not sufficient as we will + * need to manipulate them when decomposing primitives??? + */ +static void unfilled_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct unfilled_stage *unfilled = unfilled_stage(stage); + unsigned mode = unfilled->mode[header->det >= 0.0]; + + switch (mode) { + case PIPE_POLYGON_MODE_FILL: + stage->next->tri( stage->next, header ); + break; + case PIPE_POLYGON_MODE_LINE: + lines( stage, header ); + break; + case PIPE_POLYGON_MODE_POINT: + points( stage, header ); + break; + default: + abort(); + } +} + + +static void unfilled_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct unfilled_stage *unfilled = unfilled_stage(stage); + + unfilled->mode[0] = stage->draw->rasterizer->fill_ccw; /* front */ + unfilled->mode[1] = stage->draw->rasterizer->fill_cw; /* back */ + + stage->tri = unfilled_tri; + stage->tri( stage, header ); +} + + +static void unfilled_line( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->line( stage->next, header ); +} + + +static void unfilled_point( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->point( stage->next, header ); +} + + +static void unfilled_flush( struct draw_stage *stage, + unsigned flags ) +{ + stage->next->flush( stage->next, flags ); + + stage->tri = unfilled_first_tri; +} + + +static void unfilled_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void unfilled_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Create unfilled triangle stage. + */ +struct draw_stage *draw_unfilled_stage( struct draw_context *draw ) +{ + struct unfilled_stage *unfilled = CALLOC_STRUCT(unfilled_stage); + + draw_alloc_temp_verts( &unfilled->stage, 0 ); + + unfilled->stage.draw = draw; + unfilled->stage.next = NULL; + unfilled->stage.tmp = NULL; + unfilled->stage.point = unfilled_point; + unfilled->stage.line = unfilled_line; + unfilled->stage.tri = unfilled_first_tri; + unfilled->stage.flush = unfilled_flush; + unfilled->stage.reset_stipple_counter = unfilled_reset_stipple_counter; + unfilled->stage.destroy = unfilled_destroy; + + return &unfilled->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c new file mode 100644 index 0000000000..4375ebabbc --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -0,0 +1,185 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "draw_private.h" + + + + + +/** + * Rebuild the rendering pipeline. + */ +static struct draw_stage *validate_pipeline( struct draw_stage *stage ) +{ + struct draw_context *draw = stage->draw; + struct draw_stage *next = draw->pipeline.rasterize; + int need_det = 0; + int precalc_flat = 0; + + /* Set the validate's next stage to the rasterize stage, so that it + * can be found later if needed for flushing. + */ + stage->next = next; + + /* + * NOTE: we build up the pipeline in end-to-start order. + * + * TODO: make the current primitive part of the state and build + * shorter pipelines for lines & points. + */ + + if ((draw->rasterizer->line_width != 1.0 && draw->convert_wide_lines) || + (draw->rasterizer->point_size != 1.0 && draw->convert_wide_points) || + draw->rasterizer->point_sprite) { + draw->pipeline.wide->next = next; + next = draw->pipeline.wide; + } + + if (draw->rasterizer->line_stipple_enable) { + draw->pipeline.stipple->next = next; + next = draw->pipeline.stipple; + precalc_flat = 1; /* only needed for lines really */ + } + + if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || + draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) { + draw->pipeline.unfilled->next = next; + next = draw->pipeline.unfilled; + precalc_flat = 1; /* only needed for triangles really */ + need_det = 1; + } + + if (draw->rasterizer->flatshade && precalc_flat) { + draw->pipeline.flatshade->next = next; + next = draw->pipeline.flatshade; + } + + if (draw->rasterizer->offset_cw || + draw->rasterizer->offset_ccw) { + draw->pipeline.offset->next = next; + next = draw->pipeline.offset; + need_det = 1; + } + + if (draw->rasterizer->light_twoside) { + draw->pipeline.twoside->next = next; + next = draw->pipeline.twoside; + need_det = 1; + } + + /* Always run the cull stage as we calculate determinant there + * also. + * + * This can actually be a win as culling out the triangles can lead + * to less work emitting vertices, smaller vertex buffers, etc. + * It's difficult to say whether this will be true in general. + */ + if (need_det || draw->rasterizer->cull_mode) { + draw->pipeline.cull->next = next; + next = draw->pipeline.cull; + } + + /* Clip stage + */ + if (!draw->rasterizer->bypass_clipping) + { + draw->pipeline.clip->next = next; + next = draw->pipeline.clip; + } + + + draw->pipeline.first = next; + return next; +} + +static void validate_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct draw_stage *pipeline = validate_pipeline( stage ); + pipeline->tri( pipeline, header ); +} + +static void validate_line( struct draw_stage *stage, + struct prim_header *header ) +{ + struct draw_stage *pipeline = validate_pipeline( stage ); + pipeline->line( pipeline, header ); +} + +static void validate_point( struct draw_stage *stage, + struct prim_header *header ) +{ + struct draw_stage *pipeline = validate_pipeline( stage ); + pipeline->point( pipeline, header ); +} + +static void validate_reset_stipple_counter( struct draw_stage *stage ) +{ + struct draw_stage *pipeline = validate_pipeline( stage ); + pipeline->reset_stipple_counter( pipeline ); +} + +static void validate_flush( struct draw_stage *stage, + unsigned flags ) +{ + /* May need to pass a backend flush on to the rasterize stage. + */ + if (stage->next) + stage->next->flush( stage->next, flags ); +} + + +static void validate_destroy( struct draw_stage *stage ) +{ + FREE( stage ); +} + + +/** + * Create validate pipeline stage. + */ +struct draw_stage *draw_validate_stage( struct draw_context *draw ) +{ + struct draw_stage *stage = CALLOC_STRUCT(draw_stage); + + stage->draw = draw; + stage->next = NULL; + stage->point = validate_point; + stage->line = validate_line; + stage->tri = validate_tri; + stage->flush = validate_flush; + stage->reset_stipple_counter = validate_reset_stipple_counter; + stage->destroy = validate_destroy; + + return stage; +} diff --git a/src/gallium/auxiliary/draw/draw_vbuf.c b/src/gallium/auxiliary/draw/draw_vbuf.c new file mode 100644 index 0000000000..71ac73912b --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vbuf.c @@ -0,0 +1,570 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Vertex buffer drawing stage. + * + * \author José Fonseca + * \author Keith Whitwell + */ + + +#include "pipe/p_debug.h" +#include "pipe/p_util.h" + +#include "draw_vbuf.h" +#include "draw_private.h" +#include "draw_vertex.h" +#include "draw_vf.h" + + +/** + * Vertex buffer emit stage. + */ +struct vbuf_stage { + struct draw_stage stage; /**< This must be first (base class) */ + + struct vbuf_render *render; + + const struct vertex_info *vinfo; + + /** Vertex size in bytes */ + unsigned vertex_size; + + struct draw_vertex_fetch *vf; + + /* FIXME: we have no guarantee that 'unsigned' is 32bit */ + + /** Vertices in hardware format */ + unsigned *vertices; + unsigned *vertex_ptr; + unsigned max_vertices; + unsigned nr_vertices; + + /** Indices */ + ushort *indices; + unsigned max_indices; + unsigned nr_indices; + + /** Pipe primitive */ + unsigned prim; +}; + + +/** + * Basically a cast wrapper. + */ +static INLINE struct vbuf_stage * +vbuf_stage( struct draw_stage *stage ) +{ + assert(stage); + return (struct vbuf_stage *)stage; +} + + +static void vbuf_flush_indices( struct vbuf_stage *vbuf ); +static void vbuf_flush_vertices( struct vbuf_stage *vbuf ); +static void vbuf_alloc_vertices( struct vbuf_stage *vbuf ); + + +static INLINE boolean +overflow( void *map, void *ptr, unsigned bytes, unsigned bufsz ) +{ + unsigned long used = (unsigned long) ((char *)ptr - (char *)map); + return (used + bytes) > bufsz; +} + + +static INLINE void +check_space( struct vbuf_stage *vbuf, unsigned nr ) +{ + if (vbuf->nr_vertices + nr > vbuf->max_vertices ) { + vbuf_flush_vertices(vbuf); + vbuf_alloc_vertices(vbuf); + } + + if (vbuf->nr_indices + nr > vbuf->max_indices ) + vbuf_flush_indices(vbuf); +} + + +#if 0 +static INLINE void +dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) +{ + assert(vinfo == vbuf->render->get_vertex_info(vbuf->render)); + unsigned i, j, k; + + for (i = 0; i < vinfo->num_attribs; i++) { + j = vinfo->src_index[i]; + switch (vinfo->emit[i]) { + case EMIT_OMIT: + debug_printf("EMIT_OMIT:"); + break; + case EMIT_ALL: + assert(i == 0); + assert(j == 0); + debug_printf("EMIT_ALL:\t"); + for(k = 0; k < vinfo->size*4; ++k) + debug_printf("%02x ", *data++); + break; + case EMIT_1F: + debug_printf("EMIT_1F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_1F_PSIZE: + debug_printf("EMIT_1F_PSIZE:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_2F: + debug_printf("EMIT_2F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_3F: + debug_printf("EMIT_3F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + data += sizeof(float); + break; + case EMIT_4F: + debug_printf("EMIT_4F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_4UB: + debug_printf("EMIT_4UB:\t"); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + break; + default: + assert(0); + } + debug_printf("\n"); + } + debug_printf("\n"); +} +#endif + + +/** + * Extract the needed fields from post-transformed vertex and emit + * a hardware(driver) vertex. + * Recall that the vertices are constructed by the 'draw' module and + * have a couple of slots at the beginning (1-dword header, 4-dword + * clip pos) that we ignore here. We only use the vertex->data[] fields. + */ +static INLINE void +emit_vertex( struct vbuf_stage *vbuf, + struct vertex_header *vertex ) +{ +#if 0 + debug_printf("emit vertex %d to %p\n", + vbuf->nr_vertices, vbuf->vertex_ptr); +#endif + + if(vertex->vertex_id != UNDEFINED_VERTEX_ID) { + if(vertex->vertex_id < vbuf->nr_vertices) + return; + else + debug_printf("Bad vertex id 0x%04x (>= 0x%04x)\n", + vertex->vertex_id, vbuf->nr_vertices); + return; + } + + vertex->vertex_id = vbuf->nr_vertices++; + + if(!vbuf->vf) { + const struct vertex_info *vinfo = vbuf->vinfo; + uint i; + uint count = 0; /* for debug/sanity */ + + assert(vinfo == vbuf->render->get_vertex_info(vbuf->render)); + + for (i = 0; i < vinfo->num_attribs; i++) { + uint j = vinfo->src_index[i]; + switch (vinfo->emit[i]) { + case EMIT_OMIT: + /* no-op */ + break; + case EMIT_ALL: + /* just copy the whole vertex as-is to the vbuf */ + assert(i == 0); + assert(j == 0); + memcpy(vbuf->vertex_ptr, vertex, vinfo->size * 4); + vbuf->vertex_ptr += vinfo->size; + count += vinfo->size; + break; + case EMIT_1F: + *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); + count++; + break; + case EMIT_1F_PSIZE: + *vbuf->vertex_ptr++ = fui(vbuf->stage.draw->rasterizer->point_size); + count++; + break; + case EMIT_2F: + *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); + *vbuf->vertex_ptr++ = fui(vertex->data[j][1]); + count += 2; + break; + case EMIT_3F: + *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); + *vbuf->vertex_ptr++ = fui(vertex->data[j][1]); + *vbuf->vertex_ptr++ = fui(vertex->data[j][2]); + count += 3; + break; + case EMIT_4F: + *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); + *vbuf->vertex_ptr++ = fui(vertex->data[j][1]); + *vbuf->vertex_ptr++ = fui(vertex->data[j][2]); + *vbuf->vertex_ptr++ = fui(vertex->data[j][3]); + count += 4; + break; + case EMIT_4UB: + *vbuf->vertex_ptr++ = pack_ub4(float_to_ubyte( vertex->data[j][2] ), + float_to_ubyte( vertex->data[j][1] ), + float_to_ubyte( vertex->data[j][0] ), + float_to_ubyte( vertex->data[j][3] )); + count += 1; + break; + default: + assert(0); + } + } + assert(count == vinfo->size); +#if 0 + { + static float data[256]; + draw_vf_emit_vertex(vbuf->vf, vertex, data); + if(memcmp((uint8_t *)vbuf->vertex_ptr - vbuf->vertex_size, data, vbuf->vertex_size)) { + debug_printf("With VF:\n"); + dump_emitted_vertex(vbuf->vinfo, (uint8_t *)data); + debug_printf("Without VF:\n"); + dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr - vbuf->vertex_size); + assert(0); + } + } +#endif + } + else { + draw_vf_emit_vertex(vbuf->vf, vertex, vbuf->vertex_ptr); + + vbuf->vertex_ptr += vbuf->vertex_size/4; + } +} + + +static void +vbuf_tri( struct draw_stage *stage, + struct prim_header *prim ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + unsigned i; + + check_space( vbuf, 3 ); + + for (i = 0; i < 3; i++) { + emit_vertex( vbuf, prim->v[i] ); + + vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[i]->vertex_id; + } +} + + +static void +vbuf_line( struct draw_stage *stage, + struct prim_header *prim ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + unsigned i; + + check_space( vbuf, 2 ); + + for (i = 0; i < 2; i++) { + emit_vertex( vbuf, prim->v[i] ); + + vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[i]->vertex_id; + } +} + + +static void +vbuf_point( struct draw_stage *stage, + struct prim_header *prim ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + + check_space( vbuf, 1 ); + + emit_vertex( vbuf, prim->v[0] ); + + vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[0]->vertex_id; +} + + +/** + * Set the prim type for subsequent vertices. + * This may result in a new vertex size. The existing vbuffer (if any) + * will be flushed if needed and a new one allocated. + */ +static void +vbuf_set_prim( struct vbuf_stage *vbuf, uint newprim ) +{ + const struct vertex_info *vinfo; + unsigned vertex_size; + + assert(newprim == PIPE_PRIM_POINTS || + newprim == PIPE_PRIM_LINES || + newprim == PIPE_PRIM_TRIANGLES); + + vbuf->prim = newprim; + vbuf->render->set_primitive(vbuf->render, newprim); + + vinfo = vbuf->render->get_vertex_info(vbuf->render); + vertex_size = vinfo->size * sizeof(float); + + if (vertex_size != vbuf->vertex_size) + vbuf_flush_vertices(vbuf); + + vbuf->vinfo = vinfo; + vbuf->vertex_size = vertex_size; + if(vbuf->vf) + draw_vf_set_vertex_info(vbuf->vf, + vbuf->vinfo, + vbuf->stage.draw->rasterizer->point_size); + + if (!vbuf->vertices) + vbuf_alloc_vertices(vbuf); +} + + +static void +vbuf_first_tri( struct draw_stage *stage, + struct prim_header *prim ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + + vbuf_flush_indices( vbuf ); + stage->tri = vbuf_tri; + vbuf_set_prim(vbuf, PIPE_PRIM_TRIANGLES); + stage->tri( stage, prim ); +} + + +static void +vbuf_first_line( struct draw_stage *stage, + struct prim_header *prim ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + + vbuf_flush_indices( vbuf ); + stage->line = vbuf_line; + vbuf_set_prim(vbuf, PIPE_PRIM_LINES); + stage->line( stage, prim ); +} + + +static void +vbuf_first_point( struct draw_stage *stage, + struct prim_header *prim ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + + vbuf_flush_indices( vbuf ); + stage->point = vbuf_point; + vbuf_set_prim(vbuf, PIPE_PRIM_POINTS); + stage->point( stage, prim ); +} + + +static void +vbuf_flush_indices( struct vbuf_stage *vbuf ) +{ + if(!vbuf->nr_indices) + return; + + assert((uint) (vbuf->vertex_ptr - vbuf->vertices) == + vbuf->nr_vertices * vbuf->vertex_size / sizeof(unsigned)); + + switch(vbuf->prim) { + case PIPE_PRIM_POINTS: + break; + case PIPE_PRIM_LINES: + assert(vbuf->nr_indices % 2 == 0); + break; + case PIPE_PRIM_TRIANGLES: + assert(vbuf->nr_indices % 3 == 0); + break; + default: + assert(0); + } + + vbuf->render->draw(vbuf->render, vbuf->indices, vbuf->nr_indices); + + vbuf->nr_indices = 0; + + /* don't need to reset point/line/tri functions */ +#if 0 + stage->point = vbuf_first_point; + stage->line = vbuf_first_line; + stage->tri = vbuf_first_tri; +#endif +} + + +/** + * Flush existing vertex buffer and allocate a new one. + * + * XXX: We separate flush-on-index-full and flush-on-vb-full, but may + * raise issues uploading vertices if the hardware wants to flush when + * we flush. + */ +static void +vbuf_flush_vertices( struct vbuf_stage *vbuf ) +{ + if(vbuf->vertices) { + vbuf_flush_indices(vbuf); + + /* Reset temporary vertices ids */ + if(vbuf->nr_vertices) + draw_reset_vertex_ids( vbuf->stage.draw ); + + /* Free the vertex buffer */ + vbuf->render->release_vertices(vbuf->render, + vbuf->vertices, + vbuf->vertex_size, + vbuf->nr_vertices); + vbuf->max_vertices = vbuf->nr_vertices = 0; + vbuf->vertex_ptr = vbuf->vertices = NULL; + + } +} + + +static void +vbuf_alloc_vertices( struct vbuf_stage *vbuf ) +{ + assert(!vbuf->nr_indices); + assert(!vbuf->vertices); + + /* Allocate a new vertex buffer */ + vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size; + vbuf->vertices = (uint *) vbuf->render->allocate_vertices(vbuf->render, + (ushort) vbuf->vertex_size, + (ushort) vbuf->max_vertices); + vbuf->vertex_ptr = vbuf->vertices; +} + + + +static void +vbuf_flush( struct draw_stage *stage, unsigned flags ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + + vbuf_flush_indices( vbuf ); + + stage->point = vbuf_first_point; + stage->line = vbuf_first_line; + stage->tri = vbuf_first_tri; + + if (flags & DRAW_FLUSH_BACKEND) + vbuf_flush_vertices( vbuf ); +} + + +static void +vbuf_reset_stipple_counter( struct draw_stage *stage ) +{ + /* XXX: Need to do something here for hardware with linestipple. + */ + (void) stage; +} + + +static void vbuf_destroy( struct draw_stage *stage ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + + if(vbuf->indices) + align_free( vbuf->indices ); + + if(vbuf->vf) + draw_vf_destroy( vbuf->vf ); + + if (vbuf->render) + vbuf->render->destroy( vbuf->render ); + + FREE( stage ); +} + + +/** + * Create a new primitive vbuf/render stage. + */ +struct draw_stage *draw_vbuf_stage( struct draw_context *draw, + struct vbuf_render *render ) +{ + struct vbuf_stage *vbuf = CALLOC_STRUCT(vbuf_stage); + + if(!vbuf) + return NULL; + + vbuf->stage.draw = draw; + vbuf->stage.point = vbuf_first_point; + vbuf->stage.line = vbuf_first_line; + vbuf->stage.tri = vbuf_first_tri; + vbuf->stage.flush = vbuf_flush; + vbuf->stage.reset_stipple_counter = vbuf_reset_stipple_counter; + vbuf->stage.destroy = vbuf_destroy; + + vbuf->render = render; + + assert(render->max_indices < UNDEFINED_VERTEX_ID); + vbuf->max_indices = render->max_indices; + vbuf->indices = (ushort *) + align_malloc( vbuf->max_indices * sizeof(vbuf->indices[0]), 16 ); + if(!vbuf->indices) + vbuf_destroy(&vbuf->stage); + + vbuf->vertices = NULL; + vbuf->vertex_ptr = vbuf->vertices; + + vbuf->prim = ~0; + + if(!GETENV("GALLIUM_NOVF")) + vbuf->vf = draw_vf_create(); + + return &vbuf->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h new file mode 100644 index 0000000000..cfd2b9820c --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -0,0 +1,106 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Vertex buffer drawing stage. + * + * \author Keith Whitwell + * \author José Fonseca + */ + +#ifndef DRAW_VBUF_H_ +#define DRAW_VBUF_H_ + + +#include "pipe/p_util.h" + + +struct draw_context; +struct vertex_info; + + +/** + * Interface for hardware vertex buffer rendering. + */ +struct vbuf_render { + + /** + * Driver limits. May be tuned lower to improve cache hits on + * index list. + */ + unsigned max_indices; + unsigned max_vertex_buffer_bytes; + + /** + * Get the hardware vertex format. + * + * XXX: have this in draw_context instead? + */ + const struct vertex_info *(*get_vertex_info)( struct vbuf_render * ); + + /** + * Request a destination for vertices. + * Hardware renderers will use ttm memory, others will just malloc + * something. + */ + void *(*allocate_vertices)( struct vbuf_render *, + ushort vertex_size, + ushort nr_vertices ); + + /** + * Notify the renderer of the current primitive when it changes. + * Prim is restricted to TRIANGLES, LINES and POINTS. + */ + void (*set_primitive)( struct vbuf_render *, unsigned prim ); + + /** + * DrawElements, note indices are ushort: + */ + void (*draw)( struct vbuf_render *, + const ushort *indices, + uint nr_indices ); + + /** + * Called when vbuf is done with this set of vertices: + */ + void (*release_vertices)( struct vbuf_render *, + void *vertices, + unsigned vertex_size, + unsigned vertices_used ); + + void (*destroy)( struct vbuf_render * ); +}; + + + +struct draw_stage * +draw_vbuf_stage( struct draw_context *draw, + struct vbuf_render *render ); + + +#endif /*DRAW_VBUF_H_*/ diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c new file mode 100644 index 0000000000..daf1ef4b80 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vertex.c @@ -0,0 +1,79 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Functions for specifying the post-transformation vertex layout. + * + * Author: + * Brian Paul + * Keith Whitwell + */ + + +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" + + +/** + * Compute the size of a vertex, in dwords/floats, to update the + * vinfo->size field. + */ +void +draw_compute_vertex_size(struct vertex_info *vinfo) +{ + uint i; + + vinfo->size = 0; + for (i = 0; i < vinfo->num_attribs; i++) { + switch (vinfo->emit[i]) { + case EMIT_OMIT: + break; + case EMIT_4UB: + /* fall-through */ + case EMIT_1F_PSIZE: + /* fall-through */ + case EMIT_1F: + vinfo->size += 1; + break; + case EMIT_2F: + vinfo->size += 2; + break; + case EMIT_3F: + vinfo->size += 3; + break; + case EMIT_4F: + vinfo->size += 4; + break; + case EMIT_ALL: + /* fall-through */ + default: + assert(0); + } + } + + assert(vinfo->size * 4 <= MAX_VERTEX_SIZE); +} diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h new file mode 100644 index 0000000000..267c74203b --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -0,0 +1,111 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Post-transform vertex format info. The vertex_info struct is used by + * the draw_vbuf code to emit hardware-specific vertex layouts into hw + * vertex buffers. + * + * Author: + * Brian Paul + */ + + +#ifndef DRAW_VERTEX_H +#define DRAW_VERTEX_H + + +#include "pipe/p_state.h" + + +/** + * Vertex attribute emit modes + */ +enum attrib_emit { + EMIT_OMIT, /**< don't emit the attribute */ + EMIT_ALL, /**< emit whole post-xform vertex, w/ header */ + EMIT_1F, + EMIT_1F_PSIZE, /**< insert constant point size */ + EMIT_2F, + EMIT_3F, + EMIT_4F, + EMIT_4UB /**< XXX may need variations for RGBA vs BGRA, etc */ +}; + + +/** + * Attribute interpolation mode + */ +enum interp_mode { + INTERP_NONE, /**< never interpolate vertex header info */ + INTERP_POS, /**< special case for frag position */ + INTERP_CONSTANT, + INTERP_LINEAR, + INTERP_PERSPECTIVE +}; + + +/** + * Information about hardware/rasterization vertex layout. + */ +struct vertex_info +{ + uint num_attribs; + uint hwfmt[4]; /**< hardware format info for this format */ + enum interp_mode interp_mode[PIPE_MAX_SHADER_INPUTS]; + enum attrib_emit emit[PIPE_MAX_SHADER_INPUTS]; /**< EMIT_x */ + uint src_index[PIPE_MAX_SHADER_INPUTS]; /**< map to post-xform attribs */ + uint size; /**< total vertex size in dwords */ +}; + + + +/** + * Add another attribute to the given vertex_info object. + * \param src_index indicates which post-transformed vertex attrib slot + * corresponds to this attribute. + * \return slot in which the attribute was added + */ +static INLINE uint +draw_emit_vertex_attr(struct vertex_info *vinfo, + enum attrib_emit emit, enum interp_mode interp, + uint src_index) +{ + const uint n = vinfo->num_attribs; + assert(n < PIPE_MAX_SHADER_INPUTS); + vinfo->emit[n] = emit; + vinfo->interp_mode[n] = interp; + vinfo->src_index[n] = src_index; + vinfo->num_attribs++; + return n; +} + + +extern void draw_compute_vertex_size(struct vertex_info *vinfo); + + +#endif /* DRAW_VERTEX_H */ diff --git a/src/gallium/auxiliary/draw/draw_vertex_cache.c b/src/gallium/auxiliary/draw/draw_vertex_cache.c new file mode 100644 index 0000000000..44427999cc --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vertex_cache.c @@ -0,0 +1,196 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "draw_private.h" +#include "draw_context.h" + + +void draw_vertex_cache_invalidate( struct draw_context *draw ) +{ + assert(draw->pq.queue_nr == 0); + assert(draw->vs.queue_nr == 0); + assert(draw->vcache.referenced == 0); + + memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx)); +} + + +/** + * Check if vertex is in cache, otherwise add it. It won't go through + * VS yet, not until there is a flush operation or the VS queue fills up. + * + * Note that cache entries are basically just two pointers: the first + * an index into the user's vertex arrays, the second a location in + * the vertex shader cache for the post-transformed vertex. + * + * \return pointer to location of (post-transformed) vertex header in the cache + */ +static struct vertex_header *get_vertex( struct draw_context *draw, + unsigned i ) +{ + unsigned slot = (i + (i>>5)) % VCACHE_SIZE; + + assert(slot < 32); /* so we don't exceed the bitfield size below */ + + /* Cache miss? + */ + if (draw->vcache.idx[slot] != i) { + + /* If slot is in use, use the overflow area: + */ + if (draw->vcache.referenced & (1 << slot)) { + slot = VCACHE_SIZE + draw->vcache.overflow++; + } + + assert(slot < Elements(draw->vcache.idx)); + + draw->vcache.idx[slot] = i; + + /* Add to vertex shader queue: + */ + assert(draw->vs.queue_nr < VS_QUEUE_LENGTH); + draw->vs.queue[draw->vs.queue_nr].dest = draw->vcache.vertex[slot]; + draw->vs.queue[draw->vs.queue_nr].elt = i; + draw->vs.queue_nr++; + + /* Need to set the vertex's edge flag here. If we're being called + * by do_ef_triangle(), that function needs edge flag info! + */ + draw->vcache.vertex[slot]->clipmask = 0; + draw->vcache.vertex[slot]->edgeflag = 1; /*XXX use user's edge flag! */ + draw->vcache.vertex[slot]->pad = 0; + draw->vcache.vertex[slot]->vertex_id = UNDEFINED_VERTEX_ID; + } + + + /* primitive flushing may have cleared the bitfield but did not + * clear the idx[] array values. Set the bit now. This fixes a + * bug found when drawing long triangle fans. + */ + draw->vcache.referenced |= (1 << slot); + return draw->vcache.vertex[slot]; +} + + +static struct vertex_header *get_uint_elt_vertex( struct draw_context *draw, + unsigned i ) +{ + const unsigned *elts = (const unsigned *) draw->user.elts; + return get_vertex( draw, elts[i] ); +} + + +static struct vertex_header *get_ushort_elt_vertex( struct draw_context *draw, + unsigned i ) +{ + const ushort *elts = (const ushort *) draw->user.elts; + return get_vertex( draw, elts[i] ); +} + + +static struct vertex_header *get_ubyte_elt_vertex( struct draw_context *draw, + unsigned i ) +{ + const ubyte *elts = (const ubyte *) draw->user.elts; + return get_vertex( draw, elts[i] ); +} + + +void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw ) +{ + unsigned i; + + for (i = 0; i < Elements(draw->vcache.vertex); i++) + draw->vcache.vertex[i]->vertex_id = UNDEFINED_VERTEX_ID; +} + + +void draw_vertex_cache_unreference( struct draw_context *draw ) +{ + draw->vcache.referenced = 0; + draw->vcache.overflow = 0; +} + + +int draw_vertex_cache_check_space( struct draw_context *draw, + unsigned nr_verts ) +{ + if (draw->vcache.overflow + nr_verts < VCACHE_OVERFLOW) { + /* The vs queue is sized so that this can never happen: + */ + assert(draw->vs.queue_nr + nr_verts < VS_QUEUE_LENGTH); + return TRUE; + } + else + return FALSE; +} + + + +/** + * Tell the drawing context about the index/element buffer to use + * (ala glDrawElements) + * If no element buffer is to be used (i.e. glDrawArrays) then this + * should be called with eltSize=0 and elements=NULL. + * + * \param draw the drawing context + * \param eltSize size of each element (1, 2 or 4 bytes) + * \param elements the element buffer ptr + */ +void +draw_set_mapped_element_buffer( struct draw_context *draw, + unsigned eltSize, void *elements ) +{ +// draw_statechange( draw ); + + /* choose the get_vertex() function to use */ + switch (eltSize) { + case 0: + draw->vcache.get_vertex = get_vertex; + break; + case 1: + draw->vcache.get_vertex = get_ubyte_elt_vertex; + break; + case 2: + draw->vcache.get_vertex = get_ushort_elt_vertex; + break; + case 4: + draw->vcache.get_vertex = get_uint_elt_vertex; + break; + default: + assert(0); + } + draw->user.elts = elements; + draw->user.eltSize = eltSize; +} + diff --git a/src/gallium/auxiliary/draw/draw_vertex_fetch.c b/src/gallium/auxiliary/draw/draw_vertex_fetch.c new file mode 100644 index 0000000000..e13df04605 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vertex_fetch.c @@ -0,0 +1,510 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "draw_private.h" +#include "draw_context.h" + + +#define DRAW_DBG 0 + + +/** + * Fetch a float[4] vertex attribute from memory, doing format/type + * conversion as needed. + * + * This is probably needed/dupliocated elsewhere, eg format + * conversion, texture sampling etc. + */ +#define FETCH_ATTRIB( NAME, SZ, CVT ) \ +static void \ +fetch_##NAME(const void *ptr, float *attrib) \ +{ \ + static const float defaults[4] = { 0,0,0,1 }; \ + int i; \ + \ + for (i = 0; i < SZ; i++) { \ + attrib[i] = CVT; \ + } \ + \ + for (; i < 4; i++) { \ + attrib[i] = defaults[i]; \ + } \ +} + +#define CVT_64_FLOAT (float) ((double *) ptr)[i] +#define CVT_32_FLOAT ((float *) ptr)[i] + +#define CVT_8_USCALED (float) ((unsigned char *) ptr)[i] +#define CVT_16_USCALED (float) ((unsigned short *) ptr)[i] +#define CVT_32_USCALED (float) ((unsigned int *) ptr)[i] + +#define CVT_8_SSCALED (float) ((char *) ptr)[i] +#define CVT_16_SSCALED (float) ((short *) ptr)[i] +#define CVT_32_SSCALED (float) ((int *) ptr)[i] + +#define CVT_8_UNORM (float) ((unsigned char *) ptr)[i] / 255.0f +#define CVT_16_UNORM (float) ((unsigned short *) ptr)[i] / 65535.0f +#define CVT_32_UNORM (float) ((unsigned int *) ptr)[i] / 4294967295.0f + +#define CVT_8_SNORM (float) ((char *) ptr)[i] / 127.0f +#define CVT_16_SNORM (float) ((short *) ptr)[i] / 32767.0f +#define CVT_32_SNORM (float) ((int *) ptr)[i] / 2147483647.0f + +FETCH_ATTRIB( R64G64B64A64_FLOAT, 4, CVT_64_FLOAT ) +FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT ) +FETCH_ATTRIB( R64G64_FLOAT, 2, CVT_64_FLOAT ) +FETCH_ATTRIB( R64_FLOAT, 1, CVT_64_FLOAT ) + +FETCH_ATTRIB( R32G32B32A32_FLOAT, 4, CVT_32_FLOAT ) +FETCH_ATTRIB( R32G32B32_FLOAT, 3, CVT_32_FLOAT ) +FETCH_ATTRIB( R32G32_FLOAT, 2, CVT_32_FLOAT ) +FETCH_ATTRIB( R32_FLOAT, 1, CVT_32_FLOAT ) + +FETCH_ATTRIB( R32G32B32A32_USCALED, 4, CVT_32_USCALED ) +FETCH_ATTRIB( R32G32B32_USCALED, 3, CVT_32_USCALED ) +FETCH_ATTRIB( R32G32_USCALED, 2, CVT_32_USCALED ) +FETCH_ATTRIB( R32_USCALED, 1, CVT_32_USCALED ) + +FETCH_ATTRIB( R32G32B32A32_SSCALED, 4, CVT_32_SSCALED ) +FETCH_ATTRIB( R32G32B32_SSCALED, 3, CVT_32_SSCALED ) +FETCH_ATTRIB( R32G32_SSCALED, 2, CVT_32_SSCALED ) +FETCH_ATTRIB( R32_SSCALED, 1, CVT_32_SSCALED ) + +FETCH_ATTRIB( R32G32B32A32_UNORM, 4, CVT_32_UNORM ) +FETCH_ATTRIB( R32G32B32_UNORM, 3, CVT_32_UNORM ) +FETCH_ATTRIB( R32G32_UNORM, 2, CVT_32_UNORM ) +FETCH_ATTRIB( R32_UNORM, 1, CVT_32_UNORM ) + +FETCH_ATTRIB( R32G32B32A32_SNORM, 4, CVT_32_SNORM ) +FETCH_ATTRIB( R32G32B32_SNORM, 3, CVT_32_SNORM ) +FETCH_ATTRIB( R32G32_SNORM, 2, CVT_32_SNORM ) +FETCH_ATTRIB( R32_SNORM, 1, CVT_32_SNORM ) + +FETCH_ATTRIB( R16G16B16A16_USCALED, 4, CVT_16_USCALED ) +FETCH_ATTRIB( R16G16B16_USCALED, 3, CVT_16_USCALED ) +FETCH_ATTRIB( R16G16_USCALED, 2, CVT_16_USCALED ) +FETCH_ATTRIB( R16_USCALED, 1, CVT_16_USCALED ) + +FETCH_ATTRIB( R16G16B16A16_SSCALED, 4, CVT_16_SSCALED ) +FETCH_ATTRIB( R16G16B16_SSCALED, 3, CVT_16_SSCALED ) +FETCH_ATTRIB( R16G16_SSCALED, 2, CVT_16_SSCALED ) +FETCH_ATTRIB( R16_SSCALED, 1, CVT_16_SSCALED ) + +FETCH_ATTRIB( R16G16B16A16_UNORM, 4, CVT_16_UNORM ) +FETCH_ATTRIB( R16G16B16_UNORM, 3, CVT_16_UNORM ) +FETCH_ATTRIB( R16G16_UNORM, 2, CVT_16_UNORM ) +FETCH_ATTRIB( R16_UNORM, 1, CVT_16_UNORM ) + +FETCH_ATTRIB( R16G16B16A16_SNORM, 4, CVT_16_SNORM ) +FETCH_ATTRIB( R16G16B16_SNORM, 3, CVT_16_SNORM ) +FETCH_ATTRIB( R16G16_SNORM, 2, CVT_16_SNORM ) +FETCH_ATTRIB( R16_SNORM, 1, CVT_16_SNORM ) + +FETCH_ATTRIB( R8G8B8A8_USCALED, 4, CVT_8_USCALED ) +FETCH_ATTRIB( R8G8B8_USCALED, 3, CVT_8_USCALED ) +FETCH_ATTRIB( R8G8_USCALED, 2, CVT_8_USCALED ) +FETCH_ATTRIB( R8_USCALED, 1, CVT_8_USCALED ) + +FETCH_ATTRIB( R8G8B8A8_SSCALED, 4, CVT_8_SSCALED ) +FETCH_ATTRIB( R8G8B8_SSCALED, 3, CVT_8_SSCALED ) +FETCH_ATTRIB( R8G8_SSCALED, 2, CVT_8_SSCALED ) +FETCH_ATTRIB( R8_SSCALED, 1, CVT_8_SSCALED ) + +FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM ) +FETCH_ATTRIB( R8G8B8_UNORM, 3, CVT_8_UNORM ) +FETCH_ATTRIB( R8G8_UNORM, 2, CVT_8_UNORM ) +FETCH_ATTRIB( R8_UNORM, 1, CVT_8_UNORM ) + +FETCH_ATTRIB( R8G8B8A8_SNORM, 4, CVT_8_SNORM ) +FETCH_ATTRIB( R8G8B8_SNORM, 3, CVT_8_SNORM ) +FETCH_ATTRIB( R8G8_SNORM, 2, CVT_8_SNORM ) +FETCH_ATTRIB( R8_SNORM, 1, CVT_8_SNORM ) + +FETCH_ATTRIB( A8R8G8B8_UNORM, 4, CVT_8_UNORM ) +//FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM ) + + + +static fetch_func get_fetch_func( enum pipe_format format ) +{ +#if 0 + { + char tmp[80]; + pf_sprint_name(tmp, format); + debug_printf("%s: %s\n", __FUNCTION__, tmp); + } +#endif + + switch (format) { + case PIPE_FORMAT_R64_FLOAT: + return fetch_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return fetch_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return fetch_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return fetch_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return fetch_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return fetch_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return fetch_R32G32B32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return fetch_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return fetch_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return fetch_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return fetch_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return fetch_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return fetch_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return fetch_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return fetch_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return fetch_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return fetch_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return fetch_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return fetch_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return fetch_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return fetch_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return fetch_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return fetch_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return fetch_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return fetch_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return fetch_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return fetch_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return fetch_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return fetch_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return fetch_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return fetch_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return fetch_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return fetch_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return fetch_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return fetch_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return fetch_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return fetch_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return fetch_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return fetch_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return fetch_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return fetch_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return fetch_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return fetch_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return fetch_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return fetch_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return fetch_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return fetch_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return fetch_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return fetch_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return fetch_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return fetch_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return fetch_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return fetch_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return fetch_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return fetch_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return fetch_R8G8B8A8_SSCALED; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + return fetch_A8R8G8B8_UNORM; + + case 0: + return NULL; /* not sure why this is needed */ + + default: + assert(0); + return NULL; + } +} + + +static void +transpose_4x4( float *out, const float *in ) +{ + /* This can be achieved in 12 sse instructions, plus the final + * stores I guess. This is probably a bit more than that - maybe + * 32 or so? + */ + out[0] = in[0]; out[1] = in[4]; out[2] = in[8]; out[3] = in[12]; + out[4] = in[1]; out[5] = in[5]; out[6] = in[9]; out[7] = in[13]; + out[8] = in[2]; out[9] = in[6]; out[10] = in[10]; out[11] = in[14]; + out[12] = in[3]; out[13] = in[7]; out[14] = in[11]; out[15] = in[15]; +} + + + +static void fetch_xyz_rgb( struct draw_context *draw, + struct tgsi_exec_machine *machine, + const unsigned *elts, + unsigned count ) +{ + const unsigned *pitch = draw->vertex_fetch.pitch; + const ubyte **src = draw->vertex_fetch.src_ptr; + int i; + + assert(count <= 4); + +// debug_printf("%s\n", __FUNCTION__); + + /* loop over vertex attributes (vertex shader inputs) + */ + + for (i = 0; i < 4; i++) { + { + const float *in = (const float *)(src[0] + elts[i] * pitch[0]); + float *out = &machine->Inputs[0].xyzw[0].f[i]; + out[0] = in[0]; + out[4] = in[1]; + out[8] = in[2]; + out[12] = 1.0f; + } + + { + const float *in = (const float *)(src[1] + elts[i] * pitch[1]); + float *out = &machine->Inputs[1].xyzw[0].f[i]; + out[0] = in[0]; + out[4] = in[1]; + out[8] = in[2]; + out[12] = 1.0f; + } + } +} + + + + +static void fetch_xyz_rgb_st( struct draw_context *draw, + struct tgsi_exec_machine *machine, + const unsigned *elts, + unsigned count ) +{ + const unsigned *pitch = draw->vertex_fetch.pitch; + const ubyte **src = draw->vertex_fetch.src_ptr; + int i; + + assert(count <= 4); + + /* loop over vertex attributes (vertex shader inputs) + */ + + for (i = 0; i < 4; i++) { + { + const float *in = (const float *)(src[0] + elts[i] * pitch[0]); + float *out = &machine->Inputs[0].xyzw[0].f[i]; + out[0] = in[0]; + out[4] = in[1]; + out[8] = in[2]; + out[12] = 1.0f; + } + + { + const float *in = (const float *)(src[1] + elts[i] * pitch[1]); + float *out = &machine->Inputs[1].xyzw[0].f[i]; + out[0] = in[0]; + out[4] = in[1]; + out[8] = in[2]; + out[12] = 1.0f; + } + + { + const float *in = (const float *)(src[2] + elts[i] * pitch[2]); + float *out = &machine->Inputs[2].xyzw[0].f[i]; + out[0] = in[0]; + out[4] = in[1]; + out[8] = 0.0f; + out[12] = 1.0f; + } + } +} + + + + +/** + * Fetch vertex attributes for 'count' vertices. + */ +static void generic_vertex_fetch( struct draw_context *draw, + struct tgsi_exec_machine *machine, + const unsigned *elts, + unsigned count ) +{ + unsigned nr_attrs = draw->vertex_fetch.nr_attrs; + unsigned attr; + + assert(count <= 4); + +// debug_printf("%s %d\n", __FUNCTION__, count); + + /* loop over vertex attributes (vertex shader inputs) + */ + for (attr = 0; attr < nr_attrs; attr++) { + + const unsigned pitch = draw->vertex_fetch.pitch[attr]; + const ubyte *src = draw->vertex_fetch.src_ptr[attr]; + const fetch_func fetch = draw->vertex_fetch.fetch[attr]; + unsigned i; + float p[4][4]; + + + /* Fetch four attributes for four vertices. + * + * Could fetch directly into AOS format, but this is meant to be + * a prototype for an sse implementation, which would have + * difficulties doing that. + */ + for (i = 0; i < count; i++) + fetch( src + elts[i] * pitch, p[i] ); + + /* Be nice and zero out any missing vertices: + */ + for ( ; i < 4; i++) + p[i][0] = p[i][1] = p[i][2] = p[i][3] = 0; + + /* Transpose/swizzle into sse-friendly format. Currently + * assuming that all vertex shader inputs are float[4], but this + * isn't true -- if the vertex shader only wants tex0.xy, we + * could optimize for that. + * + * To do so fully without codegen would probably require an + * excessive number of fetch functions, but we could at least + * minimize the transpose step: + */ + transpose_4x4( (float *)&machine->Inputs[attr].xyzw[0].f[0], (float *)p ); + } +} + + + +void draw_update_vertex_fetch( struct draw_context *draw ) +{ + unsigned nr_attrs, i; + +// debug_printf("%s\n", __FUNCTION__); + + /* this may happend during context init */ + if (!draw->vertex_shader) + return; + + nr_attrs = draw->vertex_shader->state->num_inputs; + + for (i = 0; i < nr_attrs; i++) { + unsigned buf = draw->vertex_element[i].vertex_buffer_index; + enum pipe_format format = draw->vertex_element[i].src_format; + + draw->vertex_fetch.src_ptr[i] = (const ubyte *) draw->user.vbuffer[buf] + + draw->vertex_buffer[buf].buffer_offset + + draw->vertex_element[i].src_offset; + + draw->vertex_fetch.pitch[i] = draw->vertex_buffer[buf].pitch; + draw->vertex_fetch.fetch[i] = get_fetch_func( format ); + } + + draw->vertex_fetch.nr_attrs = nr_attrs; + + draw->vertex_fetch.fetch_func = generic_vertex_fetch; + + switch (nr_attrs) { + case 2: + if (draw->vertex_element[0].src_format == PIPE_FORMAT_R32G32B32_FLOAT && + draw->vertex_element[1].src_format == PIPE_FORMAT_R32G32B32_FLOAT) + draw->vertex_fetch.fetch_func = fetch_xyz_rgb; + break; + case 3: + if (draw->vertex_element[0].src_format == PIPE_FORMAT_R32G32B32_FLOAT && + draw->vertex_element[1].src_format == PIPE_FORMAT_R32G32B32_FLOAT && + draw->vertex_element[2].src_format == PIPE_FORMAT_R32G32_FLOAT) + draw->vertex_fetch.fetch_func = fetch_xyz_rgb_st; + break; + default: + break; + } + +} diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c new file mode 100644 index 0000000000..377ecbb931 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c @@ -0,0 +1,325 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + * Brian Paul + */ + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#if defined(__i386__) || defined(__386__) +#include "tgsi/exec/tgsi_sse2.h" +#endif +#include "draw_private.h" +#include "draw_context.h" + +#include "x86/rtasm/x86sse.h" +#include "llvm/gallivm.h" + + +#define DBG_VS 0 + + +static INLINE unsigned +compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) +{ + unsigned mask = 0; + unsigned i; + + /* Do the hardwired planes first: + */ + if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; + if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; + if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; + if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; + if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; + if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; + + /* Followed by any remaining ones: + */ + for (i = 6; i < nr; i++) { + if (dot4(clip, plane[i]) < 0) + mask |= (1<machine; + unsigned int j; + + ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX); + ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX); + const float *scale = draw->viewport.scale; + const float *trans = draw->viewport.translate; + + assert(count <= 4); + assert(draw->vertex_shader->state->output_semantic_name[0] + == TGSI_SEMANTIC_POSITION); + + /* Consts does not require 16 byte alignment. */ + machine->Consts = (float (*)[4]) draw->user.constants; + + machine->Inputs = ALIGN16_ASSIGN(inputs); + machine->Outputs = ALIGN16_ASSIGN(outputs); + + draw->vertex_fetch.fetch_func( draw, machine, elts, count ); + + /* run shader */ +#ifdef MESA_LLVM + if (1) { + struct gallivm_prog *prog = draw->vertex_shader->llvm_prog; + gallivm_cpu_vs_exec(prog, + machine->Inputs, + machine->Outputs, + machine->Consts, + machine->Temps); + } else +#elif defined(__i386__) || defined(__386__) + if (draw->use_sse) { + /* SSE */ + /* cast away const */ + struct draw_vertex_shader *shader + = (struct draw_vertex_shader *)draw->vertex_shader; + codegen_function func + = (codegen_function) x86_get_func( &shader->sse2_program ); + + if (func) + func( + machine->Inputs, + machine->Outputs, + machine->Consts, + machine->Temps ); + else + /* interpreter */ + tgsi_exec_machine_run( machine ); + } + else +#endif + { + /* interpreter */ + tgsi_exec_machine_run( machine ); + } + + /* store machine results */ + for (j = 0; j < count; j++) { + unsigned slot; + float x, y, z, w; + + /* Handle attr[0] (position) specially: + * + * XXX: Computing the clipmask should be done in the vertex + * program as a set of DP4 instructions appended to the + * user-provided code. + */ + x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + + vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); + vOut[j]->edgeflag = 1; + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + vOut[j]->data[0][0] = x * scale[0] + trans[0]; + vOut[j]->data[0][1] = y * scale[1] + trans[1]; + vOut[j]->data[0][2] = z * scale[2] + trans[2]; + vOut[j]->data[0][3] = w; + +#if DBG_VS + debug_printf("output[%d]win: %f %f %f %f\n", j, + vOut[j]->data[0][0], + vOut[j]->data[0][1], + vOut[j]->data[0][2], + vOut[j]->data[0][3]); +#endif + /* Remaining attributes are packed into sequential post-transform + * vertex attrib slots. + */ + for (slot = 1; slot < draw->num_vs_outputs; slot++) { + vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; +#if DBG_VS + debug_printf("output[%d][%d]: %f %f %f %f\n", j, slot, + vOut[j]->data[slot][0], + vOut[j]->data[slot][1], + vOut[j]->data[slot][2], + vOut[j]->data[slot][3]); +#endif + } + } /* loop over vertices */ +} + + +/** + * Run the vertex shader on all vertices in the vertex queue. + * Called by the draw module when the vertx cache needs to be flushed. + */ +void +draw_vertex_shader_queue_flush(struct draw_context *draw) +{ + unsigned i; + + assert(draw->vs.queue_nr != 0); + + /* XXX: do this on statechange: + */ + draw_update_vertex_fetch( draw ); + +// fprintf(stderr, " q(%d) ", draw->vs.queue_nr ); + + /* run vertex shader on vertex cache entries, four per invokation */ + for (i = 0; i < draw->vs.queue_nr; i += 4) { + struct vertex_header *dests[4]; + unsigned elts[4]; + int j, n = MIN2(4, draw->vs.queue_nr - i); + + for (j = 0; j < n; j++) { + elts[j] = draw->vs.queue[i + j].elt; + dests[j] = draw->vs.queue[i + j].dest; + } + + for ( ; j < 4; j++) { + elts[j] = elts[0]; + dests[j] = dests[0]; + } + + assert(n > 0); + assert(n <= 4); + + run_vertex_program(draw, elts, n, dests); + } + + draw->vs.queue_nr = 0; +} + + +struct draw_vertex_shader * +draw_create_vertex_shader(struct draw_context *draw, + const struct pipe_shader_state *shader) +{ + struct draw_vertex_shader *vs; + + vs = CALLOC_STRUCT( draw_vertex_shader ); + if (vs == NULL) { + return NULL; + } + + vs->state = shader; + +#ifdef MESA_LLVM + struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS); + gallivm_ir_set_layout(ir, GALLIVM_SOA); + gallivm_ir_set_components(ir, 4); + gallivm_ir_fill_from_tgsi(ir, shader->tokens); + vs->llvm_prog = gallivm_ir_compile(ir); + gallivm_ir_delete(ir); + + draw->engine = gallivm_global_cpu_engine(); + if (!draw->engine) { + draw->engine = gallivm_cpu_engine_create(vs->llvm_prog); + } + else { + gallivm_cpu_jit_compile(draw->engine, vs->llvm_prog); + } +#elif defined(__i386__) || defined(__386__) + if (draw->use_sse) { + /* cast-away const */ + struct pipe_shader_state *sh = (struct pipe_shader_state *) shader; + + x86_init_func( &vs->sse2_program ); + if (!tgsi_emit_sse2( (struct tgsi_token *) sh->tokens, + &vs->sse2_program )) { + x86_release_func( (struct x86_function *) &vs->sse2_program ); + fprintf(stdout /*err*/, + "tgsi_emit_sse2() failed, falling back to interpreter\n"); + } + } +#endif + + return vs; +} + + +void +draw_bind_vertex_shader(struct draw_context *draw, + struct draw_vertex_shader *dvs) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + + draw->vertex_shader = dvs; + draw->num_vs_outputs = dvs->state->num_outputs; + + /* specify the vertex program to interpret/execute */ + tgsi_exec_machine_init(&draw->machine, + draw->vertex_shader->state->tokens, + PIPE_MAX_SAMPLERS, + NULL /*samplers*/ ); +} + + +void +draw_delete_vertex_shader(struct draw_context *draw, + struct draw_vertex_shader *dvs) +{ +#if defined(__i386__) || defined(__386__) + x86_release_func( (struct x86_function *) &dvs->sse2_program ); +#endif + + FREE( dvs ); +} diff --git a/src/gallium/auxiliary/draw/draw_vf.c b/src/gallium/auxiliary/draw/draw_vf.c new file mode 100644 index 0000000000..dc3a5ecd21 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vf.c @@ -0,0 +1,428 @@ +/* + * Copyright 2003 Tungsten Graphics, inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell + */ + + +#include + +#include "pipe/p_compiler.h" +#include "pipe/p_util.h" + +#include "draw_vf.h" + + +#define DRAW_VF_DBG 0 + + +/* TODO: remove this */ +extern void +_mesa_exec_free( void *addr ); + + +static boolean match_fastpath( struct draw_vertex_fetch *vf, + const struct draw_vf_fastpath *fp) +{ + unsigned j; + + if (vf->attr_count != fp->attr_count) + return FALSE; + + for (j = 0; j < vf->attr_count; j++) + if (vf->attr[j].format != fp->attr[j].format || + vf->attr[j].inputsize != fp->attr[j].size || + vf->attr[j].vertoffset != fp->attr[j].offset) + return FALSE; + + if (fp->match_strides) { + if (vf->vertex_stride != fp->vertex_stride) + return FALSE; + + for (j = 0; j < vf->attr_count; j++) + if (vf->attr[j].inputstride != fp->attr[j].stride) + return FALSE; + } + + return TRUE; +} + +static boolean search_fastpath_emit( struct draw_vertex_fetch *vf ) +{ + struct draw_vf_fastpath *fp = vf->fastpath; + + for ( ; fp ; fp = fp->next) { + if (match_fastpath(vf, fp)) { + vf->emit = fp->func; + return TRUE; + } + } + + return FALSE; +} + +void draw_vf_register_fastpath( struct draw_vertex_fetch *vf, + boolean match_strides ) +{ + struct draw_vf_fastpath *fastpath = CALLOC_STRUCT(draw_vf_fastpath); + unsigned i; + + fastpath->vertex_stride = vf->vertex_stride; + fastpath->attr_count = vf->attr_count; + fastpath->match_strides = match_strides; + fastpath->func = vf->emit; + fastpath->attr = (struct draw_vf_attr_type *) + MALLOC(vf->attr_count * sizeof(fastpath->attr[0])); + + for (i = 0; i < vf->attr_count; i++) { + fastpath->attr[i].format = vf->attr[i].format; + fastpath->attr[i].stride = vf->attr[i].inputstride; + fastpath->attr[i].size = vf->attr[i].inputsize; + fastpath->attr[i].offset = vf->attr[i].vertoffset; + } + + fastpath->next = vf->fastpath; + vf->fastpath = fastpath; +} + + + + +/*********************************************************************** + * Build codegen functions or return generic ones: + */ +static void choose_emit_func( struct draw_vertex_fetch *vf, + unsigned count, + uint8_t *dest) +{ + vf->emit = NULL; + + /* Does this match an existing (hardwired, codegen or known-bad) + * fastpath? + */ + if (search_fastpath_emit(vf)) { + /* Use this result. If it is null, then it is already known + * that the current state will fail for codegen and there is no + * point trying again. + */ + } + else if (vf->codegen_emit) { + vf->codegen_emit( vf ); + } + + if (!vf->emit) { + draw_vf_generate_hardwired_emit(vf); + } + + /* Otherwise use the generic version: + */ + if (!vf->emit) + vf->emit = draw_vf_generic_emit; + + vf->emit( vf, count, dest ); +} + + + + + +/*********************************************************************** + * Public entrypoints, mostly dispatch to the above: + */ + + + +static unsigned +draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf, + const struct draw_vf_attr_map *map, + unsigned nr, + unsigned vertex_stride ) +{ + unsigned offset = 0; + unsigned i, j; + + assert(nr < PIPE_ATTRIB_MAX); + + for (j = 0, i = 0; i < nr; i++) { + const unsigned format = map[i].format; + if (format == DRAW_EMIT_PAD) { +#if (DRAW_VF_DBG) + debug_printf("%d: pad %d, offset %d\n", i, + map[i].offset, offset); +#endif + + offset += map[i].offset; + + } + else { + vf->attr[j].attrib = map[i].attrib; + vf->attr[j].format = format; + vf->attr[j].insert = draw_vf_format_info[format].insert; + vf->attr[j].vertattrsize = draw_vf_format_info[format].attrsize; + vf->attr[j].vertoffset = offset; + vf->attr[j].isconst = draw_vf_format_info[format].isconst; + if(vf->attr[j].isconst) + memcpy(vf->attr[j].data, &map[i].data, vf->attr[j].vertattrsize); + +#if (DRAW_VF_DBG) + debug_printf("%d: %s, offset %d\n", i, + draw_vf_format_info[format].name, + vf->attr[j].vertoffset); +#endif + + offset += draw_vf_format_info[format].attrsize; + j++; + } + } + + vf->attr_count = j; + vf->vertex_stride = vertex_stride ? vertex_stride : offset; + vf->emit = choose_emit_func; + + assert(vf->vertex_stride >= offset); + return vf->vertex_stride; +} + + +void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf, + const struct vertex_info *vinfo, + float point_size ) +{ + unsigned i, j, k; + struct draw_vf_attr *a = vf->attr; + struct draw_vf_attr_map attrs[PIPE_MAX_SHADER_INPUTS]; + unsigned count = 0; /* for debug/sanity */ + unsigned nr_attrs = 0; + + for (i = 0; i < vinfo->num_attribs; i++) { + j = vinfo->src_index[i]; + switch (vinfo->emit[i]) { + case EMIT_OMIT: + /* no-op */ + break; + case EMIT_ALL: { + /* just copy the whole vertex as-is to the vbuf */ + unsigned s = vinfo->size; + assert(i == 0); + assert(j == 0); + /* copy the vertex header */ + /* XXX: we actually don't copy the header, just pad it */ + attrs[nr_attrs].attrib = 0; + attrs[nr_attrs].format = DRAW_EMIT_PAD; + attrs[nr_attrs].offset = offsetof(struct vertex_header, data); + s -= offsetof(struct vertex_header, data)/4; + count += offsetof(struct vertex_header, data)/4; + nr_attrs++; + /* copy the vertex data */ + for(k = 0; k < (s & ~0x3); k += 4) { + attrs[nr_attrs].attrib = k/4; + attrs[nr_attrs].format = DRAW_EMIT_4F; + attrs[nr_attrs].offset = 0; + nr_attrs++; + count += 4; + } + /* tail */ + /* XXX: actually, this shouldn't be needed */ + attrs[nr_attrs].attrib = k/4; + attrs[nr_attrs].offset = 0; + switch(s & 0x3) { + case 0: + break; + case 1: + attrs[nr_attrs].format = DRAW_EMIT_1F; + nr_attrs++; + count += 1; + break; + case 2: + attrs[nr_attrs].format = DRAW_EMIT_2F; + nr_attrs++; + count += 2; + break; + case 3: + attrs[nr_attrs].format = DRAW_EMIT_3F; + nr_attrs++; + count += 3; + break; + } + break; + } + case EMIT_1F: + attrs[nr_attrs].attrib = j; + attrs[nr_attrs].format = DRAW_EMIT_1F; + attrs[nr_attrs].offset = 0; + nr_attrs++; + count++; + break; + case EMIT_1F_PSIZE: + attrs[nr_attrs].attrib = j; + attrs[nr_attrs].format = DRAW_EMIT_1F_CONST; + attrs[nr_attrs].offset = 0; + attrs[nr_attrs].data.f[0] = point_size; + nr_attrs++; + count++; + break; + case EMIT_2F: + attrs[nr_attrs].attrib = j; + attrs[nr_attrs].format = DRAW_EMIT_2F; + attrs[nr_attrs].offset = 0; + nr_attrs++; + count += 2; + break; + case EMIT_3F: + attrs[nr_attrs].attrib = j; + attrs[nr_attrs].format = DRAW_EMIT_3F; + attrs[nr_attrs].offset = 0; + nr_attrs++; + count += 3; + break; + case EMIT_4F: + attrs[nr_attrs].attrib = j; + attrs[nr_attrs].format = DRAW_EMIT_4F; + attrs[nr_attrs].offset = 0; + nr_attrs++; + count += 4; + break; + case EMIT_4UB: + attrs[nr_attrs].attrib = j; + attrs[nr_attrs].format = DRAW_EMIT_4UB_4F_BGRA; + attrs[nr_attrs].offset = 0; + nr_attrs++; + count += 1; + break; + default: + assert(0); + } + } + + assert(count == vinfo->size); + + draw_vf_set_vertex_attributes(vf, + attrs, + nr_attrs, + vinfo->size * sizeof(float) ); + + for (j = 0; j < vf->attr_count; j++) { + a[j].inputsize = 4; + a[j].do_insert = a[j].insert[4 - 1]; + if(a[j].isconst) { + a[j].inputptr = a[j].data; + a[j].inputstride = 0; + } + } +} + + +#if 0 +/* Set attribute pointers, adjusted for start position: + */ +void draw_vf_set_sources( struct draw_vertex_fetch *vf, + GLvector4f * const sources[], + unsigned start ) +{ + struct draw_vf_attr *a = vf->attr; + unsigned j; + + for (j = 0; j < vf->attr_count; j++) { + const GLvector4f *vptr = sources[a[j].attrib]; + + if ((a[j].inputstride != vptr->stride) || + (a[j].inputsize != vptr->size)) + vf->emit = choose_emit_func; + + a[j].inputstride = vptr->stride; + a[j].inputsize = vptr->size; + a[j].do_insert = a[j].insert[vptr->size - 1]; + a[j].inputptr = ((uint8_t *)vptr->data) + start * vptr->stride; + } +} +#endif + + +/** + * Emit a vertex to dest. + */ +void draw_vf_emit_vertex( struct draw_vertex_fetch *vf, + struct vertex_header *vertex, + void *dest ) +{ + struct draw_vf_attr *a = vf->attr; + unsigned j; + + for (j = 0; j < vf->attr_count; j++) { + if (!a[j].isconst) { + a[j].inputptr = (uint8_t *)&vertex->data[a[j].attrib][0]; + a[j].inputstride = 0; /* XXX: one-vertex-max ATM */ + } + } + + vf->emit( vf, 1, (uint8_t*) dest ); +} + + + +struct draw_vertex_fetch *draw_vf_create( void ) +{ + struct draw_vertex_fetch *vf = CALLOC_STRUCT(draw_vertex_fetch); + unsigned i; + + for (i = 0; i < PIPE_ATTRIB_MAX; i++) + vf->attr[i].vf = vf; + + vf->identity[0] = 0.0; + vf->identity[1] = 0.0; + vf->identity[2] = 0.0; + vf->identity[3] = 1.0; + + vf->codegen_emit = NULL; + +#ifdef USE_SSE_ASM + if (!GETENV("GALLIUM_NO_CODEGEN")) + vf->codegen_emit = draw_vf_generate_sse_emit; +#endif + + return vf; +} + + +void draw_vf_destroy( struct draw_vertex_fetch *vf ) +{ + struct draw_vf_fastpath *fp, *tmp; + + for (fp = vf->fastpath ; fp ; fp = tmp) { + tmp = fp->next; + FREE(fp->attr); + + /* KW: At the moment, fp->func is constrained to be allocated by + * _mesa_exec_alloc(), as the hardwired fastpaths in + * t_vertex_generic.c are handled specially. It would be nice + * to unify them, but this probably won't change until this + * module gets another overhaul. + */ + //_mesa_exec_free((void *) fp->func); + FREE(fp); + } + + vf->fastpath = NULL; + FREE(vf); +} diff --git a/src/gallium/auxiliary/draw/draw_vf.h b/src/gallium/auxiliary/draw/draw_vf.h new file mode 100644 index 0000000000..011c8f0ff1 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vf.h @@ -0,0 +1,236 @@ +/* + * Copyright 2008 Tungsten Graphics, inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +/** + * Vertex fetch/store/convert code. This functionality is used in two places: + * 1. Vertex fetch/convert - to grab vertex data from incoming vertex + * arrays and convert to format needed by vertex shaders. + * 2. Vertex store/emit - to convert simple float[][4] vertex attributes + * (which is the organization used throughout the draw/prim pipeline) to + * hardware-specific formats and emit into hardware vertex buffers. + * + * + * Authors: + * Keith Whitwell + */ + +#ifndef DRAW_VF_H +#define DRAW_VF_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" + +#include "draw_vertex.h" +#include "draw_private.h" /* for vertex_header */ + + +enum draw_vf_attr_format { + DRAW_EMIT_1F, + DRAW_EMIT_2F, + DRAW_EMIT_3F, + DRAW_EMIT_4F, + DRAW_EMIT_3F_XYW, /**< for projective texture */ + DRAW_EMIT_1UB_1F, /**< for fog coordinate */ + DRAW_EMIT_3UB_3F_RGB, /**< for specular color */ + DRAW_EMIT_3UB_3F_BGR, /**< for specular color */ + DRAW_EMIT_4UB_4F_RGBA, /**< for color */ + DRAW_EMIT_4UB_4F_BGRA, /**< for color */ + DRAW_EMIT_4UB_4F_ARGB, /**< for color */ + DRAW_EMIT_4UB_4F_ABGR, /**< for color */ + DRAW_EMIT_1F_CONST, + DRAW_EMIT_2F_CONST, + DRAW_EMIT_3F_CONST, + DRAW_EMIT_4F_CONST, + DRAW_EMIT_PAD, /**< leave a hole of 'offset' bytes */ + DRAW_EMIT_MAX +}; + +struct draw_vf_attr_map +{ + /** Input attribute number */ + unsigned attrib; + + enum draw_vf_attr_format format; + + unsigned offset; + + /** + * Constant data for DRAW_EMIT_*_CONST + */ + union { + uint8_t ub[4]; + float f[4]; + } data; +}; + +struct draw_vertex_fetch; + + + +#if 0 +unsigned +draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf, + const struct draw_vf_attr_map *map, + unsigned nr, + unsigned vertex_stride ); +#endif + +void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf, + const struct vertex_info *vinfo, + float point_size ); + +#if 0 +void +draw_vf_set_sources( struct draw_vertex_fetch *vf, + GLvector4f * const attrib[], + unsigned start ); +#endif + +void +draw_vf_emit_vertex( struct draw_vertex_fetch *vf, + struct vertex_header *vertex, + void *dest ); + +struct draw_vertex_fetch * +draw_vf_create( void ); + +void +draw_vf_destroy( struct draw_vertex_fetch *vf ); + + + +/*********************************************************************** + * Internal functions and structs: + */ + +struct draw_vf_attr; + +typedef void (*draw_vf_extract_func)( const struct draw_vf_attr *a, + float *out, + const uint8_t *v ); + +typedef void (*draw_vf_insert_func)( const struct draw_vf_attr *a, + uint8_t *v, + const float *in ); + +typedef void (*draw_vf_emit_func)( struct draw_vertex_fetch *vf, + unsigned count, + uint8_t *dest ); + + + +/** + * Describes how to convert/move a vertex attribute from a vertex + * array to a vertex structure. + */ +struct draw_vf_attr +{ + struct draw_vertex_fetch *vf; + + unsigned format; + unsigned inputsize; + unsigned inputstride; + unsigned vertoffset; /**< position of the attrib in the vertex struct */ + + boolean isconst; /**< read from const data below */ + uint8_t data[16]; + + unsigned attrib; /**< which vertex attrib (0=position, etc) */ + unsigned vertattrsize; /**< size of the attribute in bytes */ + + uint8_t *inputptr; + const draw_vf_insert_func *insert; + draw_vf_insert_func do_insert; + draw_vf_extract_func extract; +}; + +struct draw_vertex_fetch +{ + struct draw_vf_attr attr[PIPE_ATTRIB_MAX]; + unsigned attr_count; + unsigned vertex_stride; + + draw_vf_emit_func emit; + + /* Parameters and constants for codegen: + */ + float identity[4]; + + struct draw_vf_fastpath *fastpath; + + void (*codegen_emit)( struct draw_vertex_fetch *vf ); +}; + + +struct draw_vf_attr_type { + unsigned format; + unsigned size; + unsigned stride; + unsigned offset; +}; + +/** XXX this could be moved into draw_vf.c */ +struct draw_vf_fastpath { + unsigned vertex_stride; + unsigned attr_count; + boolean match_strides; + + struct draw_vf_attr_type *attr; + + draw_vf_emit_func func; + struct draw_vf_fastpath *next; +}; + + +void +draw_vf_register_fastpath( struct draw_vertex_fetch *vtx, + boolean match_strides ); + +void +draw_vf_generic_emit( struct draw_vertex_fetch *vf, + unsigned count, + uint8_t *v ); + +void +draw_vf_generate_hardwired_emit( struct draw_vertex_fetch *vf ); + +void +draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ); + + +/** XXX this type and function could probably be moved into draw_vf.c */ +struct draw_vf_format_info { + const char *name; + draw_vf_insert_func insert[4]; + const unsigned attrsize; + const boolean isconst; +}; + +extern const struct draw_vf_format_info +draw_vf_format_info[DRAW_EMIT_MAX]; + + +#endif diff --git a/src/gallium/auxiliary/draw/draw_vf_generic.c b/src/gallium/auxiliary/draw/draw_vf_generic.c new file mode 100644 index 0000000000..7a60a9db9c --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vf_generic.c @@ -0,0 +1,585 @@ + +/* + * Copyright 2003 Tungsten Graphics, inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_util.h" + +#include "draw_vf.h" + + + +static INLINE void insert_4f_4( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; +} + +static INLINE void insert_4f_3( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = 1; +} + +static INLINE void insert_4f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; + out[1] = in[1]; + out[2] = 0; + out[3] = 1; +} + +static INLINE void insert_4f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; + out[1] = 0; + out[2] = 0; + out[3] = 1; +} + +static INLINE void insert_3f_xyw_4( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[3]; +} + +static INLINE void insert_3f_xyw_err( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + (void) a; (void) v; (void) in; + assert(0); +} + +static INLINE void insert_3f_3( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; +} + +static INLINE void insert_3f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; + out[1] = in[1]; + out[2] = 0; +} + +static INLINE void insert_3f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; + out[1] = 0; + out[2] = 0; +} + + +static INLINE void insert_2f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; + out[1] = in[1]; +} + +static INLINE void insert_2f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; + out[1] = 0; +} + +static INLINE void insert_1f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + float *out = (float *)(v); + (void) a; + + out[0] = in[0]; +} + +static INLINE void insert_null( const struct draw_vf_attr *a, uint8_t *v, const float *in ) +{ + (void) a; (void) v; (void) in; +} + +static INLINE void insert_4ub_4f_rgba_4( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]); + UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]); +} + +static INLINE void insert_4ub_4f_rgba_3( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]); + v[3] = 0xff; +} + +static INLINE void insert_4ub_4f_rgba_2( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); + v[2] = 0; + v[3] = 0xff; +} + +static INLINE void insert_4ub_4f_rgba_1( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); + v[1] = 0; + v[2] = 0; + v[3] = 0xff; +} + +static INLINE void insert_4ub_4f_bgra_4( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]); + UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]); +} + +static INLINE void insert_4ub_4f_bgra_3( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]); + v[3] = 0xff; +} + +static INLINE void insert_4ub_4f_bgra_2( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); + v[0] = 0; + v[3] = 0xff; +} + +static INLINE void insert_4ub_4f_bgra_1( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); + v[1] = 0; + v[0] = 0; + v[3] = 0xff; +} + +static INLINE void insert_4ub_4f_argb_4( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]); + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]); +} + +static INLINE void insert_4ub_4f_argb_3( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]); + v[0] = 0xff; +} + +static INLINE void insert_4ub_4f_argb_2( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); + v[3] = 0x00; + v[0] = 0xff; +} + +static INLINE void insert_4ub_4f_argb_1( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); + v[2] = 0x00; + v[3] = 0x00; + v[0] = 0xff; +} + +static INLINE void insert_4ub_4f_abgr_4( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]); + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]); +} + +static INLINE void insert_4ub_4f_abgr_3( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]); + v[0] = 0xff; +} + +static INLINE void insert_4ub_4f_abgr_2( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); + v[1] = 0x00; + v[0] = 0xff; +} + +static INLINE void insert_4ub_4f_abgr_1( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); + v[2] = 0x00; + v[1] = 0x00; + v[0] = 0xff; +} + +static INLINE void insert_3ub_3f_rgb_3( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]); +} + +static INLINE void insert_3ub_3f_rgb_2( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); + v[2] = 0; +} + +static INLINE void insert_3ub_3f_rgb_1( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); + v[1] = 0; + v[2] = 0; +} + +static INLINE void insert_3ub_3f_bgr_3( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]); +} + +static INLINE void insert_3ub_3f_bgr_2( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); + UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); + v[0] = 0; +} + +static INLINE void insert_3ub_3f_bgr_1( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); + v[1] = 0; + v[0] = 0; +} + + +static INLINE void insert_1ub_1f_1( const struct draw_vf_attr *a, uint8_t *v, + const float *in ) +{ + (void) a; + UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); +} + + +const struct draw_vf_format_info draw_vf_format_info[DRAW_EMIT_MAX] = +{ + { "1f", + { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 }, + sizeof(float), FALSE }, + + { "2f", + { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 }, + 2 * sizeof(float), FALSE }, + + { "3f", + { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 }, + 3 * sizeof(float), FALSE }, + + { "4f", + { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 }, + 4 * sizeof(float), FALSE }, + + { "3f_xyw", + { insert_3f_xyw_err, insert_3f_xyw_err, insert_3f_xyw_err, + insert_3f_xyw_4 }, + 3 * sizeof(float), FALSE }, + + { "1ub_1f", + { insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1 }, + sizeof(uint8_t), FALSE }, + + { "3ub_3f_rgb", + { insert_3ub_3f_rgb_1, insert_3ub_3f_rgb_2, insert_3ub_3f_rgb_3, + insert_3ub_3f_rgb_3 }, + 3 * sizeof(uint8_t), FALSE }, + + { "3ub_3f_bgr", + { insert_3ub_3f_bgr_1, insert_3ub_3f_bgr_2, insert_3ub_3f_bgr_3, + insert_3ub_3f_bgr_3 }, + 3 * sizeof(uint8_t), FALSE }, + + { "4ub_4f_rgba", + { insert_4ub_4f_rgba_1, insert_4ub_4f_rgba_2, insert_4ub_4f_rgba_3, + insert_4ub_4f_rgba_4 }, + 4 * sizeof(uint8_t), FALSE }, + + { "4ub_4f_bgra", + { insert_4ub_4f_bgra_1, insert_4ub_4f_bgra_2, insert_4ub_4f_bgra_3, + insert_4ub_4f_bgra_4 }, + 4 * sizeof(uint8_t), FALSE }, + + { "4ub_4f_argb", + { insert_4ub_4f_argb_1, insert_4ub_4f_argb_2, insert_4ub_4f_argb_3, + insert_4ub_4f_argb_4 }, + 4 * sizeof(uint8_t), FALSE }, + + { "4ub_4f_abgr", + { insert_4ub_4f_abgr_1, insert_4ub_4f_abgr_2, insert_4ub_4f_abgr_3, + insert_4ub_4f_abgr_4 }, + 4 * sizeof(uint8_t), FALSE }, + + { "1f_const", + { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 }, + sizeof(float), TRUE }, + + { "2f_const", + { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 }, + 2 * sizeof(float), TRUE }, + + { "3f_const", + { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 }, + 3 * sizeof(float), TRUE }, + + { "4f_const", + { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 }, + 4 * sizeof(float), TRUE }, + + { "pad", + { NULL, NULL, NULL, NULL }, + 0, FALSE }, + +}; + + + + +/*********************************************************************** + * Hardwired fastpaths for emitting whole vertices or groups of + * vertices + */ +#define EMIT5(NR, F0, F1, F2, F3, F4, NAME) \ +static void NAME( struct draw_vertex_fetch *vf, \ + unsigned count, \ + uint8_t *v ) \ +{ \ + struct draw_vf_attr *a = vf->attr; \ + unsigned i; \ + \ + for (i = 0 ; i < count ; i++, v += vf->vertex_stride) { \ + if (NR > 0) { \ + F0( &a[0], v + a[0].vertoffset, (float *)a[0].inputptr ); \ + a[0].inputptr += a[0].inputstride; \ + } \ + \ + if (NR > 1) { \ + F1( &a[1], v + a[1].vertoffset, (float *)a[1].inputptr ); \ + a[1].inputptr += a[1].inputstride; \ + } \ + \ + if (NR > 2) { \ + F2( &a[2], v + a[2].vertoffset, (float *)a[2].inputptr ); \ + a[2].inputptr += a[2].inputstride; \ + } \ + \ + if (NR > 3) { \ + F3( &a[3], v + a[3].vertoffset, (float *)a[3].inputptr ); \ + a[3].inputptr += a[3].inputstride; \ + } \ + \ + if (NR > 4) { \ + F4( &a[4], v + a[4].vertoffset, (float *)a[4].inputptr ); \ + a[4].inputptr += a[4].inputstride; \ + } \ + } \ +} + + +#define EMIT2(F0, F1, NAME) EMIT5(2, F0, F1, insert_null, \ + insert_null, insert_null, NAME) + +#define EMIT3(F0, F1, F2, NAME) EMIT5(3, F0, F1, F2, insert_null, \ + insert_null, NAME) + +#define EMIT4(F0, F1, F2, F3, NAME) EMIT5(4, F0, F1, F2, F3, \ + insert_null, NAME) + + +EMIT2(insert_3f_3, insert_4ub_4f_rgba_4, emit_xyz3_rgba4) + +EMIT3(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, emit_xyzw4_rgba4_st2) + +EMIT4(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, insert_2f_2, emit_xyzw4_rgba4_st2_st2) + + +/* Use the codegen paths to select one of a number of hardwired + * fastpaths. + */ +void draw_vf_generate_hardwired_emit( struct draw_vertex_fetch *vf ) +{ + draw_vf_emit_func func = NULL; + + /* Does it fit a hardwired fastpath? Help! this is growing out of + * control! + */ + switch (vf->attr_count) { + case 2: + if (vf->attr[0].do_insert == insert_3f_3 && + vf->attr[1].do_insert == insert_4ub_4f_rgba_4) { + func = emit_xyz3_rgba4; + } + break; + case 3: + if (vf->attr[2].do_insert == insert_2f_2) { + if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4) { + if (vf->attr[0].do_insert == insert_4f_4) + func = emit_xyzw4_rgba4_st2; + } + } + break; + case 4: + if (vf->attr[2].do_insert == insert_2f_2 && + vf->attr[3].do_insert == insert_2f_2) { + if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4) { + if (vf->attr[0].do_insert == insert_4f_4) + func = emit_xyzw4_rgba4_st2_st2; + } + } + break; + } + + vf->emit = func; +} + +/*********************************************************************** + * Generic (non-codegen) functions for whole vertices or groups of + * vertices + */ + +void draw_vf_generic_emit( struct draw_vertex_fetch *vf, + unsigned count, + uint8_t *v ) +{ + struct draw_vf_attr *a = vf->attr; + const unsigned attr_count = vf->attr_count; + const unsigned stride = vf->vertex_stride; + unsigned i, j; + + for (i = 0 ; i < count ; i++, v += stride) { + for (j = 0; j < attr_count; j++) { + float *in = (float *)a[j].inputptr; + a[j].inputptr += a[j].inputstride; + a[j].do_insert( &a[j], v + a[j].vertoffset, in ); + } + } +} + + diff --git a/src/gallium/auxiliary/draw/draw_vf_sse.c b/src/gallium/auxiliary/draw/draw_vf_sse.c new file mode 100644 index 0000000000..1ad2ae756d --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vf_sse.c @@ -0,0 +1,614 @@ +/* + * Copyright 2003 Tungsten Graphics, inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell + */ + + +#include "simple_list.h" + +#include "pipe/p_compiler.h" + +#include "draw_vf.h" + + +#if defined(USE_SSE_ASM) + +#include "x86/rtasm/x86sse.h" +#include "x86/common_x86_asm.h" + + +#define X 0 +#define Y 1 +#define Z 2 +#define W 3 + + +struct x86_program { + struct x86_function func; + + struct draw_vertex_fetch *vf; + boolean inputs_safe; + boolean outputs_safe; + boolean have_sse2; + + struct x86_reg identity; + struct x86_reg chan0; +}; + + +static struct x86_reg get_identity( struct x86_program *p ) +{ + return p->identity; +} + +static void emit_load4f_4( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + sse_movups(&p->func, dest, arg0); +} + +static void emit_load4f_3( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + /* Have to jump through some hoops: + * + * c 0 0 0 + * c 0 0 1 + * 0 0 c 1 + * a b c 1 + */ + sse_movss(&p->func, dest, x86_make_disp(arg0, 8)); + sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) ); + sse_shufps(&p->func, dest, dest, SHUF(Y,Z,X,W) ); + sse_movlps(&p->func, dest, arg0); +} + +static void emit_load4f_2( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + /* Initialize from identity, then pull in low two words: + */ + sse_movups(&p->func, dest, get_identity(p)); + sse_movlps(&p->func, dest, arg0); +} + +static void emit_load4f_1( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + /* Pull in low word, then swizzle in identity */ + sse_movss(&p->func, dest, arg0); + sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) ); +} + + + +static void emit_load3f_3( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + /* Over-reads by 1 dword - potential SEGV if input is a vertex + * array. + */ + if (p->inputs_safe) { + sse_movups(&p->func, dest, arg0); + } + else { + /* c 0 0 0 + * c c c c + * a b c c + */ + sse_movss(&p->func, dest, x86_make_disp(arg0, 8)); + sse_shufps(&p->func, dest, dest, SHUF(X,X,X,X)); + sse_movlps(&p->func, dest, arg0); + } +} + +static void emit_load3f_2( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + emit_load4f_2(p, dest, arg0); +} + +static void emit_load3f_1( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + emit_load4f_1(p, dest, arg0); +} + +static void emit_load2f_2( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + sse_movlps(&p->func, dest, arg0); +} + +static void emit_load2f_1( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + emit_load4f_1(p, dest, arg0); +} + +static void emit_load1f_1( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + sse_movss(&p->func, dest, arg0); +} + +static void (*load[4][4])( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) = { + { emit_load1f_1, + emit_load1f_1, + emit_load1f_1, + emit_load1f_1 }, + + { emit_load2f_1, + emit_load2f_2, + emit_load2f_2, + emit_load2f_2 }, + + { emit_load3f_1, + emit_load3f_2, + emit_load3f_3, + emit_load3f_3 }, + + { emit_load4f_1, + emit_load4f_2, + emit_load4f_3, + emit_load4f_4 } +}; + +static void emit_load( struct x86_program *p, + struct x86_reg dest, + unsigned sz, + struct x86_reg src, + unsigned src_sz) +{ + load[sz-1][src_sz-1](p, dest, src); +} + +static void emit_store4f( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + sse_movups(&p->func, dest, arg0); +} + +static void emit_store3f( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + if (p->outputs_safe) { + /* Emit the extra dword anyway. This may hurt writecombining, + * may cause other problems. + */ + sse_movups(&p->func, dest, arg0); + } + else { + /* Alternate strategy - emit two, shuffle, emit one. + */ + sse_movlps(&p->func, dest, arg0); + sse_shufps(&p->func, arg0, arg0, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */ + sse_movss(&p->func, x86_make_disp(dest,8), arg0); + } +} + +static void emit_store2f( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + sse_movlps(&p->func, dest, arg0); +} + +static void emit_store1f( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) +{ + sse_movss(&p->func, dest, arg0); +} + + +static void (*store[4])( struct x86_program *p, + struct x86_reg dest, + struct x86_reg arg0 ) = +{ + emit_store1f, + emit_store2f, + emit_store3f, + emit_store4f +}; + +static void emit_store( struct x86_program *p, + struct x86_reg dest, + unsigned sz, + struct x86_reg temp ) + +{ + store[sz-1](p, dest, temp); +} + +static void emit_pack_store_4ub( struct x86_program *p, + struct x86_reg dest, + struct x86_reg temp ) +{ + /* Scale by 255.0 + */ + sse_mulps(&p->func, temp, p->chan0); + + if (p->have_sse2) { + sse2_cvtps2dq(&p->func, temp, temp); + sse2_packssdw(&p->func, temp, temp); + sse2_packuswb(&p->func, temp, temp); + sse_movss(&p->func, dest, temp); + } + else { + struct x86_reg mmx0 = x86_make_reg(file_MMX, 0); + struct x86_reg mmx1 = x86_make_reg(file_MMX, 1); + sse_cvtps2pi(&p->func, mmx0, temp); + sse_movhlps(&p->func, temp, temp); + sse_cvtps2pi(&p->func, mmx1, temp); + mmx_packssdw(&p->func, mmx0, mmx1); + mmx_packuswb(&p->func, mmx0, mmx0); + mmx_movd(&p->func, dest, mmx0); + } +} + +static int get_offset( const void *a, const void *b ) +{ + return (const char *)b - (const char *)a; +} + +/* Not much happens here. Eventually use this function to try and + * avoid saving/reloading the source pointers each vertex (if some of + * them can fit in registers). + */ +static void get_src_ptr( struct x86_program *p, + struct x86_reg srcREG, + struct x86_reg vfREG, + struct draw_vf_attr *a ) +{ + struct draw_vertex_fetch *vf = p->vf; + struct x86_reg ptr_to_src = x86_make_disp(vfREG, get_offset(vf, &a->inputptr)); + + /* Load current a[j].inputptr + */ + x86_mov(&p->func, srcREG, ptr_to_src); +} + +static void update_src_ptr( struct x86_program *p, + struct x86_reg srcREG, + struct x86_reg vfREG, + struct draw_vf_attr *a ) +{ + if (a->inputstride) { + struct draw_vertex_fetch *vf = p->vf; + struct x86_reg ptr_to_src = x86_make_disp(vfREG, get_offset(vf, &a->inputptr)); + + /* add a[j].inputstride (hardcoded value - could just as easily + * pull the stride value from memory each time). + */ + x86_lea(&p->func, srcREG, x86_make_disp(srcREG, a->inputstride)); + + /* save new value of a[j].inputptr + */ + x86_mov(&p->func, ptr_to_src, srcREG); + } +} + + +/* Lots of hardcoding + * + * EAX -- pointer to current output vertex + * ECX -- pointer to current attribute + * + */ +static boolean build_vertex_emit( struct x86_program *p ) +{ + struct draw_vertex_fetch *vf = p->vf; + unsigned j = 0; + + struct x86_reg vertexEAX = x86_make_reg(file_REG32, reg_AX); + struct x86_reg srcECX = x86_make_reg(file_REG32, reg_CX); + struct x86_reg countEBP = x86_make_reg(file_REG32, reg_BP); + struct x86_reg vfESI = x86_make_reg(file_REG32, reg_SI); + struct x86_reg temp = x86_make_reg(file_XMM, 0); + uint8_t *fixup, *label; + + /* Push a few regs? + */ + x86_push(&p->func, countEBP); + x86_push(&p->func, vfESI); + + + /* Get vertex count, compare to zero + */ + x86_xor(&p->func, srcECX, srcECX); + x86_mov(&p->func, countEBP, x86_fn_arg(&p->func, 2)); + x86_cmp(&p->func, countEBP, srcECX); + fixup = x86_jcc_forward(&p->func, cc_E); + + /* Initialize destination register. + */ + x86_mov(&p->func, vertexEAX, x86_fn_arg(&p->func, 3)); + + /* Move argument 1 (vf) into a reg: + */ + x86_mov(&p->func, vfESI, x86_fn_arg(&p->func, 1)); + + + /* always load, needed or not: + */ + sse_movups(&p->func, p->identity, x86_make_disp(vfESI, get_offset(vf, &vf->identity[0]))); + + /* Note address for loop jump */ + label = x86_get_label(&p->func); + + /* Emit code for each of the attributes. Currently routes + * everything through SSE registers, even when it might be more + * efficient to stick with regular old x86. No optimization or + * other tricks - enough new ground to cover here just getting + * things working. + */ + while (j < vf->attr_count) { + struct draw_vf_attr *a = &vf->attr[j]; + struct x86_reg dest = x86_make_disp(vertexEAX, a->vertoffset); + + /* Now, load an XMM reg from src, perhaps transform, then save. + * Could be shortcircuited in specific cases: + */ + switch (a->format) { + case DRAW_EMIT_1F: + case DRAW_EMIT_1F_CONST: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize); + emit_store(p, dest, 1, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case DRAW_EMIT_2F: + case DRAW_EMIT_2F_CONST: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize); + emit_store(p, dest, 2, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case DRAW_EMIT_3F: + case DRAW_EMIT_3F_CONST: + /* Potentially the worst case - hardcode 2+1 copying: + */ + if (0) { + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); + emit_store(p, dest, 3, temp); + update_src_ptr(p, srcECX, vfESI, a); + } + else { + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize); + emit_store(p, dest, 2, temp); + if (a->inputsize > 2) { + emit_load(p, temp, 1, x86_make_disp(srcECX, 8), 1); + emit_store(p, x86_make_disp(dest,8), 1, temp); + } + else { + sse_movss(&p->func, x86_make_disp(dest,8), get_identity(p)); + } + update_src_ptr(p, srcECX, vfESI, a); + } + break; + case DRAW_EMIT_4F: + case DRAW_EMIT_4F_CONST: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + emit_store(p, dest, 4, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case DRAW_EMIT_3F_XYW: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + sse_shufps(&p->func, temp, temp, SHUF(X,Y,W,Z)); + emit_store(p, dest, 3, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + + case DRAW_EMIT_1UB_1F: + /* Test for PAD3 + 1UB: + */ + if (j > 0 && + a[-1].vertoffset + a[-1].vertattrsize <= a->vertoffset - 3) + { + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize); + sse_shufps(&p->func, temp, temp, SHUF(X,X,X,X)); + emit_pack_store_4ub(p, x86_make_disp(dest, -3), temp); /* overkill! */ + update_src_ptr(p, srcECX, vfESI, a); + } + else { + debug_printf("Can't emit 1ub %x %x %d\n", + a->vertoffset, a[-1].vertoffset, a[-1].vertattrsize ); + return FALSE; + } + break; + case DRAW_EMIT_3UB_3F_RGB: + case DRAW_EMIT_3UB_3F_BGR: + /* Test for 3UB + PAD1: + */ + if (j == vf->attr_count - 1 || + a[1].vertoffset >= a->vertoffset + 4) { + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); + if (a->format == DRAW_EMIT_3UB_3F_BGR) + sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W)); + emit_pack_store_4ub(p, dest, temp); + update_src_ptr(p, srcECX, vfESI, a); + } + /* Test for 3UB + 1UB: + */ + else if (j < vf->attr_count - 1 && + a[1].format == DRAW_EMIT_1UB_1F && + a[1].vertoffset == a->vertoffset + 3) { + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); + update_src_ptr(p, srcECX, vfESI, a); + + /* Make room for incoming value: + */ + sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z)); + + get_src_ptr(p, srcECX, vfESI, &a[1]); + emit_load(p, temp, 1, x86_deref(srcECX), a[1].inputsize); + update_src_ptr(p, srcECX, vfESI, &a[1]); + + /* Rearrange and possibly do BGR conversion: + */ + if (a->format == DRAW_EMIT_3UB_3F_BGR) + sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X)); + else + sse_shufps(&p->func, temp, temp, SHUF(Y,Z,W,X)); + + emit_pack_store_4ub(p, dest, temp); + j++; /* NOTE: two attrs consumed */ + } + else { + debug_printf("Can't emit 3ub\n"); + } + return FALSE; /* add this later */ + break; + + case DRAW_EMIT_4UB_4F_RGBA: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + emit_pack_store_4ub(p, dest, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case DRAW_EMIT_4UB_4F_BGRA: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W)); + emit_pack_store_4ub(p, dest, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case DRAW_EMIT_4UB_4F_ARGB: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z)); + emit_pack_store_4ub(p, dest, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + case DRAW_EMIT_4UB_4F_ABGR: + get_src_ptr(p, srcECX, vfESI, a); + emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); + sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X)); + emit_pack_store_4ub(p, dest, temp); + update_src_ptr(p, srcECX, vfESI, a); + break; + default: + debug_printf("unknown a[%d].format %d\n", j, a->format); + return FALSE; /* catch any new opcodes */ + } + + /* Increment j by at least 1 - may have been incremented above also: + */ + j++; + } + + /* Next vertex: + */ + x86_lea(&p->func, vertexEAX, x86_make_disp(vertexEAX, vf->vertex_stride)); + + /* decr count, loop if not zero + */ + x86_dec(&p->func, countEBP); + x86_test(&p->func, countEBP, countEBP); + x86_jcc(&p->func, cc_NZ, label); + + /* Exit mmx state? + */ + if (p->func.need_emms) + mmx_emms(&p->func); + + /* Land forward jump here: + */ + x86_fixup_fwd_jump(&p->func, fixup); + + /* Pop regs and return + */ + x86_pop(&p->func, x86_get_base_reg(vfESI)); + x86_pop(&p->func, countEBP); + x86_ret(&p->func); + + vf->emit = (draw_vf_emit_func)x86_get_func(&p->func); + return TRUE; +} + + + +void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ) +{ + struct x86_program p; + + if (!cpu_has_xmm) { + vf->codegen_emit = NULL; + return; + } + + memset(&p, 0, sizeof(p)); + + p.vf = vf; + p.inputs_safe = 0; /* for now */ + p.outputs_safe = 1; /* for now */ + p.have_sse2 = cpu_has_xmm2; + p.identity = x86_make_reg(file_XMM, 6); + p.chan0 = x86_make_reg(file_XMM, 7); + + x86_init_func(&p.func); + + if (build_vertex_emit(&p)) { + draw_vf_register_fastpath( vf, TRUE ); + } + else { + /* Note the failure so that we don't keep trying to codegen an + * impossible state: + */ + draw_vf_register_fastpath( vf, FALSE ); + x86_release_func(&p.func); + } +} + +#else + +void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ) +{ + /* Dummy version for when USE_SSE_ASM not defined */ +} + +#endif diff --git a/src/gallium/auxiliary/draw/draw_wide_prims.c b/src/gallium/auxiliary/draw/draw_wide_prims.c new file mode 100644 index 0000000000..655774b155 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_wide_prims.c @@ -0,0 +1,432 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "draw_private.h" + + +struct wide_stage { + struct draw_stage stage; + + float half_line_width; + float half_point_size; + + uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS]; + uint texcoord_mode[PIPE_MAX_SHADER_OUTPUTS]; + uint num_texcoords; + + int psize_slot; +}; + + + +static INLINE struct wide_stage *wide_stage( struct draw_stage *stage ) +{ + return (struct wide_stage *)stage; +} + + +static void passthrough_point( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->point( stage->next, header ); +} + +static void passthrough_line( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->line(stage->next, header); +} + +static void passthrough_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->tri(stage->next, header); +} + + +/** + * Draw a wide line by drawing a quad (two triangles). + * XXX need to disable polygon stipple. + */ +static void wide_line( struct draw_stage *stage, + struct prim_header *header ) +{ + const struct wide_stage *wide = wide_stage(stage); + const float half_width = wide->half_line_width; + + struct prim_header tri; + + struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); + struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); + struct vertex_header *v2 = dup_vert(stage, header->v[1], 2); + struct vertex_header *v3 = dup_vert(stage, header->v[1], 3); + + float *pos0 = v0->data[0]; + float *pos1 = v1->data[0]; + float *pos2 = v2->data[0]; + float *pos3 = v3->data[0]; + + const float dx = FABSF(pos0[0] - pos2[0]); + const float dy = FABSF(pos0[1] - pos2[1]); + + /* + * Draw wide line as a quad (two tris) by "stretching" the line along + * X or Y. + * We need to tweak coords in several ways to be conformant here. + */ + + if (dx > dy) { + /* x-major line */ + pos0[1] = pos0[1] - half_width - 0.25f; + pos1[1] = pos1[1] + half_width - 0.25f; + pos2[1] = pos2[1] - half_width - 0.25f; + pos3[1] = pos3[1] + half_width - 0.25f; + if (pos0[0] < pos2[0]) { + /* left to right line */ + pos0[0] -= 0.5f; + pos1[0] -= 0.5f; + pos2[0] -= 0.5f; + pos3[0] -= 0.5f; + } + else { + /* right to left line */ + pos0[0] += 0.5f; + pos1[0] += 0.5f; + pos2[0] += 0.5f; + pos3[0] += 0.5f; + } + } + else { + /* y-major line */ + pos0[0] = pos0[0] - half_width + 0.25f; + pos1[0] = pos1[0] + half_width + 0.25f; + pos2[0] = pos2[0] - half_width + 0.25f; + pos3[0] = pos3[0] + half_width + 0.25f; + if (pos0[1] < pos2[1]) { + /* top to bottom line */ + pos0[1] -= 0.5f; + pos1[1] -= 0.5f; + pos2[1] -= 0.5f; + pos3[1] -= 0.5f; + } + else { + /* bottom to top line */ + pos0[1] += 0.5f; + pos1[1] += 0.5f; + pos2[1] += 0.5f; + pos3[1] += 0.5f; + } + } + + tri.det = header->det; /* only the sign matters */ + tri.v[0] = v0; + tri.v[1] = v2; + tri.v[2] = v3; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v0; + tri.v[1] = v3; + tri.v[2] = v1; + stage->next->tri( stage->next, &tri ); +} + + +/** + * Draw a wide line by drawing a quad, using geometry which will + * fullfill GL's antialiased line requirements. + */ +static void wide_line_aa(struct draw_stage *stage, + struct prim_header *header) +{ + const struct wide_stage *wide = wide_stage(stage); + const float half_width = wide->half_line_width; + struct prim_header tri; + struct vertex_header *v[4]; + float *pos; + float dx = header->v[1]->data[0][0] - header->v[0]->data[0][0]; + float dy = header->v[1]->data[0][1] - header->v[0]->data[0][1]; + const float len = (float) sqrt(dx * dx + dy * dy); + uint i; + + dx = dx * half_width / len; + dy = dy * half_width / len; + + /* allocate/dup new verts */ + for (i = 0; i < 4; i++) { + v[i] = dup_vert(stage, header->v[i/2], i); + } + + /* + * Quad for line from v0 to v1: + * + * 1 3 + * +-------------------------+ + * | | + * *v0 v1* + * | | + * +-------------------------+ + * 0 2 + */ + + pos = v[0]->data[0]; + pos[0] += dy; + pos[1] -= dx; + + pos = v[1]->data[0]; + pos[0] -= dy; + pos[1] += dx; + + pos = v[2]->data[0]; + pos[0] += dy; + pos[1] -= dx; + + pos = v[3]->data[0]; + pos[0] -= dy; + pos[1] += dx; + + tri.det = header->det; /* only the sign matters */ + + tri.v[0] = v[2]; tri.v[1] = v[1]; tri.v[2] = v[0]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[3]; tri.v[1] = v[1]; tri.v[2] = v[2]; + stage->next->tri( stage->next, &tri ); + +} + + +/** + * Set the vertex texcoords for sprite mode. + * Coords may be left untouched or set to a right-side-up or upside-down + * orientation. + */ +static void set_texcoords(const struct wide_stage *wide, + struct vertex_header *v, const float tc[4]) +{ + uint i; + for (i = 0; i < wide->num_texcoords; i++) { + if (wide->texcoord_mode[i] != PIPE_SPRITE_COORD_NONE) { + uint j = wide->texcoord_slot[i]; + v->data[j][0] = tc[0]; + if (wide->texcoord_mode[i] == PIPE_SPRITE_COORD_LOWER_LEFT) + v->data[j][1] = 1.0f - tc[1]; + else + v->data[j][1] = tc[1]; + v->data[j][2] = tc[2]; + v->data[j][3] = tc[3]; + } + } +} + + +/* If there are lots of sprite points (and why wouldn't there be?) it + * would probably be more sensible to change hardware setup to + * optimize this rather than doing the whole thing in software like + * this. + */ +static void wide_point( struct draw_stage *stage, + struct prim_header *header ) +{ + const struct wide_stage *wide = wide_stage(stage); + const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite; + float half_size; + float left_adj, right_adj; + + struct prim_header tri; + + /* four dups of original vertex */ + struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); + struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); + struct vertex_header *v2 = dup_vert(stage, header->v[0], 2); + struct vertex_header *v3 = dup_vert(stage, header->v[0], 3); + + float *pos0 = v0->data[0]; + float *pos1 = v1->data[0]; + float *pos2 = v2->data[0]; + float *pos3 = v3->data[0]; + + /* point size is either per-vertex or fixed size */ + if (wide->psize_slot >= 0) { + half_size = 0.5f * header->v[0]->data[wide->psize_slot][0]; + } + else { + half_size = wide->half_point_size; + } + + left_adj = -half_size + 0.25f; + right_adj = half_size + 0.25f; + + pos0[0] += left_adj; + pos0[1] -= half_size; + + pos1[0] += left_adj; + pos1[1] += half_size; + + pos2[0] += right_adj; + pos2[1] -= half_size; + + pos3[0] += right_adj; + pos3[1] += half_size; + + if (sprite) { + static const float tex00[4] = { 0, 0, 0, 1 }; + static const float tex01[4] = { 0, 1, 0, 1 }; + static const float tex11[4] = { 1, 1, 0, 1 }; + static const float tex10[4] = { 1, 0, 0, 1 }; + set_texcoords( wide, v0, tex00 ); + set_texcoords( wide, v1, tex01 ); + set_texcoords( wide, v2, tex10 ); + set_texcoords( wide, v3, tex11 ); + } + + tri.det = header->det; /* only the sign matters */ + tri.v[0] = v0; + tri.v[1] = v2; + tri.v[2] = v3; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v0; + tri.v[1] = v3; + tri.v[2] = v1; + stage->next->tri( stage->next, &tri ); +} + + +static void wide_first_point( struct draw_stage *stage, + struct prim_header *header ) +{ + struct wide_stage *wide = wide_stage(stage); + struct draw_context *draw = stage->draw; + + wide->half_point_size = 0.5f * draw->rasterizer->point_size; + + if (draw->rasterizer->point_size != 1.0) { + stage->point = wide_point; + } + else { + stage->point = passthrough_point; + } + + if (draw->rasterizer->point_sprite) { + /* find vertex shader texcoord outputs */ + const struct draw_vertex_shader *vs = draw->vertex_shader; + uint i, j = 0; + for (i = 0; i < vs->state->num_outputs; i++) { + if (vs->state->output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { + wide->texcoord_slot[j] = i; + wide->texcoord_mode[j] = draw->rasterizer->sprite_coord_mode[j]; + j++; + } + } + wide->num_texcoords = j; + } + + wide->psize_slot = -1; + + if (draw->rasterizer->point_size_per_vertex) { + /* find PSIZ vertex output */ + const struct draw_vertex_shader *vs = draw->vertex_shader; + uint i; + for (i = 0; i < vs->state->num_outputs; i++) { + if (vs->state->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { + wide->psize_slot = i; + break; + } + } + } + + stage->point( stage, header ); +} + + + +static void wide_first_line( struct draw_stage *stage, + struct prim_header *header ) +{ + struct wide_stage *wide = wide_stage(stage); + struct draw_context *draw = stage->draw; + + wide->half_line_width = 0.5f * draw->rasterizer->line_width; + + if (draw->rasterizer->line_width != 1.0) { + if (draw->rasterizer->line_smooth) + wide->stage.line = wide_line_aa; + else + wide->stage.line = wide_line; + } + else { + wide->stage.line = passthrough_line; + } + + stage->line( stage, header ); +} + + +static void wide_flush( struct draw_stage *stage, unsigned flags ) +{ + stage->line = wide_first_line; + stage->point = wide_first_point; + stage->next->flush( stage->next, flags ); +} + + +static void wide_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void wide_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +struct draw_stage *draw_wide_stage( struct draw_context *draw ) +{ + struct wide_stage *wide = CALLOC_STRUCT(wide_stage); + + draw_alloc_temp_verts( &wide->stage, 4 ); + + wide->stage.draw = draw; + wide->stage.next = NULL; + wide->stage.point = wide_first_point; + wide->stage.line = wide_first_line; + wide->stage.tri = passthrough_tri; + wide->stage.flush = wide_flush; + wide->stage.reset_stipple_counter = wide_reset_stipple_counter; + wide->stage.destroy = wide_destroy; + + return &wide->stage; +} diff --git a/src/gallium/auxiliary/llvm/Makefile b/src/gallium/auxiliary/llvm/Makefile new file mode 100644 index 0000000000..e6ac399d08 --- /dev/null +++ b/src/gallium/auxiliary/llvm/Makefile @@ -0,0 +1,85 @@ +# -*-makefile-*- +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = gallivm + + +GALLIVM_SOURCES = \ + gallivm.cpp \ + gallivm_cpu.cpp \ + instructions.cpp \ + loweringpass.cpp \ + tgsitollvm.cpp \ + storage.cpp \ + storagesoa.cpp \ + instructionssoa.cpp + +INC_SOURCES = gallivm_builtins.cpp + +CPP_SOURCES = \ + $(GALLIVM_SOURCES) + +C_SOURCES = +ASM_SOURCES = + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(CPP_SOURCES:.cpp=.o) \ + $(ASM_SOURCES:.S=.o) + +### Include directories +INCLUDES = \ + -I. \ + -I$(TOP)/src/gallium/drivers + -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/mesa \ + -I$(TOP)/include + + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDES) $(LLVM_CFLAGS) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDES) $(LLVM_CXXFLAGS) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + +##### TARGETS ##### + +default:: depend symlinks $(LIBNAME) + + +$(LIBNAME): $(OBJECTS) Makefile + $(TOP)/bin/mklib -o $@ -static $(OBJECTS) + + +depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) \ + $(ASM_SOURCES) $(INC_SOURCES) 2> /dev/null + + +gallivm_builtins.cpp: llvm_builtins.c + clang --emit-llvm $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=$@ -f -for=shader -funcname=createGallivmBuiltins + + +# Emacs tags +tags: + etags `find . -name \*.[ch]` `find ../include` + + +# Remove .o and backup files +clean: + -rm -f *.o */*.o *~ *.so *~ server/*.o + -rm -f depend depend.bak + -rm -f gallivm_builtins.cpp + +symlinks: + + +include depend diff --git a/src/gallium/auxiliary/llvm/gallivm.cpp b/src/gallium/auxiliary/llvm/gallivm.cpp new file mode 100644 index 0000000000..d14bb3b99a --- /dev/null +++ b/src/gallium/auxiliary/llvm/gallivm.cpp @@ -0,0 +1,327 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ +#ifdef MESA_LLVM + +#include "gallivm.h" +#include "gallivm_p.h" + +#include "instructions.h" +#include "loweringpass.h" +#include "storage.h" +#include "tgsitollvm.h" + +#include "pipe/p_context.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_dump.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static int GLOBAL_ID = 0; + +using namespace llvm; + +static inline +void AddStandardCompilePasses(PassManager &PM) +{ + PM.add(new LoweringPass()); + PM.add(createVerifierPass()); // Verify that input is correct + + PM.add(createLowerSetJmpPass()); // Lower llvm.setjmp/.longjmp + + //PM.add(createStripSymbolsPass(true)); + + PM.add(createRaiseAllocationsPass()); // call %malloc -> malloc inst + PM.add(createCFGSimplificationPass()); // Clean up disgusting code + PM.add(createPromoteMemoryToRegisterPass());// Kill useless allocas + PM.add(createGlobalOptimizerPass()); // Optimize out global vars + PM.add(createGlobalDCEPass()); // Remove unused fns and globs + PM.add(createIPConstantPropagationPass());// IP Constant Propagation + PM.add(createDeadArgEliminationPass()); // Dead argument elimination + PM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE + PM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + + PM.add(createPruneEHPass()); // Remove dead EH info + + PM.add(createFunctionInliningPass()); // Inline small functions + PM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args + + PM.add(createTailDuplicationPass()); // Simplify cfg by copying code + PM.add(createInstructionCombiningPass()); // Cleanup for scalarrepl. + PM.add(createCFGSimplificationPass()); // Merge & remove BBs + PM.add(createScalarReplAggregatesPass()); // Break up aggregate allocas + PM.add(createInstructionCombiningPass()); // Combine silly seq's + PM.add(createCondPropagationPass()); // Propagate conditionals + + PM.add(createTailCallEliminationPass()); // Eliminate tail calls + PM.add(createCFGSimplificationPass()); // Merge & remove BBs + PM.add(createReassociatePass()); // Reassociate expressions + PM.add(createLoopRotatePass()); + PM.add(createLICMPass()); // Hoist loop invariants + PM.add(createLoopUnswitchPass()); // Unswitch loops. + PM.add(createLoopIndexSplitPass()); // Index split loops. + PM.add(createInstructionCombiningPass()); // Clean up after LICM/reassoc + PM.add(createIndVarSimplifyPass()); // Canonicalize indvars + PM.add(createLoopUnrollPass()); // Unroll small loops + PM.add(createInstructionCombiningPass()); // Clean up after the unroller + PM.add(createGVNPass()); // Remove redundancies + PM.add(createSCCPPass()); // Constant prop with SCCP + + // Run instcombine after redundancy elimination to exploit opportunities + // opened up by them. + PM.add(createInstructionCombiningPass()); + PM.add(createCondPropagationPass()); // Propagate conditionals + + PM.add(createDeadStoreEliminationPass()); // Delete dead stores + PM.add(createAggressiveDCEPass()); // SSA based 'Aggressive DCE' + PM.add(createCFGSimplificationPass()); // Merge & remove BBs + PM.add(createSimplifyLibCallsPass()); // Library Call Optimizations + PM.add(createDeadTypeEliminationPass()); // Eliminate dead types + PM.add(createConstantMergePass()); // Merge dup global constants +} + +void gallivm_prog_delete(struct gallivm_prog *prog) +{ + delete prog->module; + prog->module = 0; + prog->function = 0; + free(prog); +} + +static inline void +constant_interpolation(float (*inputs)[16][4], + const struct tgsi_interp_coef *coefs, + unsigned attrib, + unsigned chan) +{ + unsigned i; + + for (i = 0; i < QUAD_SIZE; ++i) { + inputs[i][attrib][chan] = coefs[attrib].a0[chan]; + } +} + +static inline void +linear_interpolation(float (*inputs)[16][4], + const struct tgsi_interp_coef *coefs, + unsigned attrib, + unsigned chan) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + const float x = inputs[i][0][0]; + const float y = inputs[i][0][1]; + + inputs[i][attrib][chan] = + coefs[attrib].a0[chan] + + coefs[attrib].dadx[chan] * x + + coefs[attrib].dady[chan] * y; + } +} + +static inline void +perspective_interpolation(float (*inputs)[16][4], + const struct tgsi_interp_coef *coefs, + unsigned attrib, + unsigned chan ) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + const float x = inputs[i][0][0]; + const float y = inputs[i][0][1]; + /* WPOS.w here is really 1/w */ + const float w = 1.0f / inputs[i][0][3]; + assert(inputs[i][0][3] != 0.0); + + inputs[i][attrib][chan] = + (coefs[attrib].a0[chan] + + coefs[attrib].dadx[chan] * x + + coefs[attrib].dady[chan] * y) * w; + } +} + +void gallivm_ir_dump(struct gallivm_ir *ir, const char *file_prefix) +{ + if (!ir || !ir->module) + return; + + if (file_prefix) { + std::ostringstream stream; + stream << file_prefix; + stream << ir->id; + stream << ".ll"; + std::string name = stream.str(); + std::ofstream out(name.c_str()); + if (!out) { + std::cerr<<"Can't open file : "<module); + out.close(); + } else { + const llvm::Module::FunctionListType &funcs = ir->module->getFunctionList(); + llvm::Module::FunctionListType::const_iterator itr; + std::cout<<"; ---------- Start shader "<id<id<num_interp; ++i) { + const gallivm_interpolate &interp = prog->interpolators[i]; + switch (interp.type) { + case TGSI_INTERPOLATE_CONSTANT: + constant_interpolation(inputs, coef, interp.attrib, interp.chan); + break; + + case TGSI_INTERPOLATE_LINEAR: + linear_interpolation(inputs, coef, interp.attrib, interp.chan); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + perspective_interpolation(inputs, coef, interp.attrib, interp.chan); + break; + + default: + assert( 0 ); + } + } +} + + +struct gallivm_ir * gallivm_ir_new(enum gallivm_shader_type type) +{ + struct gallivm_ir *ir = + (struct gallivm_ir *)calloc(1, sizeof(struct gallivm_ir)); + ++GLOBAL_ID; + ir->id = GLOBAL_ID; + ir->type = type; + + return ir; +} + +void gallivm_ir_set_layout(struct gallivm_ir *ir, + enum gallivm_vector_layout layout) +{ + ir->layout = layout; +} + +void gallivm_ir_set_components(struct gallivm_ir *ir, int num) +{ + ir->num_components = num; +} + +void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir, + const struct tgsi_token *tokens) +{ + std::cout << "Creating llvm from: " <module = mod; + gallivm_ir_dump(ir, 0); +} + +void gallivm_ir_delete(struct gallivm_ir *ir) +{ + delete ir->module; + free(ir); +} + +struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir) +{ + struct gallivm_prog *prog = + (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog)); + llvm::Module *mod = llvm::CloneModule(ir->module); + prog->num_consts = ir->num_consts; + memcpy(prog->interpolators, ir->interpolators, sizeof(prog->interpolators)); + prog->num_interp = ir->num_interp; + + /* Run optimization passes over it */ + PassManager passes; + passes.add(new TargetData(mod)); + AddStandardCompilePasses(passes); + passes.run(*mod); + prog->module = mod; + + std::cout << "After optimizations:"<dump(); + + return prog; +} + +#endif /* MESA_LLVM */ diff --git a/src/gallium/auxiliary/llvm/gallivm.h b/src/gallium/auxiliary/llvm/gallivm.h new file mode 100644 index 0000000000..92da4bca7f --- /dev/null +++ b/src/gallium/auxiliary/llvm/gallivm.h @@ -0,0 +1,103 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ + +#ifndef GALLIVM_H +#define GALLIVM_H + +#if defined __cplusplus +extern "C" { +#endif + +#include "pipe/p_state.h" + +#ifdef MESA_LLVM + +struct tgsi_token; + +struct gallivm_ir; +struct gallivm_prog; +struct gallivm_cpu_engine; +struct tgsi_interp_coef; +struct tgsi_sampler; +struct tgsi_exec_vector; + +enum gallivm_shader_type { + GALLIVM_VS, + GALLIVM_FS +}; + +enum gallivm_vector_layout { + GALLIVM_AOS, + GALLIVM_SOA +}; + +struct gallivm_ir *gallivm_ir_new(enum gallivm_shader_type type); +void gallivm_ir_set_layout(struct gallivm_ir *ir, + enum gallivm_vector_layout layout); +void gallivm_ir_set_components(struct gallivm_ir *ir, int num); +void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir, + const struct tgsi_token *tokens); +void gallivm_ir_delete(struct gallivm_ir *ir); + + +struct gallivm_prog *gallivm_ir_compile(struct gallivm_ir *ir); + +void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog, + float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], + const struct tgsi_interp_coef *coefs); +void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix); + + +struct gallivm_cpu_engine *gallivm_cpu_engine_create(struct gallivm_prog *prog); +struct gallivm_cpu_engine *gallivm_global_cpu_engine(); +int gallivm_cpu_vs_exec(struct gallivm_prog *prog, + struct tgsi_exec_vector *inputs, + struct tgsi_exec_vector *dests, + float (*consts)[4], + struct tgsi_exec_vector *temps); +int gallivm_cpu_fs_exec(struct gallivm_prog *prog, + float x, float y, + float (*dests)[PIPE_MAX_SHADER_INPUTS][4], + float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], + float (*consts)[4], + struct tgsi_sampler *samplers); +void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *ee, struct gallivm_prog *prog); +void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *ee); + + +#endif /* MESA_LLVM */ + +#if defined __cplusplus +} // extern "C" +#endif + +#endif diff --git a/src/gallium/auxiliary/llvm/gallivm_builtins.cpp b/src/gallium/auxiliary/llvm/gallivm_builtins.cpp new file mode 100644 index 0000000000..1796f0a177 --- /dev/null +++ b/src/gallium/auxiliary/llvm/gallivm_builtins.cpp @@ -0,0 +1,567 @@ +// Generated by llvm2cpp - DO NOT MODIFY! + + +Module* createGallivmBuiltins(Module *mod) { + +mod->setModuleIdentifier("shader"); + +// Type Definitions +ArrayType* ArrayTy_0 = ArrayType::get(IntegerType::get(8), 25); + +PointerType* PointerTy_1 = PointerType::get(ArrayTy_0, 0); + +std::vectorFuncTy_2_args; +FuncTy_2_args.push_back(Type::FloatTy); +FuncTy_2_args.push_back(Type::FloatTy); +FunctionType* FuncTy_2 = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/FuncTy_2_args, + /*isVarArg=*/false); + +PointerType* PointerTy_3 = PointerType::get(FuncTy_2, 0); + +VectorType* VectorTy_4 = VectorType::get(Type::FloatTy, 4); + +std::vectorFuncTy_5_args; +FuncTy_5_args.push_back(VectorTy_4); +FunctionType* FuncTy_5 = FunctionType::get( + /*Result=*/VectorTy_4, + /*Params=*/FuncTy_5_args, + /*isVarArg=*/false); + +std::vectorFuncTy_6_args; +FuncTy_6_args.push_back(VectorTy_4); +FuncTy_6_args.push_back(VectorTy_4); +FuncTy_6_args.push_back(VectorTy_4); +FunctionType* FuncTy_6 = FunctionType::get( + /*Result=*/VectorTy_4, + /*Params=*/FuncTy_6_args, + /*isVarArg=*/false); + +VectorType* VectorTy_7 = VectorType::get(IntegerType::get(32), 4); + +std::vectorFuncTy_9_args; +FunctionType* FuncTy_9 = FunctionType::get( + /*Result=*/IntegerType::get(32), + /*Params=*/FuncTy_9_args, + /*isVarArg=*/true); + +PointerType* PointerTy_8 = PointerType::get(FuncTy_9, 0); + +PointerType* PointerTy_10 = PointerType::get(IntegerType::get(8), 0); + +std::vectorFuncTy_12_args; +FuncTy_12_args.push_back(Type::FloatTy); +FunctionType* FuncTy_12 = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/FuncTy_12_args, + /*isVarArg=*/false); + +PointerType* PointerTy_11 = PointerType::get(FuncTy_12, 0); + +std::vectorFuncTy_13_args; +FuncTy_13_args.push_back(VectorTy_4); +FunctionType* FuncTy_13 = FunctionType::get( + /*Result=*/IntegerType::get(32), + /*Params=*/FuncTy_13_args, + /*isVarArg=*/false); + + +// Function Declarations + +Function* func_approx = new Function( + /*Type=*/FuncTy_2, + /*Linkage=*/GlobalValue::WeakLinkage, + /*Name=*/"approx", mod); +func_approx->setCallingConv(CallingConv::C); +const ParamAttrsList *func_approx_PAL = 0; +func_approx->setParamAttrs(func_approx_PAL); + +Function* func_powf = new Function( + /*Type=*/FuncTy_2, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"powf", mod); // (external, no body) +func_powf->setCallingConv(CallingConv::C); +const ParamAttrsList *func_powf_PAL = 0; +func_powf->setParamAttrs(func_powf_PAL); + +Function* func_lit = new Function( + /*Type=*/FuncTy_5, + /*Linkage=*/GlobalValue::WeakLinkage, + /*Name=*/"lit", mod); +func_lit->setCallingConv(CallingConv::C); +const ParamAttrsList *func_lit_PAL = 0; +func_lit->setParamAttrs(func_lit_PAL); + +Function* func_cmp = new Function( + /*Type=*/FuncTy_6, + /*Linkage=*/GlobalValue::WeakLinkage, + /*Name=*/"cmp", mod); +func_cmp->setCallingConv(CallingConv::C); +const ParamAttrsList *func_cmp_PAL = 0; +{ + ParamAttrsVector Attrs; + ParamAttrsWithIndex PAWI; + PAWI.index = 0; PAWI.attrs = 0 | ParamAttr::NoUnwind; + Attrs.push_back(PAWI); + func_cmp_PAL = ParamAttrsList::get(Attrs); + +} +func_cmp->setParamAttrs(func_cmp_PAL); + +Function* func_vcos = new Function( + /*Type=*/FuncTy_5, + /*Linkage=*/GlobalValue::WeakLinkage, + /*Name=*/"vcos", mod); +func_vcos->setCallingConv(CallingConv::C); +const ParamAttrsList *func_vcos_PAL = 0; +func_vcos->setParamAttrs(func_vcos_PAL); + +Function* func_printf = new Function( + /*Type=*/FuncTy_9, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"printf", mod); // (external, no body) +func_printf->setCallingConv(CallingConv::C); +const ParamAttrsList *func_printf_PAL = 0; +func_printf->setParamAttrs(func_printf_PAL); + +Function* func_cosf = new Function( + /*Type=*/FuncTy_12, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"cosf", mod); // (external, no body) +func_cosf->setCallingConv(CallingConv::C); +const ParamAttrsList *func_cosf_PAL = 0; +func_cosf->setParamAttrs(func_cosf_PAL); + +Function* func_scs = new Function( + /*Type=*/FuncTy_5, + /*Linkage=*/GlobalValue::WeakLinkage, + /*Name=*/"scs", mod); +func_scs->setCallingConv(CallingConv::C); +const ParamAttrsList *func_scs_PAL = 0; +func_scs->setParamAttrs(func_scs_PAL); + +Function* func_sinf = new Function( + /*Type=*/FuncTy_12, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"sinf", mod); // (external, no body) +func_sinf->setCallingConv(CallingConv::C); +const ParamAttrsList *func_sinf_PAL = 0; +func_sinf->setParamAttrs(func_sinf_PAL); + +Function* func_vsin = new Function( + /*Type=*/FuncTy_5, + /*Linkage=*/GlobalValue::WeakLinkage, + /*Name=*/"vsin", mod); +func_vsin->setCallingConv(CallingConv::C); +const ParamAttrsList *func_vsin_PAL = 0; +func_vsin->setParamAttrs(func_vsin_PAL); + +Function* func_kilp = new Function( + /*Type=*/FuncTy_13, + /*Linkage=*/GlobalValue::WeakLinkage, + /*Name=*/"kilp", mod); +func_kilp->setCallingConv(CallingConv::C); +const ParamAttrsList *func_kilp_PAL = 0; +{ + ParamAttrsVector Attrs; + ParamAttrsWithIndex PAWI; + PAWI.index = 0; PAWI.attrs = 0 | ParamAttr::NoUnwind; + Attrs.push_back(PAWI); + func_kilp_PAL = ParamAttrsList::get(Attrs); + +} +func_kilp->setParamAttrs(func_kilp_PAL); + +// Global Variable Declarations + + +GlobalVariable* gvar_array__str = new GlobalVariable( +/*Type=*/ArrayTy_0, +/*isConstant=*/true, +/*Linkage=*/GlobalValue::InternalLinkage, +/*Initializer=*/0, // has initializer, specified below +/*Name=*/".str", +mod); + +GlobalVariable* gvar_array__str1 = new GlobalVariable( +/*Type=*/ArrayTy_0, +/*isConstant=*/true, +/*Linkage=*/GlobalValue::InternalLinkage, +/*Initializer=*/0, // has initializer, specified below +/*Name=*/".str1", +mod); + +// Constant Definitions +Constant* const_array_14 = ConstantArray::get("VEC IN is %f %f %f %f\x0A", true); +Constant* const_array_15 = ConstantArray::get("VEC OUT is %f %f %f %f\x0A", true); +ConstantFP* const_float_16 = ConstantFP::get(Type::FloatTy, APFloat(-1.280000e+02f)); +ConstantFP* const_float_17 = ConstantFP::get(Type::FloatTy, APFloat(1.280000e+02f)); +Constant* const_float_18 = Constant::getNullValue(Type::FloatTy); +Constant* const_int32_19 = Constant::getNullValue(IntegerType::get(32)); +std::vector const_packed_20_elems; +ConstantFP* const_float_21 = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f)); +const_packed_20_elems.push_back(const_float_21); +UndefValue* const_float_22 = UndefValue::get(Type::FloatTy); +const_packed_20_elems.push_back(const_float_22); +const_packed_20_elems.push_back(const_float_22); +const_packed_20_elems.push_back(const_float_21); +Constant* const_packed_20 = ConstantVector::get(VectorTy_4, const_packed_20_elems); +ConstantInt* const_int32_23 = ConstantInt::get(APInt(32, "1", 10)); +ConstantInt* const_int32_24 = ConstantInt::get(APInt(32, "3", 10)); +ConstantInt* const_int32_25 = ConstantInt::get(APInt(32, "2", 10)); +std::vector const_packed_26_elems; +const_packed_26_elems.push_back(const_float_21); +const_packed_26_elems.push_back(const_float_18); +const_packed_26_elems.push_back(const_float_18); +const_packed_26_elems.push_back(const_float_21); +Constant* const_packed_26 = ConstantVector::get(VectorTy_4, const_packed_26_elems); +Constant* const_double_27 = Constant::getNullValue(Type::DoubleTy); +std::vector const_packed_28_elems; +const_packed_28_elems.push_back(const_int32_19); +ConstantInt* const_int32_29 = ConstantInt::get(APInt(32, "5", 10)); +const_packed_28_elems.push_back(const_int32_29); +const_packed_28_elems.push_back(const_int32_25); +const_packed_28_elems.push_back(const_int32_24); +Constant* const_packed_28 = ConstantVector::get(VectorTy_7, const_packed_28_elems); +std::vector const_packed_30_elems; +const_packed_30_elems.push_back(const_int32_19); +const_packed_30_elems.push_back(const_int32_23); +ConstantInt* const_int32_31 = ConstantInt::get(APInt(32, "6", 10)); +const_packed_30_elems.push_back(const_int32_31); +const_packed_30_elems.push_back(const_int32_24); +Constant* const_packed_30 = ConstantVector::get(VectorTy_7, const_packed_30_elems); +std::vector const_packed_32_elems; +const_packed_32_elems.push_back(const_int32_19); +const_packed_32_elems.push_back(const_int32_23); +const_packed_32_elems.push_back(const_int32_25); +ConstantInt* const_int32_33 = ConstantInt::get(APInt(32, "7", 10)); +const_packed_32_elems.push_back(const_int32_33); +Constant* const_packed_32 = ConstantVector::get(VectorTy_7, const_packed_32_elems); +std::vector const_ptr_34_indices; +const_ptr_34_indices.push_back(const_int32_19); +const_ptr_34_indices.push_back(const_int32_19); +Constant* const_ptr_34 = ConstantExpr::getGetElementPtr(gvar_array__str, &const_ptr_34_indices[0], const_ptr_34_indices.size() ); +UndefValue* const_packed_35 = UndefValue::get(VectorTy_4); +std::vector const_ptr_36_indices; +const_ptr_36_indices.push_back(const_int32_19); +const_ptr_36_indices.push_back(const_int32_19); +Constant* const_ptr_36 = ConstantExpr::getGetElementPtr(gvar_array__str1, &const_ptr_36_indices[0], const_ptr_36_indices.size() ); + +// Global Variable Definitions +gvar_array__str->setInitializer(const_array_14); +gvar_array__str1->setInitializer(const_array_15); + +// Function Definitions + +// Function: approx (func_approx) +{ + Function::arg_iterator args = func_approx->arg_begin(); + Value* float_a = args++; + float_a->setName("a"); + Value* float_b = args++; + float_b->setName("b"); + + BasicBlock* label_entry = new BasicBlock("entry",func_approx,0); + + // Block entry (label_entry) + FCmpInst* int1_cmp = new FCmpInst(FCmpInst::FCMP_OLT, float_b, const_float_16, "cmp", label_entry); + SelectInst* float_b_addr_0 = new SelectInst(int1_cmp, const_float_16, float_b, "b.addr.0", label_entry); + FCmpInst* int1_cmp3 = new FCmpInst(FCmpInst::FCMP_OGT, float_b_addr_0, const_float_17, "cmp3", label_entry); + SelectInst* float_b_addr_1 = new SelectInst(int1_cmp3, const_float_17, float_b_addr_0, "b.addr.1", label_entry); + FCmpInst* int1_cmp7 = new FCmpInst(FCmpInst::FCMP_OLT, float_a, const_float_18, "cmp7", label_entry); + SelectInst* float_a_addr_0 = new SelectInst(int1_cmp7, const_float_18, float_a, "a.addr.0", label_entry); + std::vector float_call_params; + float_call_params.push_back(float_a_addr_0); + float_call_params.push_back(float_b_addr_1); + CallInst* float_call = new CallInst(func_powf, float_call_params.begin(), float_call_params.end(), "call", label_entry); + float_call->setCallingConv(CallingConv::C); + float_call->setTailCall(true);const ParamAttrsList *float_call_PAL = 0; + float_call->setParamAttrs(float_call_PAL); + + new ReturnInst(float_call, label_entry); + +} + +// Function: lit (func_lit) +{ + Function::arg_iterator args = func_lit->arg_begin(); + Value* packed_tmp = args++; + packed_tmp->setName("tmp"); + + BasicBlock* label_entry_38 = new BasicBlock("entry",func_lit,0); + BasicBlock* label_ifthen = new BasicBlock("ifthen",func_lit,0); + BasicBlock* label_UnifiedReturnBlock = new BasicBlock("UnifiedReturnBlock",func_lit,0); + + // Block entry (label_entry_38) + ExtractElementInst* float_tmp6 = new ExtractElementInst(packed_tmp, const_int32_19, "tmp6", label_entry_38); + FCmpInst* int1_cmp_39 = new FCmpInst(FCmpInst::FCMP_OGT, float_tmp6, const_float_18, "cmp", label_entry_38); + new BranchInst(label_ifthen, label_UnifiedReturnBlock, int1_cmp_39, label_entry_38); + + // Block ifthen (label_ifthen) + InsertElementInst* packed_tmp10 = new InsertElementInst(const_packed_20, float_tmp6, const_int32_23, "tmp10", label_ifthen); + ExtractElementInst* float_tmp12 = new ExtractElementInst(packed_tmp, const_int32_23, "tmp12", label_ifthen); + ExtractElementInst* float_tmp14 = new ExtractElementInst(packed_tmp, const_int32_24, "tmp14", label_ifthen); + std::vector float_call_41_params; + float_call_41_params.push_back(float_tmp12); + float_call_41_params.push_back(float_tmp14); + CallInst* float_call_41 = new CallInst(func_approx, float_call_41_params.begin(), float_call_41_params.end(), "call", label_ifthen); + float_call_41->setCallingConv(CallingConv::C); + float_call_41->setTailCall(true);const ParamAttrsList *float_call_41_PAL = 0; + float_call_41->setParamAttrs(float_call_41_PAL); + + InsertElementInst* packed_tmp16 = new InsertElementInst(packed_tmp10, float_call_41, const_int32_25, "tmp16", label_ifthen); + new ReturnInst(packed_tmp16, label_ifthen); + + // Block UnifiedReturnBlock (label_UnifiedReturnBlock) + new ReturnInst(const_packed_26, label_UnifiedReturnBlock); + +} + +// Function: cmp (func_cmp) +{ + Function::arg_iterator args = func_cmp->arg_begin(); + Value* packed_tmp0 = args++; + packed_tmp0->setName("tmp0"); + Value* packed_tmp1 = args++; + packed_tmp1->setName("tmp1"); + Value* packed_tmp2 = args++; + packed_tmp2->setName("tmp2"); + + BasicBlock* label_entry_44 = new BasicBlock("entry",func_cmp,0); + BasicBlock* label_cond__14 = new BasicBlock("cond.?14",func_cmp,0); + BasicBlock* label_cond_cont20 = new BasicBlock("cond.cont20",func_cmp,0); + BasicBlock* label_cond__28 = new BasicBlock("cond.?28",func_cmp,0); + BasicBlock* label_cond_cont34 = new BasicBlock("cond.cont34",func_cmp,0); + BasicBlock* label_cond__42 = new BasicBlock("cond.?42",func_cmp,0); + BasicBlock* label_cond_cont48 = new BasicBlock("cond.cont48",func_cmp,0); + + // Block entry (label_entry_44) + ExtractElementInst* float_tmp3 = new ExtractElementInst(packed_tmp0, const_int32_19, "tmp3", label_entry_44); + CastInst* double_conv = new FPExtInst(float_tmp3, Type::DoubleTy, "conv", label_entry_44); + FCmpInst* int1_cmp_45 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv, const_double_27, "cmp", label_entry_44); + ExtractElementInst* float_tmp11 = new ExtractElementInst(packed_tmp0, const_int32_23, "tmp11", label_entry_44); + CastInst* double_conv12 = new FPExtInst(float_tmp11, Type::DoubleTy, "conv12", label_entry_44); + FCmpInst* int1_cmp13 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv12, const_double_27, "cmp13", label_entry_44); + SelectInst* packed_tmp1_tmp2 = new SelectInst(int1_cmp_45, packed_tmp1, packed_tmp2, "tmp1.tmp2", label_entry_44); + new BranchInst(label_cond__14, label_cond_cont20, int1_cmp13, label_entry_44); + + // Block cond.?14 (label_cond__14) + ShuffleVectorInst* packed_tmp233 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp1, const_packed_28, "tmp233", label_cond__14); + ExtractElementInst* float_tmp254 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp254", label_cond__14); + CastInst* double_conv265 = new FPExtInst(float_tmp254, Type::DoubleTy, "conv265", label_cond__14); + FCmpInst* int1_cmp276 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv265, const_double_27, "cmp276", label_cond__14); + new BranchInst(label_cond__28, label_cond_cont34, int1_cmp276, label_cond__14); + + // Block cond.cont20 (label_cond_cont20) + ShuffleVectorInst* packed_tmp23 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp2, const_packed_28, "tmp23", label_cond_cont20); + ExtractElementInst* float_tmp25 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp25", label_cond_cont20); + CastInst* double_conv26 = new FPExtInst(float_tmp25, Type::DoubleTy, "conv26", label_cond_cont20); + FCmpInst* int1_cmp27 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv26, const_double_27, "cmp27", label_cond_cont20); + new BranchInst(label_cond__28, label_cond_cont34, int1_cmp27, label_cond_cont20); + + // Block cond.?28 (label_cond__28) + PHINode* packed_tmp23_reg2mem_0 = new PHINode(VectorTy_4, "tmp23.reg2mem.0", label_cond__28); + packed_tmp23_reg2mem_0->reserveOperandSpace(2); + packed_tmp23_reg2mem_0->addIncoming(packed_tmp233, label_cond__14); + packed_tmp23_reg2mem_0->addIncoming(packed_tmp23, label_cond_cont20); + + ShuffleVectorInst* packed_tmp378 = new ShuffleVectorInst(packed_tmp23_reg2mem_0, packed_tmp1, const_packed_30, "tmp378", label_cond__28); + ExtractElementInst* float_tmp399 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp399", label_cond__28); + CastInst* double_conv4010 = new FPExtInst(float_tmp399, Type::DoubleTy, "conv4010", label_cond__28); + FCmpInst* int1_cmp4111 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv4010, const_double_27, "cmp4111", label_cond__28); + new BranchInst(label_cond__42, label_cond_cont48, int1_cmp4111, label_cond__28); + + // Block cond.cont34 (label_cond_cont34) + PHINode* packed_tmp23_reg2mem_1 = new PHINode(VectorTy_4, "tmp23.reg2mem.1", label_cond_cont34); + packed_tmp23_reg2mem_1->reserveOperandSpace(2); + packed_tmp23_reg2mem_1->addIncoming(packed_tmp233, label_cond__14); + packed_tmp23_reg2mem_1->addIncoming(packed_tmp23, label_cond_cont20); + + ShuffleVectorInst* packed_tmp37 = new ShuffleVectorInst(packed_tmp23_reg2mem_1, packed_tmp2, const_packed_30, "tmp37", label_cond_cont34); + ExtractElementInst* float_tmp39 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp39", label_cond_cont34); + CastInst* double_conv40 = new FPExtInst(float_tmp39, Type::DoubleTy, "conv40", label_cond_cont34); + FCmpInst* int1_cmp41 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv40, const_double_27, "cmp41", label_cond_cont34); + new BranchInst(label_cond__42, label_cond_cont48, int1_cmp41, label_cond_cont34); + + // Block cond.?42 (label_cond__42) + PHINode* packed_tmp37_reg2mem_0 = new PHINode(VectorTy_4, "tmp37.reg2mem.0", label_cond__42); + packed_tmp37_reg2mem_0->reserveOperandSpace(2); + packed_tmp37_reg2mem_0->addIncoming(packed_tmp378, label_cond__28); + packed_tmp37_reg2mem_0->addIncoming(packed_tmp37, label_cond_cont34); + + ShuffleVectorInst* packed_tmp5113 = new ShuffleVectorInst(packed_tmp37_reg2mem_0, packed_tmp1, const_packed_32, "tmp5113", label_cond__42); + new ReturnInst(packed_tmp5113, label_cond__42); + + // Block cond.cont48 (label_cond_cont48) + PHINode* packed_tmp37_reg2mem_1 = new PHINode(VectorTy_4, "tmp37.reg2mem.1", label_cond_cont48); + packed_tmp37_reg2mem_1->reserveOperandSpace(2); + packed_tmp37_reg2mem_1->addIncoming(packed_tmp378, label_cond__28); + packed_tmp37_reg2mem_1->addIncoming(packed_tmp37, label_cond_cont34); + + ShuffleVectorInst* packed_tmp51 = new ShuffleVectorInst(packed_tmp37_reg2mem_1, packed_tmp2, const_packed_32, "tmp51", label_cond_cont48); + new ReturnInst(packed_tmp51, label_cond_cont48); + +} + +// Function: vcos (func_vcos) +{ + Function::arg_iterator args = func_vcos->arg_begin(); + Value* packed_val = args++; + packed_val->setName("val"); + + BasicBlock* label_entry_53 = new BasicBlock("entry",func_vcos,0); + + // Block entry (label_entry_53) + ExtractElementInst* float_tmp1 = new ExtractElementInst(packed_val, const_int32_19, "tmp1", label_entry_53); + CastInst* double_conv_54 = new FPExtInst(float_tmp1, Type::DoubleTy, "conv", label_entry_53); + ExtractElementInst* float_tmp3_55 = new ExtractElementInst(packed_val, const_int32_23, "tmp3", label_entry_53); + CastInst* double_conv4 = new FPExtInst(float_tmp3_55, Type::DoubleTy, "conv4", label_entry_53); + ExtractElementInst* float_tmp6_56 = new ExtractElementInst(packed_val, const_int32_25, "tmp6", label_entry_53); + CastInst* double_conv7 = new FPExtInst(float_tmp6_56, Type::DoubleTy, "conv7", label_entry_53); + ExtractElementInst* float_tmp9 = new ExtractElementInst(packed_val, const_int32_24, "tmp9", label_entry_53); + CastInst* double_conv10 = new FPExtInst(float_tmp9, Type::DoubleTy, "conv10", label_entry_53); + std::vector int32_call_params; + int32_call_params.push_back(const_ptr_34); + int32_call_params.push_back(double_conv_54); + int32_call_params.push_back(double_conv4); + int32_call_params.push_back(double_conv7); + int32_call_params.push_back(double_conv10); + CallInst* int32_call = new CallInst(func_printf, int32_call_params.begin(), int32_call_params.end(), "call", label_entry_53); + int32_call->setCallingConv(CallingConv::C); + int32_call->setTailCall(true);const ParamAttrsList *int32_call_PAL = 0; + int32_call->setParamAttrs(int32_call_PAL); + + CallInst* float_call13 = new CallInst(func_cosf, float_tmp1, "call13", label_entry_53); + float_call13->setCallingConv(CallingConv::C); + float_call13->setTailCall(true);const ParamAttrsList *float_call13_PAL = 0; + float_call13->setParamAttrs(float_call13_PAL); + + InsertElementInst* packed_tmp15 = new InsertElementInst(const_packed_35, float_call13, const_int32_19, "tmp15", label_entry_53); + CallInst* float_call18 = new CallInst(func_cosf, float_tmp1, "call18", label_entry_53); + float_call18->setCallingConv(CallingConv::C); + float_call18->setTailCall(true);const ParamAttrsList *float_call18_PAL = 0; + float_call18->setParamAttrs(float_call18_PAL); + + InsertElementInst* packed_tmp20 = new InsertElementInst(packed_tmp15, float_call18, const_int32_23, "tmp20", label_entry_53); + CallInst* float_call23 = new CallInst(func_cosf, float_tmp1, "call23", label_entry_53); + float_call23->setCallingConv(CallingConv::C); + float_call23->setTailCall(true);const ParamAttrsList *float_call23_PAL = 0; + float_call23->setParamAttrs(float_call23_PAL); + + InsertElementInst* packed_tmp25 = new InsertElementInst(packed_tmp20, float_call23, const_int32_25, "tmp25", label_entry_53); + CallInst* float_call28 = new CallInst(func_cosf, float_tmp1, "call28", label_entry_53); + float_call28->setCallingConv(CallingConv::C); + float_call28->setTailCall(true);const ParamAttrsList *float_call28_PAL = 0; + float_call28->setParamAttrs(float_call28_PAL); + + InsertElementInst* packed_tmp30 = new InsertElementInst(packed_tmp25, float_call28, const_int32_24, "tmp30", label_entry_53); + CastInst* double_conv33 = new FPExtInst(float_call13, Type::DoubleTy, "conv33", label_entry_53); + CastInst* double_conv36 = new FPExtInst(float_call18, Type::DoubleTy, "conv36", label_entry_53); + CastInst* double_conv39 = new FPExtInst(float_call23, Type::DoubleTy, "conv39", label_entry_53); + CastInst* double_conv42 = new FPExtInst(float_call28, Type::DoubleTy, "conv42", label_entry_53); + std::vector int32_call43_params; + int32_call43_params.push_back(const_ptr_36); + int32_call43_params.push_back(double_conv33); + int32_call43_params.push_back(double_conv36); + int32_call43_params.push_back(double_conv39); + int32_call43_params.push_back(double_conv42); + CallInst* int32_call43 = new CallInst(func_printf, int32_call43_params.begin(), int32_call43_params.end(), "call43", label_entry_53); + int32_call43->setCallingConv(CallingConv::C); + int32_call43->setTailCall(true);const ParamAttrsList *int32_call43_PAL = 0; + int32_call43->setParamAttrs(int32_call43_PAL); + + new ReturnInst(packed_tmp30, label_entry_53); + +} + +// Function: scs (func_scs) +{ + Function::arg_iterator args = func_scs->arg_begin(); + Value* packed_val_58 = args++; + packed_val_58->setName("val"); + + BasicBlock* label_entry_59 = new BasicBlock("entry",func_scs,0); + + // Block entry (label_entry_59) + ExtractElementInst* float_tmp2 = new ExtractElementInst(packed_val_58, const_int32_19, "tmp2", label_entry_59); + CallInst* float_call_60 = new CallInst(func_cosf, float_tmp2, "call", label_entry_59); + float_call_60->setCallingConv(CallingConv::C); + float_call_60->setTailCall(true);const ParamAttrsList *float_call_60_PAL = 0; + float_call_60->setParamAttrs(float_call_60_PAL); + + InsertElementInst* packed_tmp5 = new InsertElementInst(const_packed_35, float_call_60, const_int32_19, "tmp5", label_entry_59); + CallInst* float_call7 = new CallInst(func_sinf, float_tmp2, "call7", label_entry_59); + float_call7->setCallingConv(CallingConv::C); + float_call7->setTailCall(true);const ParamAttrsList *float_call7_PAL = 0; + float_call7->setParamAttrs(float_call7_PAL); + + InsertElementInst* packed_tmp9 = new InsertElementInst(packed_tmp5, float_call7, const_int32_23, "tmp9", label_entry_59); + new ReturnInst(packed_tmp9, label_entry_59); + +} + +// Function: vsin (func_vsin) +{ + Function::arg_iterator args = func_vsin->arg_begin(); + Value* packed_val_62 = args++; + packed_val_62->setName("val"); + + BasicBlock* label_entry_63 = new BasicBlock("entry",func_vsin,0); + + // Block entry (label_entry_63) + ExtractElementInst* float_tmp2_64 = new ExtractElementInst(packed_val_62, const_int32_19, "tmp2", label_entry_63); + CallInst* float_call_65 = new CallInst(func_sinf, float_tmp2_64, "call", label_entry_63); + float_call_65->setCallingConv(CallingConv::C); + float_call_65->setTailCall(true);const ParamAttrsList *float_call_65_PAL = 0; + float_call_65->setParamAttrs(float_call_65_PAL); + + InsertElementInst* packed_tmp6 = new InsertElementInst(const_packed_35, float_call_65, const_int32_19, "tmp6", label_entry_63); + InsertElementInst* packed_tmp9_66 = new InsertElementInst(packed_tmp6, float_call_65, const_int32_23, "tmp9", label_entry_63); + InsertElementInst* packed_tmp12 = new InsertElementInst(packed_tmp9_66, float_call_65, const_int32_25, "tmp12", label_entry_63); + InsertElementInst* packed_tmp15_67 = new InsertElementInst(packed_tmp12, float_call_65, const_int32_24, "tmp15", label_entry_63); + new ReturnInst(packed_tmp15_67, label_entry_63); + +} + +// Function: kilp (func_kilp) +{ + Function::arg_iterator args = func_kilp->arg_begin(); + Value* packed_val_69 = args++; + packed_val_69->setName("val"); + + BasicBlock* label_entry_70 = new BasicBlock("entry",func_kilp,0); + BasicBlock* label_lor_rhs = new BasicBlock("lor_rhs",func_kilp,0); + BasicBlock* label_lor_rhs5 = new BasicBlock("lor_rhs5",func_kilp,0); + BasicBlock* label_lor_rhs11 = new BasicBlock("lor_rhs11",func_kilp,0); + BasicBlock* label_UnifiedReturnBlock_71 = new BasicBlock("UnifiedReturnBlock",func_kilp,0); + + // Block entry (label_entry_70) + ExtractElementInst* float_tmp1_72 = new ExtractElementInst(packed_val_69, const_int32_19, "tmp1", label_entry_70); + FCmpInst* int1_cmp_73 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp1_72, const_float_18, "cmp", label_entry_70); + new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs, int1_cmp_73, label_entry_70); + + // Block lor_rhs (label_lor_rhs) + ExtractElementInst* float_tmp3_75 = new ExtractElementInst(packed_val_69, const_int32_23, "tmp3", label_lor_rhs); + FCmpInst* int1_cmp4 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp3_75, const_float_18, "cmp4", label_lor_rhs); + new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs5, int1_cmp4, label_lor_rhs); + + // Block lor_rhs5 (label_lor_rhs5) + ExtractElementInst* float_tmp7 = new ExtractElementInst(packed_val_69, const_int32_25, "tmp7", label_lor_rhs5); + FCmpInst* int1_cmp8 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp7, const_float_18, "cmp8", label_lor_rhs5); + new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs11, int1_cmp8, label_lor_rhs5); + + // Block lor_rhs11 (label_lor_rhs11) + ExtractElementInst* float_tmp13 = new ExtractElementInst(packed_val_69, const_int32_24, "tmp13", label_lor_rhs11); + FCmpInst* int1_cmp14 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp13, const_float_18, "cmp14", label_lor_rhs11); + CastInst* int32_retval = new ZExtInst(int1_cmp14, IntegerType::get(32), "retval", label_lor_rhs11); + new ReturnInst(int32_retval, label_lor_rhs11); + + // Block UnifiedReturnBlock (label_UnifiedReturnBlock_71) + new ReturnInst(const_int32_23, label_UnifiedReturnBlock_71); + +} + +return mod; + +} diff --git a/src/gallium/auxiliary/llvm/gallivm_cpu.cpp b/src/gallium/auxiliary/llvm/gallivm_cpu.cpp new file mode 100644 index 0000000000..8f9830d0b1 --- /dev/null +++ b/src/gallium/auxiliary/llvm/gallivm_cpu.cpp @@ -0,0 +1,202 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ +#ifdef MESA_LLVM + +#include "gallivm.h" +#include "gallivm_p.h" + +#include "instructions.h" +#include "loweringpass.h" +#include "storage.h" +#include "tgsitollvm.h" + +#include "pipe/p_context.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_dump.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +struct gallivm_cpu_engine { + llvm::ExecutionEngine *engine; +}; + +static struct gallivm_cpu_engine *CPU = 0; + +typedef int (*fragment_shader_runner)(float x, float y, + float (*dests)[16][4], + float (*inputs)[16][4], + int num_attribs, + float (*consts)[4], int num_consts, + struct tgsi_sampler *samplers); + +int gallivm_cpu_fs_exec(struct gallivm_prog *prog, + float fx, float fy, + float (*dests)[16][4], + float (*inputs)[16][4], + float (*consts)[4], + struct tgsi_sampler *samplers) +{ + fragment_shader_runner runner = reinterpret_cast(prog->function); + assert(runner); + + return runner(fx, fy, dests, inputs, prog->num_interp, + consts, prog->num_consts, + samplers); +} + +static inline llvm::Function *func_for_shader(struct gallivm_prog *prog) +{ + llvm::Module *mod = prog->module; + llvm::Function *func = 0; + + switch (prog->type) { + case GALLIVM_VS: + func = mod->getFunction("vs_shader"); + break; + case GALLIVM_FS: + func = mod->getFunction("fs_shader"); + break; + default: + assert(!"Unknown shader type!"); + break; + } + return func; +} + +/*! + This function creates a CPU based execution engine for the given gallivm_prog. + gallivm_cpu_engine should be used as a singleton throughout the library. Before + executing gallivm_prog_exec one needs to call gallivm_cpu_jit_compile. + The gallivm_prog instance which is being passed to the constructor is being + automatically JIT compiled so one shouldn't call gallivm_cpu_jit_compile + with it again. + */ +struct gallivm_cpu_engine * gallivm_cpu_engine_create(struct gallivm_prog *prog) +{ + struct gallivm_cpu_engine *cpu = (struct gallivm_cpu_engine *) + calloc(1, sizeof(struct gallivm_cpu_engine)); + llvm::Module *mod = static_cast(prog->module); + llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod); + llvm::ExecutionEngine *ee = llvm::ExecutionEngine::create(mp, false); + ee->DisableLazyCompilation(); + cpu->engine = ee; + + llvm::Function *func = func_for_shader(prog); + + prog->function = ee->getPointerToFunction(func); + CPU = cpu; + return cpu; +} + + +/*! + This function JIT compiles the given gallivm_prog with the given cpu based execution engine. + The reference to the generated machine code entry point will be stored + in the gallivm_prog program. After executing this function one can call gallivm_prog_exec + in order to execute the gallivm_prog on the CPU. + */ +void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog *prog) +{ + llvm::Module *mod = static_cast(prog->module); + llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod); + llvm::ExecutionEngine *ee = cpu->engine; + assert(ee); + /*FIXME : remove */ + ee->DisableLazyCompilation(); + ee->addModuleProvider(mp); + + llvm::Function *func = func_for_shader(prog); + prog->function = ee->getPointerToFunction(func); +} + +void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *cpu) +{ + free(cpu); +} + +struct gallivm_cpu_engine * gallivm_global_cpu_engine() +{ + return CPU; +} + + +typedef void (*vertex_shader_runner)(void *ainputs, + void *dests, + float (*aconsts)[4], + void *temps); + + +/*! + This function is used to execute the gallivm_prog in software. Before calling + this function the gallivm_prog has to be JIT compiled with the gallivm_cpu_jit_compile + function. + */ +int gallivm_cpu_vs_exec(struct gallivm_prog *prog, + struct tgsi_exec_vector *inputs, + struct tgsi_exec_vector *dests, + float (*consts)[4], + struct tgsi_exec_vector *temps) +{ + vertex_shader_runner runner = reinterpret_cast(prog->function); + assert(runner); + /*FIXME*/ + runner(inputs, dests, consts, temps); + + return 0; +} + +#endif diff --git a/src/gallium/auxiliary/llvm/gallivm_p.h b/src/gallium/auxiliary/llvm/gallivm_p.h new file mode 100644 index 0000000000..cfe7b1901b --- /dev/null +++ b/src/gallium/auxiliary/llvm/gallivm_p.h @@ -0,0 +1,110 @@ +#ifndef GALLIVM_P_H +#define GALLIVM_P_H + +#ifdef MESA_LLVM + +#include "gallivm.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_compiler.h" + +namespace llvm { + class Module; +} + +#if defined __cplusplus +extern "C" { +#endif + +enum gallivm_shader_type; +enum gallivm_vector_layout; + +struct gallivm_interpolate { + int attrib; + int chan; + int type; +}; + +struct gallivm_ir { + llvm::Module *module; + int id; + enum gallivm_shader_type type; + enum gallivm_vector_layout layout; + int num_components; + int num_consts; + + //FIXME: this might not be enough for some shaders + struct gallivm_interpolate interpolators[32*4]; + int num_interp; +}; + +struct gallivm_prog { + llvm::Module *module; + void *function; + + int id; + enum gallivm_shader_type type; + + int num_consts; + + //FIXME: this might not be enough for some shaders + struct gallivm_interpolate interpolators[32*4]; + int num_interp; +}; + +static INLINE void gallivm_swizzle_components(int swizzle, + int *xc, int *yc, + int *zc, int *wc) +{ + int x = swizzle / 1000; swizzle -= x * 1000; + int y = swizzle / 100; swizzle -= y * 100; + int z = swizzle / 10; swizzle -= z * 10; + int w = swizzle; + + if (xc) *xc = x; + if (yc) *yc = y; + if (zc) *zc = z; + if (wc) *wc = w; +} + +static INLINE boolean gallivm_is_swizzle(int swizzle) +{ + const int NO_SWIZZLE = TGSI_SWIZZLE_X * 1000 + TGSI_SWIZZLE_Y * 100 + + TGSI_SWIZZLE_Z * 10 + TGSI_SWIZZLE_W; + return swizzle != NO_SWIZZLE; +} + +static INLINE int gallivm_x_swizzle(int swizzle) +{ + int x; + gallivm_swizzle_components(swizzle, &x, 0, 0, 0); + return x; +} + +static INLINE int gallivm_y_swizzle(int swizzle) +{ + int y; + gallivm_swizzle_components(swizzle, 0, &y, 0, 0); + return y; +} + +static INLINE int gallivm_z_swizzle(int swizzle) +{ + int z; + gallivm_swizzle_components(swizzle, 0, 0, &z, 0); + return z; +} + +static INLINE int gallivm_w_swizzle(int swizzle) +{ + int w; + gallivm_swizzle_components(swizzle, 0, 0, 0, &w); + return w; +} + +#endif /* MESA_LLVM */ + +#if defined __cplusplus +} // extern "C" +#endif + +#endif diff --git a/src/gallium/auxiliary/llvm/instructions.cpp b/src/gallium/auxiliary/llvm/instructions.cpp new file mode 100644 index 0000000000..55d39fa5f1 --- /dev/null +++ b/src/gallium/auxiliary/llvm/instructions.cpp @@ -0,0 +1,889 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ +#ifdef MESA_LLVM + +#include "instructions.h" + +#include "storage.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +using namespace llvm; + +#include "gallivm_builtins.cpp" + +static inline std::string createFuncName(int label) +{ + std::ostringstream stream; + stream << "function"; + stream << label; + return stream.str(); +} + +Instructions::Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block, + Storage *storage) + : m_mod(mod), m_func(func), m_builder(block), m_idx(0), + m_storage(storage) +{ + m_floatVecType = VectorType::get(Type::FloatTy, 4); + + m_llvmFSqrt = 0; + m_llvmFAbs = 0; + m_llvmPow = 0; + m_llvmFloor = 0; + m_llvmFlog = 0; + m_llvmLit = 0; + m_fmtPtr = 0; + + createGallivmBuiltins(m_mod); +} + +llvm::Value * Instructions::add(llvm::Value *in1, llvm::Value *in2) +{ + return m_builder.CreateAdd(in1, in2, name("add")); +} + +llvm::Value * Instructions::madd(llvm::Value *in1, llvm::Value *in2, + llvm::Value *in3) +{ + Value *mulRes = mul(in1, in2); + return add(mulRes, in3); +} + +llvm::Value * Instructions::mul(llvm::Value *in1, llvm::Value *in2) +{ + return m_builder.CreateMul(in1, in2, name("mul")); +} + +const char * Instructions::name(const char *prefix) +{ + ++m_idx; + snprintf(m_name, 32, "%s%d", prefix, m_idx); + return m_name; +} + +llvm::Value * Instructions::dp3(llvm::Value *in1, llvm::Value *in2) +{ + Value *mulRes = mul(in1, in2); + Value *x = m_builder.CreateExtractElement(mulRes, + m_storage->constantInt(0), + name("extractx")); + Value *y = m_builder.CreateExtractElement(mulRes, + m_storage->constantInt(1), + name("extracty")); + Value *z = m_builder.CreateExtractElement(mulRes, + m_storage->constantInt(2), + name("extractz")); + Value *xy = m_builder.CreateAdd(x, y,name("xy")); + Value *dot3 = m_builder.CreateAdd(xy, z, name("dot3")); + return vectorFromVals(dot3, dot3, dot3, dot3); +} + +llvm::Value *Instructions::callFSqrt(llvm::Value *val) +{ + if (!m_llvmFSqrt) { + // predeclare the intrinsic + std::vector fsqrtArgs; + fsqrtArgs.push_back(Type::FloatTy); + ParamAttrsList *fsqrtPal = 0; + FunctionType* fsqrtType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/fsqrtArgs, + /*isVarArg=*/false); + m_llvmFSqrt = new Function( + /*Type=*/fsqrtType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"llvm.sqrt.f32", m_mod); + m_llvmFSqrt->setCallingConv(CallingConv::C); + m_llvmFSqrt->setParamAttrs(fsqrtPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFSqrt, val, + name("sqrt")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::rsq(llvm::Value *in1) +{ + Value *x = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("extractx")); + Value *abs = callFAbs(x); + Value *sqrt = callFSqrt(abs); + + Value *rsqrt = m_builder.CreateFDiv(ConstantFP::get(Type::FloatTy, + APFloat(1.f)), + sqrt, + name("rsqrt")); + return vectorFromVals(rsqrt, rsqrt, rsqrt, rsqrt); +} + +llvm::Value * Instructions::vectorFromVals(llvm::Value *x, llvm::Value *y, + llvm::Value *z, llvm::Value *w) +{ + Constant *const_vec = Constant::getNullValue(m_floatVecType); + Value *res = m_builder.CreateInsertElement(const_vec, x, + m_storage->constantInt(0), + name("vecx")); + res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1), + name("vecxy")); + res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2), + name("vecxyz")); + if (w) + res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3), + name("vecxyzw")); + return res; +} + +llvm::Value *Instructions::callFAbs(llvm::Value *val) +{ + if (!m_llvmFAbs) { + // predeclare the intrinsic + std::vector fabsArgs; + fabsArgs.push_back(Type::FloatTy); + ParamAttrsList *fabsPal = 0; + FunctionType* fabsType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/fabsArgs, + /*isVarArg=*/false); + m_llvmFAbs = new Function( + /*Type=*/fabsType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"fabs", m_mod); + m_llvmFAbs->setCallingConv(CallingConv::C); + m_llvmFAbs->setParamAttrs(fabsPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFAbs, val, + name("fabs")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::lit(llvm::Value *in) +{ + if (!m_llvmLit) { + m_llvmLit = m_mod->getFunction("lit"); + } + CallInst *call = m_builder.CreateCall(m_llvmLit, in, name("litres")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::sub(llvm::Value *in1, llvm::Value *in2) +{ + Value *res = m_builder.CreateSub(in1, in2, name("sub")); + return res; +} + +llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2) +{ + if (!m_llvmPow) { + // predeclare the intrinsic + std::vector powArgs; + powArgs.push_back(Type::FloatTy); + powArgs.push_back(Type::FloatTy); + ParamAttrsList *powPal = 0; + FunctionType* powType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/powArgs, + /*isVarArg=*/false); + m_llvmPow = new Function( + /*Type=*/powType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"llvm.pow.f32", m_mod); + m_llvmPow->setCallingConv(CallingConv::C); + m_llvmPow->setParamAttrs(powPal); + } + std::vector params; + params.push_back(val1); + params.push_back(val2); + CallInst *call = m_builder.CreateCall(m_llvmPow, params.begin(), params.end(), + name("pow")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::pow(llvm::Value *in1, llvm::Value *in2) +{ + Value *x1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("x1")); + Value *x2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(0), + name("x2")); + llvm::Value *val = callPow(x1, x2); + return vectorFromVals(val, val, val, val); +} + +llvm::Value * Instructions::rcp(llvm::Value *in1) +{ + Value *x1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("x1")); + Value *res = m_builder.CreateFDiv(ConstantFP::get(Type::FloatTy, + APFloat(1.f)), + x1, name("rcp")); + return vectorFromVals(res, res, res, res); +} + +llvm::Value * Instructions::dp4(llvm::Value *in1, llvm::Value *in2) +{ + Value *mulRes = mul(in1, in2); + std::vector vec = extractVector(mulRes); + Value *xy = m_builder.CreateAdd(vec[0], vec[1], name("xy")); + Value *xyz = m_builder.CreateAdd(xy, vec[2], name("xyz")); + Value *dot4 = m_builder.CreateAdd(xyz, vec[3], name("dot4")); + return vectorFromVals(dot4, dot4, dot4, dot4); +} + +llvm::Value * Instructions::dph(llvm::Value *in1, llvm::Value *in2) +{ + Value *mulRes = mul(in1, in2); + std::vector vec1 = extractVector(mulRes); + Value *xy = m_builder.CreateAdd(vec1[0], vec1[1], name("xy")); + Value *xyz = m_builder.CreateAdd(xy, vec1[2], name("xyz")); + Value *dph = m_builder.CreateAdd(xyz, vec1[3], name("dph")); + return vectorFromVals(dph, dph, dph, dph); +} + +llvm::Value * Instructions::dst(llvm::Value *in1, llvm::Value *in2) +{ + Value *y1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(1), + name("y1")); + Value *z = m_builder.CreateExtractElement(in1, + m_storage->constantInt(2), + name("z")); + Value *y2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(1), + name("y2")); + Value *w = m_builder.CreateExtractElement(in2, + m_storage->constantInt(3), + name("w")); + Value *ry = m_builder.CreateMul(y1, y2, name("tyuy")); + return vectorFromVals(ConstantFP::get(Type::FloatTy, APFloat(1.f)), + ry, z, w); +} + +llvm::Value * Instructions::ex2(llvm::Value *in) +{ + llvm::Value *val = callPow(ConstantFP::get(Type::FloatTy, APFloat(2.f)), + m_builder.CreateExtractElement( + in, m_storage->constantInt(0), + name("x1"))); + return vectorFromVals(val, val, val, val); +} + +llvm::Value * Instructions::callFloor(llvm::Value *val) +{ + if (!m_llvmFloor) { + // predeclare the intrinsic + std::vector floorArgs; + floorArgs.push_back(Type::FloatTy); + ParamAttrsList *floorPal = 0; + FunctionType* floorType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/floorArgs, + /*isVarArg=*/false); + m_llvmFloor = new Function( + /*Type=*/floorType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"floorf", m_mod); + m_llvmFloor->setCallingConv(CallingConv::C); + m_llvmFloor->setParamAttrs(floorPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFloor, val, + name("floorf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::floor(llvm::Value *in) +{ + std::vector vec = extractVector(in); + return vectorFromVals(callFloor(vec[0]), callFloor(vec[1]), + callFloor(vec[2]), callFloor(vec[3])); +} + +llvm::Value * Instructions::arl(llvm::Value *in) +{ + return floor(in); +} + +llvm::Value * Instructions::frc(llvm::Value *in) +{ + llvm::Value *flr = floor(in); + return sub(in, flr); +} + +llvm::Value * Instructions::callFLog(llvm::Value *val) +{ + if (!m_llvmFlog) { + // predeclare the intrinsic + std::vector flogArgs; + flogArgs.push_back(Type::FloatTy); + ParamAttrsList *flogPal = 0; + FunctionType* flogType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/flogArgs, + /*isVarArg=*/false); + m_llvmFlog = new Function( + /*Type=*/flogType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"logf", m_mod); + m_llvmFlog->setCallingConv(CallingConv::C); + m_llvmFlog->setParamAttrs(flogPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFlog, val, + name("logf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::lg2(llvm::Value *in) +{ + std::vector vec = extractVector(in); + llvm::Value *const_vec = constVector(1.442695f, 1.442695f, + 1.442695f, 1.442695f); + return mul(vectorFromVals(callFLog(vec[0]), callFLog(vec[1]), + callFLog(vec[2]), callFLog(vec[3])), const_vec); +} + +llvm::Value * Instructions::min(llvm::Value *in1, llvm::Value *in2) +{ + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp")); + Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0], + name("selx")); + + Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp")); + Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1], + name("sely")); + + Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp")); + Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2], + name("selz")); + + Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp")); + Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3], + name("selw")); + + return vectorFromVals(selx, sely, selz, selw); +} + +llvm::Value * Instructions::max(llvm::Value *in1, llvm::Value *in2) +{ + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], + name("xcmp")); + Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0], + name("selx")); + + Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], + name("ycmp")); + Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1], + name("sely")); + + Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], + name("zcmp")); + Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2], + name("selz")); + + Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], + name("wcmp")); + Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3], + name("selw")); + + return vectorFromVals(selx, sely, selz, selw); +} + +void Instructions::printVector(llvm::Value *val) +{ + static const char *frmt = "Vector is [%f, %f, %f, %f]\x0A"; + + if (!m_fmtPtr) { + Constant *format = ConstantArray::get(frmt, true); + ArrayType *arrayTy = ArrayType::get(IntegerType::get(8), strlen(frmt) + 1); + GlobalVariable* globalFormat = new GlobalVariable( + /*Type=*/arrayTy, + /*isConstant=*/true, + /*Linkage=*/GlobalValue::InternalLinkage, + /*Initializer=*/0, // has initializer, specified below + /*Name=*/name(".str"), + m_mod); + globalFormat->setInitializer(format); + + Constant* const_int0 = Constant::getNullValue(IntegerType::get(32)); + std::vector const_ptr_21_indices; + const_ptr_21_indices.push_back(const_int0); + const_ptr_21_indices.push_back(const_int0); + m_fmtPtr = ConstantExpr::getGetElementPtr(globalFormat, + &const_ptr_21_indices[0], const_ptr_21_indices.size()); + } + + Function *func_printf = m_mod->getFunction("printf"); + if (!func_printf) + func_printf = declarePrintf(); + assert(func_printf); + std::vector vec = extractVector(val); + Value *dx = m_builder.CreateFPExt(vec[0], Type::DoubleTy, name("dx")); + Value *dy = m_builder.CreateFPExt(vec[1], Type::DoubleTy, name("dy")); + Value *dz = m_builder.CreateFPExt(vec[2], Type::DoubleTy, name("dz")); + Value *dw = m_builder.CreateFPExt(vec[3], Type::DoubleTy, name("dw")); + std::vector params; + params.push_back(m_fmtPtr); + params.push_back(dx); + params.push_back(dy); + params.push_back(dz); + params.push_back(dw); + CallInst *call = m_builder.CreateCall(func_printf, params.begin(), params.end(), + name("printf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(true); +} + +llvm::Function * Instructions::declarePrintf() +{ + std::vector args; + ParamAttrsList *params = 0; + FunctionType* funcTy = FunctionType::get( + /*Result=*/IntegerType::get(32), + /*Params=*/args, + /*isVarArg=*/true); + Function* func_printf = new Function( + /*Type=*/funcTy, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"printf", m_mod); + func_printf->setCallingConv(CallingConv::C); + func_printf->setParamAttrs(params); + return func_printf; +} + + +llvm::Value * Instructions::sgt(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); + Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); + + Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); +} +llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); + + Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpOGE(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); +} + + +llvm::Value * Instructions::slt(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); + + Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); +} + +llvm::Value * Instructions::cross(llvm::Value *in1, llvm::Value *in2) +{ + Value *x1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("x1")); + Value *y1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(1), + name("y1")); + Value *z1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(2), + name("z1")); + + Value *x2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(0), + name("x2")); + Value *y2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(1), + name("y2")); + Value *z2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(2), + name("z2")); + Value *y1z2 = mul(y1, z2); + Value *z1y2 = mul(z1, y2); + + Value *z1x2 = mul(z1, x2); + Value *x1z2 = mul(x1, z2); + + Value *x1y2 = mul(x1, y2); + Value *y1x2 = mul(y1, x2); + + return vectorFromVals(sub(y1z2, z1y2), sub(z1x2, x1z2), sub(x1y2, y1x2)); +} + + +llvm::Value * Instructions::abs(llvm::Value *in) +{ + std::vector vec = extractVector(in); + Value *xabs = callFAbs(vec[0]); + Value *yabs = callFAbs(vec[1]); + Value *zabs = callFAbs(vec[2]); + Value *wabs = callFAbs(vec[3]); + return vectorFromVals(xabs, yabs, zabs, wabs); +} + +void Instructions::ifop(llvm::Value *in) +{ + BasicBlock *ifthen = new BasicBlock(name("ifthen"), m_func,0); + BasicBlock *ifend = new BasicBlock(name("ifthenend"), m_func,0); + + //BasicBlock *yblock = new BasicBlock(name("yblock"), m_func,0); + //BasicBlock *zblock = new BasicBlock(name("zblock"), m_func,0); + //BasicBlock *wblock = new BasicBlock(name("wblock"), m_func,0); + + Constant *float0 = Constant::getNullValue(Type::FloatTy); + + Value *x = m_builder.CreateExtractElement(in, m_storage->constantInt(0), + name("extractx")); + Value *xcmp = m_builder.CreateFCmpUNE(x, float0, name("xcmp")); + m_builder.CreateCondBr(xcmp, ifthen, ifend); + //m_builder.SetInsertPoint(yblock); + + m_builder.SetInsertPoint(ifthen); + m_ifStack.push(ifend); +} + +llvm::BasicBlock * Instructions::currentBlock() const +{ + return m_builder.GetInsertBlock(); +} + +void Instructions::elseop() +{ + assert(!m_ifStack.empty()); + BasicBlock *ifend = new BasicBlock(name("ifend"), m_func,0); + m_builder.CreateBr(ifend); + m_builder.SetInsertPoint(m_ifStack.top()); + currentBlock()->setName(name("ifelse")); + m_ifStack.pop(); + m_ifStack.push(ifend); +} + +void Instructions::endif() +{ + assert(!m_ifStack.empty()); + m_builder.CreateBr(m_ifStack.top()); + m_builder.SetInsertPoint(m_ifStack.top()); + m_ifStack.pop(); +} + +llvm::Value * Instructions::lerp(llvm::Value *in1, llvm::Value *in2, + llvm::Value *in3) +{ + llvm::Value *m = mul(in1, in2); + llvm::Value *vec1 = constVector(1.f, 1.f, 1.f, 1.f); + llvm::Value *s = sub(vec1, in1); + return add(m, mul(s, in3)); +} + +void Instructions::beginLoop() +{ + BasicBlock *begin = new BasicBlock(name("loop"), m_func,0); + BasicBlock *end = new BasicBlock(name("endloop"), m_func,0); + + m_builder.CreateBr(begin); + Loop loop; + loop.begin = begin; + loop.end = end; + m_builder.SetInsertPoint(begin); + m_loopStack.push(loop); +} + +void Instructions::endLoop() +{ + assert(!m_loopStack.empty()); + Loop loop = m_loopStack.top(); + m_builder.CreateBr(loop.begin); + loop.end->moveAfter(currentBlock()); + m_builder.SetInsertPoint(loop.end); + m_loopStack.pop(); +} + +void Instructions::brk() +{ + assert(!m_loopStack.empty()); + BasicBlock *unr = new BasicBlock(name("unreachable"), m_func,0); + m_builder.CreateBr(m_loopStack.top().end); + m_builder.SetInsertPoint(unr); +} + +llvm::Value * Instructions::trunc(llvm::Value *in) +{ + std::vector vec = extractVector(in); + Value *icastx = m_builder.CreateFPToSI(vec[0], IntegerType::get(32), + name("ftoix")); + Value *icasty = m_builder.CreateFPToSI(vec[1], IntegerType::get(32), + name("ftoiy")); + Value *icastz = m_builder.CreateFPToSI(vec[2], IntegerType::get(32), + name("ftoiz")); + Value *icastw = m_builder.CreateFPToSI(vec[3], IntegerType::get(32), + name("ftoiw")); + Value *fx = m_builder.CreateSIToFP(icastx, Type::FloatTy, + name("fx")); + Value *fy = m_builder.CreateSIToFP(icasty, Type::FloatTy, + name("fy")); + Value *fz = m_builder.CreateSIToFP(icastz, Type::FloatTy, + name("fz")); + Value *fw = m_builder.CreateSIToFP(icastw, Type::FloatTy, + name("fw")); + return vectorFromVals(fx, fy, fz, fw); +} + +void Instructions::end() +{ + m_builder.CreateRetVoid(); +} + +void Instructions::cal(int label, llvm::Value *input) +{ + std::vector params; + params.push_back(input); + llvm::Function *func = findFunction(label); + + m_builder.CreateCall(func, params.begin(), params.end()); +} + +llvm::Function * Instructions::declareFunc(int label) +{ + PointerType *vecPtr = PointerType::getUnqual(m_floatVecType); + std::vector args; + args.push_back(vecPtr); + args.push_back(vecPtr); + args.push_back(vecPtr); + args.push_back(vecPtr); + ParamAttrsList *params = 0; + FunctionType *funcType = FunctionType::get( + /*Result=*/Type::VoidTy, + /*Params=*/args, + /*isVarArg=*/false); + std::string name = createFuncName(label); + Function *func = new Function( + /*Type=*/funcType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/name.c_str(), m_mod); + func->setCallingConv(CallingConv::C); + func->setParamAttrs(params); + return func; +} + +void Instructions::bgnSub(unsigned label) +{ + llvm::Function *func = findFunction(label); + + Function::arg_iterator args = func->arg_begin(); + Value *ptr_INPUT = args++; + ptr_INPUT->setName("INPUT"); + m_storage->pushArguments(ptr_INPUT); + + llvm::BasicBlock *entry = new BasicBlock("entry", func, 0); + + m_func = func; + m_builder.SetInsertPoint(entry); +} + +void Instructions::endSub() +{ + m_func = 0; + m_builder.SetInsertPoint(0); +} + +llvm::Function * Instructions::findFunction(int label) +{ + llvm::Function *func = m_functions[label]; + if (!func) { + func = declareFunc(label); + m_functions[label] = func; + } + return func; +} + +llvm::Value * Instructions::constVector(float x, float y, float z, float w) +{ + std::vector vec(4); + vec[0] = ConstantFP::get(Type::FloatTy, APFloat(x)); + vec[1] = ConstantFP::get(Type::FloatTy, APFloat(y)); + vec[2] = ConstantFP::get(Type::FloatTy, APFloat(z)); + vec[3] = ConstantFP::get(Type::FloatTy, APFloat(w)); + return ConstantVector::get(m_floatVecType, vec); +} + + +std::vector Instructions::extractVector(llvm::Value *vec) +{ + std::vector elems(4); + elems[0] = m_builder.CreateExtractElement(vec, m_storage->constantInt(0), + name("x")); + elems[1] = m_builder.CreateExtractElement(vec, m_storage->constantInt(1), + name("y")); + elems[2] = m_builder.CreateExtractElement(vec, m_storage->constantInt(2), + name("z")); + elems[3] = m_builder.CreateExtractElement(vec, m_storage->constantInt(3), + name("w")); + return elems; +} + +llvm::Value * Instructions::cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) +{ + llvm::Function *func = m_mod->getFunction("cmp"); + assert(func); + + std::vector params; + params.push_back(in1); + params.push_back(in2); + params.push_back(in3); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end(), name("cmpres")); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::cos(llvm::Value *in) +{ +#if 0 + llvm::Function *func = m_mod->getFunction("vcos"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("cosres")); + call->setTailCall(false); + return call; +#else + std::vector elems = extractVector(in); + Function *func = m_mod->getFunction("cosf"); + assert(func); + CallInst *cos = m_builder.CreateCall(func, elems[0], name("cosres")); + cos->setCallingConv(CallingConv::C); + cos->setTailCall(true); + return vectorFromVals(cos, cos, cos, cos); +#endif +} + +llvm::Value * Instructions::scs(llvm::Value *in) +{ + llvm::Function *func = m_mod->getFunction("scs"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("scsres")); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::kilp(llvm::Value *in) +{ + llvm::Function *func = m_mod->getFunction("kilp"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("kilpres")); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::sin(llvm::Value *in) +{ + llvm::Function *func = m_mod->getFunction("vsin"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("sinres")); + call->setTailCall(false); + return call; +} +#endif //MESA_LLVM + + diff --git a/src/gallium/auxiliary/llvm/instructions.h b/src/gallium/auxiliary/llvm/instructions.h new file mode 100644 index 0000000000..9ebc17dd8e --- /dev/null +++ b/src/gallium/auxiliary/llvm/instructions.h @@ -0,0 +1,152 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ + +#ifndef INSTRUCTIONS_H +#define INSTRUCTIONS_H + +#include +#include +#include +#include + +#include +#include + +namespace llvm { + class VectorType; + class Function; +} + +class Storage; + +class Instructions +{ +public: + Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block, + Storage *storage); + + llvm::BasicBlock *currentBlock() const; + + llvm::Value *abs(llvm::Value *in1); + llvm::Value *arl(llvm::Value *in1); + llvm::Value *add(llvm::Value *in1, llvm::Value *in2); + void beginLoop(); + void bgnSub(unsigned); + void brk(); + void cal(int label, llvm::Value *input); + llvm::Value *cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); + llvm::Value *cos(llvm::Value *in); + llvm::Value *cross(llvm::Value *in1, llvm::Value *in2); + llvm::Value *dp3(llvm::Value *in1, llvm::Value *in2); + llvm::Value *dp4(llvm::Value *in1, llvm::Value *in2); + llvm::Value *dph(llvm::Value *in1, llvm::Value *in2); + llvm::Value *dst(llvm::Value *in1, llvm::Value *in2); + void elseop(); + void endif(); + void endLoop(); + void end(); + void endSub(); + llvm::Value *ex2(llvm::Value *in); + llvm::Value *floor(llvm::Value *in); + llvm::Value *frc(llvm::Value *in); + void ifop(llvm::Value *in); + llvm::Value *kilp(llvm::Value *in); + llvm::Value *lerp(llvm::Value *in1, llvm::Value *in2, + llvm::Value *in3); + llvm::Value *lit(llvm::Value *in); + llvm::Value *lg2(llvm::Value *in); + llvm::Value *madd(llvm::Value *in1, llvm::Value *in2, + llvm::Value *in2); + llvm::Value *min(llvm::Value *in1, llvm::Value *in2); + llvm::Value *max(llvm::Value *in1, llvm::Value *in2); + llvm::Value *mul(llvm::Value *in1, llvm::Value *in2); + llvm::Value *pow(llvm::Value *in1, llvm::Value *in2); + llvm::Value *rcp(llvm::Value *in); + llvm::Value *rsq(llvm::Value *in); + llvm::Value *scs(llvm::Value *in); + llvm::Value *sge(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sgt(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sin(llvm::Value *in); + llvm::Value *slt(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sub(llvm::Value *in1, llvm::Value *in2); + llvm::Value *trunc(llvm::Value *in); + + void printVector(llvm::Value *val); +private: + const char *name(const char *prefix); + + llvm::Value *callFAbs(llvm::Value *val); + llvm::Value *callFloor(llvm::Value *val); + llvm::Value *callFSqrt(llvm::Value *val); + llvm::Value *callFLog(llvm::Value *val); + llvm::Value *callPow(llvm::Value *val1, llvm::Value *val2); + + llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, + llvm::Value *z, llvm::Value *w=0); + + llvm::Value *constVector(float x, float y, float z, float w); + + llvm::Function *declarePrintf(); + llvm::Function *declareFunc(int label); + + llvm::Function *findFunction(int label); + + std::vector extractVector(llvm::Value *vec); +private: + llvm::Module *m_mod; + llvm::Function *m_func; + char m_name[32]; + llvm::LLVMFoldingBuilder m_builder; + int m_idx; + + llvm::VectorType *m_floatVecType; + + llvm::Function *m_llvmFSqrt; + llvm::Function *m_llvmFAbs; + llvm::Function *m_llvmPow; + llvm::Function *m_llvmFloor; + llvm::Function *m_llvmFlog; + llvm::Function *m_llvmLit; + + llvm::Constant *m_fmtPtr; + + std::stack m_ifStack; + struct Loop { + llvm::BasicBlock *begin; + llvm::BasicBlock *end; + }; + std::stack m_loopStack; + std::map m_functions; + Storage *m_storage; +}; + +#endif diff --git a/src/gallium/auxiliary/llvm/instructionssoa.cpp b/src/gallium/auxiliary/llvm/instructionssoa.cpp new file mode 100644 index 0000000000..a4d5046637 --- /dev/null +++ b/src/gallium/auxiliary/llvm/instructionssoa.cpp @@ -0,0 +1,121 @@ +#include "instructionssoa.h" + +#include "storagesoa.h" + +#include + +using namespace llvm; + +InstructionsSoa::InstructionsSoa(llvm::Module *mod, llvm::Function *func, + llvm::BasicBlock *block, StorageSoa *storage) + : m_builder(block), + m_storage(storage), + m_idx(0) +{ +} + +const char * InstructionsSoa::name(const char *prefix) const +{ + ++m_idx; + snprintf(m_name, 32, "%s%d", prefix, m_idx); + return m_name; +} + +llvm::Value * InstructionsSoa::vectorFromVals(llvm::Value *x, llvm::Value *y, + llvm::Value *z, llvm::Value *w) +{ + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + Constant *constVector = Constant::getNullValue(vectorType); + Value *res = m_builder.CreateInsertElement(constVector, x, + m_storage->constantInt(0), + name("vecx")); + res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1), + name("vecxy")); + res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2), + name("vecxyz")); + if (w) + res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3), + name("vecxyzw")); + return res; +} + +std::vector InstructionsSoa::arl(const std::vector in) +{ + std::vector res(4); + + //Extract x's + llvm::Value *x1 = m_builder.CreateExtractElement(in[0], + m_storage->constantInt(0), + name("extractX")); + //cast it to an unsigned int + x1 = m_builder.CreateFPToUI(x1, IntegerType::get(32), name("x1IntCast")); + + res[0] = x1;//vectorFromVals(x1, x2, x3, x4); + //only x is valid. the others shouldn't be necessary + /* + res[1] = Constant::getNullValue(m_floatVecType); + res[2] = Constant::getNullValue(m_floatVecType); + res[3] = Constant::getNullValue(m_floatVecType); + */ + + return res; +} + + +std::vector InstructionsSoa::add(const std::vector in1, + const std::vector in2) +{ + std::vector res(4); + + res[0] = m_builder.CreateAdd(in1[0], in2[0], name("addx")); + res[1] = m_builder.CreateAdd(in1[1], in2[1], name("addy")); + res[2] = m_builder.CreateAdd(in1[2], in2[2], name("addz")); + res[3] = m_builder.CreateAdd(in1[3], in2[3], name("addw")); + + return res; +} + +std::vector InstructionsSoa::mul(const std::vector in1, + const std::vector in2) +{ + std::vector res(4); + + res[0] = m_builder.CreateMul(in1[0], in2[0], name("mulx")); + res[1] = m_builder.CreateMul(in1[1], in2[1], name("muly")); + res[2] = m_builder.CreateMul(in1[2], in2[2], name("mulz")); + res[3] = m_builder.CreateMul(in1[3], in2[3], name("mulw")); + + return res; +} + +void InstructionsSoa::end() +{ + m_builder.CreateRetVoid(); +} + +std::vector InstructionsSoa::madd(const std::vector in1, + const std::vector in2, + const std::vector in3) +{ + std::vector res = mul(in1, in2); + return add(res, in3); +} + +std::vector InstructionsSoa::extractVector(llvm::Value *vector) +{ + std::vector res(4); + res[0] = m_builder.CreateExtractElement(vector, + m_storage->constantInt(0), + name("extract1X")); + res[1] = m_builder.CreateExtractElement(vector, + m_storage->constantInt(1), + name("extract2X")); + res[2] = m_builder.CreateExtractElement(vector, + m_storage->constantInt(2), + name("extract3X")); + res[3] = m_builder.CreateExtractElement(vector, + m_storage->constantInt(3), + name("extract4X")); + + return res; +} diff --git a/src/gallium/auxiliary/llvm/instructionssoa.h b/src/gallium/auxiliary/llvm/instructionssoa.h new file mode 100644 index 0000000000..4169dcbb2e --- /dev/null +++ b/src/gallium/auxiliary/llvm/instructionssoa.h @@ -0,0 +1,74 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INSTRUCTIONSSOA_H +#define INSTRUCTIONSSOA_H + +#include + +#include + +namespace llvm { + class Module; + class Function; + class BasicBlock; + class Value; +} +class StorageSoa; + +class InstructionsSoa +{ +public: + InstructionsSoa(llvm::Module *mod, llvm::Function *func, + llvm::BasicBlock *block, StorageSoa *storage); + + std::vector arl(const std::vector in); + + std::vector add(const std::vector in1, + const std::vector in2); + std::vector madd(const std::vector in1, + const std::vector in2, + const std::vector in3); + std::vector mul(const std::vector in1, + const std::vector in2); + void end(); + + std::vector extractVector(llvm::Value *vector); +private: + const char * name(const char *prefix) const; + llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, + llvm::Value *z, llvm::Value *w); +private: + llvm::LLVMFoldingBuilder m_builder; + StorageSoa *m_storage; +private: + mutable int m_idx; + mutable char m_name[32]; +}; + + +#endif diff --git a/src/gallium/auxiliary/llvm/llvm_builtins.c b/src/gallium/auxiliary/llvm/llvm_builtins.c new file mode 100644 index 0000000000..4f98d754ba --- /dev/null +++ b/src/gallium/auxiliary/llvm/llvm_builtins.c @@ -0,0 +1,115 @@ +/*clang --emit-llvm llvm_builtins.c |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=gallivm_builtins.cpp -f -for=shader -funcname=createGallivmBuiltins*/ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ +typedef __attribute__(( ocu_vector_type(4) )) float float4; + +extern float powf(float a, float b); + +inline float approx(float a, float b) +{ + if (b < -128.0f) b = -128.0f; + if (b > 128.0f) b = 128.0f; + if (a < 0) a = 0; + return powf(a, b); +} + +inline float4 lit(float4 tmp) +{ + float4 result; + result.x = 1.0; + result.w = 1.0; + if (tmp.x > 0) { + result.y = tmp.x; + result.z = approx(tmp.y, tmp.w); + } else { + result.y = 0; + result.z = 0; + } + return result; +} + +inline float4 cmp(float4 tmp0, float4 tmp1, float4 tmp2) +{ + float4 result; + + result.x = (tmp0.x < 0.0) ? tmp1.x : tmp2.x; + result.y = (tmp0.y < 0.0) ? tmp1.y : tmp2.y; + result.z = (tmp0.z < 0.0) ? tmp1.z : tmp2.z; + result.w = (tmp0.w < 0.0) ? tmp1.w : tmp2.w; + + return result; +} + +extern float cosf(float val); +extern float sinf(float val); + +inline float4 vcos(float4 val) +{ + float4 result; + printf("VEC IN is %f %f %f %f\n", val.x, val.y, val.z, val.w); + result.x = cosf(val.x); + result.y = cosf(val.x); + result.z = cosf(val.x); + result.w = cosf(val.x); + printf("VEC OUT is %f %f %f %f\n", result.x, result.y, result.z, result.w); + return result; +} + +inline float4 scs(float4 val) +{ + float4 result; + float tmp = val.x; + result.x = cosf(tmp); + result.y = sinf(tmp); + return result; +} + + +inline float4 vsin(float4 val) +{ + float4 result; + float tmp = val.x; + float res = sinf(tmp); + result.x = res; + result.y = res; + result.z = res; + result.w = res; + return result; +} + +inline int kilp(float4 val) +{ + if (val.x < 0 || val.y < 0 || val.z < 0 || val.w < 0) + return 1; + else + return 0; +} diff --git a/src/gallium/auxiliary/llvm/loweringpass.cpp b/src/gallium/auxiliary/llvm/loweringpass.cpp new file mode 100644 index 0000000000..556dbec366 --- /dev/null +++ b/src/gallium/auxiliary/llvm/loweringpass.cpp @@ -0,0 +1,17 @@ +#include "loweringpass.h" + +using namespace llvm; + +char LoweringPass::ID = 0; +RegisterPass X("lowering", "Lowering Pass"); + +LoweringPass::LoweringPass() + : ModulePass((intptr_t)&ID) +{ +} + +bool LoweringPass::runOnModule(Module &m) +{ + llvm::cerr << "Hello: " << m.getModuleIdentifier() << "\n"; + return false; +} diff --git a/src/gallium/auxiliary/llvm/loweringpass.h b/src/gallium/auxiliary/llvm/loweringpass.h new file mode 100644 index 0000000000..f62dcf6ba7 --- /dev/null +++ b/src/gallium/auxiliary/llvm/loweringpass.h @@ -0,0 +1,15 @@ +#ifndef LOWERINGPASS_H +#define LOWERINGPASS_H + +#include "llvm/Pass.h" +#include "llvm/Module.h" + +struct LoweringPass : public llvm::ModulePass +{ + static char ID; + LoweringPass(); + + virtual bool runOnModule(llvm::Module &m); +}; + +#endif diff --git a/src/gallium/auxiliary/llvm/storage.cpp b/src/gallium/auxiliary/llvm/storage.cpp new file mode 100644 index 0000000000..c4326de8c5 --- /dev/null +++ b/src/gallium/auxiliary/llvm/storage.cpp @@ -0,0 +1,364 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ +#ifdef MESA_LLVM + +#include "storage.h" + +#include "gallivm_p.h" + +#include "pipe/p_shader_tokens.h" +#include +#include +#include + +#include +#include +#include +#include +#include + +using namespace llvm; + +Storage::Storage(llvm::BasicBlock *block, llvm::Value *input) + : m_block(block), + m_INPUT(input), + m_addrs(32), + m_idx(0) +{ + m_floatVecType = VectorType::get(Type::FloatTy, 4); + m_intVecType = VectorType::get(IntegerType::get(32), 4); + + m_undefFloatVec = UndefValue::get(m_floatVecType); + m_undefIntVec = UndefValue::get(m_intVecType); + m_extSwizzleVec = 0; + + m_numConsts = 0; +} + +//can only build vectors with all members in the [0, 9] range +llvm::Constant *Storage::shuffleMask(int vec) +{ + if (!m_extSwizzleVec) { + std::vector elems; + elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(0.f))); + elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(1.f))); + elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(0.f))); + elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(1.f))); + m_extSwizzleVec = ConstantVector::get(m_floatVecType, elems); + } + + if (m_intVecs.find(vec) != m_intVecs.end()) { + return m_intVecs[vec]; + } + int origVec = vec; + Constant* const_vec = 0; + if (origVec == 0) { + const_vec = Constant::getNullValue(m_intVecType); + } else { + int x = gallivm_x_swizzle(vec); + int y = gallivm_y_swizzle(vec); + int z = gallivm_z_swizzle(vec); + int w = gallivm_w_swizzle(vec); + std::vector elems; + elems.push_back(constantInt(x)); + elems.push_back(constantInt(y)); + elems.push_back(constantInt(z)); + elems.push_back(constantInt(w)); + const_vec = ConstantVector::get(m_intVecType, elems); + } + + m_intVecs[origVec] = const_vec; + return const_vec; +} + +llvm::ConstantInt *Storage::constantInt(int idx) +{ + if (m_constInts.find(idx) != m_constInts.end()) { + return m_constInts[idx]; + } + ConstantInt *const_int = ConstantInt::get(APInt(32, idx)); + m_constInts[idx] = const_int; + return const_int; +} + +llvm::Value *Storage::inputElement(int idx, llvm::Value *indIdx) +{ + Value *val = element(InputsArg, idx, indIdx); + LoadInst *load = new LoadInst(val, name("input"), false, m_block); + load->setAlignment(8); + + return load; +} + +llvm::Value *Storage::constElement(int idx, llvm::Value *indIdx) +{ + m_numConsts = ((idx + 1) > m_numConsts) ? (idx + 1) : m_numConsts; + + Value *elem = element(ConstsArg, idx, indIdx); + LoadInst *load = new LoadInst(elem, name("const"), false, m_block); + load->setAlignment(8); + return load; +} + +llvm::Value *Storage::shuffleVector(llvm::Value *vec, int shuffle) +{ + Constant *mask = shuffleMask(shuffle); + ShuffleVectorInst *res = + new ShuffleVectorInst(vec, m_extSwizzleVec, mask, + name("shuffle"), m_block); + return res; +} + + +llvm::Value *Storage::tempElement(int idx, llvm::Value *indIdx) +{ + Value *elem = element(TempsArg, idx, indIdx); + + LoadInst *load = new LoadInst(elem, name("temp"), false, m_block); + load->setAlignment(8); + + return load; +} + +void Storage::setTempElement(int idx, llvm::Value *val, int mask) +{ + if (mask != TGSI_WRITEMASK_XYZW) { + llvm::Value *templ = 0; + if (m_tempWriteMap[idx]) + templ = tempElement(idx); + val = maskWrite(val, mask, templ); + } + Value *elem = element(TempsArg, idx); + StoreInst *st = new StoreInst(val, elem, false, m_block); + st->setAlignment(8); + m_tempWriteMap[idx] = true; +} + +void Storage::setOutputElement(int dstIdx, llvm::Value *val, int mask) +{ + if (mask != TGSI_WRITEMASK_XYZW) { + llvm::Value *templ = 0; + if (m_destWriteMap[dstIdx]) + templ = outputElement(dstIdx); + val = maskWrite(val, mask, templ); + } + + Value *elem = element(DestsArg, dstIdx); + StoreInst *st = new StoreInst(val, elem, false, m_block); + st->setAlignment(8); + m_destWriteMap[dstIdx] = true; +} + +llvm::Value *Storage::maskWrite(llvm::Value *src, int mask, llvm::Value *templ) +{ + llvm::Value *dst = templ; + if (!dst) + dst = Constant::getNullValue(m_floatVecType); + if ((mask & TGSI_WRITEMASK_X)) { + llvm::Value *x = new ExtractElementInst(src, unsigned(0), + name("x"), m_block); + dst = new InsertElementInst(dst, x, unsigned(0), + name("dstx"), m_block); + } + if ((mask & TGSI_WRITEMASK_Y)) { + llvm::Value *y = new ExtractElementInst(src, unsigned(1), + name("y"), m_block); + dst = new InsertElementInst(dst, y, unsigned(1), + name("dsty"), m_block); + } + if ((mask & TGSI_WRITEMASK_Z)) { + llvm::Value *z = new ExtractElementInst(src, unsigned(2), + name("z"), m_block); + dst = new InsertElementInst(dst, z, unsigned(2), + name("dstz"), m_block); + } + if ((mask & TGSI_WRITEMASK_W)) { + llvm::Value *w = new ExtractElementInst(src, unsigned(3), + name("w"), m_block); + dst = new InsertElementInst(dst, w, unsigned(3), + name("dstw"), m_block); + } + return dst; +} + +const char * Storage::name(const char *prefix) +{ + ++m_idx; + snprintf(m_name, 32, "%s%d", prefix, m_idx); + return m_name; +} + +int Storage::numConsts() const +{ + return m_numConsts; +} + +llvm::Value * Storage::addrElement(int idx) const +{ + Value *ret = m_addrs[idx]; + if (!ret) + return m_undefFloatVec; + return ret; +} + +void Storage::setAddrElement(int idx, llvm::Value *val, int mask) +{ + if (mask != TGSI_WRITEMASK_XYZW) { + llvm::Value *templ = m_addrs[idx]; + val = maskWrite(val, mask, templ); + } + m_addrs[idx] = val; +} + +llvm::Value * Storage::extractIndex(llvm::Value *vec) +{ + llvm::Value *x = new ExtractElementInst(vec, unsigned(0), + name("x"), m_block); + return new FPToSIInst(x, IntegerType::get(32), name("intidx"), m_block); +} + +void Storage::setCurrentBlock(llvm::BasicBlock *block) +{ + m_block = block; +} + +llvm::Value * Storage::outputElement(int idx, llvm::Value *indIdx) +{ + Value *elem = element(DestsArg, idx, indIdx); + LoadInst *load = new LoadInst(elem, name("output"), false, m_block); + load->setAlignment(8); + + return load; +} + +llvm::Value * Storage::inputPtr() const +{ + return m_INPUT; +} + +void Storage::pushArguments(llvm::Value *input) +{ + m_argStack.push(m_INPUT); + + m_INPUT = input; +} + +void Storage::popArguments() +{ + m_INPUT = m_argStack.top(); + m_argStack.pop(); +} + +void Storage::pushTemps() +{ + m_extSwizzleVec = 0; +} + +void Storage::popTemps() +{ +} + +llvm::Value * Storage::immediateElement(int idx) +{ + return m_immediates[idx]; +} + +void Storage::addImmediate(float *val) +{ + std::vector vec(4); + vec[0] = ConstantFP::get(Type::FloatTy, APFloat(val[0])); + vec[1] = ConstantFP::get(Type::FloatTy, APFloat(val[1])); + vec[2] = ConstantFP::get(Type::FloatTy, APFloat(val[2])); + vec[3] = ConstantFP::get(Type::FloatTy, APFloat(val[3])); + m_immediates.push_back(ConstantVector::get(m_floatVecType, vec)); +} + + +llvm::Value * Storage::elemPtr(Args arg) +{ + std::vector indices; + indices.push_back(constantInt(0)); + indices.push_back(constantInt(static_cast(arg))); + GetElementPtrInst *getElem = new GetElementPtrInst(m_INPUT, + indices.begin(), + indices.end(), + name("input_ptr"), + m_block); + return new LoadInst(getElem, name("input_field"), false, m_block); +} + +llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx, + llvm::Value *indIdx ) +{ + GetElementPtrInst *getElem = 0; + + if (indIdx) { + getElem = new GetElementPtrInst(ptr, + BinaryOperator::create(Instruction::Add, + indIdx, + constantInt(idx), + name("add"), + m_block), + name("field"), + m_block); + } else { + getElem = new GetElementPtrInst(ptr, + constantInt(idx), + name("field"), + m_block); + } + return getElem; +} + +llvm::Value * Storage::element(Args arg, int idx, llvm::Value *indIdx ) +{ + Value *val = elemPtr(arg); + return elemIdx(val, idx, indIdx); +} + +void Storage::setKilElement(llvm::Value *val) +{ + std::vector indices; + indices.push_back(constantInt(0)); + indices.push_back(constantInt(static_cast(KilArg))); + GetElementPtrInst *elem = new GetElementPtrInst(m_INPUT, + indices.begin(), + indices.end(), + name("kil_ptr"), + m_block); + StoreInst *st = new StoreInst(val, elem, false, m_block); + st->setAlignment(8); +} + +#endif //MESA_LLVM + + diff --git a/src/gallium/auxiliary/llvm/storage.h b/src/gallium/auxiliary/llvm/storage.h new file mode 100644 index 0000000000..8574f7554e --- /dev/null +++ b/src/gallium/auxiliary/llvm/storage.h @@ -0,0 +1,133 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ + +#ifndef STORAGE_H +#define STORAGE_H + +#include +#include +#include +#include + +namespace llvm { + class BasicBlock; + class Constant; + class ConstantInt; + class LoadInst; + class Value; + class VectorType; +} + +class Storage +{ +public: + Storage(llvm::BasicBlock *block, + llvm::Value *input); + + llvm::Value *inputPtr() const; + + void setCurrentBlock(llvm::BasicBlock *block); + + llvm::ConstantInt *constantInt(int); + llvm::Constant *shuffleMask(int vec); + llvm::Value *inputElement(int idx, llvm::Value *indIdx =0); + llvm::Value *constElement(int idx, llvm::Value *indIdx =0); + llvm::Value *outputElement(int idx, llvm::Value *indIdx =0); + llvm::Value *tempElement(int idx, llvm::Value *indIdx =0); + llvm::Value *immediateElement(int idx); + + void setOutputElement(int dstIdx, llvm::Value *val, int mask); + void setTempElement(int idx, llvm::Value *val, int mask); + + llvm::Value *addrElement(int idx) const; + void setAddrElement(int idx, llvm::Value *val, int mask); + + void setKilElement(llvm::Value *val); + + llvm::Value *shuffleVector(llvm::Value *vec, int shuffle); + + llvm::Value *extractIndex(llvm::Value *vec); + + int numConsts() const; + + void pushArguments(llvm::Value *input); + void popArguments(); + void pushTemps(); + void popTemps(); + + void addImmediate(float *val); + +private: + llvm::Value *maskWrite(llvm::Value *src, int mask, llvm::Value *templ); + const char *name(const char *prefix); + + enum Args { + DestsArg = 0, + InputsArg = 1, + TempsArg = 2, + ConstsArg = 3, + KilArg = 4 + }; + llvm::Value *elemPtr(Args arg); + llvm::Value *elemIdx(llvm::Value *ptr, int idx, + llvm::Value *indIdx = 0); + llvm::Value *element(Args arg, int idx, llvm::Value *indIdx = 0); + +private: + llvm::BasicBlock *m_block; + llvm::Value *m_INPUT; + + std::map m_constInts; + std::map m_intVecs; + std::vector m_addrs; + std::vector m_immediates; + + llvm::VectorType *m_floatVecType; + llvm::VectorType *m_intVecType; + + char m_name[32]; + int m_idx; + + int m_numConsts; + + std::map m_destWriteMap; + std::map m_tempWriteMap; + + llvm::Value *m_undefFloatVec; + llvm::Value *m_undefIntVec; + llvm::Value *m_extSwizzleVec; + + std::stack m_argStack; + std::stack > m_tempStack; +}; + +#endif diff --git a/src/gallium/auxiliary/llvm/storagesoa.cpp b/src/gallium/auxiliary/llvm/storagesoa.cpp new file mode 100644 index 0000000000..ed0674a96f --- /dev/null +++ b/src/gallium/auxiliary/llvm/storagesoa.cpp @@ -0,0 +1,389 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "storagesoa.h" + +#include "gallivm_p.h" + +#include "pipe/p_shader_tokens.h" +#include "pipe/p_debug.h" + +#include +#include +#include + +#include +#include +#include +#include +#include + +using namespace llvm; + + +StorageSoa::StorageSoa(llvm::BasicBlock *block, + llvm::Value *input, + llvm::Value *output, + llvm::Value *consts, + llvm::Value *temps) + : m_block(block), + m_input(input), + m_output(output), + m_consts(consts), + m_temps(temps), + m_immediates(0), + m_idx(0) +{ +} + +void StorageSoa::addImmediate(float *vec) +{ + std::vector vals(4); + vals[0] = vec[0]; + vals[1] = vec[1]; + vals[2] = vec[2]; + vals[3] = vec[3]; + m_immediatesToFlush.push_back(vals); +} + +void StorageSoa::declareImmediates() +{ + if (m_immediatesToFlush.empty()) + return; + + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + ArrayType *vectorChannels = ArrayType::get(vectorType, 4); + ArrayType *arrayType = ArrayType::get(vectorChannels, m_immediatesToFlush.size()); + + m_immediates = new GlobalVariable( + /*Type=*/arrayType, + /*isConstant=*/false, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Initializer=*/0, // has initializer, specified below + /*Name=*/name("immediates"), + currentModule()); + + std::vector arrayVals; + for (unsigned int i = 0; i < m_immediatesToFlush.size(); ++i) { + std::vector vec = m_immediatesToFlush[i]; + std::vector vals(4); + std::vector channelArray; + + vals[0] = vec[0]; vals[1] = vec[0]; vals[2] = vec[0]; vals[3] = vec[0]; + llvm::Constant *xChannel = createConstGlobalVector(vals); + + vals[0] = vec[1]; vals[1] = vec[1]; vals[2] = vec[1]; vals[3] = vec[1]; + llvm::Constant *yChannel = createConstGlobalVector(vals); + + vals[0] = vec[2]; vals[1] = vec[2]; vals[2] = vec[2]; vals[3] = vec[2]; + llvm::Constant *zChannel = createConstGlobalVector(vals); + + vals[0] = vec[3]; vals[1] = vec[3]; vals[2] = vec[3]; vals[3] = vec[3]; + llvm::Constant *wChannel = createConstGlobalVector(vals); + channelArray.push_back(xChannel); + channelArray.push_back(yChannel); + channelArray.push_back(zChannel); + channelArray.push_back(wChannel); + Constant *constChannels = ConstantArray::get(vectorChannels, + channelArray); + arrayVals.push_back(constChannels); + } + Constant *constArray = ConstantArray::get(arrayType, arrayVals); + m_immediates->setInitializer(constArray); + + m_immediatesToFlush.clear(); +} + +llvm::Value *StorageSoa::addrElement(int idx) const +{ + std::map::const_iterator itr = m_addresses.find(idx); + if (itr == m_addresses.end()) { + debug_printf("Trying to access invalid shader 'address'\n"); + return 0; + } + llvm::Value * res = (*itr).second; + + res = new LoadInst(res, name("addr"), false, m_block); + + return res; +} + +std::vector StorageSoa::inputElement(llvm::Value *idx) +{ + std::vector res(4); + + res[0] = element(m_input, idx, 0); + res[1] = element(m_input, idx, 1); + res[2] = element(m_input, idx, 2); + res[3] = element(m_input, idx, 3); + + return res; +} + +std::vector StorageSoa::constElement(llvm::Value *idx) +{ + std::vector res(4); + llvm::Value *xChannel, *yChannel, *zChannel, *wChannel; + + xChannel = elementPointer(m_consts, idx, 0); + yChannel = elementPointer(m_consts, idx, 1); + zChannel = elementPointer(m_consts, idx, 2); + wChannel = elementPointer(m_consts, idx, 3); + + res[0] = alignedArrayLoad(xChannel); + res[1] = alignedArrayLoad(yChannel); + res[2] = alignedArrayLoad(zChannel); + res[3] = alignedArrayLoad(wChannel); + + return res; +} + +std::vector StorageSoa::outputElement(llvm::Value *idx) +{ + std::vector res(4); + + res[0] = element(m_output, idx, 0); + res[1] = element(m_output, idx, 1); + res[2] = element(m_output, idx, 2); + res[3] = element(m_output, idx, 3); + + return res; +} + +std::vector StorageSoa::tempElement(llvm::Value *idx) +{ + std::vector res(4); + + res[0] = element(m_temps, idx, 0); + res[1] = element(m_temps, idx, 1); + res[2] = element(m_temps, idx, 2); + res[3] = element(m_temps, idx, 3); + + return res; +} + +std::vector StorageSoa::immediateElement(llvm::Value *idx) +{ + std::vector res(4); + + res[0] = element(m_immediates, idx, 0); + res[1] = element(m_immediates, idx, 1); + res[2] = element(m_immediates, idx, 2); + res[3] = element(m_immediates, idx, 3); + + return res; +} + +llvm::Value * StorageSoa::elementPointer(llvm::Value *ptr, llvm::Value *index, + int channel) const +{ + std::vector indices; + if (m_immediates == ptr) + indices.push_back(constantInt(0)); + indices.push_back(index); + indices.push_back(constantInt(channel)); + + GetElementPtrInst *getElem = new GetElementPtrInst(ptr, + indices.begin(), + indices.end(), + name("ptr"), + m_block); + return getElem; +} + +llvm::Value * StorageSoa::element(llvm::Value *ptr, llvm::Value *index, + int channel) const +{ + llvm::Value *res = elementPointer(ptr, index, channel); + LoadInst *load = new LoadInst(res, name("element"), false, m_block); + //load->setAlignment(8); + return load; +} + +const char * StorageSoa::name(const char *prefix) const +{ + ++m_idx; + snprintf(m_name, 32, "%s%d", prefix, m_idx); + return m_name; +} + +llvm::ConstantInt * StorageSoa::constantInt(int idx) const +{ + if (m_constInts.find(idx) != m_constInts.end()) { + return m_constInts[idx]; + } + ConstantInt *constInt = ConstantInt::get(APInt(32, idx)); + m_constInts[idx] = constInt; + return constInt; +} + +llvm::Value *StorageSoa::alignedArrayLoad(llvm::Value *val) +{ + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + PointerType *vectorPtr = PointerType::get(vectorType, 0); + + CastInst *cast = new BitCastInst(val, vectorPtr, name("toVector"), m_block); + LoadInst *load = new LoadInst(cast, name("alignLoad"), false, m_block); + load->setAlignment(8); + return load; +} + +llvm::Module * StorageSoa::currentModule() const +{ + if (!m_block || !m_block->getParent()) + return 0; + + return m_block->getParent()->getParent(); +} + +llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector &vec) +{ + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + std::vector immValues; + ConstantFP *constx = ConstantFP::get(Type::FloatTy, APFloat(vec[0])); + ConstantFP *consty = ConstantFP::get(Type::FloatTy, APFloat(vec[1])); + ConstantFP *constz = ConstantFP::get(Type::FloatTy, APFloat(vec[2])); + ConstantFP *constw = ConstantFP::get(Type::FloatTy, APFloat(vec[3])); + immValues.push_back(constx); + immValues.push_back(consty); + immValues.push_back(constz); + immValues.push_back(constw); + Constant *constVector = ConstantVector::get(vectorType, immValues); + + return constVector; +} + +std::vector StorageSoa::load(Argument type, int idx, int swizzle, + llvm::Value *indIdx) +{ + std::vector val(4); + + //if we have an indirect index, always use that + // if not use the integer offset to create one + llvm::Value *realIndex = 0; + if (indIdx) + realIndex = indIdx; + else + realIndex = constantInt(idx); + debug_printf("XXXXXXXXX realIdx = %p, indIdx = %p\n", realIndex, indIdx); + + switch(type) { + case Input: + val = inputElement(realIndex); + break; + case Output: + val = outputElement(realIndex); + break; + case Temp: + val = tempElement(realIndex); + break; + case Const: + val = constElement(realIndex); + break; + case Immediate: + val = immediateElement(realIndex); + break; + case Address: + debug_printf("Address not handled in the load phase!\n"); + assert(0); + break; + } + if (!gallivm_is_swizzle(swizzle)) + return val; + + std::vector res(4); + + res[0] = val[gallivm_x_swizzle(swizzle)]; + res[1] = val[gallivm_y_swizzle(swizzle)]; + res[2] = val[gallivm_z_swizzle(swizzle)]; + res[3] = val[gallivm_w_swizzle(swizzle)]; + return res; +} + +void StorageSoa::store(Argument type, int idx, const std::vector &val, + int mask) +{ + llvm::Value *out = 0; + switch(type) { + case Output: + out = m_output; + break; + case Temp: + out = m_temps; + break; + case Input: + out = m_input; + break; + case Address: { + llvm::Value *addr = m_addresses[idx]; + if (!addr) { + addAddress(idx); + addr = m_addresses[idx]; + assert(addr); + } + new StoreInst(val[0], addr, false, m_block); + return; + break; + } + default: + debug_printf("Can't save output of this type: %d !\n", type); + assert(0); + break; + } + llvm::Value *realIndex = constantInt(idx); + if ((mask & TGSI_WRITEMASK_X)) { + llvm::Value *xChannel = elementPointer(out, realIndex, 0); + new StoreInst(val[0], xChannel, false, m_block); + } + if ((mask & TGSI_WRITEMASK_Y)) { + llvm::Value *yChannel = elementPointer(out, realIndex, 1); + new StoreInst(val[1], yChannel, false, m_block); + } + if ((mask & TGSI_WRITEMASK_Z)) { + llvm::Value *zChannel = elementPointer(out, realIndex, 2); + new StoreInst(val[2], zChannel, false, m_block); + } + if ((mask & TGSI_WRITEMASK_W)) { + llvm::Value *wChannel = elementPointer(out, realIndex, 3); + new StoreInst(val[3], wChannel, false, m_block); + } +} + +void StorageSoa::addAddress(int idx) +{ + GlobalVariable *val = new GlobalVariable( + /*Type=*/IntegerType::get(32), + /*isConstant=*/false, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Initializer=*/0, // has initializer, specified below + /*Name=*/name("address"), + currentModule()); + val->setInitializer(Constant::getNullValue(IntegerType::get(32))); + + debug_printf("adding to %d\n", idx); + m_addresses[idx] = val; +} diff --git a/src/gallium/auxiliary/llvm/storagesoa.h b/src/gallium/auxiliary/llvm/storagesoa.h new file mode 100644 index 0000000000..6443351f27 --- /dev/null +++ b/src/gallium/auxiliary/llvm/storagesoa.h @@ -0,0 +1,111 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef STORAGESOA_H +#define STORAGESOA_H + +#include +#include +#include + +namespace llvm { + class BasicBlock; + class Constant; + class ConstantInt; + class GlobalVariable; + class LoadInst; + class Value; + class VectorType; + class Module; +} + +class StorageSoa +{ +public: + enum Argument { + Input, + Output, + Temp, + Const, + Immediate, + Address + }; +public: + StorageSoa(llvm::BasicBlock *block, + llvm::Value *input, + llvm::Value *output, + llvm::Value *consts, + llvm::Value *temps); + + + std::vector load(Argument type, int idx, int swizzle, + llvm::Value *indIdx =0); + void store(Argument type, int idx, const std::vector &val, + int mask); + + void addImmediate(float *vec); + void declareImmediates(); + + void addAddress(int idx); + + llvm::Value * addrElement(int idx) const; + + llvm::ConstantInt *constantInt(int) const; +private: + llvm::Value *elementPointer(llvm::Value *ptr, llvm::Value *indIdx, + int channel) const; + llvm::Value *element(llvm::Value *ptr, llvm::Value *idx, + int channel) const; + const char *name(const char *prefix) const; + llvm::Value *alignedArrayLoad(llvm::Value *val); + llvm::Module *currentModule() const; + llvm::Constant *createConstGlobalVector(const std::vector &vec); + + std::vector inputElement(llvm::Value *indIdx); + std::vector constElement(llvm::Value *indIdx); + std::vector outputElement(llvm::Value *indIdx); + std::vector tempElement(llvm::Value *indIdx); + std::vector immediateElement(llvm::Value *indIdx); +private: + llvm::BasicBlock *m_block; + + llvm::Value *m_input; + llvm::Value *m_output; + llvm::Value *m_consts; + llvm::Value *m_temps; + llvm::GlobalVariable *m_immediates; + + std::map m_addresses; + + std::vector > m_immediatesToFlush; + + mutable std::map m_constInts; + mutable char m_name[32]; + mutable int m_idx; +}; + +#endif diff --git a/src/gallium/auxiliary/llvm/tgsitollvm.cpp b/src/gallium/auxiliary/llvm/tgsitollvm.cpp new file mode 100644 index 0000000000..2cb4acce32 --- /dev/null +++ b/src/gallium/auxiliary/llvm/tgsitollvm.cpp @@ -0,0 +1,1221 @@ +#include "tgsitollvm.h" + +#include "gallivm.h" +#include "gallivm_p.h" + +#include "storage.h" +#include "instructions.h" +#include "storagesoa.h" +#include "instructionssoa.h" + +#include "pipe/p_shader_tokens.h" + +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_build.h" +#include "tgsi/util/tgsi_dump.h" + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include + +using namespace llvm; + +static inline FunctionType *vertexShaderFunctionType() +{ + //Function takes three arguments, + // the calling code has to make sure the types it will + // pass are castable to the following: + // [4 x <4 x float>] inputs, + // [4 x <4 x float>] output, + // [4 x [4 x float]] consts, + // [4 x <4 x float>] temps + + std::vector funcArgs; + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + ArrayType *vectorArray = ArrayType::get(vectorType, 4); + PointerType *vectorArrayPtr = PointerType::get(vectorArray, 0); + + ArrayType *floatArray = ArrayType::get(Type::FloatTy, 4); + ArrayType *constsArray = ArrayType::get(floatArray, 4); + PointerType *constsArrayPtr = PointerType::get(constsArray, 0); + + funcArgs.push_back(vectorArrayPtr);//inputs + funcArgs.push_back(vectorArrayPtr);//output + funcArgs.push_back(constsArrayPtr);//consts + funcArgs.push_back(vectorArrayPtr);//temps + + FunctionType *functionType = FunctionType::get( + /*Result=*/Type::VoidTy, + /*Params=*/funcArgs, + /*isVarArg=*/false); + + return functionType; +} + +static inline void +add_interpolator(struct gallivm_ir *ir, + struct gallivm_interpolate *interp) +{ + ir->interpolators[ir->num_interp] = *interp; + ++ir->num_interp; +} + +static void +translate_declaration(struct gallivm_ir *prog, + llvm::Module *module, + Storage *storage, + struct tgsi_full_declaration *decl, + struct tgsi_full_declaration *fd) +{ + if (decl->Declaration.File == TGSI_FILE_INPUT) { + unsigned first, last, mask; + uint interp_method; + + assert(decl->Declaration.Declare == TGSI_DECLARE_RANGE); + + first = decl->u.DeclarationRange.First; + last = decl->u.DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + /* Do not touch WPOS.xy */ + if (first == 0) { + mask &= ~TGSI_WRITEMASK_XY; + if (mask == TGSI_WRITEMASK_NONE) { + first++; + if (first > last) { + return; + } + } + } + + interp_method = decl->Interpolation.Interpolate; + + if (mask == TGSI_WRITEMASK_XYZW) { + unsigned i, j; + + for (i = first; i <= last; i++) { + for (j = 0; j < NUM_CHANNELS; j++) { + //interp( mach, i, j ); + struct gallivm_interpolate interp; + interp.type = interp_method; + interp.attrib = i; + interp.chan = j; + add_interpolator(prog, &interp); + } + } + } else { + unsigned i, j; + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + for( i = first; i <= last; i++ ) { + struct gallivm_interpolate interp; + interp.type = interp_method; + interp.attrib = i; + interp.chan = j; + add_interpolator(prog, &interp); + } + } + } + } + } +} + +static void +translate_declarationir(struct gallivm_ir *, + llvm::Module *, + StorageSoa *storage, + struct tgsi_full_declaration *decl, + struct tgsi_full_declaration *) +{ + if (decl->Declaration.File == TGSI_FILE_ADDRESS) { + int idx = decl->u.DeclarationRange.First; + storage->addAddress(idx); + } +} + +static void +translate_immediate(Storage *storage, + struct tgsi_full_immediate *imm) +{ + float vec[4]; + int i; + for (i = 0; i < imm->Immediate.Size - 1; ++i) { + switch (imm->Immediate.DataType) { + case TGSI_IMM_FLOAT32: + vec[i] = imm->u.ImmediateFloat32[i].Float; + break; + default: + assert(0); + } + } + storage->addImmediate(vec); +} + + +static void +translate_immediateir(StorageSoa *storage, + struct tgsi_full_immediate *imm) +{ + float vec[4]; + int i; + for (i = 0; i < imm->Immediate.Size - 1; ++i) { + switch (imm->Immediate.DataType) { + case TGSI_IMM_FLOAT32: + vec[i] = imm->u.ImmediateFloat32[i].Float; + break; + default: + assert(0); + } + } + storage->addImmediate(vec); +} + +static inline int +swizzleInt(struct tgsi_full_src_register *src) +{ + int swizzle = 0; + int start = 1000; + + for (int k = 0; k < 4; ++k) { + swizzle += tgsi_util_get_full_src_register_extswizzle(src, k) * start; + start /= 10; + } + return swizzle; +} + +static inline llvm::Value * +swizzleVector(llvm::Value *val, struct tgsi_full_src_register *src, + Storage *storage) +{ + int swizzle = swizzleInt(src); + + if (gallivm_is_swizzle(swizzle)) { + /*fprintf(stderr, "XXXXXXXX swizzle = %d\n", swizzle);*/ + val = storage->shuffleVector(val, swizzle); + } + return val; +} + +static void +translate_instruction(llvm::Module *module, + Storage *storage, + Instructions *instr, + struct tgsi_full_instruction *inst, + struct tgsi_full_instruction *fi, + unsigned instno) +{ + llvm::Value *inputs[4]; + inputs[0] = 0; + inputs[1] = 0; + inputs[2] = 0; + inputs[3] = 0; + + for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { + struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + llvm::Value *val = 0; + llvm::Value *indIdx = 0; + + if (src->SrcRegister.Indirect) { + indIdx = storage->addrElement(src->SrcRegisterInd.Index); + indIdx = storage->extractIndex(indIdx); + } + if (src->SrcRegister.File == TGSI_FILE_CONSTANT) { + val = storage->constElement(src->SrcRegister.Index, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_INPUT) { + val = storage->inputElement(src->SrcRegister.Index, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) { + val = storage->tempElement(src->SrcRegister.Index); + } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) { + val = storage->outputElement(src->SrcRegister.Index, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { + val = storage->immediateElement(src->SrcRegister.Index); + } else { + fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File); + return; + } + + inputs[i] = swizzleVector(val, src, storage); + } + + /*if (inputs[0]) + instr->printVector(inputs[0]); + if (inputs[1]) + instr->printVector(inputs[1]);*/ + llvm::Value *out = 0; + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: { + out = instr->arl(inputs[0]); + } + break; + case TGSI_OPCODE_MOV: { + out = inputs[0]; + } + break; + case TGSI_OPCODE_LIT: { + out = instr->lit(inputs[0]); + } + break; + case TGSI_OPCODE_RCP: { + out = instr->rcp(inputs[0]); + } + break; + case TGSI_OPCODE_RSQ: { + out = instr->rsq(inputs[0]); + } + break; + case TGSI_OPCODE_EXP: + break; + case TGSI_OPCODE_LOG: + break; + case TGSI_OPCODE_MUL: { + out = instr->mul(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_ADD: { + out = instr->add(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DP3: { + out = instr->dp3(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DP4: { + out = instr->dp4(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DST: { + out = instr->dst(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_MIN: { + out = instr->min(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_MAX: { + out = instr->max(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_SLT: { + out = instr->slt(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_SGE: { + out = instr->sge(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_MAD: { + out = instr->madd(inputs[0], inputs[1], inputs[2]); + } + break; + case TGSI_OPCODE_SUB: { + out = instr->sub(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_LERP: { + out = instr->lerp(inputs[0], inputs[1], inputs[2]); + } + break; + case TGSI_OPCODE_CND: + break; + case TGSI_OPCODE_CND0: + break; + case TGSI_OPCODE_DOT2ADD: + break; + case TGSI_OPCODE_INDEX: + break; + case TGSI_OPCODE_NEGATE: + break; + case TGSI_OPCODE_FRAC: { + out = instr->frc(inputs[0]); + } + break; + case TGSI_OPCODE_CLAMP: + break; + case TGSI_OPCODE_FLOOR: { + out = instr->floor(inputs[0]); + } + break; + case TGSI_OPCODE_ROUND: + break; + case TGSI_OPCODE_EXPBASE2: { + out = instr->ex2(inputs[0]); + } + break; + case TGSI_OPCODE_LOGBASE2: { + out = instr->lg2(inputs[0]); + } + break; + case TGSI_OPCODE_POWER: { + out = instr->pow(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_CROSSPRODUCT: { + out = instr->cross(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_MULTIPLYMATRIX: + break; + case TGSI_OPCODE_ABS: { + out = instr->abs(inputs[0]); + } + break; + case TGSI_OPCODE_RCC: + break; + case TGSI_OPCODE_DPH: { + out = instr->dph(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_COS: { + out = instr->cos(inputs[0]); + } + break; + case TGSI_OPCODE_DDX: + break; + case TGSI_OPCODE_DDY: + break; + case TGSI_OPCODE_KILP: { + out = instr->kilp(inputs[0]); + storage->setKilElement(out); + return; + } + break; + case TGSI_OPCODE_PK2H: + break; + case TGSI_OPCODE_PK2US: + break; + case TGSI_OPCODE_PK4B: + break; + case TGSI_OPCODE_PK4UB: + break; + case TGSI_OPCODE_RFL: + break; + case TGSI_OPCODE_SEQ: + break; + case TGSI_OPCODE_SFL: + break; + case TGSI_OPCODE_SGT: { + out = instr->sgt(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_SIN: { + out = instr->sin(inputs[0]); + } + break; + case TGSI_OPCODE_SLE: + break; + case TGSI_OPCODE_SNE: + break; + case TGSI_OPCODE_STR: + break; + case TGSI_OPCODE_TEX: + break; + case TGSI_OPCODE_TXD: + break; + case TGSI_OPCODE_UP2H: + break; + case TGSI_OPCODE_UP2US: + break; + case TGSI_OPCODE_UP4B: + break; + case TGSI_OPCODE_UP4UB: + break; + case TGSI_OPCODE_X2D: + break; + case TGSI_OPCODE_ARA: + break; + case TGSI_OPCODE_ARR: + break; + case TGSI_OPCODE_BRA: + break; + case TGSI_OPCODE_CAL: { + instr->cal(inst->InstructionExtLabel.Label, storage->inputPtr()); + return; + } + break; + case TGSI_OPCODE_RET: { + instr->end(); + return; + } + break; + case TGSI_OPCODE_SSG: + break; + case TGSI_OPCODE_CMP: { + out = instr->cmp(inputs[0], inputs[1], inputs[2]); + } + break; + case TGSI_OPCODE_SCS: { + out = instr->scs(inputs[0]); + } + break; + case TGSI_OPCODE_TXB: + break; + case TGSI_OPCODE_NRM: + break; + case TGSI_OPCODE_DIV: + break; + case TGSI_OPCODE_DP2: + break; + case TGSI_OPCODE_TXL: + break; + case TGSI_OPCODE_BRK: { + instr->brk(); + return; + } + break; + case TGSI_OPCODE_IF: { + instr->ifop(inputs[0]); + storage->setCurrentBlock(instr->currentBlock()); + return; //just update the state + } + break; + case TGSI_OPCODE_LOOP: + break; + case TGSI_OPCODE_REP: + break; + case TGSI_OPCODE_ELSE: { + instr->elseop(); + storage->setCurrentBlock(instr->currentBlock()); + return; //only state update + } + break; + case TGSI_OPCODE_ENDIF: { + instr->endif(); + storage->setCurrentBlock(instr->currentBlock()); + return; //just update the state + } + break; + case TGSI_OPCODE_ENDLOOP: + break; + case TGSI_OPCODE_ENDREP: + break; + case TGSI_OPCODE_PUSHA: + break; + case TGSI_OPCODE_POPA: + break; + case TGSI_OPCODE_CEIL: + break; + case TGSI_OPCODE_I2F: + break; + case TGSI_OPCODE_NOT: + break; + case TGSI_OPCODE_TRUNC: { + out = instr->trunc(inputs[0]); + } + break; + case TGSI_OPCODE_SHL: + break; + case TGSI_OPCODE_SHR: + break; + case TGSI_OPCODE_AND: + break; + case TGSI_OPCODE_OR: + break; + case TGSI_OPCODE_MOD: + break; + case TGSI_OPCODE_XOR: + break; + case TGSI_OPCODE_SAD: + break; + case TGSI_OPCODE_TXF: + break; + case TGSI_OPCODE_TXQ: + break; + case TGSI_OPCODE_CONT: + break; + case TGSI_OPCODE_EMIT: + break; + case TGSI_OPCODE_ENDPRIM: + break; + case TGSI_OPCODE_BGNLOOP2: { + instr->beginLoop(); + storage->setCurrentBlock(instr->currentBlock()); + return; + } + break; + case TGSI_OPCODE_BGNSUB: { + instr->bgnSub(instno); + storage->setCurrentBlock(instr->currentBlock()); + storage->pushTemps(); + return; + } + break; + case TGSI_OPCODE_ENDLOOP2: { + instr->endLoop(); + storage->setCurrentBlock(instr->currentBlock()); + return; + } + break; + case TGSI_OPCODE_ENDSUB: { + instr->endSub(); + storage->setCurrentBlock(instr->currentBlock()); + storage->popArguments(); + storage->popTemps(); + return; + } + break; + case TGSI_OPCODE_NOISE1: + break; + case TGSI_OPCODE_NOISE2: + break; + case TGSI_OPCODE_NOISE3: + break; + case TGSI_OPCODE_NOISE4: + break; + case TGSI_OPCODE_NOP: + break; + case TGSI_OPCODE_TEXBEM: + break; + case TGSI_OPCODE_TEXBEML: + break; + case TGSI_OPCODE_TEXREG2AR: + break; + case TGSI_OPCODE_TEXM3X2PAD: + break; + case TGSI_OPCODE_TEXM3X2TEX: + break; + case TGSI_OPCODE_TEXM3X3PAD: + break; + case TGSI_OPCODE_TEXM3X3TEX: + break; + case TGSI_OPCODE_TEXM3X3SPEC: + break; + case TGSI_OPCODE_TEXM3X3VSPEC: + break; + case TGSI_OPCODE_TEXREG2GB: + break; + case TGSI_OPCODE_TEXREG2RGB: + break; + case TGSI_OPCODE_TEXDP3TEX: + break; + case TGSI_OPCODE_TEXDP3: + break; + case TGSI_OPCODE_TEXM3X3: + break; + case TGSI_OPCODE_TEXM3X2DEPTH: + break; + case TGSI_OPCODE_TEXDEPTH: + break; + case TGSI_OPCODE_BEM: + break; + case TGSI_OPCODE_M4X3: + break; + case TGSI_OPCODE_M3X4: + break; + case TGSI_OPCODE_M3X3: + break; + case TGSI_OPCODE_M3X2: + break; + case TGSI_OPCODE_NRM4: + break; + case TGSI_OPCODE_CALLNZ: + break; + case TGSI_OPCODE_IFC: + break; + case TGSI_OPCODE_BREAKC: + break; + case TGSI_OPCODE_KIL: + break; + case TGSI_OPCODE_END: + instr->end(); + return; + break; + default: + fprintf(stderr, "ERROR: Unknown opcode %d\n", + inst->Instruction.Opcode); + assert(0); + break; + } + + if (!out) { + fprintf(stderr, "ERROR: unsupported opcode %d\n", + inst->Instruction.Opcode); + assert(!"Unsupported opcode"); + } + + /* # not sure if we need this */ + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; + case TGSI_SAT_ZERO_ONE: + /*TXT( "_SAT" );*/ + break; + case TGSI_SAT_MINUS_PLUS_ONE: + /*TXT( "_SAT[-1,1]" );*/ + break; + default: + assert( 0 ); + } + + /* store results */ + for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + + if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { + storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) { + storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) { + storage->setAddrElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } else { + fprintf(stderr, "ERROR: unsupported LLVM destination!"); + assert(!"wrong destination"); + } + } +} + + +static void +translate_instructionir(llvm::Module *module, + StorageSoa *storage, + InstructionsSoa *instr, + struct tgsi_full_instruction *inst, + struct tgsi_full_instruction *fi, + unsigned instno) +{ + std::vector< std::vector > inputs(inst->Instruction.NumSrcRegs); + + for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { + struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + std::vector val; + llvm::Value *indIdx = 0; + int swizzle = swizzleInt(src); + + if (src->SrcRegister.Indirect) { + indIdx = storage->addrElement(src->SrcRegisterInd.Index); + } + if (src->SrcRegister.File == TGSI_FILE_CONSTANT) { + val = storage->load(StorageSoa::Const, + src->SrcRegister.Index, swizzle, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_INPUT) { + val = storage->load(StorageSoa::Input, + src->SrcRegister.Index, swizzle, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) { + val = storage->load(StorageSoa::Temp, + src->SrcRegister.Index, swizzle, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) { + val = storage->load(StorageSoa::Output, + src->SrcRegister.Index, swizzle, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { + val = storage->load(StorageSoa::Immediate, + src->SrcRegister.Index, swizzle, indIdx); + } else { + fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File); + return; + } + + inputs[i] = val; + } + + std::vector out(4); + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: { + out = instr->arl(inputs[0]); + } + break; + case TGSI_OPCODE_MOV: { + out = inputs[0]; + } + break; + case TGSI_OPCODE_LIT: { + } + break; + case TGSI_OPCODE_RCP: { + } + break; + case TGSI_OPCODE_RSQ: { + } + break; + case TGSI_OPCODE_EXP: + break; + case TGSI_OPCODE_LOG: + break; + case TGSI_OPCODE_MUL: { + out = instr->mul(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_ADD: { + out = instr->add(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DP3: { + } + break; + case TGSI_OPCODE_DP4: { + } + break; + case TGSI_OPCODE_DST: { + } + break; + case TGSI_OPCODE_MIN: { + } + break; + case TGSI_OPCODE_MAX: { + } + break; + case TGSI_OPCODE_SLT: { + } + break; + case TGSI_OPCODE_SGE: { + } + break; + case TGSI_OPCODE_MAD: { + out = instr->madd(inputs[0], inputs[1], inputs[2]); + } + break; + case TGSI_OPCODE_SUB: { + } + break; + case TGSI_OPCODE_LERP: { + } + break; + case TGSI_OPCODE_CND: + break; + case TGSI_OPCODE_CND0: + break; + case TGSI_OPCODE_DOT2ADD: + break; + case TGSI_OPCODE_INDEX: + break; + case TGSI_OPCODE_NEGATE: + break; + case TGSI_OPCODE_FRAC: { + } + break; + case TGSI_OPCODE_CLAMP: + break; + case TGSI_OPCODE_FLOOR: { + } + break; + case TGSI_OPCODE_ROUND: + break; + case TGSI_OPCODE_EXPBASE2: { + } + break; + case TGSI_OPCODE_LOGBASE2: { + } + break; + case TGSI_OPCODE_POWER: { + } + break; + case TGSI_OPCODE_CROSSPRODUCT: { + } + break; + case TGSI_OPCODE_MULTIPLYMATRIX: + break; + case TGSI_OPCODE_ABS: { + } + break; + case TGSI_OPCODE_RCC: + break; + case TGSI_OPCODE_DPH: { + } + break; + case TGSI_OPCODE_COS: { + } + break; + case TGSI_OPCODE_DDX: + break; + case TGSI_OPCODE_DDY: + break; + case TGSI_OPCODE_KILP: { + } + break; + case TGSI_OPCODE_PK2H: + break; + case TGSI_OPCODE_PK2US: + break; + case TGSI_OPCODE_PK4B: + break; + case TGSI_OPCODE_PK4UB: + break; + case TGSI_OPCODE_RFL: + break; + case TGSI_OPCODE_SEQ: + break; + case TGSI_OPCODE_SFL: + break; + case TGSI_OPCODE_SGT: { + } + break; + case TGSI_OPCODE_SIN: { + } + break; + case TGSI_OPCODE_SLE: + break; + case TGSI_OPCODE_SNE: + break; + case TGSI_OPCODE_STR: + break; + case TGSI_OPCODE_TEX: + break; + case TGSI_OPCODE_TXD: + break; + case TGSI_OPCODE_UP2H: + break; + case TGSI_OPCODE_UP2US: + break; + case TGSI_OPCODE_UP4B: + break; + case TGSI_OPCODE_UP4UB: + break; + case TGSI_OPCODE_X2D: + break; + case TGSI_OPCODE_ARA: + break; + case TGSI_OPCODE_ARR: + break; + case TGSI_OPCODE_BRA: + break; + case TGSI_OPCODE_CAL: { + } + break; + case TGSI_OPCODE_RET: { + } + break; + case TGSI_OPCODE_SSG: + break; + case TGSI_OPCODE_CMP: { + } + break; + case TGSI_OPCODE_SCS: { + } + break; + case TGSI_OPCODE_TXB: + break; + case TGSI_OPCODE_NRM: + break; + case TGSI_OPCODE_DIV: + break; + case TGSI_OPCODE_DP2: + break; + case TGSI_OPCODE_TXL: + break; + case TGSI_OPCODE_BRK: { + } + break; + case TGSI_OPCODE_IF: { + } + break; + case TGSI_OPCODE_LOOP: + break; + case TGSI_OPCODE_REP: + break; + case TGSI_OPCODE_ELSE: { + } + break; + case TGSI_OPCODE_ENDIF: { + } + break; + case TGSI_OPCODE_ENDLOOP: + break; + case TGSI_OPCODE_ENDREP: + break; + case TGSI_OPCODE_PUSHA: + break; + case TGSI_OPCODE_POPA: + break; + case TGSI_OPCODE_CEIL: + break; + case TGSI_OPCODE_I2F: + break; + case TGSI_OPCODE_NOT: + break; + case TGSI_OPCODE_TRUNC: { + } + break; + case TGSI_OPCODE_SHL: + break; + case TGSI_OPCODE_SHR: + break; + case TGSI_OPCODE_AND: + break; + case TGSI_OPCODE_OR: + break; + case TGSI_OPCODE_MOD: + break; + case TGSI_OPCODE_XOR: + break; + case TGSI_OPCODE_SAD: + break; + case TGSI_OPCODE_TXF: + break; + case TGSI_OPCODE_TXQ: + break; + case TGSI_OPCODE_CONT: + break; + case TGSI_OPCODE_EMIT: + break; + case TGSI_OPCODE_ENDPRIM: + break; + case TGSI_OPCODE_BGNLOOP2: { + } + break; + case TGSI_OPCODE_BGNSUB: { + } + break; + case TGSI_OPCODE_ENDLOOP2: { + } + break; + case TGSI_OPCODE_ENDSUB: { + } + break; + case TGSI_OPCODE_NOISE1: + break; + case TGSI_OPCODE_NOISE2: + break; + case TGSI_OPCODE_NOISE3: + break; + case TGSI_OPCODE_NOISE4: + break; + case TGSI_OPCODE_NOP: + break; + case TGSI_OPCODE_TEXBEM: + break; + case TGSI_OPCODE_TEXBEML: + break; + case TGSI_OPCODE_TEXREG2AR: + break; + case TGSI_OPCODE_TEXM3X2PAD: + break; + case TGSI_OPCODE_TEXM3X2TEX: + break; + case TGSI_OPCODE_TEXM3X3PAD: + break; + case TGSI_OPCODE_TEXM3X3TEX: + break; + case TGSI_OPCODE_TEXM3X3SPEC: + break; + case TGSI_OPCODE_TEXM3X3VSPEC: + break; + case TGSI_OPCODE_TEXREG2GB: + break; + case TGSI_OPCODE_TEXREG2RGB: + break; + case TGSI_OPCODE_TEXDP3TEX: + break; + case TGSI_OPCODE_TEXDP3: + break; + case TGSI_OPCODE_TEXM3X3: + break; + case TGSI_OPCODE_TEXM3X2DEPTH: + break; + case TGSI_OPCODE_TEXDEPTH: + break; + case TGSI_OPCODE_BEM: + break; + case TGSI_OPCODE_M4X3: + break; + case TGSI_OPCODE_M3X4: + break; + case TGSI_OPCODE_M3X3: + break; + case TGSI_OPCODE_M3X2: + break; + case TGSI_OPCODE_NRM4: + break; + case TGSI_OPCODE_CALLNZ: + break; + case TGSI_OPCODE_IFC: + break; + case TGSI_OPCODE_BREAKC: + break; + case TGSI_OPCODE_KIL: + break; + case TGSI_OPCODE_END: + instr->end(); + return; + break; + default: + fprintf(stderr, "ERROR: Unknown opcode %d\n", + inst->Instruction.Opcode); + assert(0); + break; + } + + if (!out[0]) { + fprintf(stderr, "ERROR: unsupported opcode %d\n", + inst->Instruction.Opcode); + assert(!"Unsupported opcode"); + } + + /* store results */ + for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + + if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { + storage->store(StorageSoa::Output, + dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) { + storage->store(StorageSoa::Temp, + dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) { + storage->store(StorageSoa::Address, + dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } else { + fprintf(stderr, "ERROR: unsupported LLVM destination!"); + assert(!"wrong destination"); + } + } +} + +llvm::Module * +tgsi_to_llvm(struct gallivm_ir *ir, const struct tgsi_token *tokens) +{ + llvm::Module *mod = new Module("shader"); + struct tgsi_parse_context parse; + struct tgsi_full_instruction fi; + struct tgsi_full_declaration fd; + unsigned instno = 0; + Function* shader = mod->getFunction("execute_shader"); + std::ostringstream stream; + if (ir->type == GALLIVM_VS) { + stream << "vs_shader"; + } else { + stream << "fs_shader"; + } + stream << ir->id; + std::string func_name = stream.str(); + shader->setName(func_name.c_str()); + + Function::arg_iterator args = shader->arg_begin(); + Value *ptr_INPUT = args++; + ptr_INPUT->setName("input"); + + BasicBlock *label_entry = new BasicBlock("entry", shader, 0); + + tgsi_parse_init(&parse, tokens); + + fi = tgsi_default_full_instruction(); + fd = tgsi_default_full_declaration(); + Storage storage(label_entry, ptr_INPUT); + Instructions instr(mod, shader, label_entry, &storage); + while(!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + translate_declaration(ir, mod, &storage, + &parse.FullToken.FullDeclaration, + &fd); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + translate_immediate(&storage, + &parse.FullToken.FullImmediate); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + translate_instruction(mod, &storage, &instr, + &parse.FullToken.FullInstruction, + &fi, instno); + ++instno; + break; + + default: + assert(0); + } + } + + tgsi_parse_free(&parse); + + ir->num_consts = storage.numConsts(); + return mod; +} + +llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, + const struct tgsi_token *tokens) +{ + llvm::Module *mod = new Module("shader"); + struct tgsi_parse_context parse; + struct tgsi_full_instruction fi; + struct tgsi_full_declaration fd; + unsigned instno = 0; + std::ostringstream stream; + if (ir->type == GALLIVM_VS) { + stream << "vs_shader"; + } else { + stream << "fs_shader"; + } + //stream << ir->id; + std::string func_name = stream.str(); + Function *shader = llvm::cast(mod->getOrInsertFunction( + func_name.c_str(), + vertexShaderFunctionType())); + + Function::arg_iterator args = shader->arg_begin(); + Value *input = args++; + input->setName("inputs"); + Value *output = args++; + output->setName("outputs"); + Value *consts = args++; + consts->setName("consts"); + Value *temps = args++; + temps->setName("temps"); + + BasicBlock *label_entry = new BasicBlock("entry", shader, 0); + + tgsi_parse_init(&parse, tokens); + + fi = tgsi_default_full_instruction(); + fd = tgsi_default_full_declaration(); + + StorageSoa storage(label_entry, input, output, consts, temps); + InstructionsSoa instr(mod, shader, label_entry, &storage); + + while(!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + translate_declarationir(ir, mod, &storage, + &parse.FullToken.FullDeclaration, + &fd); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + translate_immediateir(&storage, + &parse.FullToken.FullImmediate); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + storage.declareImmediates(); + translate_instructionir(mod, &storage, &instr, + &parse.FullToken.FullInstruction, + &fi, instno); + ++instno; + break; + + default: + assert(0); + } + } + + tgsi_parse_free(&parse); + + return mod; +} diff --git a/src/gallium/auxiliary/llvm/tgsitollvm.h b/src/gallium/auxiliary/llvm/tgsitollvm.h new file mode 100644 index 0000000000..7ada04d629 --- /dev/null +++ b/src/gallium/auxiliary/llvm/tgsitollvm.h @@ -0,0 +1,20 @@ +#ifndef TGSITOLLVM_H +#define TGSITOLLVM_H + + +namespace llvm { + class Module; +} + +struct gallivm_ir; +struct tgsi_token; + + +llvm::Module * tgsi_to_llvm(struct gallivm_ir *ir, + const struct tgsi_token *tokens); + + +llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, + const struct tgsi_token *tokens); + +#endif diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile new file mode 100644 index 0000000000..588629e870 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -0,0 +1,23 @@ + +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = pipebuffer + +DRIVER_SOURCES = \ + pb_buffer_fenced.c \ + pb_buffer_malloc.c \ + pb_bufmgr_fenced.c \ + pb_bufmgr_mm.c \ + pb_bufmgr_pool.c \ + pb_winsys.c + +C_SOURCES = \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/pipebuffer/linked_list.h b/src/gallium/auxiliary/pipebuffer/linked_list.h new file mode 100644 index 0000000000..e99817fb13 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/linked_list.h @@ -0,0 +1,91 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND. USA. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + +/** + * \file + * List macros heavily inspired by the Linux kernel + * list handling. No list looping yet. + * + * Is not threadsafe, so common operations need to + * be protected using an external mutex. + */ + +#ifndef LINKED_LIST_H_ +#define LINKED_LIST_H_ + + +#include + + +struct list_head +{ + struct list_head *prev; + struct list_head *next; +}; + + +#define LIST_INITHEAD(__item) \ + do { \ + (__item)->prev = (__item); \ + (__item)->next = (__item); \ + } while (0) + +#define LIST_ADD(__item, __list) \ + do { \ + (__item)->prev = (__list); \ + (__item)->next = (__list)->next; \ + (__list)->next->prev = (__item); \ + (__list)->next = (__item); \ + } while (0) + +#define LIST_ADDTAIL(__item, __list) \ + do { \ + (__item)->next = (__list); \ + (__item)->prev = (__list)->prev; \ + (__list)->prev->next = (__item); \ + (__list)->prev = (__item); \ + } while(0) + +#define LIST_DEL(__item) \ + do { \ + (__item)->prev->next = (__item)->next; \ + (__item)->next->prev = (__item)->prev; \ + } while(0) + +#define LIST_DELINIT(__item) \ + do { \ + (__item)->prev->next = (__item)->next; \ + (__item)->next->prev = (__item)->prev; \ + (__item)->next = (__item); \ + (__item)->prev = (__item); \ + } while(0) + +#define LIST_ENTRY(__type, __item, __field) \ + ((__type *)(((char *)(__item)) - offsetof(__type, __field))) + + +#endif /*LINKED_LIST_H_*/ diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h new file mode 100644 index 0000000000..97beb5f72a --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -0,0 +1,202 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Generic code for buffers. + * + * Behind a pipe buffle handle there can be DMA buffers, client (or user) + * buffers, regular malloced buffers, etc. This file provides an abstract base + * buffer handle that allows the driver to cope with all those kinds of buffers + * in a more flexible way. + * + * There is no obligation of a winsys driver to use this library. And a pipe + * driver should be completly agnostic about it. + * + * \author José Fonseca + */ + +#ifndef PB_BUFFER_H_ +#define PB_BUFFER_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_state.h" +#include "pipe/p_inlines.h" + + +struct pb_vtbl; + +/** + * Buffer description. + * + * Used when allocating the buffer. + */ +struct pb_desc +{ + unsigned alignment; + unsigned usage; +}; + + +/** + * Base class for all pb_* buffers. + */ +struct pb_buffer +{ + struct pipe_buffer base; + + /** + * Pointer to the virtual function table. + * + * Avoid accessing this table directly. Use the inline functions below + * instead to avoid mistakes. + */ + const struct pb_vtbl *vtbl; +}; + + +/** + * Virtual function table for the buffer storage operations. + * + * Note that creation is not done through this table. + */ +struct pb_vtbl +{ + void (*destroy)( struct pb_buffer *buf ); + + /** + * Map the entire data store of a buffer object into the client's address. + * flags is bitmask of PIPE_BUFFER_FLAG_READ/WRITE. + */ + void *(*map)( struct pb_buffer *buf, + unsigned flags ); + + void (*unmap)( struct pb_buffer *buf ); + + /** + * Get the base buffer and the offset. + * + * A buffer can be subdivided in smaller buffers. This method should return + * the underlaying buffer, and the relative offset. + * + * Buffers without an underlaying base buffer should return themselves, with + * a zero offset. + * + * Note that this will increase the reference count of the base buffer. + */ + void (*get_base_buffer)( struct pb_buffer *buf, + struct pb_buffer **base_buf, + unsigned *offset ); +}; + + +static INLINE struct pipe_buffer * +pb_pipe_buffer( struct pb_buffer *pbuf ) +{ + assert(pbuf); + return &pbuf->base; +} + + +static INLINE struct pb_buffer * +pb_buffer( struct pipe_buffer *buf ) +{ + assert(buf); + /* Could add a magic cookie check on debug builds. + */ + return (struct pb_buffer *)buf; +} + + +/* Accessor functions for pb->vtbl: + */ +static INLINE void * +pb_map(struct pb_buffer *buf, + unsigned flags) +{ + assert(buf); + return buf->vtbl->map(buf, flags); +} + + +static INLINE void +pb_unmap(struct pb_buffer *buf) +{ + assert(buf); + buf->vtbl->unmap(buf); +} + + +static INLINE void +pb_get_base_buffer( struct pb_buffer *buf, + struct pb_buffer **base_buf, + unsigned *offset ) +{ + buf->vtbl->get_base_buffer(buf, base_buf, offset); +} + + +static INLINE void +pb_destroy(struct pb_buffer *buf) +{ + assert(buf); + buf->vtbl->destroy(buf); +} + + +/* XXX: thread safety issues! + */ +static INLINE void +pb_reference(struct pb_buffer **dst, + struct pb_buffer *src) +{ + if (src) + src->base.refcount++; + + if (*dst && --(*dst)->base.refcount == 0) + pb_destroy( *dst ); + + *dst = src; +} + + +/** + * Malloc-based buffer to store data that can't be used by the graphics + * hardware. + */ +struct pb_buffer * +pb_malloc_buffer_create(size_t size, + const struct pb_desc *desc); + + +void +pb_init_winsys(struct pipe_winsys *winsys); + + +#endif /*PB_BUFFER_H_*/ diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c new file mode 100644 index 0000000000..f4fc3f6d71 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -0,0 +1,299 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Implementation of fenced buffers. + * + * \author José Fonseca + * \author Thomas Hellström + */ + + +#include "linked_list.h" + +#include "p_compiler.h" +#include "p_debug.h" +#include "p_winsys.h" +#include "p_thread.h" +#include "p_util.h" + +#include "pb_buffer.h" +#include "pb_buffer_fenced.h" + +#ifndef __MSC__ +#include +#endif + + +/** + * Convenience macro (type safe). + */ +#define SUPER(__derived) (&(__derived)->base) + + +struct fenced_buffer_list +{ + _glthread_Mutex mutex; + + struct pipe_winsys *winsys; + + size_t numDelayed; + size_t checkDelayed; + + struct list_head delayed; +}; + + +/** + * Wrapper around a pipe buffer which adds fencing and reference counting. + */ +struct fenced_buffer +{ + struct pb_buffer base; + + struct pb_buffer *buffer; + + struct pipe_fence_handle *fence; + + struct list_head head; + struct fenced_buffer_list *list; +}; + + +static INLINE struct fenced_buffer * +fenced_buffer(struct pb_buffer *buf) +{ + assert(buf); + assert(buf->vtbl == &fenced_buffer_vtbl); + return (struct fenced_buffer *)buf; +} + + + + +static void +_fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, + int wait) +{ + struct pipe_winsys *winsys = fenced_list->winsys; + struct fenced_buffer *fenced_buf; + struct list_head *list, *prev; + int signaled = -1; + + list = fenced_list->delayed.next; + + if (fenced_list->numDelayed > 3) { + unsigned i; + + for (i = 0; i < fenced_list->numDelayed; i += 3) { + list = list->next; + } + } + + prev = list->prev; + for (; list != &fenced_list->delayed; list = prev, prev = list->prev) { + + fenced_buf = LIST_ENTRY(struct fenced_buffer, list, head); + + if (signaled != 0) { + if (wait) { + signaled = winsys->fence_finish(winsys, fenced_buf->fence, 0); + } + else { + signaled = winsys->fence_signalled(winsys, fenced_buf->fence, 0); + } + } + + if (signaled != 0) + /* XXX: we are assuming that buffers are freed in the same order they + * are fenced which may not always be true... + */ + break; + + winsys->fence_reference(winsys, &fenced_buf->fence, NULL); + + LIST_DEL(list); + fenced_list->numDelayed--; + + /* Do the delayed destroy: + */ + pb_reference(&fenced_buf->buffer, NULL); + FREE(fenced_buf); + } +} + + +static void +fenced_buffer_destroy(struct pb_buffer *buf) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_buffer_list *fenced_list = fenced_buf->list; + + if (fenced_buf->fence) { + LIST_ADDTAIL(&fenced_buf->head, &fenced_list->delayed); + fenced_list->numDelayed++; + } + else { + pb_reference(&fenced_buf->buffer, NULL); + FREE(fenced_buf); + } + + if ((fenced_list->numDelayed % fenced_list->checkDelayed) == 0) + _fenced_buffer_list_check_free(fenced_list, 0); +} + + +static void * +fenced_buffer_map(struct pb_buffer *buf, + unsigned flags) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + return pb_map(fenced_buf->buffer, flags); +} + + +static void +fenced_buffer_unmap(struct pb_buffer *buf) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + pb_unmap(fenced_buf->buffer); +} + + +static void +fenced_buffer_get_base_buffer(struct pb_buffer *buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + pb_get_base_buffer(fenced_buf->buffer, base_buf, offset); +} + + +const struct pb_vtbl +fenced_buffer_vtbl = { + fenced_buffer_destroy, + fenced_buffer_map, + fenced_buffer_unmap, + fenced_buffer_get_base_buffer +}; + + +struct pb_buffer * +fenced_buffer_create(struct fenced_buffer_list *fenced_list, + struct pb_buffer *buffer) +{ + struct fenced_buffer *buf; + + if(!buffer) + return NULL; + + buf = CALLOC_STRUCT(fenced_buffer); + if(!buf) + return NULL; + + buf->base.base.refcount = 1; + buf->base.base.alignment = buffer->base.alignment; + buf->base.base.usage = buffer->base.usage; + buf->base.base.size = buffer->base.size; + + buf->base.vtbl = &fenced_buffer_vtbl; + buf->buffer = buffer; + buf->list = fenced_list; + + return &buf->base; +} + + +void +buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_buffer_list *fenced_list = fenced_buf->list; + struct pipe_winsys *winsys = fenced_list->winsys; + + _glthread_LOCK_MUTEX(fenced_list->mutex); + winsys->fence_reference(winsys, &fenced_buf->fence, fence); + _glthread_UNLOCK_MUTEX(fenced_list->mutex); +} + + +struct fenced_buffer_list * +fenced_buffer_list_create(struct pipe_winsys *winsys) +{ + struct fenced_buffer_list *fenced_list; + + fenced_list = (struct fenced_buffer_list *)CALLOC(1, sizeof(*fenced_list)); + if (!fenced_list) + return NULL; + + fenced_list->winsys = winsys; + + LIST_INITHEAD(&fenced_list->delayed); + + fenced_list->numDelayed = 0; + + /* TODO: don't hard code this */ + fenced_list->checkDelayed = 5; + + _glthread_INIT_MUTEX(fenced_list->mutex); + + return fenced_list; +} + + +void +fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, + int wait) +{ + _glthread_LOCK_MUTEX(fenced_list->mutex); + _fenced_buffer_list_check_free(fenced_list, wait); + _glthread_UNLOCK_MUTEX(fenced_list->mutex); +} + + +void +fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) +{ + _glthread_LOCK_MUTEX(fenced_list->mutex); + + /* Wait on outstanding fences */ + while (fenced_list->numDelayed) { + _glthread_UNLOCK_MUTEX(fenced_list->mutex); + sched_yield(); + _fenced_buffer_list_check_free(fenced_list, 1); + _glthread_LOCK_MUTEX(fenced_list->mutex); + } + + _glthread_UNLOCK_MUTEX(fenced_list->mutex); + + FREE(fenced_list); +} + + diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h new file mode 100644 index 0000000000..c40b9c75e1 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h @@ -0,0 +1,117 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Buffer fencing. + * + * "Fenced buffers" is actually a misnomer. They should be referred as + * "fenceable buffers", i.e, buffers that can be fenced, but I couldn't find + * the word "fenceable" in the dictionary. + * + * A "fenced buffer" is a decorator around a normal buffer, which adds two + * special properties: + * - the ability for the destruction to be delayed by a fence; + * - reference counting. + * + * Usually DMA buffers have a life-time that will extend the life-time of its + * handle. The end-of-life is dictated by the fence signalling. + * + * Between the handle's destruction, and the fence signalling, the buffer is + * stored in a fenced buffer list. + * + * \author José Fonseca + */ + +#ifndef PB_BUFFER_FENCED_H_ +#define PB_BUFFER_FENCED_H_ + + +#include "pipe/p_debug.h" + + +struct pipe_winsys; +struct pipe_buffer; +struct pipe_fence_handle; + + +/** + * List of buffers which are awaiting fence signalling. + */ +struct fenced_buffer_list; + + +/** + * The fenced buffer's virtual function table. + * + * NOTE: Made public for debugging purposes. + */ +extern const struct pb_vtbl fenced_buffer_vtbl; + + +/** + * Create a fenced buffer list. + * + * See also fenced_bufmgr_create for a more convenient way to use this. + */ +struct fenced_buffer_list * +fenced_buffer_list_create(struct pipe_winsys *winsys); + + +/** + * Walk the fenced buffer list to check and free signalled buffers. + */ +void +fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, + int wait); + +void +fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list); + + +/** + * Wrap a buffer in a fenced buffer. + * + * NOTE: this will not increase the buffer reference count. + */ +struct pb_buffer * +fenced_buffer_create(struct fenced_buffer_list *fenced, + struct pb_buffer *buffer); + + +/** + * Set a buffer's fence. + * + * NOTE: Although it takes a generic pb_buffer argument, it will fail + * on everything but buffers returned by fenced_buffer_create. + */ +void +buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence); + + +#endif /*PB_BUFFER_FENCED_H_*/ diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c new file mode 100644 index 0000000000..9e8244f909 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c @@ -0,0 +1,127 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Implementation of malloc-based buffers to store data that can't be processed + * by the hardware. + * + * \author José Fonseca + */ + + +#include "pipe/p_debug.h" +#include "pipe/p_util.h" +#include "pb_buffer.h" + + +struct malloc_buffer +{ + struct pb_buffer base; + void *data; +}; + + +extern const struct pb_vtbl malloc_buffer_vtbl; + +static INLINE struct malloc_buffer * +malloc_buffer(struct pb_buffer *buf) +{ + assert(buf); + assert(buf->vtbl == &malloc_buffer_vtbl); + return (struct malloc_buffer *)buf; +} + + +static void +malloc_buffer_destroy(struct pb_buffer *buf) +{ + align_free(malloc_buffer(buf)->data); + FREE(buf); +} + + +static void * +malloc_buffer_map(struct pb_buffer *buf, + unsigned flags) +{ + return malloc_buffer(buf)->data; +} + + +static void +malloc_buffer_unmap(struct pb_buffer *buf) +{ + /* No-op */ +} + + +static void +malloc_buffer_get_base_buffer(struct pb_buffer *buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + *base_buf = buf; + *offset = 0; +} + + +const struct pb_vtbl +malloc_buffer_vtbl = { + malloc_buffer_destroy, + malloc_buffer_map, + malloc_buffer_unmap, + malloc_buffer_get_base_buffer +}; + + +struct pb_buffer * +pb_malloc_buffer_create(size_t size, + const struct pb_desc *desc) +{ + struct malloc_buffer *buf; + + /* TODO: do a single allocation */ + + buf = CALLOC_STRUCT(malloc_buffer); + if(!buf) + return NULL; + + buf->base.base.refcount = 1; + buf->base.base.alignment = desc->alignment; + buf->base.base.usage = desc->usage; + buf->base.base.size = size; + buf->base.vtbl = &malloc_buffer_vtbl; + + buf->data = align_malloc(size, desc->alignment < sizeof(void*) ? sizeof(void*) : desc->alignment); + if(!buf->data) { + align_free(buf); + return NULL; + } + + return &buf->base; +} diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h new file mode 100644 index 0000000000..1ddf784c97 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -0,0 +1,126 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Buffer management. + * + * A buffer manager does only one basic thing: it creates buffers. Actually, + * "buffer factory" would probably a more accurate description. + * + * You can chain buffer managers so that you can have a finer grained memory + * management and pooling. + * + * For example, for a simple batch buffer manager you would chain: + * - the native buffer manager, which provides DMA memory from the graphics + * memory space; + * - the pool buffer manager, which keep around a pool of equally sized buffers + * to avoid latency associated with the native buffer manager; + * - the fenced buffer manager, which will delay buffer destruction until the + * the moment the card finishing processing it. + * + * \author José Fonseca + */ + +#ifndef PB_BUFMGR_H_ +#define PB_BUFMGR_H_ + + +#include + + +struct pb_desc; +struct pipe_buffer; +struct pipe_winsys; + + +/** + * Abstract base class for all buffer managers. + */ +struct pb_manager +{ + /* XXX: we will likely need more allocation flags */ + struct pb_buffer * + (*create_buffer)( struct pb_manager *mgr, + size_t size, + const struct pb_desc *desc); + + void + (*destroy)( struct pb_manager *mgr ); +}; + + +/** + * Static buffer pool manager. + * + * Manages the allocation of equally sized buffers. It does so by allocating + * a single big buffer and divide it equally sized buffers. + * + * It is meant to manage the allocation of batch buffer pools. + */ +struct pb_manager * +pool_bufmgr_create(struct pb_manager *provider, + size_t n, size_t size, + const struct pb_desc *desc); + + +/** + * Wraper around the old memory manager. + * + * It managers buffers of different sizes. It does so by allocating a buffer + * with the size of the heap, and then using the old mm memory manager to manage + * that heap. + */ +struct pb_manager * +mm_bufmgr_create(struct pb_manager *provider, + size_t size, size_t align2); + +/** + * Same as mm_bufmgr_create. + * + * Buffer will be release when the manager is destroyed. + */ +struct pb_manager * +mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, + size_t size, size_t align2); + + +/** + * Fenced buffer manager. + * + * This manager is just meant for convenience. It wraps the buffers returned + * by another manager in fenced buffers, so that + * + * NOTE: the buffer manager that provides the buffers will be destroyed + * at the same time. + */ +struct pb_manager * +fenced_bufmgr_create(struct pb_manager *provider, + struct pipe_winsys *winsys); + + +#endif /*PB_BUFMGR_H_*/ diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c new file mode 100644 index 0000000000..c535d3276c --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c @@ -0,0 +1,131 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/** + * \file + * A buffer manager that wraps buffers in fenced buffers. + * + * \author José Fonseca + */ + + +#include "p_debug.h" +#include "p_util.h" + +#include "pb_buffer.h" +#include "pb_buffer_fenced.h" +#include "pb_bufmgr.h" + + +struct fenced_pb_manager +{ + struct pb_manager base; + + struct pb_manager *provider; + + struct fenced_buffer_list *fenced_list; +}; + + +static INLINE struct fenced_pb_manager * +fenced_pb_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct fenced_pb_manager *)mgr; +} + + +static struct pb_buffer * +fenced_bufmgr_create_buffer(struct pb_manager *mgr, + size_t size, + const struct pb_desc *desc) +{ + struct fenced_pb_manager *fenced_mgr = fenced_pb_manager(mgr); + struct pb_buffer *buf; + struct pb_buffer *fenced_buf; + + /* check for free buffers before allocating new ones */ + fenced_buffer_list_check_free(fenced_mgr->fenced_list, 0); + + buf = fenced_mgr->provider->create_buffer(fenced_mgr->provider, size, desc); + if(!buf) { + /* try harder to get a buffer */ + fenced_buffer_list_check_free(fenced_mgr->fenced_list, 1); + + buf = fenced_mgr->provider->create_buffer(fenced_mgr->provider, size, desc); + if(!buf) { + /* give up */ + return NULL; + } + } + + fenced_buf = fenced_buffer_create(fenced_mgr->fenced_list, buf); + if(!fenced_buf) { + assert(buf->base.refcount == 1); + pb_destroy(buf); + } + + return fenced_buf; +} + + +static void +fenced_bufmgr_destroy(struct pb_manager *mgr) +{ + struct fenced_pb_manager *fenced_mgr = fenced_pb_manager(mgr); + + fenced_buffer_list_destroy(fenced_mgr->fenced_list); + + fenced_mgr->provider->destroy(fenced_mgr->provider); + + FREE(fenced_mgr); +} + + +struct pb_manager * +fenced_bufmgr_create(struct pb_manager *provider, + struct pipe_winsys *winsys) +{ + struct fenced_pb_manager *fenced_mgr; + + fenced_mgr = (struct fenced_pb_manager *)CALLOC(1, sizeof(*fenced_mgr)); + if (!fenced_mgr) + return NULL; + + fenced_mgr->base.destroy = fenced_bufmgr_destroy; + fenced_mgr->base.create_buffer = fenced_bufmgr_create_buffer; + + fenced_mgr->provider = provider; + fenced_mgr->fenced_list = fenced_buffer_list_create(winsys); + if(!fenced_mgr->fenced_list) { + FREE(fenced_mgr); + return NULL; + } + + return &fenced_mgr->base; +} diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c new file mode 100644 index 0000000000..8b1b51c0e2 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -0,0 +1,593 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 1999 Wittawat Yamwong + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Buffer manager using the old texture memory manager. + * + * \author José Fonseca + */ + + +#include "linked_list.h" + +#include "p_defines.h" +#include "p_debug.h" +#include "p_thread.h" +#include "p_util.h" +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +/** + * Convenience macro (type safe). + */ +#define SUPER(__derived) (&(__derived)->base) + + +struct mem_block +{ + struct mem_block *next, *prev; + struct mem_block *next_free, *prev_free; + struct mem_block *heap; + int ofs, size; + unsigned int free:1; + unsigned int reserved:1; +}; + + +#ifdef DEBUG +/** + * For debugging purposes. + */ +static void +mmDumpMemInfo(const struct mem_block *heap) +{ + debug_printf("Memory heap %p:\n", (void *)heap); + if (heap == 0) { + debug_printf(" heap == 0\n"); + } else { + const struct mem_block *p; + + for(p = heap->next; p != heap; p = p->next) { + debug_printf(" Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, + p->free ? 'F':'.', + p->reserved ? 'R':'.'); + } + + debug_printf("\nFree list:\n"); + + for(p = heap->next_free; p != heap; p = p->next_free) { + debug_printf(" FREE Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, + p->free ? 'F':'.', + p->reserved ? 'R':'.'); + } + + } + debug_printf("End of memory blocks\n"); +} +#endif + + +/** + * input: total size in bytes + * return: a heap pointer if OK, NULL if error + */ +static struct mem_block * +mmInit(int ofs, int size) +{ + struct mem_block *heap, *block; + + if (size <= 0) + return NULL; + + heap = CALLOC_STRUCT(mem_block); + if (!heap) + return NULL; + + block = CALLOC_STRUCT(mem_block); + if (!block) { + FREE(heap); + return NULL; + } + + heap->next = block; + heap->prev = block; + heap->next_free = block; + heap->prev_free = block; + + block->heap = heap; + block->next = heap; + block->prev = heap; + block->next_free = heap; + block->prev_free = heap; + + block->ofs = ofs; + block->size = size; + block->free = 1; + + return heap; +} + + +static struct mem_block * +SliceBlock(struct mem_block *p, + int startofs, int size, + int reserved, int alignment) +{ + struct mem_block *newblock; + + /* break left [p, newblock, p->next], then p = newblock */ + if (startofs > p->ofs) { + newblock = CALLOC_STRUCT(mem_block); + if (!newblock) + return NULL; + newblock->ofs = startofs; + newblock->size = p->size - (startofs - p->ofs); + newblock->free = 1; + newblock->heap = p->heap; + + newblock->next = p->next; + newblock->prev = p; + p->next->prev = newblock; + p->next = newblock; + + newblock->next_free = p->next_free; + newblock->prev_free = p; + p->next_free->prev_free = newblock; + p->next_free = newblock; + + p->size -= newblock->size; + p = newblock; + } + + /* break right, also [p, newblock, p->next] */ + if (size < p->size) { + newblock = CALLOC_STRUCT(mem_block); + if (!newblock) + return NULL; + newblock->ofs = startofs + size; + newblock->size = p->size - size; + newblock->free = 1; + newblock->heap = p->heap; + + newblock->next = p->next; + newblock->prev = p; + p->next->prev = newblock; + p->next = newblock; + + newblock->next_free = p->next_free; + newblock->prev_free = p; + p->next_free->prev_free = newblock; + p->next_free = newblock; + + p->size = size; + } + + /* p = middle block */ + p->free = 0; + + /* Remove p from the free list: + */ + p->next_free->prev_free = p->prev_free; + p->prev_free->next_free = p->next_free; + + p->next_free = 0; + p->prev_free = 0; + + p->reserved = reserved; + return p; +} + + +/** + * Allocate 'size' bytes with 2^align2 bytes alignment, + * restrict the search to free memory after 'startSearch' + * depth and back buffers should be in different 4mb banks + * to get better page hits if possible + * input: size = size of block + * align2 = 2^align2 bytes alignment + * startSearch = linear offset from start of heap to begin search + * return: pointer to the allocated block, 0 if error + */ +static struct mem_block * +mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) +{ + struct mem_block *p; + const int mask = (1 << align2)-1; + int startofs = 0; + int endofs; + + if (!heap || align2 < 0 || size <= 0) + return NULL; + + for (p = heap->next_free; p != heap; p = p->next_free) { + assert(p->free); + + startofs = (p->ofs + mask) & ~mask; + if ( startofs < startSearch ) { + startofs = startSearch; + } + endofs = startofs+size; + if (endofs <= (p->ofs+p->size)) + break; + } + + if (p == heap) + return NULL; + + assert(p->free); + p = SliceBlock(p,startofs,size,0,mask+1); + + return p; +} + + +#if 0 +/** + * Free block starts at offset + * input: pointer to a heap, start offset + * return: pointer to a block + */ +static struct mem_block * +mmFindBlock(struct mem_block *heap, int start) +{ + struct mem_block *p; + + for (p = heap->next; p != heap; p = p->next) { + if (p->ofs == start) + return p; + } + + return NULL; +} +#endif + + +static INLINE int +Join2Blocks(struct mem_block *p) +{ + /* XXX there should be some assertions here */ + + /* NOTE: heap->free == 0 */ + + if (p->free && p->next->free) { + struct mem_block *q = p->next; + + assert(p->ofs + p->size == q->ofs); + p->size += q->size; + + p->next = q->next; + q->next->prev = p; + + q->next_free->prev_free = q->prev_free; + q->prev_free->next_free = q->next_free; + + FREE(q); + return 1; + } + return 0; +} + + +/** + * Free block starts at offset + * input: pointer to a block + * return: 0 if OK, -1 if error + */ +static int +mmFreeMem(struct mem_block *b) +{ + if (!b) + return 0; + + if (b->free) { + debug_printf("block already free\n"); + return -1; + } + if (b->reserved) { + debug_printf("block is reserved\n"); + return -1; + } + + b->free = 1; + b->next_free = b->heap->next_free; + b->prev_free = b->heap; + b->next_free->prev_free = b; + b->prev_free->next_free = b; + + Join2Blocks(b); + if (b->prev != b->heap) + Join2Blocks(b->prev); + + return 0; +} + + +/** + * destroy MM + */ +static void +mmDestroy(struct mem_block *heap) +{ + struct mem_block *p; + + if (!heap) + return; + + for (p = heap->next; p != heap; ) { + struct mem_block *next = p->next; + FREE(p); + p = next; + } + + FREE(heap); +} + + +struct mm_pb_manager +{ + struct pb_manager base; + + _glthread_Mutex mutex; + + size_t size; + struct mem_block *heap; + + size_t align2; + + struct pb_buffer *buffer; + void *map; +}; + + +static INLINE struct mm_pb_manager * +mm_pb_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct mm_pb_manager *)mgr; +} + + +struct mm_buffer +{ + struct pb_buffer base; + + struct mm_pb_manager *mgr; + + struct mem_block *block; +}; + + +static INLINE struct mm_buffer * +mm_buffer(struct pb_buffer *buf) +{ + assert(buf); + return (struct mm_buffer *)buf; +} + + +static void +mm_buffer_destroy(struct pb_buffer *buf) +{ + struct mm_buffer *mm_buf = mm_buffer(buf); + struct mm_pb_manager *mm = mm_buf->mgr; + + assert(buf->base.refcount == 0); + + _glthread_LOCK_MUTEX(mm->mutex); + mmFreeMem(mm_buf->block); + FREE(buf); + _glthread_UNLOCK_MUTEX(mm->mutex); +} + + +static void * +mm_buffer_map(struct pb_buffer *buf, + unsigned flags) +{ + struct mm_buffer *mm_buf = mm_buffer(buf); + struct mm_pb_manager *mm = mm_buf->mgr; + + return (unsigned char *) mm->map + mm_buf->block->ofs; +} + + +static void +mm_buffer_unmap(struct pb_buffer *buf) +{ + /* No-op */ +} + + +static void +mm_buffer_get_base_buffer(struct pb_buffer *buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + struct mm_buffer *mm_buf = mm_buffer(buf); + struct mm_pb_manager *mm = mm_buf->mgr; + pb_get_base_buffer(mm->buffer, base_buf, offset); + *offset += mm_buf->block->ofs; +} + + +static const struct pb_vtbl +mm_buffer_vtbl = { + mm_buffer_destroy, + mm_buffer_map, + mm_buffer_unmap, + mm_buffer_get_base_buffer +}; + + +static struct pb_buffer * +mm_bufmgr_create_buffer(struct pb_manager *mgr, + size_t size, + const struct pb_desc *desc) +{ + struct mm_pb_manager *mm = mm_pb_manager(mgr); + struct mm_buffer *mm_buf; + + /* We don't handle alignments larger then the one initially setup */ + assert(desc->alignment % (1 << mm->align2) == 0); + if(desc->alignment % (1 << mm->align2)) + return NULL; + + _glthread_LOCK_MUTEX(mm->mutex); + + mm_buf = CALLOC_STRUCT(mm_buffer); + if (!mm_buf) { + _glthread_UNLOCK_MUTEX(mm->mutex); + return NULL; + } + + mm_buf->base.base.refcount = 1; + mm_buf->base.base.alignment = desc->alignment; + mm_buf->base.base.usage = desc->usage; + mm_buf->base.base.size = size; + + mm_buf->base.vtbl = &mm_buffer_vtbl; + + mm_buf->mgr = mm; + + mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0); + if(!mm_buf->block) { + debug_printf("warning: heap full\n"); +#if 0 + mmDumpMemInfo(mm->heap); +#endif + + mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0); + if(!mm_buf->block) { + assert(0); + FREE(mm_buf); + _glthread_UNLOCK_MUTEX(mm->mutex); + return NULL; + } + } + + /* Some sanity checks */ + assert(0 <= mm_buf->block->ofs && mm_buf->block->ofs < mm->size); + assert(size <= mm_buf->block->size && mm_buf->block->ofs + mm_buf->block->size <= mm->size); + + _glthread_UNLOCK_MUTEX(mm->mutex); + return SUPER(mm_buf); +} + + +static void +mm_bufmgr_destroy(struct pb_manager *mgr) +{ + struct mm_pb_manager *mm = mm_pb_manager(mgr); + + _glthread_LOCK_MUTEX(mm->mutex); + + mmDestroy(mm->heap); + + pb_unmap(mm->buffer); + pb_reference(&mm->buffer, NULL); + + _glthread_UNLOCK_MUTEX(mm->mutex); + + FREE(mgr); +} + + +struct pb_manager * +mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, + size_t size, size_t align2) +{ + struct mm_pb_manager *mm; + + if(!buffer) + return NULL; + + mm = CALLOC_STRUCT(mm_pb_manager); + if (!mm) + return NULL; + + mm->base.create_buffer = mm_bufmgr_create_buffer; + mm->base.destroy = mm_bufmgr_destroy; + + mm->size = size; + mm->align2 = align2; /* 64-byte alignment */ + + _glthread_INIT_MUTEX(mm->mutex); + + mm->buffer = buffer; + + mm->map = pb_map(mm->buffer, + PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE); + if(!mm->map) + goto failure; + + mm->heap = mmInit(0, size); + if (!mm->heap) + goto failure; + + return SUPER(mm); + +failure: +if(mm->heap) + mmDestroy(mm->heap); + if(mm->map) + pb_unmap(mm->buffer); + if(mm) + FREE(mm); + return NULL; +} + + +struct pb_manager * +mm_bufmgr_create(struct pb_manager *provider, + size_t size, size_t align2) +{ + struct pb_buffer *buffer; + struct pb_manager *mgr; + struct pb_desc desc; + + assert(provider); + assert(provider->create_buffer); + + memset(&desc, 0, sizeof(desc)); + desc.alignment = 1 << align2; + + buffer = provider->create_buffer(provider, size, &desc); + if (!buffer) + return NULL; + + mgr = mm_bufmgr_create_from_buffer(buffer, size, align2); + if (!mgr) { + pb_reference(&buffer, NULL); + return NULL; + } + + return mgr; +} diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c new file mode 100644 index 0000000000..04477a865a --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -0,0 +1,288 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/** + * \file + * Batch buffer pool management. + * + * \author José Fonseca + * \author Thomas Hellström + */ + + +#include "linked_list.h" + +#include "p_compiler.h" +#include "p_debug.h" +#include "p_thread.h" +#include "p_defines.h" +#include "p_util.h" + +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +/** + * Convenience macro (type safe). + */ +#define SUPER(__derived) (&(__derived)->base) + + +struct pool_pb_manager +{ + struct pb_manager base; + + _glthread_Mutex mutex; + + size_t bufSize; + size_t bufAlign; + + size_t numFree; + size_t numTot; + + struct list_head free; + + struct pb_buffer *buffer; + void *map; + + struct pool_buffer *bufs; +}; + + +static INLINE struct pool_pb_manager * +pool_pb_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pool_pb_manager *)mgr; +} + + +struct pool_buffer +{ + struct pb_buffer base; + + struct pool_pb_manager *mgr; + + struct list_head head; + + size_t start; +}; + + +static INLINE struct pool_buffer * +pool_buffer(struct pb_buffer *buf) +{ + assert(buf); + return (struct pool_buffer *)buf; +} + + + +static void +pool_buffer_destroy(struct pb_buffer *buf) +{ + struct pool_buffer *pool_buf = pool_buffer(buf); + struct pool_pb_manager *pool = pool_buf->mgr; + + assert(pool_buf->base.base.refcount == 0); + + _glthread_LOCK_MUTEX(pool->mutex); + LIST_ADD(&pool_buf->head, &pool->free); + pool->numFree++; + _glthread_UNLOCK_MUTEX(pool->mutex); +} + + +static void * +pool_buffer_map(struct pb_buffer *buf, unsigned flags) +{ + struct pool_buffer *pool_buf = pool_buffer(buf); + struct pool_pb_manager *pool = pool_buf->mgr; + void *map; + + _glthread_LOCK_MUTEX(pool->mutex); + map = (unsigned char *) pool->map + pool_buf->start; + _glthread_UNLOCK_MUTEX(pool->mutex); + return map; +} + + +static void +pool_buffer_unmap(struct pb_buffer *buf) +{ + /* No-op */ +} + + +static void +pool_buffer_get_base_buffer(struct pb_buffer *buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + struct pool_buffer *pool_buf = pool_buffer(buf); + struct pool_pb_manager *pool = pool_buf->mgr; + pb_get_base_buffer(pool->buffer, base_buf, offset); + *offset += pool_buf->start; +} + + +static const struct pb_vtbl +pool_buffer_vtbl = { + pool_buffer_destroy, + pool_buffer_map, + pool_buffer_unmap, + pool_buffer_get_base_buffer +}; + + +static struct pb_buffer * +pool_bufmgr_create_buffer(struct pb_manager *mgr, + size_t size, + const struct pb_desc *desc) +{ + struct pool_pb_manager *pool = pool_pb_manager(mgr); + struct pool_buffer *pool_buf; + struct list_head *item; + + assert(size == pool->bufSize); + assert(pool->bufAlign % desc->alignment == 0); + + _glthread_LOCK_MUTEX(pool->mutex); + + if (pool->numFree == 0) { + _glthread_UNLOCK_MUTEX(pool->mutex); + debug_printf("warning: out of fixed size buffer objects\n"); + return NULL; + } + + item = pool->free.next; + + if (item == &pool->free) { + _glthread_UNLOCK_MUTEX(pool->mutex); + debug_printf("error: fixed size buffer pool corruption\n"); + return NULL; + } + + LIST_DEL(item); + --pool->numFree; + + _glthread_UNLOCK_MUTEX(pool->mutex); + + pool_buf = LIST_ENTRY(struct pool_buffer, item, head); + assert(pool_buf->base.base.refcount == 0); + pool_buf->base.base.refcount = 1; + pool_buf->base.base.alignment = desc->alignment; + pool_buf->base.base.usage = desc->usage; + + return SUPER(pool_buf); +} + + +static void +pool_bufmgr_destroy(struct pb_manager *mgr) +{ + struct pool_pb_manager *pool = pool_pb_manager(mgr); + _glthread_LOCK_MUTEX(pool->mutex); + + FREE(pool->bufs); + + pb_unmap(pool->buffer); + pb_reference(&pool->buffer, NULL); + + _glthread_UNLOCK_MUTEX(pool->mutex); + + FREE(mgr); +} + + +struct pb_manager * +pool_bufmgr_create(struct pb_manager *provider, + size_t numBufs, + size_t bufSize, + const struct pb_desc *desc) +{ + struct pool_pb_manager *pool; + struct pool_buffer *pool_buf; + size_t i; + + pool = (struct pool_pb_manager *)CALLOC(1, sizeof(*pool)); + if (!pool) + return NULL; + + pool->base.destroy = pool_bufmgr_destroy; + pool->base.create_buffer = pool_bufmgr_create_buffer; + + LIST_INITHEAD(&pool->free); + + pool->numTot = numBufs; + pool->numFree = numBufs; + pool->bufSize = bufSize; + pool->bufAlign = desc->alignment; + + _glthread_INIT_MUTEX(pool->mutex); + + pool->buffer = provider->create_buffer(provider, numBufs*bufSize, desc); + if (!pool->buffer) + goto failure; + + pool->map = pb_map(pool->buffer, + PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE); + if(!pool->map) + goto failure; + + pool->bufs = (struct pool_buffer *)CALLOC(numBufs, sizeof(*pool->bufs)); + if (!pool->bufs) + goto failure; + + pool_buf = pool->bufs; + for (i = 0; i < numBufs; ++i) { + pool_buf->base.base.refcount = 0; + pool_buf->base.base.alignment = 0; + pool_buf->base.base.usage = 0; + pool_buf->base.base.size = bufSize; + pool_buf->base.vtbl = &pool_buffer_vtbl; + pool_buf->mgr = pool; + pool_buf->start = i * bufSize; + LIST_ADDTAIL(&pool_buf->head, &pool->free); + pool_buf++; + } + + return SUPER(pool); + +failure: + if(pool->bufs) + FREE(pool->bufs); + if(pool->map) + pb_unmap(pool->buffer); + if(pool->buffer) + pb_reference(&pool->buffer, NULL); + if(pool) + FREE(pool); + return NULL; +} diff --git a/src/gallium/auxiliary/pipebuffer/pb_winsys.c b/src/gallium/auxiliary/pipebuffer/pb_winsys.c new file mode 100644 index 0000000000..978944091f --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_winsys.c @@ -0,0 +1,170 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Implementation of client buffer (also designated as "user buffers"), which + * are just state-tracker owned data masqueraded as buffers. + * + * \author José Fonseca + */ + + +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" + +#include "pb_buffer.h" + + +/** + * User buffers are special buffers that initially reference memory + * held by the user but which may if necessary copy that memory into + * device memory behind the scenes, for submission to hardware. + * + * These are particularly useful when the referenced data is never + * submitted to hardware at all, in the particular case of software + * vertex processing. + */ +struct pb_user_buffer +{ + struct pb_buffer base; + void *data; +}; + + +extern const struct pb_vtbl pb_user_buffer_vtbl; + + +static INLINE struct pb_user_buffer * +pb_user_buffer(struct pb_buffer *buf) +{ + assert(buf); + assert(buf->vtbl == &pb_user_buffer_vtbl); + return (struct pb_user_buffer *)buf; +} + + +static void +pb_user_buffer_destroy(struct pb_buffer *buf) +{ + assert(buf); + FREE(buf); +} + + +static void * +pb_user_buffer_map(struct pb_buffer *buf, + unsigned flags) +{ + return pb_user_buffer(buf)->data; +} + + +static void +pb_user_buffer_unmap(struct pb_buffer *buf) +{ + /* No-op */ +} + + +static void +pb_user_buffer_get_base_buffer(struct pb_buffer *buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + *base_buf = buf; + *offset = 0; +} + + +const struct pb_vtbl +pb_user_buffer_vtbl = { + pb_user_buffer_destroy, + pb_user_buffer_map, + pb_user_buffer_unmap, + pb_user_buffer_get_base_buffer +}; + + +static struct pipe_buffer * +pb_winsys_user_buffer_create(struct pipe_winsys *winsys, + void *data, + unsigned bytes) +{ + struct pb_user_buffer *buf = CALLOC_STRUCT(pb_user_buffer); + + if(!buf) + return NULL; + + buf->base.base.refcount = 1; + buf->base.base.size = bytes; + buf->base.base.alignment = 0; + buf->base.base.usage = 0; + + buf->base.vtbl = &pb_user_buffer_vtbl; + buf->data = data; + + return &buf->base.base; +} + + +static void * +pb_winsys_buffer_map(struct pipe_winsys *winsys, + struct pipe_buffer *buf, + unsigned flags) +{ + (void)winsys; + return pb_map(pb_buffer(buf), flags); +} + + +static void +pb_winsys_buffer_unmap(struct pipe_winsys *winsys, + struct pipe_buffer *buf) +{ + (void)winsys; + pb_unmap(pb_buffer(buf)); +} + + +static void +pb_winsys_buffer_destroy(struct pipe_winsys *winsys, + struct pipe_buffer *buf) +{ + (void)winsys; + pb_destroy(pb_buffer(buf)); +} + + +void +pb_init_winsys(struct pipe_winsys *winsys) +{ + winsys->user_buffer_create = pb_winsys_user_buffer_create; + winsys->buffer_map = pb_winsys_buffer_map; + winsys->buffer_unmap = pb_winsys_buffer_unmap; + winsys->buffer_destroy = pb_winsys_buffer_destroy; +} diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile new file mode 100644 index 0000000000..12a8bd0409 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -0,0 +1,3 @@ +default: + cd ../.. ; make + diff --git a/src/gallium/auxiliary/tgsi/exec/Makefile b/src/gallium/auxiliary/tgsi/exec/Makefile new file mode 100644 index 0000000000..eb8b14e0e8 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/exec/Makefile @@ -0,0 +1,3 @@ +default: + cd ../../.. ; make + diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c new file mode 100644 index 0000000000..a8f64c2287 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -0,0 +1,2485 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI interpretor/executor. + * + * Flow control information: + * + * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) + * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special + * care since a condition may be true for some quad components but false + * for other components. + * + * We basically execute all statements (even if they're in the part of + * an IF/ELSE clause that's "not taken") and use a special mask to + * control writing to destination registers. This is the ExecMask. + * See store_dest(). + * + * The ExecMask is computed from three other masks (CondMask, LoopMask and + * ContMask) which are controlled by the flow control instructions (namely: + * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). + * + * + * Authors: + * Michal Krol + * Brian Paul + */ + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" +#include "tgsi_exec.h" + +#define TILE_TOP_LEFT 0 +#define TILE_TOP_RIGHT 1 +#define TILE_BOTTOM_LEFT 2 +#define TILE_BOTTOM_RIGHT 3 + +/* + * Shorthand locations of various utility registers (_I = Index, _C = Channel) + */ +#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I +#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C +#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I +#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C +#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I +#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C +#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I +#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C +#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I +#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C +#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I +#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C +#define TEMP_128_I TGSI_EXEC_TEMP_128_I +#define TEMP_128_C TGSI_EXEC_TEMP_128_C +#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I +#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C +#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I +#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C +#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I +#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C +#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I +#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C +#define TEMP_R0 TGSI_EXEC_TEMP_R0 + +#define FOR_EACH_CHANNEL(CHAN)\ + for (CHAN = 0; CHAN < 4; CHAN++) + +#define IS_CHANNEL_ENABLED(INST, CHAN)\ + ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + +#define IS_CHANNEL_ENABLED2(INST, CHAN)\ + ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) + +#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ + FOR_EACH_CHANNEL( CHAN )\ + if (IS_CHANNEL_ENABLED( INST, CHAN )) + +#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ + FOR_EACH_CHANNEL( CHAN )\ + if (IS_CHANNEL_ENABLED2( INST, CHAN )) + + +/** The execution mask depends on the conditional mask and the loop mask */ +#define UPDATE_EXEC_MASK(MACH) \ + MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask + + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + + + +static void +tgsi_exec_prepare( struct tgsi_exec_machine *mach ) +{ + struct tgsi_exec_labels *labels = &mach->Labels; + struct tgsi_parse_context parse; + struct tgsi_full_instruction *instructions; + struct tgsi_full_declaration *declarations; + uint maxInstructions = 10, numInstructions = 0; + uint maxDeclarations = 10, numDeclarations = 0; + uint k; + uint instno = 0; + + mach->ImmLimit = 0; + labels->count = 0; + + declarations = (struct tgsi_full_declaration *) + MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); + + instructions = (struct tgsi_full_instruction *) + MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); + + k = tgsi_parse_init( &parse, mach->Tokens ); + if (k != TGSI_PARSE_OK) { + debug_printf("Problem parsing!\n"); + return; + } + + while( !tgsi_parse_end_of_tokens( &parse ) ) { + uint pointer = parse.Position; + uint i; + + tgsi_parse_token( &parse ); + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* save expanded declaration */ + if (numDeclarations == maxDeclarations) { + declarations = REALLOC(declarations, + maxDeclarations + * sizeof(struct tgsi_full_declaration), + (maxDeclarations + 10) + * sizeof(struct tgsi_full_declaration)); + maxDeclarations += 10; + } + memcpy(declarations + numDeclarations, + &parse.FullToken.FullDeclaration, + sizeof(declarations[0])); + numDeclarations++; + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + assert( size % 4 == 0 ); + assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES ); + + for( i = 0; i < size; i++ ) { + mach->Imms[mach->ImmLimit + i / 4][i % 4] = parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + } + mach->ImmLimit += size / 4; + } + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + assert( labels->count < 128 ); + + labels->labels[labels->count][0] = instno; + labels->labels[labels->count][1] = pointer; + labels->count++; + + /* save expanded instruction */ + if (numInstructions == maxInstructions) { + instructions = REALLOC(instructions, + maxInstructions + * sizeof(struct tgsi_full_instruction), + (maxInstructions + 10) + * sizeof(struct tgsi_full_instruction)); + maxInstructions += 10; + } + memcpy(instructions + numInstructions, + &parse.FullToken.FullInstruction, + sizeof(instructions[0])); + numInstructions++; + break; + + default: + assert( 0 ); + } + } + tgsi_parse_free (&parse); + + if (mach->Declarations) { + FREE( mach->Declarations ); + } + mach->Declarations = declarations; + mach->NumDeclarations = numDeclarations; + + if (mach->Instructions) { + FREE( mach->Instructions ); + } + mach->Instructions = instructions; + mach->NumInstructions = numInstructions; +} + + +/** + * Initialize machine state by expanding tokens to full instructions, + * allocating temporary storage, setting up constants, etc. + * After this, we can call tgsi_exec_machine_run() many times. + */ +void +tgsi_exec_machine_init( + struct tgsi_exec_machine *mach, + const struct tgsi_token *tokens, + uint numSamplers, + struct tgsi_sampler *samplers) +{ + uint i, k; + struct tgsi_parse_context parse; + +#if 0 + tgsi_dump(tokens, 0); +#endif + + mach->Tokens = tokens; + + mach->Samplers = samplers; + + k = tgsi_parse_init (&parse, mach->Tokens); + if (k != TGSI_PARSE_OK) { + debug_printf( "Problem parsing!\n" ); + return; + } + + mach->Processor = parse.FullHeader.Processor.Processor; + tgsi_parse_free (&parse); + + mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps); + mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; + + /* Setup constants. */ + for( i = 0; i < 4; i++ ) { + mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; + mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; + mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; + mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; + mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; + mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; + mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; + mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; + } + + tgsi_exec_prepare( mach ); +} + + +void +tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach) +{ + if (mach->Instructions) { + FREE(mach->Instructions); + mach->Instructions = NULL; + mach->NumInstructions = 0; + } + if (mach->Declarations) { + FREE(mach->Declarations); + mach->Declarations = NULL; + mach->NumDeclarations = 0; + } +} + + +static void +micro_abs( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (float) fabs( (double) src->f[0] ); + dst->f[1] = (float) fabs( (double) src->f[1] ); + dst->f[2] = (float) fabs( (double) src->f[2] ); + dst->f[3] = (float) fabs( (double) src->f[3] ); +} + +static void +micro_add( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] + src1->f[0]; + dst->f[1] = src0->f[1] + src1->f[1]; + dst->f[2] = src0->f[2] + src1->f[2]; + dst->f[3] = src0->f[3] + src1->f[3]; +} + +static void +micro_iadd( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] + src1->i[0]; + dst->i[1] = src0->i[1] + src1->i[1]; + dst->i[2] = src0->i[2] + src1->i[2]; + dst->i[3] = src0->i[3] + src1->i[3]; +} + +static void +micro_and( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] & src1->u[0]; + dst->u[1] = src0->u[1] & src1->u[1]; + dst->u[2] = src0->u[2] & src1->u[2]; + dst->u[3] = src0->u[3] & src1->u[3]; +} + +static void +micro_ceil( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (float) ceil( (double) src->f[0] ); + dst->f[1] = (float) ceil( (double) src->f[1] ); + dst->f[2] = (float) ceil( (double) src->f[2] ); + dst->f[3] = (float) ceil( (double) src->f[3] ); +} + +static void +micro_cos( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (float) cos( (double) src->f[0] ); + dst->f[1] = (float) cos( (double) src->f[1] ); + dst->f[2] = (float) cos( (double) src->f[2] ); + dst->f[3] = (float) cos( (double) src->f[3] ); +} + +static void +micro_ddx( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; +} + +static void +micro_ddy( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; +} + +static void +micro_div( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] / src1->f[0]; + dst->f[1] = src0->f[1] / src1->f[1]; + dst->f[2] = src0->f[2] / src1->f[2]; + dst->f[3] = src0->f[3] / src1->f[3]; +} + +static void +micro_udiv( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] / src1->u[0]; + dst->u[1] = src0->u[1] / src1->u[1]; + dst->u[2] = src0->u[2] / src1->u[2]; + dst->u[3] = src0->u[3] / src1->u[3]; +} + +static void +micro_eq( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; + dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; + dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; + dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; +} + +static void +micro_ieq( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; + dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; + dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; + dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; +} + +static void +micro_exp2( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = (float) pow( 2.0, (double) src->f[0] ); + dst->f[1] = (float) pow( 2.0, (double) src->f[1] ); + dst->f[2] = (float) pow( 2.0, (double) src->f[2] ); + dst->f[3] = (float) pow( 2.0, (double) src->f[3] ); +} + +static void +micro_f2it( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->i[0] = (int) src->f[0]; + dst->i[1] = (int) src->f[1]; + dst->i[2] = (int) src->f[2]; + dst->i[3] = (int) src->f[3]; +} + +static void +micro_f2ut( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->u[0] = (uint) src->f[0]; + dst->u[1] = (uint) src->f[1]; + dst->u[2] = (uint) src->f[2]; + dst->u[3] = (uint) src->f[3]; +} + +static void +micro_flr( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (float) floor( (double) src->f[0] ); + dst->f[1] = (float) floor( (double) src->f[1] ); + dst->f[2] = (float) floor( (double) src->f[2] ); + dst->f[3] = (float) floor( (double) src->f[3] ); +} + +static void +micro_frc( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = src->f[0] - (float) floor( (double) src->f[0] ); + dst->f[1] = src->f[1] - (float) floor( (double) src->f[1] ); + dst->f[2] = src->f[2] - (float) floor( (double) src->f[2] ); + dst->f[3] = src->f[3] - (float) floor( (double) src->f[3] ); +} + +static void +micro_ge( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0]; + dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1]; + dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2]; + dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3]; +} + +static void +micro_i2f( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (float) src->i[0]; + dst->f[1] = (float) src->i[1]; + dst->f[2] = (float) src->i[2]; + dst->f[3] = (float) src->i[3]; +} + +static void +micro_lg2( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (float) log( (double) src->f[0] ) * 1.442695f; + dst->f[1] = (float) log( (double) src->f[1] ) * 1.442695f; + dst->f[2] = (float) log( (double) src->f[2] ) * 1.442695f; + dst->f[3] = (float) log( (double) src->f[3] ) * 1.442695f; +} + +static void +micro_lt( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; + dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; + dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; + dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; +} + +static void +micro_ilt( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; + dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; + dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; + dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; +} + +static void +micro_ult( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; + dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; + dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; + dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; +} + +static void +micro_max( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; + dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; + dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; + dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; +} + +static void +micro_imax( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; + dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; + dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; + dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; +} + +static void +micro_umax( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; + dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; + dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; + dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; +} + +static void +micro_min( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; + dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; + dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; + dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; +} + +static void +micro_imin( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; + dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; + dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; + dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; +} + +static void +micro_umin( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; + dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; + dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; + dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; +} + +static void +micro_umod( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] % src1->u[0]; + dst->u[1] = src0->u[1] % src1->u[1]; + dst->u[2] = src0->u[2] % src1->u[2]; + dst->u[3] = src0->u[3] % src1->u[3]; +} + +static void +micro_mul( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] * src1->f[0]; + dst->f[1] = src0->f[1] * src1->f[1]; + dst->f[2] = src0->f[2] * src1->f[2]; + dst->f[3] = src0->f[3] * src1->f[3]; +} + +static void +micro_imul( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] * src1->i[0]; + dst->i[1] = src0->i[1] * src1->i[1]; + dst->i[2] = src0->i[2] * src1->i[2]; + dst->i[3] = src0->i[3] * src1->i[3]; +} + +static void +micro_imul64( + union tgsi_exec_channel *dst0, + union tgsi_exec_channel *dst1, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst1->i[0] = src0->i[0] * src1->i[0]; + dst1->i[1] = src0->i[1] * src1->i[1]; + dst1->i[2] = src0->i[2] * src1->i[2]; + dst1->i[3] = src0->i[3] * src1->i[3]; + dst0->i[0] = 0; + dst0->i[1] = 0; + dst0->i[2] = 0; + dst0->i[3] = 0; +} + +static void +micro_umul64( + union tgsi_exec_channel *dst0, + union tgsi_exec_channel *dst1, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst1->u[0] = src0->u[0] * src1->u[0]; + dst1->u[1] = src0->u[1] * src1->u[1]; + dst1->u[2] = src0->u[2] * src1->u[2]; + dst1->u[3] = src0->u[3] * src1->u[3]; + dst0->u[0] = 0; + dst0->u[1] = 0; + dst0->u[2] = 0; + dst0->u[3] = 0; +} + +static void +micro_movc( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2 ) +{ + dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; + dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; + dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; + dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; +} + +static void +micro_neg( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = -src->f[0]; + dst->f[1] = -src->f[1]; + dst->f[2] = -src->f[2]; + dst->f[3] = -src->f[3]; +} + +static void +micro_ineg( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->i[0] = -src->i[0]; + dst->i[1] = -src->i[1]; + dst->i[2] = -src->i[2]; + dst->i[3] = -src->i[3]; +} + +static void +micro_not( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->u[0] = ~src->u[0]; + dst->u[1] = ~src->u[1]; + dst->u[2] = ~src->u[2]; + dst->u[3] = ~src->u[3]; +} + +static void +micro_or( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] | src1->u[0]; + dst->u[1] = src0->u[1] | src1->u[1]; + dst->u[2] = src0->u[2] | src1->u[2]; + dst->u[3] = src0->u[3] | src1->u[3]; +} + +static void +micro_pow( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = (float) pow( (double) src0->f[0], (double) src1->f[0] ); + dst->f[1] = (float) pow( (double) src0->f[1], (double) src1->f[1] ); + dst->f[2] = (float) pow( (double) src0->f[2], (double) src1->f[2] ); + dst->f[3] = (float) pow( (double) src0->f[3], (double) src1->f[3] ); +} + +static void +micro_rnd( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (float) floor( (double) (src->f[0] + 0.5f) ); + dst->f[1] = (float) floor( (double) (src->f[1] + 0.5f) ); + dst->f[2] = (float) floor( (double) (src->f[2] + 0.5f) ); + dst->f[3] = (float) floor( (double) (src->f[3] + 0.5f) ); +} + +static void +micro_shl( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] << src1->i[0]; + dst->i[1] = src0->i[1] << src1->i[1]; + dst->i[2] = src0->i[2] << src1->i[2]; + dst->i[3] = src0->i[3] << src1->i[3]; +} + +static void +micro_ishr( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] >> src1->i[0]; + dst->i[1] = src0->i[1] >> src1->i[1]; + dst->i[2] = src0->i[2] >> src1->i[2]; + dst->i[3] = src0->i[3] >> src1->i[3]; +} + +static void +micro_trunc( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0 ) +{ + dst->f[0] = (float) (int) src0->f[0]; + dst->f[1] = (float) (int) src0->f[1]; + dst->f[2] = (float) (int) src0->f[2]; + dst->f[3] = (float) (int) src0->f[3]; +} + +static void +micro_ushr( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] >> src1->u[0]; + dst->u[1] = src0->u[1] >> src1->u[1]; + dst->u[2] = src0->u[2] >> src1->u[2]; + dst->u[3] = src0->u[3] >> src1->u[3]; +} + +static void +micro_sin( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (float) sin( (double) src->f[0] ); + dst->f[1] = (float) sin( (double) src->f[1] ); + dst->f[2] = (float) sin( (double) src->f[2] ); + dst->f[3] = (float) sin( (double) src->f[3] ); +} + +static void +micro_sqrt( union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (float) sqrt( (double) src->f[0] ); + dst->f[1] = (float) sqrt( (double) src->f[1] ); + dst->f[2] = (float) sqrt( (double) src->f[2] ); + dst->f[3] = (float) sqrt( (double) src->f[3] ); +} + +static void +micro_sub( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] - src1->f[0]; + dst->f[1] = src0->f[1] - src1->f[1]; + dst->f[2] = src0->f[2] - src1->f[2]; + dst->f[3] = src0->f[3] - src1->f[3]; +} + +static void +micro_u2f( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (float) src->u[0]; + dst->f[1] = (float) src->u[1]; + dst->f[2] = (float) src->u[2]; + dst->f[3] = (float) src->u[3]; +} + +static void +micro_xor( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] ^ src1->u[0]; + dst->u[1] = src0->u[1] ^ src1->u[1]; + dst->u[2] = src0->u[2] ^ src1->u[2]; + dst->u[3] = src0->u[3] ^ src1->u[3]; +} + +static void +fetch_src_file_channel( + const struct tgsi_exec_machine *mach, + const uint file, + const uint swizzle, + const union tgsi_exec_channel *index, + union tgsi_exec_channel *chan ) +{ + switch( swizzle ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch( file ) { + case TGSI_FILE_CONSTANT: + chan->f[0] = mach->Consts[index->i[0]][swizzle]; + chan->f[1] = mach->Consts[index->i[1]][swizzle]; + chan->f[2] = mach->Consts[index->i[2]][swizzle]; + chan->f[3] = mach->Consts[index->i[3]][swizzle]; + break; + + case TGSI_FILE_INPUT: + chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_TEMPORARY: + chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_IMMEDIATE: + assert( index->i[0] < (int) mach->ImmLimit ); + chan->f[0] = mach->Imms[index->i[0]][swizzle]; + assert( index->i[1] < (int) mach->ImmLimit ); + chan->f[1] = mach->Imms[index->i[1]][swizzle]; + assert( index->i[2] < (int) mach->ImmLimit ); + chan->f[2] = mach->Imms[index->i[2]][swizzle]; + assert( index->i[3] < (int) mach->ImmLimit ); + chan->f[3] = mach->Imms[index->i[3]][swizzle]; + break; + + case TGSI_FILE_ADDRESS: + chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_OUTPUT: + /* vertex/fragment output vars can be read too */ + chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + default: + assert( 0 ); + } + break; + + case TGSI_EXTSWIZZLE_ZERO: + *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; + break; + + case TGSI_EXTSWIZZLE_ONE: + *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; + break; + + default: + assert( 0 ); + } +} + +static void +fetch_source( + const struct tgsi_exec_machine *mach, + union tgsi_exec_channel *chan, + const struct tgsi_full_src_register *reg, + const uint chan_index ) +{ + union tgsi_exec_channel index; + uint swizzle; + + index.i[0] = + index.i[1] = + index.i[2] = + index.i[3] = reg->SrcRegister.Index; + + if (reg->SrcRegister.Indirect) { + union tgsi_exec_channel index2; + union tgsi_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); + fetch_src_file_channel( + mach, + reg->SrcRegisterInd.File, + swizzle, + &index2, + &indir_index ); + + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; + } + + if( reg->SrcRegister.Dimension ) { + switch( reg->SrcRegister.File ) { + case TGSI_FILE_INPUT: + index.i[0] *= 17; + index.i[1] *= 17; + index.i[2] *= 17; + index.i[3] *= 17; + break; + case TGSI_FILE_CONSTANT: + index.i[0] *= 4096; + index.i[1] *= 4096; + index.i[2] *= 4096; + index.i[3] *= 4096; + break; + default: + assert( 0 ); + } + + index.i[0] += reg->SrcRegisterDim.Index; + index.i[1] += reg->SrcRegisterDim.Index; + index.i[2] += reg->SrcRegisterDim.Index; + index.i[3] += reg->SrcRegisterDim.Index; + + if (reg->SrcRegisterDim.Indirect) { + union tgsi_exec_channel index2; + union tgsi_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterDimInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); + fetch_src_file_channel( + mach, + reg->SrcRegisterDimInd.File, + swizzle, + &index2, + &indir_index ); + + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; + } + } + + swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + fetch_src_file_channel( + mach, + reg->SrcRegister.File, + swizzle, + &index, + chan ); + + switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { + case TGSI_UTIL_SIGN_CLEAR: + micro_abs( chan, chan ); + break; + + case TGSI_UTIL_SIGN_SET: + micro_abs( chan, chan ); + micro_neg( chan, chan ); + break; + + case TGSI_UTIL_SIGN_TOGGLE: + micro_neg( chan, chan ); + break; + + case TGSI_UTIL_SIGN_KEEP: + break; + } + + if (reg->SrcRegisterExtMod.Complement) { + micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); + } +} + +static void +store_dest( + struct tgsi_exec_machine *mach, + const union tgsi_exec_channel *chan, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + uint chan_index ) +{ + union tgsi_exec_channel *dst; + + switch( reg->DstRegister.File ) { + case TGSI_FILE_NULL: + return; + + case TGSI_FILE_OUTPUT: + dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] + + reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_TEMPORARY: + dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_ADDRESS: + dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; + break; + + default: + assert( 0 ); + return; + } + + switch (inst->Instruction.Saturate) + { + case TGSI_SAT_NONE: + if (mach->ExecMask & 0x1) + dst->i[0] = chan->i[0]; + if (mach->ExecMask & 0x2) + dst->i[1] = chan->i[1]; + if (mach->ExecMask & 0x4) + dst->i[2] = chan->i[2]; + if (mach->ExecMask & 0x8) + dst->i[3] = chan->i[3]; + break; + + case TGSI_SAT_ZERO_ONE: + /* XXX need to obey ExecMask here */ + micro_max(dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); + micro_min(dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + + default: + assert( 0 ); + } +} + +#define FETCH(VAL,INDEX,CHAN)\ + fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) + +#define STORE(VAL,INDEX,CHAN)\ + store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) + + +/** + * Execute ARB-style KIL which is predicated by a src register. + * Kill fragment if any of the four values is less than zero. + */ +static void +exec_kilp(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint uniquemask; + uint chan_index; + uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + union tgsi_exec_channel r[1]; + + /* This mask stores component bits that were already tested. Note that + * we test if the value is less than zero, so 1.0 and 0.0 need not to be + * tested. */ + uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + + for (chan_index = 0; chan_index < 4; chan_index++) + { + uint swizzle; + uint i; + + /* unswizzle channel */ + swizzle = tgsi_util_get_full_src_register_extswizzle ( + &inst->FullSrcRegisters[0], + chan_index); + + /* check if the component has not been already tested */ + if (uniquemask & (1 << swizzle)) + continue; + uniquemask |= 1 << swizzle; + + FETCH(&r[0], 0, chan_index); + for (i = 0; i < 4; i++) + if (r[0].f[i] < 0.0f) + kilmask |= 1 << i; + } + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; +} + + +/* + * Fetch a texel using STR texture coordinates. + */ +static void +fetch_texel( struct tgsi_sampler *sampler, + const union tgsi_exec_channel *s, + const union tgsi_exec_channel *t, + const union tgsi_exec_channel *p, + float lodbias, /* XXX should be float[4] */ + union tgsi_exec_channel *r, + union tgsi_exec_channel *g, + union tgsi_exec_channel *b, + union tgsi_exec_channel *a ) +{ + uint j; + float rgba[NUM_CHANNELS][QUAD_SIZE]; + + sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); + + for (j = 0; j < 4; j++) { + r->f[j] = rgba[0][j]; + g->f[j] = rgba[1][j]; + b->f[j] = rgba[2][j]; + a->f[j] = rgba[3][j]; + } +} + + +static void +exec_tex(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + boolean biasLod) +{ + const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + union tgsi_exec_channel r[8]; + uint chan_index; + float lodBias; + + /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ + + switch (inst->InstructionExtTexture.Texture) { + case TGSI_TEXTURE_1D: + + FETCH(&r[0], 0, CHAN_X); + + switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { + case TGSI_EXTSWIZZLE_W: + FETCH(&r[1], 0, CHAN_W); + micro_div( &r[0], &r[0], &r[1] ); + break; + + case TGSI_EXTSWIZZLE_ONE: + break; + + default: + assert (0); + } + + if (biasLod) { + FETCH(&r[1], 0, CHAN_W); + lodBias = r[2].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { + case TGSI_EXTSWIZZLE_W: + FETCH(&r[3], 0, CHAN_W); + micro_div( &r[0], &r[0], &r[3] ); + micro_div( &r[1], &r[1], &r[3] ); + micro_div( &r[2], &r[2], &r[3] ); + break; + + case TGSI_EXTSWIZZLE_ONE: + break; + + default: + assert (0); + } + + if (biasLod) { + FETCH(&r[3], 0, CHAN_W); + lodBias = r[3].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], &r[1], &r[2], lodBias, /* inputs */ + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { + case TGSI_EXTSWIZZLE_W: + FETCH(&r[3], 0, CHAN_W); + micro_div( &r[0], &r[0], &r[3] ); + micro_div( &r[1], &r[1], &r[3] ); + micro_div( &r[2], &r[2], &r[3] ); + break; + + case TGSI_EXTSWIZZLE_ONE: + break; + + default: + assert (0); + } + + if (biasLod) { + FETCH(&r[3], 0, CHAN_W); + lodBias = r[3].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], &r[1], &r[2], lodBias, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + assert (0); + } + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[chan_index], 0, chan_index ); + } +} + + +/** + * Evaluate a constant-valued coefficient at the position of the + * current quad. + */ +static void +eval_constant_coef( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; + } +} + +/** + * Evaluate a linear-valued coefficient at the position of the + * current quad. + */ +static void +eval_linear_coef( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + mach->Inputs[attrib].xyzw[chan].f[0] = a0; + mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; + mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; + mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; +} + +/** + * Evaluate a perspective-valued coefficient at the position of the + * current quad. + */ +static void +eval_perspective_coef( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + const float *w = mach->QuadPos.xyzw[3].f; + /* divide by W here */ + mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; + mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; + mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; + mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; +} + + +typedef void (* eval_coef_func)( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ); + +static void +exec_declaration( + struct tgsi_exec_machine *mach, + const struct tgsi_full_declaration *decl ) +{ + if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + eval_coef_func eval; + + assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); + + first = decl->u.DeclarationRange.First; + last = decl->u.DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + switch( decl->Interpolation.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + eval = eval_constant_coef; + break; + + case TGSI_INTERPOLATE_LINEAR: + eval = eval_linear_coef; + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + eval = eval_perspective_coef; + break; + + default: + assert( 0 ); + } + + if( mask == TGSI_WRITEMASK_XYZW ) { + unsigned i, j; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + eval( mach, i, j ); + } + } + } + else { + unsigned i, j; + + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + for( i = first; i <= last; i++ ) { + eval( mach, i, j ); + } + } + } + } + } + } +} + +static void +exec_instruction( + struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + int *pc ) +{ + uint chan_index; + union tgsi_exec_channel r[8]; + + (*pc)++; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_f2it( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOV: + /* TGSI_OPCODE_SWZ */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LIT: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_X ); + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[1], 0, CHAN_Y ); + micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + + FETCH( &r[2], 0, CHAN_W ); + micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); + micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); + micro_pow( &r[1], &r[1], &r[2] ); + micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, CHAN_Z ); + } + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + FETCH( &r[0], 0, CHAN_X ); + micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + FETCH( &r[0], 0, CHAN_X ); + micro_sqrt( &r[0], &r[0] ); + micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXP: + assert (0); + break; + + case TGSI_OPCODE_LOG: + assert (0); + break; + + case TGSI_OPCODE_MUL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) + { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + micro_mul( &r[0], &r[0], &r[1] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_ADD: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_add( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH( &r[1], 0, CHAN_Z ); + FETCH( &r[2], 1, CHAN_Z ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_W); + FETCH(&r[2], 1, CHAN_W); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DST: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + FETCH( &r[0], 0, CHAN_Y ); + FETCH( &r[1], 1, CHAN_Y); + micro_mul( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_Z ); + STORE( &r[0], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + FETCH( &r[0], 1, CHAN_W ); + STORE( &r[0], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MIN: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + /* XXX use micro_min()?? */ + micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_MAX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + /* XXX use micro_max()?? */ + micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); + + STORE(&r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_mul( &r[0], &r[0], &r[1] ); + FETCH( &r[1], 2, chan_index ); + micro_add( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SUB: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + micro_sub( &r[0], &r[0], &r[1] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_LERP: + /* TGSI_OPCODE_LRP */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + micro_sub( &r[1], &r[1], &r[2] ); + micro_mul( &r[0], &r[0], &r[1] ); + micro_add( &r[0], &r[0], &r[2] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_CND: + assert (0); + break; + + case TGSI_OPCODE_CND0: + assert (0); + break; + + case TGSI_OPCODE_DOT2ADD: + /* TGSI_OPCODE_DP2A */ + assert (0); + break; + + case TGSI_OPCODE_INDEX: + assert (0); + break; + + case TGSI_OPCODE_NEGATE: + assert (0); + break; + + case TGSI_OPCODE_FRAC: + /* TGSI_OPCODE_FRC */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_frc( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CLAMP: + assert (0); + break; + + case TGSI_OPCODE_FLOOR: + /* TGSI_OPCODE_FLR */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_flr( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_ROUND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_rnd( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXPBASE2: + /* TGSI_OPCODE_EX2 */ + FETCH(&r[0], 0, CHAN_X); + + micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LOGBASE2: + /* TGSI_OPCODE_LG2 */ + FETCH( &r[0], 0, CHAN_X ); + micro_lg2( &r[0], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_POWER: + /* TGSI_OPCODE_POW */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + micro_pow( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CROSSPRODUCT: + /* TGSI_OPCODE_XPD */ + FETCH(&r[0], 0, CHAN_Y); + FETCH(&r[1], 1, CHAN_Z); + + micro_mul( &r[2], &r[0], &r[1] ); + + FETCH(&r[3], 0, CHAN_Z); + FETCH(&r[4], 1, CHAN_Y); + + micro_mul( &r[5], &r[3], &r[4] ); + micro_sub( &r[2], &r[2], &r[5] ); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &r[2], 0, CHAN_X ); + } + + FETCH(&r[2], 1, CHAN_X); + + micro_mul( &r[3], &r[3], &r[2] ); + + FETCH(&r[5], 0, CHAN_X); + + micro_mul( &r[1], &r[1], &r[5] ); + micro_sub( &r[3], &r[3], &r[1] ); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + STORE( &r[3], 0, CHAN_Y ); + } + + micro_mul( &r[5], &r[5], &r[4] ); + micro_mul( &r[0], &r[0], &r[2] ); + micro_sub( &r[5], &r[5], &r[0] ); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + STORE( &r[5], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MULTIPLYMATRIX: + assert (0); + break; + + case TGSI_OPCODE_ABS: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + + micro_abs( &r[0], &r[0] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_RCC: + assert (0); + break; + + case TGSI_OPCODE_DPH: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 1, CHAN_W); + + micro_add( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_COS: + FETCH(&r[0], 0, CHAN_X); + + micro_cos( &r[0], &r[0] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_ddx( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDY: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_ddy( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_KILP: + exec_kilp (mach, inst); + break; + + case TGSI_OPCODE_KIL: + /* for enabled ExecMask bits, set the killed bit */ + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask; + break; + + case TGSI_OPCODE_PK2H: + assert (0); + break; + + case TGSI_OPCODE_PK2US: + assert (0); + break; + + case TGSI_OPCODE_PK4B: + assert (0); + break; + + case TGSI_OPCODE_PK4UB: + assert (0); + break; + + case TGSI_OPCODE_RFL: + assert (0); + break; + + case TGSI_OPCODE_SEQ: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_eq( &r[0], &r[0], &r[1], + &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], + &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SFL: + assert (0); + break; + + case TGSI_OPCODE_SGT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SIN: + FETCH( &r[0], 0, CHAN_X ); + micro_sin( &r[0], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLE: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SNE: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_STR: + assert (0); + break; + + case TGSI_OPCODE_TEX: + /* simple texture lookup */ + /* src[0] = texcoord */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, FALSE); + break; + + case TGSI_OPCODE_TXB: + /* Texture lookup with lod bias */ + /* src[0] = texcoord (src[0].w = LOD bias) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE); + break; + + case TGSI_OPCODE_TXD: + /* Texture lookup with explict partial derivatives */ + /* src[0] = texcoord */ + /* src[1] = d[strq]/dx */ + /* src[2] = d[strq]/dy */ + /* src[3] = sampler unit */ + assert (0); + break; + + case TGSI_OPCODE_TXL: + /* Texture lookup with explit LOD */ + /* src[0] = texcoord (src[0].w = LOD) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE); + break; + + case TGSI_OPCODE_UP2H: + assert (0); + break; + + case TGSI_OPCODE_UP2US: + assert (0); + break; + + case TGSI_OPCODE_UP4B: + assert (0); + break; + + case TGSI_OPCODE_UP4UB: + assert (0); + break; + + case TGSI_OPCODE_X2D: + assert (0); + break; + + case TGSI_OPCODE_ARA: + assert (0); + break; + + case TGSI_OPCODE_ARR: + assert (0); + break; + + case TGSI_OPCODE_BRA: + assert (0); + break; + + case TGSI_OPCODE_CAL: + /* skip the call if no execution channels are enabled */ + if (mach->ExecMask) { + /* do the call */ + + /* push the Cond, Loop, Cont stacks */ + assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + + assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); + mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; + + /* note that PC was already incremented above */ + mach->CallStack[mach->CallStackTop++] = *pc; + *pc = inst->InstructionExtLabel.Label; + } + break; + + case TGSI_OPCODE_RET: + mach->FuncMask &= ~mach->ExecMask; + UPDATE_EXEC_MASK(mach); + + if (mach->ExecMask == 0x0) { + /* really return now (otherwise, keep executing */ + + if (mach->CallStackTop == 0) { + /* returning from main() */ + *pc = -1; + return; + } + *pc = mach->CallStack[--mach->CallStackTop]; + + /* pop the Cond, Loop, Cont stacks */ + assert(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + assert(mach->FuncStackTop > 0); + mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; + + UPDATE_EXEC_MASK(mach); + } + break; + + case TGSI_OPCODE_SSG: + assert (0); + break; + + case TGSI_OPCODE_CMP: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_SCS: + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( &r[0], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { + micro_cos( &r[1], &r[0] ); + STORE( &r[1], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + micro_sin( &r[1], &r[0] ); + STORE( &r[1], 0, CHAN_Y ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_NRM: + assert (0); + break; + + case TGSI_OPCODE_DIV: + assert( 0 ); + break; + + case TGSI_OPCODE_DP2: + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_IF: + /* push CondMask */ + assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + FETCH( &r[0], 0, CHAN_X ); + /* update CondMask */ + if( ! r[0].u[0] ) { + mach->CondMask &= ~0x1; + } + if( ! r[0].u[1] ) { + mach->CondMask &= ~0x2; + } + if( ! r[0].u[2] ) { + mach->CondMask &= ~0x4; + } + if( ! r[0].u[3] ) { + mach->CondMask &= ~0x8; + } + UPDATE_EXEC_MASK(mach); + /* Todo: If CondMask==0, jump to ELSE */ + break; + + case TGSI_OPCODE_ELSE: + /* invert CondMask wrt previous mask */ + { + uint prevMask; + assert(mach->CondStackTop > 0); + prevMask = mach->CondStack[mach->CondStackTop - 1]; + mach->CondMask = ~mach->CondMask & prevMask; + UPDATE_EXEC_MASK(mach); + /* Todo: If CondMask==0, jump to ENDIF */ + } + break; + + case TGSI_OPCODE_ENDIF: + /* pop CondMask */ + assert(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_END: + /* halt execution */ + *pc = -1; + break; + + case TGSI_OPCODE_REP: + assert (0); + break; + + case TGSI_OPCODE_ENDREP: + assert (0); + break; + + case TGSI_OPCODE_PUSHA: + assert (0); + break; + + case TGSI_OPCODE_POPA: + assert (0); + break; + + case TGSI_OPCODE_CEIL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_ceil( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_I2F: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_i2f( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_NOT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_not( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_TRUNC: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_trunc( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_shl( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_ishr( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_AND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_and( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_OR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_or( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOD: + assert (0); + break; + + case TGSI_OPCODE_XOR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_xor( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SAD: + assert (0); + break; + + case TGSI_OPCODE_TXF: + assert (0); + break; + + case TGSI_OPCODE_TXQ: + assert (0); + break; + + case TGSI_OPCODE_EMIT: + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + break; + + case TGSI_OPCODE_ENDPRIM: + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; + break; + + case TGSI_OPCODE_LOOP: + /* fall-through (for now) */ + case TGSI_OPCODE_BGNLOOP2: + /* push LoopMask and ContMasks */ + assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + break; + + case TGSI_OPCODE_ENDLOOP: + /* fall-through (for now at least) */ + case TGSI_OPCODE_ENDLOOP2: + /* Restore ContMask, but don't pop */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; + if (mach->LoopMask) { + /* repeat loop: jump to instruction just past BGNLOOP */ + *pc = inst->InstructionExtLabel.Label + 1; + } + else { + /* exit loop: pop LoopMask */ + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + /* pop ContMask */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + } + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_BRK: + /* turn off loop channels for each enabled exec channel */ + mach->LoopMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_CONT: + /* turn off cont channels for each enabled exec channel */ + mach->ContMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_BGNSUB: + /* no-op */ + break; + + case TGSI_OPCODE_ENDSUB: + /* no-op */ + break; + + case TGSI_OPCODE_NOISE1: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE2: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE3: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE4: + assert( 0 ); + break; + + case TGSI_OPCODE_NOP: + break; + + default: + assert( 0 ); + } +} + + +/** + * Run TGSI interpreter. + * \return bitmask of "alive" quad components + */ +uint +tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) +{ + uint i; + int pc = 0; + + mach->CondMask = 0xf; + mach->LoopMask = 0xf; + mach->ContMask = 0xf; + mach->FuncMask = 0xf; + mach->ExecMask = 0xf; + + mach->CondStackTop = 0; /* temporarily subvert this assertion */ + assert(mach->CondStackTop == 0); + assert(mach->LoopStackTop == 0); + assert(mach->ContStackTop == 0); + assert(mach->CallStackTop == 0); + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; + + if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; + mach->Primitives[0] = 0; + } + + + /* execute declarations (interpolants) */ + for (i = 0; i < mach->NumDeclarations; i++) { + exec_declaration( mach, mach->Declarations+i ); + } + + /* execute instructions, until pc is set to -1 */ + while (pc != -1) { + assert(pc < mach->NumInstructions); + exec_instruction( mach, mach->Instructions + pc, &pc ); + } + +#if 0 + /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { + /* + * Scale back depth component. + */ + for (i = 0; i < 4; i++) + mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; + } +#endif + + return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; +} + + diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h new file mode 100644 index 0000000000..1fb66ee960 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h @@ -0,0 +1,239 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#if !defined TGSI_EXEC_H +#define TGSI_EXEC_H + +#include "pipe/p_compiler.h" + +#if defined __cplusplus +extern "C" { +#endif + +#define NUM_CHANNELS 4 /* R,G,B,A */ +#define QUAD_SIZE 4 /* 4 pixel/quad */ + +/** + * Registers may be treated as float, signed int or unsigned int. + */ +union tgsi_exec_channel +{ + float f[QUAD_SIZE]; + int i[QUAD_SIZE]; + unsigned u[QUAD_SIZE]; +}; + +/** + * A vector[RGBA] of channels[4 pixels] + */ +struct tgsi_exec_vector +{ + union tgsi_exec_channel xyzw[NUM_CHANNELS]; +}; + +/** + * For fragment programs, information for computing fragment input + * values from plane equation of the triangle/line. + */ +struct tgsi_interp_coef +{ + float a0[NUM_CHANNELS]; /* in an xyzw layout */ + float dadx[NUM_CHANNELS]; + float dady[NUM_CHANNELS]; +}; + + +struct softpipe_tile_cache; /**< Opaque to TGSI */ + +/** + * Information for sampling textures, which must be implemented + * by code outside the TGSI executor. + */ +struct tgsi_sampler +{ + const struct pipe_sampler_state *state; + struct pipe_texture *texture; + /** Get samples for four fragments in a quad */ + void (*get_samples)(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); + void *pipe; /*XXX temporary*/ + struct softpipe_tile_cache *cache; +}; + +/** + * For branching/calling subroutines. + */ +struct tgsi_exec_labels +{ + unsigned labels[128][2]; + unsigned count; +}; + +/* + * Locations of various utility registers (_I = Index, _C = Channel) + */ +#define TGSI_EXEC_TEMP_00000000_I 32 +#define TGSI_EXEC_TEMP_00000000_C 0 + +#define TGSI_EXEC_TEMP_7FFFFFFF_I 32 +#define TGSI_EXEC_TEMP_7FFFFFFF_C 1 + +#define TGSI_EXEC_TEMP_80000000_I 32 +#define TGSI_EXEC_TEMP_80000000_C 2 + +#define TGSI_EXEC_TEMP_FFFFFFFF_I 32 +#define TGSI_EXEC_TEMP_FFFFFFFF_C 3 + +#define TGSI_EXEC_TEMP_ONE_I 33 +#define TGSI_EXEC_TEMP_ONE_C 0 + +#define TGSI_EXEC_TEMP_TWO_I 33 +#define TGSI_EXEC_TEMP_TWO_C 1 + +#define TGSI_EXEC_TEMP_128_I 33 +#define TGSI_EXEC_TEMP_128_C 2 + +#define TGSI_EXEC_TEMP_MINUS_128_I 33 +#define TGSI_EXEC_TEMP_MINUS_128_C 3 + +#define TGSI_EXEC_TEMP_KILMASK_I 34 +#define TGSI_EXEC_TEMP_KILMASK_C 0 + +#define TGSI_EXEC_TEMP_OUTPUT_I 34 +#define TGSI_EXEC_TEMP_OUTPUT_C 1 + +#define TGSI_EXEC_TEMP_PRIMITIVE_I 34 +#define TGSI_EXEC_TEMP_PRIMITIVE_C 2 + +#define TGSI_EXEC_TEMP_R0 35 + +#define TGSI_EXEC_NUM_TEMPS (32 + 4) +#define TGSI_EXEC_NUM_ADDRS 1 +#define TGSI_EXEC_NUM_IMMEDIATES 256 + +#define TGSI_EXEC_MAX_COND_NESTING 10 +#define TGSI_EXEC_MAX_LOOP_NESTING 10 +#define TGSI_EXEC_MAX_CALL_NESTING 10 + +/** + * Run-time virtual machine state for executing TGSI shader. + */ +struct tgsi_exec_machine +{ + /* + * 32 program temporaries + * 4 internal temporaries + * 1 address + * 1 temporary of padding to align to 16 bytes + */ + struct tgsi_exec_vector _Temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_ADDRS + 1]; + + /* + * This will point to _Temps after aligning to 16B boundary. + */ + struct tgsi_exec_vector *Temps; + struct tgsi_exec_vector *Addrs; + + struct tgsi_sampler *Samplers; + + float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; + unsigned ImmLimit; + float (*Consts)[4]; + struct tgsi_exec_vector *Inputs; + struct tgsi_exec_vector *Outputs; + const struct tgsi_token *Tokens; + unsigned Processor; + + /* GEOMETRY processor only. */ + unsigned *Primitives; + + /* FRAGMENT processor only. */ + const struct tgsi_interp_coef *InterpCoefs; + struct tgsi_exec_vector QuadPos; + + /* Conditional execution masks */ + uint CondMask; /**< For IF/ELSE/ENDIF */ + uint LoopMask; /**< For BGNLOOP/ENDLOOP */ + uint ContMask; /**< For loop CONT statements */ + uint FuncMask; /**< For function calls */ + uint ExecMask; /**< = CondMask & LoopMask */ + + /** Condition mask stack (for nested conditionals) */ + uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; + int CondStackTop; + + /** Loop mask stack (for nested loops) */ + uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int LoopStackTop; + + /** Loop continue mask stack (see comments in tgsi_exec.c) */ + uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int ContStackTop; + + /** Function execution mask stack (for executing subroutine code) */ + uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; + int FuncStackTop; + + /** Function call stack for saving/restoring the program counter */ + uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; + int CallStackTop; + + struct tgsi_full_instruction *Instructions; + uint NumInstructions; + + struct tgsi_full_declaration *Declarations; + uint NumDeclarations; + + struct tgsi_exec_labels Labels; +}; + + +void +tgsi_exec_machine_init( + struct tgsi_exec_machine *mach, + const struct tgsi_token *tokens, + unsigned numSamplers, + struct tgsi_sampler *samplers); + +uint +tgsi_exec_machine_run( + struct tgsi_exec_machine *mach ); + + +void +tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach); + + +#if defined __cplusplus +} /* extern "C" */ +#endif + +#endif /* TGSI_EXEC_H */ diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c new file mode 100755 index 0000000000..593464db3e --- /dev/null +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -0,0 +1,2378 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" +#include "tgsi_exec.h" +#include "tgsi_sse2.h" + +#include "x86/rtasm/x86sse.h" + +#if defined(__i386__) || defined(__386__) + +#define DUMP_SSE 0 + +#if DUMP_SSE + +static void +_print_reg( + struct x86_reg reg ) +{ + if (reg.mod != mod_REG) + debug_printf( "[" ); + + switch( reg.file ) { + case file_REG32: + switch( reg.idx ) { + case reg_AX: + debug_printf( "EAX" ); + break; + case reg_CX: + debug_printf( "ECX" ); + break; + case reg_DX: + debug_printf( "EDX" ); + break; + case reg_BX: + debug_printf( "EBX" ); + break; + case reg_SP: + debug_printf( "ESP" ); + break; + case reg_BP: + debug_printf( "EBP" ); + break; + case reg_SI: + debug_printf( "ESI" ); + break; + case reg_DI: + debug_printf( "EDI" ); + break; + } + break; + case file_MMX: + assert( 0 ); + break; + case file_XMM: + debug_printf( "XMM%u", reg.idx ); + break; + case file_x87: + assert( 0 ); + break; + } + + if (reg.mod == mod_DISP8 || + reg.mod == mod_DISP32) + debug_printf("+%d", reg.disp); + + if (reg.mod != mod_REG) + debug_printf( "]" ); +} + +static void +_fill( + const char *op ) +{ + unsigned count = 10 - strlen( op ); + + while( count-- ) { + debug_printf( " " ); + } +} + +#define DUMP_START() debug_printf( "\nsse-dump start ----------------" ) +#define DUMP_END() debug_printf( "\nsse-dump end ----------------\n" ) +#define DUMP( OP ) debug_printf( "\n%s", OP ) +#define DUMP_I( OP, I ) do {\ + debug_printf( "\n%s", OP );\ + _fill( OP );\ + debug_printf( "%u", I ); } while( 0 ) +#define DUMP_R( OP, R0 ) do {\ + debug_printf( "\n%s", OP );\ + _fill( OP );\ + _print_reg( R0 ); } while( 0 ) +#define DUMP_RR( OP, R0, R1 ) do {\ + debug_printf( "\n%s", OP );\ + _fill( OP );\ + _print_reg( R0 );\ + debug_printf( ", " );\ + _print_reg( R1 ); } while( 0 ) +#define DUMP_RRI( OP, R0, R1, I ) do {\ + debug_printf( "\n%s", OP );\ + _fill( OP );\ + _print_reg( R0 );\ + debug_printf( ", " );\ + _print_reg( R1 );\ + debug_printf( ", " );\ + debug_printf( "%u", I ); } while( 0 ) + +#else + +#define DUMP_START() +#define DUMP_END() +#define DUMP( OP ) +#define DUMP_I( OP, I ) +#define DUMP_R( OP, R0 ) +#define DUMP_RR( OP, R0, R1 ) +#define DUMP_RRI( OP, R0, R1, I ) + +#endif + +#define FOR_EACH_CHANNEL( CHAN )\ + for( CHAN = 0; CHAN < 4; CHAN++ ) + +#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ + ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + +#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ + if( IS_DST0_CHANNEL_ENABLED( INST, CHAN )) + +#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ + FOR_EACH_CHANNEL( CHAN )\ + IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + +#define TEMP_R0 TGSI_EXEC_TEMP_R0 + +/** + * X86 utility functions. + */ + +static struct x86_reg +make_xmm( + unsigned xmm ) +{ + return x86_make_reg( + file_XMM, + (enum x86_reg_name) xmm ); +} + +/** + * X86 register mapping helpers. + */ + +static struct x86_reg +get_const_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_CX ); +} + +static struct x86_reg +get_input_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_AX ); +} + +static struct x86_reg +get_output_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_DX ); +} + +static struct x86_reg +get_temp_base( void ) +{ +#ifdef WIN32 + return x86_make_reg( + file_REG32, + reg_BX ); +#else + return x86_make_reg( + file_REG32, + reg_SI ); +#endif +} + +static struct x86_reg +get_coef_base( void ) +{ + return get_output_base(); +} + +/** + * Data access helpers. + */ + +static struct x86_reg +get_argument( + unsigned index ) +{ + return x86_make_disp( + x86_make_reg( file_REG32, reg_SP ), + (index + 1) * 4 ); +} + +static struct x86_reg +get_const( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_const_base(), + (vec * 4 + chan) * 4 ); +} + +static struct x86_reg +get_input( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_input_base(), + (vec * 4 + chan) * 16 ); +} + +static struct x86_reg +get_output( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_output_base(), + (vec * 4 + chan) * 16 ); +} + +static struct x86_reg +get_temp( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_temp_base(), + (vec * 4 + chan) * 16 ); +} + +static struct x86_reg +get_coef( + unsigned vec, + unsigned chan, + unsigned member ) +{ + return x86_make_disp( + get_coef_base(), + ((vec * 3 + member) * 4 + chan) * 4 ); +} + +/** + * X86 rtasm wrappers. + */ + +static void +emit_addps( + struct x86_function *func, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( "ADDPS", dst, src ); + sse_addps( func, dst, src ); +} + +static void +emit_andnps( + struct x86_function *func, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( "ANDNPS", dst, src ); + sse_andnps( func, dst, src ); +} + +static void +emit_andps( + struct x86_function *func, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( "ANDPS", dst, src ); + sse_andps( func, dst, src ); +} + +static void +emit_call( + struct x86_function *func, + void (* addr)() ) +{ + struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); + + DUMP_I( "CALL", addr ); + x86_mov_reg_imm( func, ecx, (unsigned long) addr ); + x86_call( func, ecx ); +} + +static void +emit_cmpps( + struct x86_function *func, + struct x86_reg dst, + struct x86_reg src, + enum sse_cc cc ) +{ + DUMP_RRI( "CMPPS", dst, src, cc ); + sse_cmpps( func, dst, src, cc ); +} + +static void +emit_cvttps2dq( + struct x86_function *func, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( "CVTTPS2DQ", dst, src ); + sse2_cvttps2dq( func, dst, src ); +} + +static void +emit_maxps( + struct x86_function *func, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( "MAXPS", dst, src ); + sse_maxps( func, dst, src ); +} + +static void +emit_minps( + struct x86_function *func, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( "MINPS", dst, src ); + sse_minps( func, dst, src ); +} + +static void +emit_mov( + struct x86_function *func, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( "MOV", dst, src ); + x86_mov( func, dst, src ); +} + +static void +emit_movaps( + struct x86_function *func, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( "MOVAPS", dst, src ); + sse_movaps( func, dst, src ); +} + +static void +emit_movss( + struct x86_function *func, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( "MOVSS", dst, src ); + sse_movss( func, dst, src ); +} + +static void +emit_movups( + struct x86_function *func, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( "MOVUPS", dst, src ); + sse_movups( func, dst, src ); +} + +static void +emit_mulps( + struct x86_function *func, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( "MULPS", dst, src ); + sse_mulps( func, dst, src ); +} + +static void +emit_or( + struct x86_function *func, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( "OR", dst, src ); + x86_or( func, dst, src ); +} + +static void +emit_orps( + struct x86_function *func, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( "ORPS", dst, src ); + sse_orps( func, dst, src ); +} + +static void +emit_pmovmskb( + struct x86_function *func, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( "PMOVMSKB", dst, src ); + sse_pmovmskb( func, dst, src ); +} + +static void +emit_pop( + struct x86_function *func, + struct x86_reg dst ) +{ + DUMP_R( "POP", dst ); + x86_pop( func, dst ); +} + +static void +emit_push( + struct x86_function *func, + struct x86_reg dst ) +{ + DUMP_R( "PUSH", dst ); + x86_push( func, dst ); +} + +static void +emit_rcpps( + struct x86_function *func, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( "RCPPS", dst, src ); + sse2_rcpps( func, dst, src ); +} + +#ifdef WIN32 +static void +emit_retw( + struct x86_function *func, + unsigned size ) +{ + DUMP_I( "RET", size ); + x86_retw( func, size ); +} +#else +static void +emit_ret( + struct x86_function *func ) +{ + DUMP( "RET" ); + x86_ret( func ); +} +#endif + +static void +emit_rsqrtps( + struct x86_function *func, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( "RSQRTPS", dst, src ); + sse_rsqrtps( func, dst, src ); +} + +static void +emit_shufps( + struct x86_function *func, + struct x86_reg dst, + struct x86_reg src, + unsigned char shuf ) +{ + DUMP_RRI( "SHUFPS", dst, src, shuf ); + sse_shufps( func, dst, src, shuf ); +} + +static void +emit_subps( + struct x86_function *func, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( "SUBPS", dst, src ); + sse_subps( func, dst, src ); +} + +static void +emit_xorps( + struct x86_function *func, + struct x86_reg dst, + struct x86_reg src ) +{ + DUMP_RR( "XORPS", dst, src ); + sse_xorps( func, dst, src ); +} + +/** + * Data fetch helpers. + */ + +static void +emit_const( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_movss( + func, + make_xmm( xmm ), + get_const( vec, chan ) ); + emit_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + +static void +emit_inputf( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_movups( + func, + make_xmm( xmm ), + get_input( vec, chan ) ); +} + +static void +emit_output( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_movups( + func, + get_output( vec, chan ), + make_xmm( xmm ) ); +} + +static void +emit_tempf( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_movaps( + func, + make_xmm( xmm ), + get_temp( vec, chan ) ); +} + +static void +emit_coef( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan, + unsigned member ) +{ + emit_movss( + func, + make_xmm( xmm ), + get_coef( vec, chan, member ) ); + emit_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + +/** + * Data store helpers. + */ + +static void +emit_inputs( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_movups( + func, + get_input( vec, chan ), + make_xmm( xmm ) ); +} + +static void +emit_temps( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_movaps( + func, + get_temp( vec, chan ), + make_xmm( xmm ) ); +} + +static void +emit_addrs( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_temps( + func, + xmm, + vec + TGSI_EXEC_NUM_TEMPS, + chan ); +} + +/** + * Coefficent fetch helpers. + */ + +static void +emit_coef_a0( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_coef( + func, + xmm, + vec, + chan, + 0 ); +} + +static void +emit_coef_dadx( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_coef( + func, + xmm, + vec, + chan, + 1 ); +} + +static void +emit_coef_dady( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_coef( + func, + xmm, + vec, + chan, + 2 ); +} + +/** + * Function call helpers. + */ + +static void +emit_push_gp( + struct x86_function *func ) +{ + emit_push( + func, + get_const_base() ); + emit_push( + func, + get_input_base() ); + emit_push( + func, + get_output_base() ); + + /* It is important on non-win32 platforms that temp base is pushed last. + */ + emit_push( + func, + get_temp_base() ); +} + +static void +emit_pop_gp( + struct x86_function *func ) +{ + /* Restore GP registers in a reverse order. + */ + emit_pop( + func, + get_temp_base() ); + emit_pop( + func, + get_output_base() ); + emit_pop( + func, + get_input_base() ); + emit_pop( + func, + get_const_base() ); +} + +static void +emit_func_call_dst( + struct x86_function *func, + unsigned xmm_dst, + void (*code)() ) +{ + emit_movaps( + func, + get_temp( TEMP_R0, 0 ), + make_xmm( xmm_dst ) ); + + emit_push_gp( + func ); + +#ifdef WIN32 + emit_push( + func, + get_temp( TEMP_R0, 0 ) ); +#endif + + emit_call( + func, + code ); + + emit_pop_gp( + func ); + + emit_movaps( + func, + make_xmm( xmm_dst ), + get_temp( TEMP_R0, 0 ) ); +} + +static void +emit_func_call_dst_src( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src, + void (*code)() ) +{ + emit_movaps( + func, + get_temp( TEMP_R0, 1 ), + make_xmm( xmm_src ) ); + + emit_func_call_dst( + func, + xmm_dst, + code ); +} + +/** + * Low-level instruction translators. + */ + +static void +emit_abs( + struct x86_function *func, + unsigned xmm ) +{ + emit_andps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_7FFFFFFF_I, + TGSI_EXEC_TEMP_7FFFFFFF_C ) ); +} + +static void +emit_add( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + emit_addps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void XSTDCALL +cos4f( + float *store ) +{ +#ifdef WIN32 + store[0] = (float) cos( (double) store[0] ); + store[1] = (float) cos( (double) store[1] ); + store[2] = (float) cos( (double) store[2] ); + store[3] = (float) cos( (double) store[3] ); +#else + const unsigned X = TEMP_R0 * 16; + store[X + 0] = cosf( store[X + 0] ); + store[X + 1] = cosf( store[X + 1] ); + store[X + 2] = cosf( store[X + 2] ); + store[X + 3] = cosf( store[X + 3] ); +#endif +} + +static void +emit_cos( + struct x86_function *func, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_dst, + cos4f ); +} + +static void XSTDCALL +ex24f( + float *store ) +{ +#ifdef WIN32 + store[0] = (float) pow( 2.0, (double) store[0] ); + store[1] = (float) pow( 2.0, (double) store[1] ); + store[2] = (float) pow( 2.0, (double) store[2] ); + store[3] = (float) pow( 2.0, (double) store[3] ); +#else + const unsigned X = TEMP_R0 * 16; + store[X + 0] = powf( 2.0f, store[X + 0] ); + store[X + 1] = powf( 2.0f, store[X + 1] ); + store[X + 2] = powf( 2.0f, store[X + 2] ); + store[X + 3] = powf( 2.0f, store[X + 3] ); +#endif +} + +static void +emit_ex2( + struct x86_function *func, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_dst, + ex24f ); +} + +static void +emit_f2it( + struct x86_function *func, + unsigned xmm ) +{ + emit_cvttps2dq( + func, + make_xmm( xmm ), + make_xmm( xmm ) ); +} + +static void XSTDCALL +flr4f( + float *store ) +{ +#ifdef WIN32 + const unsigned X = 0; +#else + const unsigned X = TEMP_R0 * 16; +#endif + store[X + 0] = (float) floor( (double) store[X + 0] ); + store[X + 1] = (float) floor( (double) store[X + 1] ); + store[X + 2] = (float) floor( (double) store[X + 2] ); + store[X + 3] = (float) floor( (double) store[X + 3] ); +} + +static void +emit_flr( + struct x86_function *func, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_dst, + flr4f ); +} + +static void XSTDCALL +frc4f( + float *store ) +{ +#ifdef WIN32 + const unsigned X = 0; +#else + const unsigned X = TEMP_R0 * 16; +#endif + store[X + 0] -= (float) floor( (double) store[X + 0] ); + store[X + 1] -= (float) floor( (double) store[X + 1] ); + store[X + 2] -= (float) floor( (double) store[X + 2] ); + store[X + 3] -= (float) floor( (double) store[X + 3] ); +} + +static void +emit_frc( + struct x86_function *func, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_dst, + frc4f ); +} + +static void XSTDCALL +lg24f( + float *store ) +{ +#ifdef WIN32 + const unsigned X = 0; +#else + const unsigned X = TEMP_R0 * 16; +#endif + store[X + 0] = LOG2( store[X + 0] ); + store[X + 1] = LOG2( store[X + 1] ); + store[X + 2] = LOG2( store[X + 2] ); + store[X + 3] = LOG2( store[X + 3] ); +} + +static void +emit_lg2( + struct x86_function *func, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_dst, + lg24f ); +} + +static void +emit_MOV( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + emit_movups( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void +emit_mul (struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src) +{ + emit_mulps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void +emit_neg( + struct x86_function *func, + unsigned xmm ) +{ + emit_xorps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_80000000_I, + TGSI_EXEC_TEMP_80000000_C ) ); +} + +static void XSTDCALL +pow4f( + float *store ) +{ +#ifdef WIN32 + store[0] = (float) pow( (double) store[0], (double) store[4] ); + store[1] = (float) pow( (double) store[1], (double) store[5] ); + store[2] = (float) pow( (double) store[2], (double) store[6] ); + store[3] = (float) pow( (double) store[3], (double) store[7] ); +#else + const unsigned X = TEMP_R0 * 16; + store[X + 0] = powf( store[X + 0], store[X + 4] ); + store[X + 1] = powf( store[X + 1], store[X + 5] ); + store[X + 2] = powf( store[X + 2], store[X + 6] ); + store[X + 3] = powf( store[X + 3], store[X + 7] ); +#endif +} + +static void +emit_pow( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + emit_func_call_dst_src( + func, + xmm_dst, + xmm_src, + pow4f ); +} + +static void +emit_rcp ( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + emit_rcpps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void +emit_rsqrt( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + emit_rsqrtps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void +emit_setsign( + struct x86_function *func, + unsigned xmm ) +{ + emit_orps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_80000000_I, + TGSI_EXEC_TEMP_80000000_C ) ); +} + +static void XSTDCALL +sin4f( + float *store ) +{ +#ifdef WIN32 + store[0] = (float) sin( (double) store[0] ); + store[1] = (float) sin( (double) store[1] ); + store[2] = (float) sin( (double) store[2] ); + store[3] = (float) sin( (double) store[3] ); +#else + const unsigned X = TEMP_R0 * 16; + store[X + 0] = sinf( store[X + 0] ); + store[X + 1] = sinf( store[X + 1] ); + store[X + 2] = sinf( store[X + 2] ); + store[X + 3] = sinf( store[X + 3] ); +#endif +} + +static void +emit_sin (struct x86_function *func, + unsigned xmm_dst) +{ + emit_func_call_dst( + func, + xmm_dst, + sin4f ); +} + +static void +emit_sub( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + emit_subps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +/** + * Register fetch. + */ + +static void +emit_fetch( + struct x86_function *func, + unsigned xmm, + const struct tgsi_full_src_register *reg, + const unsigned chan_index ) +{ + unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + + switch( swizzle ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch( reg->SrcRegister.File ) { + case TGSI_FILE_CONSTANT: + emit_const( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + case TGSI_FILE_INPUT: + emit_inputf( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + case TGSI_FILE_TEMPORARY: + emit_tempf( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + default: + assert( 0 ); + } + break; + + case TGSI_EXTSWIZZLE_ZERO: + emit_tempf( + func, + xmm, + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ); + break; + + case TGSI_EXTSWIZZLE_ONE: + emit_tempf( + func, + xmm, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); + break; + + default: + assert( 0 ); + } + + switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { + case TGSI_UTIL_SIGN_CLEAR: + emit_abs( func, xmm ); + break; + + case TGSI_UTIL_SIGN_SET: + emit_setsign( func, xmm ); + break; + + case TGSI_UTIL_SIGN_TOGGLE: + emit_neg( func, xmm ); + break; + + case TGSI_UTIL_SIGN_KEEP: + break; + } +} + +#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\ + emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN ) + +/** + * Register store. + */ + +static void +emit_store( + struct x86_function *func, + unsigned xmm, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + unsigned chan_index ) +{ + switch( reg->DstRegister.File ) { + case TGSI_FILE_OUTPUT: + emit_output( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; + + case TGSI_FILE_TEMPORARY: + emit_temps( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; + + case TGSI_FILE_ADDRESS: + emit_addrs( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; + + default: + assert( 0 ); + } + + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; + + case TGSI_SAT_ZERO_ONE: +// assert( 0 ); + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + } +} + +#define STORE( FUNC, INST, XMM, INDEX, CHAN )\ + emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) + +/** + * High-level instruction translators. + */ + +static void +emit_kil( + struct x86_function *func, + const struct tgsi_full_src_register *reg ) +{ + unsigned uniquemask; + unsigned registers[4]; + unsigned nextregister = 0; + unsigned firstchan = ~0; + unsigned chan_index; + + /* This mask stores component bits that were already tested. Note that + * we test if the value is less than zero, so 1.0 and 0.0 need not to be + * tested. */ + uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + + FOR_EACH_CHANNEL( chan_index ) { + unsigned swizzle; + + /* unswizzle channel */ + swizzle = tgsi_util_get_full_src_register_extswizzle( + reg, + chan_index ); + + /* check if the component has not been already tested */ + if( !(uniquemask & (1 << swizzle)) ) { + uniquemask |= 1 << swizzle; + + /* allocate register */ + registers[chan_index] = nextregister; + emit_fetch( + func, + nextregister, + reg, + chan_index ); + nextregister++; + + /* mark the first channel used */ + if( firstchan == ~0 ) { + firstchan = chan_index; + } + } + } + + emit_push( + func, + x86_make_reg( file_REG32, reg_AX ) ); + emit_push( + func, + x86_make_reg( file_REG32, reg_DX ) ); + + FOR_EACH_CHANNEL( chan_index ) { + if( uniquemask & (1 << chan_index) ) { + emit_cmpps( + func, + make_xmm( registers[chan_index] ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ), + cc_LessThan ); + + if( chan_index == firstchan ) { + emit_pmovmskb( + func, + x86_make_reg( file_REG32, reg_AX ), + make_xmm( registers[chan_index] ) ); + } + else { + emit_pmovmskb( + func, + x86_make_reg( file_REG32, reg_DX ), + make_xmm( registers[chan_index] ) ); + emit_or( + func, + x86_make_reg( file_REG32, reg_AX ), + x86_make_reg( file_REG32, reg_DX ) ); + } + } + } + + emit_or( + func, + get_temp( + TGSI_EXEC_TEMP_KILMASK_I, + TGSI_EXEC_TEMP_KILMASK_C ), + x86_make_reg( file_REG32, reg_AX ) ); + + emit_pop( + func, + x86_make_reg( file_REG32, reg_DX ) ); + emit_pop( + func, + x86_make_reg( file_REG32, reg_AX ) ); +} + +static void +emit_setcc( + struct x86_function *func, + struct tgsi_full_instruction *inst, + enum sse_cc cc ) +{ + unsigned chan_index; + + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_cmpps( + func, + make_xmm( 0 ), + make_xmm( 1 ), + cc ); + emit_andps( + func, + make_xmm( 0 ), + get_temp( + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ) ); + STORE( func, *inst, 0, 0, chan_index ); + } +} + +static void +emit_cmp( + struct x86_function *func, + struct tgsi_full_instruction *inst ) +{ + unsigned chan_index; + + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + emit_cmpps( + func, + make_xmm( 0 ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ), + cc_LessThan ); + emit_andps( + func, + make_xmm( 1 ), + make_xmm( 0 ) ); + emit_andnps( + func, + make_xmm( 0 ), + make_xmm( 2 ) ); + emit_orps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } +} + +static int +emit_instruction( + struct x86_function *func, + struct tgsi_full_instruction *inst ) +{ + unsigned chan_index; + + switch( inst->Instruction.Opcode ) { + case TGSI_OPCODE_ARL: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_f2it( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOV: + /* TGSI_OPCODE_SWZ */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LIT: + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C); + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) { + STORE( func, *inst, 0, 0, CHAN_X ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + STORE( func, *inst, 0, 0, CHAN_W ); + } + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_maxps( + func, + make_xmm( 0 ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ) ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + FETCH( func, *inst, 1, 0, CHAN_Y ); + emit_maxps( + func, + make_xmm( 1 ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ) ); + FETCH( func, *inst, 2, 0, CHAN_W ); + emit_minps( + func, + make_xmm( 2 ), + get_temp( + TGSI_EXEC_TEMP_128_I, + TGSI_EXEC_TEMP_128_C ) ); + emit_maxps( + func, + make_xmm( 2 ), + get_temp( + TGSI_EXEC_TEMP_MINUS_128_I, + TGSI_EXEC_TEMP_MINUS_128_C ) ); + emit_pow( func, 1, 2 ); + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_xorps( + func, + make_xmm( 2 ), + make_xmm( 2 ) ); + emit_cmpps( + func, + make_xmm( 2 ), + make_xmm( 0 ), + cc_LessThanEqual ); + emit_andps( + func, + make_xmm( 2 ), + make_xmm( 1 ) ); + STORE( func, *inst, 2, 0, CHAN_Z ); + } + } + break; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_rcp( func, 0, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_rsqrt( func, 0, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXP: + return 0; + break; + + case TGSI_OPCODE_LOG: + return 0; + break; + + case TGSI_OPCODE_MUL: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_mul( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_ADD: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_add( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul(func, 1, 2 ); + emit_add(func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_W ); + FETCH( func, *inst, 2, 1, CHAN_W ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DST: + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + FETCH( func, *inst, 0, 0, CHAN_Y ); + FETCH( func, *inst, 1, 1, CHAN_Y ); + emit_mul( func, 0, 1 ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + FETCH( func, *inst, 0, 0, CHAN_Z ); + STORE( func, *inst, 0, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + FETCH( func, *inst, 0, 1, CHAN_W ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MIN: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_minps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_MAX: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_maxps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + emit_setcc( func, inst, cc_LessThan ); + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + emit_setcc( func, inst, cc_NotLessThan ); + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + emit_mul( func, 0, 1 ); + emit_add( func, 0, 2 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SUB: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_sub( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LERP: + /* TGSI_OPCODE_LRP */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + emit_sub( func, 1, 2 ); + emit_mul( func, 0, 1 ); + emit_add( func, 0, 2 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CND: + return 0; + break; + + case TGSI_OPCODE_CND0: + return 0; + break; + + case TGSI_OPCODE_DOT2ADD: + /* TGSI_OPCODE_DP2A */ + return 0; + break; + + case TGSI_OPCODE_INDEX: + return 0; + break; + + case TGSI_OPCODE_NEGATE: + return 0; + break; + + case TGSI_OPCODE_FRAC: + /* TGSI_OPCODE_FRC */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_frc( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CLAMP: + return 0; + break; + + case TGSI_OPCODE_FLOOR: + /* TGSI_OPCODE_FLR */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_flr( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_ROUND: + return 0; + break; + + case TGSI_OPCODE_EXPBASE2: + /* TGSI_OPCODE_EX2 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_ex2( func, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LOGBASE2: + /* TGSI_OPCODE_LG2 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_lg2( func, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_POWER: + /* TGSI_OPCODE_POW */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_pow( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CROSSPRODUCT: + /* TGSI_OPCODE_XPD */ + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( func, *inst, 1, 1, CHAN_Z ); + FETCH( func, *inst, 3, 0, CHAN_Z ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + FETCH( func, *inst, 0, 0, CHAN_Y ); + FETCH( func, *inst, 4, 1, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + emit_MOV( func, 2, 0 ); + emit_mul( func, 2, 1 ); + emit_MOV( func, 5, 3 ); + emit_mul( func, 5, 4 ); + emit_sub( func, 2, 5 ); + STORE( func, *inst, 2, 0, CHAN_X ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + FETCH( func, *inst, 2, 1, CHAN_X ); + FETCH( func, *inst, 5, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + emit_mul( func, 3, 2 ); + emit_mul( func, 1, 5 ); + emit_sub( func, 3, 1 ); + STORE( func, *inst, 3, 0, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + emit_mul( func, 5, 4 ); + emit_mul( func, 0, 2 ); + emit_sub( func, 5, 0 ); + STORE( func, *inst, 5, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + FETCH( func, *inst, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MULTIPLYMATRIX: + return 0; + break; + + case TGSI_OPCODE_ABS: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_abs( func, 0) ; + + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_RCC: + return 0; + break; + + case TGSI_OPCODE_DPH: + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 1, CHAN_W ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_COS: + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_cos( func, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDX: + return 0; + break; + + case TGSI_OPCODE_DDY: + return 0; + break; + + case TGSI_OPCODE_KIL: + emit_kil( func, &inst->FullSrcRegisters[0] ); + break; + + case TGSI_OPCODE_PK2H: + return 0; + break; + + case TGSI_OPCODE_PK2US: + return 0; + break; + + case TGSI_OPCODE_PK4B: + return 0; + break; + + case TGSI_OPCODE_PK4UB: + return 0; + break; + + case TGSI_OPCODE_RFL: + return 0; + break; + + case TGSI_OPCODE_SEQ: + return 0; + break; + + case TGSI_OPCODE_SFL: + return 0; + break; + + case TGSI_OPCODE_SGT: + return 0; + break; + + case TGSI_OPCODE_SIN: + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_sin( func, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLE: + return 0; + break; + + case TGSI_OPCODE_SNE: + return 0; + break; + + case TGSI_OPCODE_STR: + return 0; + break; + + case TGSI_OPCODE_TEX: + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_TXD: + return 0; + break; + + case TGSI_OPCODE_UP2H: + return 0; + break; + + case TGSI_OPCODE_UP2US: + return 0; + break; + + case TGSI_OPCODE_UP4B: + return 0; + break; + + case TGSI_OPCODE_UP4UB: + return 0; + break; + + case TGSI_OPCODE_X2D: + return 0; + break; + + case TGSI_OPCODE_ARA: + return 0; + break; + + case TGSI_OPCODE_ARR: + return 0; + break; + + case TGSI_OPCODE_BRA: + return 0; + break; + + case TGSI_OPCODE_CAL: + return 0; + break; + + case TGSI_OPCODE_RET: + case TGSI_OPCODE_END: +#ifdef WIN32 + emit_retw( func, 16 ); +#else + emit_ret( func ); +#endif + break; + + case TGSI_OPCODE_SSG: + return 0; + break; + + case TGSI_OPCODE_CMP: + emit_cmp (func, inst); + break; + + case TGSI_OPCODE_SCS: + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_cos( func, 0 ); + STORE( func, *inst, 0, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + FETCH( func, *inst, 0, 0, CHAN_Y ); + emit_sin( func, 0 ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + FETCH( func, *inst, 0, TGSI_EXEC_TEMP_00000000_I, TGSI_EXEC_TEMP_00000000_C ); + STORE( func, *inst, 0, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + FETCH( func, *inst, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_TXB: + return 0; + break; + + case TGSI_OPCODE_NRM: + return 0; + break; + + case TGSI_OPCODE_DIV: + return 0; + break; + + case TGSI_OPCODE_DP2: + return 0; + break; + + case TGSI_OPCODE_TXL: + return 0; + break; + + case TGSI_OPCODE_BRK: + return 0; + break; + + case TGSI_OPCODE_IF: + return 0; + break; + + case TGSI_OPCODE_LOOP: + return 0; + break; + + case TGSI_OPCODE_REP: + return 0; + break; + + case TGSI_OPCODE_ELSE: + return 0; + break; + + case TGSI_OPCODE_ENDIF: + return 0; + break; + + case TGSI_OPCODE_ENDLOOP: + return 0; + break; + + case TGSI_OPCODE_ENDREP: + return 0; + break; + + case TGSI_OPCODE_PUSHA: + return 0; + break; + + case TGSI_OPCODE_POPA: + return 0; + break; + + case TGSI_OPCODE_CEIL: + return 0; + break; + + case TGSI_OPCODE_I2F: + return 0; + break; + + case TGSI_OPCODE_NOT: + return 0; + break; + + case TGSI_OPCODE_TRUNC: + return 0; + break; + + case TGSI_OPCODE_SHL: + return 0; + break; + + case TGSI_OPCODE_SHR: + return 0; + break; + + case TGSI_OPCODE_AND: + return 0; + break; + + case TGSI_OPCODE_OR: + return 0; + break; + + case TGSI_OPCODE_MOD: + return 0; + break; + + case TGSI_OPCODE_XOR: + return 0; + break; + + case TGSI_OPCODE_SAD: + return 0; + break; + + case TGSI_OPCODE_TXF: + return 0; + break; + + case TGSI_OPCODE_TXQ: + return 0; + break; + + case TGSI_OPCODE_CONT: + return 0; + break; + + case TGSI_OPCODE_EMIT: + return 0; + break; + + case TGSI_OPCODE_ENDPRIM: + return 0; + break; + + default: + return 0; + } + + return 1; +} + +static void +emit_declaration( + struct x86_function *func, + struct tgsi_full_declaration *decl ) +{ + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + unsigned i, j; + + assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); + + first = decl->u.DeclarationRange.First; + last = decl->u.DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + /* Do not touch WPOS.xy */ + if( first == 0 ) { + mask &= ~TGSI_WRITEMASK_XY; + if( mask == TGSI_WRITEMASK_NONE ) { + first++; + } + } + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + switch( decl->Interpolation.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + emit_coef_a0( func, 0, i, j ); + emit_inputs( func, 0, i, j ); + break; + + case TGSI_INTERPOLATE_LINEAR: + emit_inputf( func, 0, 0, TGSI_SWIZZLE_X ); + emit_coef_dadx( func, 1, i, j ); + emit_inputf( func, 2, 0, TGSI_SWIZZLE_Y ); + emit_coef_dady( func, 3, i, j ); + emit_mul( func, 0, 1 ); /* x * dadx */ + emit_coef_a0( func, 4, i, j ); + emit_mul( func, 2, 3 ); /* y * dady */ + emit_add( func, 0, 4 ); /* x * dadx + a0 */ + emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ + emit_inputs( func, 0, i, j ); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + emit_inputf( func, 0, 0, TGSI_SWIZZLE_X ); + emit_coef_dadx( func, 1, i, j ); + emit_inputf( func, 2, 0, TGSI_SWIZZLE_Y ); + emit_coef_dady( func, 3, i, j ); + emit_mul( func, 0, 1 ); /* x * dadx */ + emit_inputf( func, 4, 0, TGSI_SWIZZLE_W ); + emit_coef_a0( func, 5, i, j ); + emit_rcp( func, 4, 4 ); /* 1.0 / w */ + emit_mul( func, 2, 3 ); /* y * dady */ + emit_add( func, 0, 5 ); /* x * dadx + a0 */ + emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ + emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */ + emit_inputs( func, 0, i, j ); + break; + + default: + assert( 0 ); + break; + } + } + } + } + } +} + +unsigned +tgsi_emit_sse2( + struct tgsi_token *tokens, + struct x86_function *func ) +{ + struct tgsi_parse_context parse; + unsigned ok = 1; + + DUMP_START(); + + func->csr = func->store; + + emit_mov( + func, + get_input_base(), + get_argument( 0 ) ); + emit_mov( + func, + get_output_base(), + get_argument( 1 ) ); + emit_mov( + func, + get_const_base(), + get_argument( 2 ) ); + emit_mov( + func, + get_temp_base(), + get_argument( 3 ) ); + + tgsi_parse_init( &parse, tokens ); + + while( !tgsi_parse_end_of_tokens( &parse ) && ok ) { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + ok = emit_instruction( + func, + &parse.FullToken.FullInstruction ); + + if (!ok) { + debug_printf("failed to translate tgsi opcode %d\n", + parse.FullToken.FullInstruction.Instruction.Opcode ); + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* XXX implement this */ + ok = 0; + debug_printf("failed to emit immediate value\n"); + break; + + default: + assert( 0 ); + ok = 0; + break; + } + } + + tgsi_parse_free( &parse ); + + DUMP_END(); + + return ok; +} + +/** + * Fragment shaders are responsible for interpolating shader inputs. Because on + * x86 we have only 4 GP registers, and here we have 5 shader arguments (input, + * output, const, temp and coef), the code is split into two phases -- + * DECLARATION and INSTRUCTION phase. + * GP register holding the output argument is aliased with the coeff argument, + * as outputs are not needed in the DECLARATION phase. + */ +unsigned +tgsi_emit_sse2_fs( + struct tgsi_token *tokens, + struct x86_function *func ) +{ + struct tgsi_parse_context parse; + boolean instruction_phase = FALSE; + + DUMP_START(); + + func->csr = func->store; + + /* DECLARATION phase, do not load output argument. */ + emit_mov( + func, + get_input_base(), + get_argument( 0 ) ); + emit_mov( + func, + get_const_base(), + get_argument( 2 ) ); + emit_mov( + func, + get_temp_base(), + get_argument( 3 ) ); + emit_mov( + func, + get_coef_base(), + get_argument( 4 ) ); + + tgsi_parse_init( &parse, tokens ); + + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + emit_declaration( + func, + &parse.FullToken.FullDeclaration ); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if( !instruction_phase ) { + /* INSTRUCTION phase, overwrite coeff with output. */ + instruction_phase = TRUE; + emit_mov( + func, + get_output_base(), + get_argument( 1 ) ); + } + emit_instruction( + func, + &parse.FullToken.FullInstruction ); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* XXX implement this */ + assert(0); + break; + + default: + assert( 0 ); + } + } + + tgsi_parse_free( &parse ); + + DUMP_END(); + + return 1; +} + +#endif /* i386 */ diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h new file mode 100755 index 0000000000..9bee371766 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h @@ -0,0 +1,26 @@ +#if !defined TGSI_SSE2_H +#define TGSI_SSE2_H + +#if defined __cplusplus +extern "C" { +#endif // defined __cplusplus + +struct tgsi_token; +struct x86_function; + +unsigned +tgsi_emit_sse2( + struct tgsi_token *tokens, + struct x86_function *function ); + +unsigned +tgsi_emit_sse2_fs( + struct tgsi_token *tokens, + struct x86_function *function ); + +#if defined __cplusplus +} // extern "C" +#endif // defined __cplusplus + +#endif // !defined TGSI_SSE2_H + diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.c b/src/gallium/auxiliary/tgsi/util/tgsi_build.c new file mode 100644 index 0000000000..a00ff1c2a5 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.c @@ -0,0 +1,1371 @@ +#include "pipe/p_debug.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi_build.h" +#include "tgsi_parse.h" + +/* + * version + */ + +struct tgsi_version +tgsi_build_version( void ) +{ + struct tgsi_version version; + + version.MajorVersion = 1; + version.MinorVersion = 1; + version.Padding = 0; + + return version; +} + +/* + * header + */ + +struct tgsi_header +tgsi_build_header( void ) +{ + struct tgsi_header header; + + header.HeaderSize = 1; + header.BodySize = 0; + + return header; +} + +static void +header_headersize_grow( struct tgsi_header *header ) +{ + assert( header->HeaderSize < 0xFF ); + assert( header->BodySize == 0 ); + + header->HeaderSize++; +} + +static void +header_bodysize_grow( struct tgsi_header *header ) +{ + assert( header->BodySize < 0xFFFFFF ); + + header->BodySize++; +} + +struct tgsi_processor +tgsi_default_processor( void ) +{ + struct tgsi_processor processor; + + processor.Processor = TGSI_PROCESSOR_FRAGMENT; + processor.Padding = 0; + + return processor; +} + +struct tgsi_processor +tgsi_build_processor( + unsigned type, + struct tgsi_header *header ) +{ + struct tgsi_processor processor; + + processor = tgsi_default_processor(); + processor.Processor = type; + + header_headersize_grow( header ); + + return processor; +} + +/* + * declaration + */ + +struct tgsi_declaration +tgsi_default_declaration( void ) +{ + struct tgsi_declaration declaration; + + declaration.Type = TGSI_TOKEN_TYPE_DECLARATION; + declaration.Size = 1; + declaration.File = TGSI_FILE_NULL; + declaration.Declare = TGSI_DECLARE_RANGE; + declaration.UsageMask = TGSI_WRITEMASK_XYZW; + declaration.Interpolate = 0; + declaration.Semantic = 0; + declaration.Padding = 0; + declaration.Extended = 0; + + return declaration; +} + +struct tgsi_declaration +tgsi_build_declaration( + unsigned file, + unsigned declare, + unsigned usage_mask, + unsigned interpolate, + unsigned semantic, + struct tgsi_header *header ) +{ + struct tgsi_declaration declaration; + + assert( file <= TGSI_FILE_IMMEDIATE ); + assert( declare <= TGSI_DECLARE_MASK ); + + declaration = tgsi_default_declaration(); + declaration.File = file; + declaration.Declare = declare; + declaration.UsageMask = usage_mask; + declaration.Interpolate = interpolate; + declaration.Semantic = semantic; + + header_bodysize_grow( header ); + + return declaration; +} + +static void +declaration_grow( + struct tgsi_declaration *declaration, + struct tgsi_header *header ) +{ + assert( declaration->Size < 0xFF ); + + declaration->Size++; + + header_bodysize_grow( header ); +} + +struct tgsi_full_declaration +tgsi_default_full_declaration( void ) +{ + struct tgsi_full_declaration full_declaration; + + full_declaration.Declaration = tgsi_default_declaration(); + full_declaration.Interpolation = tgsi_default_declaration_interpolation(); + full_declaration.Semantic = tgsi_default_declaration_semantic(); + + return full_declaration; +} + +unsigned +tgsi_build_full_declaration( + const struct tgsi_full_declaration *full_decl, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ) +{ + unsigned size = 0; + struct tgsi_declaration *declaration; + + if( maxsize <= size ) + return 0; + declaration = (struct tgsi_declaration *) &tokens[size]; + size++; + + *declaration = tgsi_build_declaration( + full_decl->Declaration.File, + full_decl->Declaration.Declare, + full_decl->Declaration.UsageMask, + full_decl->Declaration.Interpolate, + full_decl->Declaration.Semantic, + header ); + + switch( full_decl->Declaration.Declare ) { + case TGSI_DECLARE_RANGE: + { + struct tgsi_declaration_range *dr; + + if( maxsize <= size ) + return 0; + dr = (struct tgsi_declaration_range *) &tokens[size]; + size++; + + *dr = tgsi_build_declaration_range( + full_decl->u.DeclarationRange.First, + full_decl->u.DeclarationRange.Last, + declaration, + header ); + break; + } + + case TGSI_DECLARE_MASK: + { + struct tgsi_declaration_mask *dm; + + if( maxsize <= size ) + return 0; + dm = (struct tgsi_declaration_mask *) &tokens[size]; + size++; + + *dm = tgsi_build_declaration_mask( + full_decl->u.DeclarationMask.Mask, + declaration, + header ); + break; + } + + default: + assert( 0 ); + } + + if( full_decl->Declaration.Interpolate ) { + struct tgsi_declaration_interpolation *di; + + if( maxsize <= size ) + return 0; + di = (struct tgsi_declaration_interpolation *) &tokens[size]; + size++; + + *di = tgsi_build_declaration_interpolation( + full_decl->Interpolation.Interpolate, + declaration, + header ); + } + + if( full_decl->Declaration.Semantic ) { + struct tgsi_declaration_semantic *ds; + + if( maxsize <= size ) + return 0; + ds = (struct tgsi_declaration_semantic *) &tokens[size]; + size++; + + *ds = tgsi_build_declaration_semantic( + full_decl->Semantic.SemanticName, + full_decl->Semantic.SemanticIndex, + declaration, + header ); + } + + return size; +} + +struct tgsi_declaration_range +tgsi_build_declaration_range( + unsigned first, + unsigned last, + struct tgsi_declaration *declaration, + struct tgsi_header *header ) +{ + struct tgsi_declaration_range declaration_range; + + assert( last >= first ); + assert( last <= 0xFFFF ); + + declaration_range.First = first; + declaration_range.Last = last; + + declaration_grow( declaration, header ); + + return declaration_range; +} + +struct tgsi_declaration_mask +tgsi_build_declaration_mask( + unsigned mask, + struct tgsi_declaration *declaration, + struct tgsi_header *header ) +{ + struct tgsi_declaration_mask declaration_mask; + + declaration_mask.Mask = mask; + + declaration_grow( declaration, header ); + + return declaration_mask; +} + +struct tgsi_declaration_interpolation +tgsi_default_declaration_interpolation( void ) +{ + struct tgsi_declaration_interpolation di; + + di.Interpolate = TGSI_INTERPOLATE_CONSTANT; + di.Padding = 0; + + return di; +} + +struct tgsi_declaration_interpolation +tgsi_build_declaration_interpolation( + unsigned interpolate, + struct tgsi_declaration *declaration, + struct tgsi_header *header ) +{ + struct tgsi_declaration_interpolation di; + + assert( interpolate <= TGSI_INTERPOLATE_PERSPECTIVE ); + + di = tgsi_default_declaration_interpolation(); + di.Interpolate = interpolate; + + declaration_grow( declaration, header ); + + return di; +} + +struct tgsi_declaration_semantic +tgsi_default_declaration_semantic( void ) +{ + struct tgsi_declaration_semantic ds; + + ds.SemanticName = TGSI_SEMANTIC_POSITION; + ds.SemanticIndex = 0; + ds.Padding = 0; + + return ds; +} + +struct tgsi_declaration_semantic +tgsi_build_declaration_semantic( + unsigned semantic_name, + unsigned semantic_index, + struct tgsi_declaration *declaration, + struct tgsi_header *header ) +{ + struct tgsi_declaration_semantic ds; + + assert( semantic_name <= TGSI_SEMANTIC_COUNT ); + assert( semantic_index <= 0xFFFF ); + + ds = tgsi_default_declaration_semantic(); + ds.SemanticName = semantic_name; + ds.SemanticIndex = semantic_index; + + declaration_grow( declaration, header ); + + return ds; +} + +/* + * immediate + */ + +struct tgsi_immediate +tgsi_default_immediate( void ) +{ + struct tgsi_immediate immediate; + + immediate.Type = TGSI_TOKEN_TYPE_IMMEDIATE; + immediate.Size = 1; + immediate.DataType = TGSI_IMM_FLOAT32; + immediate.Padding = 0; + immediate.Extended = 0; + + return immediate; +} + +struct tgsi_immediate +tgsi_build_immediate( + struct tgsi_header *header ) +{ + struct tgsi_immediate immediate; + + immediate = tgsi_default_immediate(); + + header_bodysize_grow( header ); + + return immediate; +} + +struct tgsi_full_immediate +tgsi_default_full_immediate( void ) +{ + struct tgsi_full_immediate fullimm; + + fullimm.Immediate = tgsi_default_immediate(); + fullimm.u.Pointer = (void *) 0; + + return fullimm; +} + +static void +immediate_grow( + struct tgsi_immediate *immediate, + struct tgsi_header *header ) +{ + assert( immediate->Size < 0xFF ); + + immediate->Size++; + + header_bodysize_grow( header ); +} + +struct tgsi_immediate_float32 +tgsi_build_immediate_float32( + float value, + struct tgsi_immediate *immediate, + struct tgsi_header *header ) +{ + struct tgsi_immediate_float32 immediate_float32; + + immediate_float32.Float = value; + + immediate_grow( immediate, header ); + + return immediate_float32; +} + +unsigned +tgsi_build_full_immediate( + const struct tgsi_full_immediate *full_imm, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ) +{ + unsigned size = 0, i; + struct tgsi_immediate *immediate; + + if( maxsize <= size ) + return 0; + immediate = (struct tgsi_immediate *) &tokens[size]; + size++; + + *immediate = tgsi_build_immediate( header ); + + for( i = 0; i < full_imm->Immediate.Size - 1; i++ ) { + struct tgsi_immediate_float32 *if32; + + if( maxsize <= size ) + return 0; + if32 = (struct tgsi_immediate_float32 *) &tokens[size]; + size++; + + *if32 = tgsi_build_immediate_float32( + full_imm->u.ImmediateFloat32[i].Float, + immediate, + header ); + } + + return size; +} + +/* + * instruction + */ + +struct tgsi_instruction +tgsi_default_instruction( void ) +{ + struct tgsi_instruction instruction; + + instruction.Type = TGSI_TOKEN_TYPE_INSTRUCTION; + instruction.Size = 1; + instruction.Opcode = TGSI_OPCODE_MOV; + instruction.Saturate = TGSI_SAT_NONE; + instruction.NumDstRegs = 1; + instruction.NumSrcRegs = 1; + instruction.Padding = 0; + instruction.Extended = 0; + + return instruction; +} + +struct tgsi_instruction +tgsi_build_instruction( + unsigned opcode, + unsigned saturate, + unsigned num_dst_regs, + unsigned num_src_regs, + struct tgsi_header *header ) +{ + struct tgsi_instruction instruction; + + assert (opcode <= TGSI_OPCODE_LAST); + assert (saturate <= TGSI_SAT_MINUS_PLUS_ONE); + assert (num_dst_regs <= 3); + assert (num_src_regs <= 15); + + instruction = tgsi_default_instruction(); + instruction.Opcode = opcode; + instruction.Saturate = saturate; + instruction.NumDstRegs = num_dst_regs; + instruction.NumSrcRegs = num_src_regs; + + header_bodysize_grow( header ); + + return instruction; +} + +static void +instruction_grow( + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + assert (instruction->Size < 0xFF); + + instruction->Size++; + + header_bodysize_grow( header ); +} + +struct tgsi_full_instruction +tgsi_default_full_instruction( void ) +{ + struct tgsi_full_instruction full_instruction; + unsigned i; + + full_instruction.Instruction = tgsi_default_instruction(); + full_instruction.InstructionExtNv = tgsi_default_instruction_ext_nv(); + full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label(); + full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture(); + for( i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) { + full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register(); + } + for( i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++ ) { + full_instruction.FullSrcRegisters[i] = tgsi_default_full_src_register(); + } + + return full_instruction; +} + +unsigned +tgsi_build_full_instruction( + const struct tgsi_full_instruction *full_inst, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ) +{ + unsigned size = 0; + unsigned i; + struct tgsi_instruction *instruction; + struct tgsi_token *prev_token; + + if( maxsize <= size ) + return 0; + instruction = (struct tgsi_instruction *) &tokens[size]; + size++; + + *instruction = tgsi_build_instruction( + full_inst->Instruction.Opcode, + full_inst->Instruction.Saturate, + full_inst->Instruction.NumDstRegs, + full_inst->Instruction.NumSrcRegs, + header ); + prev_token = (struct tgsi_token *) instruction; + + if( tgsi_compare_instruction_ext_nv( + full_inst->InstructionExtNv, + tgsi_default_instruction_ext_nv() ) ) { + struct tgsi_instruction_ext_nv *instruction_ext_nv; + + if( maxsize <= size ) + return 0; + instruction_ext_nv = + (struct tgsi_instruction_ext_nv *) &tokens[size]; + size++; + + *instruction_ext_nv = tgsi_build_instruction_ext_nv( + full_inst->InstructionExtNv.Precision, + full_inst->InstructionExtNv.CondDstIndex, + full_inst->InstructionExtNv.CondFlowIndex, + full_inst->InstructionExtNv.CondMask, + full_inst->InstructionExtNv.CondSwizzleX, + full_inst->InstructionExtNv.CondSwizzleY, + full_inst->InstructionExtNv.CondSwizzleZ, + full_inst->InstructionExtNv.CondSwizzleW, + full_inst->InstructionExtNv.CondDstUpdate, + full_inst->InstructionExtNv.CondFlowEnable, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) instruction_ext_nv; + } + + if( tgsi_compare_instruction_ext_label( + full_inst->InstructionExtLabel, + tgsi_default_instruction_ext_label() ) ) { + struct tgsi_instruction_ext_label *instruction_ext_label; + + if( maxsize <= size ) + return 0; + instruction_ext_label = + (struct tgsi_instruction_ext_label *) &tokens[size]; + size++; + + *instruction_ext_label = tgsi_build_instruction_ext_label( + full_inst->InstructionExtLabel.Label, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) instruction_ext_label; + } + + if( tgsi_compare_instruction_ext_texture( + full_inst->InstructionExtTexture, + tgsi_default_instruction_ext_texture() ) ) { + struct tgsi_instruction_ext_texture *instruction_ext_texture; + + if( maxsize <= size ) + return 0; + instruction_ext_texture = + (struct tgsi_instruction_ext_texture *) &tokens[size]; + size++; + + *instruction_ext_texture = tgsi_build_instruction_ext_texture( + full_inst->InstructionExtTexture.Texture, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) instruction_ext_texture; + } + + for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) { + const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i]; + struct tgsi_dst_register *dst_register; + struct tgsi_token *prev_token; + + if( maxsize <= size ) + return 0; + dst_register = (struct tgsi_dst_register *) &tokens[size]; + size++; + + *dst_register = tgsi_build_dst_register( + reg->DstRegister.File, + reg->DstRegister.WriteMask, + reg->DstRegister.Index, + instruction, + header ); + prev_token = (struct tgsi_token *) dst_register; + + if( tgsi_compare_dst_register_ext_concode( + reg->DstRegisterExtConcode, + tgsi_default_dst_register_ext_concode() ) ) { + struct tgsi_dst_register_ext_concode *dst_register_ext_concode; + + if( maxsize <= size ) + return 0; + dst_register_ext_concode = + (struct tgsi_dst_register_ext_concode *) &tokens[size]; + size++; + + *dst_register_ext_concode = tgsi_build_dst_register_ext_concode( + reg->DstRegisterExtConcode.CondMask, + reg->DstRegisterExtConcode.CondSwizzleX, + reg->DstRegisterExtConcode.CondSwizzleY, + reg->DstRegisterExtConcode.CondSwizzleZ, + reg->DstRegisterExtConcode.CondSwizzleW, + reg->DstRegisterExtConcode.CondSrcIndex, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) dst_register_ext_concode; + } + + if( tgsi_compare_dst_register_ext_modulate( + reg->DstRegisterExtModulate, + tgsi_default_dst_register_ext_modulate() ) ) { + struct tgsi_dst_register_ext_modulate *dst_register_ext_modulate; + + if( maxsize <= size ) + return 0; + dst_register_ext_modulate = + (struct tgsi_dst_register_ext_modulate *) &tokens[size]; + size++; + + *dst_register_ext_modulate = tgsi_build_dst_register_ext_modulate( + reg->DstRegisterExtModulate.Modulate, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) dst_register_ext_modulate; + } + } + + for( i = 0; i < full_inst->Instruction.NumSrcRegs; i++ ) { + const struct tgsi_full_src_register *reg = &full_inst->FullSrcRegisters[i]; + struct tgsi_src_register *src_register; + struct tgsi_token *prev_token; + + if( maxsize <= size ) + return 0; + src_register = (struct tgsi_src_register *) &tokens[size]; + size++; + + *src_register = tgsi_build_src_register( + reg->SrcRegister.File, + reg->SrcRegister.SwizzleX, + reg->SrcRegister.SwizzleY, + reg->SrcRegister.SwizzleZ, + reg->SrcRegister.SwizzleW, + reg->SrcRegister.Negate, + reg->SrcRegister.Indirect, + reg->SrcRegister.Dimension, + reg->SrcRegister.Index, + instruction, + header ); + prev_token = (struct tgsi_token *) src_register; + + if( tgsi_compare_src_register_ext_swz( + reg->SrcRegisterExtSwz, + tgsi_default_src_register_ext_swz() ) ) { + struct tgsi_src_register_ext_swz *src_register_ext_swz; + + if( maxsize <= size ) + return 0; + src_register_ext_swz = + (struct tgsi_src_register_ext_swz *) &tokens[size]; + size++; + + *src_register_ext_swz = tgsi_build_src_register_ext_swz( + reg->SrcRegisterExtSwz.ExtSwizzleX, + reg->SrcRegisterExtSwz.ExtSwizzleY, + reg->SrcRegisterExtSwz.ExtSwizzleZ, + reg->SrcRegisterExtSwz.ExtSwizzleW, + reg->SrcRegisterExtSwz.NegateX, + reg->SrcRegisterExtSwz.NegateY, + reg->SrcRegisterExtSwz.NegateZ, + reg->SrcRegisterExtSwz.NegateW, + reg->SrcRegisterExtSwz.ExtDivide, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) src_register_ext_swz; + } + + if( tgsi_compare_src_register_ext_mod( + reg->SrcRegisterExtMod, + tgsi_default_src_register_ext_mod() ) ) { + struct tgsi_src_register_ext_mod *src_register_ext_mod; + + if( maxsize <= size ) + return 0; + src_register_ext_mod = + (struct tgsi_src_register_ext_mod *) &tokens[size]; + size++; + + *src_register_ext_mod = tgsi_build_src_register_ext_mod( + reg->SrcRegisterExtMod.Complement, + reg->SrcRegisterExtMod.Bias, + reg->SrcRegisterExtMod.Scale2X, + reg->SrcRegisterExtMod.Absolute, + reg->SrcRegisterExtMod.Negate, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) src_register_ext_mod; + } + + if( reg->SrcRegister.Indirect ) { + struct tgsi_src_register *ind; + + if( maxsize <= size ) + return 0; + ind = (struct tgsi_src_register *) &tokens[size]; + size++; + + *ind = tgsi_build_src_register( + reg->SrcRegisterInd.File, + reg->SrcRegisterInd.SwizzleX, + reg->SrcRegisterInd.SwizzleY, + reg->SrcRegisterInd.SwizzleZ, + reg->SrcRegisterInd.SwizzleW, + reg->SrcRegisterInd.Negate, + reg->SrcRegisterInd.Indirect, + reg->SrcRegisterInd.Dimension, + reg->SrcRegisterInd.Index, + instruction, + header ); + } + + if( reg->SrcRegister.Dimension ) { + struct tgsi_dimension *dim; + + assert( !reg->SrcRegisterDim.Dimension ); + + if( maxsize <= size ) + return 0; + dim = (struct tgsi_dimension *) &tokens[size]; + size++; + + *dim = tgsi_build_dimension( + reg->SrcRegisterDim.Indirect, + reg->SrcRegisterDim.Index, + instruction, + header ); + + if( reg->SrcRegisterDim.Indirect ) { + struct tgsi_src_register *ind; + + if( maxsize <= size ) + return 0; + ind = (struct tgsi_src_register *) &tokens[size]; + size++; + + *ind = tgsi_build_src_register( + reg->SrcRegisterDimInd.File, + reg->SrcRegisterDimInd.SwizzleX, + reg->SrcRegisterDimInd.SwizzleY, + reg->SrcRegisterDimInd.SwizzleZ, + reg->SrcRegisterDimInd.SwizzleW, + reg->SrcRegisterDimInd.Negate, + reg->SrcRegisterDimInd.Indirect, + reg->SrcRegisterDimInd.Dimension, + reg->SrcRegisterDimInd.Index, + instruction, + header ); + } + } + } + + return size; +} + +struct tgsi_instruction_ext_nv +tgsi_default_instruction_ext_nv( void ) +{ + struct tgsi_instruction_ext_nv instruction_ext_nv; + + instruction_ext_nv.Type = TGSI_INSTRUCTION_EXT_TYPE_NV; + instruction_ext_nv.Precision = TGSI_PRECISION_DEFAULT; + instruction_ext_nv.CondDstIndex = 0; + instruction_ext_nv.CondFlowIndex = 0; + instruction_ext_nv.CondMask = TGSI_CC_TR; + instruction_ext_nv.CondSwizzleX = TGSI_SWIZZLE_X; + instruction_ext_nv.CondSwizzleY = TGSI_SWIZZLE_Y; + instruction_ext_nv.CondSwizzleZ = TGSI_SWIZZLE_Z; + instruction_ext_nv.CondSwizzleW = TGSI_SWIZZLE_W; + instruction_ext_nv.CondDstUpdate = 0; + instruction_ext_nv.CondFlowEnable = 0; + instruction_ext_nv.Padding = 0; + instruction_ext_nv.Extended = 0; + + return instruction_ext_nv; +} + +union token_u32 +{ + unsigned u32; +}; + +unsigned +tgsi_compare_instruction_ext_nv( + struct tgsi_instruction_ext_nv a, + struct tgsi_instruction_ext_nv b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_instruction_ext_nv +tgsi_build_instruction_ext_nv( + unsigned precision, + unsigned cond_dst_index, + unsigned cond_flow_index, + unsigned cond_mask, + unsigned cond_swizzle_x, + unsigned cond_swizzle_y, + unsigned cond_swizzle_z, + unsigned cond_swizzle_w, + unsigned cond_dst_update, + unsigned cond_flow_update, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_instruction_ext_nv instruction_ext_nv; + + instruction_ext_nv = tgsi_default_instruction_ext_nv(); + instruction_ext_nv.Precision = precision; + instruction_ext_nv.CondDstIndex = cond_dst_index; + instruction_ext_nv.CondFlowIndex = cond_flow_index; + instruction_ext_nv.CondMask = cond_mask; + instruction_ext_nv.CondSwizzleX = cond_swizzle_x; + instruction_ext_nv.CondSwizzleY = cond_swizzle_y; + instruction_ext_nv.CondSwizzleZ = cond_swizzle_z; + instruction_ext_nv.CondSwizzleW = cond_swizzle_w; + instruction_ext_nv.CondDstUpdate = cond_dst_update; + instruction_ext_nv.CondFlowEnable = cond_flow_update; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return instruction_ext_nv; +} + +struct tgsi_instruction_ext_label +tgsi_default_instruction_ext_label( void ) +{ + struct tgsi_instruction_ext_label instruction_ext_label; + + instruction_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL; + instruction_ext_label.Label = 0; + instruction_ext_label.Padding = 0; + instruction_ext_label.Extended = 0; + + return instruction_ext_label; +} + +unsigned +tgsi_compare_instruction_ext_label( + struct tgsi_instruction_ext_label a, + struct tgsi_instruction_ext_label b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_instruction_ext_label +tgsi_build_instruction_ext_label( + unsigned label, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_instruction_ext_label instruction_ext_label; + + instruction_ext_label = tgsi_default_instruction_ext_label(); + instruction_ext_label.Label = label; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return instruction_ext_label; +} + +struct tgsi_instruction_ext_texture +tgsi_default_instruction_ext_texture( void ) +{ + struct tgsi_instruction_ext_texture instruction_ext_texture; + + instruction_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE; + instruction_ext_texture.Texture = TGSI_TEXTURE_UNKNOWN; + instruction_ext_texture.Padding = 0; + instruction_ext_texture.Extended = 0; + + return instruction_ext_texture; +} + +unsigned +tgsi_compare_instruction_ext_texture( + struct tgsi_instruction_ext_texture a, + struct tgsi_instruction_ext_texture b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_instruction_ext_texture +tgsi_build_instruction_ext_texture( + unsigned texture, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_instruction_ext_texture instruction_ext_texture; + + instruction_ext_texture = tgsi_default_instruction_ext_texture(); + instruction_ext_texture.Texture = texture; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return instruction_ext_texture; +} + +struct tgsi_src_register +tgsi_default_src_register( void ) +{ + struct tgsi_src_register src_register; + + src_register.File = TGSI_FILE_NULL; + src_register.SwizzleX = TGSI_SWIZZLE_X; + src_register.SwizzleY = TGSI_SWIZZLE_Y; + src_register.SwizzleZ = TGSI_SWIZZLE_Z; + src_register.SwizzleW = TGSI_SWIZZLE_W; + src_register.Negate = 0; + src_register.Indirect = 0; + src_register.Dimension = 0; + src_register.Index = 0; + src_register.Extended = 0; + + return src_register; +} + +struct tgsi_src_register +tgsi_build_src_register( + unsigned file, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w, + unsigned negate, + unsigned indirect, + unsigned dimension, + int index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_src_register src_register; + + assert( file <= TGSI_FILE_IMMEDIATE ); + assert( swizzle_x <= TGSI_SWIZZLE_W ); + assert( swizzle_y <= TGSI_SWIZZLE_W ); + assert( swizzle_z <= TGSI_SWIZZLE_W ); + assert( swizzle_w <= TGSI_SWIZZLE_W ); + assert( negate <= 1 ); + assert( index >= -0x8000 && index <= 0x7FFF ); + + src_register = tgsi_default_src_register(); + src_register.File = file; + src_register.SwizzleX = swizzle_x; + src_register.SwizzleY = swizzle_y; + src_register.SwizzleZ = swizzle_z; + src_register.SwizzleW = swizzle_w; + src_register.Negate = negate; + src_register.Indirect = indirect; + src_register.Dimension = dimension; + src_register.Index = index; + + instruction_grow( instruction, header ); + + return src_register; +} + +struct tgsi_full_src_register +tgsi_default_full_src_register( void ) +{ + struct tgsi_full_src_register full_src_register; + + full_src_register.SrcRegister = tgsi_default_src_register(); + full_src_register.SrcRegisterExtSwz = tgsi_default_src_register_ext_swz(); + full_src_register.SrcRegisterExtMod = tgsi_default_src_register_ext_mod(); + full_src_register.SrcRegisterInd = tgsi_default_src_register(); + full_src_register.SrcRegisterDim = tgsi_default_dimension(); + full_src_register.SrcRegisterDimInd = tgsi_default_src_register(); + + return full_src_register; +} + +struct tgsi_src_register_ext_swz +tgsi_default_src_register_ext_swz( void ) +{ + struct tgsi_src_register_ext_swz src_register_ext_swz; + + src_register_ext_swz.Type = TGSI_SRC_REGISTER_EXT_TYPE_SWZ; + src_register_ext_swz.ExtSwizzleX = TGSI_EXTSWIZZLE_X; + src_register_ext_swz.ExtSwizzleY = TGSI_EXTSWIZZLE_Y; + src_register_ext_swz.ExtSwizzleZ = TGSI_EXTSWIZZLE_Z; + src_register_ext_swz.ExtSwizzleW = TGSI_EXTSWIZZLE_W; + src_register_ext_swz.NegateX = 0; + src_register_ext_swz.NegateY = 0; + src_register_ext_swz.NegateZ = 0; + src_register_ext_swz.NegateW = 0; + src_register_ext_swz.ExtDivide = TGSI_EXTSWIZZLE_ONE; + src_register_ext_swz.Padding = 0; + src_register_ext_swz.Extended = 0; + + return src_register_ext_swz; +} + +unsigned +tgsi_compare_src_register_ext_swz( + struct tgsi_src_register_ext_swz a, + struct tgsi_src_register_ext_swz b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_src_register_ext_swz +tgsi_build_src_register_ext_swz( + unsigned ext_swizzle_x, + unsigned ext_swizzle_y, + unsigned ext_swizzle_z, + unsigned ext_swizzle_w, + unsigned negate_x, + unsigned negate_y, + unsigned negate_z, + unsigned negate_w, + unsigned ext_divide, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_src_register_ext_swz src_register_ext_swz; + + assert( ext_swizzle_x <= TGSI_EXTSWIZZLE_ONE ); + assert( ext_swizzle_y <= TGSI_EXTSWIZZLE_ONE ); + assert( ext_swizzle_z <= TGSI_EXTSWIZZLE_ONE ); + assert( ext_swizzle_w <= TGSI_EXTSWIZZLE_ONE ); + assert( negate_x <= 1 ); + assert( negate_y <= 1 ); + assert( negate_z <= 1 ); + assert( negate_w <= 1 ); + assert( ext_divide <= TGSI_EXTSWIZZLE_ONE ); + + src_register_ext_swz = tgsi_default_src_register_ext_swz(); + src_register_ext_swz.ExtSwizzleX = ext_swizzle_x; + src_register_ext_swz.ExtSwizzleY = ext_swizzle_y; + src_register_ext_swz.ExtSwizzleZ = ext_swizzle_z; + src_register_ext_swz.ExtSwizzleW = ext_swizzle_w; + src_register_ext_swz.NegateX = negate_x; + src_register_ext_swz.NegateY = negate_y; + src_register_ext_swz.NegateZ = negate_z; + src_register_ext_swz.NegateW = negate_w; + src_register_ext_swz.ExtDivide = ext_divide; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return src_register_ext_swz; +} + +struct tgsi_src_register_ext_mod +tgsi_default_src_register_ext_mod( void ) +{ + struct tgsi_src_register_ext_mod src_register_ext_mod; + + src_register_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD; + src_register_ext_mod.Complement = 0; + src_register_ext_mod.Bias = 0; + src_register_ext_mod.Scale2X = 0; + src_register_ext_mod.Absolute = 0; + src_register_ext_mod.Negate = 0; + src_register_ext_mod.Padding = 0; + src_register_ext_mod.Extended = 0; + + return src_register_ext_mod; +} + +unsigned +tgsi_compare_src_register_ext_mod( + struct tgsi_src_register_ext_mod a, + struct tgsi_src_register_ext_mod b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_src_register_ext_mod +tgsi_build_src_register_ext_mod( + unsigned complement, + unsigned bias, + unsigned scale_2x, + unsigned absolute, + unsigned negate, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_src_register_ext_mod src_register_ext_mod; + + assert( complement <= 1 ); + assert( bias <= 1 ); + assert( scale_2x <= 1 ); + assert( absolute <= 1 ); + assert( negate <= 1 ); + + src_register_ext_mod = tgsi_default_src_register_ext_mod(); + src_register_ext_mod.Complement = complement; + src_register_ext_mod.Bias = bias; + src_register_ext_mod.Scale2X = scale_2x; + src_register_ext_mod.Absolute = absolute; + src_register_ext_mod.Negate = negate; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return src_register_ext_mod; +} + +struct tgsi_dimension +tgsi_default_dimension( void ) +{ + struct tgsi_dimension dimension; + + dimension.Indirect = 0; + dimension.Dimension = 0; + dimension.Padding = 0; + dimension.Index = 0; + dimension.Extended = 0; + + return dimension; +} + +struct tgsi_dimension +tgsi_build_dimension( + unsigned indirect, + unsigned index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_dimension dimension; + + dimension = tgsi_default_dimension(); + dimension.Indirect = indirect; + dimension.Index = index; + + instruction_grow( instruction, header ); + + return dimension; +} + +struct tgsi_dst_register +tgsi_default_dst_register( void ) +{ + struct tgsi_dst_register dst_register; + + dst_register.File = TGSI_FILE_NULL; + dst_register.WriteMask = TGSI_WRITEMASK_XYZW; + dst_register.Indirect = 0; + dst_register.Dimension = 0; + dst_register.Index = 0; + dst_register.Padding = 0; + dst_register.Extended = 0; + + return dst_register; +} + +struct tgsi_dst_register +tgsi_build_dst_register( + unsigned file, + unsigned mask, + int index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_dst_register dst_register; + + assert( file <= TGSI_FILE_IMMEDIATE ); + assert( mask <= TGSI_WRITEMASK_XYZW ); + assert( index >= -32768 && index <= 32767 ); + + dst_register = tgsi_default_dst_register(); + dst_register.File = file; + dst_register.WriteMask = mask; + dst_register.Index = index; + + instruction_grow( instruction, header ); + + return dst_register; +} + +struct tgsi_full_dst_register +tgsi_default_full_dst_register( void ) +{ + struct tgsi_full_dst_register full_dst_register; + + full_dst_register.DstRegister = tgsi_default_dst_register(); + full_dst_register.DstRegisterExtConcode = + tgsi_default_dst_register_ext_concode(); + full_dst_register.DstRegisterExtModulate = + tgsi_default_dst_register_ext_modulate(); + + return full_dst_register; +} + +struct tgsi_dst_register_ext_concode +tgsi_default_dst_register_ext_concode( void ) +{ + struct tgsi_dst_register_ext_concode dst_register_ext_concode; + + dst_register_ext_concode.Type = TGSI_DST_REGISTER_EXT_TYPE_CONDCODE; + dst_register_ext_concode.CondMask = TGSI_CC_TR; + dst_register_ext_concode.CondSwizzleX = TGSI_SWIZZLE_X; + dst_register_ext_concode.CondSwizzleY = TGSI_SWIZZLE_Y; + dst_register_ext_concode.CondSwizzleZ = TGSI_SWIZZLE_Z; + dst_register_ext_concode.CondSwizzleW = TGSI_SWIZZLE_W; + dst_register_ext_concode.CondSrcIndex = 0; + dst_register_ext_concode.Padding = 0; + dst_register_ext_concode.Extended = 0; + + return dst_register_ext_concode; +} + +unsigned +tgsi_compare_dst_register_ext_concode( + struct tgsi_dst_register_ext_concode a, + struct tgsi_dst_register_ext_concode b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_dst_register_ext_concode +tgsi_build_dst_register_ext_concode( + unsigned cc, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w, + int index, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_dst_register_ext_concode dst_register_ext_concode; + + assert( cc <= TGSI_CC_FL ); + assert( swizzle_x <= TGSI_SWIZZLE_W ); + assert( swizzle_y <= TGSI_SWIZZLE_W ); + assert( swizzle_z <= TGSI_SWIZZLE_W ); + assert( swizzle_w <= TGSI_SWIZZLE_W ); + assert( index >= -32768 && index <= 32767 ); + + dst_register_ext_concode = tgsi_default_dst_register_ext_concode(); + dst_register_ext_concode.CondMask = cc; + dst_register_ext_concode.CondSwizzleX = swizzle_x; + dst_register_ext_concode.CondSwizzleY = swizzle_y; + dst_register_ext_concode.CondSwizzleZ = swizzle_z; + dst_register_ext_concode.CondSwizzleW = swizzle_w; + dst_register_ext_concode.CondSrcIndex = index; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return dst_register_ext_concode; +} + +struct tgsi_dst_register_ext_modulate +tgsi_default_dst_register_ext_modulate( void ) +{ + struct tgsi_dst_register_ext_modulate dst_register_ext_modulate; + + dst_register_ext_modulate.Type = TGSI_DST_REGISTER_EXT_TYPE_MODULATE; + dst_register_ext_modulate.Modulate = TGSI_MODULATE_1X; + dst_register_ext_modulate.Padding = 0; + dst_register_ext_modulate.Extended = 0; + + return dst_register_ext_modulate; +} + +unsigned +tgsi_compare_dst_register_ext_modulate( + struct tgsi_dst_register_ext_modulate a, + struct tgsi_dst_register_ext_modulate b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_dst_register_ext_modulate +tgsi_build_dst_register_ext_modulate( + unsigned modulate, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_dst_register_ext_modulate dst_register_ext_modulate; + + assert( modulate <= TGSI_MODULATE_EIGHTH ); + + dst_register_ext_modulate = tgsi_default_dst_register_ext_modulate(); + dst_register_ext_modulate.Modulate = modulate; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return dst_register_ext_modulate; +} + diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.h b/src/gallium/auxiliary/tgsi/util/tgsi_build.h new file mode 100644 index 0000000000..116c78abf3 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.h @@ -0,0 +1,320 @@ +#if !defined TGSI_BUILD_H +#define TGSI_BUILD_H + +#if defined __cplusplus +extern "C" { +#endif // defined __cplusplus + +/* + * version + */ + +struct tgsi_version +tgsi_build_version( void ); + +/* + * header + */ + +struct tgsi_header +tgsi_build_header( void ); + +struct tgsi_processor +tgsi_default_processor( void ); + +struct tgsi_processor +tgsi_build_processor( + unsigned processor, + struct tgsi_header *header ); + +/* + * declaration + */ + +struct tgsi_declaration +tgsi_default_declaration( void ); + +struct tgsi_declaration +tgsi_build_declaration( + unsigned file, + unsigned declare, + unsigned usage_mask, + unsigned interpolate, + unsigned semantic, + struct tgsi_header *header ); + +struct tgsi_full_declaration +tgsi_default_full_declaration( void ); + +unsigned +tgsi_build_full_declaration( + const struct tgsi_full_declaration *full_decl, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ); + +struct tgsi_declaration_range +tgsi_build_declaration_range( + unsigned first, + unsigned last, + struct tgsi_declaration *declaration, + struct tgsi_header *header ); + +struct tgsi_declaration_mask +tgsi_build_declaration_mask( + unsigned mask, + struct tgsi_declaration *declaration, + struct tgsi_header *header ); + +struct tgsi_declaration_interpolation +tgsi_default_declaration_interpolation( void ); + +struct tgsi_declaration_interpolation +tgsi_build_declaration_interpolation( + unsigned interpolate, + struct tgsi_declaration *declaration, + struct tgsi_header *header ); + +struct tgsi_declaration_semantic +tgsi_default_declaration_semantic( void ); + +struct tgsi_declaration_semantic +tgsi_build_declaration_semantic( + unsigned semantic_name, + unsigned semantic_index, + struct tgsi_declaration *declaration, + struct tgsi_header *header ); + +/* + * immediate + */ + +struct tgsi_immediate +tgsi_default_immediate( void ); + +struct tgsi_immediate +tgsi_build_immediate( + struct tgsi_header *header ); + +struct tgsi_full_immediate +tgsi_default_full_immediate( void ); + +struct tgsi_immediate_float32 +tgsi_build_immediate_float32( + float value, + struct tgsi_immediate *immediate, + struct tgsi_header *header ); + +unsigned +tgsi_build_full_immediate( + const struct tgsi_full_immediate *full_imm, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ); + +/* + * instruction + */ + +struct tgsi_instruction +tgsi_default_instruction( void ); + +struct tgsi_instruction +tgsi_build_instruction( + unsigned opcode, + unsigned saturate, + unsigned num_dst_regs, + unsigned num_src_regs, + struct tgsi_header *header ); + +struct tgsi_full_instruction +tgsi_default_full_instruction( void ); + +unsigned +tgsi_build_full_instruction( + const struct tgsi_full_instruction *full_inst, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ); + +struct tgsi_instruction_ext_nv +tgsi_default_instruction_ext_nv( void ); + +unsigned +tgsi_compare_instruction_ext_nv( + struct tgsi_instruction_ext_nv a, + struct tgsi_instruction_ext_nv b ); + +struct tgsi_instruction_ext_nv +tgsi_build_instruction_ext_nv( + unsigned precision, + unsigned cond_dst_index, + unsigned cond_flow_index, + unsigned cond_mask, + unsigned cond_swizzle_x, + unsigned cond_swizzle_y, + unsigned cond_swizzle_z, + unsigned cond_swizzle_w, + unsigned cond_dst_update, + unsigned cond_flow_update, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_instruction_ext_label +tgsi_default_instruction_ext_label( void ); + +unsigned +tgsi_compare_instruction_ext_label( + struct tgsi_instruction_ext_label a, + struct tgsi_instruction_ext_label b ); + +struct tgsi_instruction_ext_label +tgsi_build_instruction_ext_label( + unsigned label, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_instruction_ext_texture +tgsi_default_instruction_ext_texture( void ); + +unsigned +tgsi_compare_instruction_ext_texture( + struct tgsi_instruction_ext_texture a, + struct tgsi_instruction_ext_texture b ); + +struct tgsi_instruction_ext_texture +tgsi_build_instruction_ext_texture( + unsigned texture, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_src_register +tgsi_default_src_register( void ); + +struct tgsi_src_register +tgsi_build_src_register( + unsigned file, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w, + unsigned negate, + unsigned indirect, + unsigned dimension, + int index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_full_src_register +tgsi_default_full_src_register( void ); + +struct tgsi_src_register_ext_swz +tgsi_default_src_register_ext_swz( void ); + +unsigned +tgsi_compare_src_register_ext_swz( + struct tgsi_src_register_ext_swz a, + struct tgsi_src_register_ext_swz b ); + +struct tgsi_src_register_ext_swz +tgsi_build_src_register_ext_swz( + unsigned ext_swizzle_x, + unsigned ext_swizzle_y, + unsigned ext_swizzle_z, + unsigned ext_swizzle_w, + unsigned negate_x, + unsigned negate_y, + unsigned negate_z, + unsigned negate_w, + unsigned ext_divide, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_src_register_ext_mod +tgsi_default_src_register_ext_mod( void ); + +unsigned +tgsi_compare_src_register_ext_mod( + struct tgsi_src_register_ext_mod a, + struct tgsi_src_register_ext_mod b ); + +struct tgsi_src_register_ext_mod +tgsi_build_src_register_ext_mod( + unsigned complement, + unsigned bias, + unsigned scale_2x, + unsigned absolute, + unsigned negate, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_dimension +tgsi_default_dimension( void ); + +struct tgsi_dimension +tgsi_build_dimension( + unsigned indirect, + unsigned index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_dst_register +tgsi_default_dst_register( void ); + +struct tgsi_dst_register +tgsi_build_dst_register( + unsigned file, + unsigned mask, + int index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_full_dst_register +tgsi_default_full_dst_register( void ); + +struct tgsi_dst_register_ext_concode +tgsi_default_dst_register_ext_concode( void ); + +unsigned +tgsi_compare_dst_register_ext_concode( + struct tgsi_dst_register_ext_concode a, + struct tgsi_dst_register_ext_concode b ); + +struct tgsi_dst_register_ext_concode +tgsi_build_dst_register_ext_concode( + unsigned cc, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w, + int index, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_dst_register_ext_modulate +tgsi_default_dst_register_ext_modulate( void ); + +unsigned +tgsi_compare_dst_register_ext_modulate( + struct tgsi_dst_register_ext_modulate a, + struct tgsi_dst_register_ext_modulate b ); + +struct tgsi_dst_register_ext_modulate +tgsi_build_dst_register_ext_modulate( + unsigned modulate, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +#if defined __cplusplus +} // extern "C" +#endif // defined __cplusplus + +#endif // !defined TGSI_BUILD_H + diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c new file mode 100644 index 0000000000..b5c54847e0 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -0,0 +1,1581 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include + +#include "pipe/p_debug.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi_dump.h" +#include "tgsi_parse.h" +#include "tgsi_build.h" + +struct gen_dump +{ + unsigned tabs; + void (* write)( + struct gen_dump *dump, + const void *data, + unsigned size ); +}; + +struct text_dump +{ + struct gen_dump base; + char *text; + unsigned length; + unsigned capacity; +}; + +static void +_text_dump_write( + struct gen_dump *dump, + const void *data, + unsigned size ) +{ + struct text_dump *td = (struct text_dump *) dump; + unsigned new_length = td->length + size; + + if( new_length >= td->capacity ) { + unsigned new_capacity = td->capacity; + + do { + if( new_capacity == 0 ) { + new_capacity = 256; + } + else { + new_capacity *= 2; + } + } while( new_length >= new_capacity ); + td->text = (char *) REALLOC( + td->text, + td->capacity, + new_capacity ); + td->capacity = new_capacity; + } + memcpy( + &td->text[td->length], + data, + size ); + td->length = new_length; + td->text[td->length] = '\0'; +} + +struct file_dump +{ + struct gen_dump base; + FILE *file; +}; + +static void +_file_dump_write( + struct gen_dump *dump, + const void *data, + unsigned size ) +{ + struct file_dump *fd = (struct file_dump *) dump; + +#if 0 + fwrite( data, 1, size, fd->file ); +#else + { + unsigned i; + + for (i = 0; i < size; i++ ) { + fprintf( fd->file, "%c", ((const char *) data)[i] ); + } + } +#endif +} + +static void +gen_dump_str( + struct gen_dump *dump, + const char *str ) +{ + unsigned i; + size_t len = strlen( str ); + + for (i = 0; i < len; i++) { + dump->write( dump, &str[i], 1 ); + if (str[i] == '\n') { + unsigned i; + + for (i = 0; i < dump->tabs; i++) { + dump->write( dump, " ", 4 ); + } + } + } +} + +static void +gen_dump_chr( + struct gen_dump *dump, + const char chr ) +{ + dump->write( dump, &chr, 1 ); +} + +static void +gen_dump_uix( + struct gen_dump *dump, + const unsigned ui ) +{ + char str[36]; + + sprintf( str, "0x%x", ui ); + gen_dump_str( dump, str ); +} + +static void +gen_dump_uid( + struct gen_dump *dump, + const unsigned ui ) +{ + char str[16]; + + sprintf( str, "%u", ui ); + gen_dump_str( dump, str ); +} + +static void +gen_dump_sid( + struct gen_dump *dump, + const int si ) +{ + char str[16]; + + sprintf( str, "%d", si ); + gen_dump_str( dump, str ); +} + +static void +gen_dump_flt( + struct gen_dump *dump, + const float flt ) +{ + char str[48]; + + sprintf( str, "%10.4f", flt ); + gen_dump_str( dump, str ); +} + +static void +gen_dump_enum( + struct gen_dump *dump, + const unsigned e, + const char **enums, + const unsigned enums_count ) +{ + if (e >= enums_count) { + gen_dump_uid( dump, e ); + } + else { + gen_dump_str( dump, enums[e] ); + } +} + +static void +gen_dump_tab( + struct gen_dump *dump ) +{ + ++dump->tabs; +} + +static void +gen_dump_untab( + struct gen_dump *dump ) +{ + assert( dump->tabs > 0 ); + + --dump->tabs; +} + +#define TXT(S) gen_dump_str( dump, S ) +#define CHR(C) gen_dump_chr( dump, C ) +#define UIX(I) gen_dump_uix( dump, I ) +#define UID(I) gen_dump_uid( dump, I ) +#define SID(I) gen_dump_sid( dump, I ) +#define FLT(F) gen_dump_flt( dump, F ) +#define TAB() gen_dump_tab( dump ) +#define UNT() gen_dump_untab( dump ) +#define ENM(E,ENUMS) gen_dump_enum( dump, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) + +static const char *TGSI_PROCESSOR_TYPES[] = +{ + "PROCESSOR_FRAGMENT", + "PROCESSOR_VERTEX", + "PROCESSOR_GEOMETRY" +}; + +static const char *TGSI_PROCESSOR_TYPES_SHORT[] = +{ + "FRAG", + "VERT", + "GEOM" +}; + +static const char *TGSI_TOKEN_TYPES[] = +{ + "TOKEN_TYPE_DECLARATION", + "TOKEN_TYPE_IMMEDIATE", + "TOKEN_TYPE_INSTRUCTION" +}; + +static const char *TGSI_FILES[] = +{ + "FILE_NULL", + "FILE_CONSTANT", + "FILE_INPUT", + "FILE_OUTPUT", + "FILE_TEMPORARY", + "FILE_SAMPLER", + "FILE_ADDRESS", + "FILE_IMMEDIATE" +}; + +static const char *TGSI_FILES_SHORT[] = +{ + "NULL", + "CONST", + "IN", + "OUT", + "TEMP", + "SAMP", + "ADDR", + "IMM" +}; + +static const char *TGSI_DECLARES[] = +{ + "DECLARE_RANGE", + "DECLARE_MASK" +}; + +static const char *TGSI_INTERPOLATES[] = +{ + "INTERPOLATE_CONSTANT", + "INTERPOLATE_LINEAR", + "INTERPOLATE_PERSPECTIVE", + "INTERPOLATE_ATTRIB" +}; + +static const char *TGSI_INTERPOLATES_SHORT[] = +{ + "CONSTANT", + "LINEAR", + "PERSPECTIVE", + "ATTRIB" +}; + +static const char *TGSI_SEMANTICS[] = +{ + "SEMANTIC_POSITION", + "SEMANTIC_COLOR", + "SEMANTIC_BCOLOR", + "SEMANTIC_FOG", + "SEMANTIC_PSIZE", + "SEMANTIC_GENERIC," +}; + +static const char *TGSI_SEMANTICS_SHORT[] = +{ + "POSITION", + "COLOR", + "BCOLOR", + "FOG", + "PSIZE", + "GENERIC", +}; + +static const char *TGSI_IMMS[] = +{ + "IMM_FLOAT32" +}; + +static const char *TGSI_IMMS_SHORT[] = +{ + "FLT32" +}; + +static const char *TGSI_OPCODES[] = +{ + "OPCODE_ARL", + "OPCODE_MOV", + "OPCODE_LIT", + "OPCODE_RCP", + "OPCODE_RSQ", + "OPCODE_EXP", + "OPCODE_LOG", + "OPCODE_MUL", + "OPCODE_ADD", + "OPCODE_DP3", + "OPCODE_DP4", + "OPCODE_DST", + "OPCODE_MIN", + "OPCODE_MAX", + "OPCODE_SLT", + "OPCODE_SGE", + "OPCODE_MAD", + "OPCODE_SUB", + "OPCODE_LERP", + "OPCODE_CND", + "OPCODE_CND0", + "OPCODE_DOT2ADD", + "OPCODE_INDEX", + "OPCODE_NEGATE", + "OPCODE_FRAC", + "OPCODE_CLAMP", + "OPCODE_FLOOR", + "OPCODE_ROUND", + "OPCODE_EXPBASE2", + "OPCODE_LOGBASE2", + "OPCODE_POWER", + "OPCODE_CROSSPRODUCT", + "OPCODE_MULTIPLYMATRIX", + "OPCODE_ABS", + "OPCODE_RCC", + "OPCODE_DPH", + "OPCODE_COS", + "OPCODE_DDX", + "OPCODE_DDY", + "OPCODE_KILP", + "OPCODE_PK2H", + "OPCODE_PK2US", + "OPCODE_PK4B", + "OPCODE_PK4UB", + "OPCODE_RFL", + "OPCODE_SEQ", + "OPCODE_SFL", + "OPCODE_SGT", + "OPCODE_SIN", + "OPCODE_SLE", + "OPCODE_SNE", + "OPCODE_STR", + "OPCODE_TEX", + "OPCODE_TXD", + "OPCODE_UP2H", + "OPCODE_UP2US", + "OPCODE_UP4B", + "OPCODE_UP4UB", + "OPCODE_X2D", + "OPCODE_ARA", + "OPCODE_ARR", + "OPCODE_BRA", + "OPCODE_CAL", + "OPCODE_RET", + "OPCODE_SSG", + "OPCODE_CMP", + "OPCODE_SCS", + "OPCODE_TXB", + "OPCODE_NRM", + "OPCODE_DIV", + "OPCODE_DP2", + "OPCODE_TXL", + "OPCODE_BRK", + "OPCODE_IF", + "OPCODE_LOOP", + "OPCODE_REP", + "OPCODE_ELSE", + "OPCODE_ENDIF", + "OPCODE_ENDLOOP", + "OPCODE_ENDREP", + "OPCODE_PUSHA", + "OPCODE_POPA", + "OPCODE_CEIL", + "OPCODE_I2F", + "OPCODE_NOT", + "OPCODE_TRUNC", + "OPCODE_SHL", + "OPCODE_SHR", + "OPCODE_AND", + "OPCODE_OR", + "OPCODE_MOD", + "OPCODE_XOR", + "OPCODE_SAD", + "OPCODE_TXF", + "OPCODE_TXQ", + "OPCODE_CONT", + "OPCODE_EMIT", + "OPCODE_ENDPRIM", + "OPCODE_BGNLOOP2", + "OPCODE_BGNSUB", + "OPCODE_ENDLOOP2", + "OPCODE_ENDSUB", + "OPCODE_NOISE1", + "OPCODE_NOISE2", + "OPCODE_NOISE3", + "OPCODE_NOISE4", + "OPCODE_NOP", + "OPCODE_TEXBEM", + "OPCODE_TEXBEML", + "OPCODE_TEXREG2AR", + "OPCODE_TEXM3X2PAD", + "OPCODE_TEXM3X2TEX", + "OPCODE_TEXM3X3PAD", + "OPCODE_TEXM3X3TEX", + "OPCODE_TEXM3X3SPEC", + "OPCODE_TEXM3X3VSPEC", + "OPCODE_TEXREG2GB", + "OPCODE_TEXREG2RGB", + "OPCODE_TEXDP3TEX", + "OPCODE_TEXDP3", + "OPCODE_TEXM3X3", + "OPCODE_TEXM3X2DEPTH", + "OPCODE_TEXDEPTH", + "OPCODE_BEM", + "OPCODE_M4X3", + "OPCODE_M3X4", + "OPCODE_M3X3", + "OPCODE_M3X2", + "OPCODE_NRM4", + "OPCODE_CALLNZ", + "OPCODE_IFC", + "OPCODE_BREAKC", + "OPCODE_KIL", + "OPCODE_END" +}; + +static const char *TGSI_OPCODES_SHORT[] = +{ + "ARL", + "MOV", + "LIT", + "RCP", + "RSQ", + "EXP", + "LOG", + "MUL", + "ADD", + "DP3", + "DP4", + "DST", + "MIN", + "MAX", + "SLT", + "SGE", + "MAD", + "SUB", + "LERP", + "CND", + "CND0", + "DOT2ADD", + "INDEX", + "NEGATE", + "FRAC", + "CLAMP", + "FLOOR", + "ROUND", + "EXPBASE2", + "LOGBASE2", + "POWER", + "CROSSPRODUCT", + "MULTIPLYMATRIX", + "ABS", + "RCC", + "DPH", + "COS", + "DDX", + "DDY", + "KILP", + "PK2H", + "PK2US", + "PK4B", + "PK4UB", + "RFL", + "SEQ", + "SFL", + "SGT", + "SIN", + "SLE", + "SNE", + "STR", + "TEX", + "TXD", + "UP2H", + "UP2US", + "UP4B", + "UP4UB", + "X2D", + "ARA", + "ARR", + "BRA", + "CAL", + "RET", + "SSG", + "CMP", + "SCS", + "TXB", + "NRM", + "DIV", + "DP2", + "TXL", + "BRK", + "IF", + "LOOP", + "REP", + "ELSE", + "ENDIF", + "ENDLOOP", + "ENDREP", + "PUSHA", + "POPA", + "CEIL", + "I2F", + "NOT", + "TRUNC", + "SHL", + "SHR", + "AND", + "OR", + "MOD", + "XOR", + "SAD", + "TXF", + "TXQ", + "CONT", + "EMIT", + "ENDPRIM", + "BGNLOOP2", + "BGNSUB", + "ENDLOOP2", + "ENDSUB", + "NOISE1", + "NOISE2", + "NOISE3", + "NOISE4", + "NOP", + "TEXBEM", + "TEXBEML", + "TEXREG2AR", + "TEXM3X2PAD", + "TEXM3X2TEX", + "TEXM3X3PAD", + "TEXM3X3TEX", + "TEXM3X3SPEC", + "TEXM3X3VSPEC", + "TEXREG2GB", + "TEXREG2RGB", + "TEXDP3TEX", + "TEXDP3", + "TEXM3X3", + "TEXM3X2DEPTH", + "TEXDEPTH", + "BEM", + "M4X3", + "M3X4", + "M3X3", + "M3X2", + "NRM4", + "CALLNZ", + "IFC", + "BREAKC", + "KIL", + "END" +}; + +static const char *TGSI_SATS[] = +{ + "SAT_NONE", + "SAT_ZERO_ONE", + "SAT_MINUS_PLUS_ONE" +}; + +static const char *TGSI_INSTRUCTION_EXTS[] = +{ + "INSTRUCTION_EXT_TYPE_NV", + "INSTRUCTION_EXT_TYPE_LABEL", + "INSTRUCTION_EXT_TYPE_TEXTURE" +}; + +static const char *TGSI_PRECISIONS[] = +{ + "PRECISION_DEFAULT", + "TGSI_PRECISION_FLOAT32", + "TGSI_PRECISION_FLOAT16", + "TGSI_PRECISION_FIXED12" +}; + +static const char *TGSI_CCS[] = +{ + "CC_GT", + "CC_EQ", + "CC_LT", + "CC_UN", + "CC_GE", + "CC_LE", + "CC_NE", + "CC_TR", + "CC_FL" +}; + +static const char *TGSI_SWIZZLES[] = +{ + "SWIZZLE_X", + "SWIZZLE_Y", + "SWIZZLE_Z", + "SWIZZLE_W" +}; + +static const char *TGSI_SWIZZLES_SHORT[] = +{ + "x", + "y", + "z", + "w" +}; + +static const char *TGSI_TEXTURES[] = +{ + "TEXTURE_UNKNOWN", + "TEXTURE_1D", + "TEXTURE_2D", + "TEXTURE_3D", + "TEXTURE_CUBE", + "TEXTURE_RECT", + "TEXTURE_SHADOW1D", + "TEXTURE_SHADOW2D", + "TEXTURE_SHADOWRECT" +}; + +static const char *TGSI_SRC_REGISTER_EXTS[] = +{ + "SRC_REGISTER_EXT_TYPE_SWZ", + "SRC_REGISTER_EXT_TYPE_MOD" +}; + +static const char *TGSI_EXTSWIZZLES[] = +{ + "EXTSWIZZLE_X", + "EXTSWIZZLE_Y", + "EXTSWIZZLE_Z", + "EXTSWIZZLE_W", + "EXTSWIZZLE_ZERO", + "EXTSWIZZLE_ONE" +}; + +static const char *TGSI_EXTSWIZZLES_SHORT[] = +{ + "x", + "y", + "z", + "w", + "0", + "1" +}; + +static const char *TGSI_WRITEMASKS[] = +{ + "0", + "WRITEMASK_X", + "WRITEMASK_Y", + "WRITEMASK_XY", + "WRITEMASK_Z", + "WRITEMASK_XZ", + "WRITEMASK_YZ", + "WRITEMASK_XYZ", + "WRITEMASK_W", + "WRITEMASK_XW", + "WRITEMASK_YW", + "WRITEMASK_XYW", + "WRITEMASK_ZW", + "WRITEMASK_XZW", + "WRITEMASK_YZW", + "WRITEMASK_XYZW" +}; + +static const char *TGSI_DST_REGISTER_EXTS[] = +{ + "DST_REGISTER_EXT_TYPE_CONDCODE", + "DST_REGISTER_EXT_TYPE_MODULATE" +}; + +static const char *TGSI_MODULATES[] = +{ + "MODULATE_1X", + "MODULATE_2X", + "MODULATE_4X", + "MODULATE_8X", + "MODULATE_HALF", + "MODULATE_QUARTER", + "MODULATE_EIGHTH" +}; + +static void +dump_declaration_short( + struct gen_dump *dump, + struct tgsi_full_declaration *decl ) +{ + TXT( "\nDCL " ); + ENM( decl->Declaration.File, TGSI_FILES_SHORT ); + + switch( decl->Declaration.Declare ) { + case TGSI_DECLARE_RANGE: + CHR( '[' ); + UID( decl->u.DeclarationRange.First ); + if( decl->u.DeclarationRange.First != decl->u.DeclarationRange.Last ) { + TXT( ".." ); + UID( decl->u.DeclarationRange.Last ); + } + CHR( ']' ); + break; + default: + assert( 0 ); + } + + if( decl->Declaration.UsageMask != TGSI_WRITEMASK_XYZW ) { + CHR( '.' ); + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) { + CHR( 'x' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) { + CHR( 'y' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) { + CHR( 'z' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) { + CHR( 'w' ); + } + } + + if( decl->Declaration.Interpolate ) { + TXT( ", " ); + ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES_SHORT ); + } + + if( decl->Declaration.Semantic ) { + TXT( ", " ); + ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS_SHORT ); + CHR( '[' ); + UID( decl->Semantic.SemanticIndex ); + CHR( ']' ); + } +} + +static void +dump_declaration_verbose( + struct gen_dump *dump, + struct tgsi_full_declaration *decl, + unsigned ignored, + unsigned deflt, + struct tgsi_full_declaration *fd ) +{ + TXT( "\nFile : " ); + ENM( decl->Declaration.File, TGSI_FILES ); + TXT( "\nDeclare : " ); + ENM( decl->Declaration.Declare, TGSI_DECLARES ); + if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) { + TXT( "\nUsageMask : " ); + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) { + CHR( 'X' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) { + CHR( 'Y' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) { + CHR( 'Z' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) { + CHR( 'W' ); + } + } + if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) { + TXT( "\nInterpolate: " ); + UID( decl->Declaration.Interpolate ); + } + if( deflt || fd->Declaration.Semantic != decl->Declaration.Semantic ) { + TXT( "\nSemantic : " ); + UID( decl->Declaration.Semantic ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( decl->Declaration.Padding ); + } + + CHR( '\n' ); + switch( decl->Declaration.Declare ) { + case TGSI_DECLARE_RANGE: + TXT( "\nFirst: " ); + UID( decl->u.DeclarationRange.First ); + TXT( "\nLast : " ); + UID( decl->u.DeclarationRange.Last ); + break; + + case TGSI_DECLARE_MASK: + TXT( "\nMask: " ); + UIX( decl->u.DeclarationMask.Mask ); + break; + + default: + assert( 0 ); + } + + if( decl->Declaration.Interpolate ) { + CHR( '\n' ); + TXT( "\nInterpolate: " ); + ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES ); + if( ignored ) { + TXT( "\nPadding : " ); + UIX( decl->Interpolation.Padding ); + } + } + + if( decl->Declaration.Semantic ) { + CHR( '\n' ); + TXT( "\nSemanticName : " ); + ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS ); + TXT( "\nSemanticIndex: " ); + UID( decl->Semantic.SemanticIndex ); + if( ignored ) { + TXT( "\nPadding : " ); + UIX( decl->Semantic.Padding ); + } + } +} + +static void +dump_immediate_short( + struct gen_dump *dump, + struct tgsi_full_immediate *imm ) +{ + unsigned i; + + TXT( "\nIMM " ); + ENM( imm->Immediate.DataType, TGSI_IMMS_SHORT ); + + TXT( " { " ); + for( i = 0; i < imm->Immediate.Size - 1; i++ ) { + switch( imm->Immediate.DataType ) { + case TGSI_IMM_FLOAT32: + FLT( imm->u.ImmediateFloat32[i].Float ); + break; + + default: + assert( 0 ); + } + + if( i < imm->Immediate.Size - 2 ) { + TXT( ", " ); + } + } + TXT( " }" ); +} + +static void +dump_immediate_verbose( + struct gen_dump *dump, + struct tgsi_full_immediate *imm, + unsigned ignored ) +{ + unsigned i; + + TXT( "\nDataType : " ); + ENM( imm->Immediate.DataType, TGSI_IMMS ); + if( ignored ) { + TXT( "\nPadding : " ); + UIX( imm->Immediate.Padding ); + } + + for( i = 0; i < imm->Immediate.Size - 1; i++ ) { + CHR( '\n' ); + switch( imm->Immediate.DataType ) { + case TGSI_IMM_FLOAT32: + TXT( "\nFloat: " ); + FLT( imm->u.ImmediateFloat32[i].Float ); + break; + + default: + assert( 0 ); + } + } +} + +static void +dump_instruction_short( + struct gen_dump *dump, + struct tgsi_full_instruction *inst, + unsigned instno ) +{ + unsigned i; + boolean first_reg = TRUE; + + CHR( '\n' ); + UID( instno ); + CHR( ':' ); + ENM( inst->Instruction.Opcode, TGSI_OPCODES_SHORT ); + + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; + case TGSI_SAT_ZERO_ONE: + TXT( "_SAT" ); + break; + case TGSI_SAT_MINUS_PLUS_ONE: + TXT( "_SAT[-1,1]" ); + break; + default: + assert( 0 ); + } + + for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + + if( !first_reg ) { + CHR( ',' ); + } + CHR( ' ' ); + + ENM( dst->DstRegister.File, TGSI_FILES_SHORT ); + + CHR( '[' ); + SID( dst->DstRegister.Index ); + CHR( ']' ); + + if( dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW ) { + CHR( '.' ); + if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_X ) { + CHR( 'x' ); + } + if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_Y ) { + CHR( 'y' ); + } + if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_Z ) { + CHR( 'z' ); + } + if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_W ) { + CHR( 'w' ); + } + } + + first_reg = FALSE; + } + + for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { + struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + + if( !first_reg ) { + CHR( ',' ); + } + CHR( ' ' ); + + if( src->SrcRegisterExtMod.Complement ) { + TXT( "(1 - " ); + } + if( src->SrcRegisterExtMod.Negate ) { + CHR( '-' ); + } + if( src->SrcRegisterExtMod.Absolute ) { + CHR( '|' ); + } + if( src->SrcRegister.Negate ) { + CHR( '-' ); + } + + ENM( src->SrcRegister.File, TGSI_FILES_SHORT ); + + CHR( '[' ); + SID( src->SrcRegister.Index ); + CHR( ']' ); + + if (src->SrcRegister.Extended) { + if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || + src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || + src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || + src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) { + CHR( '.' ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES_SHORT ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES_SHORT ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES_SHORT ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES_SHORT ); + } + } + else if( src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || + src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || + src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || + src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W ) { + CHR( '.' ); + ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES_SHORT ); + ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES_SHORT ); + ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES_SHORT ); + ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES_SHORT ); + } + + if( src->SrcRegisterExtMod.Absolute ) { + CHR( '|' ); + } + if( src->SrcRegisterExtMod.Complement ) { + CHR( ')' ); + } + + first_reg = FALSE; + } + + switch( inst->Instruction.Opcode ) { + case TGSI_OPCODE_IF: + case TGSI_OPCODE_ELSE: + case TGSI_OPCODE_BGNLOOP2: + case TGSI_OPCODE_ENDLOOP2: + case TGSI_OPCODE_CAL: + TXT( " :" ); + UID( inst->InstructionExtLabel.Label ); + break; + } +} + +static void +dump_instruction_verbose( + struct gen_dump *dump, + struct tgsi_full_instruction *inst, + unsigned ignored, + unsigned deflt, + struct tgsi_full_instruction *fi ) +{ + unsigned i; + + TXT( "\nOpcode : " ); + ENM( inst->Instruction.Opcode, TGSI_OPCODES ); + if( deflt || fi->Instruction.Saturate != inst->Instruction.Saturate ) { + TXT( "\nSaturate : " ); + ENM( inst->Instruction.Saturate, TGSI_SATS ); + } + if( deflt || fi->Instruction.NumDstRegs != inst->Instruction.NumDstRegs ) { + TXT( "\nNumDstRegs : " ); + UID( inst->Instruction.NumDstRegs ); + } + if( deflt || fi->Instruction.NumSrcRegs != inst->Instruction.NumSrcRegs ) { + TXT( "\nNumSrcRegs : " ); + UID( inst->Instruction.NumSrcRegs ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( inst->Instruction.Padding ); + } + + if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) { + CHR( '\n' ); + TXT( "\nType : " ); + ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS ); + if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) { + TXT( "\nPrecision : " ); + ENM( inst->InstructionExtNv.Precision, TGSI_PRECISIONS ); + } + if( deflt || fi->InstructionExtNv.CondDstIndex != inst->InstructionExtNv.CondDstIndex ) { + TXT( "\nCondDstIndex : " ); + UID( inst->InstructionExtNv.CondDstIndex ); + } + if( deflt || fi->InstructionExtNv.CondFlowIndex != inst->InstructionExtNv.CondFlowIndex ) { + TXT( "\nCondFlowIndex : " ); + UID( inst->InstructionExtNv.CondFlowIndex ); + } + if( deflt || fi->InstructionExtNv.CondMask != inst->InstructionExtNv.CondMask ) { + TXT( "\nCondMask : " ); + ENM( inst->InstructionExtNv.CondMask, TGSI_CCS ); + } + if( deflt || fi->InstructionExtNv.CondSwizzleX != inst->InstructionExtNv.CondSwizzleX ) { + TXT( "\nCondSwizzleX : " ); + ENM( inst->InstructionExtNv.CondSwizzleX, TGSI_SWIZZLES ); + } + if( deflt || fi->InstructionExtNv.CondSwizzleY != inst->InstructionExtNv.CondSwizzleY ) { + TXT( "\nCondSwizzleY : " ); + ENM( inst->InstructionExtNv.CondSwizzleY, TGSI_SWIZZLES ); + } + if( deflt || fi->InstructionExtNv.CondSwizzleZ != inst->InstructionExtNv.CondSwizzleZ ) { + TXT( "\nCondSwizzleZ : " ); + ENM( inst->InstructionExtNv.CondSwizzleZ, TGSI_SWIZZLES ); + } + if( deflt || fi->InstructionExtNv.CondSwizzleW != inst->InstructionExtNv.CondSwizzleW ) { + TXT( "\nCondSwizzleW : " ); + ENM( inst->InstructionExtNv.CondSwizzleW, TGSI_SWIZZLES ); + } + if( deflt || fi->InstructionExtNv.CondDstUpdate != inst->InstructionExtNv.CondDstUpdate ) { + TXT( "\nCondDstUpdate : " ); + UID( inst->InstructionExtNv.CondDstUpdate ); + } + if( deflt || fi->InstructionExtNv.CondFlowEnable != inst->InstructionExtNv.CondFlowEnable ) { + TXT( "\nCondFlowEnable: " ); + UID( inst->InstructionExtNv.CondFlowEnable ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( inst->InstructionExtNv.Padding ); + if( deflt || fi->InstructionExtNv.Extended != inst->InstructionExtNv.Extended ) { + TXT( "\nExtended : " ); + UID( inst->InstructionExtNv.Extended ); + } + } + } + + if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) { + CHR( '\n' ); + TXT( "\nType : " ); + ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS ); + if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) { + TXT( "\nLabel : " ); + UID( inst->InstructionExtLabel.Label ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( inst->InstructionExtLabel.Padding ); + if( deflt || fi->InstructionExtLabel.Extended != inst->InstructionExtLabel.Extended ) { + TXT( "\nExtended: " ); + UID( inst->InstructionExtLabel.Extended ); + } + } + } + + if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) { + CHR( '\n' ); + TXT( "\nType : " ); + ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS ); + if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) { + TXT( "\nTexture : " ); + ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( inst->InstructionExtTexture.Padding ); + if( deflt || fi->InstructionExtTexture.Extended != inst->InstructionExtTexture.Extended ) { + TXT( "\nExtended: " ); + UID( inst->InstructionExtTexture.Extended ); + } + } + } + + for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i]; + + CHR( '\n' ); + TXT( "\nFile : " ); + ENM( dst->DstRegister.File, TGSI_FILES ); + if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) { + TXT( "\nWriteMask: " ); + ENM( dst->DstRegister.WriteMask, TGSI_WRITEMASKS ); + } + if( ignored ) { + if( deflt || fd->DstRegister.Indirect != dst->DstRegister.Indirect ) { + TXT( "\nIndirect : " ); + UID( dst->DstRegister.Indirect ); + } + if( deflt || fd->DstRegister.Dimension != dst->DstRegister.Dimension ) { + TXT( "\nDimension: " ); + UID( dst->DstRegister.Dimension ); + } + } + if( deflt || fd->DstRegister.Index != dst->DstRegister.Index ) { + TXT( "\nIndex : " ); + SID( dst->DstRegister.Index ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( dst->DstRegister.Padding ); + if( deflt || fd->DstRegister.Extended != dst->DstRegister.Extended ) { + TXT( "\nExtended : " ); + UID( dst->DstRegister.Extended ); + } + } + + if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) { + CHR( '\n' ); + TXT( "\nType : " ); + ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS ); + if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) { + TXT( "\nCondMask : " ); + ENM( dst->DstRegisterExtConcode.CondMask, TGSI_CCS ); + } + if( deflt || fd->DstRegisterExtConcode.CondSwizzleX != dst->DstRegisterExtConcode.CondSwizzleX ) { + TXT( "\nCondSwizzleX: " ); + ENM( dst->DstRegisterExtConcode.CondSwizzleX, TGSI_SWIZZLES ); + } + if( deflt || fd->DstRegisterExtConcode.CondSwizzleY != dst->DstRegisterExtConcode.CondSwizzleY ) { + TXT( "\nCondSwizzleY: " ); + ENM( dst->DstRegisterExtConcode.CondSwizzleY, TGSI_SWIZZLES ); + } + if( deflt || fd->DstRegisterExtConcode.CondSwizzleZ != dst->DstRegisterExtConcode.CondSwizzleZ ) { + TXT( "\nCondSwizzleZ: " ); + ENM( dst->DstRegisterExtConcode.CondSwizzleZ, TGSI_SWIZZLES ); + } + if( deflt || fd->DstRegisterExtConcode.CondSwizzleW != dst->DstRegisterExtConcode.CondSwizzleW ) { + TXT( "\nCondSwizzleW: " ); + ENM( dst->DstRegisterExtConcode.CondSwizzleW, TGSI_SWIZZLES ); + } + if( deflt || fd->DstRegisterExtConcode.CondSrcIndex != dst->DstRegisterExtConcode.CondSrcIndex ) { + TXT( "\nCondSrcIndex: " ); + UID( dst->DstRegisterExtConcode.CondSrcIndex ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( dst->DstRegisterExtConcode.Padding ); + if( deflt || fd->DstRegisterExtConcode.Extended != dst->DstRegisterExtConcode.Extended ) { + TXT( "\nExtended : " ); + UID( dst->DstRegisterExtConcode.Extended ); + } + } + } + + if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) { + CHR( '\n' ); + TXT( "\nType : " ); + ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS ); + if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) { + TXT( "\nModulate: " ); + ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( dst->DstRegisterExtModulate.Padding ); + if( deflt || fd->DstRegisterExtModulate.Extended != dst->DstRegisterExtModulate.Extended ) { + TXT( "\nExtended: " ); + UID( dst->DstRegisterExtModulate.Extended ); + } + } + } + } + + for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { + struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i]; + + CHR( '\n' ); + TXT( "\nFile : "); + ENM( src->SrcRegister.File, TGSI_FILES ); + if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) { + TXT( "\nSwizzleX : " ); + ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES ); + } + if( deflt || fs->SrcRegister.SwizzleY != src->SrcRegister.SwizzleY ) { + TXT( "\nSwizzleY : " ); + ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES ); + } + if( deflt || fs->SrcRegister.SwizzleZ != src->SrcRegister.SwizzleZ ) { + TXT( "\nSwizzleZ : " ); + ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES ); + } + if( deflt || fs->SrcRegister.SwizzleW != src->SrcRegister.SwizzleW ) { + TXT( "\nSwizzleW : " ); + ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES ); + } + if( deflt || fs->SrcRegister.Negate != src->SrcRegister.Negate ) { + TXT( "\nNegate : " ); + UID( src->SrcRegister.Negate ); + } + if( ignored ) { + if( deflt || fs->SrcRegister.Indirect != src->SrcRegister.Indirect ) { + TXT( "\nIndirect : " ); + UID( src->SrcRegister.Indirect ); + } + if( deflt || fs->SrcRegister.Dimension != src->SrcRegister.Dimension ) { + TXT( "\nDimension: " ); + UID( src->SrcRegister.Dimension ); + } + } + if( deflt || fs->SrcRegister.Index != src->SrcRegister.Index ) { + TXT( "\nIndex : " ); + SID( src->SrcRegister.Index ); + } + if( ignored ) { + if( deflt || fs->SrcRegister.Extended != src->SrcRegister.Extended ) { + TXT( "\nExtended : " ); + UID( src->SrcRegister.Extended ); + } + } + + if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) { + CHR( '\n' ); + TXT( "\nType : " ); + ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS ); + if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) { + TXT( "\nExtSwizzleX: " ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES ); + } + if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleY != src->SrcRegisterExtSwz.ExtSwizzleY ) { + TXT( "\nExtSwizzleY: " ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES ); + } + if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleZ != src->SrcRegisterExtSwz.ExtSwizzleZ ) { + TXT( "\nExtSwizzleZ: " ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES ); + } + if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleW != src->SrcRegisterExtSwz.ExtSwizzleW ) { + TXT( "\nExtSwizzleW: " ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES ); + } + if( deflt || fs->SrcRegisterExtSwz.NegateX != src->SrcRegisterExtSwz.NegateX ) { + TXT( "\nNegateX : " ); + UID( src->SrcRegisterExtSwz.NegateX ); + } + if( deflt || fs->SrcRegisterExtSwz.NegateY != src->SrcRegisterExtSwz.NegateY ) { + TXT( "\nNegateY : " ); + UID( src->SrcRegisterExtSwz.NegateY ); + } + if( deflt || fs->SrcRegisterExtSwz.NegateZ != src->SrcRegisterExtSwz.NegateZ ) { + TXT( "\nNegateZ : " ); + UID( src->SrcRegisterExtSwz.NegateZ ); + } + if( deflt || fs->SrcRegisterExtSwz.NegateW != src->SrcRegisterExtSwz.NegateW ) { + TXT( "\nNegateW : " ); + UID( src->SrcRegisterExtSwz.NegateW ); + } + if( deflt || fs->SrcRegisterExtSwz.ExtDivide != src->SrcRegisterExtSwz.ExtDivide ) { + TXT( "\nExtDivide : " ); + ENM( src->SrcRegisterExtSwz.ExtDivide, TGSI_EXTSWIZZLES ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( src->SrcRegisterExtSwz.Padding ); + if( deflt || fs->SrcRegisterExtSwz.Extended != src->SrcRegisterExtSwz.Extended ) { + TXT( "\nExtended : " ); + UID( src->SrcRegisterExtSwz.Extended ); + } + } + } + + if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) { + CHR( '\n' ); + TXT( "\nType : " ); + ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS ); + if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) { + TXT( "\nComplement: " ); + UID( src->SrcRegisterExtMod.Complement ); + } + if( deflt || fs->SrcRegisterExtMod.Bias != src->SrcRegisterExtMod.Bias ) { + TXT( "\nBias : " ); + UID( src->SrcRegisterExtMod.Bias ); + } + if( deflt || fs->SrcRegisterExtMod.Scale2X != src->SrcRegisterExtMod.Scale2X ) { + TXT( "\nScale2X : " ); + UID( src->SrcRegisterExtMod.Scale2X ); + } + if( deflt || fs->SrcRegisterExtMod.Absolute != src->SrcRegisterExtMod.Absolute ) { + TXT( "\nAbsolute : " ); + UID( src->SrcRegisterExtMod.Absolute ); + } + if( deflt || fs->SrcRegisterExtMod.Negate != src->SrcRegisterExtMod.Negate ) { + TXT( "\nNegate : " ); + UID( src->SrcRegisterExtMod.Negate ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( src->SrcRegisterExtMod.Padding ); + if( deflt || fs->SrcRegisterExtMod.Extended != src->SrcRegisterExtMod.Extended ) { + TXT( "\nExtended : " ); + UID( src->SrcRegisterExtMod.Extended ); + } + } + } + } +} + +static void +dump_gen( + struct gen_dump *dump, + const struct tgsi_token *tokens, + unsigned flags ) +{ + struct tgsi_parse_context parse; + struct tgsi_full_instruction fi; + struct tgsi_full_declaration fd; + unsigned verbose = flags & TGSI_DUMP_VERBOSE; + unsigned ignored = !(flags & TGSI_DUMP_NO_IGNORED); + unsigned deflt = !(flags & TGSI_DUMP_NO_DEFAULT); + unsigned instno = 0; + + dump->tabs = 0; + + /* sanity check */ + assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0); + + tgsi_parse_init( &parse, tokens ); + + TXT( "tgsi-dump begin -----------------" ); + + CHR( '\n' ); + ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES_SHORT ); + CHR( ' ' ); + UID( parse.FullVersion.Version.MajorVersion ); + CHR( '.' ); + UID( parse.FullVersion.Version.MinorVersion ); + + if( verbose ) { + TXT( "\nMajorVersion: " ); + UID( parse.FullVersion.Version.MajorVersion ); + TXT( "\nMinorVersion: " ); + UID( parse.FullVersion.Version.MinorVersion ); + CHR( '\n' ); + + TXT( "\nHeaderSize: " ); + UID( parse.FullHeader.Header.HeaderSize ); + TXT( "\nBodySize : " ); + UID( parse.FullHeader.Header.BodySize ); + TXT( "\nProcessor : " ); + ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES ); + CHR( '\n' ); + } + + fi = tgsi_default_full_instruction(); + fd = tgsi_default_full_declaration(); + + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + dump_declaration_short( + dump, + &parse.FullToken.FullDeclaration ); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + dump_immediate_short( + dump, + &parse.FullToken.FullImmediate ); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + dump_instruction_short( + dump, + &parse.FullToken.FullInstruction, + instno ); + instno++; + break; + + default: + assert( 0 ); + } + + if( verbose ) { + TXT( "\nType : " ); + ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES ); + if( ignored ) { + TXT( "\nSize : " ); + UID( parse.FullToken.Token.Size ); + if( deflt || parse.FullToken.Token.Extended ) { + TXT( "\nExtended : " ); + UID( parse.FullToken.Token.Extended ); + } + } + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + dump_declaration_verbose( + dump, + &parse.FullToken.FullDeclaration, + ignored, + deflt, + &fd ); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + dump_immediate_verbose( + dump, + &parse.FullToken.FullImmediate, + ignored ); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + dump_instruction_verbose( + dump, + &parse.FullToken.FullInstruction, + ignored, + deflt, + &fi ); + break; + + default: + assert( 0 ); + } + + CHR( '\n' ); + } + } + + TXT( "\ntgsi-dump end -------------------\n" ); + + tgsi_parse_free( &parse ); +} + + +static void +sanity_checks(void) +{ + assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0); + assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "END") == 0); +} + + +void +tgsi_dump( + const struct tgsi_token *tokens, + unsigned flags ) +{ + struct file_dump dump; + + sanity_checks(); + + dump.base.write = _file_dump_write; +#if 0 + { + static unsigned counter = 0; + char buffer[64]; + sprintf( buffer, "tgsi-dump-%.4u.txt", counter++ ); + dump.file = fopen( buffer, "wt" ); + } +#else + dump.file = stderr; +#endif + + dump_gen( + &dump.base, + tokens, + flags ); + +#if 0 + fclose( dump.file ); +#endif +} + +void +tgsi_dump_str( + char **str, + const struct tgsi_token *tokens, + unsigned flags ) +{ + struct text_dump dump; + + dump.base.write = _text_dump_write; + dump.text = NULL; + dump.length = 0; + dump.capacity = 0; + + dump_gen( + &dump.base, + tokens, + flags ); + + *str = dump.text; +} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h new file mode 100644 index 0000000000..1adc9db251 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h @@ -0,0 +1,28 @@ +#if !defined TGSI_DUMP_H +#define TGSI_DUMP_H + +#if defined __cplusplus +extern "C" { +#endif // defined __cplusplus + +#define TGSI_DUMP_VERBOSE 1 +#define TGSI_DUMP_NO_IGNORED 2 +#define TGSI_DUMP_NO_DEFAULT 4 + +void +tgsi_dump( + const struct tgsi_token *tokens, + unsigned flags ); + +void +tgsi_dump_str( + char **str, + const struct tgsi_token *tokens, + unsigned flags ); + +#if defined __cplusplus +} // extern "C" +#endif // defined __cplusplus + +#endif // !defined TGSI_DUMP_H + diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c new file mode 100644 index 0000000000..bf6b89ce56 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c @@ -0,0 +1,319 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi_parse.h" +#include "tgsi_build.h" + +void +tgsi_full_token_init( + union tgsi_full_token *full_token ) +{ + full_token->Token.Type = TGSI_TOKEN_TYPE_DECLARATION; +} + +void +tgsi_full_token_free( + union tgsi_full_token *full_token ) +{ + if( full_token->Token.Type == TGSI_TOKEN_TYPE_IMMEDIATE ) { + FREE( full_token->FullImmediate.u.Pointer ); + } +} + +unsigned +tgsi_parse_init( + struct tgsi_parse_context *ctx, + const struct tgsi_token *tokens ) +{ + ctx->FullVersion.Version = *(struct tgsi_version *) &tokens[0]; + if( ctx->FullVersion.Version.MajorVersion > 1 ) { + return TGSI_PARSE_ERROR; + } + + ctx->FullHeader.Header = *(struct tgsi_header *) &tokens[1]; + if( ctx->FullHeader.Header.HeaderSize >= 2 ) { + ctx->FullHeader.Processor = *(struct tgsi_processor *) &tokens[2]; + } + else { + ctx->FullHeader.Processor = tgsi_default_processor(); + } + + ctx->Tokens = tokens; + ctx->Position = 1 + ctx->FullHeader.Header.HeaderSize; + + tgsi_full_token_init( &ctx->FullToken ); + + return TGSI_PARSE_OK; +} + +void +tgsi_parse_free( + struct tgsi_parse_context *ctx ) +{ + tgsi_full_token_free( &ctx->FullToken ); +} + +boolean +tgsi_parse_end_of_tokens( + struct tgsi_parse_context *ctx ) +{ + return ctx->Position >= + 1 + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize; +} + +static void +next_token( + struct tgsi_parse_context *ctx, + void *token ) +{ + assert( !tgsi_parse_end_of_tokens( ctx ) ); + + *(struct tgsi_token *) token = ctx->Tokens[ctx->Position++]; +} + +void +tgsi_parse_token( + struct tgsi_parse_context *ctx ) +{ + struct tgsi_token token; + unsigned i; + + tgsi_full_token_free( &ctx->FullToken ); + tgsi_full_token_init( &ctx->FullToken ); + + next_token( ctx, &token ); + + switch( token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration; + + *decl = tgsi_default_full_declaration(); + decl->Declaration = *(struct tgsi_declaration *) &token; + + switch( decl->Declaration.Type ) { + case TGSI_DECLARE_RANGE: + next_token( ctx, &decl->u.DeclarationRange ); + break; + + case TGSI_DECLARE_MASK: + next_token( ctx, &decl->u.DeclarationMask ); + break; + + default: + assert (0); + } + + if( decl->Declaration.Interpolate ) { + next_token( ctx, &decl->Interpolation ); + } + + if( decl->Declaration.Semantic ) { + next_token( ctx, &decl->Semantic ); + } + + break; + } + + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate; + + *imm = tgsi_default_full_immediate(); + imm->Immediate = *(struct tgsi_immediate *) &token; + + assert( !imm->Immediate.Extended ); + + switch (imm->Immediate.DataType) { + case TGSI_IMM_FLOAT32: + imm->u.Pointer = MALLOC( + sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) ); + for( i = 0; i < imm->Immediate.Size - 1; i++ ) { + next_token( ctx, &imm->u.ImmediateFloat32[i] ); + } + break; + + default: + assert( 0 ); + } + + break; + } + + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *inst = &ctx->FullToken.FullInstruction; + unsigned extended; + + *inst = tgsi_default_full_instruction(); + inst->Instruction = *(struct tgsi_instruction *) &token; + + extended = inst->Instruction.Extended; + + while( extended ) { + struct tgsi_src_register_ext token; + + next_token( ctx, &token ); + + switch( token.Type ) { + case TGSI_INSTRUCTION_EXT_TYPE_NV: + inst->InstructionExtNv = + *(struct tgsi_instruction_ext_nv *) &token; + break; + + case TGSI_INSTRUCTION_EXT_TYPE_LABEL: + inst->InstructionExtLabel = + *(struct tgsi_instruction_ext_label *) &token; + break; + + case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE: + inst->InstructionExtTexture = + *(struct tgsi_instruction_ext_texture *) &token; + break; + + default: + assert( 0 ); + } + + extended = token.Extended; + } + + assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS ); + + for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { + unsigned extended; + + next_token( ctx, &inst->FullDstRegisters[i].DstRegister ); + + /* + * No support for indirect or multi-dimensional addressing. + */ + assert( !inst->FullDstRegisters[i].DstRegister.Indirect ); + assert( !inst->FullDstRegisters[i].DstRegister.Dimension ); + + extended = inst->FullDstRegisters[i].DstRegister.Extended; + + while( extended ) { + struct tgsi_src_register_ext token; + + next_token( ctx, &token ); + + switch( token.Type ) { + case TGSI_DST_REGISTER_EXT_TYPE_CONDCODE: + inst->FullDstRegisters[i].DstRegisterExtConcode = + *(struct tgsi_dst_register_ext_concode *) &token; + break; + + case TGSI_DST_REGISTER_EXT_TYPE_MODULATE: + inst->FullDstRegisters[i].DstRegisterExtModulate = + *(struct tgsi_dst_register_ext_modulate *) &token; + break; + + default: + assert( 0 ); + } + + extended = token.Extended; + } + } + + assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS ); + + for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { + unsigned extended; + + next_token( ctx, &inst->FullSrcRegisters[i].SrcRegister ); + + extended = inst->FullSrcRegisters[i].SrcRegister.Extended; + + while( extended ) { + struct tgsi_src_register_ext token; + + next_token( ctx, &token ); + + switch( token.Type ) { + case TGSI_SRC_REGISTER_EXT_TYPE_SWZ: + inst->FullSrcRegisters[i].SrcRegisterExtSwz = + *(struct tgsi_src_register_ext_swz *) &token; + break; + + case TGSI_SRC_REGISTER_EXT_TYPE_MOD: + inst->FullSrcRegisters[i].SrcRegisterExtMod = + *(struct tgsi_src_register_ext_mod *) &token; + break; + + default: + assert( 0 ); + } + + extended = token.Extended; + } + + if( inst->FullSrcRegisters[i].SrcRegister.Indirect ) { + next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterInd ); + + /* + * No support for indirect or multi-dimensional addressing. + */ + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect ); + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension ); + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended ); + } + + if( inst->FullSrcRegisters[i].SrcRegister.Dimension ) { + next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDim ); + + /* + * No support for multi-dimensional addressing. + */ + assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Dimension ); + assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Extended ); + + if( inst->FullSrcRegisters[i].SrcRegisterDim.Indirect ) { + next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDimInd ); + + /* + * No support for indirect or multi-dimensional addressing. + */ + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect ); + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension ); + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended ); + } + } + } + + break; + } + + default: + assert( 0 ); + } +} + diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h new file mode 100644 index 0000000000..9372da8d5d --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h @@ -0,0 +1,121 @@ +#if !defined TGSI_PARSE_H +#define TGSI_PARSE_H + +#if defined __cplusplus +extern "C" { +#endif // defined __cplusplus + +struct tgsi_full_version +{ + struct tgsi_version Version; +}; + +struct tgsi_full_header +{ + struct tgsi_header Header; + struct tgsi_processor Processor; +}; + +struct tgsi_full_dst_register +{ + struct tgsi_dst_register DstRegister; + struct tgsi_dst_register_ext_concode DstRegisterExtConcode; + struct tgsi_dst_register_ext_modulate DstRegisterExtModulate; +}; + +struct tgsi_full_src_register +{ + struct tgsi_src_register SrcRegister; + struct tgsi_src_register_ext_swz SrcRegisterExtSwz; + struct tgsi_src_register_ext_mod SrcRegisterExtMod; + struct tgsi_src_register SrcRegisterInd; + struct tgsi_dimension SrcRegisterDim; + struct tgsi_src_register SrcRegisterDimInd; +}; + +struct tgsi_full_declaration +{ + struct tgsi_declaration Declaration; + union + { + struct tgsi_declaration_range DeclarationRange; + struct tgsi_declaration_mask DeclarationMask; + } u; + struct tgsi_declaration_interpolation Interpolation; + struct tgsi_declaration_semantic Semantic; +}; + +struct tgsi_full_immediate +{ + struct tgsi_immediate Immediate; + union + { + void *Pointer; + struct tgsi_immediate_float32 *ImmediateFloat32; + } u; +}; + +#define TGSI_FULL_MAX_DST_REGISTERS 2 +#define TGSI_FULL_MAX_SRC_REGISTERS 3 + +struct tgsi_full_instruction +{ + struct tgsi_instruction Instruction; + struct tgsi_instruction_ext_nv InstructionExtNv; + struct tgsi_instruction_ext_label InstructionExtLabel; + struct tgsi_instruction_ext_texture InstructionExtTexture; + struct tgsi_full_dst_register FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS]; + struct tgsi_full_src_register FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS]; +}; + +union tgsi_full_token +{ + struct tgsi_token Token; + struct tgsi_full_declaration FullDeclaration; + struct tgsi_full_immediate FullImmediate; + struct tgsi_full_instruction FullInstruction; +}; + +void +tgsi_full_token_init( + union tgsi_full_token *full_token ); + +void +tgsi_full_token_free( + union tgsi_full_token *full_token ); + +struct tgsi_parse_context +{ + const struct tgsi_token *Tokens; + unsigned Position; + struct tgsi_full_version FullVersion; + struct tgsi_full_header FullHeader; + union tgsi_full_token FullToken; +}; + +#define TGSI_PARSE_OK 0 +#define TGSI_PARSE_ERROR 1 + +unsigned +tgsi_parse_init( + struct tgsi_parse_context *ctx, + const struct tgsi_token *tokens ); + +void +tgsi_parse_free( + struct tgsi_parse_context *ctx ); + +boolean +tgsi_parse_end_of_tokens( + struct tgsi_parse_context *ctx ); + +void +tgsi_parse_token( + struct tgsi_parse_context *ctx ); + +#if defined __cplusplus +} // extern "C" +#endif // defined __cplusplus + +#endif // !defined TGSI_PARSE_H + diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_transform.c b/src/gallium/auxiliary/tgsi/util/tgsi_transform.c new file mode 100644 index 0000000000..357f77b05a --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_transform.c @@ -0,0 +1,199 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI program transformation utility. + * + * Authors: Brian Paul + */ + + +#include "tgsi_transform.h" + + + +static void +emit_instruction(struct tgsi_transform_context *ctx, + const struct tgsi_full_instruction *inst) +{ + uint ti = ctx->ti; + + ti += tgsi_build_full_instruction(inst, + ctx->tokens_out + ti, + ctx->header, + ctx->max_tokens_out - ti); + ctx->ti = ti; +} + + +static void +emit_declaration(struct tgsi_transform_context *ctx, + const struct tgsi_full_declaration *decl) +{ + uint ti = ctx->ti; + + ti += tgsi_build_full_declaration(decl, + ctx->tokens_out + ti, + ctx->header, + ctx->max_tokens_out - ti); + ctx->ti = ti; +} + + +static void +emit_immediate(struct tgsi_transform_context *ctx, + const struct tgsi_full_immediate *imm) +{ + uint ti = ctx->ti; + + ti += tgsi_build_full_immediate(imm, + ctx->tokens_out + ti, + ctx->header, + ctx->max_tokens_out - ti); + ctx->ti = ti; +} + + + +/** + * Apply user-defined transformations to the input shader to produce + * the output shader. + * For example, a register search-and-replace operation could be applied + * by defining a transform_instruction() callback that examined and changed + * the instruction src/dest regs. + * + * \return number of tokens emitted + */ +int +tgsi_transform_shader(const struct tgsi_token *tokens_in, + struct tgsi_token *tokens_out, + uint max_tokens_out, + struct tgsi_transform_context *ctx) +{ + uint procType; + + /* input shader */ + struct tgsi_parse_context parse; + + /* output shader */ + struct tgsi_processor *processor; + + + /** + ** callback context init + **/ + ctx->emit_instruction = emit_instruction; + ctx->emit_declaration = emit_declaration; + ctx->emit_immediate = emit_immediate; + ctx->tokens_out = tokens_out; + ctx->max_tokens_out = max_tokens_out; + + + /** + ** Setup to begin parsing input shader + **/ + if (tgsi_parse_init( &parse, tokens_in ) != TGSI_PARSE_OK) { + debug_printf("tgsi_parse_init() failed in tgsi_transform_shader()!\n"); + return -1; + } + procType = parse.FullHeader.Processor.Processor; + assert(procType == TGSI_PROCESSOR_FRAGMENT || + procType == TGSI_PROCESSOR_VERTEX || + procType == TGSI_PROCESSOR_GEOMETRY); + + + /** + ** Setup output shader + **/ + *(struct tgsi_version *) &tokens_out[0] = tgsi_build_version(); + + ctx->header = (struct tgsi_header *) (tokens_out + 1); + *ctx->header = tgsi_build_header(); + + processor = (struct tgsi_processor *) (tokens_out + 2); + *processor = tgsi_build_processor( procType, ctx->header ); + + ctx->ti = 3; + + + /** + ** Loop over incoming program tokens/instructions + */ + while( !tgsi_parse_end_of_tokens( &parse ) ) { + + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *fullinst + = &parse.FullToken.FullInstruction; + + if (ctx->transform_instruction) + ctx->transform_instruction(ctx, fullinst); + else + ctx->emit_instruction(ctx, fullinst); + } + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + { + struct tgsi_full_declaration *fulldecl + = &parse.FullToken.FullDeclaration; + + if (ctx->transform_declaration) + ctx->transform_declaration(ctx, fulldecl); + else + ctx->emit_declaration(ctx, fulldecl); + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + struct tgsi_full_immediate *fullimm + = &parse.FullToken.FullImmediate; + + if (ctx->transform_immediate) + ctx->transform_immediate(ctx, fullimm); + else + ctx->emit_immediate(ctx, fullimm); + } + break; + + default: + assert( 0 ); + } + } + + if (ctx->epilog) { + ctx->epilog(ctx); + } + + tgsi_parse_free (&parse); + + return ctx->ti; +} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_transform.h b/src/gallium/auxiliary/tgsi/util/tgsi_transform.h new file mode 100644 index 0000000000..fcf85d603b --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_transform.h @@ -0,0 +1,93 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_TRANSFORM_H +#define TGSI_TRANSFORM_H + + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_build.h" + + + +/** + * Subclass this to add caller-specific data + */ +struct tgsi_transform_context +{ +/**** PUBLIC ***/ + + /** + * User-defined callbacks invoked per instruction. + */ + void (*transform_instruction)(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst); + + void (*transform_declaration)(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl); + + void (*transform_immediate)(struct tgsi_transform_context *ctx, + struct tgsi_full_immediate *imm); + + /** + * Called at end of input program to allow caller to append extra + * instructions. Return number of tokens emitted. + */ + void (*epilog)(struct tgsi_transform_context *ctx); + + +/*** PRIVATE ***/ + + /** + * These are setup by tgsi_transform_shader() and cannot be overridden. + * Meant to be called from in the above user callback functions. + */ + void (*emit_instruction)(struct tgsi_transform_context *ctx, + const struct tgsi_full_instruction *inst); + void (*emit_declaration)(struct tgsi_transform_context *ctx, + const struct tgsi_full_declaration *decl); + void (*emit_immediate)(struct tgsi_transform_context *ctx, + const struct tgsi_full_immediate *imm); + + struct tgsi_header *header; + uint max_tokens_out; + struct tgsi_token *tokens_out; + uint ti; +}; + + + +extern int +tgsi_transform_shader(const struct tgsi_token *tokens_in, + struct tgsi_token *tokens_out, + uint max_tokens_out, + struct tgsi_transform_context *ctx); + + +#endif /* TGSI_TRANSFORM_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.c b/src/gallium/auxiliary/tgsi/util/tgsi_util.c new file mode 100644 index 0000000000..4cdd89182a --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_util.c @@ -0,0 +1,274 @@ +#include "pipe/p_debug.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi_parse.h" +#include "tgsi_build.h" +#include "tgsi_util.h" + +union pointer_hack +{ + void *pointer; + unsigned long long uint64; +}; + +void * +tgsi_align_128bit( + void *unaligned ) +{ + union pointer_hack ph; + + ph.uint64 = 0; + ph.pointer = unaligned; + ph.uint64 = (ph.uint64 + 15) & ~15; + return ph.pointer; +} + +unsigned +tgsi_util_get_src_register_swizzle( + const struct tgsi_src_register *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->SwizzleX; + case 1: + return reg->SwizzleY; + case 2: + return reg->SwizzleZ; + case 3: + return reg->SwizzleW; + default: + assert( 0 ); + } + return 0; +} + +unsigned +tgsi_util_get_src_register_extswizzle( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->ExtSwizzleX; + case 1: + return reg->ExtSwizzleY; + case 2: + return reg->ExtSwizzleZ; + case 3: + return reg->ExtSwizzleW; + default: + assert( 0 ); + } + return 0; +} + +unsigned +tgsi_util_get_full_src_register_extswizzle( + const struct tgsi_full_src_register *reg, + unsigned component ) +{ + unsigned swizzle; + + /* + * First, calculate the extended swizzle for a given channel. This will give + * us either a channel index into the simple swizzle or a constant 1 or 0. + */ + swizzle = tgsi_util_get_src_register_extswizzle( + ®->SrcRegisterExtSwz, + component ); + + assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); + assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); + assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); + assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); + assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); + assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); + + /* + * Second, calculate the simple swizzle for the unswizzled channel index. + * Leave the constants intact, they are not affected by the simple swizzle. + */ + if( swizzle <= TGSI_SWIZZLE_W ) { + swizzle = tgsi_util_get_src_register_swizzle( + ®->SrcRegister, + component ); + } + + return swizzle; +} + +void +tgsi_util_set_src_register_swizzle( + struct tgsi_src_register *reg, + unsigned swizzle, + unsigned component ) +{ + switch( component ) { + case 0: + reg->SwizzleX = swizzle; + break; + case 1: + reg->SwizzleY = swizzle; + break; + case 2: + reg->SwizzleZ = swizzle; + break; + case 3: + reg->SwizzleW = swizzle; + break; + default: + assert( 0 ); + } +} + +void +tgsi_util_set_src_register_extswizzle( + struct tgsi_src_register_ext_swz *reg, + unsigned swizzle, + unsigned component ) +{ + switch( component ) { + case 0: + reg->ExtSwizzleX = swizzle; + break; + case 1: + reg->ExtSwizzleY = swizzle; + break; + case 2: + reg->ExtSwizzleZ = swizzle; + break; + case 3: + reg->ExtSwizzleW = swizzle; + break; + default: + assert( 0 ); + } +} + +unsigned +tgsi_util_get_src_register_extnegate( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->NegateX; + case 1: + return reg->NegateY; + case 2: + return reg->NegateZ; + case 3: + return reg->NegateW; + default: + assert( 0 ); + } + return 0; +} + +void +tgsi_util_set_src_register_extnegate( + struct tgsi_src_register_ext_swz *reg, + unsigned negate, + unsigned component ) +{ + switch( component ) { + case 0: + reg->NegateX = negate; + break; + case 1: + reg->NegateY = negate; + break; + case 2: + reg->NegateZ = negate; + break; + case 3: + reg->NegateW = negate; + break; + default: + assert( 0 ); + } +} + +unsigned +tgsi_util_get_full_src_register_sign_mode( + const struct tgsi_full_src_register *reg, + unsigned component ) +{ + unsigned sign_mode; + + if( reg->SrcRegisterExtMod.Absolute ) { + /* Consider only the post-abs negation. */ + + if( reg->SrcRegisterExtMod.Negate ) { + sign_mode = TGSI_UTIL_SIGN_SET; + } + else { + sign_mode = TGSI_UTIL_SIGN_CLEAR; + } + } + else { + /* Accumulate the three negations. */ + + unsigned negate; + + negate = reg->SrcRegister.Negate; + if( tgsi_util_get_src_register_extnegate( ®->SrcRegisterExtSwz, component ) ) { + negate = !negate; + } + if( reg->SrcRegisterExtMod.Negate ) { + negate = !negate; + } + + if( negate ) { + sign_mode = TGSI_UTIL_SIGN_TOGGLE; + } + else { + sign_mode = TGSI_UTIL_SIGN_KEEP; + } + } + + return sign_mode; +} + +void +tgsi_util_set_full_src_register_sign_mode( + struct tgsi_full_src_register *reg, + unsigned sign_mode ) +{ + reg->SrcRegisterExtSwz.NegateX = 0; + reg->SrcRegisterExtSwz.NegateY = 0; + reg->SrcRegisterExtSwz.NegateZ = 0; + reg->SrcRegisterExtSwz.NegateW = 0; + + switch (sign_mode) + { + case TGSI_UTIL_SIGN_CLEAR: + reg->SrcRegister.Negate = 0; + reg->SrcRegisterExtMod.Absolute = 1; + reg->SrcRegisterExtMod.Negate = 0; + break; + + case TGSI_UTIL_SIGN_SET: + reg->SrcRegister.Negate = 0; + reg->SrcRegisterExtMod.Absolute = 1; + reg->SrcRegisterExtMod.Negate = 1; + break; + + case TGSI_UTIL_SIGN_TOGGLE: + reg->SrcRegister.Negate = 1; + reg->SrcRegisterExtMod.Absolute = 0; + reg->SrcRegisterExtMod.Negate = 0; + break; + + case TGSI_UTIL_SIGN_KEEP: + reg->SrcRegister.Negate = 0; + reg->SrcRegisterExtMod.Absolute = 0; + reg->SrcRegisterExtMod.Negate = 0; + break; + + default: + assert( 0 ); + } +} + diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.h b/src/gallium/auxiliary/tgsi/util/tgsi_util.h new file mode 100644 index 0000000000..ef14446f0e --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_util.h @@ -0,0 +1,70 @@ +#if !defined TGSI_UTIL_H +#define TGSI_UTIL_H + +#if defined __cplusplus +extern "C" { +#endif // defined __cplusplus + +void * +tgsi_align_128bit( + void *unaligned ); + +unsigned +tgsi_util_get_src_register_swizzle( + const struct tgsi_src_register *reg, + unsigned component ); + +unsigned +tgsi_util_get_src_register_extswizzle( + const struct tgsi_src_register_ext_swz *reg, + unsigned component); + +unsigned +tgsi_util_get_full_src_register_extswizzle( + const struct tgsi_full_src_register *reg, + unsigned component ); + +void +tgsi_util_set_src_register_swizzle( + struct tgsi_src_register *reg, + unsigned swizzle, + unsigned component ); + +void +tgsi_util_set_src_register_extswizzle( + struct tgsi_src_register_ext_swz *reg, + unsigned swizzle, + unsigned component ); + +unsigned +tgsi_util_get_src_register_extnegate( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ); + +void +tgsi_util_set_src_register_extnegate( + struct tgsi_src_register_ext_swz *reg, + unsigned negate, + unsigned component ); + +#define TGSI_UTIL_SIGN_CLEAR 0 /* Force positive */ +#define TGSI_UTIL_SIGN_SET 1 /* Force negative */ +#define TGSI_UTIL_SIGN_TOGGLE 2 /* Negate */ +#define TGSI_UTIL_SIGN_KEEP 3 /* No change */ + +unsigned +tgsi_util_get_full_src_register_sign_mode( + const struct tgsi_full_src_register *reg, + unsigned component ); + +void +tgsi_util_set_full_src_register_sign_mode( + struct tgsi_full_src_register *reg, + unsigned sign_mode ); + +#if defined __cplusplus +} // extern "C" +#endif // defined __cplusplus + +#endif // !defined TGSI_UTIL_H + diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c new file mode 100644 index 0000000000..b9607a6ba7 --- /dev/null +++ b/src/gallium/auxiliary/util/p_debug.c @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include + +#ifdef WIN32 +#include +#include +#else +#include +#include +#endif + +#include "pipe/p_debug.h" +#include "pipe/p_compiler.h" + + +void debug_vprintf(const char *format, va_list ap) +{ +#ifdef WIN32 + EngDebugPrint("Gallium3D: ", (PCHAR)format, ap); +#else + vfprintf(stderr, format, ap); +#endif +} + + +void debug_printf(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + debug_vprintf(format, ap); + va_end(ap); +} + + +static INLINE void debug_abort(void) +{ +#ifdef WIN32 + EngDebugBreak(); +#else + abort(); +#endif +} + + +void debug_assert_fail(const char *expr, const char *file, unsigned line) +{ + debug_printf("%s:%i: Assertion `%s' failed.\n", file, line, expr); + debug_abort(); +} diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c new file mode 100644 index 0000000000..3f795a3898 --- /dev/null +++ b/src/gallium/auxiliary/util/p_tile.c @@ -0,0 +1,699 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * RGBA/float tile get/put functions. + * Usable both by drivers and state trackers. + * Surfaces should already be in a mapped state. + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" + +#include "p_tile.h" + + + +/** + * Move raw block of pixels from surface to user memory. + * This should be usable by any hw driver that has mappable surfaces. + */ +void +pipe_get_tile_raw(struct pipe_context *pipe, + struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + void *p, int dst_stride) +{ + const uint cpp = ps->cpp; + const ubyte *pSrc; + const uint src_stride = ps->pitch * cpp; + ubyte *pDest; + uint i; + + if (dst_stride == 0) { + dst_stride = w * cpp; + } + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + pSrc = (const ubyte *) pipe_surface_map(ps) + (y * ps->pitch + x) * cpp; + pDest = (ubyte *) p; + + for (i = 0; i < h; i++) { + memcpy(pDest, pSrc, w * cpp); + pDest += dst_stride; + pSrc += src_stride; + } + + pipe_surface_unmap(ps); +} + + +/** + * Move raw block of pixels from user memory to surface. + * This should be usable by any hw driver that has mappable surfaces. + */ +void +pipe_put_tile_raw(struct pipe_context *pipe, + struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const void *p, int src_stride) +{ + const uint cpp = ps->cpp; + const ubyte *pSrc; + const uint dst_stride = ps->pitch * cpp; + ubyte *pDest; + uint i; + + if (src_stride == 0) { + src_stride = w * cpp; + } + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + pSrc = (const ubyte *) p; + pDest = (ubyte *) pipe_surface_map(ps) + (y * ps->pitch + x) * cpp; + + for (i = 0; i < h; i++) { + memcpy(pDest, pSrc, w * cpp); + pDest += dst_stride; + pSrc += src_stride; + } + + pipe_surface_unmap(ps); +} + + + + +/** Convert short in [-32768,32767] to GLfloat in [-1.0,1.0] */ +#define SHORT_TO_FLOAT(S) ((2.0F * (S) + 1.0F) * (1.0F/65535.0F)) + +#define UNCLAMPED_FLOAT_TO_SHORT(us, f) \ + us = ( (short) ( CLAMP((f), -1.0, 1.0) * 32767.0F) ) + + + +/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/ + +static void +a8r8g8b8_get_tile_rgba(unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const unsigned pixel = *src++; + pRow[0] = UBYTE_TO_FLOAT((pixel >> 16) & 0xff); + pRow[1] = UBYTE_TO_FLOAT((pixel >> 8) & 0xff); + pRow[2] = UBYTE_TO_FLOAT((pixel >> 0) & 0xff); + pRow[3] = UBYTE_TO_FLOAT((pixel >> 24) & 0xff); + } + p += dst_stride; + } +} + + +static void +a8r8g8b8_put_tile_rgba(unsigned *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]); + UNCLAMPED_FLOAT_TO_UBYTE(g, pRow[1]); + UNCLAMPED_FLOAT_TO_UBYTE(b, pRow[2]); + UNCLAMPED_FLOAT_TO_UBYTE(a, pRow[3]); + *dst++ = (a << 24) | (r << 16) | (g << 8) | b; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_B8G8R8A8_UNORM ***/ + +static void +b8g8r8a8_get_tile_rgba(unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const unsigned pixel = *src++; + pRow[0] = UBYTE_TO_FLOAT((pixel >> 8) & 0xff); + pRow[1] = UBYTE_TO_FLOAT((pixel >> 16) & 0xff); + pRow[2] = UBYTE_TO_FLOAT((pixel >> 24) & 0xff); + pRow[3] = UBYTE_TO_FLOAT((pixel >> 0) & 0xff); + } + p += dst_stride; + } +} + + +static void +b8g8r8a8_put_tile_rgba(unsigned *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]); + UNCLAMPED_FLOAT_TO_UBYTE(g, pRow[1]); + UNCLAMPED_FLOAT_TO_UBYTE(b, pRow[2]); + UNCLAMPED_FLOAT_TO_UBYTE(a, pRow[3]); + *dst++ = (b << 24) | (g << 16) | (r << 8) | a; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_A1R5G5B5_UNORM ***/ + +static void +a1r5g5b5_get_tile_rgba(ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const ushort pixel = *src++; + pRow[0] = ((pixel >> 10) & 0x1f) * (1.0f / 31.0f); + pRow[1] = ((pixel >> 5) & 0x1f) * (1.0f / 31.0f); + pRow[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f); + pRow[3] = ((pixel >> 15) ) * 1.0f; + } + p += dst_stride; + } +} + + +/*** PIPE_FORMAT_A4R4G4B4_UNORM ***/ + +static void +a4r4g4b4_get_tile_rgba(ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const ushort pixel = *src++; + pRow[0] = ((pixel >> 8) & 0xf) * (1.0f / 15.0f); + pRow[1] = ((pixel >> 4) & 0xf) * (1.0f / 15.0f); + pRow[2] = ((pixel ) & 0xf) * (1.0f / 15.0f); + pRow[3] = ((pixel >> 12) ) * (1.0f / 15.0f); + } + p += dst_stride; + } +} + + +/*** PIPE_FORMAT_R5G6B5_UNORM ***/ + +static void +r5g6b5_get_tile_rgba(ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const ushort pixel = *src++; + pRow[0] = ((pixel >> 11) & 0x1f) * (1.0f / 31.0f); + pRow[1] = ((pixel >> 5) & 0x3f) * (1.0f / 63.0f); + pRow[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f); + pRow[3] = 1.0f; + } + p += dst_stride; + } +} + + +static void +r5g5b5_put_tile_rgba(ushort *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + uint r = (uint) (CLAMP(pRow[0], 0.0, 1.0) * 31.0); + uint g = (uint) (CLAMP(pRow[1], 0.0, 1.0) * 63.0); + uint b = (uint) (CLAMP(pRow[2], 0.0, 1.0) * 31.0); + *dst++ = (r << 11) | (g << 5) | (b); + } + p += src_stride; + } +} + + + +/*** PIPE_FORMAT_Z16_UNORM ***/ + +/** + * Return each Z value as four floats in [0,1]. + */ +static void +z16_get_tile_rgba(ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + const float scale = 1.0f / 65535.0f; + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = *src++ * scale; + } + p += dst_stride; + } +} + + + + +/*** PIPE_FORMAT_U_L8 ***/ + +static void +l8_get_tile_rgba(ubyte *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = UBYTE_TO_FLOAT(*src); + pRow[3] = 1.0; + } + p += dst_stride; + } +} + + +/*** PIPE_FORMAT_U_A8 ***/ + +static void +a8_get_tile_rgba(ubyte *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = 0.0; + pRow[3] = UBYTE_TO_FLOAT(*src); + } + p += dst_stride; + } +} + + +/*** PIPE_FORMAT_R16G16B16A16_SNORM ***/ + +static void +r16g16b16a16_get_tile_rgba(short *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src += 4, pRow += 4) { + pRow[0] = SHORT_TO_FLOAT(src[0]); + pRow[1] = SHORT_TO_FLOAT(src[1]); + pRow[2] = SHORT_TO_FLOAT(src[2]); + pRow[3] = SHORT_TO_FLOAT(src[3]); + } + p += dst_stride; + } +} + + +static void +r16g16b16a16_put_tile_rgba(short *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, dst += 4, pRow += 4) { + UNCLAMPED_FLOAT_TO_SHORT(dst[0], pRow[0]); + UNCLAMPED_FLOAT_TO_SHORT(dst[1], pRow[1]); + UNCLAMPED_FLOAT_TO_SHORT(dst[2], pRow[2]); + UNCLAMPED_FLOAT_TO_SHORT(dst[3], pRow[3]); + } + p += src_stride; + } +} + + + +/*** PIPE_FORMAT_U_I8 ***/ + +static void +i8_get_tile_rgba(ubyte *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = UBYTE_TO_FLOAT(*src); + } + p += dst_stride; + } +} + + +/*** PIPE_FORMAT_U_A8_L8 ***/ + +static void +a8_l8_get_tile_rgba(ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + ushort p = *src++; + pRow[0] = + pRow[1] = + pRow[2] = UBYTE_TO_FLOAT(p & 0xff); + pRow[3] = UBYTE_TO_FLOAT(p >> 8); + } + p += dst_stride; + } +} + + + + +/*** PIPE_FORMAT_Z32_UNORM ***/ + +/** + * Return each Z value as four floats in [0,1]. + */ +static void +z32_get_tile_rgba(unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + const double scale = 1.0 / (double) 0xffffffff; + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float) (*src++ * scale); + } + p += dst_stride; + } +} + + +/*** PIPE_FORMAT_S8Z24_UNORM ***/ + +/** + * Return Z component as four float in [0,1]. Stencil part ignored. + */ +static void +s8z24_get_tile_rgba(unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + const double scale = 1.0 / ((1 << 24) - 1); + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float) (scale * (*src++ & 0xffffff)); + } + p += dst_stride; + } +} + + +/*** PIPE_FORMAT_Z24S8_UNORM ***/ + +/** + * Return Z component as four float in [0,1]. Stencil part ignored. + */ +static void +z24s8_get_tile_rgba(unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + const double scale = 1.0 / ((1 << 24) - 1); + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float) (scale * (*src++ >> 8)); + } + p += dst_stride; + } +} + + +void +pipe_get_tile_rgba(struct pipe_context *pipe, + struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + float *p) +{ + unsigned dst_stride = w * 4; + void *packed; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + packed = MALLOC(h * w * ps->cpp); + + if (!packed) + return; + + pipe_get_tile_raw(pipe, ps, x, y, w, h, packed, w * ps->cpp); + + switch (ps->format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + a8r8g8b8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + b8g8r8a8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); + break; + case PIPE_FORMAT_A1R5G5B5_UNORM: + a1r5g5b5_get_tile_rgba((ushort *) packed, w, h, p, dst_stride); + break; + case PIPE_FORMAT_A4R4G4B4_UNORM: + a4r4g4b4_get_tile_rgba((ushort *) packed, w, h, p, dst_stride); + break; + case PIPE_FORMAT_R5G6B5_UNORM: + r5g6b5_get_tile_rgba((ushort *) packed, w, h, p, dst_stride); + break; + case PIPE_FORMAT_U_L8: + l8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride); + break; + case PIPE_FORMAT_U_A8: + a8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride); + break; + case PIPE_FORMAT_U_I8: + i8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride); + break; + case PIPE_FORMAT_U_A8_L8: + a8_l8_get_tile_rgba((ushort *) packed, w, h, p, dst_stride); + break; + case PIPE_FORMAT_R16G16B16A16_SNORM: + r16g16b16a16_get_tile_rgba((short *) packed, w, h, p, dst_stride); + break; + case PIPE_FORMAT_Z16_UNORM: + z16_get_tile_rgba((ushort *) packed, w, h, p, dst_stride); + break; + case PIPE_FORMAT_Z32_UNORM: + z32_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); + break; + case PIPE_FORMAT_S8Z24_UNORM: + s8z24_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); + break; + case PIPE_FORMAT_Z24S8_UNORM: + z24s8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); + break; + default: + assert(0); + } + + FREE(packed); +} + + +void +pipe_put_tile_rgba(struct pipe_context *pipe, + struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const float *p) +{ + unsigned src_stride = w * 4; + void *packed; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + packed = MALLOC(h * w * ps->cpp); + + if (!packed) + return; + + switch (ps->format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_A1R5G5B5_UNORM: + /*a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_R5G6B5_UNORM: + r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + break; + case PIPE_FORMAT_U_L8: + /*l8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_U_A8: + /*a8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_U_I8: + /*i8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_U_A8_L8: + /*a8_l8_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_R16G16B16A16_SNORM: + r16g16b16a16_put_tile_rgba((short *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_Z16_UNORM: + /*z16_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_Z32_UNORM: + /*z32_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_S8Z24_UNORM: + /*s8z24_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_Z24S8_UNORM: + /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; + default: + assert(0); + } + + pipe_put_tile_raw(pipe, ps, x, y, w, h, packed, w * ps->cpp); + + FREE(packed); +} diff --git a/src/gallium/auxiliary/util/p_tile.h b/src/gallium/auxiliary/util/p_tile.h new file mode 100644 index 0000000000..cd8124bf11 --- /dev/null +++ b/src/gallium/auxiliary/util/p_tile.h @@ -0,0 +1,81 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef P_TILE_H +#define P_TILE_H + +#include "pipe/p_compiler.h" + +struct pipe_context; +struct pipe_surface; + + +/** + * Clip tile against surface dims. + * \return TRUE if tile is totally clipped, FALSE otherwise + */ +static INLINE boolean +pipe_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_surface *ps) +{ + if (x >= ps->width) + return TRUE; + if (y >= ps->height) + return TRUE; + if (x + *w > ps->width) + *w = ps->width - x; + if (y + *h > ps->height) + *h = ps->height - y; + return FALSE; +} + + +extern void +pipe_get_tile_raw(struct pipe_context *pipe, + struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + void *p, int dst_stride); + +extern void +pipe_put_tile_raw(struct pipe_context *pipe, + struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const void *p, int src_stride); + + +extern void +pipe_get_tile_rgba(struct pipe_context *pipe, + struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + float *p); + +extern void +pipe_put_tile_rgba(struct pipe_context *pipe, + struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const float *p); + +#endif diff --git a/src/gallium/auxiliary/util/p_util.c b/src/gallium/auxiliary/util/p_util.c new file mode 100644 index 0000000000..2a92f8e408 --- /dev/null +++ b/src/gallium/auxiliary/util/p_util.c @@ -0,0 +1,73 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Miscellaneous utility functions. + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" + + +/** + * Copy 2D rect from one place to another. + * Position and sizes are in pixels. + */ +void +pipe_copy_rect(ubyte * dst, + unsigned cpp, + unsigned dst_pitch, + unsigned dst_x, + unsigned dst_y, + unsigned width, + unsigned height, + const ubyte * src, + int src_pitch, + unsigned src_x, + int src_y) +{ + unsigned i; + + dst_pitch *= cpp; + src_pitch *= cpp; + dst += dst_x * cpp; + src += src_x * cpp; + dst += dst_y * dst_pitch; + src += src_y * src_pitch; + width *= cpp; + + if (width == dst_pitch && width == src_pitch) + memcpy(dst, src, height * width); + else { + for (i = 0; i < height; i++) { + memcpy(dst, src, width); + dst += dst_pitch; + src += src_pitch; + } + } +} -- cgit v1.2.3 From 66f22aa3bf7fa546e946b45156aa578e202982c9 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 15 Feb 2008 20:11:40 +0900 Subject: Code reorganization: s/aux/auxiliary/ -- update build. --- src/gallium/Makefile | 2 +- src/gallium/Makefile.template | 2 +- src/gallium/auxiliary/llvm/Makefile | 2 +- src/gallium/drivers/cell/ppu/Makefile | 2 +- src/gallium/drivers/cell/spu/Makefile | 2 +- src/gallium/winsys/dri/Makefile.template | 2 +- src/mesa/Makefile | 2 +- src/mesa/sources | 66 ++++++++++++++++---------------- 8 files changed, 40 insertions(+), 40 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/Makefile b/src/gallium/Makefile index a13b9a52d3..89e068a449 100644 --- a/src/gallium/Makefile +++ b/src/gallium/Makefile @@ -2,7 +2,7 @@ TOP = ../.. include $(TOP)/configs/current -SUBDIRS = aux drivers +SUBDIRS = auxiliary drivers default: subdirs diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 0717ed8dd2..83b25f9b47 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -17,7 +17,7 @@ INCLUDES = \ -I. \ -I$(TOP)/src/gallium/include \ -I$(TOP)/src/gallium/include/pipe \ - -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers \ -I$(TOP)/src/mesa \ -I$(TOP)/include \ diff --git a/src/gallium/auxiliary/llvm/Makefile b/src/gallium/auxiliary/llvm/Makefile index e6ac399d08..e0abf860c1 100644 --- a/src/gallium/auxiliary/llvm/Makefile +++ b/src/gallium/auxiliary/llvm/Makefile @@ -31,7 +31,7 @@ OBJECTS = $(C_SOURCES:.c=.o) \ INCLUDES = \ -I. \ -I$(TOP)/src/gallium/drivers - -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/include \ -I$(TOP)/src/mesa \ -I$(TOP)/include diff --git a/src/gallium/drivers/cell/ppu/Makefile b/src/gallium/drivers/cell/ppu/Makefile index 011863c11e..a4c3f29e8a 100644 --- a/src/gallium/drivers/cell/ppu/Makefile +++ b/src/gallium/drivers/cell/ppu/Makefile @@ -43,7 +43,7 @@ OBJECTS = $(SOURCES:.c=.o) \ INCLUDE_DIRS = \ -I$(TOP)/src/mesa \ -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers .c.o: diff --git a/src/gallium/drivers/cell/spu/Makefile b/src/gallium/drivers/cell/spu/Makefile index 7aa947299e..30ef2450ec 100644 --- a/src/gallium/drivers/cell/spu/Makefile +++ b/src/gallium/drivers/cell/spu/Makefile @@ -34,7 +34,7 @@ SPU_ASM_OUT = $(SOURCES:.c=.s) \ INCLUDE_DIRS = \ -I$(TOP)/src/mesa \ -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers diff --git a/src/gallium/winsys/dri/Makefile.template b/src/gallium/winsys/dri/Makefile.template index b96305c094..2a261ed669 100644 --- a/src/gallium/winsys/dri/Makefile.template +++ b/src/gallium/winsys/dri/Makefile.template @@ -49,7 +49,7 @@ SHARED_INCLUDES = \ -I$(TOP)/include \ -I$(TOP)/include/GL/internal \ -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/aux \ + -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers \ -I$(TOP)/src/mesa \ -I$(TOP)/src/mesa/main \ diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 561608fedd..c8cb2b592f 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -21,7 +21,7 @@ CELL_LIB_SPU = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a endif ifeq ($(CONFIG_NAME), linux-llvm) -LLVM_LIB = $(TOP)/src/gallium/aux/llvm/libgallivm.a +LLVM_LIB = $(TOP)/src/gallium/auxiliary/llvm/libgallivm.a endif .SUFFIXES : .cpp diff --git a/src/mesa/sources b/src/mesa/sources index 2d07738210..cecd8a830f 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -158,45 +158,45 @@ VF_SOURCES = \ DRAW_SOURCES = \ - $(TOP)/src/gallium/aux/draw/draw_clip.c \ - $(TOP)/src/gallium/aux/draw/draw_context.c\ - $(TOP)/src/gallium/aux/draw/draw_cull.c \ - $(TOP)/src/gallium/aux/draw/draw_debug.c \ - $(TOP)/src/gallium/aux/draw/draw_flatshade.c \ - $(TOP)/src/gallium/aux/draw/draw_offset.c \ - $(TOP)/src/gallium/aux/draw/draw_prim.c \ - $(TOP)/src/gallium/aux/draw/draw_stipple.c \ - $(TOP)/src/gallium/aux/draw/draw_twoside.c \ - $(TOP)/src/gallium/aux/draw/draw_unfilled.c \ - $(TOP)/src/gallium/aux/draw/draw_validate.c \ - $(TOP)/src/gallium/aux/draw/draw_vbuf.c \ - $(TOP)/src/gallium/aux/draw/draw_vertex.c \ - $(TOP)/src/gallium/aux/draw/draw_vertex_cache.c \ - $(TOP)/src/gallium/aux/draw/draw_vertex_fetch.c \ - $(TOP)/src/gallium/aux/draw/draw_vertex_shader.c \ - $(TOP)/src/gallium/aux/draw/draw_vf.c \ - $(TOP)/src/gallium/aux/draw/draw_vf_generic.c \ - $(TOP)/src/gallium/aux/draw/draw_vf_sse.c \ - $(TOP)/src/gallium/aux/draw/draw_wide_prims.c + $(TOP)/src/gallium/auxiliary/draw/draw_clip.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_context.c\ + $(TOP)/src/gallium/auxiliary/draw/draw_cull.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_debug.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_flatshade.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_offset.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_prim.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_stipple.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_twoside.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_unfilled.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_validate.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vbuf.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vertex.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vertex_cache.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vertex_fetch.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vertex_shader.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vf.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vf_generic.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vf_sse.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_wide_prims.c TGSIEXEC_SOURCES = \ - $(TOP)/src/gallium/aux/tgsi/exec/tgsi_exec.c \ - $(TOP)/src/gallium/aux/tgsi/exec/tgsi_sse2.c + $(TOP)/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c \ + $(TOP)/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c TGSIUTIL_SOURCES = \ - $(TOP)/src/gallium/aux/tgsi/util/tgsi_build.c \ - $(TOP)/src/gallium/aux/tgsi/util/tgsi_dump.c \ - $(TOP)/src/gallium/aux/tgsi/util/tgsi_parse.c \ - $(TOP)/src/gallium/aux/tgsi/util/tgsi_util.c + $(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_build.c \ + $(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_dump.c \ + $(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_parse.c \ + $(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_util.c STATECACHE_SOURCES = \ - $(TOP)/src/gallium/aux/cso_cache/cso_hash.c \ - $(TOP)/src/gallium/aux/cso_cache/cso_cache.c + $(TOP)/src/gallium/auxiliary/cso_cache/cso_hash.c \ + $(TOP)/src/gallium/auxiliary/cso_cache/cso_cache.c PIPEUTIL_SOURCES = \ - $(TOP)/src/gallium/aux/util/p_debug.c \ - $(TOP)/src/gallium/aux/util/p_tile.c \ - $(TOP)/src/gallium/aux/util/p_util.c + $(TOP)/src/gallium/auxiliary/util/p_debug.c \ + $(TOP)/src/gallium/auxiliary/util/p_tile.c \ + $(TOP)/src/gallium/auxiliary/util/p_util.c STATETRACKER_SOURCES = \ state_tracker/st_atom.c \ @@ -428,7 +428,7 @@ INCLUDE_DIRS = \ -I$(TOP)/src/mesa/main \ -I$(TOP)/src/gallium/include \ -I$(TOP)/src/gallium/drivers \ - -I$(TOP)/src/gallium/aux + -I$(TOP)/src/gallium/auxiliary OLD_INCLUDE_DIRS = \ -I$(TOP)/src/mesa/tnl \ @@ -438,4 +438,4 @@ OLD_INCLUDE_DIRS = \ -I$(TOP)/src/mesa/shader \ -I$(TOP)/src/mesa/shader/grammar \ -I$(TOP)/src/mesa/shader/slang \ - -I$(TOP)/s$(TOP)/src/gallium/aux/tgsi + -I$(TOP)/s$(TOP)/src/gallium/auxiliary/tgsi -- cgit v1.2.3 From b29d8d27292c2ad956d3f0a307603f00ee01af28 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 15 Feb 2008 13:37:01 +0000 Subject: draw: subclass vertex shaders according to execution method Create new files for shaders compiled/executed with llvm, sse, exec respectively --- src/gallium/auxiliary/draw/Makefile | 2 +- src/gallium/auxiliary/draw/draw_private.h | 40 ++-- src/gallium/auxiliary/draw/draw_vertex_shader.c | 229 ++------------------- src/gallium/auxiliary/draw/draw_vs.h | 50 +++++ src/gallium/auxiliary/draw/draw_vs_exec.c | 186 ++++++++++++++++++ src/gallium/auxiliary/draw/draw_vs_llvm.c | 237 ++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_vs_sse.c | 251 ++++++++++++++++++++++++ src/mesa/sources | 3 + 8 files changed, 766 insertions(+), 232 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_vs.h create mode 100644 src/gallium/auxiliary/draw/draw_vs_exec.c create mode 100644 src/gallium/auxiliary/draw/draw_vs_llvm.c create mode 100644 src/gallium/auxiliary/draw/draw_vs_sse.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 451911a354..fe9b150f30 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -1,2 +1,2 @@ default: - cd .. ; make + cd ../../../mesa ; make diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 3d09aef87c..bc11259cb2 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -128,13 +128,25 @@ struct draw_stage * Private version of the compiled vertex_shader */ struct draw_vertex_shader { + + /* This member will disappear shortly: + */ const struct pipe_shader_state *state; -#if defined(__i386__) || defined(__386__) - struct x86_function sse2_program; -#endif -#ifdef MESA_LLVM - struct gallivm_prog *llvm_prog; -#endif + + void (*prepare)( struct draw_vertex_shader *shader, + struct draw_context *draw ); + + /* Run the shader - this interface will get cleaned up in the + * future: + */ + void (*run)( struct draw_vertex_shader *shader, + struct draw_context *draw, + const unsigned *elts, + unsigned count, + struct vertex_header *vOut[] ); + + + void (*delete)( struct draw_vertex_shader * ); }; @@ -176,7 +188,7 @@ struct draw_context struct pipe_viewport_state viewport; struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; - const struct draw_vertex_shader *vertex_shader; + struct draw_vertex_shader *vertex_shader; uint num_vs_outputs; /**< convenience, from vertex_shader */ @@ -201,6 +213,7 @@ struct draw_context boolean convert_wide_points; /**< convert wide points to tris? */ boolean convert_wide_lines; /**< convert side lines to tris? */ + boolean use_sse; unsigned reduced_prim; @@ -255,11 +268,10 @@ struct draw_context unsigned queue_nr; } pq; - int use_sse : 1; -#ifdef MESA_LLVM - struct gallivm_cpu_engine *engine; -#endif - + + /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private. + */ + struct gallivm_cpu_engine *engine; void *driver_private; }; @@ -290,11 +302,7 @@ extern void draw_vertex_cache_invalidate( struct draw_context *draw ); extern void draw_vertex_cache_unreference( struct draw_context *draw ); extern void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw ); - extern void draw_vertex_shader_queue_flush( struct draw_context *draw ); -#ifdef MESA_LLVM -extern void draw_vertex_shader_queue_flush_llvm( struct draw_context *draw ); -#endif struct tgsi_exec_machine; diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c index 9413f8b43a..f68f6e3244 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c @@ -33,177 +33,10 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#if defined(__i386__) || defined(__386__) -#include "tgsi/exec/tgsi_sse2.h" -#endif #include "draw_private.h" #include "draw_context.h" +#include "draw_vs.h" -#include "x86/rtasm/x86sse.h" -#include "llvm/gallivm.h" - - -#define DBG_VS 0 - - -static INLINE unsigned -compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) -{ - unsigned mask = 0; - unsigned i; - - /* Do the hardwired planes first: - */ - if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; - if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; - if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; - if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; - if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; - if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; - - /* Followed by any remaining ones: - */ - for (i = 6; i < nr; i++) { - if (dot4(clip, plane[i]) < 0) - mask |= (1<machine; - unsigned int j; - - ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX); - ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX); - const float *scale = draw->viewport.scale; - const float *trans = draw->viewport.translate; - - assert(count <= 4); - assert(draw->vertex_shader->state->output_semantic_name[0] - == TGSI_SEMANTIC_POSITION); - - /* Consts does not require 16 byte alignment. */ - machine->Consts = (float (*)[4]) draw->user.constants; - - machine->Inputs = ALIGN16_ASSIGN(inputs); - machine->Outputs = ALIGN16_ASSIGN(outputs); - - draw->vertex_fetch.fetch_func( draw, machine, elts, count ); - - /* run shader */ -#ifdef MESA_LLVM - if (1) { - struct gallivm_prog *prog = draw->vertex_shader->llvm_prog; - gallivm_cpu_vs_exec(prog, - machine->Inputs, - machine->Outputs, - machine->Consts, - machine->Temps); - } else -#elif defined(__i386__) || defined(__386__) - if (draw->use_sse) { - /* SSE */ - /* cast away const */ - struct draw_vertex_shader *shader - = (struct draw_vertex_shader *)draw->vertex_shader; - codegen_function func - = (codegen_function) x86_get_func( &shader->sse2_program ); - - if (func) - func( - machine->Inputs, - machine->Outputs, - machine->Consts, - machine->Temps ); - else - /* interpreter */ - tgsi_exec_machine_run( machine ); - } - else -#endif - { - /* interpreter */ - tgsi_exec_machine_run( machine ); - } - - /* store machine results */ - for (j = 0; j < count; j++) { - unsigned slot; - float x, y, z, w; - - /* Handle attr[0] (position) specially: - * - * XXX: Computing the clipmask should be done in the vertex - * program as a set of DP4 instructions appended to the - * user-provided code. - */ - x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); - vOut[j]->edgeflag = 1; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; - -#if DBG_VS - debug_printf("output[%d]win: %f %f %f %f\n", j, - vOut[j]->data[0][0], - vOut[j]->data[0][1], - vOut[j]->data[0][2], - vOut[j]->data[0][3]); -#endif - /* Remaining attributes are packed into sequential post-transform - * vertex attrib slots. - */ - for (slot = 1; slot < draw->num_vs_outputs; slot++) { - vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; -#if DBG_VS - debug_printf("output[%d][%d]: %f %f %f %f\n", j, slot, - vOut[j]->data[slot][0], - vOut[j]->data[slot][1], - vOut[j]->data[slot][2], - vOut[j]->data[slot][3]); -#endif - } - } /* loop over vertices */ -} /** @@ -213,13 +46,14 @@ run_vertex_program(struct draw_context *draw, void draw_vertex_shader_queue_flush(struct draw_context *draw) { + struct draw_vertex_shader *shader = draw->vertex_shader; unsigned i; assert(draw->vs.queue_nr != 0); /* XXX: do this on statechange: */ - draw_update_vertex_fetch( draw ); + shader->prepare( shader, draw ); // fprintf(stderr, " q(%d) ", draw->vs.queue_nr ); @@ -242,7 +76,7 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) assert(n > 0); assert(n <= 4); - run_vertex_program(draw, elts, n, dests); + shader->run(shader, draw, elts, n, dests); } draw->vs.queue_nr = 0; @@ -255,43 +89,16 @@ draw_create_vertex_shader(struct draw_context *draw, { struct draw_vertex_shader *vs; - vs = CALLOC_STRUCT( draw_vertex_shader ); - if (vs == NULL) { - return NULL; - } - - vs->state = shader; + vs = draw_create_vs_llvm( draw, shader ); + if (vs) + return vs; -#ifdef MESA_LLVM - struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS); - gallivm_ir_set_layout(ir, GALLIVM_SOA); - gallivm_ir_set_components(ir, 4); - gallivm_ir_fill_from_tgsi(ir, shader->tokens); - vs->llvm_prog = gallivm_ir_compile(ir); - gallivm_ir_delete(ir); - - draw->engine = gallivm_global_cpu_engine(); - if (!draw->engine) { - draw->engine = gallivm_cpu_engine_create(vs->llvm_prog); - } - else { - gallivm_cpu_jit_compile(draw->engine, vs->llvm_prog); - } -#elif defined(__i386__) || defined(__386__) - if (draw->use_sse) { - /* cast-away const */ - struct pipe_shader_state *sh = (struct pipe_shader_state *) shader; - - x86_init_func( &vs->sse2_program ); - if (!tgsi_emit_sse2( (struct tgsi_token *) sh->tokens, - &vs->sse2_program )) { - x86_release_func( (struct x86_function *) &vs->sse2_program ); - fprintf(stdout /*err*/, - "tgsi_emit_sse2() failed, falling back to interpreter\n"); - } - } -#endif + vs = draw_create_vs_sse( draw, shader ); + if (vs) + return vs; + vs = draw_create_vs_exec( draw, shader ); + assert(vs); return vs; } @@ -307,11 +114,7 @@ draw_bind_vertex_shader(struct draw_context *draw, tgsi_exec_machine_init(&draw->machine); - /* specify the vertex program to interpret/execute */ - tgsi_exec_machine_bind_shader(&draw->machine, - draw->vertex_shader->state->tokens, - PIPE_MAX_SAMPLERS, - NULL /*samplers*/ ); + dvs->prepare( dvs, draw ); } @@ -319,9 +122,5 @@ void draw_delete_vertex_shader(struct draw_context *draw, struct draw_vertex_shader *dvs) { -#if defined(__i386__) || defined(__386__) - x86_release_func( (struct x86_function *) &dvs->sse2_program ); -#endif - - FREE( dvs ); + dvs->delete( dvs ); } diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h new file mode 100644 index 0000000000..4ee7e705e9 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -0,0 +1,50 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#ifndef DRAW_VS_H +#define DRAW_VS_H + +struct draw_vertex_shader; +struct draw_context; +struct pipe_shader_state; + +struct draw_vertex_shader * +draw_create_vs_exec(struct draw_context *draw, + const struct pipe_shader_state *templ); + +struct draw_vertex_shader * +draw_create_vs_sse(struct draw_context *draw, + const struct pipe_shader_state *templ); + +struct draw_vertex_shader * +draw_create_vs_llvm(struct draw_context *draw, + const struct pipe_shader_state *templ); + +#endif diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c new file mode 100644 index 0000000000..8588879400 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -0,0 +1,186 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + * Brian Paul + */ + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" + +#include "draw_private.h" +#include "draw_context.h" +#include "draw_vs.h" + + +static INLINE unsigned +compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) +{ + unsigned mask = 0; + unsigned i; + + /* Do the hardwired planes first: + */ + if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; + if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; + if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; + if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; + if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; + if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; + + /* Followed by any remaining ones: + */ + for (i = 6; i < nr; i++) { + if (dot4(clip, plane[i]) < 0) + mask |= (1<machine, + shader->state->tokens, + PIPE_MAX_SAMPLERS, + NULL /*samplers*/ ); + + draw_update_vertex_fetch( draw ); +} + + +/** + * Transform vertices with the current vertex program/shader + * Up to four vertices can be shaded at a time. + * \param vbuffer the input vertex data + * \param elts indexes of four input vertices + * \param count number of vertices to shade [1..4] + * \param vOut array of pointers to four output vertices + */ +static void +vs_exec_run( struct draw_vertex_shader *shader, + struct draw_context *draw, + const unsigned *elts, + unsigned count, + struct vertex_header *vOut[] ) +{ + struct tgsi_exec_machine *machine = &draw->machine; + unsigned int j; + + ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX); + ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX); + const float *scale = draw->viewport.scale; + const float *trans = draw->viewport.translate; + + assert(count <= 4); + assert(draw->vertex_shader->state->output_semantic_name[0] + == TGSI_SEMANTIC_POSITION); + + machine->Consts = (float (*)[4]) draw->user.constants; + machine->Inputs = ALIGN16_ASSIGN(inputs); + machine->Outputs = ALIGN16_ASSIGN(outputs); + + draw->vertex_fetch.fetch_func( draw, machine, elts, count ); + + /* run interpreter */ + tgsi_exec_machine_run( machine ); + + + /* store machine results */ + for (j = 0; j < count; j++) { + unsigned slot; + float x, y, z, w; + + /* Handle attr[0] (position) specially: + * + * XXX: Computing the clipmask should be done in the vertex + * program as a set of DP4 instructions appended to the + * user-provided code. + */ + x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + + vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); + vOut[j]->edgeflag = 1; + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + vOut[j]->data[0][0] = x * scale[0] + trans[0]; + vOut[j]->data[0][1] = y * scale[1] + trans[1]; + vOut[j]->data[0][2] = z * scale[2] + trans[2]; + vOut[j]->data[0][3] = w; + + /* Remaining attributes are packed into sequential post-transform + * vertex attrib slots. + */ + for (slot = 1; slot < draw->num_vs_outputs; slot++) { + vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } + } /* loop over vertices */ +} + + + +static void +vs_exec_delete( struct draw_vertex_shader *dvs ) +{ + FREE( dvs ); +} + + +struct draw_vertex_shader * +draw_create_vs_exec(struct draw_context *draw, + const struct pipe_shader_state *state) +{ + struct draw_vertex_shader *vs = CALLOC_STRUCT( draw_vertex_shader ); + + if (vs == NULL) + return NULL; + + vs->state = state; + vs->prepare = vs_exec_prepare; + vs->run = vs_exec_run; + vs->delete = vs_exec_delete; + + return vs; +} diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c new file mode 100644 index 0000000000..44022b6e07 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -0,0 +1,237 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin + * Keith Whitwell + * Brian Paul + */ + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "draw_private.h" +#include "draw_context.h" +#include "draw_vs.h" + +#ifdef MESA_LLVM + +#include "llvm/gallivm.h" + +struct draw_llvm_vertex_shader { + struct draw_vertex_shader base; + struct gallivm_prog *llvm_prog; +}; + + +static INLINE unsigned +compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) +{ + unsigned mask = 0; + unsigned i; + + /* Do the hardwired planes first: + */ + if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; + if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; + if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; + if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; + if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; + if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; + + /* Followed by any remaining ones: + */ + for (i = 6; i < nr; i++) { + if (dot4(clip, plane[i]) < 0) + mask |= (1<machine; + unsigned int j; + + ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX); + ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX); + const float *scale = draw->viewport.scale; + const float *trans = draw->viewport.translate; + + + assert(count <= 4); + assert(draw->vertex_shader->state->output_semantic_name[0] + == TGSI_SEMANTIC_POSITION); + + /* Consts does not require 16 byte alignment. */ + machine->Consts = (float (*)[4]) draw->user.constants; + + machine->Inputs = ALIGN16_ASSIGN(inputs); + machine->Outputs = ALIGN16_ASSIGN(outputs); + + draw->vertex_fetch.fetch_func( draw, machine, elts, count ); + + /* run shader */ + gallivm_cpu_vs_exec(shader->llvm_prog, + machine->Inputs, + machine->Outputs, + machine->Consts, + machine->Temps); + + /* store machine results */ + for (j = 0; j < count; j++) { + unsigned slot; + float x, y, z, w; + + x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + + vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); + vOut[j]->edgeflag = 1; + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + vOut[j]->data[0][0] = x * scale[0] + trans[0]; + vOut[j]->data[0][1] = y * scale[1] + trans[1]; + vOut[j]->data[0][2] = z * scale[2] + trans[2]; + vOut[j]->data[0][3] = w; + + /* Remaining attributes are packed into sequential post-transform + * vertex attrib slots. + */ + for (slot = 1; slot < draw->num_vs_outputs; slot++) { + vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } + } /* loop over vertices */ +} + +static void +vs_llvm_delete( struct draw_vertex_shader *base ) +{ + struct draw_llvm_vertex_shader *shader = + (struct draw_llvm_vertex_shader *)base; + + /* Do something to free compiled shader: + */ + + FREE( shader ); +} + + + + +struct draw_vertex_shader * +draw_create_vs_llvm(struct draw_context *draw, + const struct pipe_shader_state *templ) +{ + struct draw_llvm_vertex_shader *vs; + + vs = CALLOC_STRUCT( draw_llvm_vertex_shader ); + if (vs == NULL) + return NULL; + + vs->base.state = templ; + vs->base.prepare = vs_llvm_prepare; + vs->base.run = vs_llvm_run; + vs->base.delete = vs_llvm_delete; + + { + struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS); + gallivm_ir_set_layout(ir, GALLIVM_SOA); + gallivm_ir_set_components(ir, 4); + gallivm_ir_fill_from_tgsi(ir, vs->base.state->tokens); + vs->llvm_prog = gallivm_ir_compile(ir); + gallivm_ir_delete(ir); + } + + draw->engine = gallivm_global_cpu_engine(); + + /* XXX: Why are there two versions of this? Shouldn't creating the + * engine be a separate operation to compiling a shader? + */ + if (!draw->engine) { + draw->engine = gallivm_cpu_engine_create(vs->llvm_prog); + } + else { + gallivm_cpu_jit_compile(draw->engine, vs->llvm_prog); + } + + return &vs->base; +} + + + + + +#else + +struct draw_vertex_shader * +draw_create_vs_llvm(struct draw_context *draw, + const struct pipe_shader_state *shader) +{ + return NULL; +} + +#endif diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c new file mode 100644 index 0000000000..04349cb404 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -0,0 +1,251 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + * Brian Paul + */ + +#include "draw_vs.h" + +#if defined(__i386__) || defined(__386__) + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" + +#include "draw_private.h" +#include "draw_context.h" + +#include "x86/rtasm/x86sse.h" +#include "tgsi/exec/tgsi_sse2.h" + + +typedef void (XSTDCALL *codegen_function) ( + const struct tgsi_exec_vector *input, + struct tgsi_exec_vector *output, + float (*constant)[4], + struct tgsi_exec_vector *temporary ); + + +struct draw_sse_vertex_shader { + struct draw_vertex_shader base; + struct x86_function sse2_program; + codegen_function func; +}; + + +/* Should be part of the generated shader: + */ +static INLINE unsigned +compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) +{ + unsigned mask = 0; + unsigned i; + + /* Do the hardwired planes first: + */ + if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; + if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; + if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; + if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; + if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; + if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; + + /* Followed by any remaining ones: + */ + for (i = 6; i < nr; i++) { + if (dot4(clip, plane[i]) < 0) + mask |= (1<machine; + unsigned int j; + + ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX); + ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX); + const float *scale = draw->viewport.scale; + const float *trans = draw->viewport.translate; + + assert(count <= 4); + assert(draw->vertex_shader->state->output_semantic_name[0] + == TGSI_SEMANTIC_POSITION); + + /* Consts does not require 16 byte alignment. */ + machine->Consts = (float (*)[4]) draw->user.constants; + machine->Inputs = ALIGN16_ASSIGN(inputs); + machine->Outputs = ALIGN16_ASSIGN(outputs); + + + /* Fetch vertices. This may at some point be integrated into the + * compiled shader -- that would require a reorganization where + * multiple versions of the compiled shader might exist, + * specialized for each fetch state. + */ + draw->vertex_fetch.fetch_func( draw, machine, elts, count ); + + + /* run compiled shader + */ + shader->func( + machine->Inputs, + machine->Outputs, + machine->Consts, + machine->Temps ); + + + /* XXX: Computing the clipmask and emitting results should be done + * in the vertex program as a set of instructions appended to + * the user-provided code. + */ + for (j = 0; j < count; j++) { + unsigned slot; + float x, y, z, w; + + x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + + vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); + vOut[j]->edgeflag = 1; + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + vOut[j]->data[0][0] = x * scale[0] + trans[0]; + vOut[j]->data[0][1] = y * scale[1] + trans[1]; + vOut[j]->data[0][2] = z * scale[2] + trans[2]; + vOut[j]->data[0][3] = w; + + /* Remaining attributes are packed into sequential post-transform + * vertex attrib slots. + */ + for (slot = 1; slot < draw->num_vs_outputs; slot++) { + vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } + } +} + + + +static void +vs_sse_delete( struct draw_vertex_shader *base ) +{ + struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base; + + x86_release_func( &shader->sse2_program ); + + FREE( shader ); +} + + +struct draw_vertex_shader * +draw_create_vs_sse(struct draw_context *draw, + const struct pipe_shader_state *templ) +{ + struct draw_sse_vertex_shader *vs; + + if (!draw->use_sse) + return NULL; + + vs = CALLOC_STRUCT( draw_sse_vertex_shader ); + if (vs == NULL) + return NULL; + + vs->base.state = templ; + vs->base.prepare = vs_sse_prepare; + vs->base.run = vs_sse_run; + vs->base.delete = vs_sse_delete; + + x86_init_func( &vs->sse2_program ); + + if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state->tokens, + &vs->sse2_program )) + goto fail; + + vs->func = (codegen_function) x86_get_func( &vs->sse2_program ); + + return &vs->base; + +fail: + fprintf(stderr, "tgsi_emit_sse2() failed, falling back to interpreter\n"); + + x86_release_func( &vs->sse2_program ); + + FREE(vs); + return NULL; +} + + + +#else + +struct draw_vertex_shader * +draw_create_vs_sse( struct draw_context *draw, + const struct pipe_shader_state *templ ) +{ + return NULL; +} + + +#endif + diff --git a/src/mesa/sources b/src/mesa/sources index cecd8a830f..f83d247a1e 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -159,6 +159,9 @@ VF_SOURCES = \ DRAW_SOURCES = \ $(TOP)/src/gallium/auxiliary/draw/draw_clip.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vs_exec.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vs_sse.c \ + $(TOP)/src/gallium/auxiliary/draw/draw_vs_llvm.c \ $(TOP)/src/gallium/auxiliary/draw/draw_context.c\ $(TOP)/src/gallium/auxiliary/draw/draw_cull.c \ $(TOP)/src/gallium/auxiliary/draw/draw_debug.c \ -- cgit v1.2.3 From c179bc990108a8ea691ceab03fd68a12396ab538 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 15 Feb 2008 13:39:24 +0000 Subject: tgsi: pass through failure to sse-codegenerate for fragment programs too. In particular, will fallback to interpreted execution for shaders with TEX instructions. --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 79209575bc..62c6a69c63 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -2308,6 +2308,7 @@ tgsi_emit_sse2_fs( { struct tgsi_parse_context parse; boolean instruction_phase = FALSE; + unsigned ok = 1; DUMP_START(); @@ -2333,7 +2334,7 @@ tgsi_emit_sse2_fs( tgsi_parse_init( &parse, tokens ); - while( !tgsi_parse_end_of_tokens( &parse ) ) { + while( !tgsi_parse_end_of_tokens( &parse ) && ok ) { tgsi_parse_token( &parse ); switch( parse.FullToken.Token.Type ) { @@ -2352,17 +2353,18 @@ tgsi_emit_sse2_fs( get_output_base(), get_argument( 1 ) ); } - emit_instruction( + ok = emit_instruction( func, &parse.FullToken.FullInstruction ); break; case TGSI_TOKEN_TYPE_IMMEDIATE: /* XXX implement this */ - assert(0); + ok = 0; break; default: + ok = 0; assert( 0 ); } } @@ -2371,7 +2373,7 @@ tgsi_emit_sse2_fs( DUMP_END(); - return 1; + return ok; } #endif /* i386 */ -- cgit v1.2.3 From b08d3fa249c830d274dca362b8f824b75fe26945 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Fri, 15 Feb 2008 10:00:31 -0800 Subject: Make this file build on non-SSE builds (e.g., Cell) --- src/gallium/auxiliary/draw/draw_vs_sse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 04349cb404..27bc66812c 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -243,7 +243,7 @@ struct draw_vertex_shader * draw_create_vs_sse( struct draw_context *draw, const struct pipe_shader_state *templ ) { - return NULL; + return (void *) 0; } -- cgit v1.2.3 From 4a79156812d574249b51e1692f4615aa31bf0e50 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sun, 17 Feb 2008 09:42:26 -0500 Subject: fix the build --- src/gallium/auxiliary/llvm/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/llvm/Makefile b/src/gallium/auxiliary/llvm/Makefile index e0abf860c1..39fac6ea4a 100644 --- a/src/gallium/auxiliary/llvm/Makefile +++ b/src/gallium/auxiliary/llvm/Makefile @@ -30,7 +30,7 @@ OBJECTS = $(C_SOURCES:.c=.o) \ ### Include directories INCLUDES = \ -I. \ - -I$(TOP)/src/gallium/drivers + -I$(TOP)/src/gallium/drivers \ -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/include \ -I$(TOP)/src/mesa \ -- cgit v1.2.3 From bfd5916eafb9a97ad10f1d4a8738e7dcb02e04f4 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 18 Feb 2008 14:25:04 +0900 Subject: Code reorganization: split gallium and mesa makefiles. In other words, don't build src/gallium source code from within src/mesa/Makefile. Also, allow to customize which gallium auxiliary dirs, driver driver, winsys dirs get built from the config/* files. --- configs/default | 12 +++- configs/linux-cell | 3 + configs/linux-dri | 6 +- configs/linux-llvm | 2 + src/gallium/Makefile.template | 1 + src/gallium/auxiliary/Makefile | 6 +- src/gallium/auxiliary/cso_cache/Makefile | 18 +++++ src/gallium/auxiliary/draw/Makefile | 41 ++++++++++- src/gallium/auxiliary/tgsi/Makefile | 24 ++++++- src/gallium/auxiliary/tgsi/exec/Makefile | 3 - src/gallium/auxiliary/util/Makefile | 20 ++++++ src/gallium/drivers/Makefile | 6 +- src/gallium/winsys/Makefile | 20 ++++++ src/gallium/winsys/dri/Makefile.template | 6 +- src/gallium/winsys/xlib/Makefile | 113 +++++++++++++++++++++++++++++++ src/mesa/Makefile | 74 ++------------------ src/mesa/sources | 56 +-------------- 17 files changed, 264 insertions(+), 147 deletions(-) create mode 100644 src/gallium/auxiliary/cso_cache/Makefile delete mode 100644 src/gallium/auxiliary/tgsi/exec/Makefile create mode 100644 src/gallium/auxiliary/util/Makefile create mode 100644 src/gallium/winsys/Makefile create mode 100644 src/gallium/winsys/xlib/Makefile (limited to 'src/gallium/auxiliary') diff --git a/configs/default b/configs/default index 25a87e66a1..7f659725cb 100644 --- a/configs/default +++ b/configs/default @@ -60,13 +60,21 @@ GLW_SOURCES = GLwDrawA.c # Directories to build LIB_DIR = lib -SRC_DIRS = gallium mesa glu glut/glx glw +SRC_DIRS = gallium mesa gallium/winsys glu glut/glx glw GLU_DIRS = sgi -DRIVER_DIRS = x11 osmesa +DRIVER_DIRS = # Which subdirs under $(TOP)/progs/ to enter: PROGRAM_DIRS = demos redbook samples glsl xdemos +# Gallium directories and +GALLIUM_AUXILIARY_DIRS = draw cso_cache pipebuffer tgsi util +GALLIUM_AUXILIARIES = $(foreach DIR,$(GALLIUM_AUXILIARY_DIRS),$(TOP)/src/gallium/auxiliary/$(DIR)/lib$(DIR).a) +GALLIUM_DRIVER_DIRS = softpipe i915simple i965simple failover +GALLIUM_DRIVERS = $(foreach DIR,$(GALLIUM_DRIVER_DIRS),$(TOP)/src/gallium/drivers/$(DIR)/lib$(DIR).a) +GALLIUM_WINSYS_DIRS = xlib + + # Library/program dependencies #EXTRA_LIB_PATH ?= GL_LIB_DEPS = $(EXTRA_LIB_PATH) -lX11 -lXext -lm -lpthread diff --git a/configs/linux-cell b/configs/linux-cell index 09f62fc1ff..807ede75a7 100644 --- a/configs/linux-cell +++ b/configs/linux-cell @@ -5,6 +5,9 @@ include $(TOP)/configs/default CONFIG_NAME = linux-cell +GALLIUM_DRIVER_DIRS += cell + + # Compiler and flags CC = ppu32-gcc CXX = ppu32-g++ diff --git a/configs/linux-dri b/configs/linux-dri index e6135856fc..68afcb1d28 100644 --- a/configs/linux-dri +++ b/configs/linux-dri @@ -54,15 +54,13 @@ USING_EGL=0 # Directories ifeq ($(USING_EGL), 1) -SRC_DIRS = egl glx/x11 gallium mesa glu glut/glx glw +SRC_DIRS := egl $(SRC_DIRS) PROGRAM_DIRS = egl -else -SRC_DIRS = glx/x11 gallium mesa glu glut/glx glw -PROGRAM_DIRS = endif DRIVER_DIRS = dri WINDOW_SYSTEM=dri +GALLIUM_WINSYS_DIRS = dri # gamma are missing because they have not been converted to use the new # interface. diff --git a/configs/linux-llvm b/configs/linux-llvm index 915189f462..a8889321a0 100644 --- a/configs/linux-llvm +++ b/configs/linux-llvm @@ -5,6 +5,8 @@ include $(TOP)/configs/linux CONFIG_NAME = linux-llvm +GALLIUM_AUXILIARY_DIRS += llvm + OPT_FLAGS = -g -ansi -pedantic DEFINES += -DDEBUG -DDEBUG_MATH -DMESA_LLVM=1 diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 83b25f9b47..6ad58c205c 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -20,6 +20,7 @@ INCLUDES = \ -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers \ -I$(TOP)/src/mesa \ + -I$(TOP)/src/mesa/main \ -I$(TOP)/include \ $(DRIVER_INCLUDES) diff --git a/src/gallium/auxiliary/Makefile b/src/gallium/auxiliary/Makefile index da68498aa1..eaa0f2fe4e 100644 --- a/src/gallium/auxiliary/Makefile +++ b/src/gallium/auxiliary/Makefile @@ -2,11 +2,7 @@ TOP = ../../.. include $(TOP)/configs/current -ifeq ($(CONFIG_NAME), linux-llvm) -LLVM_DIR = llvm -endif - -SUBDIRS = pipebuffer $(LLVM_DIR) +SUBDIRS = $(GALLIUM_AUXILIARY_DIRS) default: subdirs diff --git a/src/gallium/auxiliary/cso_cache/Makefile b/src/gallium/auxiliary/cso_cache/Makefile new file mode 100644 index 0000000000..8248b097fd --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/Makefile @@ -0,0 +1,18 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = cso_cache + +DRIVER_SOURCES = \ + cso_cache.c \ + cso_hash.c + +C_SOURCES = \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index fe9b150f30..c56b63d85b 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -1,2 +1,39 @@ -default: - cd ../../../mesa ; make +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = draw + +DRIVER_SOURCES = \ + draw_clip.c \ + draw_vs_exec.c \ + draw_vs_sse.c \ + draw_vs_llvm.c \ + draw_context.c\ + draw_cull.c \ + draw_debug.c \ + draw_flatshade.c \ + draw_offset.c \ + draw_prim.c \ + draw_stipple.c \ + draw_twoside.c \ + draw_unfilled.c \ + draw_validate.c \ + draw_vbuf.c \ + draw_vertex.c \ + draw_vertex_cache.c \ + draw_vertex_fetch.c \ + draw_vertex_shader.c \ + draw_vf.c \ + draw_vf_generic.c \ + draw_vf_sse.c \ + draw_wide_prims.c + +C_SOURCES = \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index 12a8bd0409..c10ab396d8 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -1,3 +1,23 @@ -default: - cd ../.. ; make + +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = tgsi + +DRIVER_SOURCES = \ + exec/tgsi_exec.c \ + exec/tgsi_sse2.c \ + util/tgsi_build.c \ + util/tgsi_dump.c \ + util/tgsi_parse.c \ + util/tgsi_util.c + +C_SOURCES = \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: diff --git a/src/gallium/auxiliary/tgsi/exec/Makefile b/src/gallium/auxiliary/tgsi/exec/Makefile deleted file mode 100644 index eb8b14e0e8..0000000000 --- a/src/gallium/auxiliary/tgsi/exec/Makefile +++ /dev/null @@ -1,3 +0,0 @@ -default: - cd ../../.. ; make - diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile new file mode 100644 index 0000000000..b8cb148c4f --- /dev/null +++ b/src/gallium/auxiliary/util/Makefile @@ -0,0 +1,20 @@ + +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = util + +DRIVER_SOURCES = \ + p_debug.c \ + p_tile.c \ + p_util.c + +C_SOURCES = \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/drivers/Makefile b/src/gallium/drivers/Makefile index c0345a9cb5..8dcfb18a16 100644 --- a/src/gallium/drivers/Makefile +++ b/src/gallium/drivers/Makefile @@ -2,11 +2,7 @@ TOP = ../../.. include $(TOP)/configs/current -ifeq ($(CONFIG_NAME), linux-cell) -CELL_DIR = cell -endif - -SUBDIRS = softpipe i915simple i965simple failover pipebuffer $(CELL_DIR) +SUBDIRS = softpipe i915simple i965simple failover default: subdirs diff --git a/src/gallium/winsys/Makefile b/src/gallium/winsys/Makefile new file mode 100644 index 0000000000..3dc5ea5744 --- /dev/null +++ b/src/gallium/winsys/Makefile @@ -0,0 +1,20 @@ +TOP = ../../.. +include $(TOP)/configs/current + + +SUBDIRS = $(GALLIUM_WINSYS_DIRS) + + +default: subdirs + + +subdirs: + @for dir in $(SUBDIRS) ; do \ + if [ -d $$dir ] ; then \ + (cd $$dir && $(MAKE)) || exit 1 ; \ + fi \ + done + + +clean: + rm -f `find . -name \*.[oa]` diff --git a/src/gallium/winsys/dri/Makefile.template b/src/gallium/winsys/dri/Makefile.template index 2a261ed669..65a93bd53e 100644 --- a/src/gallium/winsys/dri/Makefile.template +++ b/src/gallium/winsys/dri/Makefile.template @@ -1,7 +1,9 @@ # -*-makefile-*- -MESA_MODULES = $(TOP)/src/mesa/libmesa.a - +MESA_MODULES = \ + $(TOP)/src/mesa/libmesa.a \ + $(GALLIUM_AUXILIARIES) + COMMON_GALLIUM_SOURCES = \ $(TOP)/src/mesa/drivers/dri/common/utils.c \ $(TOP)/src/mesa/drivers/dri/common/vblank.c \ diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile new file mode 100644 index 0000000000..2664ac47ce --- /dev/null +++ b/src/gallium/winsys/xlib/Makefile @@ -0,0 +1,113 @@ +# src/mesa/Makefile + +TOP = ../../../.. +include $(TOP)/configs/current + + +INCLUDE_DIRS = \ + -I$(TOP)/include \ + -I$(TOP)/src/mesa \ + -I$(TOP)/src/mesa/main \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/auxiliary + +X11_DRIVER_SOURCES = \ + glxapi.c \ + fakeglx.c \ + xfonts.c \ + xm_api.c \ + xm_winsys.c \ + xm_winsys_aub.c \ + brw_aub.c + + +GL_MAJOR = 1 +GL_MINOR = 5 +GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY) + + +PIPE_LIB = \ + $(GALLIUM_DRIVERS) \ + $(TOP)/src/mesa/libglapi.a \ + $(TOP)/src/mesa/libmesa.a \ + $(GALLIUM_AUXILIARIES) + +ifeq ($(CONFIG_NAME), linux-cell) +CELL_LIB = $(TOP)/src/gallium/drivers/cell/ppu/libcell.a +CELL_LIB_SPU = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a +endif + +ifeq ($(CONFIG_NAME), linux-llvm) +LLVM_LIB = $(TOP)/src/gallium/auxiliary/llvm/libgallivm.a +endif + + +.SUFFIXES : .cpp + +.c.o: + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDE_DIRS) $(CXXFLAGS) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ + + +default: $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) + + +###################################################################### +# Stand-alone Mesa libGL and libOSMesa +STAND_ALONE_DRIVER_SOURCES = \ + $(X11_DRIVER_SOURCES) + +STAND_ALONE_DRIVER_OBJECTS = $(STAND_ALONE_DRIVER_SOURCES:.c=.o) + +STAND_ALONE_OBJECTS = \ + $(STAND_ALONE_DRIVER_OBJECTS) + +# Make the GL library +$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(STAND_ALONE_OBJECTS) $(LLVM_LIB) $(PIPE_LIB) $(CELL_LIB) $(CELL_LIB_SPU) + $(TOP)/bin/mklib -o $(GL_LIB) \ + -linker "$(CC)" \ + -major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \ + -install $(TOP)/$(LIB_DIR) \ + $(MKLIB_OPTIONS) $(STAND_ALONE_OBJECTS) \ + --start-group $(PIPE_LIB) $(LLVM_LIB) --end-group $(CELL_LIB) $(CELL_LIB_SPU) $(GL_LIB_DEPS) + + +###################################################################### +# Generic stuff + +depend: $(ALL_SOURCES) + @ echo "running $(MKDEP)" + @ rm -f depend # workaround oops on gutsy?!? + @ touch depend + @ $(MKDEP) $(MKDEP_OPTIONS) $(DEFINES) $(INCLUDE_DIRS) $(ALL_SOURCES) \ + > /dev/null 2>/dev/null + + +install: default + $(INSTALL) -d $(INSTALL_DIR)/include/GL + $(INSTALL) -d $(INSTALL_DIR)/$(LIB_DIR) + $(INSTALL) -m 644 $(TOP)/include/GL/*.h $(INSTALL_DIR)/include/GL + @if [ -e $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) ]; then \ + $(INSTALL) $(TOP)/$(LIB_DIR)/libGL* $(INSTALL_DIR)/$(LIB_DIR); \ + fi + +## NOT INSTALLED YET: +## $(INSTALL) -d $(INSTALL_DIR)/include/GLES +## $(INSTALL) -m 644 include/GLES/*.h $(INSTALL_DIR)/include/GLES + + +# Emacs tags +tags: + etags `find . -name \*.[ch]` $(TOP)/include/GL/*.h + +clean: + -rm -f *.o + + +include depend diff --git a/src/mesa/Makefile b/src/mesa/Makefile index c8cb2b592f..86a9cf0b88 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -11,19 +11,6 @@ GL_MINOR = 5 GL_TINY = 0$(MESA_MAJOR)0$(MESA_MINOR)0$(MESA_TINY) -PIPE_LIB = \ - $(TOP)/src/gallium/drivers/softpipe/libsoftpipe.a \ - $(TOP)/src/gallium/drivers/i965simple/libi965simple.a - -ifeq ($(CONFIG_NAME), linux-cell) -CELL_LIB = $(TOP)/src/gallium/drivers/cell/ppu/libcell.a -CELL_LIB_SPU = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a -endif - -ifeq ($(CONFIG_NAME), linux-llvm) -LLVM_LIB = $(TOP)/src/gallium/auxiliary/llvm/libgallivm.a -endif - .SUFFIXES : .cpp .c.o: @@ -36,33 +23,14 @@ endif $(CC) -c $(INCLUDE_DIRS) $(CFLAGS) $< -o $@ -# Figure out what to make here -default: - @if [ "${DRIVER_DIRS}" = "dri" ] ; then \ - $(MAKE) linux-solo ; \ - elif [ "${DRIVER_DIRS}" = "osmesa" ] ; then \ - $(MAKE) osmesa-only ; \ - elif [ "$(DRIVER_DIRS)" = "beos" ]; then \ - $(MAKE) beos ; \ - elif [ "$(DRIVER_DIRS)" = "directfb" ]; then \ - $(MAKE) directfb ; \ - elif [ "$(DRIVER_DIRS)" = "fbdev osmesa" ]; then \ - $(MAKE) fbdev ; $(MAKE) osmesa-only ; \ - else \ - $(MAKE) stand-alone ; \ - fi - +default: depend subdirs libmesa.a -###################################################################### -# BeOS driver target - -beos: depend subdirs libmesa.a - cd drivers/beos; $(MAKE) +ifneq ($(DRIVER_DIRS),dri) +default: libglapi.a +endif ###################################################################### -# Linux DRI drivers - # Make archive of core object files libmesa.a: $(SOLO_OBJECTS) @ $(TOP)/bin/mklib -o mesa -static $(SOLO_OBJECTS); @@ -70,32 +38,8 @@ libmesa.a: $(SOLO_OBJECTS) mimeset -f "$@" ; \ fi -linux-solo: depend subdirs libmesa.a - cd $(TOP)/src/gallium/winsys/dri ; $(MAKE) - - -##################################################################### -# Stand-alone Mesa libGL, no built-in drivers (DirectFB) - -libgl-core: $(CORE_OBJECTS) - @ $(TOP)/bin/mklib -o $(GL_LIB) \ - -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ - -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) $(CORE_OBJECTS) \ - $(GL_LIB_DEPS) - -directfb: depend subdirs libgl-core - cd drivers/directfb ; $(MAKE) - - -##################################################################### -# fbdev Mesa driver (libGL.so) - -fbdev: $(CORE_OBJECTS) $(FBDEV_DRIVER_OBJECTS) $(COMMON_DRIVER_OBJECTS) - @ $(TOP)/bin/mklib -o $(GL_LIB) \ - -major $(MESA_MAJOR) -minor $(MESA_MINOR) -patch $(MESA_TINY) \ - -install $(TOP)/$(LIB_DIR) $(MKLIB_OPTIONS) \ - $(CORE_OBJECTS) $(FBDEV_DRIVER_OBJECTS) \ - $(COMMON_DRIVER_OBJECTS) $(GL_LIB_DEPS) +libglapi.a: $(GLAPI_OBJECTS) + @ $(TOP)/bin/mklib -o glapi -static $(GLAPI_OBJECTS) ###################################################################### @@ -176,9 +120,6 @@ install: default @if [ -e $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME) ]; then \ $(INSTALL) $(TOP)/$(LIB_DIR)/libOSMesa* $(INSTALL_DIR)/$(LIB_DIR); \ fi - @if [ "${DRIVER_DIRS}" = "dri" ] ; then \ - cd $(TOP)/gallium/winsys/dri ; $(MAKE) install ; \ - fi ## NOT INSTALLED YET: ## $(INSTALL) -d $(INSTALL_DIR)/include/GLES @@ -192,9 +133,8 @@ tags: clean: -rm -f */*.o -rm -f */*/*.o - -rm -f depend depend.bak libmesa.a + -rm -f depend depend.bak libmesa.a libglapi.a -rm -f drivers/*/*.o - (cd drivers/dri && $(MAKE) clean) (cd x86 && $(MAKE) clean) (cd x86-64 && $(MAKE) clean) diff --git a/src/mesa/sources b/src/mesa/sources index f83d247a1e..0d185fd5f3 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -156,51 +156,6 @@ VF_SOURCES = \ vf/vf_generic.c \ vf/vf_sse.c - -DRAW_SOURCES = \ - $(TOP)/src/gallium/auxiliary/draw/draw_clip.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vs_exec.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vs_sse.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vs_llvm.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_context.c\ - $(TOP)/src/gallium/auxiliary/draw/draw_cull.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_debug.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_flatshade.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_offset.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_prim.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_stipple.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_twoside.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_unfilled.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_validate.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vbuf.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vertex.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vertex_cache.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vertex_fetch.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vertex_shader.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vf.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vf_generic.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_vf_sse.c \ - $(TOP)/src/gallium/auxiliary/draw/draw_wide_prims.c - -TGSIEXEC_SOURCES = \ - $(TOP)/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c \ - $(TOP)/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c - -TGSIUTIL_SOURCES = \ - $(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_build.c \ - $(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_dump.c \ - $(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_parse.c \ - $(TOP)/src/gallium/auxiliary/tgsi/util/tgsi_util.c - -STATECACHE_SOURCES = \ - $(TOP)/src/gallium/auxiliary/cso_cache/cso_hash.c \ - $(TOP)/src/gallium/auxiliary/cso_cache/cso_cache.c - -PIPEUTIL_SOURCES = \ - $(TOP)/src/gallium/auxiliary/util/p_debug.c \ - $(TOP)/src/gallium/auxiliary/util/p_tile.c \ - $(TOP)/src/gallium/auxiliary/util/p_util.c - STATETRACKER_SOURCES = \ state_tracker/st_atom.c \ state_tracker/st_atom_blend.c \ @@ -229,7 +184,7 @@ STATETRACKER_SOURCES = \ state_tracker/st_cb_readpixels.c \ state_tracker/st_cb_strings.c \ state_tracker/st_cb_texture.c \ - state_tracker/st_cache.c \ + state_tracker/st_cache.c \ state_tracker/st_context.c \ state_tracker/st_debug.c \ state_tracker/st_draw.c \ @@ -333,15 +288,6 @@ SPARC_API = \ __COMMON_DRIVER_SOURCES = \ drivers/common/driverfuncs.c -X11_DRIVER_SOURCES = \ - $(TOP)/src/gallium/winsys/xlib/glxapi.c \ - $(TOP)/src/gallium/winsys/xlib/fakeglx.c \ - $(TOP)/src/gallium/winsys/xlib/xfonts.c \ - $(TOP)/src/gallium/winsys/xlib/xm_api.c \ - $(TOP)/src/gallium/winsys/xlib/xm_winsys.c \ - $(TOP)/src/gallium/winsys/xlib/xm_winsys_aub.c \ - $(TOP)/src/gallium/winsys/xlib/brw_aub.c - OSMESA_DRIVER_SOURCES = \ drivers/osmesa/osmesa.c -- cgit v1.2.3 From 33ceb6716a2166db75659fa66d85fb4cfb9633c7 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 18 Feb 2008 10:52:44 +0000 Subject: Update scons build for new code layout. --- SConstruct | 7 +- src/SConscript | 7 ++ src/gallium/SConscript | 23 +++- src/gallium/auxiliary/cso_cache/SConscript | 10 ++ src/gallium/auxiliary/draw/SConscript | 31 +++++ src/gallium/auxiliary/pipebuffer/SConscript | 14 +++ src/gallium/auxiliary/tgsi/SConscript | 14 +++ src/gallium/auxiliary/util/SConscript | 11 ++ src/gallium/drivers/failover/SConscript | 13 +++ src/gallium/winsys/SConscript | 10 ++ src/gallium/winsys/dri/SConscript | 51 +++++++++ src/gallium/winsys/dri/intel/SConscript | 14 +-- src/gallium/winsys/xlib/SConscript | 28 +++++ src/mesa/SConscript | 170 ++-------------------------- src/mesa/drivers/dri/SConscript | 48 -------- 15 files changed, 229 insertions(+), 222 deletions(-) create mode 100644 src/SConscript create mode 100644 src/gallium/auxiliary/cso_cache/SConscript create mode 100644 src/gallium/auxiliary/draw/SConscript create mode 100644 src/gallium/auxiliary/pipebuffer/SConscript create mode 100644 src/gallium/auxiliary/tgsi/SConscript create mode 100644 src/gallium/auxiliary/util/SConscript create mode 100644 src/gallium/drivers/failover/SConscript create mode 100644 src/gallium/winsys/SConscript create mode 100644 src/gallium/winsys/dri/SConscript create mode 100644 src/gallium/winsys/xlib/SConscript delete mode 100644 src/mesa/drivers/dri/SConscript (limited to 'src/gallium/auxiliary') diff --git a/SConstruct b/SConstruct index 22a4072c93..1126aff21b 100644 --- a/SConstruct +++ b/SConstruct @@ -108,7 +108,10 @@ env.Append(CPPPATH = [ '#/include', '#/src/mesa', '#/src/mesa/main', - '#/src/mesa/pipe', + '#/src/gallium/include/pipe', + '#/src/gallium/include', + '#/src/gallium/auxiliary', + '#/src/gallium/drivers', ]) @@ -222,7 +225,7 @@ build_dir = os.path.join(build_topdir, build_subdir) # http://www.scons.org/wiki/SimultaneousVariantBuilds SConscript( - 'src/mesa/SConscript', + 'src/SConscript', build_dir = build_dir, duplicate = 0 # http://www.scons.org/doc/0.97/HTML/scons-user/x2261.html ) diff --git a/src/SConscript b/src/SConscript new file mode 100644 index 0000000000..5b09943894 --- /dev/null +++ b/src/SConscript @@ -0,0 +1,7 @@ +Import('*') + +SConscript([ + 'gallium/SConscript', + 'mesa/SConscript', + 'gallium/winsys/SConscript', +]) diff --git a/src/gallium/SConscript b/src/gallium/SConscript index d9c20e0100..a835f6d661 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -1,9 +1,24 @@ Import('*') -#env = env.Clone() +env = env.Clone() + +auxiliaries = [] + +Export('auxiliaries') + SConscript([ - 'softpipe/SConscript', - 'i915simple/SConscript', - 'i965simple/SConscript', + # NOTE: order matters! + 'auxiliary/util/SConscript', + 'auxiliary/tgsi/SConscript', + 'auxiliary/cso_cache/SConscript', + 'auxiliary/draw/SConscript', + #'auxiliary/llvm/SConscript', + 'auxiliary/pipebuffer/SConscript', + + 'drivers/softpipe/SConscript', + 'drivers/i915simple/SConscript', + 'drivers/i965simple/SConscript', + 'drivers/failover/SConscript', + #'drivers/cell/SConscript', ]) diff --git a/src/gallium/auxiliary/cso_cache/SConscript b/src/gallium/auxiliary/cso_cache/SConscript new file mode 100644 index 0000000000..9751881613 --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/SConscript @@ -0,0 +1,10 @@ +Import('*') + +cso_cache = env.ConvenienceLibrary( + target = 'cso_cache', + source = [ + 'cso_cache.c', + 'cso_hash.c', + ]) + +auxiliaries.insert(0, cso_cache) diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript new file mode 100644 index 0000000000..8e3a8caa74 --- /dev/null +++ b/src/gallium/auxiliary/draw/SConscript @@ -0,0 +1,31 @@ +Import('*') + +draw = env.ConvenienceLibrary( + target = 'draw', + source = [ + 'draw_clip.c', + 'draw_vs_exec.c', + 'draw_vs_sse.c', + 'draw_vs_llvm.c', + 'draw_context.c', + 'draw_cull.c', + 'draw_debug.c', + 'draw_flatshade.c', + 'draw_offset.c', + 'draw_prim.c', + 'draw_stipple.c', + 'draw_twoside.c', + 'draw_unfilled.c', + 'draw_validate.c', + 'draw_vbuf.c', + 'draw_vertex.c', + 'draw_vertex_cache.c', + 'draw_vertex_fetch.c', + 'draw_vertex_shader.c', + 'draw_vf.c', + 'draw_vf_generic.c', + 'draw_vf_sse.c', + 'draw_wide_prims.c', + ]) + +auxiliaries.insert(0, draw) diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript new file mode 100644 index 0000000000..3d41fd84a6 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -0,0 +1,14 @@ +Import('*') + +pipebuffer = env.ConvenienceLibrary( + target = 'pipebuffer', + source = [ + 'pb_buffer_fenced.c', + 'pb_buffer_malloc.c', + 'pb_bufmgr_fenced.c', + 'pb_bufmgr_mm.c', + 'pb_bufmgr_pool.c', + 'pb_winsys.c', + ]) + +auxiliaries.insert(0, pipebuffer) diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript new file mode 100644 index 0000000000..8464bfe944 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/SConscript @@ -0,0 +1,14 @@ +Import('*') + +tgsi = env.ConvenienceLibrary( + target = 'tgsi', + source = [ + 'exec/tgsi_exec.c', + 'exec/tgsi_sse2.c', + 'util/tgsi_build.c', + 'util/tgsi_dump.c', + 'util/tgsi_parse.c', + 'util/tgsi_util.c', + ]) + +auxiliaries.insert(0, tgsi) diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript new file mode 100644 index 0000000000..b126cf44d6 --- /dev/null +++ b/src/gallium/auxiliary/util/SConscript @@ -0,0 +1,11 @@ +Import('*') + +util = env.ConvenienceLibrary( + target = 'util', + source = [ + 'p_debug.c', + 'p_tile.c', + 'p_util.c', + ]) + +auxiliaries.insert(0, util) diff --git a/src/gallium/drivers/failover/SConscript b/src/gallium/drivers/failover/SConscript new file mode 100644 index 0000000000..f8e9b1b491 --- /dev/null +++ b/src/gallium/drivers/failover/SConscript @@ -0,0 +1,13 @@ +Import('*') + +env = env.Clone() + +failover = env.ConvenienceLibrary( + target = 'failover', + source = [ + 'fo_state.c', + 'fo_state_emit.c', + 'fo_context.c', + ]) + +Export('failover') diff --git a/src/gallium/winsys/SConscript b/src/gallium/winsys/SConscript new file mode 100644 index 0000000000..32215d8d58 --- /dev/null +++ b/src/gallium/winsys/SConscript @@ -0,0 +1,10 @@ +Import('*') + +if dri: + SConscript([ + 'dri/SConscript', + ]) +else: + SConscript([ + 'xlib/SConscript', + ]) diff --git a/src/gallium/winsys/dri/SConscript b/src/gallium/winsys/dri/SConscript new file mode 100644 index 0000000000..8c56ce917c --- /dev/null +++ b/src/gallium/winsys/dri/SConscript @@ -0,0 +1,51 @@ +Import('*') + +drienv = env.Clone() + +drienv.Replace(CPPPATH = [ + '#src/mesa/drivers/dri/common', + '#include', + '#include/GL/internal', + '#src/gallium/include', + '#src/gallium/auxiliary', + '#src/gallium/drivers', + '#src/mesa', + '#src/mesa/main', + '#src/mesa/glapi', + '#src/mesa/math', + '#src/mesa/transform', + '#src/mesa/shader', + '#src/mesa/swrast', + '#src/mesa/swrast_setup', + '#src/egl/main', + '#src/egl/drivers/dri', +]) + +drienv.ParseConfig('pkg-config --cflags --libs libdrm') + +COMMON_GALLIUM_SOURCES = [ + '#src/mesa/drivers/dri/common/utils.c', + '#src/mesa/drivers/dri/common/vblank.c', + '#src/mesa/drivers/dri/common/dri_util.c', + '#src/mesa/drivers/dri/common/xmlconfig.c', +] + +COMMON_BM_SOURCES = [ + '#src/mesa/drivers/dri/common/dri_bufmgr.c', + '#src/mesa/drivers/dri/common/dri_drmpool.c', +] + +Export([ + 'drienv', + 'COMMON_GALLIUM_SOURCES', + 'COMMON_BM_SOURCES', +]) + +# TODO: Installation +#install: $(LIBNAME) +# $(INSTALL) -d $(DRI_DRIVER_INSTALL_DIR) +# $(INSTALL) -m 755 $(LIBNAME) $(DRI_DRIVER_INSTALL_DIR) + +SConscript([ + 'intel/SConscript', +]) diff --git a/src/gallium/winsys/dri/intel/SConscript b/src/gallium/winsys/dri/intel/SConscript index a7cc10450e..525ba580e8 100644 --- a/src/gallium/winsys/dri/intel/SConscript +++ b/src/gallium/winsys/dri/intel/SConscript @@ -9,11 +9,6 @@ env.Append(CPPPATH = [ #MINIGLX_SOURCES = server/intel_dri.c -pipe_drivers = [ - softpipe, - i915simple -] - DRIVER_SOURCES = [ 'intel_winsys_pipe.c', 'intel_winsys_softpipe.c', @@ -31,11 +26,14 @@ sources = \ COMMON_BM_SOURCES + \ DRIVER_SOURCES -# DRIVER_DEFINES = -I../intel $(shell pkg-config libdrm --atleast-version=2.3.1 \ -# && echo "-DDRM_VBLANK_FLIP=DRM_VBLANK_FLIP") +drivers = [ + softpipe, + i915simple +] +# TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions env.SharedLibrary( target ='i915tex_dri.so', source = sources, - LIBS = pipe_drivers + env['LIBS'], + LIBS = mesa + drivers + auxiliaries + env['LIBS'], ) \ No newline at end of file diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript new file mode 100644 index 0000000000..f8aa5ef945 --- /dev/null +++ b/src/gallium/winsys/xlib/SConscript @@ -0,0 +1,28 @@ +####################################################################### +# SConscript for xlib winsys + +Import('*') + + +sources = [ + 'glxapi.c', + 'fakeglx.c', + 'xfonts.c', + 'xm_api.c', + 'xm_winsys.c', + 'xm_winsys_aub.c', + 'brw_aub.c', +] + +drivers = [ + softpipe, + i915simple, + i965simple, +] + +# TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions +env.SharedLibrary( + target ='GL', + source = sources, + LIBS = glapi + mesa + drivers + auxiliaries + env['LIBS'], +) diff --git a/src/mesa/SConscript b/src/mesa/SConscript index faf8c84872..a828133580 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -1,7 +1,5 @@ ####################################################################### -# SConscript for mesa -# -# TODO: Split this into per-module SConscripts +# SConscript for Mesa Import('*') @@ -116,53 +114,6 @@ VF_SOURCES = [ 'vf/vf_sse.c', ] -DRAW_SOURCES = [ - 'pipe/draw/draw_clip.c', - 'pipe/draw/draw_context.c', - 'pipe/draw/draw_cull.c', - 'pipe/draw/draw_debug.c', - 'pipe/draw/draw_flatshade.c', - 'pipe/draw/draw_offset.c', - 'pipe/draw/draw_prim.c', - 'pipe/draw/draw_stipple.c', - 'pipe/draw/draw_twoside.c', - 'pipe/draw/draw_unfilled.c', - 'pipe/draw/draw_validate.c', - 'pipe/draw/draw_vbuf.c', - 'pipe/draw/draw_vertex.c', - 'pipe/draw/draw_vertex_cache.c', - 'pipe/draw/draw_vertex_fetch.c', - 'pipe/draw/draw_vertex_shader.c', - 'pipe/draw/draw_vertex_shader_llvm.c', - 'pipe/draw/draw_vf.c', - 'pipe/draw/draw_vf_generic.c', - 'pipe/draw/draw_vf_sse.c', - 'pipe/draw/draw_wide_prims.c', -] - -TGSIEXEC_SOURCES = [ - 'pipe/tgsi/exec/tgsi_exec.c', - 'pipe/tgsi/exec/tgsi_sse2.c', -] - -TGSIUTIL_SOURCES = [ - 'pipe/tgsi/util/tgsi_build.c', - 'pipe/tgsi/util/tgsi_dump.c', - 'pipe/tgsi/util/tgsi_parse.c', - 'pipe/tgsi/util/tgsi_util.c', -] - -STATECACHE_SOURCES = [ - 'pipe/cso_cache/cso_hash.c', - 'pipe/cso_cache/cso_cache.c', -] - -PIPEUTIL_SOURCES = [ - 'pipe/util/p_debug.c', - 'pipe/util/p_tile.c', - 'pipe/util/p_util.c', -] - STATETRACKER_SOURCES = [ 'state_tracker/st_atom.c', 'state_tracker/st_atom_blend.c', @@ -311,126 +262,25 @@ else: ASM_SOURCES = [] API_SOURCES = [] - -####################################################################### -# Driver sources - - -X11_DRIVER_SOURCES = [ - 'pipe/xlib/glxapi.c', - 'pipe/xlib/fakeglx.c', - 'pipe/xlib/xfonts.c', - 'pipe/xlib/xm_api.c', - 'pipe/xlib/xm_winsys.c', - 'pipe/xlib/xm_winsys_aub.c', - 'pipe/xlib/brw_aub.c', -] - -OSMESA_DRIVER_SOURCES = [ - 'drivers/osmesa/osmesa.c', -] - -GLIDE_DRIVER_SOURCES = [ - 'drivers/glide/fxapi.c', - 'drivers/glide/fxdd.c', - 'drivers/glide/fxddspan.c', - 'drivers/glide/fxddtex.c', - 'drivers/glide/fxsetup.c', - 'drivers/glide/fxtexman.c', - 'drivers/glide/fxtris.c', - 'drivers/glide/fxvb.c', - 'drivers/glide/fxglidew.c', - 'drivers/glide/fxg.c', -] - -SVGA_DRIVER_SOURCES = [ - 'drivers/svga/svgamesa.c', - 'drivers/svga/svgamesa8.c', - 'drivers/svga/svgamesa15.c', - 'drivers/svga/svgamesa16.c', - 'drivers/svga/svgamesa24.c', - 'drivers/svga/svgamesa32.c', -] - -FBDEV_DRIVER_SOURCES = [ - 'drivers/fbdev/glfbdev.c', -] - - -### All the core C sources - SOLO_SOURCES = \ MAIN_SOURCES + \ MATH_SOURCES + \ VBO_SOURCES + \ VF_SOURCES + \ - DRAW_SOURCES + \ - TGSIEXEC_SOURCES + \ - TGSIUTIL_SOURCES + \ - PIPEUTIL_SOURCES + \ - STATECACHE_SOURCES + \ STATETRACKER_SOURCES + \ SHADER_SOURCES + \ ASM_SOURCES + \ SLANG_SOURCES -CORE_SOURCES = \ - GLAPI_SOURCES + API_SOURCES + \ - SOLO_SOURCES - -ALL_SOURCES = \ - GLAPI_SOURCES + API_SOURCES + \ - SOLO_SOURCES + \ - ASM_SOURCES + \ - X11_DRIVER_SOURCES + \ - FBDEV_DRIVER_SOURCES + \ - OSMESA_DRIVER_SOURCES - - -###################################################################### -# Gallium sources - -SConscript([ - 'pipe/SConscript', -]) - - -###################################################################### -# libGL +mesa = env.ConvenienceLibrary( + target = 'mesa', + source = SOLO_SOURCES, +) +Export('mesa') if not dri: - STAND_ALONE_DRIVER_SOURCES = \ - CORE_SOURCES + \ - X11_DRIVER_SOURCES - - Import( - 'softpipe', - 'i915simple', - 'i965simple' + glapi = env.ConvenienceLibrary( + target = 'glapi', + source = GLAPI_SOURCES + API_SOURCES, ) - - pipe_drivers = [ - softpipe, - i965simple - ] - - env.SharedLibrary( - target ='GL', - source = STAND_ALONE_DRIVER_SOURCES, - LIBS = [softpipe, i965simple] + env['LIBS'], - ) - - -###################################################################### -# Driver sources - -if dri: - mesa = env.ConvenienceLibrary( - target = 'mesa', - source = SOLO_SOURCES, - ) - env.Prepend(LIBS = [mesa]) - - SConscript([ - 'drivers/dri/SConscript', - ]) + Export('glapi') diff --git a/src/mesa/drivers/dri/SConscript b/src/mesa/drivers/dri/SConscript deleted file mode 100644 index d32bd08669..0000000000 --- a/src/mesa/drivers/dri/SConscript +++ /dev/null @@ -1,48 +0,0 @@ -Import('*') - -drienv = env.Clone() - -drienv.Replace(CPPPATH = [ - '#src/mesa/drivers/dri/common', - '#include', - '#include/GL/internal', - '#src/mesa', - '#src/mesa/main', - '#src/mesa/glapi', - '#src/mesa/math', - '#src/mesa/transform', - '#src/mesa/shader', - '#src/mesa/swrast', - '#src/mesa/swrast_setup', - '#src/egl/main', - '#src/egl/drivers/dri', -]) - -drienv.ParseConfig('pkg-config --cflags --libs libdrm') - -COMMON_GALLIUM_SOURCES = [ - '../common/utils.c', - '../common/vblank.c', - '../common/dri_util.c', - '../common/xmlconfig.c', -] - -COMMON_BM_SOURCES = [ - '../common/dri_bufmgr.c', - '../common/dri_drmpool.c', -] - -Export([ - 'drienv', - 'COMMON_GALLIUM_SOURCES', - 'COMMON_BM_SOURCES', -]) - -# TODO: Installation -#install: $(LIBNAME) -# $(INSTALL) -d $(DRI_DRIVER_INSTALL_DIR) -# $(INSTALL) -m 755 $(LIBNAME) $(DRI_DRIVER_INSTALL_DIR) - -SConscript([ - 'intel_winsys/SConscript', -]) -- cgit v1.2.3 From 687a8b96ef13658bbe779d0011ce1144844f1972 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 18 Feb 2008 20:02:42 +0900 Subject: Standardize on using the pipe/ include prefix. --- SConstruct | 1 - src/gallium/Makefile.template | 1 - src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 10 +++++----- src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c | 4 ++-- src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 8 ++++---- src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c | 10 +++++----- src/gallium/drivers/i915simple/i915_state_immediate.c | 2 +- src/gallium/drivers/i915simple/i915_state_inlines.h | 4 ++-- src/gallium/drivers/softpipe/sp_state_surface.c | 2 +- 9 files changed, 20 insertions(+), 22 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/SConstruct b/SConstruct index 1126aff21b..4fb51f2e6d 100644 --- a/SConstruct +++ b/SConstruct @@ -108,7 +108,6 @@ env.Append(CPPPATH = [ '#/include', '#/src/mesa', '#/src/mesa/main', - '#/src/gallium/include/pipe', '#/src/gallium/include', '#/src/gallium/auxiliary', '#/src/gallium/drivers', diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 6ad58c205c..6698212e77 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -16,7 +16,6 @@ OBJECTS = $(C_SOURCES:.c=.o) \ INCLUDES = \ -I. \ -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/gallium/include/pipe \ -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers \ -I$(TOP)/src/mesa \ diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index f4fc3f6d71..bc85c4b19f 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -36,11 +36,11 @@ #include "linked_list.h" -#include "p_compiler.h" -#include "p_debug.h" -#include "p_winsys.h" -#include "p_thread.h" -#include "p_util.h" +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_winsys.h" +#include "pipe/p_thread.h" +#include "pipe/p_util.h" #include "pb_buffer.h" #include "pb_buffer_fenced.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c index c535d3276c..bffca5b244 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c @@ -34,8 +34,8 @@ */ -#include "p_debug.h" -#include "p_util.h" +#include "pipe/p_debug.h" +#include "pipe/p_util.h" #include "pb_buffer.h" #include "pb_buffer_fenced.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index 8b1b51c0e2..969aab51b5 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -36,10 +36,10 @@ #include "linked_list.h" -#include "p_defines.h" -#include "p_debug.h" -#include "p_thread.h" -#include "p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_debug.h" +#include "pipe/p_thread.h" +#include "pipe/p_util.h" #include "pb_buffer.h" #include "pb_bufmgr.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c index 04477a865a..beb145b7cb 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -37,11 +37,11 @@ #include "linked_list.h" -#include "p_compiler.h" -#include "p_debug.h" -#include "p_thread.h" -#include "p_defines.h" -#include "p_util.h" +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_thread.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" #include "pb_buffer.h" #include "pb_bufmgr.h" diff --git a/src/gallium/drivers/i915simple/i915_state_immediate.c b/src/gallium/drivers/i915simple/i915_state_immediate.c index 07031fc6c5..dfbbcab624 100644 --- a/src/gallium/drivers/i915simple/i915_state_immediate.c +++ b/src/gallium/drivers/i915simple/i915_state_immediate.c @@ -33,7 +33,7 @@ #include "i915_context.h" #include "i915_state.h" #include "i915_reg.h" -#include "p_util.h" +#include "pipe/p_util.h" /* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet. diff --git a/src/gallium/drivers/i915simple/i915_state_inlines.h b/src/gallium/drivers/i915simple/i915_state_inlines.h index 0934ac79a4..378de8f9c4 100644 --- a/src/gallium/drivers/i915simple/i915_state_inlines.h +++ b/src/gallium/drivers/i915simple/i915_state_inlines.h @@ -28,8 +28,8 @@ #ifndef I915_STATE_INLINES_H #define I915_STATE_INLINES_H -#include "p_compiler.h" -#include "p_defines.h" +#include "pipe/p_compiler.h" +#include "pipe/p_defines.h" #include "i915_reg.h" diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index e2c6893e9f..124b18b708 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -27,7 +27,7 @@ /* Authors: Keith Whitwell */ -#include "p_inlines.h" +#include "pipe/p_inlines.h" #include "sp_context.h" #include "sp_state.h" -- cgit v1.2.3 From 3f3b09d6d86cfd277c5837d15466ee703897aa3d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 18 Feb 2008 20:05:06 +0900 Subject: Rename llvm -> gallivm. Following the directory == library name policy simplifies the build system. --- src/gallium/auxiliary/gallivm/Makefile | 85 ++ src/gallium/auxiliary/gallivm/gallivm.cpp | 327 ++++++ src/gallium/auxiliary/gallivm/gallivm.h | 103 ++ src/gallium/auxiliary/gallivm/gallivm_builtins.cpp | 567 +++++++++ src/gallium/auxiliary/gallivm/gallivm_cpu.cpp | 202 ++++ src/gallium/auxiliary/gallivm/gallivm_p.h | 110 ++ src/gallium/auxiliary/gallivm/instructions.cpp | 889 ++++++++++++++ src/gallium/auxiliary/gallivm/instructions.h | 152 +++ src/gallium/auxiliary/gallivm/instructionssoa.cpp | 121 ++ src/gallium/auxiliary/gallivm/instructionssoa.h | 74 ++ src/gallium/auxiliary/gallivm/llvm_builtins.c | 115 ++ src/gallium/auxiliary/gallivm/loweringpass.cpp | 17 + src/gallium/auxiliary/gallivm/loweringpass.h | 15 + src/gallium/auxiliary/gallivm/storage.cpp | 364 ++++++ src/gallium/auxiliary/gallivm/storage.h | 133 +++ src/gallium/auxiliary/gallivm/storagesoa.cpp | 389 +++++++ src/gallium/auxiliary/gallivm/storagesoa.h | 111 ++ src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 1221 ++++++++++++++++++++ src/gallium/auxiliary/gallivm/tgsitollvm.h | 20 + src/gallium/auxiliary/llvm/Makefile | 85 -- src/gallium/auxiliary/llvm/gallivm.cpp | 327 ------ src/gallium/auxiliary/llvm/gallivm.h | 103 -- src/gallium/auxiliary/llvm/gallivm_builtins.cpp | 567 --------- src/gallium/auxiliary/llvm/gallivm_cpu.cpp | 202 ---- src/gallium/auxiliary/llvm/gallivm_p.h | 110 -- src/gallium/auxiliary/llvm/instructions.cpp | 889 -------------- src/gallium/auxiliary/llvm/instructions.h | 152 --- src/gallium/auxiliary/llvm/instructionssoa.cpp | 121 -- src/gallium/auxiliary/llvm/instructionssoa.h | 74 -- src/gallium/auxiliary/llvm/llvm_builtins.c | 115 -- src/gallium/auxiliary/llvm/loweringpass.cpp | 17 - src/gallium/auxiliary/llvm/loweringpass.h | 15 - src/gallium/auxiliary/llvm/storage.cpp | 364 ------ src/gallium/auxiliary/llvm/storage.h | 133 --- src/gallium/auxiliary/llvm/storagesoa.cpp | 389 ------- src/gallium/auxiliary/llvm/storagesoa.h | 111 -- src/gallium/auxiliary/llvm/tgsitollvm.cpp | 1221 -------------------- src/gallium/auxiliary/llvm/tgsitollvm.h | 20 - 38 files changed, 5015 insertions(+), 5015 deletions(-) create mode 100644 src/gallium/auxiliary/gallivm/Makefile create mode 100644 src/gallium/auxiliary/gallivm/gallivm.cpp create mode 100644 src/gallium/auxiliary/gallivm/gallivm.h create mode 100644 src/gallium/auxiliary/gallivm/gallivm_builtins.cpp create mode 100644 src/gallium/auxiliary/gallivm/gallivm_cpu.cpp create mode 100644 src/gallium/auxiliary/gallivm/gallivm_p.h create mode 100644 src/gallium/auxiliary/gallivm/instructions.cpp create mode 100644 src/gallium/auxiliary/gallivm/instructions.h create mode 100644 src/gallium/auxiliary/gallivm/instructionssoa.cpp create mode 100644 src/gallium/auxiliary/gallivm/instructionssoa.h create mode 100644 src/gallium/auxiliary/gallivm/llvm_builtins.c create mode 100644 src/gallium/auxiliary/gallivm/loweringpass.cpp create mode 100644 src/gallium/auxiliary/gallivm/loweringpass.h create mode 100644 src/gallium/auxiliary/gallivm/storage.cpp create mode 100644 src/gallium/auxiliary/gallivm/storage.h create mode 100644 src/gallium/auxiliary/gallivm/storagesoa.cpp create mode 100644 src/gallium/auxiliary/gallivm/storagesoa.h create mode 100644 src/gallium/auxiliary/gallivm/tgsitollvm.cpp create mode 100644 src/gallium/auxiliary/gallivm/tgsitollvm.h delete mode 100644 src/gallium/auxiliary/llvm/Makefile delete mode 100644 src/gallium/auxiliary/llvm/gallivm.cpp delete mode 100644 src/gallium/auxiliary/llvm/gallivm.h delete mode 100644 src/gallium/auxiliary/llvm/gallivm_builtins.cpp delete mode 100644 src/gallium/auxiliary/llvm/gallivm_cpu.cpp delete mode 100644 src/gallium/auxiliary/llvm/gallivm_p.h delete mode 100644 src/gallium/auxiliary/llvm/instructions.cpp delete mode 100644 src/gallium/auxiliary/llvm/instructions.h delete mode 100644 src/gallium/auxiliary/llvm/instructionssoa.cpp delete mode 100644 src/gallium/auxiliary/llvm/instructionssoa.h delete mode 100644 src/gallium/auxiliary/llvm/llvm_builtins.c delete mode 100644 src/gallium/auxiliary/llvm/loweringpass.cpp delete mode 100644 src/gallium/auxiliary/llvm/loweringpass.h delete mode 100644 src/gallium/auxiliary/llvm/storage.cpp delete mode 100644 src/gallium/auxiliary/llvm/storage.h delete mode 100644 src/gallium/auxiliary/llvm/storagesoa.cpp delete mode 100644 src/gallium/auxiliary/llvm/storagesoa.h delete mode 100644 src/gallium/auxiliary/llvm/tgsitollvm.cpp delete mode 100644 src/gallium/auxiliary/llvm/tgsitollvm.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile new file mode 100644 index 0000000000..39fac6ea4a --- /dev/null +++ b/src/gallium/auxiliary/gallivm/Makefile @@ -0,0 +1,85 @@ +# -*-makefile-*- +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = gallivm + + +GALLIVM_SOURCES = \ + gallivm.cpp \ + gallivm_cpu.cpp \ + instructions.cpp \ + loweringpass.cpp \ + tgsitollvm.cpp \ + storage.cpp \ + storagesoa.cpp \ + instructionssoa.cpp + +INC_SOURCES = gallivm_builtins.cpp + +CPP_SOURCES = \ + $(GALLIVM_SOURCES) + +C_SOURCES = +ASM_SOURCES = + +OBJECTS = $(C_SOURCES:.c=.o) \ + $(CPP_SOURCES:.cpp=.o) \ + $(ASM_SOURCES:.S=.o) + +### Include directories +INCLUDES = \ + -I. \ + -I$(TOP)/src/gallium/drivers \ + -I$(TOP)/src/gallium/auxiliary \ + -I$(TOP)/src/gallium/include \ + -I$(TOP)/src/mesa \ + -I$(TOP)/include + + +##### RULES ##### + +.c.o: + $(CC) -c $(INCLUDES) $(LLVM_CFLAGS) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + +.cpp.o: + $(CXX) -c $(INCLUDES) $(LLVM_CXXFLAGS) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@ + +.S.o: + $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ + +##### TARGETS ##### + +default:: depend symlinks $(LIBNAME) + + +$(LIBNAME): $(OBJECTS) Makefile + $(TOP)/bin/mklib -o $@ -static $(OBJECTS) + + +depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES) + rm -f depend + touch depend + $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) \ + $(ASM_SOURCES) $(INC_SOURCES) 2> /dev/null + + +gallivm_builtins.cpp: llvm_builtins.c + clang --emit-llvm $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=$@ -f -for=shader -funcname=createGallivmBuiltins + + +# Emacs tags +tags: + etags `find . -name \*.[ch]` `find ../include` + + +# Remove .o and backup files +clean: + -rm -f *.o */*.o *~ *.so *~ server/*.o + -rm -f depend depend.bak + -rm -f gallivm_builtins.cpp + +symlinks: + + +include depend diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp new file mode 100644 index 0000000000..d14bb3b99a --- /dev/null +++ b/src/gallium/auxiliary/gallivm/gallivm.cpp @@ -0,0 +1,327 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ +#ifdef MESA_LLVM + +#include "gallivm.h" +#include "gallivm_p.h" + +#include "instructions.h" +#include "loweringpass.h" +#include "storage.h" +#include "tgsitollvm.h" + +#include "pipe/p_context.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_dump.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static int GLOBAL_ID = 0; + +using namespace llvm; + +static inline +void AddStandardCompilePasses(PassManager &PM) +{ + PM.add(new LoweringPass()); + PM.add(createVerifierPass()); // Verify that input is correct + + PM.add(createLowerSetJmpPass()); // Lower llvm.setjmp/.longjmp + + //PM.add(createStripSymbolsPass(true)); + + PM.add(createRaiseAllocationsPass()); // call %malloc -> malloc inst + PM.add(createCFGSimplificationPass()); // Clean up disgusting code + PM.add(createPromoteMemoryToRegisterPass());// Kill useless allocas + PM.add(createGlobalOptimizerPass()); // Optimize out global vars + PM.add(createGlobalDCEPass()); // Remove unused fns and globs + PM.add(createIPConstantPropagationPass());// IP Constant Propagation + PM.add(createDeadArgEliminationPass()); // Dead argument elimination + PM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE + PM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE + + PM.add(createPruneEHPass()); // Remove dead EH info + + PM.add(createFunctionInliningPass()); // Inline small functions + PM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args + + PM.add(createTailDuplicationPass()); // Simplify cfg by copying code + PM.add(createInstructionCombiningPass()); // Cleanup for scalarrepl. + PM.add(createCFGSimplificationPass()); // Merge & remove BBs + PM.add(createScalarReplAggregatesPass()); // Break up aggregate allocas + PM.add(createInstructionCombiningPass()); // Combine silly seq's + PM.add(createCondPropagationPass()); // Propagate conditionals + + PM.add(createTailCallEliminationPass()); // Eliminate tail calls + PM.add(createCFGSimplificationPass()); // Merge & remove BBs + PM.add(createReassociatePass()); // Reassociate expressions + PM.add(createLoopRotatePass()); + PM.add(createLICMPass()); // Hoist loop invariants + PM.add(createLoopUnswitchPass()); // Unswitch loops. + PM.add(createLoopIndexSplitPass()); // Index split loops. + PM.add(createInstructionCombiningPass()); // Clean up after LICM/reassoc + PM.add(createIndVarSimplifyPass()); // Canonicalize indvars + PM.add(createLoopUnrollPass()); // Unroll small loops + PM.add(createInstructionCombiningPass()); // Clean up after the unroller + PM.add(createGVNPass()); // Remove redundancies + PM.add(createSCCPPass()); // Constant prop with SCCP + + // Run instcombine after redundancy elimination to exploit opportunities + // opened up by them. + PM.add(createInstructionCombiningPass()); + PM.add(createCondPropagationPass()); // Propagate conditionals + + PM.add(createDeadStoreEliminationPass()); // Delete dead stores + PM.add(createAggressiveDCEPass()); // SSA based 'Aggressive DCE' + PM.add(createCFGSimplificationPass()); // Merge & remove BBs + PM.add(createSimplifyLibCallsPass()); // Library Call Optimizations + PM.add(createDeadTypeEliminationPass()); // Eliminate dead types + PM.add(createConstantMergePass()); // Merge dup global constants +} + +void gallivm_prog_delete(struct gallivm_prog *prog) +{ + delete prog->module; + prog->module = 0; + prog->function = 0; + free(prog); +} + +static inline void +constant_interpolation(float (*inputs)[16][4], + const struct tgsi_interp_coef *coefs, + unsigned attrib, + unsigned chan) +{ + unsigned i; + + for (i = 0; i < QUAD_SIZE; ++i) { + inputs[i][attrib][chan] = coefs[attrib].a0[chan]; + } +} + +static inline void +linear_interpolation(float (*inputs)[16][4], + const struct tgsi_interp_coef *coefs, + unsigned attrib, + unsigned chan) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + const float x = inputs[i][0][0]; + const float y = inputs[i][0][1]; + + inputs[i][attrib][chan] = + coefs[attrib].a0[chan] + + coefs[attrib].dadx[chan] * x + + coefs[attrib].dady[chan] * y; + } +} + +static inline void +perspective_interpolation(float (*inputs)[16][4], + const struct tgsi_interp_coef *coefs, + unsigned attrib, + unsigned chan ) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + const float x = inputs[i][0][0]; + const float y = inputs[i][0][1]; + /* WPOS.w here is really 1/w */ + const float w = 1.0f / inputs[i][0][3]; + assert(inputs[i][0][3] != 0.0); + + inputs[i][attrib][chan] = + (coefs[attrib].a0[chan] + + coefs[attrib].dadx[chan] * x + + coefs[attrib].dady[chan] * y) * w; + } +} + +void gallivm_ir_dump(struct gallivm_ir *ir, const char *file_prefix) +{ + if (!ir || !ir->module) + return; + + if (file_prefix) { + std::ostringstream stream; + stream << file_prefix; + stream << ir->id; + stream << ".ll"; + std::string name = stream.str(); + std::ofstream out(name.c_str()); + if (!out) { + std::cerr<<"Can't open file : "<module); + out.close(); + } else { + const llvm::Module::FunctionListType &funcs = ir->module->getFunctionList(); + llvm::Module::FunctionListType::const_iterator itr; + std::cout<<"; ---------- Start shader "<id<id<num_interp; ++i) { + const gallivm_interpolate &interp = prog->interpolators[i]; + switch (interp.type) { + case TGSI_INTERPOLATE_CONSTANT: + constant_interpolation(inputs, coef, interp.attrib, interp.chan); + break; + + case TGSI_INTERPOLATE_LINEAR: + linear_interpolation(inputs, coef, interp.attrib, interp.chan); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + perspective_interpolation(inputs, coef, interp.attrib, interp.chan); + break; + + default: + assert( 0 ); + } + } +} + + +struct gallivm_ir * gallivm_ir_new(enum gallivm_shader_type type) +{ + struct gallivm_ir *ir = + (struct gallivm_ir *)calloc(1, sizeof(struct gallivm_ir)); + ++GLOBAL_ID; + ir->id = GLOBAL_ID; + ir->type = type; + + return ir; +} + +void gallivm_ir_set_layout(struct gallivm_ir *ir, + enum gallivm_vector_layout layout) +{ + ir->layout = layout; +} + +void gallivm_ir_set_components(struct gallivm_ir *ir, int num) +{ + ir->num_components = num; +} + +void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir, + const struct tgsi_token *tokens) +{ + std::cout << "Creating llvm from: " <module = mod; + gallivm_ir_dump(ir, 0); +} + +void gallivm_ir_delete(struct gallivm_ir *ir) +{ + delete ir->module; + free(ir); +} + +struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir) +{ + struct gallivm_prog *prog = + (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog)); + llvm::Module *mod = llvm::CloneModule(ir->module); + prog->num_consts = ir->num_consts; + memcpy(prog->interpolators, ir->interpolators, sizeof(prog->interpolators)); + prog->num_interp = ir->num_interp; + + /* Run optimization passes over it */ + PassManager passes; + passes.add(new TargetData(mod)); + AddStandardCompilePasses(passes); + passes.run(*mod); + prog->module = mod; + + std::cout << "After optimizations:"<dump(); + + return prog; +} + +#endif /* MESA_LLVM */ diff --git a/src/gallium/auxiliary/gallivm/gallivm.h b/src/gallium/auxiliary/gallivm/gallivm.h new file mode 100644 index 0000000000..92da4bca7f --- /dev/null +++ b/src/gallium/auxiliary/gallivm/gallivm.h @@ -0,0 +1,103 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ + +#ifndef GALLIVM_H +#define GALLIVM_H + +#if defined __cplusplus +extern "C" { +#endif + +#include "pipe/p_state.h" + +#ifdef MESA_LLVM + +struct tgsi_token; + +struct gallivm_ir; +struct gallivm_prog; +struct gallivm_cpu_engine; +struct tgsi_interp_coef; +struct tgsi_sampler; +struct tgsi_exec_vector; + +enum gallivm_shader_type { + GALLIVM_VS, + GALLIVM_FS +}; + +enum gallivm_vector_layout { + GALLIVM_AOS, + GALLIVM_SOA +}; + +struct gallivm_ir *gallivm_ir_new(enum gallivm_shader_type type); +void gallivm_ir_set_layout(struct gallivm_ir *ir, + enum gallivm_vector_layout layout); +void gallivm_ir_set_components(struct gallivm_ir *ir, int num); +void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir, + const struct tgsi_token *tokens); +void gallivm_ir_delete(struct gallivm_ir *ir); + + +struct gallivm_prog *gallivm_ir_compile(struct gallivm_ir *ir); + +void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog, + float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], + const struct tgsi_interp_coef *coefs); +void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix); + + +struct gallivm_cpu_engine *gallivm_cpu_engine_create(struct gallivm_prog *prog); +struct gallivm_cpu_engine *gallivm_global_cpu_engine(); +int gallivm_cpu_vs_exec(struct gallivm_prog *prog, + struct tgsi_exec_vector *inputs, + struct tgsi_exec_vector *dests, + float (*consts)[4], + struct tgsi_exec_vector *temps); +int gallivm_cpu_fs_exec(struct gallivm_prog *prog, + float x, float y, + float (*dests)[PIPE_MAX_SHADER_INPUTS][4], + float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], + float (*consts)[4], + struct tgsi_sampler *samplers); +void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *ee, struct gallivm_prog *prog); +void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *ee); + + +#endif /* MESA_LLVM */ + +#if defined __cplusplus +} // extern "C" +#endif + +#endif diff --git a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp new file mode 100644 index 0000000000..1796f0a177 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp @@ -0,0 +1,567 @@ +// Generated by llvm2cpp - DO NOT MODIFY! + + +Module* createGallivmBuiltins(Module *mod) { + +mod->setModuleIdentifier("shader"); + +// Type Definitions +ArrayType* ArrayTy_0 = ArrayType::get(IntegerType::get(8), 25); + +PointerType* PointerTy_1 = PointerType::get(ArrayTy_0, 0); + +std::vectorFuncTy_2_args; +FuncTy_2_args.push_back(Type::FloatTy); +FuncTy_2_args.push_back(Type::FloatTy); +FunctionType* FuncTy_2 = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/FuncTy_2_args, + /*isVarArg=*/false); + +PointerType* PointerTy_3 = PointerType::get(FuncTy_2, 0); + +VectorType* VectorTy_4 = VectorType::get(Type::FloatTy, 4); + +std::vectorFuncTy_5_args; +FuncTy_5_args.push_back(VectorTy_4); +FunctionType* FuncTy_5 = FunctionType::get( + /*Result=*/VectorTy_4, + /*Params=*/FuncTy_5_args, + /*isVarArg=*/false); + +std::vectorFuncTy_6_args; +FuncTy_6_args.push_back(VectorTy_4); +FuncTy_6_args.push_back(VectorTy_4); +FuncTy_6_args.push_back(VectorTy_4); +FunctionType* FuncTy_6 = FunctionType::get( + /*Result=*/VectorTy_4, + /*Params=*/FuncTy_6_args, + /*isVarArg=*/false); + +VectorType* VectorTy_7 = VectorType::get(IntegerType::get(32), 4); + +std::vectorFuncTy_9_args; +FunctionType* FuncTy_9 = FunctionType::get( + /*Result=*/IntegerType::get(32), + /*Params=*/FuncTy_9_args, + /*isVarArg=*/true); + +PointerType* PointerTy_8 = PointerType::get(FuncTy_9, 0); + +PointerType* PointerTy_10 = PointerType::get(IntegerType::get(8), 0); + +std::vectorFuncTy_12_args; +FuncTy_12_args.push_back(Type::FloatTy); +FunctionType* FuncTy_12 = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/FuncTy_12_args, + /*isVarArg=*/false); + +PointerType* PointerTy_11 = PointerType::get(FuncTy_12, 0); + +std::vectorFuncTy_13_args; +FuncTy_13_args.push_back(VectorTy_4); +FunctionType* FuncTy_13 = FunctionType::get( + /*Result=*/IntegerType::get(32), + /*Params=*/FuncTy_13_args, + /*isVarArg=*/false); + + +// Function Declarations + +Function* func_approx = new Function( + /*Type=*/FuncTy_2, + /*Linkage=*/GlobalValue::WeakLinkage, + /*Name=*/"approx", mod); +func_approx->setCallingConv(CallingConv::C); +const ParamAttrsList *func_approx_PAL = 0; +func_approx->setParamAttrs(func_approx_PAL); + +Function* func_powf = new Function( + /*Type=*/FuncTy_2, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"powf", mod); // (external, no body) +func_powf->setCallingConv(CallingConv::C); +const ParamAttrsList *func_powf_PAL = 0; +func_powf->setParamAttrs(func_powf_PAL); + +Function* func_lit = new Function( + /*Type=*/FuncTy_5, + /*Linkage=*/GlobalValue::WeakLinkage, + /*Name=*/"lit", mod); +func_lit->setCallingConv(CallingConv::C); +const ParamAttrsList *func_lit_PAL = 0; +func_lit->setParamAttrs(func_lit_PAL); + +Function* func_cmp = new Function( + /*Type=*/FuncTy_6, + /*Linkage=*/GlobalValue::WeakLinkage, + /*Name=*/"cmp", mod); +func_cmp->setCallingConv(CallingConv::C); +const ParamAttrsList *func_cmp_PAL = 0; +{ + ParamAttrsVector Attrs; + ParamAttrsWithIndex PAWI; + PAWI.index = 0; PAWI.attrs = 0 | ParamAttr::NoUnwind; + Attrs.push_back(PAWI); + func_cmp_PAL = ParamAttrsList::get(Attrs); + +} +func_cmp->setParamAttrs(func_cmp_PAL); + +Function* func_vcos = new Function( + /*Type=*/FuncTy_5, + /*Linkage=*/GlobalValue::WeakLinkage, + /*Name=*/"vcos", mod); +func_vcos->setCallingConv(CallingConv::C); +const ParamAttrsList *func_vcos_PAL = 0; +func_vcos->setParamAttrs(func_vcos_PAL); + +Function* func_printf = new Function( + /*Type=*/FuncTy_9, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"printf", mod); // (external, no body) +func_printf->setCallingConv(CallingConv::C); +const ParamAttrsList *func_printf_PAL = 0; +func_printf->setParamAttrs(func_printf_PAL); + +Function* func_cosf = new Function( + /*Type=*/FuncTy_12, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"cosf", mod); // (external, no body) +func_cosf->setCallingConv(CallingConv::C); +const ParamAttrsList *func_cosf_PAL = 0; +func_cosf->setParamAttrs(func_cosf_PAL); + +Function* func_scs = new Function( + /*Type=*/FuncTy_5, + /*Linkage=*/GlobalValue::WeakLinkage, + /*Name=*/"scs", mod); +func_scs->setCallingConv(CallingConv::C); +const ParamAttrsList *func_scs_PAL = 0; +func_scs->setParamAttrs(func_scs_PAL); + +Function* func_sinf = new Function( + /*Type=*/FuncTy_12, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"sinf", mod); // (external, no body) +func_sinf->setCallingConv(CallingConv::C); +const ParamAttrsList *func_sinf_PAL = 0; +func_sinf->setParamAttrs(func_sinf_PAL); + +Function* func_vsin = new Function( + /*Type=*/FuncTy_5, + /*Linkage=*/GlobalValue::WeakLinkage, + /*Name=*/"vsin", mod); +func_vsin->setCallingConv(CallingConv::C); +const ParamAttrsList *func_vsin_PAL = 0; +func_vsin->setParamAttrs(func_vsin_PAL); + +Function* func_kilp = new Function( + /*Type=*/FuncTy_13, + /*Linkage=*/GlobalValue::WeakLinkage, + /*Name=*/"kilp", mod); +func_kilp->setCallingConv(CallingConv::C); +const ParamAttrsList *func_kilp_PAL = 0; +{ + ParamAttrsVector Attrs; + ParamAttrsWithIndex PAWI; + PAWI.index = 0; PAWI.attrs = 0 | ParamAttr::NoUnwind; + Attrs.push_back(PAWI); + func_kilp_PAL = ParamAttrsList::get(Attrs); + +} +func_kilp->setParamAttrs(func_kilp_PAL); + +// Global Variable Declarations + + +GlobalVariable* gvar_array__str = new GlobalVariable( +/*Type=*/ArrayTy_0, +/*isConstant=*/true, +/*Linkage=*/GlobalValue::InternalLinkage, +/*Initializer=*/0, // has initializer, specified below +/*Name=*/".str", +mod); + +GlobalVariable* gvar_array__str1 = new GlobalVariable( +/*Type=*/ArrayTy_0, +/*isConstant=*/true, +/*Linkage=*/GlobalValue::InternalLinkage, +/*Initializer=*/0, // has initializer, specified below +/*Name=*/".str1", +mod); + +// Constant Definitions +Constant* const_array_14 = ConstantArray::get("VEC IN is %f %f %f %f\x0A", true); +Constant* const_array_15 = ConstantArray::get("VEC OUT is %f %f %f %f\x0A", true); +ConstantFP* const_float_16 = ConstantFP::get(Type::FloatTy, APFloat(-1.280000e+02f)); +ConstantFP* const_float_17 = ConstantFP::get(Type::FloatTy, APFloat(1.280000e+02f)); +Constant* const_float_18 = Constant::getNullValue(Type::FloatTy); +Constant* const_int32_19 = Constant::getNullValue(IntegerType::get(32)); +std::vector const_packed_20_elems; +ConstantFP* const_float_21 = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f)); +const_packed_20_elems.push_back(const_float_21); +UndefValue* const_float_22 = UndefValue::get(Type::FloatTy); +const_packed_20_elems.push_back(const_float_22); +const_packed_20_elems.push_back(const_float_22); +const_packed_20_elems.push_back(const_float_21); +Constant* const_packed_20 = ConstantVector::get(VectorTy_4, const_packed_20_elems); +ConstantInt* const_int32_23 = ConstantInt::get(APInt(32, "1", 10)); +ConstantInt* const_int32_24 = ConstantInt::get(APInt(32, "3", 10)); +ConstantInt* const_int32_25 = ConstantInt::get(APInt(32, "2", 10)); +std::vector const_packed_26_elems; +const_packed_26_elems.push_back(const_float_21); +const_packed_26_elems.push_back(const_float_18); +const_packed_26_elems.push_back(const_float_18); +const_packed_26_elems.push_back(const_float_21); +Constant* const_packed_26 = ConstantVector::get(VectorTy_4, const_packed_26_elems); +Constant* const_double_27 = Constant::getNullValue(Type::DoubleTy); +std::vector const_packed_28_elems; +const_packed_28_elems.push_back(const_int32_19); +ConstantInt* const_int32_29 = ConstantInt::get(APInt(32, "5", 10)); +const_packed_28_elems.push_back(const_int32_29); +const_packed_28_elems.push_back(const_int32_25); +const_packed_28_elems.push_back(const_int32_24); +Constant* const_packed_28 = ConstantVector::get(VectorTy_7, const_packed_28_elems); +std::vector const_packed_30_elems; +const_packed_30_elems.push_back(const_int32_19); +const_packed_30_elems.push_back(const_int32_23); +ConstantInt* const_int32_31 = ConstantInt::get(APInt(32, "6", 10)); +const_packed_30_elems.push_back(const_int32_31); +const_packed_30_elems.push_back(const_int32_24); +Constant* const_packed_30 = ConstantVector::get(VectorTy_7, const_packed_30_elems); +std::vector const_packed_32_elems; +const_packed_32_elems.push_back(const_int32_19); +const_packed_32_elems.push_back(const_int32_23); +const_packed_32_elems.push_back(const_int32_25); +ConstantInt* const_int32_33 = ConstantInt::get(APInt(32, "7", 10)); +const_packed_32_elems.push_back(const_int32_33); +Constant* const_packed_32 = ConstantVector::get(VectorTy_7, const_packed_32_elems); +std::vector const_ptr_34_indices; +const_ptr_34_indices.push_back(const_int32_19); +const_ptr_34_indices.push_back(const_int32_19); +Constant* const_ptr_34 = ConstantExpr::getGetElementPtr(gvar_array__str, &const_ptr_34_indices[0], const_ptr_34_indices.size() ); +UndefValue* const_packed_35 = UndefValue::get(VectorTy_4); +std::vector const_ptr_36_indices; +const_ptr_36_indices.push_back(const_int32_19); +const_ptr_36_indices.push_back(const_int32_19); +Constant* const_ptr_36 = ConstantExpr::getGetElementPtr(gvar_array__str1, &const_ptr_36_indices[0], const_ptr_36_indices.size() ); + +// Global Variable Definitions +gvar_array__str->setInitializer(const_array_14); +gvar_array__str1->setInitializer(const_array_15); + +// Function Definitions + +// Function: approx (func_approx) +{ + Function::arg_iterator args = func_approx->arg_begin(); + Value* float_a = args++; + float_a->setName("a"); + Value* float_b = args++; + float_b->setName("b"); + + BasicBlock* label_entry = new BasicBlock("entry",func_approx,0); + + // Block entry (label_entry) + FCmpInst* int1_cmp = new FCmpInst(FCmpInst::FCMP_OLT, float_b, const_float_16, "cmp", label_entry); + SelectInst* float_b_addr_0 = new SelectInst(int1_cmp, const_float_16, float_b, "b.addr.0", label_entry); + FCmpInst* int1_cmp3 = new FCmpInst(FCmpInst::FCMP_OGT, float_b_addr_0, const_float_17, "cmp3", label_entry); + SelectInst* float_b_addr_1 = new SelectInst(int1_cmp3, const_float_17, float_b_addr_0, "b.addr.1", label_entry); + FCmpInst* int1_cmp7 = new FCmpInst(FCmpInst::FCMP_OLT, float_a, const_float_18, "cmp7", label_entry); + SelectInst* float_a_addr_0 = new SelectInst(int1_cmp7, const_float_18, float_a, "a.addr.0", label_entry); + std::vector float_call_params; + float_call_params.push_back(float_a_addr_0); + float_call_params.push_back(float_b_addr_1); + CallInst* float_call = new CallInst(func_powf, float_call_params.begin(), float_call_params.end(), "call", label_entry); + float_call->setCallingConv(CallingConv::C); + float_call->setTailCall(true);const ParamAttrsList *float_call_PAL = 0; + float_call->setParamAttrs(float_call_PAL); + + new ReturnInst(float_call, label_entry); + +} + +// Function: lit (func_lit) +{ + Function::arg_iterator args = func_lit->arg_begin(); + Value* packed_tmp = args++; + packed_tmp->setName("tmp"); + + BasicBlock* label_entry_38 = new BasicBlock("entry",func_lit,0); + BasicBlock* label_ifthen = new BasicBlock("ifthen",func_lit,0); + BasicBlock* label_UnifiedReturnBlock = new BasicBlock("UnifiedReturnBlock",func_lit,0); + + // Block entry (label_entry_38) + ExtractElementInst* float_tmp6 = new ExtractElementInst(packed_tmp, const_int32_19, "tmp6", label_entry_38); + FCmpInst* int1_cmp_39 = new FCmpInst(FCmpInst::FCMP_OGT, float_tmp6, const_float_18, "cmp", label_entry_38); + new BranchInst(label_ifthen, label_UnifiedReturnBlock, int1_cmp_39, label_entry_38); + + // Block ifthen (label_ifthen) + InsertElementInst* packed_tmp10 = new InsertElementInst(const_packed_20, float_tmp6, const_int32_23, "tmp10", label_ifthen); + ExtractElementInst* float_tmp12 = new ExtractElementInst(packed_tmp, const_int32_23, "tmp12", label_ifthen); + ExtractElementInst* float_tmp14 = new ExtractElementInst(packed_tmp, const_int32_24, "tmp14", label_ifthen); + std::vector float_call_41_params; + float_call_41_params.push_back(float_tmp12); + float_call_41_params.push_back(float_tmp14); + CallInst* float_call_41 = new CallInst(func_approx, float_call_41_params.begin(), float_call_41_params.end(), "call", label_ifthen); + float_call_41->setCallingConv(CallingConv::C); + float_call_41->setTailCall(true);const ParamAttrsList *float_call_41_PAL = 0; + float_call_41->setParamAttrs(float_call_41_PAL); + + InsertElementInst* packed_tmp16 = new InsertElementInst(packed_tmp10, float_call_41, const_int32_25, "tmp16", label_ifthen); + new ReturnInst(packed_tmp16, label_ifthen); + + // Block UnifiedReturnBlock (label_UnifiedReturnBlock) + new ReturnInst(const_packed_26, label_UnifiedReturnBlock); + +} + +// Function: cmp (func_cmp) +{ + Function::arg_iterator args = func_cmp->arg_begin(); + Value* packed_tmp0 = args++; + packed_tmp0->setName("tmp0"); + Value* packed_tmp1 = args++; + packed_tmp1->setName("tmp1"); + Value* packed_tmp2 = args++; + packed_tmp2->setName("tmp2"); + + BasicBlock* label_entry_44 = new BasicBlock("entry",func_cmp,0); + BasicBlock* label_cond__14 = new BasicBlock("cond.?14",func_cmp,0); + BasicBlock* label_cond_cont20 = new BasicBlock("cond.cont20",func_cmp,0); + BasicBlock* label_cond__28 = new BasicBlock("cond.?28",func_cmp,0); + BasicBlock* label_cond_cont34 = new BasicBlock("cond.cont34",func_cmp,0); + BasicBlock* label_cond__42 = new BasicBlock("cond.?42",func_cmp,0); + BasicBlock* label_cond_cont48 = new BasicBlock("cond.cont48",func_cmp,0); + + // Block entry (label_entry_44) + ExtractElementInst* float_tmp3 = new ExtractElementInst(packed_tmp0, const_int32_19, "tmp3", label_entry_44); + CastInst* double_conv = new FPExtInst(float_tmp3, Type::DoubleTy, "conv", label_entry_44); + FCmpInst* int1_cmp_45 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv, const_double_27, "cmp", label_entry_44); + ExtractElementInst* float_tmp11 = new ExtractElementInst(packed_tmp0, const_int32_23, "tmp11", label_entry_44); + CastInst* double_conv12 = new FPExtInst(float_tmp11, Type::DoubleTy, "conv12", label_entry_44); + FCmpInst* int1_cmp13 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv12, const_double_27, "cmp13", label_entry_44); + SelectInst* packed_tmp1_tmp2 = new SelectInst(int1_cmp_45, packed_tmp1, packed_tmp2, "tmp1.tmp2", label_entry_44); + new BranchInst(label_cond__14, label_cond_cont20, int1_cmp13, label_entry_44); + + // Block cond.?14 (label_cond__14) + ShuffleVectorInst* packed_tmp233 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp1, const_packed_28, "tmp233", label_cond__14); + ExtractElementInst* float_tmp254 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp254", label_cond__14); + CastInst* double_conv265 = new FPExtInst(float_tmp254, Type::DoubleTy, "conv265", label_cond__14); + FCmpInst* int1_cmp276 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv265, const_double_27, "cmp276", label_cond__14); + new BranchInst(label_cond__28, label_cond_cont34, int1_cmp276, label_cond__14); + + // Block cond.cont20 (label_cond_cont20) + ShuffleVectorInst* packed_tmp23 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp2, const_packed_28, "tmp23", label_cond_cont20); + ExtractElementInst* float_tmp25 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp25", label_cond_cont20); + CastInst* double_conv26 = new FPExtInst(float_tmp25, Type::DoubleTy, "conv26", label_cond_cont20); + FCmpInst* int1_cmp27 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv26, const_double_27, "cmp27", label_cond_cont20); + new BranchInst(label_cond__28, label_cond_cont34, int1_cmp27, label_cond_cont20); + + // Block cond.?28 (label_cond__28) + PHINode* packed_tmp23_reg2mem_0 = new PHINode(VectorTy_4, "tmp23.reg2mem.0", label_cond__28); + packed_tmp23_reg2mem_0->reserveOperandSpace(2); + packed_tmp23_reg2mem_0->addIncoming(packed_tmp233, label_cond__14); + packed_tmp23_reg2mem_0->addIncoming(packed_tmp23, label_cond_cont20); + + ShuffleVectorInst* packed_tmp378 = new ShuffleVectorInst(packed_tmp23_reg2mem_0, packed_tmp1, const_packed_30, "tmp378", label_cond__28); + ExtractElementInst* float_tmp399 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp399", label_cond__28); + CastInst* double_conv4010 = new FPExtInst(float_tmp399, Type::DoubleTy, "conv4010", label_cond__28); + FCmpInst* int1_cmp4111 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv4010, const_double_27, "cmp4111", label_cond__28); + new BranchInst(label_cond__42, label_cond_cont48, int1_cmp4111, label_cond__28); + + // Block cond.cont34 (label_cond_cont34) + PHINode* packed_tmp23_reg2mem_1 = new PHINode(VectorTy_4, "tmp23.reg2mem.1", label_cond_cont34); + packed_tmp23_reg2mem_1->reserveOperandSpace(2); + packed_tmp23_reg2mem_1->addIncoming(packed_tmp233, label_cond__14); + packed_tmp23_reg2mem_1->addIncoming(packed_tmp23, label_cond_cont20); + + ShuffleVectorInst* packed_tmp37 = new ShuffleVectorInst(packed_tmp23_reg2mem_1, packed_tmp2, const_packed_30, "tmp37", label_cond_cont34); + ExtractElementInst* float_tmp39 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp39", label_cond_cont34); + CastInst* double_conv40 = new FPExtInst(float_tmp39, Type::DoubleTy, "conv40", label_cond_cont34); + FCmpInst* int1_cmp41 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv40, const_double_27, "cmp41", label_cond_cont34); + new BranchInst(label_cond__42, label_cond_cont48, int1_cmp41, label_cond_cont34); + + // Block cond.?42 (label_cond__42) + PHINode* packed_tmp37_reg2mem_0 = new PHINode(VectorTy_4, "tmp37.reg2mem.0", label_cond__42); + packed_tmp37_reg2mem_0->reserveOperandSpace(2); + packed_tmp37_reg2mem_0->addIncoming(packed_tmp378, label_cond__28); + packed_tmp37_reg2mem_0->addIncoming(packed_tmp37, label_cond_cont34); + + ShuffleVectorInst* packed_tmp5113 = new ShuffleVectorInst(packed_tmp37_reg2mem_0, packed_tmp1, const_packed_32, "tmp5113", label_cond__42); + new ReturnInst(packed_tmp5113, label_cond__42); + + // Block cond.cont48 (label_cond_cont48) + PHINode* packed_tmp37_reg2mem_1 = new PHINode(VectorTy_4, "tmp37.reg2mem.1", label_cond_cont48); + packed_tmp37_reg2mem_1->reserveOperandSpace(2); + packed_tmp37_reg2mem_1->addIncoming(packed_tmp378, label_cond__28); + packed_tmp37_reg2mem_1->addIncoming(packed_tmp37, label_cond_cont34); + + ShuffleVectorInst* packed_tmp51 = new ShuffleVectorInst(packed_tmp37_reg2mem_1, packed_tmp2, const_packed_32, "tmp51", label_cond_cont48); + new ReturnInst(packed_tmp51, label_cond_cont48); + +} + +// Function: vcos (func_vcos) +{ + Function::arg_iterator args = func_vcos->arg_begin(); + Value* packed_val = args++; + packed_val->setName("val"); + + BasicBlock* label_entry_53 = new BasicBlock("entry",func_vcos,0); + + // Block entry (label_entry_53) + ExtractElementInst* float_tmp1 = new ExtractElementInst(packed_val, const_int32_19, "tmp1", label_entry_53); + CastInst* double_conv_54 = new FPExtInst(float_tmp1, Type::DoubleTy, "conv", label_entry_53); + ExtractElementInst* float_tmp3_55 = new ExtractElementInst(packed_val, const_int32_23, "tmp3", label_entry_53); + CastInst* double_conv4 = new FPExtInst(float_tmp3_55, Type::DoubleTy, "conv4", label_entry_53); + ExtractElementInst* float_tmp6_56 = new ExtractElementInst(packed_val, const_int32_25, "tmp6", label_entry_53); + CastInst* double_conv7 = new FPExtInst(float_tmp6_56, Type::DoubleTy, "conv7", label_entry_53); + ExtractElementInst* float_tmp9 = new ExtractElementInst(packed_val, const_int32_24, "tmp9", label_entry_53); + CastInst* double_conv10 = new FPExtInst(float_tmp9, Type::DoubleTy, "conv10", label_entry_53); + std::vector int32_call_params; + int32_call_params.push_back(const_ptr_34); + int32_call_params.push_back(double_conv_54); + int32_call_params.push_back(double_conv4); + int32_call_params.push_back(double_conv7); + int32_call_params.push_back(double_conv10); + CallInst* int32_call = new CallInst(func_printf, int32_call_params.begin(), int32_call_params.end(), "call", label_entry_53); + int32_call->setCallingConv(CallingConv::C); + int32_call->setTailCall(true);const ParamAttrsList *int32_call_PAL = 0; + int32_call->setParamAttrs(int32_call_PAL); + + CallInst* float_call13 = new CallInst(func_cosf, float_tmp1, "call13", label_entry_53); + float_call13->setCallingConv(CallingConv::C); + float_call13->setTailCall(true);const ParamAttrsList *float_call13_PAL = 0; + float_call13->setParamAttrs(float_call13_PAL); + + InsertElementInst* packed_tmp15 = new InsertElementInst(const_packed_35, float_call13, const_int32_19, "tmp15", label_entry_53); + CallInst* float_call18 = new CallInst(func_cosf, float_tmp1, "call18", label_entry_53); + float_call18->setCallingConv(CallingConv::C); + float_call18->setTailCall(true);const ParamAttrsList *float_call18_PAL = 0; + float_call18->setParamAttrs(float_call18_PAL); + + InsertElementInst* packed_tmp20 = new InsertElementInst(packed_tmp15, float_call18, const_int32_23, "tmp20", label_entry_53); + CallInst* float_call23 = new CallInst(func_cosf, float_tmp1, "call23", label_entry_53); + float_call23->setCallingConv(CallingConv::C); + float_call23->setTailCall(true);const ParamAttrsList *float_call23_PAL = 0; + float_call23->setParamAttrs(float_call23_PAL); + + InsertElementInst* packed_tmp25 = new InsertElementInst(packed_tmp20, float_call23, const_int32_25, "tmp25", label_entry_53); + CallInst* float_call28 = new CallInst(func_cosf, float_tmp1, "call28", label_entry_53); + float_call28->setCallingConv(CallingConv::C); + float_call28->setTailCall(true);const ParamAttrsList *float_call28_PAL = 0; + float_call28->setParamAttrs(float_call28_PAL); + + InsertElementInst* packed_tmp30 = new InsertElementInst(packed_tmp25, float_call28, const_int32_24, "tmp30", label_entry_53); + CastInst* double_conv33 = new FPExtInst(float_call13, Type::DoubleTy, "conv33", label_entry_53); + CastInst* double_conv36 = new FPExtInst(float_call18, Type::DoubleTy, "conv36", label_entry_53); + CastInst* double_conv39 = new FPExtInst(float_call23, Type::DoubleTy, "conv39", label_entry_53); + CastInst* double_conv42 = new FPExtInst(float_call28, Type::DoubleTy, "conv42", label_entry_53); + std::vector int32_call43_params; + int32_call43_params.push_back(const_ptr_36); + int32_call43_params.push_back(double_conv33); + int32_call43_params.push_back(double_conv36); + int32_call43_params.push_back(double_conv39); + int32_call43_params.push_back(double_conv42); + CallInst* int32_call43 = new CallInst(func_printf, int32_call43_params.begin(), int32_call43_params.end(), "call43", label_entry_53); + int32_call43->setCallingConv(CallingConv::C); + int32_call43->setTailCall(true);const ParamAttrsList *int32_call43_PAL = 0; + int32_call43->setParamAttrs(int32_call43_PAL); + + new ReturnInst(packed_tmp30, label_entry_53); + +} + +// Function: scs (func_scs) +{ + Function::arg_iterator args = func_scs->arg_begin(); + Value* packed_val_58 = args++; + packed_val_58->setName("val"); + + BasicBlock* label_entry_59 = new BasicBlock("entry",func_scs,0); + + // Block entry (label_entry_59) + ExtractElementInst* float_tmp2 = new ExtractElementInst(packed_val_58, const_int32_19, "tmp2", label_entry_59); + CallInst* float_call_60 = new CallInst(func_cosf, float_tmp2, "call", label_entry_59); + float_call_60->setCallingConv(CallingConv::C); + float_call_60->setTailCall(true);const ParamAttrsList *float_call_60_PAL = 0; + float_call_60->setParamAttrs(float_call_60_PAL); + + InsertElementInst* packed_tmp5 = new InsertElementInst(const_packed_35, float_call_60, const_int32_19, "tmp5", label_entry_59); + CallInst* float_call7 = new CallInst(func_sinf, float_tmp2, "call7", label_entry_59); + float_call7->setCallingConv(CallingConv::C); + float_call7->setTailCall(true);const ParamAttrsList *float_call7_PAL = 0; + float_call7->setParamAttrs(float_call7_PAL); + + InsertElementInst* packed_tmp9 = new InsertElementInst(packed_tmp5, float_call7, const_int32_23, "tmp9", label_entry_59); + new ReturnInst(packed_tmp9, label_entry_59); + +} + +// Function: vsin (func_vsin) +{ + Function::arg_iterator args = func_vsin->arg_begin(); + Value* packed_val_62 = args++; + packed_val_62->setName("val"); + + BasicBlock* label_entry_63 = new BasicBlock("entry",func_vsin,0); + + // Block entry (label_entry_63) + ExtractElementInst* float_tmp2_64 = new ExtractElementInst(packed_val_62, const_int32_19, "tmp2", label_entry_63); + CallInst* float_call_65 = new CallInst(func_sinf, float_tmp2_64, "call", label_entry_63); + float_call_65->setCallingConv(CallingConv::C); + float_call_65->setTailCall(true);const ParamAttrsList *float_call_65_PAL = 0; + float_call_65->setParamAttrs(float_call_65_PAL); + + InsertElementInst* packed_tmp6 = new InsertElementInst(const_packed_35, float_call_65, const_int32_19, "tmp6", label_entry_63); + InsertElementInst* packed_tmp9_66 = new InsertElementInst(packed_tmp6, float_call_65, const_int32_23, "tmp9", label_entry_63); + InsertElementInst* packed_tmp12 = new InsertElementInst(packed_tmp9_66, float_call_65, const_int32_25, "tmp12", label_entry_63); + InsertElementInst* packed_tmp15_67 = new InsertElementInst(packed_tmp12, float_call_65, const_int32_24, "tmp15", label_entry_63); + new ReturnInst(packed_tmp15_67, label_entry_63); + +} + +// Function: kilp (func_kilp) +{ + Function::arg_iterator args = func_kilp->arg_begin(); + Value* packed_val_69 = args++; + packed_val_69->setName("val"); + + BasicBlock* label_entry_70 = new BasicBlock("entry",func_kilp,0); + BasicBlock* label_lor_rhs = new BasicBlock("lor_rhs",func_kilp,0); + BasicBlock* label_lor_rhs5 = new BasicBlock("lor_rhs5",func_kilp,0); + BasicBlock* label_lor_rhs11 = new BasicBlock("lor_rhs11",func_kilp,0); + BasicBlock* label_UnifiedReturnBlock_71 = new BasicBlock("UnifiedReturnBlock",func_kilp,0); + + // Block entry (label_entry_70) + ExtractElementInst* float_tmp1_72 = new ExtractElementInst(packed_val_69, const_int32_19, "tmp1", label_entry_70); + FCmpInst* int1_cmp_73 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp1_72, const_float_18, "cmp", label_entry_70); + new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs, int1_cmp_73, label_entry_70); + + // Block lor_rhs (label_lor_rhs) + ExtractElementInst* float_tmp3_75 = new ExtractElementInst(packed_val_69, const_int32_23, "tmp3", label_lor_rhs); + FCmpInst* int1_cmp4 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp3_75, const_float_18, "cmp4", label_lor_rhs); + new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs5, int1_cmp4, label_lor_rhs); + + // Block lor_rhs5 (label_lor_rhs5) + ExtractElementInst* float_tmp7 = new ExtractElementInst(packed_val_69, const_int32_25, "tmp7", label_lor_rhs5); + FCmpInst* int1_cmp8 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp7, const_float_18, "cmp8", label_lor_rhs5); + new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs11, int1_cmp8, label_lor_rhs5); + + // Block lor_rhs11 (label_lor_rhs11) + ExtractElementInst* float_tmp13 = new ExtractElementInst(packed_val_69, const_int32_24, "tmp13", label_lor_rhs11); + FCmpInst* int1_cmp14 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp13, const_float_18, "cmp14", label_lor_rhs11); + CastInst* int32_retval = new ZExtInst(int1_cmp14, IntegerType::get(32), "retval", label_lor_rhs11); + new ReturnInst(int32_retval, label_lor_rhs11); + + // Block UnifiedReturnBlock (label_UnifiedReturnBlock_71) + new ReturnInst(const_int32_23, label_UnifiedReturnBlock_71); + +} + +return mod; + +} diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp new file mode 100644 index 0000000000..8f9830d0b1 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp @@ -0,0 +1,202 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ +#ifdef MESA_LLVM + +#include "gallivm.h" +#include "gallivm_p.h" + +#include "instructions.h" +#include "loweringpass.h" +#include "storage.h" +#include "tgsitollvm.h" + +#include "pipe/p_context.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_dump.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +struct gallivm_cpu_engine { + llvm::ExecutionEngine *engine; +}; + +static struct gallivm_cpu_engine *CPU = 0; + +typedef int (*fragment_shader_runner)(float x, float y, + float (*dests)[16][4], + float (*inputs)[16][4], + int num_attribs, + float (*consts)[4], int num_consts, + struct tgsi_sampler *samplers); + +int gallivm_cpu_fs_exec(struct gallivm_prog *prog, + float fx, float fy, + float (*dests)[16][4], + float (*inputs)[16][4], + float (*consts)[4], + struct tgsi_sampler *samplers) +{ + fragment_shader_runner runner = reinterpret_cast(prog->function); + assert(runner); + + return runner(fx, fy, dests, inputs, prog->num_interp, + consts, prog->num_consts, + samplers); +} + +static inline llvm::Function *func_for_shader(struct gallivm_prog *prog) +{ + llvm::Module *mod = prog->module; + llvm::Function *func = 0; + + switch (prog->type) { + case GALLIVM_VS: + func = mod->getFunction("vs_shader"); + break; + case GALLIVM_FS: + func = mod->getFunction("fs_shader"); + break; + default: + assert(!"Unknown shader type!"); + break; + } + return func; +} + +/*! + This function creates a CPU based execution engine for the given gallivm_prog. + gallivm_cpu_engine should be used as a singleton throughout the library. Before + executing gallivm_prog_exec one needs to call gallivm_cpu_jit_compile. + The gallivm_prog instance which is being passed to the constructor is being + automatically JIT compiled so one shouldn't call gallivm_cpu_jit_compile + with it again. + */ +struct gallivm_cpu_engine * gallivm_cpu_engine_create(struct gallivm_prog *prog) +{ + struct gallivm_cpu_engine *cpu = (struct gallivm_cpu_engine *) + calloc(1, sizeof(struct gallivm_cpu_engine)); + llvm::Module *mod = static_cast(prog->module); + llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod); + llvm::ExecutionEngine *ee = llvm::ExecutionEngine::create(mp, false); + ee->DisableLazyCompilation(); + cpu->engine = ee; + + llvm::Function *func = func_for_shader(prog); + + prog->function = ee->getPointerToFunction(func); + CPU = cpu; + return cpu; +} + + +/*! + This function JIT compiles the given gallivm_prog with the given cpu based execution engine. + The reference to the generated machine code entry point will be stored + in the gallivm_prog program. After executing this function one can call gallivm_prog_exec + in order to execute the gallivm_prog on the CPU. + */ +void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog *prog) +{ + llvm::Module *mod = static_cast(prog->module); + llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod); + llvm::ExecutionEngine *ee = cpu->engine; + assert(ee); + /*FIXME : remove */ + ee->DisableLazyCompilation(); + ee->addModuleProvider(mp); + + llvm::Function *func = func_for_shader(prog); + prog->function = ee->getPointerToFunction(func); +} + +void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *cpu) +{ + free(cpu); +} + +struct gallivm_cpu_engine * gallivm_global_cpu_engine() +{ + return CPU; +} + + +typedef void (*vertex_shader_runner)(void *ainputs, + void *dests, + float (*aconsts)[4], + void *temps); + + +/*! + This function is used to execute the gallivm_prog in software. Before calling + this function the gallivm_prog has to be JIT compiled with the gallivm_cpu_jit_compile + function. + */ +int gallivm_cpu_vs_exec(struct gallivm_prog *prog, + struct tgsi_exec_vector *inputs, + struct tgsi_exec_vector *dests, + float (*consts)[4], + struct tgsi_exec_vector *temps) +{ + vertex_shader_runner runner = reinterpret_cast(prog->function); + assert(runner); + /*FIXME*/ + runner(inputs, dests, consts, temps); + + return 0; +} + +#endif diff --git a/src/gallium/auxiliary/gallivm/gallivm_p.h b/src/gallium/auxiliary/gallivm/gallivm_p.h new file mode 100644 index 0000000000..cfe7b1901b --- /dev/null +++ b/src/gallium/auxiliary/gallivm/gallivm_p.h @@ -0,0 +1,110 @@ +#ifndef GALLIVM_P_H +#define GALLIVM_P_H + +#ifdef MESA_LLVM + +#include "gallivm.h" +#include "pipe/p_shader_tokens.h" +#include "pipe/p_compiler.h" + +namespace llvm { + class Module; +} + +#if defined __cplusplus +extern "C" { +#endif + +enum gallivm_shader_type; +enum gallivm_vector_layout; + +struct gallivm_interpolate { + int attrib; + int chan; + int type; +}; + +struct gallivm_ir { + llvm::Module *module; + int id; + enum gallivm_shader_type type; + enum gallivm_vector_layout layout; + int num_components; + int num_consts; + + //FIXME: this might not be enough for some shaders + struct gallivm_interpolate interpolators[32*4]; + int num_interp; +}; + +struct gallivm_prog { + llvm::Module *module; + void *function; + + int id; + enum gallivm_shader_type type; + + int num_consts; + + //FIXME: this might not be enough for some shaders + struct gallivm_interpolate interpolators[32*4]; + int num_interp; +}; + +static INLINE void gallivm_swizzle_components(int swizzle, + int *xc, int *yc, + int *zc, int *wc) +{ + int x = swizzle / 1000; swizzle -= x * 1000; + int y = swizzle / 100; swizzle -= y * 100; + int z = swizzle / 10; swizzle -= z * 10; + int w = swizzle; + + if (xc) *xc = x; + if (yc) *yc = y; + if (zc) *zc = z; + if (wc) *wc = w; +} + +static INLINE boolean gallivm_is_swizzle(int swizzle) +{ + const int NO_SWIZZLE = TGSI_SWIZZLE_X * 1000 + TGSI_SWIZZLE_Y * 100 + + TGSI_SWIZZLE_Z * 10 + TGSI_SWIZZLE_W; + return swizzle != NO_SWIZZLE; +} + +static INLINE int gallivm_x_swizzle(int swizzle) +{ + int x; + gallivm_swizzle_components(swizzle, &x, 0, 0, 0); + return x; +} + +static INLINE int gallivm_y_swizzle(int swizzle) +{ + int y; + gallivm_swizzle_components(swizzle, 0, &y, 0, 0); + return y; +} + +static INLINE int gallivm_z_swizzle(int swizzle) +{ + int z; + gallivm_swizzle_components(swizzle, 0, 0, &z, 0); + return z; +} + +static INLINE int gallivm_w_swizzle(int swizzle) +{ + int w; + gallivm_swizzle_components(swizzle, 0, 0, 0, &w); + return w; +} + +#endif /* MESA_LLVM */ + +#if defined __cplusplus +} // extern "C" +#endif + +#endif diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp new file mode 100644 index 0000000000..55d39fa5f1 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -0,0 +1,889 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ +#ifdef MESA_LLVM + +#include "instructions.h" + +#include "storage.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +using namespace llvm; + +#include "gallivm_builtins.cpp" + +static inline std::string createFuncName(int label) +{ + std::ostringstream stream; + stream << "function"; + stream << label; + return stream.str(); +} + +Instructions::Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block, + Storage *storage) + : m_mod(mod), m_func(func), m_builder(block), m_idx(0), + m_storage(storage) +{ + m_floatVecType = VectorType::get(Type::FloatTy, 4); + + m_llvmFSqrt = 0; + m_llvmFAbs = 0; + m_llvmPow = 0; + m_llvmFloor = 0; + m_llvmFlog = 0; + m_llvmLit = 0; + m_fmtPtr = 0; + + createGallivmBuiltins(m_mod); +} + +llvm::Value * Instructions::add(llvm::Value *in1, llvm::Value *in2) +{ + return m_builder.CreateAdd(in1, in2, name("add")); +} + +llvm::Value * Instructions::madd(llvm::Value *in1, llvm::Value *in2, + llvm::Value *in3) +{ + Value *mulRes = mul(in1, in2); + return add(mulRes, in3); +} + +llvm::Value * Instructions::mul(llvm::Value *in1, llvm::Value *in2) +{ + return m_builder.CreateMul(in1, in2, name("mul")); +} + +const char * Instructions::name(const char *prefix) +{ + ++m_idx; + snprintf(m_name, 32, "%s%d", prefix, m_idx); + return m_name; +} + +llvm::Value * Instructions::dp3(llvm::Value *in1, llvm::Value *in2) +{ + Value *mulRes = mul(in1, in2); + Value *x = m_builder.CreateExtractElement(mulRes, + m_storage->constantInt(0), + name("extractx")); + Value *y = m_builder.CreateExtractElement(mulRes, + m_storage->constantInt(1), + name("extracty")); + Value *z = m_builder.CreateExtractElement(mulRes, + m_storage->constantInt(2), + name("extractz")); + Value *xy = m_builder.CreateAdd(x, y,name("xy")); + Value *dot3 = m_builder.CreateAdd(xy, z, name("dot3")); + return vectorFromVals(dot3, dot3, dot3, dot3); +} + +llvm::Value *Instructions::callFSqrt(llvm::Value *val) +{ + if (!m_llvmFSqrt) { + // predeclare the intrinsic + std::vector fsqrtArgs; + fsqrtArgs.push_back(Type::FloatTy); + ParamAttrsList *fsqrtPal = 0; + FunctionType* fsqrtType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/fsqrtArgs, + /*isVarArg=*/false); + m_llvmFSqrt = new Function( + /*Type=*/fsqrtType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"llvm.sqrt.f32", m_mod); + m_llvmFSqrt->setCallingConv(CallingConv::C); + m_llvmFSqrt->setParamAttrs(fsqrtPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFSqrt, val, + name("sqrt")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::rsq(llvm::Value *in1) +{ + Value *x = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("extractx")); + Value *abs = callFAbs(x); + Value *sqrt = callFSqrt(abs); + + Value *rsqrt = m_builder.CreateFDiv(ConstantFP::get(Type::FloatTy, + APFloat(1.f)), + sqrt, + name("rsqrt")); + return vectorFromVals(rsqrt, rsqrt, rsqrt, rsqrt); +} + +llvm::Value * Instructions::vectorFromVals(llvm::Value *x, llvm::Value *y, + llvm::Value *z, llvm::Value *w) +{ + Constant *const_vec = Constant::getNullValue(m_floatVecType); + Value *res = m_builder.CreateInsertElement(const_vec, x, + m_storage->constantInt(0), + name("vecx")); + res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1), + name("vecxy")); + res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2), + name("vecxyz")); + if (w) + res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3), + name("vecxyzw")); + return res; +} + +llvm::Value *Instructions::callFAbs(llvm::Value *val) +{ + if (!m_llvmFAbs) { + // predeclare the intrinsic + std::vector fabsArgs; + fabsArgs.push_back(Type::FloatTy); + ParamAttrsList *fabsPal = 0; + FunctionType* fabsType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/fabsArgs, + /*isVarArg=*/false); + m_llvmFAbs = new Function( + /*Type=*/fabsType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"fabs", m_mod); + m_llvmFAbs->setCallingConv(CallingConv::C); + m_llvmFAbs->setParamAttrs(fabsPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFAbs, val, + name("fabs")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::lit(llvm::Value *in) +{ + if (!m_llvmLit) { + m_llvmLit = m_mod->getFunction("lit"); + } + CallInst *call = m_builder.CreateCall(m_llvmLit, in, name("litres")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::sub(llvm::Value *in1, llvm::Value *in2) +{ + Value *res = m_builder.CreateSub(in1, in2, name("sub")); + return res; +} + +llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2) +{ + if (!m_llvmPow) { + // predeclare the intrinsic + std::vector powArgs; + powArgs.push_back(Type::FloatTy); + powArgs.push_back(Type::FloatTy); + ParamAttrsList *powPal = 0; + FunctionType* powType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/powArgs, + /*isVarArg=*/false); + m_llvmPow = new Function( + /*Type=*/powType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"llvm.pow.f32", m_mod); + m_llvmPow->setCallingConv(CallingConv::C); + m_llvmPow->setParamAttrs(powPal); + } + std::vector params; + params.push_back(val1); + params.push_back(val2); + CallInst *call = m_builder.CreateCall(m_llvmPow, params.begin(), params.end(), + name("pow")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::pow(llvm::Value *in1, llvm::Value *in2) +{ + Value *x1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("x1")); + Value *x2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(0), + name("x2")); + llvm::Value *val = callPow(x1, x2); + return vectorFromVals(val, val, val, val); +} + +llvm::Value * Instructions::rcp(llvm::Value *in1) +{ + Value *x1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("x1")); + Value *res = m_builder.CreateFDiv(ConstantFP::get(Type::FloatTy, + APFloat(1.f)), + x1, name("rcp")); + return vectorFromVals(res, res, res, res); +} + +llvm::Value * Instructions::dp4(llvm::Value *in1, llvm::Value *in2) +{ + Value *mulRes = mul(in1, in2); + std::vector vec = extractVector(mulRes); + Value *xy = m_builder.CreateAdd(vec[0], vec[1], name("xy")); + Value *xyz = m_builder.CreateAdd(xy, vec[2], name("xyz")); + Value *dot4 = m_builder.CreateAdd(xyz, vec[3], name("dot4")); + return vectorFromVals(dot4, dot4, dot4, dot4); +} + +llvm::Value * Instructions::dph(llvm::Value *in1, llvm::Value *in2) +{ + Value *mulRes = mul(in1, in2); + std::vector vec1 = extractVector(mulRes); + Value *xy = m_builder.CreateAdd(vec1[0], vec1[1], name("xy")); + Value *xyz = m_builder.CreateAdd(xy, vec1[2], name("xyz")); + Value *dph = m_builder.CreateAdd(xyz, vec1[3], name("dph")); + return vectorFromVals(dph, dph, dph, dph); +} + +llvm::Value * Instructions::dst(llvm::Value *in1, llvm::Value *in2) +{ + Value *y1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(1), + name("y1")); + Value *z = m_builder.CreateExtractElement(in1, + m_storage->constantInt(2), + name("z")); + Value *y2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(1), + name("y2")); + Value *w = m_builder.CreateExtractElement(in2, + m_storage->constantInt(3), + name("w")); + Value *ry = m_builder.CreateMul(y1, y2, name("tyuy")); + return vectorFromVals(ConstantFP::get(Type::FloatTy, APFloat(1.f)), + ry, z, w); +} + +llvm::Value * Instructions::ex2(llvm::Value *in) +{ + llvm::Value *val = callPow(ConstantFP::get(Type::FloatTy, APFloat(2.f)), + m_builder.CreateExtractElement( + in, m_storage->constantInt(0), + name("x1"))); + return vectorFromVals(val, val, val, val); +} + +llvm::Value * Instructions::callFloor(llvm::Value *val) +{ + if (!m_llvmFloor) { + // predeclare the intrinsic + std::vector floorArgs; + floorArgs.push_back(Type::FloatTy); + ParamAttrsList *floorPal = 0; + FunctionType* floorType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/floorArgs, + /*isVarArg=*/false); + m_llvmFloor = new Function( + /*Type=*/floorType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"floorf", m_mod); + m_llvmFloor->setCallingConv(CallingConv::C); + m_llvmFloor->setParamAttrs(floorPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFloor, val, + name("floorf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::floor(llvm::Value *in) +{ + std::vector vec = extractVector(in); + return vectorFromVals(callFloor(vec[0]), callFloor(vec[1]), + callFloor(vec[2]), callFloor(vec[3])); +} + +llvm::Value * Instructions::arl(llvm::Value *in) +{ + return floor(in); +} + +llvm::Value * Instructions::frc(llvm::Value *in) +{ + llvm::Value *flr = floor(in); + return sub(in, flr); +} + +llvm::Value * Instructions::callFLog(llvm::Value *val) +{ + if (!m_llvmFlog) { + // predeclare the intrinsic + std::vector flogArgs; + flogArgs.push_back(Type::FloatTy); + ParamAttrsList *flogPal = 0; + FunctionType* flogType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/flogArgs, + /*isVarArg=*/false); + m_llvmFlog = new Function( + /*Type=*/flogType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"logf", m_mod); + m_llvmFlog->setCallingConv(CallingConv::C); + m_llvmFlog->setParamAttrs(flogPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFlog, val, + name("logf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::lg2(llvm::Value *in) +{ + std::vector vec = extractVector(in); + llvm::Value *const_vec = constVector(1.442695f, 1.442695f, + 1.442695f, 1.442695f); + return mul(vectorFromVals(callFLog(vec[0]), callFLog(vec[1]), + callFLog(vec[2]), callFLog(vec[3])), const_vec); +} + +llvm::Value * Instructions::min(llvm::Value *in1, llvm::Value *in2) +{ + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp")); + Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0], + name("selx")); + + Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp")); + Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1], + name("sely")); + + Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp")); + Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2], + name("selz")); + + Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp")); + Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3], + name("selw")); + + return vectorFromVals(selx, sely, selz, selw); +} + +llvm::Value * Instructions::max(llvm::Value *in1, llvm::Value *in2) +{ + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], + name("xcmp")); + Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0], + name("selx")); + + Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], + name("ycmp")); + Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1], + name("sely")); + + Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], + name("zcmp")); + Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2], + name("selz")); + + Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], + name("wcmp")); + Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3], + name("selw")); + + return vectorFromVals(selx, sely, selz, selw); +} + +void Instructions::printVector(llvm::Value *val) +{ + static const char *frmt = "Vector is [%f, %f, %f, %f]\x0A"; + + if (!m_fmtPtr) { + Constant *format = ConstantArray::get(frmt, true); + ArrayType *arrayTy = ArrayType::get(IntegerType::get(8), strlen(frmt) + 1); + GlobalVariable* globalFormat = new GlobalVariable( + /*Type=*/arrayTy, + /*isConstant=*/true, + /*Linkage=*/GlobalValue::InternalLinkage, + /*Initializer=*/0, // has initializer, specified below + /*Name=*/name(".str"), + m_mod); + globalFormat->setInitializer(format); + + Constant* const_int0 = Constant::getNullValue(IntegerType::get(32)); + std::vector const_ptr_21_indices; + const_ptr_21_indices.push_back(const_int0); + const_ptr_21_indices.push_back(const_int0); + m_fmtPtr = ConstantExpr::getGetElementPtr(globalFormat, + &const_ptr_21_indices[0], const_ptr_21_indices.size()); + } + + Function *func_printf = m_mod->getFunction("printf"); + if (!func_printf) + func_printf = declarePrintf(); + assert(func_printf); + std::vector vec = extractVector(val); + Value *dx = m_builder.CreateFPExt(vec[0], Type::DoubleTy, name("dx")); + Value *dy = m_builder.CreateFPExt(vec[1], Type::DoubleTy, name("dy")); + Value *dz = m_builder.CreateFPExt(vec[2], Type::DoubleTy, name("dz")); + Value *dw = m_builder.CreateFPExt(vec[3], Type::DoubleTy, name("dw")); + std::vector params; + params.push_back(m_fmtPtr); + params.push_back(dx); + params.push_back(dy); + params.push_back(dz); + params.push_back(dw); + CallInst *call = m_builder.CreateCall(func_printf, params.begin(), params.end(), + name("printf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(true); +} + +llvm::Function * Instructions::declarePrintf() +{ + std::vector args; + ParamAttrsList *params = 0; + FunctionType* funcTy = FunctionType::get( + /*Result=*/IntegerType::get(32), + /*Params=*/args, + /*isVarArg=*/true); + Function* func_printf = new Function( + /*Type=*/funcTy, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"printf", m_mod); + func_printf->setCallingConv(CallingConv::C); + func_printf->setParamAttrs(params); + return func_printf; +} + + +llvm::Value * Instructions::sgt(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); + Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); + + Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); +} +llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); + + Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpOGE(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); +} + + +llvm::Value * Instructions::slt(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); + + Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); +} + +llvm::Value * Instructions::cross(llvm::Value *in1, llvm::Value *in2) +{ + Value *x1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("x1")); + Value *y1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(1), + name("y1")); + Value *z1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(2), + name("z1")); + + Value *x2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(0), + name("x2")); + Value *y2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(1), + name("y2")); + Value *z2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(2), + name("z2")); + Value *y1z2 = mul(y1, z2); + Value *z1y2 = mul(z1, y2); + + Value *z1x2 = mul(z1, x2); + Value *x1z2 = mul(x1, z2); + + Value *x1y2 = mul(x1, y2); + Value *y1x2 = mul(y1, x2); + + return vectorFromVals(sub(y1z2, z1y2), sub(z1x2, x1z2), sub(x1y2, y1x2)); +} + + +llvm::Value * Instructions::abs(llvm::Value *in) +{ + std::vector vec = extractVector(in); + Value *xabs = callFAbs(vec[0]); + Value *yabs = callFAbs(vec[1]); + Value *zabs = callFAbs(vec[2]); + Value *wabs = callFAbs(vec[3]); + return vectorFromVals(xabs, yabs, zabs, wabs); +} + +void Instructions::ifop(llvm::Value *in) +{ + BasicBlock *ifthen = new BasicBlock(name("ifthen"), m_func,0); + BasicBlock *ifend = new BasicBlock(name("ifthenend"), m_func,0); + + //BasicBlock *yblock = new BasicBlock(name("yblock"), m_func,0); + //BasicBlock *zblock = new BasicBlock(name("zblock"), m_func,0); + //BasicBlock *wblock = new BasicBlock(name("wblock"), m_func,0); + + Constant *float0 = Constant::getNullValue(Type::FloatTy); + + Value *x = m_builder.CreateExtractElement(in, m_storage->constantInt(0), + name("extractx")); + Value *xcmp = m_builder.CreateFCmpUNE(x, float0, name("xcmp")); + m_builder.CreateCondBr(xcmp, ifthen, ifend); + //m_builder.SetInsertPoint(yblock); + + m_builder.SetInsertPoint(ifthen); + m_ifStack.push(ifend); +} + +llvm::BasicBlock * Instructions::currentBlock() const +{ + return m_builder.GetInsertBlock(); +} + +void Instructions::elseop() +{ + assert(!m_ifStack.empty()); + BasicBlock *ifend = new BasicBlock(name("ifend"), m_func,0); + m_builder.CreateBr(ifend); + m_builder.SetInsertPoint(m_ifStack.top()); + currentBlock()->setName(name("ifelse")); + m_ifStack.pop(); + m_ifStack.push(ifend); +} + +void Instructions::endif() +{ + assert(!m_ifStack.empty()); + m_builder.CreateBr(m_ifStack.top()); + m_builder.SetInsertPoint(m_ifStack.top()); + m_ifStack.pop(); +} + +llvm::Value * Instructions::lerp(llvm::Value *in1, llvm::Value *in2, + llvm::Value *in3) +{ + llvm::Value *m = mul(in1, in2); + llvm::Value *vec1 = constVector(1.f, 1.f, 1.f, 1.f); + llvm::Value *s = sub(vec1, in1); + return add(m, mul(s, in3)); +} + +void Instructions::beginLoop() +{ + BasicBlock *begin = new BasicBlock(name("loop"), m_func,0); + BasicBlock *end = new BasicBlock(name("endloop"), m_func,0); + + m_builder.CreateBr(begin); + Loop loop; + loop.begin = begin; + loop.end = end; + m_builder.SetInsertPoint(begin); + m_loopStack.push(loop); +} + +void Instructions::endLoop() +{ + assert(!m_loopStack.empty()); + Loop loop = m_loopStack.top(); + m_builder.CreateBr(loop.begin); + loop.end->moveAfter(currentBlock()); + m_builder.SetInsertPoint(loop.end); + m_loopStack.pop(); +} + +void Instructions::brk() +{ + assert(!m_loopStack.empty()); + BasicBlock *unr = new BasicBlock(name("unreachable"), m_func,0); + m_builder.CreateBr(m_loopStack.top().end); + m_builder.SetInsertPoint(unr); +} + +llvm::Value * Instructions::trunc(llvm::Value *in) +{ + std::vector vec = extractVector(in); + Value *icastx = m_builder.CreateFPToSI(vec[0], IntegerType::get(32), + name("ftoix")); + Value *icasty = m_builder.CreateFPToSI(vec[1], IntegerType::get(32), + name("ftoiy")); + Value *icastz = m_builder.CreateFPToSI(vec[2], IntegerType::get(32), + name("ftoiz")); + Value *icastw = m_builder.CreateFPToSI(vec[3], IntegerType::get(32), + name("ftoiw")); + Value *fx = m_builder.CreateSIToFP(icastx, Type::FloatTy, + name("fx")); + Value *fy = m_builder.CreateSIToFP(icasty, Type::FloatTy, + name("fy")); + Value *fz = m_builder.CreateSIToFP(icastz, Type::FloatTy, + name("fz")); + Value *fw = m_builder.CreateSIToFP(icastw, Type::FloatTy, + name("fw")); + return vectorFromVals(fx, fy, fz, fw); +} + +void Instructions::end() +{ + m_builder.CreateRetVoid(); +} + +void Instructions::cal(int label, llvm::Value *input) +{ + std::vector params; + params.push_back(input); + llvm::Function *func = findFunction(label); + + m_builder.CreateCall(func, params.begin(), params.end()); +} + +llvm::Function * Instructions::declareFunc(int label) +{ + PointerType *vecPtr = PointerType::getUnqual(m_floatVecType); + std::vector args; + args.push_back(vecPtr); + args.push_back(vecPtr); + args.push_back(vecPtr); + args.push_back(vecPtr); + ParamAttrsList *params = 0; + FunctionType *funcType = FunctionType::get( + /*Result=*/Type::VoidTy, + /*Params=*/args, + /*isVarArg=*/false); + std::string name = createFuncName(label); + Function *func = new Function( + /*Type=*/funcType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/name.c_str(), m_mod); + func->setCallingConv(CallingConv::C); + func->setParamAttrs(params); + return func; +} + +void Instructions::bgnSub(unsigned label) +{ + llvm::Function *func = findFunction(label); + + Function::arg_iterator args = func->arg_begin(); + Value *ptr_INPUT = args++; + ptr_INPUT->setName("INPUT"); + m_storage->pushArguments(ptr_INPUT); + + llvm::BasicBlock *entry = new BasicBlock("entry", func, 0); + + m_func = func; + m_builder.SetInsertPoint(entry); +} + +void Instructions::endSub() +{ + m_func = 0; + m_builder.SetInsertPoint(0); +} + +llvm::Function * Instructions::findFunction(int label) +{ + llvm::Function *func = m_functions[label]; + if (!func) { + func = declareFunc(label); + m_functions[label] = func; + } + return func; +} + +llvm::Value * Instructions::constVector(float x, float y, float z, float w) +{ + std::vector vec(4); + vec[0] = ConstantFP::get(Type::FloatTy, APFloat(x)); + vec[1] = ConstantFP::get(Type::FloatTy, APFloat(y)); + vec[2] = ConstantFP::get(Type::FloatTy, APFloat(z)); + vec[3] = ConstantFP::get(Type::FloatTy, APFloat(w)); + return ConstantVector::get(m_floatVecType, vec); +} + + +std::vector Instructions::extractVector(llvm::Value *vec) +{ + std::vector elems(4); + elems[0] = m_builder.CreateExtractElement(vec, m_storage->constantInt(0), + name("x")); + elems[1] = m_builder.CreateExtractElement(vec, m_storage->constantInt(1), + name("y")); + elems[2] = m_builder.CreateExtractElement(vec, m_storage->constantInt(2), + name("z")); + elems[3] = m_builder.CreateExtractElement(vec, m_storage->constantInt(3), + name("w")); + return elems; +} + +llvm::Value * Instructions::cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) +{ + llvm::Function *func = m_mod->getFunction("cmp"); + assert(func); + + std::vector params; + params.push_back(in1); + params.push_back(in2); + params.push_back(in3); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end(), name("cmpres")); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::cos(llvm::Value *in) +{ +#if 0 + llvm::Function *func = m_mod->getFunction("vcos"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("cosres")); + call->setTailCall(false); + return call; +#else + std::vector elems = extractVector(in); + Function *func = m_mod->getFunction("cosf"); + assert(func); + CallInst *cos = m_builder.CreateCall(func, elems[0], name("cosres")); + cos->setCallingConv(CallingConv::C); + cos->setTailCall(true); + return vectorFromVals(cos, cos, cos, cos); +#endif +} + +llvm::Value * Instructions::scs(llvm::Value *in) +{ + llvm::Function *func = m_mod->getFunction("scs"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("scsres")); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::kilp(llvm::Value *in) +{ + llvm::Function *func = m_mod->getFunction("kilp"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("kilpres")); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::sin(llvm::Value *in) +{ + llvm::Function *func = m_mod->getFunction("vsin"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("sinres")); + call->setTailCall(false); + return call; +} +#endif //MESA_LLVM + + diff --git a/src/gallium/auxiliary/gallivm/instructions.h b/src/gallium/auxiliary/gallivm/instructions.h new file mode 100644 index 0000000000..9ebc17dd8e --- /dev/null +++ b/src/gallium/auxiliary/gallivm/instructions.h @@ -0,0 +1,152 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ + +#ifndef INSTRUCTIONS_H +#define INSTRUCTIONS_H + +#include +#include +#include +#include + +#include +#include + +namespace llvm { + class VectorType; + class Function; +} + +class Storage; + +class Instructions +{ +public: + Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block, + Storage *storage); + + llvm::BasicBlock *currentBlock() const; + + llvm::Value *abs(llvm::Value *in1); + llvm::Value *arl(llvm::Value *in1); + llvm::Value *add(llvm::Value *in1, llvm::Value *in2); + void beginLoop(); + void bgnSub(unsigned); + void brk(); + void cal(int label, llvm::Value *input); + llvm::Value *cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); + llvm::Value *cos(llvm::Value *in); + llvm::Value *cross(llvm::Value *in1, llvm::Value *in2); + llvm::Value *dp3(llvm::Value *in1, llvm::Value *in2); + llvm::Value *dp4(llvm::Value *in1, llvm::Value *in2); + llvm::Value *dph(llvm::Value *in1, llvm::Value *in2); + llvm::Value *dst(llvm::Value *in1, llvm::Value *in2); + void elseop(); + void endif(); + void endLoop(); + void end(); + void endSub(); + llvm::Value *ex2(llvm::Value *in); + llvm::Value *floor(llvm::Value *in); + llvm::Value *frc(llvm::Value *in); + void ifop(llvm::Value *in); + llvm::Value *kilp(llvm::Value *in); + llvm::Value *lerp(llvm::Value *in1, llvm::Value *in2, + llvm::Value *in3); + llvm::Value *lit(llvm::Value *in); + llvm::Value *lg2(llvm::Value *in); + llvm::Value *madd(llvm::Value *in1, llvm::Value *in2, + llvm::Value *in2); + llvm::Value *min(llvm::Value *in1, llvm::Value *in2); + llvm::Value *max(llvm::Value *in1, llvm::Value *in2); + llvm::Value *mul(llvm::Value *in1, llvm::Value *in2); + llvm::Value *pow(llvm::Value *in1, llvm::Value *in2); + llvm::Value *rcp(llvm::Value *in); + llvm::Value *rsq(llvm::Value *in); + llvm::Value *scs(llvm::Value *in); + llvm::Value *sge(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sgt(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sin(llvm::Value *in); + llvm::Value *slt(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sub(llvm::Value *in1, llvm::Value *in2); + llvm::Value *trunc(llvm::Value *in); + + void printVector(llvm::Value *val); +private: + const char *name(const char *prefix); + + llvm::Value *callFAbs(llvm::Value *val); + llvm::Value *callFloor(llvm::Value *val); + llvm::Value *callFSqrt(llvm::Value *val); + llvm::Value *callFLog(llvm::Value *val); + llvm::Value *callPow(llvm::Value *val1, llvm::Value *val2); + + llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, + llvm::Value *z, llvm::Value *w=0); + + llvm::Value *constVector(float x, float y, float z, float w); + + llvm::Function *declarePrintf(); + llvm::Function *declareFunc(int label); + + llvm::Function *findFunction(int label); + + std::vector extractVector(llvm::Value *vec); +private: + llvm::Module *m_mod; + llvm::Function *m_func; + char m_name[32]; + llvm::LLVMFoldingBuilder m_builder; + int m_idx; + + llvm::VectorType *m_floatVecType; + + llvm::Function *m_llvmFSqrt; + llvm::Function *m_llvmFAbs; + llvm::Function *m_llvmPow; + llvm::Function *m_llvmFloor; + llvm::Function *m_llvmFlog; + llvm::Function *m_llvmLit; + + llvm::Constant *m_fmtPtr; + + std::stack m_ifStack; + struct Loop { + llvm::BasicBlock *begin; + llvm::BasicBlock *end; + }; + std::stack m_loopStack; + std::map m_functions; + Storage *m_storage; +}; + +#endif diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp new file mode 100644 index 0000000000..a4d5046637 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -0,0 +1,121 @@ +#include "instructionssoa.h" + +#include "storagesoa.h" + +#include + +using namespace llvm; + +InstructionsSoa::InstructionsSoa(llvm::Module *mod, llvm::Function *func, + llvm::BasicBlock *block, StorageSoa *storage) + : m_builder(block), + m_storage(storage), + m_idx(0) +{ +} + +const char * InstructionsSoa::name(const char *prefix) const +{ + ++m_idx; + snprintf(m_name, 32, "%s%d", prefix, m_idx); + return m_name; +} + +llvm::Value * InstructionsSoa::vectorFromVals(llvm::Value *x, llvm::Value *y, + llvm::Value *z, llvm::Value *w) +{ + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + Constant *constVector = Constant::getNullValue(vectorType); + Value *res = m_builder.CreateInsertElement(constVector, x, + m_storage->constantInt(0), + name("vecx")); + res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1), + name("vecxy")); + res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2), + name("vecxyz")); + if (w) + res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3), + name("vecxyzw")); + return res; +} + +std::vector InstructionsSoa::arl(const std::vector in) +{ + std::vector res(4); + + //Extract x's + llvm::Value *x1 = m_builder.CreateExtractElement(in[0], + m_storage->constantInt(0), + name("extractX")); + //cast it to an unsigned int + x1 = m_builder.CreateFPToUI(x1, IntegerType::get(32), name("x1IntCast")); + + res[0] = x1;//vectorFromVals(x1, x2, x3, x4); + //only x is valid. the others shouldn't be necessary + /* + res[1] = Constant::getNullValue(m_floatVecType); + res[2] = Constant::getNullValue(m_floatVecType); + res[3] = Constant::getNullValue(m_floatVecType); + */ + + return res; +} + + +std::vector InstructionsSoa::add(const std::vector in1, + const std::vector in2) +{ + std::vector res(4); + + res[0] = m_builder.CreateAdd(in1[0], in2[0], name("addx")); + res[1] = m_builder.CreateAdd(in1[1], in2[1], name("addy")); + res[2] = m_builder.CreateAdd(in1[2], in2[2], name("addz")); + res[3] = m_builder.CreateAdd(in1[3], in2[3], name("addw")); + + return res; +} + +std::vector InstructionsSoa::mul(const std::vector in1, + const std::vector in2) +{ + std::vector res(4); + + res[0] = m_builder.CreateMul(in1[0], in2[0], name("mulx")); + res[1] = m_builder.CreateMul(in1[1], in2[1], name("muly")); + res[2] = m_builder.CreateMul(in1[2], in2[2], name("mulz")); + res[3] = m_builder.CreateMul(in1[3], in2[3], name("mulw")); + + return res; +} + +void InstructionsSoa::end() +{ + m_builder.CreateRetVoid(); +} + +std::vector InstructionsSoa::madd(const std::vector in1, + const std::vector in2, + const std::vector in3) +{ + std::vector res = mul(in1, in2); + return add(res, in3); +} + +std::vector InstructionsSoa::extractVector(llvm::Value *vector) +{ + std::vector res(4); + res[0] = m_builder.CreateExtractElement(vector, + m_storage->constantInt(0), + name("extract1X")); + res[1] = m_builder.CreateExtractElement(vector, + m_storage->constantInt(1), + name("extract2X")); + res[2] = m_builder.CreateExtractElement(vector, + m_storage->constantInt(2), + name("extract3X")); + res[3] = m_builder.CreateExtractElement(vector, + m_storage->constantInt(3), + name("extract4X")); + + return res; +} diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h new file mode 100644 index 0000000000..4169dcbb2e --- /dev/null +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h @@ -0,0 +1,74 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef INSTRUCTIONSSOA_H +#define INSTRUCTIONSSOA_H + +#include + +#include + +namespace llvm { + class Module; + class Function; + class BasicBlock; + class Value; +} +class StorageSoa; + +class InstructionsSoa +{ +public: + InstructionsSoa(llvm::Module *mod, llvm::Function *func, + llvm::BasicBlock *block, StorageSoa *storage); + + std::vector arl(const std::vector in); + + std::vector add(const std::vector in1, + const std::vector in2); + std::vector madd(const std::vector in1, + const std::vector in2, + const std::vector in3); + std::vector mul(const std::vector in1, + const std::vector in2); + void end(); + + std::vector extractVector(llvm::Value *vector); +private: + const char * name(const char *prefix) const; + llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, + llvm::Value *z, llvm::Value *w); +private: + llvm::LLVMFoldingBuilder m_builder; + StorageSoa *m_storage; +private: + mutable int m_idx; + mutable char m_name[32]; +}; + + +#endif diff --git a/src/gallium/auxiliary/gallivm/llvm_builtins.c b/src/gallium/auxiliary/gallivm/llvm_builtins.c new file mode 100644 index 0000000000..4f98d754ba --- /dev/null +++ b/src/gallium/auxiliary/gallivm/llvm_builtins.c @@ -0,0 +1,115 @@ +/*clang --emit-llvm llvm_builtins.c |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=gallivm_builtins.cpp -f -for=shader -funcname=createGallivmBuiltins*/ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ +typedef __attribute__(( ocu_vector_type(4) )) float float4; + +extern float powf(float a, float b); + +inline float approx(float a, float b) +{ + if (b < -128.0f) b = -128.0f; + if (b > 128.0f) b = 128.0f; + if (a < 0) a = 0; + return powf(a, b); +} + +inline float4 lit(float4 tmp) +{ + float4 result; + result.x = 1.0; + result.w = 1.0; + if (tmp.x > 0) { + result.y = tmp.x; + result.z = approx(tmp.y, tmp.w); + } else { + result.y = 0; + result.z = 0; + } + return result; +} + +inline float4 cmp(float4 tmp0, float4 tmp1, float4 tmp2) +{ + float4 result; + + result.x = (tmp0.x < 0.0) ? tmp1.x : tmp2.x; + result.y = (tmp0.y < 0.0) ? tmp1.y : tmp2.y; + result.z = (tmp0.z < 0.0) ? tmp1.z : tmp2.z; + result.w = (tmp0.w < 0.0) ? tmp1.w : tmp2.w; + + return result; +} + +extern float cosf(float val); +extern float sinf(float val); + +inline float4 vcos(float4 val) +{ + float4 result; + printf("VEC IN is %f %f %f %f\n", val.x, val.y, val.z, val.w); + result.x = cosf(val.x); + result.y = cosf(val.x); + result.z = cosf(val.x); + result.w = cosf(val.x); + printf("VEC OUT is %f %f %f %f\n", result.x, result.y, result.z, result.w); + return result; +} + +inline float4 scs(float4 val) +{ + float4 result; + float tmp = val.x; + result.x = cosf(tmp); + result.y = sinf(tmp); + return result; +} + + +inline float4 vsin(float4 val) +{ + float4 result; + float tmp = val.x; + float res = sinf(tmp); + result.x = res; + result.y = res; + result.z = res; + result.w = res; + return result; +} + +inline int kilp(float4 val) +{ + if (val.x < 0 || val.y < 0 || val.z < 0 || val.w < 0) + return 1; + else + return 0; +} diff --git a/src/gallium/auxiliary/gallivm/loweringpass.cpp b/src/gallium/auxiliary/gallivm/loweringpass.cpp new file mode 100644 index 0000000000..556dbec366 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/loweringpass.cpp @@ -0,0 +1,17 @@ +#include "loweringpass.h" + +using namespace llvm; + +char LoweringPass::ID = 0; +RegisterPass X("lowering", "Lowering Pass"); + +LoweringPass::LoweringPass() + : ModulePass((intptr_t)&ID) +{ +} + +bool LoweringPass::runOnModule(Module &m) +{ + llvm::cerr << "Hello: " << m.getModuleIdentifier() << "\n"; + return false; +} diff --git a/src/gallium/auxiliary/gallivm/loweringpass.h b/src/gallium/auxiliary/gallivm/loweringpass.h new file mode 100644 index 0000000000..f62dcf6ba7 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/loweringpass.h @@ -0,0 +1,15 @@ +#ifndef LOWERINGPASS_H +#define LOWERINGPASS_H + +#include "llvm/Pass.h" +#include "llvm/Module.h" + +struct LoweringPass : public llvm::ModulePass +{ + static char ID; + LoweringPass(); + + virtual bool runOnModule(llvm::Module &m); +}; + +#endif diff --git a/src/gallium/auxiliary/gallivm/storage.cpp b/src/gallium/auxiliary/gallivm/storage.cpp new file mode 100644 index 0000000000..c4326de8c5 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/storage.cpp @@ -0,0 +1,364 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ +#ifdef MESA_LLVM + +#include "storage.h" + +#include "gallivm_p.h" + +#include "pipe/p_shader_tokens.h" +#include +#include +#include + +#include +#include +#include +#include +#include + +using namespace llvm; + +Storage::Storage(llvm::BasicBlock *block, llvm::Value *input) + : m_block(block), + m_INPUT(input), + m_addrs(32), + m_idx(0) +{ + m_floatVecType = VectorType::get(Type::FloatTy, 4); + m_intVecType = VectorType::get(IntegerType::get(32), 4); + + m_undefFloatVec = UndefValue::get(m_floatVecType); + m_undefIntVec = UndefValue::get(m_intVecType); + m_extSwizzleVec = 0; + + m_numConsts = 0; +} + +//can only build vectors with all members in the [0, 9] range +llvm::Constant *Storage::shuffleMask(int vec) +{ + if (!m_extSwizzleVec) { + std::vector elems; + elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(0.f))); + elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(1.f))); + elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(0.f))); + elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(1.f))); + m_extSwizzleVec = ConstantVector::get(m_floatVecType, elems); + } + + if (m_intVecs.find(vec) != m_intVecs.end()) { + return m_intVecs[vec]; + } + int origVec = vec; + Constant* const_vec = 0; + if (origVec == 0) { + const_vec = Constant::getNullValue(m_intVecType); + } else { + int x = gallivm_x_swizzle(vec); + int y = gallivm_y_swizzle(vec); + int z = gallivm_z_swizzle(vec); + int w = gallivm_w_swizzle(vec); + std::vector elems; + elems.push_back(constantInt(x)); + elems.push_back(constantInt(y)); + elems.push_back(constantInt(z)); + elems.push_back(constantInt(w)); + const_vec = ConstantVector::get(m_intVecType, elems); + } + + m_intVecs[origVec] = const_vec; + return const_vec; +} + +llvm::ConstantInt *Storage::constantInt(int idx) +{ + if (m_constInts.find(idx) != m_constInts.end()) { + return m_constInts[idx]; + } + ConstantInt *const_int = ConstantInt::get(APInt(32, idx)); + m_constInts[idx] = const_int; + return const_int; +} + +llvm::Value *Storage::inputElement(int idx, llvm::Value *indIdx) +{ + Value *val = element(InputsArg, idx, indIdx); + LoadInst *load = new LoadInst(val, name("input"), false, m_block); + load->setAlignment(8); + + return load; +} + +llvm::Value *Storage::constElement(int idx, llvm::Value *indIdx) +{ + m_numConsts = ((idx + 1) > m_numConsts) ? (idx + 1) : m_numConsts; + + Value *elem = element(ConstsArg, idx, indIdx); + LoadInst *load = new LoadInst(elem, name("const"), false, m_block); + load->setAlignment(8); + return load; +} + +llvm::Value *Storage::shuffleVector(llvm::Value *vec, int shuffle) +{ + Constant *mask = shuffleMask(shuffle); + ShuffleVectorInst *res = + new ShuffleVectorInst(vec, m_extSwizzleVec, mask, + name("shuffle"), m_block); + return res; +} + + +llvm::Value *Storage::tempElement(int idx, llvm::Value *indIdx) +{ + Value *elem = element(TempsArg, idx, indIdx); + + LoadInst *load = new LoadInst(elem, name("temp"), false, m_block); + load->setAlignment(8); + + return load; +} + +void Storage::setTempElement(int idx, llvm::Value *val, int mask) +{ + if (mask != TGSI_WRITEMASK_XYZW) { + llvm::Value *templ = 0; + if (m_tempWriteMap[idx]) + templ = tempElement(idx); + val = maskWrite(val, mask, templ); + } + Value *elem = element(TempsArg, idx); + StoreInst *st = new StoreInst(val, elem, false, m_block); + st->setAlignment(8); + m_tempWriteMap[idx] = true; +} + +void Storage::setOutputElement(int dstIdx, llvm::Value *val, int mask) +{ + if (mask != TGSI_WRITEMASK_XYZW) { + llvm::Value *templ = 0; + if (m_destWriteMap[dstIdx]) + templ = outputElement(dstIdx); + val = maskWrite(val, mask, templ); + } + + Value *elem = element(DestsArg, dstIdx); + StoreInst *st = new StoreInst(val, elem, false, m_block); + st->setAlignment(8); + m_destWriteMap[dstIdx] = true; +} + +llvm::Value *Storage::maskWrite(llvm::Value *src, int mask, llvm::Value *templ) +{ + llvm::Value *dst = templ; + if (!dst) + dst = Constant::getNullValue(m_floatVecType); + if ((mask & TGSI_WRITEMASK_X)) { + llvm::Value *x = new ExtractElementInst(src, unsigned(0), + name("x"), m_block); + dst = new InsertElementInst(dst, x, unsigned(0), + name("dstx"), m_block); + } + if ((mask & TGSI_WRITEMASK_Y)) { + llvm::Value *y = new ExtractElementInst(src, unsigned(1), + name("y"), m_block); + dst = new InsertElementInst(dst, y, unsigned(1), + name("dsty"), m_block); + } + if ((mask & TGSI_WRITEMASK_Z)) { + llvm::Value *z = new ExtractElementInst(src, unsigned(2), + name("z"), m_block); + dst = new InsertElementInst(dst, z, unsigned(2), + name("dstz"), m_block); + } + if ((mask & TGSI_WRITEMASK_W)) { + llvm::Value *w = new ExtractElementInst(src, unsigned(3), + name("w"), m_block); + dst = new InsertElementInst(dst, w, unsigned(3), + name("dstw"), m_block); + } + return dst; +} + +const char * Storage::name(const char *prefix) +{ + ++m_idx; + snprintf(m_name, 32, "%s%d", prefix, m_idx); + return m_name; +} + +int Storage::numConsts() const +{ + return m_numConsts; +} + +llvm::Value * Storage::addrElement(int idx) const +{ + Value *ret = m_addrs[idx]; + if (!ret) + return m_undefFloatVec; + return ret; +} + +void Storage::setAddrElement(int idx, llvm::Value *val, int mask) +{ + if (mask != TGSI_WRITEMASK_XYZW) { + llvm::Value *templ = m_addrs[idx]; + val = maskWrite(val, mask, templ); + } + m_addrs[idx] = val; +} + +llvm::Value * Storage::extractIndex(llvm::Value *vec) +{ + llvm::Value *x = new ExtractElementInst(vec, unsigned(0), + name("x"), m_block); + return new FPToSIInst(x, IntegerType::get(32), name("intidx"), m_block); +} + +void Storage::setCurrentBlock(llvm::BasicBlock *block) +{ + m_block = block; +} + +llvm::Value * Storage::outputElement(int idx, llvm::Value *indIdx) +{ + Value *elem = element(DestsArg, idx, indIdx); + LoadInst *load = new LoadInst(elem, name("output"), false, m_block); + load->setAlignment(8); + + return load; +} + +llvm::Value * Storage::inputPtr() const +{ + return m_INPUT; +} + +void Storage::pushArguments(llvm::Value *input) +{ + m_argStack.push(m_INPUT); + + m_INPUT = input; +} + +void Storage::popArguments() +{ + m_INPUT = m_argStack.top(); + m_argStack.pop(); +} + +void Storage::pushTemps() +{ + m_extSwizzleVec = 0; +} + +void Storage::popTemps() +{ +} + +llvm::Value * Storage::immediateElement(int idx) +{ + return m_immediates[idx]; +} + +void Storage::addImmediate(float *val) +{ + std::vector vec(4); + vec[0] = ConstantFP::get(Type::FloatTy, APFloat(val[0])); + vec[1] = ConstantFP::get(Type::FloatTy, APFloat(val[1])); + vec[2] = ConstantFP::get(Type::FloatTy, APFloat(val[2])); + vec[3] = ConstantFP::get(Type::FloatTy, APFloat(val[3])); + m_immediates.push_back(ConstantVector::get(m_floatVecType, vec)); +} + + +llvm::Value * Storage::elemPtr(Args arg) +{ + std::vector indices; + indices.push_back(constantInt(0)); + indices.push_back(constantInt(static_cast(arg))); + GetElementPtrInst *getElem = new GetElementPtrInst(m_INPUT, + indices.begin(), + indices.end(), + name("input_ptr"), + m_block); + return new LoadInst(getElem, name("input_field"), false, m_block); +} + +llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx, + llvm::Value *indIdx ) +{ + GetElementPtrInst *getElem = 0; + + if (indIdx) { + getElem = new GetElementPtrInst(ptr, + BinaryOperator::create(Instruction::Add, + indIdx, + constantInt(idx), + name("add"), + m_block), + name("field"), + m_block); + } else { + getElem = new GetElementPtrInst(ptr, + constantInt(idx), + name("field"), + m_block); + } + return getElem; +} + +llvm::Value * Storage::element(Args arg, int idx, llvm::Value *indIdx ) +{ + Value *val = elemPtr(arg); + return elemIdx(val, idx, indIdx); +} + +void Storage::setKilElement(llvm::Value *val) +{ + std::vector indices; + indices.push_back(constantInt(0)); + indices.push_back(constantInt(static_cast(KilArg))); + GetElementPtrInst *elem = new GetElementPtrInst(m_INPUT, + indices.begin(), + indices.end(), + name("kil_ptr"), + m_block); + StoreInst *st = new StoreInst(val, elem, false, m_block); + st->setAlignment(8); +} + +#endif //MESA_LLVM + + diff --git a/src/gallium/auxiliary/gallivm/storage.h b/src/gallium/auxiliary/gallivm/storage.h new file mode 100644 index 0000000000..8574f7554e --- /dev/null +++ b/src/gallium/auxiliary/gallivm/storage.h @@ -0,0 +1,133 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ + +#ifndef STORAGE_H +#define STORAGE_H + +#include +#include +#include +#include + +namespace llvm { + class BasicBlock; + class Constant; + class ConstantInt; + class LoadInst; + class Value; + class VectorType; +} + +class Storage +{ +public: + Storage(llvm::BasicBlock *block, + llvm::Value *input); + + llvm::Value *inputPtr() const; + + void setCurrentBlock(llvm::BasicBlock *block); + + llvm::ConstantInt *constantInt(int); + llvm::Constant *shuffleMask(int vec); + llvm::Value *inputElement(int idx, llvm::Value *indIdx =0); + llvm::Value *constElement(int idx, llvm::Value *indIdx =0); + llvm::Value *outputElement(int idx, llvm::Value *indIdx =0); + llvm::Value *tempElement(int idx, llvm::Value *indIdx =0); + llvm::Value *immediateElement(int idx); + + void setOutputElement(int dstIdx, llvm::Value *val, int mask); + void setTempElement(int idx, llvm::Value *val, int mask); + + llvm::Value *addrElement(int idx) const; + void setAddrElement(int idx, llvm::Value *val, int mask); + + void setKilElement(llvm::Value *val); + + llvm::Value *shuffleVector(llvm::Value *vec, int shuffle); + + llvm::Value *extractIndex(llvm::Value *vec); + + int numConsts() const; + + void pushArguments(llvm::Value *input); + void popArguments(); + void pushTemps(); + void popTemps(); + + void addImmediate(float *val); + +private: + llvm::Value *maskWrite(llvm::Value *src, int mask, llvm::Value *templ); + const char *name(const char *prefix); + + enum Args { + DestsArg = 0, + InputsArg = 1, + TempsArg = 2, + ConstsArg = 3, + KilArg = 4 + }; + llvm::Value *elemPtr(Args arg); + llvm::Value *elemIdx(llvm::Value *ptr, int idx, + llvm::Value *indIdx = 0); + llvm::Value *element(Args arg, int idx, llvm::Value *indIdx = 0); + +private: + llvm::BasicBlock *m_block; + llvm::Value *m_INPUT; + + std::map m_constInts; + std::map m_intVecs; + std::vector m_addrs; + std::vector m_immediates; + + llvm::VectorType *m_floatVecType; + llvm::VectorType *m_intVecType; + + char m_name[32]; + int m_idx; + + int m_numConsts; + + std::map m_destWriteMap; + std::map m_tempWriteMap; + + llvm::Value *m_undefFloatVec; + llvm::Value *m_undefIntVec; + llvm::Value *m_extSwizzleVec; + + std::stack m_argStack; + std::stack > m_tempStack; +}; + +#endif diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp new file mode 100644 index 0000000000..ed0674a96f --- /dev/null +++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp @@ -0,0 +1,389 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "storagesoa.h" + +#include "gallivm_p.h" + +#include "pipe/p_shader_tokens.h" +#include "pipe/p_debug.h" + +#include +#include +#include + +#include +#include +#include +#include +#include + +using namespace llvm; + + +StorageSoa::StorageSoa(llvm::BasicBlock *block, + llvm::Value *input, + llvm::Value *output, + llvm::Value *consts, + llvm::Value *temps) + : m_block(block), + m_input(input), + m_output(output), + m_consts(consts), + m_temps(temps), + m_immediates(0), + m_idx(0) +{ +} + +void StorageSoa::addImmediate(float *vec) +{ + std::vector vals(4); + vals[0] = vec[0]; + vals[1] = vec[1]; + vals[2] = vec[2]; + vals[3] = vec[3]; + m_immediatesToFlush.push_back(vals); +} + +void StorageSoa::declareImmediates() +{ + if (m_immediatesToFlush.empty()) + return; + + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + ArrayType *vectorChannels = ArrayType::get(vectorType, 4); + ArrayType *arrayType = ArrayType::get(vectorChannels, m_immediatesToFlush.size()); + + m_immediates = new GlobalVariable( + /*Type=*/arrayType, + /*isConstant=*/false, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Initializer=*/0, // has initializer, specified below + /*Name=*/name("immediates"), + currentModule()); + + std::vector arrayVals; + for (unsigned int i = 0; i < m_immediatesToFlush.size(); ++i) { + std::vector vec = m_immediatesToFlush[i]; + std::vector vals(4); + std::vector channelArray; + + vals[0] = vec[0]; vals[1] = vec[0]; vals[2] = vec[0]; vals[3] = vec[0]; + llvm::Constant *xChannel = createConstGlobalVector(vals); + + vals[0] = vec[1]; vals[1] = vec[1]; vals[2] = vec[1]; vals[3] = vec[1]; + llvm::Constant *yChannel = createConstGlobalVector(vals); + + vals[0] = vec[2]; vals[1] = vec[2]; vals[2] = vec[2]; vals[3] = vec[2]; + llvm::Constant *zChannel = createConstGlobalVector(vals); + + vals[0] = vec[3]; vals[1] = vec[3]; vals[2] = vec[3]; vals[3] = vec[3]; + llvm::Constant *wChannel = createConstGlobalVector(vals); + channelArray.push_back(xChannel); + channelArray.push_back(yChannel); + channelArray.push_back(zChannel); + channelArray.push_back(wChannel); + Constant *constChannels = ConstantArray::get(vectorChannels, + channelArray); + arrayVals.push_back(constChannels); + } + Constant *constArray = ConstantArray::get(arrayType, arrayVals); + m_immediates->setInitializer(constArray); + + m_immediatesToFlush.clear(); +} + +llvm::Value *StorageSoa::addrElement(int idx) const +{ + std::map::const_iterator itr = m_addresses.find(idx); + if (itr == m_addresses.end()) { + debug_printf("Trying to access invalid shader 'address'\n"); + return 0; + } + llvm::Value * res = (*itr).second; + + res = new LoadInst(res, name("addr"), false, m_block); + + return res; +} + +std::vector StorageSoa::inputElement(llvm::Value *idx) +{ + std::vector res(4); + + res[0] = element(m_input, idx, 0); + res[1] = element(m_input, idx, 1); + res[2] = element(m_input, idx, 2); + res[3] = element(m_input, idx, 3); + + return res; +} + +std::vector StorageSoa::constElement(llvm::Value *idx) +{ + std::vector res(4); + llvm::Value *xChannel, *yChannel, *zChannel, *wChannel; + + xChannel = elementPointer(m_consts, idx, 0); + yChannel = elementPointer(m_consts, idx, 1); + zChannel = elementPointer(m_consts, idx, 2); + wChannel = elementPointer(m_consts, idx, 3); + + res[0] = alignedArrayLoad(xChannel); + res[1] = alignedArrayLoad(yChannel); + res[2] = alignedArrayLoad(zChannel); + res[3] = alignedArrayLoad(wChannel); + + return res; +} + +std::vector StorageSoa::outputElement(llvm::Value *idx) +{ + std::vector res(4); + + res[0] = element(m_output, idx, 0); + res[1] = element(m_output, idx, 1); + res[2] = element(m_output, idx, 2); + res[3] = element(m_output, idx, 3); + + return res; +} + +std::vector StorageSoa::tempElement(llvm::Value *idx) +{ + std::vector res(4); + + res[0] = element(m_temps, idx, 0); + res[1] = element(m_temps, idx, 1); + res[2] = element(m_temps, idx, 2); + res[3] = element(m_temps, idx, 3); + + return res; +} + +std::vector StorageSoa::immediateElement(llvm::Value *idx) +{ + std::vector res(4); + + res[0] = element(m_immediates, idx, 0); + res[1] = element(m_immediates, idx, 1); + res[2] = element(m_immediates, idx, 2); + res[3] = element(m_immediates, idx, 3); + + return res; +} + +llvm::Value * StorageSoa::elementPointer(llvm::Value *ptr, llvm::Value *index, + int channel) const +{ + std::vector indices; + if (m_immediates == ptr) + indices.push_back(constantInt(0)); + indices.push_back(index); + indices.push_back(constantInt(channel)); + + GetElementPtrInst *getElem = new GetElementPtrInst(ptr, + indices.begin(), + indices.end(), + name("ptr"), + m_block); + return getElem; +} + +llvm::Value * StorageSoa::element(llvm::Value *ptr, llvm::Value *index, + int channel) const +{ + llvm::Value *res = elementPointer(ptr, index, channel); + LoadInst *load = new LoadInst(res, name("element"), false, m_block); + //load->setAlignment(8); + return load; +} + +const char * StorageSoa::name(const char *prefix) const +{ + ++m_idx; + snprintf(m_name, 32, "%s%d", prefix, m_idx); + return m_name; +} + +llvm::ConstantInt * StorageSoa::constantInt(int idx) const +{ + if (m_constInts.find(idx) != m_constInts.end()) { + return m_constInts[idx]; + } + ConstantInt *constInt = ConstantInt::get(APInt(32, idx)); + m_constInts[idx] = constInt; + return constInt; +} + +llvm::Value *StorageSoa::alignedArrayLoad(llvm::Value *val) +{ + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + PointerType *vectorPtr = PointerType::get(vectorType, 0); + + CastInst *cast = new BitCastInst(val, vectorPtr, name("toVector"), m_block); + LoadInst *load = new LoadInst(cast, name("alignLoad"), false, m_block); + load->setAlignment(8); + return load; +} + +llvm::Module * StorageSoa::currentModule() const +{ + if (!m_block || !m_block->getParent()) + return 0; + + return m_block->getParent()->getParent(); +} + +llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector &vec) +{ + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + std::vector immValues; + ConstantFP *constx = ConstantFP::get(Type::FloatTy, APFloat(vec[0])); + ConstantFP *consty = ConstantFP::get(Type::FloatTy, APFloat(vec[1])); + ConstantFP *constz = ConstantFP::get(Type::FloatTy, APFloat(vec[2])); + ConstantFP *constw = ConstantFP::get(Type::FloatTy, APFloat(vec[3])); + immValues.push_back(constx); + immValues.push_back(consty); + immValues.push_back(constz); + immValues.push_back(constw); + Constant *constVector = ConstantVector::get(vectorType, immValues); + + return constVector; +} + +std::vector StorageSoa::load(Argument type, int idx, int swizzle, + llvm::Value *indIdx) +{ + std::vector val(4); + + //if we have an indirect index, always use that + // if not use the integer offset to create one + llvm::Value *realIndex = 0; + if (indIdx) + realIndex = indIdx; + else + realIndex = constantInt(idx); + debug_printf("XXXXXXXXX realIdx = %p, indIdx = %p\n", realIndex, indIdx); + + switch(type) { + case Input: + val = inputElement(realIndex); + break; + case Output: + val = outputElement(realIndex); + break; + case Temp: + val = tempElement(realIndex); + break; + case Const: + val = constElement(realIndex); + break; + case Immediate: + val = immediateElement(realIndex); + break; + case Address: + debug_printf("Address not handled in the load phase!\n"); + assert(0); + break; + } + if (!gallivm_is_swizzle(swizzle)) + return val; + + std::vector res(4); + + res[0] = val[gallivm_x_swizzle(swizzle)]; + res[1] = val[gallivm_y_swizzle(swizzle)]; + res[2] = val[gallivm_z_swizzle(swizzle)]; + res[3] = val[gallivm_w_swizzle(swizzle)]; + return res; +} + +void StorageSoa::store(Argument type, int idx, const std::vector &val, + int mask) +{ + llvm::Value *out = 0; + switch(type) { + case Output: + out = m_output; + break; + case Temp: + out = m_temps; + break; + case Input: + out = m_input; + break; + case Address: { + llvm::Value *addr = m_addresses[idx]; + if (!addr) { + addAddress(idx); + addr = m_addresses[idx]; + assert(addr); + } + new StoreInst(val[0], addr, false, m_block); + return; + break; + } + default: + debug_printf("Can't save output of this type: %d !\n", type); + assert(0); + break; + } + llvm::Value *realIndex = constantInt(idx); + if ((mask & TGSI_WRITEMASK_X)) { + llvm::Value *xChannel = elementPointer(out, realIndex, 0); + new StoreInst(val[0], xChannel, false, m_block); + } + if ((mask & TGSI_WRITEMASK_Y)) { + llvm::Value *yChannel = elementPointer(out, realIndex, 1); + new StoreInst(val[1], yChannel, false, m_block); + } + if ((mask & TGSI_WRITEMASK_Z)) { + llvm::Value *zChannel = elementPointer(out, realIndex, 2); + new StoreInst(val[2], zChannel, false, m_block); + } + if ((mask & TGSI_WRITEMASK_W)) { + llvm::Value *wChannel = elementPointer(out, realIndex, 3); + new StoreInst(val[3], wChannel, false, m_block); + } +} + +void StorageSoa::addAddress(int idx) +{ + GlobalVariable *val = new GlobalVariable( + /*Type=*/IntegerType::get(32), + /*isConstant=*/false, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Initializer=*/0, // has initializer, specified below + /*Name=*/name("address"), + currentModule()); + val->setInitializer(Constant::getNullValue(IntegerType::get(32))); + + debug_printf("adding to %d\n", idx); + m_addresses[idx] = val; +} diff --git a/src/gallium/auxiliary/gallivm/storagesoa.h b/src/gallium/auxiliary/gallivm/storagesoa.h new file mode 100644 index 0000000000..6443351f27 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/storagesoa.h @@ -0,0 +1,111 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef STORAGESOA_H +#define STORAGESOA_H + +#include +#include +#include + +namespace llvm { + class BasicBlock; + class Constant; + class ConstantInt; + class GlobalVariable; + class LoadInst; + class Value; + class VectorType; + class Module; +} + +class StorageSoa +{ +public: + enum Argument { + Input, + Output, + Temp, + Const, + Immediate, + Address + }; +public: + StorageSoa(llvm::BasicBlock *block, + llvm::Value *input, + llvm::Value *output, + llvm::Value *consts, + llvm::Value *temps); + + + std::vector load(Argument type, int idx, int swizzle, + llvm::Value *indIdx =0); + void store(Argument type, int idx, const std::vector &val, + int mask); + + void addImmediate(float *vec); + void declareImmediates(); + + void addAddress(int idx); + + llvm::Value * addrElement(int idx) const; + + llvm::ConstantInt *constantInt(int) const; +private: + llvm::Value *elementPointer(llvm::Value *ptr, llvm::Value *indIdx, + int channel) const; + llvm::Value *element(llvm::Value *ptr, llvm::Value *idx, + int channel) const; + const char *name(const char *prefix) const; + llvm::Value *alignedArrayLoad(llvm::Value *val); + llvm::Module *currentModule() const; + llvm::Constant *createConstGlobalVector(const std::vector &vec); + + std::vector inputElement(llvm::Value *indIdx); + std::vector constElement(llvm::Value *indIdx); + std::vector outputElement(llvm::Value *indIdx); + std::vector tempElement(llvm::Value *indIdx); + std::vector immediateElement(llvm::Value *indIdx); +private: + llvm::BasicBlock *m_block; + + llvm::Value *m_input; + llvm::Value *m_output; + llvm::Value *m_consts; + llvm::Value *m_temps; + llvm::GlobalVariable *m_immediates; + + std::map m_addresses; + + std::vector > m_immediatesToFlush; + + mutable std::map m_constInts; + mutable char m_name[32]; + mutable int m_idx; +}; + +#endif diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp new file mode 100644 index 0000000000..2cb4acce32 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -0,0 +1,1221 @@ +#include "tgsitollvm.h" + +#include "gallivm.h" +#include "gallivm_p.h" + +#include "storage.h" +#include "instructions.h" +#include "storagesoa.h" +#include "instructionssoa.h" + +#include "pipe/p_shader_tokens.h" + +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_util.h" +#include "tgsi/util/tgsi_build.h" +#include "tgsi/util/tgsi_dump.h" + + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include +#include +#include + +using namespace llvm; + +static inline FunctionType *vertexShaderFunctionType() +{ + //Function takes three arguments, + // the calling code has to make sure the types it will + // pass are castable to the following: + // [4 x <4 x float>] inputs, + // [4 x <4 x float>] output, + // [4 x [4 x float]] consts, + // [4 x <4 x float>] temps + + std::vector funcArgs; + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + ArrayType *vectorArray = ArrayType::get(vectorType, 4); + PointerType *vectorArrayPtr = PointerType::get(vectorArray, 0); + + ArrayType *floatArray = ArrayType::get(Type::FloatTy, 4); + ArrayType *constsArray = ArrayType::get(floatArray, 4); + PointerType *constsArrayPtr = PointerType::get(constsArray, 0); + + funcArgs.push_back(vectorArrayPtr);//inputs + funcArgs.push_back(vectorArrayPtr);//output + funcArgs.push_back(constsArrayPtr);//consts + funcArgs.push_back(vectorArrayPtr);//temps + + FunctionType *functionType = FunctionType::get( + /*Result=*/Type::VoidTy, + /*Params=*/funcArgs, + /*isVarArg=*/false); + + return functionType; +} + +static inline void +add_interpolator(struct gallivm_ir *ir, + struct gallivm_interpolate *interp) +{ + ir->interpolators[ir->num_interp] = *interp; + ++ir->num_interp; +} + +static void +translate_declaration(struct gallivm_ir *prog, + llvm::Module *module, + Storage *storage, + struct tgsi_full_declaration *decl, + struct tgsi_full_declaration *fd) +{ + if (decl->Declaration.File == TGSI_FILE_INPUT) { + unsigned first, last, mask; + uint interp_method; + + assert(decl->Declaration.Declare == TGSI_DECLARE_RANGE); + + first = decl->u.DeclarationRange.First; + last = decl->u.DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + /* Do not touch WPOS.xy */ + if (first == 0) { + mask &= ~TGSI_WRITEMASK_XY; + if (mask == TGSI_WRITEMASK_NONE) { + first++; + if (first > last) { + return; + } + } + } + + interp_method = decl->Interpolation.Interpolate; + + if (mask == TGSI_WRITEMASK_XYZW) { + unsigned i, j; + + for (i = first; i <= last; i++) { + for (j = 0; j < NUM_CHANNELS; j++) { + //interp( mach, i, j ); + struct gallivm_interpolate interp; + interp.type = interp_method; + interp.attrib = i; + interp.chan = j; + add_interpolator(prog, &interp); + } + } + } else { + unsigned i, j; + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + for( i = first; i <= last; i++ ) { + struct gallivm_interpolate interp; + interp.type = interp_method; + interp.attrib = i; + interp.chan = j; + add_interpolator(prog, &interp); + } + } + } + } + } +} + +static void +translate_declarationir(struct gallivm_ir *, + llvm::Module *, + StorageSoa *storage, + struct tgsi_full_declaration *decl, + struct tgsi_full_declaration *) +{ + if (decl->Declaration.File == TGSI_FILE_ADDRESS) { + int idx = decl->u.DeclarationRange.First; + storage->addAddress(idx); + } +} + +static void +translate_immediate(Storage *storage, + struct tgsi_full_immediate *imm) +{ + float vec[4]; + int i; + for (i = 0; i < imm->Immediate.Size - 1; ++i) { + switch (imm->Immediate.DataType) { + case TGSI_IMM_FLOAT32: + vec[i] = imm->u.ImmediateFloat32[i].Float; + break; + default: + assert(0); + } + } + storage->addImmediate(vec); +} + + +static void +translate_immediateir(StorageSoa *storage, + struct tgsi_full_immediate *imm) +{ + float vec[4]; + int i; + for (i = 0; i < imm->Immediate.Size - 1; ++i) { + switch (imm->Immediate.DataType) { + case TGSI_IMM_FLOAT32: + vec[i] = imm->u.ImmediateFloat32[i].Float; + break; + default: + assert(0); + } + } + storage->addImmediate(vec); +} + +static inline int +swizzleInt(struct tgsi_full_src_register *src) +{ + int swizzle = 0; + int start = 1000; + + for (int k = 0; k < 4; ++k) { + swizzle += tgsi_util_get_full_src_register_extswizzle(src, k) * start; + start /= 10; + } + return swizzle; +} + +static inline llvm::Value * +swizzleVector(llvm::Value *val, struct tgsi_full_src_register *src, + Storage *storage) +{ + int swizzle = swizzleInt(src); + + if (gallivm_is_swizzle(swizzle)) { + /*fprintf(stderr, "XXXXXXXX swizzle = %d\n", swizzle);*/ + val = storage->shuffleVector(val, swizzle); + } + return val; +} + +static void +translate_instruction(llvm::Module *module, + Storage *storage, + Instructions *instr, + struct tgsi_full_instruction *inst, + struct tgsi_full_instruction *fi, + unsigned instno) +{ + llvm::Value *inputs[4]; + inputs[0] = 0; + inputs[1] = 0; + inputs[2] = 0; + inputs[3] = 0; + + for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { + struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + llvm::Value *val = 0; + llvm::Value *indIdx = 0; + + if (src->SrcRegister.Indirect) { + indIdx = storage->addrElement(src->SrcRegisterInd.Index); + indIdx = storage->extractIndex(indIdx); + } + if (src->SrcRegister.File == TGSI_FILE_CONSTANT) { + val = storage->constElement(src->SrcRegister.Index, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_INPUT) { + val = storage->inputElement(src->SrcRegister.Index, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) { + val = storage->tempElement(src->SrcRegister.Index); + } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) { + val = storage->outputElement(src->SrcRegister.Index, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { + val = storage->immediateElement(src->SrcRegister.Index); + } else { + fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File); + return; + } + + inputs[i] = swizzleVector(val, src, storage); + } + + /*if (inputs[0]) + instr->printVector(inputs[0]); + if (inputs[1]) + instr->printVector(inputs[1]);*/ + llvm::Value *out = 0; + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: { + out = instr->arl(inputs[0]); + } + break; + case TGSI_OPCODE_MOV: { + out = inputs[0]; + } + break; + case TGSI_OPCODE_LIT: { + out = instr->lit(inputs[0]); + } + break; + case TGSI_OPCODE_RCP: { + out = instr->rcp(inputs[0]); + } + break; + case TGSI_OPCODE_RSQ: { + out = instr->rsq(inputs[0]); + } + break; + case TGSI_OPCODE_EXP: + break; + case TGSI_OPCODE_LOG: + break; + case TGSI_OPCODE_MUL: { + out = instr->mul(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_ADD: { + out = instr->add(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DP3: { + out = instr->dp3(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DP4: { + out = instr->dp4(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DST: { + out = instr->dst(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_MIN: { + out = instr->min(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_MAX: { + out = instr->max(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_SLT: { + out = instr->slt(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_SGE: { + out = instr->sge(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_MAD: { + out = instr->madd(inputs[0], inputs[1], inputs[2]); + } + break; + case TGSI_OPCODE_SUB: { + out = instr->sub(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_LERP: { + out = instr->lerp(inputs[0], inputs[1], inputs[2]); + } + break; + case TGSI_OPCODE_CND: + break; + case TGSI_OPCODE_CND0: + break; + case TGSI_OPCODE_DOT2ADD: + break; + case TGSI_OPCODE_INDEX: + break; + case TGSI_OPCODE_NEGATE: + break; + case TGSI_OPCODE_FRAC: { + out = instr->frc(inputs[0]); + } + break; + case TGSI_OPCODE_CLAMP: + break; + case TGSI_OPCODE_FLOOR: { + out = instr->floor(inputs[0]); + } + break; + case TGSI_OPCODE_ROUND: + break; + case TGSI_OPCODE_EXPBASE2: { + out = instr->ex2(inputs[0]); + } + break; + case TGSI_OPCODE_LOGBASE2: { + out = instr->lg2(inputs[0]); + } + break; + case TGSI_OPCODE_POWER: { + out = instr->pow(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_CROSSPRODUCT: { + out = instr->cross(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_MULTIPLYMATRIX: + break; + case TGSI_OPCODE_ABS: { + out = instr->abs(inputs[0]); + } + break; + case TGSI_OPCODE_RCC: + break; + case TGSI_OPCODE_DPH: { + out = instr->dph(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_COS: { + out = instr->cos(inputs[0]); + } + break; + case TGSI_OPCODE_DDX: + break; + case TGSI_OPCODE_DDY: + break; + case TGSI_OPCODE_KILP: { + out = instr->kilp(inputs[0]); + storage->setKilElement(out); + return; + } + break; + case TGSI_OPCODE_PK2H: + break; + case TGSI_OPCODE_PK2US: + break; + case TGSI_OPCODE_PK4B: + break; + case TGSI_OPCODE_PK4UB: + break; + case TGSI_OPCODE_RFL: + break; + case TGSI_OPCODE_SEQ: + break; + case TGSI_OPCODE_SFL: + break; + case TGSI_OPCODE_SGT: { + out = instr->sgt(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_SIN: { + out = instr->sin(inputs[0]); + } + break; + case TGSI_OPCODE_SLE: + break; + case TGSI_OPCODE_SNE: + break; + case TGSI_OPCODE_STR: + break; + case TGSI_OPCODE_TEX: + break; + case TGSI_OPCODE_TXD: + break; + case TGSI_OPCODE_UP2H: + break; + case TGSI_OPCODE_UP2US: + break; + case TGSI_OPCODE_UP4B: + break; + case TGSI_OPCODE_UP4UB: + break; + case TGSI_OPCODE_X2D: + break; + case TGSI_OPCODE_ARA: + break; + case TGSI_OPCODE_ARR: + break; + case TGSI_OPCODE_BRA: + break; + case TGSI_OPCODE_CAL: { + instr->cal(inst->InstructionExtLabel.Label, storage->inputPtr()); + return; + } + break; + case TGSI_OPCODE_RET: { + instr->end(); + return; + } + break; + case TGSI_OPCODE_SSG: + break; + case TGSI_OPCODE_CMP: { + out = instr->cmp(inputs[0], inputs[1], inputs[2]); + } + break; + case TGSI_OPCODE_SCS: { + out = instr->scs(inputs[0]); + } + break; + case TGSI_OPCODE_TXB: + break; + case TGSI_OPCODE_NRM: + break; + case TGSI_OPCODE_DIV: + break; + case TGSI_OPCODE_DP2: + break; + case TGSI_OPCODE_TXL: + break; + case TGSI_OPCODE_BRK: { + instr->brk(); + return; + } + break; + case TGSI_OPCODE_IF: { + instr->ifop(inputs[0]); + storage->setCurrentBlock(instr->currentBlock()); + return; //just update the state + } + break; + case TGSI_OPCODE_LOOP: + break; + case TGSI_OPCODE_REP: + break; + case TGSI_OPCODE_ELSE: { + instr->elseop(); + storage->setCurrentBlock(instr->currentBlock()); + return; //only state update + } + break; + case TGSI_OPCODE_ENDIF: { + instr->endif(); + storage->setCurrentBlock(instr->currentBlock()); + return; //just update the state + } + break; + case TGSI_OPCODE_ENDLOOP: + break; + case TGSI_OPCODE_ENDREP: + break; + case TGSI_OPCODE_PUSHA: + break; + case TGSI_OPCODE_POPA: + break; + case TGSI_OPCODE_CEIL: + break; + case TGSI_OPCODE_I2F: + break; + case TGSI_OPCODE_NOT: + break; + case TGSI_OPCODE_TRUNC: { + out = instr->trunc(inputs[0]); + } + break; + case TGSI_OPCODE_SHL: + break; + case TGSI_OPCODE_SHR: + break; + case TGSI_OPCODE_AND: + break; + case TGSI_OPCODE_OR: + break; + case TGSI_OPCODE_MOD: + break; + case TGSI_OPCODE_XOR: + break; + case TGSI_OPCODE_SAD: + break; + case TGSI_OPCODE_TXF: + break; + case TGSI_OPCODE_TXQ: + break; + case TGSI_OPCODE_CONT: + break; + case TGSI_OPCODE_EMIT: + break; + case TGSI_OPCODE_ENDPRIM: + break; + case TGSI_OPCODE_BGNLOOP2: { + instr->beginLoop(); + storage->setCurrentBlock(instr->currentBlock()); + return; + } + break; + case TGSI_OPCODE_BGNSUB: { + instr->bgnSub(instno); + storage->setCurrentBlock(instr->currentBlock()); + storage->pushTemps(); + return; + } + break; + case TGSI_OPCODE_ENDLOOP2: { + instr->endLoop(); + storage->setCurrentBlock(instr->currentBlock()); + return; + } + break; + case TGSI_OPCODE_ENDSUB: { + instr->endSub(); + storage->setCurrentBlock(instr->currentBlock()); + storage->popArguments(); + storage->popTemps(); + return; + } + break; + case TGSI_OPCODE_NOISE1: + break; + case TGSI_OPCODE_NOISE2: + break; + case TGSI_OPCODE_NOISE3: + break; + case TGSI_OPCODE_NOISE4: + break; + case TGSI_OPCODE_NOP: + break; + case TGSI_OPCODE_TEXBEM: + break; + case TGSI_OPCODE_TEXBEML: + break; + case TGSI_OPCODE_TEXREG2AR: + break; + case TGSI_OPCODE_TEXM3X2PAD: + break; + case TGSI_OPCODE_TEXM3X2TEX: + break; + case TGSI_OPCODE_TEXM3X3PAD: + break; + case TGSI_OPCODE_TEXM3X3TEX: + break; + case TGSI_OPCODE_TEXM3X3SPEC: + break; + case TGSI_OPCODE_TEXM3X3VSPEC: + break; + case TGSI_OPCODE_TEXREG2GB: + break; + case TGSI_OPCODE_TEXREG2RGB: + break; + case TGSI_OPCODE_TEXDP3TEX: + break; + case TGSI_OPCODE_TEXDP3: + break; + case TGSI_OPCODE_TEXM3X3: + break; + case TGSI_OPCODE_TEXM3X2DEPTH: + break; + case TGSI_OPCODE_TEXDEPTH: + break; + case TGSI_OPCODE_BEM: + break; + case TGSI_OPCODE_M4X3: + break; + case TGSI_OPCODE_M3X4: + break; + case TGSI_OPCODE_M3X3: + break; + case TGSI_OPCODE_M3X2: + break; + case TGSI_OPCODE_NRM4: + break; + case TGSI_OPCODE_CALLNZ: + break; + case TGSI_OPCODE_IFC: + break; + case TGSI_OPCODE_BREAKC: + break; + case TGSI_OPCODE_KIL: + break; + case TGSI_OPCODE_END: + instr->end(); + return; + break; + default: + fprintf(stderr, "ERROR: Unknown opcode %d\n", + inst->Instruction.Opcode); + assert(0); + break; + } + + if (!out) { + fprintf(stderr, "ERROR: unsupported opcode %d\n", + inst->Instruction.Opcode); + assert(!"Unsupported opcode"); + } + + /* # not sure if we need this */ + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; + case TGSI_SAT_ZERO_ONE: + /*TXT( "_SAT" );*/ + break; + case TGSI_SAT_MINUS_PLUS_ONE: + /*TXT( "_SAT[-1,1]" );*/ + break; + default: + assert( 0 ); + } + + /* store results */ + for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + + if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { + storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) { + storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) { + storage->setAddrElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } else { + fprintf(stderr, "ERROR: unsupported LLVM destination!"); + assert(!"wrong destination"); + } + } +} + + +static void +translate_instructionir(llvm::Module *module, + StorageSoa *storage, + InstructionsSoa *instr, + struct tgsi_full_instruction *inst, + struct tgsi_full_instruction *fi, + unsigned instno) +{ + std::vector< std::vector > inputs(inst->Instruction.NumSrcRegs); + + for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { + struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + std::vector val; + llvm::Value *indIdx = 0; + int swizzle = swizzleInt(src); + + if (src->SrcRegister.Indirect) { + indIdx = storage->addrElement(src->SrcRegisterInd.Index); + } + if (src->SrcRegister.File == TGSI_FILE_CONSTANT) { + val = storage->load(StorageSoa::Const, + src->SrcRegister.Index, swizzle, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_INPUT) { + val = storage->load(StorageSoa::Input, + src->SrcRegister.Index, swizzle, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) { + val = storage->load(StorageSoa::Temp, + src->SrcRegister.Index, swizzle, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) { + val = storage->load(StorageSoa::Output, + src->SrcRegister.Index, swizzle, indIdx); + } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { + val = storage->load(StorageSoa::Immediate, + src->SrcRegister.Index, swizzle, indIdx); + } else { + fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File); + return; + } + + inputs[i] = val; + } + + std::vector out(4); + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: { + out = instr->arl(inputs[0]); + } + break; + case TGSI_OPCODE_MOV: { + out = inputs[0]; + } + break; + case TGSI_OPCODE_LIT: { + } + break; + case TGSI_OPCODE_RCP: { + } + break; + case TGSI_OPCODE_RSQ: { + } + break; + case TGSI_OPCODE_EXP: + break; + case TGSI_OPCODE_LOG: + break; + case TGSI_OPCODE_MUL: { + out = instr->mul(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_ADD: { + out = instr->add(inputs[0], inputs[1]); + } + break; + case TGSI_OPCODE_DP3: { + } + break; + case TGSI_OPCODE_DP4: { + } + break; + case TGSI_OPCODE_DST: { + } + break; + case TGSI_OPCODE_MIN: { + } + break; + case TGSI_OPCODE_MAX: { + } + break; + case TGSI_OPCODE_SLT: { + } + break; + case TGSI_OPCODE_SGE: { + } + break; + case TGSI_OPCODE_MAD: { + out = instr->madd(inputs[0], inputs[1], inputs[2]); + } + break; + case TGSI_OPCODE_SUB: { + } + break; + case TGSI_OPCODE_LERP: { + } + break; + case TGSI_OPCODE_CND: + break; + case TGSI_OPCODE_CND0: + break; + case TGSI_OPCODE_DOT2ADD: + break; + case TGSI_OPCODE_INDEX: + break; + case TGSI_OPCODE_NEGATE: + break; + case TGSI_OPCODE_FRAC: { + } + break; + case TGSI_OPCODE_CLAMP: + break; + case TGSI_OPCODE_FLOOR: { + } + break; + case TGSI_OPCODE_ROUND: + break; + case TGSI_OPCODE_EXPBASE2: { + } + break; + case TGSI_OPCODE_LOGBASE2: { + } + break; + case TGSI_OPCODE_POWER: { + } + break; + case TGSI_OPCODE_CROSSPRODUCT: { + } + break; + case TGSI_OPCODE_MULTIPLYMATRIX: + break; + case TGSI_OPCODE_ABS: { + } + break; + case TGSI_OPCODE_RCC: + break; + case TGSI_OPCODE_DPH: { + } + break; + case TGSI_OPCODE_COS: { + } + break; + case TGSI_OPCODE_DDX: + break; + case TGSI_OPCODE_DDY: + break; + case TGSI_OPCODE_KILP: { + } + break; + case TGSI_OPCODE_PK2H: + break; + case TGSI_OPCODE_PK2US: + break; + case TGSI_OPCODE_PK4B: + break; + case TGSI_OPCODE_PK4UB: + break; + case TGSI_OPCODE_RFL: + break; + case TGSI_OPCODE_SEQ: + break; + case TGSI_OPCODE_SFL: + break; + case TGSI_OPCODE_SGT: { + } + break; + case TGSI_OPCODE_SIN: { + } + break; + case TGSI_OPCODE_SLE: + break; + case TGSI_OPCODE_SNE: + break; + case TGSI_OPCODE_STR: + break; + case TGSI_OPCODE_TEX: + break; + case TGSI_OPCODE_TXD: + break; + case TGSI_OPCODE_UP2H: + break; + case TGSI_OPCODE_UP2US: + break; + case TGSI_OPCODE_UP4B: + break; + case TGSI_OPCODE_UP4UB: + break; + case TGSI_OPCODE_X2D: + break; + case TGSI_OPCODE_ARA: + break; + case TGSI_OPCODE_ARR: + break; + case TGSI_OPCODE_BRA: + break; + case TGSI_OPCODE_CAL: { + } + break; + case TGSI_OPCODE_RET: { + } + break; + case TGSI_OPCODE_SSG: + break; + case TGSI_OPCODE_CMP: { + } + break; + case TGSI_OPCODE_SCS: { + } + break; + case TGSI_OPCODE_TXB: + break; + case TGSI_OPCODE_NRM: + break; + case TGSI_OPCODE_DIV: + break; + case TGSI_OPCODE_DP2: + break; + case TGSI_OPCODE_TXL: + break; + case TGSI_OPCODE_BRK: { + } + break; + case TGSI_OPCODE_IF: { + } + break; + case TGSI_OPCODE_LOOP: + break; + case TGSI_OPCODE_REP: + break; + case TGSI_OPCODE_ELSE: { + } + break; + case TGSI_OPCODE_ENDIF: { + } + break; + case TGSI_OPCODE_ENDLOOP: + break; + case TGSI_OPCODE_ENDREP: + break; + case TGSI_OPCODE_PUSHA: + break; + case TGSI_OPCODE_POPA: + break; + case TGSI_OPCODE_CEIL: + break; + case TGSI_OPCODE_I2F: + break; + case TGSI_OPCODE_NOT: + break; + case TGSI_OPCODE_TRUNC: { + } + break; + case TGSI_OPCODE_SHL: + break; + case TGSI_OPCODE_SHR: + break; + case TGSI_OPCODE_AND: + break; + case TGSI_OPCODE_OR: + break; + case TGSI_OPCODE_MOD: + break; + case TGSI_OPCODE_XOR: + break; + case TGSI_OPCODE_SAD: + break; + case TGSI_OPCODE_TXF: + break; + case TGSI_OPCODE_TXQ: + break; + case TGSI_OPCODE_CONT: + break; + case TGSI_OPCODE_EMIT: + break; + case TGSI_OPCODE_ENDPRIM: + break; + case TGSI_OPCODE_BGNLOOP2: { + } + break; + case TGSI_OPCODE_BGNSUB: { + } + break; + case TGSI_OPCODE_ENDLOOP2: { + } + break; + case TGSI_OPCODE_ENDSUB: { + } + break; + case TGSI_OPCODE_NOISE1: + break; + case TGSI_OPCODE_NOISE2: + break; + case TGSI_OPCODE_NOISE3: + break; + case TGSI_OPCODE_NOISE4: + break; + case TGSI_OPCODE_NOP: + break; + case TGSI_OPCODE_TEXBEM: + break; + case TGSI_OPCODE_TEXBEML: + break; + case TGSI_OPCODE_TEXREG2AR: + break; + case TGSI_OPCODE_TEXM3X2PAD: + break; + case TGSI_OPCODE_TEXM3X2TEX: + break; + case TGSI_OPCODE_TEXM3X3PAD: + break; + case TGSI_OPCODE_TEXM3X3TEX: + break; + case TGSI_OPCODE_TEXM3X3SPEC: + break; + case TGSI_OPCODE_TEXM3X3VSPEC: + break; + case TGSI_OPCODE_TEXREG2GB: + break; + case TGSI_OPCODE_TEXREG2RGB: + break; + case TGSI_OPCODE_TEXDP3TEX: + break; + case TGSI_OPCODE_TEXDP3: + break; + case TGSI_OPCODE_TEXM3X3: + break; + case TGSI_OPCODE_TEXM3X2DEPTH: + break; + case TGSI_OPCODE_TEXDEPTH: + break; + case TGSI_OPCODE_BEM: + break; + case TGSI_OPCODE_M4X3: + break; + case TGSI_OPCODE_M3X4: + break; + case TGSI_OPCODE_M3X3: + break; + case TGSI_OPCODE_M3X2: + break; + case TGSI_OPCODE_NRM4: + break; + case TGSI_OPCODE_CALLNZ: + break; + case TGSI_OPCODE_IFC: + break; + case TGSI_OPCODE_BREAKC: + break; + case TGSI_OPCODE_KIL: + break; + case TGSI_OPCODE_END: + instr->end(); + return; + break; + default: + fprintf(stderr, "ERROR: Unknown opcode %d\n", + inst->Instruction.Opcode); + assert(0); + break; + } + + if (!out[0]) { + fprintf(stderr, "ERROR: unsupported opcode %d\n", + inst->Instruction.Opcode); + assert(!"Unsupported opcode"); + } + + /* store results */ + for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + + if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { + storage->store(StorageSoa::Output, + dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) { + storage->store(StorageSoa::Temp, + dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) { + storage->store(StorageSoa::Address, + dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + } else { + fprintf(stderr, "ERROR: unsupported LLVM destination!"); + assert(!"wrong destination"); + } + } +} + +llvm::Module * +tgsi_to_llvm(struct gallivm_ir *ir, const struct tgsi_token *tokens) +{ + llvm::Module *mod = new Module("shader"); + struct tgsi_parse_context parse; + struct tgsi_full_instruction fi; + struct tgsi_full_declaration fd; + unsigned instno = 0; + Function* shader = mod->getFunction("execute_shader"); + std::ostringstream stream; + if (ir->type == GALLIVM_VS) { + stream << "vs_shader"; + } else { + stream << "fs_shader"; + } + stream << ir->id; + std::string func_name = stream.str(); + shader->setName(func_name.c_str()); + + Function::arg_iterator args = shader->arg_begin(); + Value *ptr_INPUT = args++; + ptr_INPUT->setName("input"); + + BasicBlock *label_entry = new BasicBlock("entry", shader, 0); + + tgsi_parse_init(&parse, tokens); + + fi = tgsi_default_full_instruction(); + fd = tgsi_default_full_declaration(); + Storage storage(label_entry, ptr_INPUT); + Instructions instr(mod, shader, label_entry, &storage); + while(!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + translate_declaration(ir, mod, &storage, + &parse.FullToken.FullDeclaration, + &fd); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + translate_immediate(&storage, + &parse.FullToken.FullImmediate); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + translate_instruction(mod, &storage, &instr, + &parse.FullToken.FullInstruction, + &fi, instno); + ++instno; + break; + + default: + assert(0); + } + } + + tgsi_parse_free(&parse); + + ir->num_consts = storage.numConsts(); + return mod; +} + +llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, + const struct tgsi_token *tokens) +{ + llvm::Module *mod = new Module("shader"); + struct tgsi_parse_context parse; + struct tgsi_full_instruction fi; + struct tgsi_full_declaration fd; + unsigned instno = 0; + std::ostringstream stream; + if (ir->type == GALLIVM_VS) { + stream << "vs_shader"; + } else { + stream << "fs_shader"; + } + //stream << ir->id; + std::string func_name = stream.str(); + Function *shader = llvm::cast(mod->getOrInsertFunction( + func_name.c_str(), + vertexShaderFunctionType())); + + Function::arg_iterator args = shader->arg_begin(); + Value *input = args++; + input->setName("inputs"); + Value *output = args++; + output->setName("outputs"); + Value *consts = args++; + consts->setName("consts"); + Value *temps = args++; + temps->setName("temps"); + + BasicBlock *label_entry = new BasicBlock("entry", shader, 0); + + tgsi_parse_init(&parse, tokens); + + fi = tgsi_default_full_instruction(); + fd = tgsi_default_full_declaration(); + + StorageSoa storage(label_entry, input, output, consts, temps); + InstructionsSoa instr(mod, shader, label_entry, &storage); + + while(!tgsi_parse_end_of_tokens(&parse)) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + translate_declarationir(ir, mod, &storage, + &parse.FullToken.FullDeclaration, + &fd); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + translate_immediateir(&storage, + &parse.FullToken.FullImmediate); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + storage.declareImmediates(); + translate_instructionir(mod, &storage, &instr, + &parse.FullToken.FullInstruction, + &fi, instno); + ++instno; + break; + + default: + assert(0); + } + } + + tgsi_parse_free(&parse); + + return mod; +} diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.h b/src/gallium/auxiliary/gallivm/tgsitollvm.h new file mode 100644 index 0000000000..7ada04d629 --- /dev/null +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.h @@ -0,0 +1,20 @@ +#ifndef TGSITOLLVM_H +#define TGSITOLLVM_H + + +namespace llvm { + class Module; +} + +struct gallivm_ir; +struct tgsi_token; + + +llvm::Module * tgsi_to_llvm(struct gallivm_ir *ir, + const struct tgsi_token *tokens); + + +llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, + const struct tgsi_token *tokens); + +#endif diff --git a/src/gallium/auxiliary/llvm/Makefile b/src/gallium/auxiliary/llvm/Makefile deleted file mode 100644 index 39fac6ea4a..0000000000 --- a/src/gallium/auxiliary/llvm/Makefile +++ /dev/null @@ -1,85 +0,0 @@ -# -*-makefile-*- -TOP = ../../../.. -include $(TOP)/configs/current - -LIBNAME = gallivm - - -GALLIVM_SOURCES = \ - gallivm.cpp \ - gallivm_cpu.cpp \ - instructions.cpp \ - loweringpass.cpp \ - tgsitollvm.cpp \ - storage.cpp \ - storagesoa.cpp \ - instructionssoa.cpp - -INC_SOURCES = gallivm_builtins.cpp - -CPP_SOURCES = \ - $(GALLIVM_SOURCES) - -C_SOURCES = -ASM_SOURCES = - -OBJECTS = $(C_SOURCES:.c=.o) \ - $(CPP_SOURCES:.cpp=.o) \ - $(ASM_SOURCES:.S=.o) - -### Include directories -INCLUDES = \ - -I. \ - -I$(TOP)/src/gallium/drivers \ - -I$(TOP)/src/gallium/auxiliary \ - -I$(TOP)/src/gallium/include \ - -I$(TOP)/src/mesa \ - -I$(TOP)/include - - -##### RULES ##### - -.c.o: - $(CC) -c $(INCLUDES) $(LLVM_CFLAGS) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ - -.cpp.o: - $(CXX) -c $(INCLUDES) $(LLVM_CXXFLAGS) $(CXXFLAGS) $(DRIVER_DEFINES) $< -o $@ - -.S.o: - $(CC) -c $(INCLUDES) $(CFLAGS) $(DRIVER_DEFINES) $< -o $@ - -##### TARGETS ##### - -default:: depend symlinks $(LIBNAME) - - -$(LIBNAME): $(OBJECTS) Makefile - $(TOP)/bin/mklib -o $@ -static $(OBJECTS) - - -depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES) - rm -f depend - touch depend - $(MKDEP) $(MKDEP_OPTIONS) $(DRIVER_DEFINES) $(INCLUDES) $(C_SOURCES) $(CPP_SOURCES) \ - $(ASM_SOURCES) $(INC_SOURCES) 2> /dev/null - - -gallivm_builtins.cpp: llvm_builtins.c - clang --emit-llvm $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=$@ -f -for=shader -funcname=createGallivmBuiltins - - -# Emacs tags -tags: - etags `find . -name \*.[ch]` `find ../include` - - -# Remove .o and backup files -clean: - -rm -f *.o */*.o *~ *.so *~ server/*.o - -rm -f depend depend.bak - -rm -f gallivm_builtins.cpp - -symlinks: - - -include depend diff --git a/src/gallium/auxiliary/llvm/gallivm.cpp b/src/gallium/auxiliary/llvm/gallivm.cpp deleted file mode 100644 index d14bb3b99a..0000000000 --- a/src/gallium/auxiliary/llvm/gallivm.cpp +++ /dev/null @@ -1,327 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin zack@tungstengraphics.com - */ -#ifdef MESA_LLVM - -#include "gallivm.h" -#include "gallivm_p.h" - -#include "instructions.h" -#include "loweringpass.h" -#include "storage.h" -#include "tgsitollvm.h" - -#include "pipe/p_context.h" -#include "pipe/p_shader_tokens.h" - -#include "tgsi/exec/tgsi_exec.h" -#include "tgsi/util/tgsi_dump.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -static int GLOBAL_ID = 0; - -using namespace llvm; - -static inline -void AddStandardCompilePasses(PassManager &PM) -{ - PM.add(new LoweringPass()); - PM.add(createVerifierPass()); // Verify that input is correct - - PM.add(createLowerSetJmpPass()); // Lower llvm.setjmp/.longjmp - - //PM.add(createStripSymbolsPass(true)); - - PM.add(createRaiseAllocationsPass()); // call %malloc -> malloc inst - PM.add(createCFGSimplificationPass()); // Clean up disgusting code - PM.add(createPromoteMemoryToRegisterPass());// Kill useless allocas - PM.add(createGlobalOptimizerPass()); // Optimize out global vars - PM.add(createGlobalDCEPass()); // Remove unused fns and globs - PM.add(createIPConstantPropagationPass());// IP Constant Propagation - PM.add(createDeadArgEliminationPass()); // Dead argument elimination - PM.add(createInstructionCombiningPass()); // Clean up after IPCP & DAE - PM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE - - PM.add(createPruneEHPass()); // Remove dead EH info - - PM.add(createFunctionInliningPass()); // Inline small functions - PM.add(createArgumentPromotionPass()); // Scalarize uninlined fn args - - PM.add(createTailDuplicationPass()); // Simplify cfg by copying code - PM.add(createInstructionCombiningPass()); // Cleanup for scalarrepl. - PM.add(createCFGSimplificationPass()); // Merge & remove BBs - PM.add(createScalarReplAggregatesPass()); // Break up aggregate allocas - PM.add(createInstructionCombiningPass()); // Combine silly seq's - PM.add(createCondPropagationPass()); // Propagate conditionals - - PM.add(createTailCallEliminationPass()); // Eliminate tail calls - PM.add(createCFGSimplificationPass()); // Merge & remove BBs - PM.add(createReassociatePass()); // Reassociate expressions - PM.add(createLoopRotatePass()); - PM.add(createLICMPass()); // Hoist loop invariants - PM.add(createLoopUnswitchPass()); // Unswitch loops. - PM.add(createLoopIndexSplitPass()); // Index split loops. - PM.add(createInstructionCombiningPass()); // Clean up after LICM/reassoc - PM.add(createIndVarSimplifyPass()); // Canonicalize indvars - PM.add(createLoopUnrollPass()); // Unroll small loops - PM.add(createInstructionCombiningPass()); // Clean up after the unroller - PM.add(createGVNPass()); // Remove redundancies - PM.add(createSCCPPass()); // Constant prop with SCCP - - // Run instcombine after redundancy elimination to exploit opportunities - // opened up by them. - PM.add(createInstructionCombiningPass()); - PM.add(createCondPropagationPass()); // Propagate conditionals - - PM.add(createDeadStoreEliminationPass()); // Delete dead stores - PM.add(createAggressiveDCEPass()); // SSA based 'Aggressive DCE' - PM.add(createCFGSimplificationPass()); // Merge & remove BBs - PM.add(createSimplifyLibCallsPass()); // Library Call Optimizations - PM.add(createDeadTypeEliminationPass()); // Eliminate dead types - PM.add(createConstantMergePass()); // Merge dup global constants -} - -void gallivm_prog_delete(struct gallivm_prog *prog) -{ - delete prog->module; - prog->module = 0; - prog->function = 0; - free(prog); -} - -static inline void -constant_interpolation(float (*inputs)[16][4], - const struct tgsi_interp_coef *coefs, - unsigned attrib, - unsigned chan) -{ - unsigned i; - - for (i = 0; i < QUAD_SIZE; ++i) { - inputs[i][attrib][chan] = coefs[attrib].a0[chan]; - } -} - -static inline void -linear_interpolation(float (*inputs)[16][4], - const struct tgsi_interp_coef *coefs, - unsigned attrib, - unsigned chan) -{ - unsigned i; - - for( i = 0; i < QUAD_SIZE; i++ ) { - const float x = inputs[i][0][0]; - const float y = inputs[i][0][1]; - - inputs[i][attrib][chan] = - coefs[attrib].a0[chan] + - coefs[attrib].dadx[chan] * x + - coefs[attrib].dady[chan] * y; - } -} - -static inline void -perspective_interpolation(float (*inputs)[16][4], - const struct tgsi_interp_coef *coefs, - unsigned attrib, - unsigned chan ) -{ - unsigned i; - - for( i = 0; i < QUAD_SIZE; i++ ) { - const float x = inputs[i][0][0]; - const float y = inputs[i][0][1]; - /* WPOS.w here is really 1/w */ - const float w = 1.0f / inputs[i][0][3]; - assert(inputs[i][0][3] != 0.0); - - inputs[i][attrib][chan] = - (coefs[attrib].a0[chan] + - coefs[attrib].dadx[chan] * x + - coefs[attrib].dady[chan] * y) * w; - } -} - -void gallivm_ir_dump(struct gallivm_ir *ir, const char *file_prefix) -{ - if (!ir || !ir->module) - return; - - if (file_prefix) { - std::ostringstream stream; - stream << file_prefix; - stream << ir->id; - stream << ".ll"; - std::string name = stream.str(); - std::ofstream out(name.c_str()); - if (!out) { - std::cerr<<"Can't open file : "<module); - out.close(); - } else { - const llvm::Module::FunctionListType &funcs = ir->module->getFunctionList(); - llvm::Module::FunctionListType::const_iterator itr; - std::cout<<"; ---------- Start shader "<id<id<num_interp; ++i) { - const gallivm_interpolate &interp = prog->interpolators[i]; - switch (interp.type) { - case TGSI_INTERPOLATE_CONSTANT: - constant_interpolation(inputs, coef, interp.attrib, interp.chan); - break; - - case TGSI_INTERPOLATE_LINEAR: - linear_interpolation(inputs, coef, interp.attrib, interp.chan); - break; - - case TGSI_INTERPOLATE_PERSPECTIVE: - perspective_interpolation(inputs, coef, interp.attrib, interp.chan); - break; - - default: - assert( 0 ); - } - } -} - - -struct gallivm_ir * gallivm_ir_new(enum gallivm_shader_type type) -{ - struct gallivm_ir *ir = - (struct gallivm_ir *)calloc(1, sizeof(struct gallivm_ir)); - ++GLOBAL_ID; - ir->id = GLOBAL_ID; - ir->type = type; - - return ir; -} - -void gallivm_ir_set_layout(struct gallivm_ir *ir, - enum gallivm_vector_layout layout) -{ - ir->layout = layout; -} - -void gallivm_ir_set_components(struct gallivm_ir *ir, int num) -{ - ir->num_components = num; -} - -void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir, - const struct tgsi_token *tokens) -{ - std::cout << "Creating llvm from: " <module = mod; - gallivm_ir_dump(ir, 0); -} - -void gallivm_ir_delete(struct gallivm_ir *ir) -{ - delete ir->module; - free(ir); -} - -struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir) -{ - struct gallivm_prog *prog = - (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog)); - llvm::Module *mod = llvm::CloneModule(ir->module); - prog->num_consts = ir->num_consts; - memcpy(prog->interpolators, ir->interpolators, sizeof(prog->interpolators)); - prog->num_interp = ir->num_interp; - - /* Run optimization passes over it */ - PassManager passes; - passes.add(new TargetData(mod)); - AddStandardCompilePasses(passes); - passes.run(*mod); - prog->module = mod; - - std::cout << "After optimizations:"<dump(); - - return prog; -} - -#endif /* MESA_LLVM */ diff --git a/src/gallium/auxiliary/llvm/gallivm.h b/src/gallium/auxiliary/llvm/gallivm.h deleted file mode 100644 index 92da4bca7f..0000000000 --- a/src/gallium/auxiliary/llvm/gallivm.h +++ /dev/null @@ -1,103 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin zack@tungstengraphics.com - */ - -#ifndef GALLIVM_H -#define GALLIVM_H - -#if defined __cplusplus -extern "C" { -#endif - -#include "pipe/p_state.h" - -#ifdef MESA_LLVM - -struct tgsi_token; - -struct gallivm_ir; -struct gallivm_prog; -struct gallivm_cpu_engine; -struct tgsi_interp_coef; -struct tgsi_sampler; -struct tgsi_exec_vector; - -enum gallivm_shader_type { - GALLIVM_VS, - GALLIVM_FS -}; - -enum gallivm_vector_layout { - GALLIVM_AOS, - GALLIVM_SOA -}; - -struct gallivm_ir *gallivm_ir_new(enum gallivm_shader_type type); -void gallivm_ir_set_layout(struct gallivm_ir *ir, - enum gallivm_vector_layout layout); -void gallivm_ir_set_components(struct gallivm_ir *ir, int num); -void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir, - const struct tgsi_token *tokens); -void gallivm_ir_delete(struct gallivm_ir *ir); - - -struct gallivm_prog *gallivm_ir_compile(struct gallivm_ir *ir); - -void gallivm_prog_inputs_interpolate(struct gallivm_prog *prog, - float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], - const struct tgsi_interp_coef *coefs); -void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix); - - -struct gallivm_cpu_engine *gallivm_cpu_engine_create(struct gallivm_prog *prog); -struct gallivm_cpu_engine *gallivm_global_cpu_engine(); -int gallivm_cpu_vs_exec(struct gallivm_prog *prog, - struct tgsi_exec_vector *inputs, - struct tgsi_exec_vector *dests, - float (*consts)[4], - struct tgsi_exec_vector *temps); -int gallivm_cpu_fs_exec(struct gallivm_prog *prog, - float x, float y, - float (*dests)[PIPE_MAX_SHADER_INPUTS][4], - float (*inputs)[PIPE_MAX_SHADER_INPUTS][4], - float (*consts)[4], - struct tgsi_sampler *samplers); -void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *ee, struct gallivm_prog *prog); -void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *ee); - - -#endif /* MESA_LLVM */ - -#if defined __cplusplus -} // extern "C" -#endif - -#endif diff --git a/src/gallium/auxiliary/llvm/gallivm_builtins.cpp b/src/gallium/auxiliary/llvm/gallivm_builtins.cpp deleted file mode 100644 index 1796f0a177..0000000000 --- a/src/gallium/auxiliary/llvm/gallivm_builtins.cpp +++ /dev/null @@ -1,567 +0,0 @@ -// Generated by llvm2cpp - DO NOT MODIFY! - - -Module* createGallivmBuiltins(Module *mod) { - -mod->setModuleIdentifier("shader"); - -// Type Definitions -ArrayType* ArrayTy_0 = ArrayType::get(IntegerType::get(8), 25); - -PointerType* PointerTy_1 = PointerType::get(ArrayTy_0, 0); - -std::vectorFuncTy_2_args; -FuncTy_2_args.push_back(Type::FloatTy); -FuncTy_2_args.push_back(Type::FloatTy); -FunctionType* FuncTy_2 = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/FuncTy_2_args, - /*isVarArg=*/false); - -PointerType* PointerTy_3 = PointerType::get(FuncTy_2, 0); - -VectorType* VectorTy_4 = VectorType::get(Type::FloatTy, 4); - -std::vectorFuncTy_5_args; -FuncTy_5_args.push_back(VectorTy_4); -FunctionType* FuncTy_5 = FunctionType::get( - /*Result=*/VectorTy_4, - /*Params=*/FuncTy_5_args, - /*isVarArg=*/false); - -std::vectorFuncTy_6_args; -FuncTy_6_args.push_back(VectorTy_4); -FuncTy_6_args.push_back(VectorTy_4); -FuncTy_6_args.push_back(VectorTy_4); -FunctionType* FuncTy_6 = FunctionType::get( - /*Result=*/VectorTy_4, - /*Params=*/FuncTy_6_args, - /*isVarArg=*/false); - -VectorType* VectorTy_7 = VectorType::get(IntegerType::get(32), 4); - -std::vectorFuncTy_9_args; -FunctionType* FuncTy_9 = FunctionType::get( - /*Result=*/IntegerType::get(32), - /*Params=*/FuncTy_9_args, - /*isVarArg=*/true); - -PointerType* PointerTy_8 = PointerType::get(FuncTy_9, 0); - -PointerType* PointerTy_10 = PointerType::get(IntegerType::get(8), 0); - -std::vectorFuncTy_12_args; -FuncTy_12_args.push_back(Type::FloatTy); -FunctionType* FuncTy_12 = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/FuncTy_12_args, - /*isVarArg=*/false); - -PointerType* PointerTy_11 = PointerType::get(FuncTy_12, 0); - -std::vectorFuncTy_13_args; -FuncTy_13_args.push_back(VectorTy_4); -FunctionType* FuncTy_13 = FunctionType::get( - /*Result=*/IntegerType::get(32), - /*Params=*/FuncTy_13_args, - /*isVarArg=*/false); - - -// Function Declarations - -Function* func_approx = new Function( - /*Type=*/FuncTy_2, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"approx", mod); -func_approx->setCallingConv(CallingConv::C); -const ParamAttrsList *func_approx_PAL = 0; -func_approx->setParamAttrs(func_approx_PAL); - -Function* func_powf = new Function( - /*Type=*/FuncTy_2, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"powf", mod); // (external, no body) -func_powf->setCallingConv(CallingConv::C); -const ParamAttrsList *func_powf_PAL = 0; -func_powf->setParamAttrs(func_powf_PAL); - -Function* func_lit = new Function( - /*Type=*/FuncTy_5, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"lit", mod); -func_lit->setCallingConv(CallingConv::C); -const ParamAttrsList *func_lit_PAL = 0; -func_lit->setParamAttrs(func_lit_PAL); - -Function* func_cmp = new Function( - /*Type=*/FuncTy_6, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"cmp", mod); -func_cmp->setCallingConv(CallingConv::C); -const ParamAttrsList *func_cmp_PAL = 0; -{ - ParamAttrsVector Attrs; - ParamAttrsWithIndex PAWI; - PAWI.index = 0; PAWI.attrs = 0 | ParamAttr::NoUnwind; - Attrs.push_back(PAWI); - func_cmp_PAL = ParamAttrsList::get(Attrs); - -} -func_cmp->setParamAttrs(func_cmp_PAL); - -Function* func_vcos = new Function( - /*Type=*/FuncTy_5, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"vcos", mod); -func_vcos->setCallingConv(CallingConv::C); -const ParamAttrsList *func_vcos_PAL = 0; -func_vcos->setParamAttrs(func_vcos_PAL); - -Function* func_printf = new Function( - /*Type=*/FuncTy_9, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"printf", mod); // (external, no body) -func_printf->setCallingConv(CallingConv::C); -const ParamAttrsList *func_printf_PAL = 0; -func_printf->setParamAttrs(func_printf_PAL); - -Function* func_cosf = new Function( - /*Type=*/FuncTy_12, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"cosf", mod); // (external, no body) -func_cosf->setCallingConv(CallingConv::C); -const ParamAttrsList *func_cosf_PAL = 0; -func_cosf->setParamAttrs(func_cosf_PAL); - -Function* func_scs = new Function( - /*Type=*/FuncTy_5, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"scs", mod); -func_scs->setCallingConv(CallingConv::C); -const ParamAttrsList *func_scs_PAL = 0; -func_scs->setParamAttrs(func_scs_PAL); - -Function* func_sinf = new Function( - /*Type=*/FuncTy_12, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"sinf", mod); // (external, no body) -func_sinf->setCallingConv(CallingConv::C); -const ParamAttrsList *func_sinf_PAL = 0; -func_sinf->setParamAttrs(func_sinf_PAL); - -Function* func_vsin = new Function( - /*Type=*/FuncTy_5, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"vsin", mod); -func_vsin->setCallingConv(CallingConv::C); -const ParamAttrsList *func_vsin_PAL = 0; -func_vsin->setParamAttrs(func_vsin_PAL); - -Function* func_kilp = new Function( - /*Type=*/FuncTy_13, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"kilp", mod); -func_kilp->setCallingConv(CallingConv::C); -const ParamAttrsList *func_kilp_PAL = 0; -{ - ParamAttrsVector Attrs; - ParamAttrsWithIndex PAWI; - PAWI.index = 0; PAWI.attrs = 0 | ParamAttr::NoUnwind; - Attrs.push_back(PAWI); - func_kilp_PAL = ParamAttrsList::get(Attrs); - -} -func_kilp->setParamAttrs(func_kilp_PAL); - -// Global Variable Declarations - - -GlobalVariable* gvar_array__str = new GlobalVariable( -/*Type=*/ArrayTy_0, -/*isConstant=*/true, -/*Linkage=*/GlobalValue::InternalLinkage, -/*Initializer=*/0, // has initializer, specified below -/*Name=*/".str", -mod); - -GlobalVariable* gvar_array__str1 = new GlobalVariable( -/*Type=*/ArrayTy_0, -/*isConstant=*/true, -/*Linkage=*/GlobalValue::InternalLinkage, -/*Initializer=*/0, // has initializer, specified below -/*Name=*/".str1", -mod); - -// Constant Definitions -Constant* const_array_14 = ConstantArray::get("VEC IN is %f %f %f %f\x0A", true); -Constant* const_array_15 = ConstantArray::get("VEC OUT is %f %f %f %f\x0A", true); -ConstantFP* const_float_16 = ConstantFP::get(Type::FloatTy, APFloat(-1.280000e+02f)); -ConstantFP* const_float_17 = ConstantFP::get(Type::FloatTy, APFloat(1.280000e+02f)); -Constant* const_float_18 = Constant::getNullValue(Type::FloatTy); -Constant* const_int32_19 = Constant::getNullValue(IntegerType::get(32)); -std::vector const_packed_20_elems; -ConstantFP* const_float_21 = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f)); -const_packed_20_elems.push_back(const_float_21); -UndefValue* const_float_22 = UndefValue::get(Type::FloatTy); -const_packed_20_elems.push_back(const_float_22); -const_packed_20_elems.push_back(const_float_22); -const_packed_20_elems.push_back(const_float_21); -Constant* const_packed_20 = ConstantVector::get(VectorTy_4, const_packed_20_elems); -ConstantInt* const_int32_23 = ConstantInt::get(APInt(32, "1", 10)); -ConstantInt* const_int32_24 = ConstantInt::get(APInt(32, "3", 10)); -ConstantInt* const_int32_25 = ConstantInt::get(APInt(32, "2", 10)); -std::vector const_packed_26_elems; -const_packed_26_elems.push_back(const_float_21); -const_packed_26_elems.push_back(const_float_18); -const_packed_26_elems.push_back(const_float_18); -const_packed_26_elems.push_back(const_float_21); -Constant* const_packed_26 = ConstantVector::get(VectorTy_4, const_packed_26_elems); -Constant* const_double_27 = Constant::getNullValue(Type::DoubleTy); -std::vector const_packed_28_elems; -const_packed_28_elems.push_back(const_int32_19); -ConstantInt* const_int32_29 = ConstantInt::get(APInt(32, "5", 10)); -const_packed_28_elems.push_back(const_int32_29); -const_packed_28_elems.push_back(const_int32_25); -const_packed_28_elems.push_back(const_int32_24); -Constant* const_packed_28 = ConstantVector::get(VectorTy_7, const_packed_28_elems); -std::vector const_packed_30_elems; -const_packed_30_elems.push_back(const_int32_19); -const_packed_30_elems.push_back(const_int32_23); -ConstantInt* const_int32_31 = ConstantInt::get(APInt(32, "6", 10)); -const_packed_30_elems.push_back(const_int32_31); -const_packed_30_elems.push_back(const_int32_24); -Constant* const_packed_30 = ConstantVector::get(VectorTy_7, const_packed_30_elems); -std::vector const_packed_32_elems; -const_packed_32_elems.push_back(const_int32_19); -const_packed_32_elems.push_back(const_int32_23); -const_packed_32_elems.push_back(const_int32_25); -ConstantInt* const_int32_33 = ConstantInt::get(APInt(32, "7", 10)); -const_packed_32_elems.push_back(const_int32_33); -Constant* const_packed_32 = ConstantVector::get(VectorTy_7, const_packed_32_elems); -std::vector const_ptr_34_indices; -const_ptr_34_indices.push_back(const_int32_19); -const_ptr_34_indices.push_back(const_int32_19); -Constant* const_ptr_34 = ConstantExpr::getGetElementPtr(gvar_array__str, &const_ptr_34_indices[0], const_ptr_34_indices.size() ); -UndefValue* const_packed_35 = UndefValue::get(VectorTy_4); -std::vector const_ptr_36_indices; -const_ptr_36_indices.push_back(const_int32_19); -const_ptr_36_indices.push_back(const_int32_19); -Constant* const_ptr_36 = ConstantExpr::getGetElementPtr(gvar_array__str1, &const_ptr_36_indices[0], const_ptr_36_indices.size() ); - -// Global Variable Definitions -gvar_array__str->setInitializer(const_array_14); -gvar_array__str1->setInitializer(const_array_15); - -// Function Definitions - -// Function: approx (func_approx) -{ - Function::arg_iterator args = func_approx->arg_begin(); - Value* float_a = args++; - float_a->setName("a"); - Value* float_b = args++; - float_b->setName("b"); - - BasicBlock* label_entry = new BasicBlock("entry",func_approx,0); - - // Block entry (label_entry) - FCmpInst* int1_cmp = new FCmpInst(FCmpInst::FCMP_OLT, float_b, const_float_16, "cmp", label_entry); - SelectInst* float_b_addr_0 = new SelectInst(int1_cmp, const_float_16, float_b, "b.addr.0", label_entry); - FCmpInst* int1_cmp3 = new FCmpInst(FCmpInst::FCMP_OGT, float_b_addr_0, const_float_17, "cmp3", label_entry); - SelectInst* float_b_addr_1 = new SelectInst(int1_cmp3, const_float_17, float_b_addr_0, "b.addr.1", label_entry); - FCmpInst* int1_cmp7 = new FCmpInst(FCmpInst::FCMP_OLT, float_a, const_float_18, "cmp7", label_entry); - SelectInst* float_a_addr_0 = new SelectInst(int1_cmp7, const_float_18, float_a, "a.addr.0", label_entry); - std::vector float_call_params; - float_call_params.push_back(float_a_addr_0); - float_call_params.push_back(float_b_addr_1); - CallInst* float_call = new CallInst(func_powf, float_call_params.begin(), float_call_params.end(), "call", label_entry); - float_call->setCallingConv(CallingConv::C); - float_call->setTailCall(true);const ParamAttrsList *float_call_PAL = 0; - float_call->setParamAttrs(float_call_PAL); - - new ReturnInst(float_call, label_entry); - -} - -// Function: lit (func_lit) -{ - Function::arg_iterator args = func_lit->arg_begin(); - Value* packed_tmp = args++; - packed_tmp->setName("tmp"); - - BasicBlock* label_entry_38 = new BasicBlock("entry",func_lit,0); - BasicBlock* label_ifthen = new BasicBlock("ifthen",func_lit,0); - BasicBlock* label_UnifiedReturnBlock = new BasicBlock("UnifiedReturnBlock",func_lit,0); - - // Block entry (label_entry_38) - ExtractElementInst* float_tmp6 = new ExtractElementInst(packed_tmp, const_int32_19, "tmp6", label_entry_38); - FCmpInst* int1_cmp_39 = new FCmpInst(FCmpInst::FCMP_OGT, float_tmp6, const_float_18, "cmp", label_entry_38); - new BranchInst(label_ifthen, label_UnifiedReturnBlock, int1_cmp_39, label_entry_38); - - // Block ifthen (label_ifthen) - InsertElementInst* packed_tmp10 = new InsertElementInst(const_packed_20, float_tmp6, const_int32_23, "tmp10", label_ifthen); - ExtractElementInst* float_tmp12 = new ExtractElementInst(packed_tmp, const_int32_23, "tmp12", label_ifthen); - ExtractElementInst* float_tmp14 = new ExtractElementInst(packed_tmp, const_int32_24, "tmp14", label_ifthen); - std::vector float_call_41_params; - float_call_41_params.push_back(float_tmp12); - float_call_41_params.push_back(float_tmp14); - CallInst* float_call_41 = new CallInst(func_approx, float_call_41_params.begin(), float_call_41_params.end(), "call", label_ifthen); - float_call_41->setCallingConv(CallingConv::C); - float_call_41->setTailCall(true);const ParamAttrsList *float_call_41_PAL = 0; - float_call_41->setParamAttrs(float_call_41_PAL); - - InsertElementInst* packed_tmp16 = new InsertElementInst(packed_tmp10, float_call_41, const_int32_25, "tmp16", label_ifthen); - new ReturnInst(packed_tmp16, label_ifthen); - - // Block UnifiedReturnBlock (label_UnifiedReturnBlock) - new ReturnInst(const_packed_26, label_UnifiedReturnBlock); - -} - -// Function: cmp (func_cmp) -{ - Function::arg_iterator args = func_cmp->arg_begin(); - Value* packed_tmp0 = args++; - packed_tmp0->setName("tmp0"); - Value* packed_tmp1 = args++; - packed_tmp1->setName("tmp1"); - Value* packed_tmp2 = args++; - packed_tmp2->setName("tmp2"); - - BasicBlock* label_entry_44 = new BasicBlock("entry",func_cmp,0); - BasicBlock* label_cond__14 = new BasicBlock("cond.?14",func_cmp,0); - BasicBlock* label_cond_cont20 = new BasicBlock("cond.cont20",func_cmp,0); - BasicBlock* label_cond__28 = new BasicBlock("cond.?28",func_cmp,0); - BasicBlock* label_cond_cont34 = new BasicBlock("cond.cont34",func_cmp,0); - BasicBlock* label_cond__42 = new BasicBlock("cond.?42",func_cmp,0); - BasicBlock* label_cond_cont48 = new BasicBlock("cond.cont48",func_cmp,0); - - // Block entry (label_entry_44) - ExtractElementInst* float_tmp3 = new ExtractElementInst(packed_tmp0, const_int32_19, "tmp3", label_entry_44); - CastInst* double_conv = new FPExtInst(float_tmp3, Type::DoubleTy, "conv", label_entry_44); - FCmpInst* int1_cmp_45 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv, const_double_27, "cmp", label_entry_44); - ExtractElementInst* float_tmp11 = new ExtractElementInst(packed_tmp0, const_int32_23, "tmp11", label_entry_44); - CastInst* double_conv12 = new FPExtInst(float_tmp11, Type::DoubleTy, "conv12", label_entry_44); - FCmpInst* int1_cmp13 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv12, const_double_27, "cmp13", label_entry_44); - SelectInst* packed_tmp1_tmp2 = new SelectInst(int1_cmp_45, packed_tmp1, packed_tmp2, "tmp1.tmp2", label_entry_44); - new BranchInst(label_cond__14, label_cond_cont20, int1_cmp13, label_entry_44); - - // Block cond.?14 (label_cond__14) - ShuffleVectorInst* packed_tmp233 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp1, const_packed_28, "tmp233", label_cond__14); - ExtractElementInst* float_tmp254 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp254", label_cond__14); - CastInst* double_conv265 = new FPExtInst(float_tmp254, Type::DoubleTy, "conv265", label_cond__14); - FCmpInst* int1_cmp276 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv265, const_double_27, "cmp276", label_cond__14); - new BranchInst(label_cond__28, label_cond_cont34, int1_cmp276, label_cond__14); - - // Block cond.cont20 (label_cond_cont20) - ShuffleVectorInst* packed_tmp23 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp2, const_packed_28, "tmp23", label_cond_cont20); - ExtractElementInst* float_tmp25 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp25", label_cond_cont20); - CastInst* double_conv26 = new FPExtInst(float_tmp25, Type::DoubleTy, "conv26", label_cond_cont20); - FCmpInst* int1_cmp27 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv26, const_double_27, "cmp27", label_cond_cont20); - new BranchInst(label_cond__28, label_cond_cont34, int1_cmp27, label_cond_cont20); - - // Block cond.?28 (label_cond__28) - PHINode* packed_tmp23_reg2mem_0 = new PHINode(VectorTy_4, "tmp23.reg2mem.0", label_cond__28); - packed_tmp23_reg2mem_0->reserveOperandSpace(2); - packed_tmp23_reg2mem_0->addIncoming(packed_tmp233, label_cond__14); - packed_tmp23_reg2mem_0->addIncoming(packed_tmp23, label_cond_cont20); - - ShuffleVectorInst* packed_tmp378 = new ShuffleVectorInst(packed_tmp23_reg2mem_0, packed_tmp1, const_packed_30, "tmp378", label_cond__28); - ExtractElementInst* float_tmp399 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp399", label_cond__28); - CastInst* double_conv4010 = new FPExtInst(float_tmp399, Type::DoubleTy, "conv4010", label_cond__28); - FCmpInst* int1_cmp4111 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv4010, const_double_27, "cmp4111", label_cond__28); - new BranchInst(label_cond__42, label_cond_cont48, int1_cmp4111, label_cond__28); - - // Block cond.cont34 (label_cond_cont34) - PHINode* packed_tmp23_reg2mem_1 = new PHINode(VectorTy_4, "tmp23.reg2mem.1", label_cond_cont34); - packed_tmp23_reg2mem_1->reserveOperandSpace(2); - packed_tmp23_reg2mem_1->addIncoming(packed_tmp233, label_cond__14); - packed_tmp23_reg2mem_1->addIncoming(packed_tmp23, label_cond_cont20); - - ShuffleVectorInst* packed_tmp37 = new ShuffleVectorInst(packed_tmp23_reg2mem_1, packed_tmp2, const_packed_30, "tmp37", label_cond_cont34); - ExtractElementInst* float_tmp39 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp39", label_cond_cont34); - CastInst* double_conv40 = new FPExtInst(float_tmp39, Type::DoubleTy, "conv40", label_cond_cont34); - FCmpInst* int1_cmp41 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv40, const_double_27, "cmp41", label_cond_cont34); - new BranchInst(label_cond__42, label_cond_cont48, int1_cmp41, label_cond_cont34); - - // Block cond.?42 (label_cond__42) - PHINode* packed_tmp37_reg2mem_0 = new PHINode(VectorTy_4, "tmp37.reg2mem.0", label_cond__42); - packed_tmp37_reg2mem_0->reserveOperandSpace(2); - packed_tmp37_reg2mem_0->addIncoming(packed_tmp378, label_cond__28); - packed_tmp37_reg2mem_0->addIncoming(packed_tmp37, label_cond_cont34); - - ShuffleVectorInst* packed_tmp5113 = new ShuffleVectorInst(packed_tmp37_reg2mem_0, packed_tmp1, const_packed_32, "tmp5113", label_cond__42); - new ReturnInst(packed_tmp5113, label_cond__42); - - // Block cond.cont48 (label_cond_cont48) - PHINode* packed_tmp37_reg2mem_1 = new PHINode(VectorTy_4, "tmp37.reg2mem.1", label_cond_cont48); - packed_tmp37_reg2mem_1->reserveOperandSpace(2); - packed_tmp37_reg2mem_1->addIncoming(packed_tmp378, label_cond__28); - packed_tmp37_reg2mem_1->addIncoming(packed_tmp37, label_cond_cont34); - - ShuffleVectorInst* packed_tmp51 = new ShuffleVectorInst(packed_tmp37_reg2mem_1, packed_tmp2, const_packed_32, "tmp51", label_cond_cont48); - new ReturnInst(packed_tmp51, label_cond_cont48); - -} - -// Function: vcos (func_vcos) -{ - Function::arg_iterator args = func_vcos->arg_begin(); - Value* packed_val = args++; - packed_val->setName("val"); - - BasicBlock* label_entry_53 = new BasicBlock("entry",func_vcos,0); - - // Block entry (label_entry_53) - ExtractElementInst* float_tmp1 = new ExtractElementInst(packed_val, const_int32_19, "tmp1", label_entry_53); - CastInst* double_conv_54 = new FPExtInst(float_tmp1, Type::DoubleTy, "conv", label_entry_53); - ExtractElementInst* float_tmp3_55 = new ExtractElementInst(packed_val, const_int32_23, "tmp3", label_entry_53); - CastInst* double_conv4 = new FPExtInst(float_tmp3_55, Type::DoubleTy, "conv4", label_entry_53); - ExtractElementInst* float_tmp6_56 = new ExtractElementInst(packed_val, const_int32_25, "tmp6", label_entry_53); - CastInst* double_conv7 = new FPExtInst(float_tmp6_56, Type::DoubleTy, "conv7", label_entry_53); - ExtractElementInst* float_tmp9 = new ExtractElementInst(packed_val, const_int32_24, "tmp9", label_entry_53); - CastInst* double_conv10 = new FPExtInst(float_tmp9, Type::DoubleTy, "conv10", label_entry_53); - std::vector int32_call_params; - int32_call_params.push_back(const_ptr_34); - int32_call_params.push_back(double_conv_54); - int32_call_params.push_back(double_conv4); - int32_call_params.push_back(double_conv7); - int32_call_params.push_back(double_conv10); - CallInst* int32_call = new CallInst(func_printf, int32_call_params.begin(), int32_call_params.end(), "call", label_entry_53); - int32_call->setCallingConv(CallingConv::C); - int32_call->setTailCall(true);const ParamAttrsList *int32_call_PAL = 0; - int32_call->setParamAttrs(int32_call_PAL); - - CallInst* float_call13 = new CallInst(func_cosf, float_tmp1, "call13", label_entry_53); - float_call13->setCallingConv(CallingConv::C); - float_call13->setTailCall(true);const ParamAttrsList *float_call13_PAL = 0; - float_call13->setParamAttrs(float_call13_PAL); - - InsertElementInst* packed_tmp15 = new InsertElementInst(const_packed_35, float_call13, const_int32_19, "tmp15", label_entry_53); - CallInst* float_call18 = new CallInst(func_cosf, float_tmp1, "call18", label_entry_53); - float_call18->setCallingConv(CallingConv::C); - float_call18->setTailCall(true);const ParamAttrsList *float_call18_PAL = 0; - float_call18->setParamAttrs(float_call18_PAL); - - InsertElementInst* packed_tmp20 = new InsertElementInst(packed_tmp15, float_call18, const_int32_23, "tmp20", label_entry_53); - CallInst* float_call23 = new CallInst(func_cosf, float_tmp1, "call23", label_entry_53); - float_call23->setCallingConv(CallingConv::C); - float_call23->setTailCall(true);const ParamAttrsList *float_call23_PAL = 0; - float_call23->setParamAttrs(float_call23_PAL); - - InsertElementInst* packed_tmp25 = new InsertElementInst(packed_tmp20, float_call23, const_int32_25, "tmp25", label_entry_53); - CallInst* float_call28 = new CallInst(func_cosf, float_tmp1, "call28", label_entry_53); - float_call28->setCallingConv(CallingConv::C); - float_call28->setTailCall(true);const ParamAttrsList *float_call28_PAL = 0; - float_call28->setParamAttrs(float_call28_PAL); - - InsertElementInst* packed_tmp30 = new InsertElementInst(packed_tmp25, float_call28, const_int32_24, "tmp30", label_entry_53); - CastInst* double_conv33 = new FPExtInst(float_call13, Type::DoubleTy, "conv33", label_entry_53); - CastInst* double_conv36 = new FPExtInst(float_call18, Type::DoubleTy, "conv36", label_entry_53); - CastInst* double_conv39 = new FPExtInst(float_call23, Type::DoubleTy, "conv39", label_entry_53); - CastInst* double_conv42 = new FPExtInst(float_call28, Type::DoubleTy, "conv42", label_entry_53); - std::vector int32_call43_params; - int32_call43_params.push_back(const_ptr_36); - int32_call43_params.push_back(double_conv33); - int32_call43_params.push_back(double_conv36); - int32_call43_params.push_back(double_conv39); - int32_call43_params.push_back(double_conv42); - CallInst* int32_call43 = new CallInst(func_printf, int32_call43_params.begin(), int32_call43_params.end(), "call43", label_entry_53); - int32_call43->setCallingConv(CallingConv::C); - int32_call43->setTailCall(true);const ParamAttrsList *int32_call43_PAL = 0; - int32_call43->setParamAttrs(int32_call43_PAL); - - new ReturnInst(packed_tmp30, label_entry_53); - -} - -// Function: scs (func_scs) -{ - Function::arg_iterator args = func_scs->arg_begin(); - Value* packed_val_58 = args++; - packed_val_58->setName("val"); - - BasicBlock* label_entry_59 = new BasicBlock("entry",func_scs,0); - - // Block entry (label_entry_59) - ExtractElementInst* float_tmp2 = new ExtractElementInst(packed_val_58, const_int32_19, "tmp2", label_entry_59); - CallInst* float_call_60 = new CallInst(func_cosf, float_tmp2, "call", label_entry_59); - float_call_60->setCallingConv(CallingConv::C); - float_call_60->setTailCall(true);const ParamAttrsList *float_call_60_PAL = 0; - float_call_60->setParamAttrs(float_call_60_PAL); - - InsertElementInst* packed_tmp5 = new InsertElementInst(const_packed_35, float_call_60, const_int32_19, "tmp5", label_entry_59); - CallInst* float_call7 = new CallInst(func_sinf, float_tmp2, "call7", label_entry_59); - float_call7->setCallingConv(CallingConv::C); - float_call7->setTailCall(true);const ParamAttrsList *float_call7_PAL = 0; - float_call7->setParamAttrs(float_call7_PAL); - - InsertElementInst* packed_tmp9 = new InsertElementInst(packed_tmp5, float_call7, const_int32_23, "tmp9", label_entry_59); - new ReturnInst(packed_tmp9, label_entry_59); - -} - -// Function: vsin (func_vsin) -{ - Function::arg_iterator args = func_vsin->arg_begin(); - Value* packed_val_62 = args++; - packed_val_62->setName("val"); - - BasicBlock* label_entry_63 = new BasicBlock("entry",func_vsin,0); - - // Block entry (label_entry_63) - ExtractElementInst* float_tmp2_64 = new ExtractElementInst(packed_val_62, const_int32_19, "tmp2", label_entry_63); - CallInst* float_call_65 = new CallInst(func_sinf, float_tmp2_64, "call", label_entry_63); - float_call_65->setCallingConv(CallingConv::C); - float_call_65->setTailCall(true);const ParamAttrsList *float_call_65_PAL = 0; - float_call_65->setParamAttrs(float_call_65_PAL); - - InsertElementInst* packed_tmp6 = new InsertElementInst(const_packed_35, float_call_65, const_int32_19, "tmp6", label_entry_63); - InsertElementInst* packed_tmp9_66 = new InsertElementInst(packed_tmp6, float_call_65, const_int32_23, "tmp9", label_entry_63); - InsertElementInst* packed_tmp12 = new InsertElementInst(packed_tmp9_66, float_call_65, const_int32_25, "tmp12", label_entry_63); - InsertElementInst* packed_tmp15_67 = new InsertElementInst(packed_tmp12, float_call_65, const_int32_24, "tmp15", label_entry_63); - new ReturnInst(packed_tmp15_67, label_entry_63); - -} - -// Function: kilp (func_kilp) -{ - Function::arg_iterator args = func_kilp->arg_begin(); - Value* packed_val_69 = args++; - packed_val_69->setName("val"); - - BasicBlock* label_entry_70 = new BasicBlock("entry",func_kilp,0); - BasicBlock* label_lor_rhs = new BasicBlock("lor_rhs",func_kilp,0); - BasicBlock* label_lor_rhs5 = new BasicBlock("lor_rhs5",func_kilp,0); - BasicBlock* label_lor_rhs11 = new BasicBlock("lor_rhs11",func_kilp,0); - BasicBlock* label_UnifiedReturnBlock_71 = new BasicBlock("UnifiedReturnBlock",func_kilp,0); - - // Block entry (label_entry_70) - ExtractElementInst* float_tmp1_72 = new ExtractElementInst(packed_val_69, const_int32_19, "tmp1", label_entry_70); - FCmpInst* int1_cmp_73 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp1_72, const_float_18, "cmp", label_entry_70); - new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs, int1_cmp_73, label_entry_70); - - // Block lor_rhs (label_lor_rhs) - ExtractElementInst* float_tmp3_75 = new ExtractElementInst(packed_val_69, const_int32_23, "tmp3", label_lor_rhs); - FCmpInst* int1_cmp4 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp3_75, const_float_18, "cmp4", label_lor_rhs); - new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs5, int1_cmp4, label_lor_rhs); - - // Block lor_rhs5 (label_lor_rhs5) - ExtractElementInst* float_tmp7 = new ExtractElementInst(packed_val_69, const_int32_25, "tmp7", label_lor_rhs5); - FCmpInst* int1_cmp8 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp7, const_float_18, "cmp8", label_lor_rhs5); - new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs11, int1_cmp8, label_lor_rhs5); - - // Block lor_rhs11 (label_lor_rhs11) - ExtractElementInst* float_tmp13 = new ExtractElementInst(packed_val_69, const_int32_24, "tmp13", label_lor_rhs11); - FCmpInst* int1_cmp14 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp13, const_float_18, "cmp14", label_lor_rhs11); - CastInst* int32_retval = new ZExtInst(int1_cmp14, IntegerType::get(32), "retval", label_lor_rhs11); - new ReturnInst(int32_retval, label_lor_rhs11); - - // Block UnifiedReturnBlock (label_UnifiedReturnBlock_71) - new ReturnInst(const_int32_23, label_UnifiedReturnBlock_71); - -} - -return mod; - -} diff --git a/src/gallium/auxiliary/llvm/gallivm_cpu.cpp b/src/gallium/auxiliary/llvm/gallivm_cpu.cpp deleted file mode 100644 index 8f9830d0b1..0000000000 --- a/src/gallium/auxiliary/llvm/gallivm_cpu.cpp +++ /dev/null @@ -1,202 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin zack@tungstengraphics.com - */ -#ifdef MESA_LLVM - -#include "gallivm.h" -#include "gallivm_p.h" - -#include "instructions.h" -#include "loweringpass.h" -#include "storage.h" -#include "tgsitollvm.h" - -#include "pipe/p_context.h" -#include "pipe/p_shader_tokens.h" - -#include "tgsi/exec/tgsi_exec.h" -#include "tgsi/util/tgsi_dump.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -struct gallivm_cpu_engine { - llvm::ExecutionEngine *engine; -}; - -static struct gallivm_cpu_engine *CPU = 0; - -typedef int (*fragment_shader_runner)(float x, float y, - float (*dests)[16][4], - float (*inputs)[16][4], - int num_attribs, - float (*consts)[4], int num_consts, - struct tgsi_sampler *samplers); - -int gallivm_cpu_fs_exec(struct gallivm_prog *prog, - float fx, float fy, - float (*dests)[16][4], - float (*inputs)[16][4], - float (*consts)[4], - struct tgsi_sampler *samplers) -{ - fragment_shader_runner runner = reinterpret_cast(prog->function); - assert(runner); - - return runner(fx, fy, dests, inputs, prog->num_interp, - consts, prog->num_consts, - samplers); -} - -static inline llvm::Function *func_for_shader(struct gallivm_prog *prog) -{ - llvm::Module *mod = prog->module; - llvm::Function *func = 0; - - switch (prog->type) { - case GALLIVM_VS: - func = mod->getFunction("vs_shader"); - break; - case GALLIVM_FS: - func = mod->getFunction("fs_shader"); - break; - default: - assert(!"Unknown shader type!"); - break; - } - return func; -} - -/*! - This function creates a CPU based execution engine for the given gallivm_prog. - gallivm_cpu_engine should be used as a singleton throughout the library. Before - executing gallivm_prog_exec one needs to call gallivm_cpu_jit_compile. - The gallivm_prog instance which is being passed to the constructor is being - automatically JIT compiled so one shouldn't call gallivm_cpu_jit_compile - with it again. - */ -struct gallivm_cpu_engine * gallivm_cpu_engine_create(struct gallivm_prog *prog) -{ - struct gallivm_cpu_engine *cpu = (struct gallivm_cpu_engine *) - calloc(1, sizeof(struct gallivm_cpu_engine)); - llvm::Module *mod = static_cast(prog->module); - llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod); - llvm::ExecutionEngine *ee = llvm::ExecutionEngine::create(mp, false); - ee->DisableLazyCompilation(); - cpu->engine = ee; - - llvm::Function *func = func_for_shader(prog); - - prog->function = ee->getPointerToFunction(func); - CPU = cpu; - return cpu; -} - - -/*! - This function JIT compiles the given gallivm_prog with the given cpu based execution engine. - The reference to the generated machine code entry point will be stored - in the gallivm_prog program. After executing this function one can call gallivm_prog_exec - in order to execute the gallivm_prog on the CPU. - */ -void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog *prog) -{ - llvm::Module *mod = static_cast(prog->module); - llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod); - llvm::ExecutionEngine *ee = cpu->engine; - assert(ee); - /*FIXME : remove */ - ee->DisableLazyCompilation(); - ee->addModuleProvider(mp); - - llvm::Function *func = func_for_shader(prog); - prog->function = ee->getPointerToFunction(func); -} - -void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *cpu) -{ - free(cpu); -} - -struct gallivm_cpu_engine * gallivm_global_cpu_engine() -{ - return CPU; -} - - -typedef void (*vertex_shader_runner)(void *ainputs, - void *dests, - float (*aconsts)[4], - void *temps); - - -/*! - This function is used to execute the gallivm_prog in software. Before calling - this function the gallivm_prog has to be JIT compiled with the gallivm_cpu_jit_compile - function. - */ -int gallivm_cpu_vs_exec(struct gallivm_prog *prog, - struct tgsi_exec_vector *inputs, - struct tgsi_exec_vector *dests, - float (*consts)[4], - struct tgsi_exec_vector *temps) -{ - vertex_shader_runner runner = reinterpret_cast(prog->function); - assert(runner); - /*FIXME*/ - runner(inputs, dests, consts, temps); - - return 0; -} - -#endif diff --git a/src/gallium/auxiliary/llvm/gallivm_p.h b/src/gallium/auxiliary/llvm/gallivm_p.h deleted file mode 100644 index cfe7b1901b..0000000000 --- a/src/gallium/auxiliary/llvm/gallivm_p.h +++ /dev/null @@ -1,110 +0,0 @@ -#ifndef GALLIVM_P_H -#define GALLIVM_P_H - -#ifdef MESA_LLVM - -#include "gallivm.h" -#include "pipe/p_shader_tokens.h" -#include "pipe/p_compiler.h" - -namespace llvm { - class Module; -} - -#if defined __cplusplus -extern "C" { -#endif - -enum gallivm_shader_type; -enum gallivm_vector_layout; - -struct gallivm_interpolate { - int attrib; - int chan; - int type; -}; - -struct gallivm_ir { - llvm::Module *module; - int id; - enum gallivm_shader_type type; - enum gallivm_vector_layout layout; - int num_components; - int num_consts; - - //FIXME: this might not be enough for some shaders - struct gallivm_interpolate interpolators[32*4]; - int num_interp; -}; - -struct gallivm_prog { - llvm::Module *module; - void *function; - - int id; - enum gallivm_shader_type type; - - int num_consts; - - //FIXME: this might not be enough for some shaders - struct gallivm_interpolate interpolators[32*4]; - int num_interp; -}; - -static INLINE void gallivm_swizzle_components(int swizzle, - int *xc, int *yc, - int *zc, int *wc) -{ - int x = swizzle / 1000; swizzle -= x * 1000; - int y = swizzle / 100; swizzle -= y * 100; - int z = swizzle / 10; swizzle -= z * 10; - int w = swizzle; - - if (xc) *xc = x; - if (yc) *yc = y; - if (zc) *zc = z; - if (wc) *wc = w; -} - -static INLINE boolean gallivm_is_swizzle(int swizzle) -{ - const int NO_SWIZZLE = TGSI_SWIZZLE_X * 1000 + TGSI_SWIZZLE_Y * 100 + - TGSI_SWIZZLE_Z * 10 + TGSI_SWIZZLE_W; - return swizzle != NO_SWIZZLE; -} - -static INLINE int gallivm_x_swizzle(int swizzle) -{ - int x; - gallivm_swizzle_components(swizzle, &x, 0, 0, 0); - return x; -} - -static INLINE int gallivm_y_swizzle(int swizzle) -{ - int y; - gallivm_swizzle_components(swizzle, 0, &y, 0, 0); - return y; -} - -static INLINE int gallivm_z_swizzle(int swizzle) -{ - int z; - gallivm_swizzle_components(swizzle, 0, 0, &z, 0); - return z; -} - -static INLINE int gallivm_w_swizzle(int swizzle) -{ - int w; - gallivm_swizzle_components(swizzle, 0, 0, 0, &w); - return w; -} - -#endif /* MESA_LLVM */ - -#if defined __cplusplus -} // extern "C" -#endif - -#endif diff --git a/src/gallium/auxiliary/llvm/instructions.cpp b/src/gallium/auxiliary/llvm/instructions.cpp deleted file mode 100644 index 55d39fa5f1..0000000000 --- a/src/gallium/auxiliary/llvm/instructions.cpp +++ /dev/null @@ -1,889 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin zack@tungstengraphics.com - */ -#ifdef MESA_LLVM - -#include "instructions.h" - -#include "storage.h" - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -using namespace llvm; - -#include "gallivm_builtins.cpp" - -static inline std::string createFuncName(int label) -{ - std::ostringstream stream; - stream << "function"; - stream << label; - return stream.str(); -} - -Instructions::Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block, - Storage *storage) - : m_mod(mod), m_func(func), m_builder(block), m_idx(0), - m_storage(storage) -{ - m_floatVecType = VectorType::get(Type::FloatTy, 4); - - m_llvmFSqrt = 0; - m_llvmFAbs = 0; - m_llvmPow = 0; - m_llvmFloor = 0; - m_llvmFlog = 0; - m_llvmLit = 0; - m_fmtPtr = 0; - - createGallivmBuiltins(m_mod); -} - -llvm::Value * Instructions::add(llvm::Value *in1, llvm::Value *in2) -{ - return m_builder.CreateAdd(in1, in2, name("add")); -} - -llvm::Value * Instructions::madd(llvm::Value *in1, llvm::Value *in2, - llvm::Value *in3) -{ - Value *mulRes = mul(in1, in2); - return add(mulRes, in3); -} - -llvm::Value * Instructions::mul(llvm::Value *in1, llvm::Value *in2) -{ - return m_builder.CreateMul(in1, in2, name("mul")); -} - -const char * Instructions::name(const char *prefix) -{ - ++m_idx; - snprintf(m_name, 32, "%s%d", prefix, m_idx); - return m_name; -} - -llvm::Value * Instructions::dp3(llvm::Value *in1, llvm::Value *in2) -{ - Value *mulRes = mul(in1, in2); - Value *x = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(0), - name("extractx")); - Value *y = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(1), - name("extracty")); - Value *z = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(2), - name("extractz")); - Value *xy = m_builder.CreateAdd(x, y,name("xy")); - Value *dot3 = m_builder.CreateAdd(xy, z, name("dot3")); - return vectorFromVals(dot3, dot3, dot3, dot3); -} - -llvm::Value *Instructions::callFSqrt(llvm::Value *val) -{ - if (!m_llvmFSqrt) { - // predeclare the intrinsic - std::vector fsqrtArgs; - fsqrtArgs.push_back(Type::FloatTy); - ParamAttrsList *fsqrtPal = 0; - FunctionType* fsqrtType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/fsqrtArgs, - /*isVarArg=*/false); - m_llvmFSqrt = new Function( - /*Type=*/fsqrtType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"llvm.sqrt.f32", m_mod); - m_llvmFSqrt->setCallingConv(CallingConv::C); - m_llvmFSqrt->setParamAttrs(fsqrtPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFSqrt, val, - name("sqrt")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::rsq(llvm::Value *in1) -{ - Value *x = m_builder.CreateExtractElement(in1, - m_storage->constantInt(0), - name("extractx")); - Value *abs = callFAbs(x); - Value *sqrt = callFSqrt(abs); - - Value *rsqrt = m_builder.CreateFDiv(ConstantFP::get(Type::FloatTy, - APFloat(1.f)), - sqrt, - name("rsqrt")); - return vectorFromVals(rsqrt, rsqrt, rsqrt, rsqrt); -} - -llvm::Value * Instructions::vectorFromVals(llvm::Value *x, llvm::Value *y, - llvm::Value *z, llvm::Value *w) -{ - Constant *const_vec = Constant::getNullValue(m_floatVecType); - Value *res = m_builder.CreateInsertElement(const_vec, x, - m_storage->constantInt(0), - name("vecx")); - res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1), - name("vecxy")); - res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2), - name("vecxyz")); - if (w) - res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3), - name("vecxyzw")); - return res; -} - -llvm::Value *Instructions::callFAbs(llvm::Value *val) -{ - if (!m_llvmFAbs) { - // predeclare the intrinsic - std::vector fabsArgs; - fabsArgs.push_back(Type::FloatTy); - ParamAttrsList *fabsPal = 0; - FunctionType* fabsType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/fabsArgs, - /*isVarArg=*/false); - m_llvmFAbs = new Function( - /*Type=*/fabsType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"fabs", m_mod); - m_llvmFAbs->setCallingConv(CallingConv::C); - m_llvmFAbs->setParamAttrs(fabsPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFAbs, val, - name("fabs")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::lit(llvm::Value *in) -{ - if (!m_llvmLit) { - m_llvmLit = m_mod->getFunction("lit"); - } - CallInst *call = m_builder.CreateCall(m_llvmLit, in, name("litres")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::sub(llvm::Value *in1, llvm::Value *in2) -{ - Value *res = m_builder.CreateSub(in1, in2, name("sub")); - return res; -} - -llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2) -{ - if (!m_llvmPow) { - // predeclare the intrinsic - std::vector powArgs; - powArgs.push_back(Type::FloatTy); - powArgs.push_back(Type::FloatTy); - ParamAttrsList *powPal = 0; - FunctionType* powType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/powArgs, - /*isVarArg=*/false); - m_llvmPow = new Function( - /*Type=*/powType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"llvm.pow.f32", m_mod); - m_llvmPow->setCallingConv(CallingConv::C); - m_llvmPow->setParamAttrs(powPal); - } - std::vector params; - params.push_back(val1); - params.push_back(val2); - CallInst *call = m_builder.CreateCall(m_llvmPow, params.begin(), params.end(), - name("pow")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::pow(llvm::Value *in1, llvm::Value *in2) -{ - Value *x1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(0), - name("x1")); - Value *x2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(0), - name("x2")); - llvm::Value *val = callPow(x1, x2); - return vectorFromVals(val, val, val, val); -} - -llvm::Value * Instructions::rcp(llvm::Value *in1) -{ - Value *x1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(0), - name("x1")); - Value *res = m_builder.CreateFDiv(ConstantFP::get(Type::FloatTy, - APFloat(1.f)), - x1, name("rcp")); - return vectorFromVals(res, res, res, res); -} - -llvm::Value * Instructions::dp4(llvm::Value *in1, llvm::Value *in2) -{ - Value *mulRes = mul(in1, in2); - std::vector vec = extractVector(mulRes); - Value *xy = m_builder.CreateAdd(vec[0], vec[1], name("xy")); - Value *xyz = m_builder.CreateAdd(xy, vec[2], name("xyz")); - Value *dot4 = m_builder.CreateAdd(xyz, vec[3], name("dot4")); - return vectorFromVals(dot4, dot4, dot4, dot4); -} - -llvm::Value * Instructions::dph(llvm::Value *in1, llvm::Value *in2) -{ - Value *mulRes = mul(in1, in2); - std::vector vec1 = extractVector(mulRes); - Value *xy = m_builder.CreateAdd(vec1[0], vec1[1], name("xy")); - Value *xyz = m_builder.CreateAdd(xy, vec1[2], name("xyz")); - Value *dph = m_builder.CreateAdd(xyz, vec1[3], name("dph")); - return vectorFromVals(dph, dph, dph, dph); -} - -llvm::Value * Instructions::dst(llvm::Value *in1, llvm::Value *in2) -{ - Value *y1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(1), - name("y1")); - Value *z = m_builder.CreateExtractElement(in1, - m_storage->constantInt(2), - name("z")); - Value *y2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(1), - name("y2")); - Value *w = m_builder.CreateExtractElement(in2, - m_storage->constantInt(3), - name("w")); - Value *ry = m_builder.CreateMul(y1, y2, name("tyuy")); - return vectorFromVals(ConstantFP::get(Type::FloatTy, APFloat(1.f)), - ry, z, w); -} - -llvm::Value * Instructions::ex2(llvm::Value *in) -{ - llvm::Value *val = callPow(ConstantFP::get(Type::FloatTy, APFloat(2.f)), - m_builder.CreateExtractElement( - in, m_storage->constantInt(0), - name("x1"))); - return vectorFromVals(val, val, val, val); -} - -llvm::Value * Instructions::callFloor(llvm::Value *val) -{ - if (!m_llvmFloor) { - // predeclare the intrinsic - std::vector floorArgs; - floorArgs.push_back(Type::FloatTy); - ParamAttrsList *floorPal = 0; - FunctionType* floorType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/floorArgs, - /*isVarArg=*/false); - m_llvmFloor = new Function( - /*Type=*/floorType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"floorf", m_mod); - m_llvmFloor->setCallingConv(CallingConv::C); - m_llvmFloor->setParamAttrs(floorPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFloor, val, - name("floorf")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::floor(llvm::Value *in) -{ - std::vector vec = extractVector(in); - return vectorFromVals(callFloor(vec[0]), callFloor(vec[1]), - callFloor(vec[2]), callFloor(vec[3])); -} - -llvm::Value * Instructions::arl(llvm::Value *in) -{ - return floor(in); -} - -llvm::Value * Instructions::frc(llvm::Value *in) -{ - llvm::Value *flr = floor(in); - return sub(in, flr); -} - -llvm::Value * Instructions::callFLog(llvm::Value *val) -{ - if (!m_llvmFlog) { - // predeclare the intrinsic - std::vector flogArgs; - flogArgs.push_back(Type::FloatTy); - ParamAttrsList *flogPal = 0; - FunctionType* flogType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/flogArgs, - /*isVarArg=*/false); - m_llvmFlog = new Function( - /*Type=*/flogType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"logf", m_mod); - m_llvmFlog->setCallingConv(CallingConv::C); - m_llvmFlog->setParamAttrs(flogPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFlog, val, - name("logf")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::lg2(llvm::Value *in) -{ - std::vector vec = extractVector(in); - llvm::Value *const_vec = constVector(1.442695f, 1.442695f, - 1.442695f, 1.442695f); - return mul(vectorFromVals(callFLog(vec[0]), callFLog(vec[1]), - callFLog(vec[2]), callFLog(vec[3])), const_vec); -} - -llvm::Value * Instructions::min(llvm::Value *in1, llvm::Value *in2) -{ - std::vector vec1 = extractVector(in1); - std::vector vec2 = extractVector(in2); - - Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp")); - Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0], - name("selx")); - - Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp")); - Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1], - name("sely")); - - Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp")); - Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2], - name("selz")); - - Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp")); - Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3], - name("selw")); - - return vectorFromVals(selx, sely, selz, selw); -} - -llvm::Value * Instructions::max(llvm::Value *in1, llvm::Value *in2) -{ - std::vector vec1 = extractVector(in1); - std::vector vec2 = extractVector(in2); - - Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], - name("xcmp")); - Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0], - name("selx")); - - Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], - name("ycmp")); - Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1], - name("sely")); - - Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], - name("zcmp")); - Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2], - name("selz")); - - Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], - name("wcmp")); - Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3], - name("selw")); - - return vectorFromVals(selx, sely, selz, selw); -} - -void Instructions::printVector(llvm::Value *val) -{ - static const char *frmt = "Vector is [%f, %f, %f, %f]\x0A"; - - if (!m_fmtPtr) { - Constant *format = ConstantArray::get(frmt, true); - ArrayType *arrayTy = ArrayType::get(IntegerType::get(8), strlen(frmt) + 1); - GlobalVariable* globalFormat = new GlobalVariable( - /*Type=*/arrayTy, - /*isConstant=*/true, - /*Linkage=*/GlobalValue::InternalLinkage, - /*Initializer=*/0, // has initializer, specified below - /*Name=*/name(".str"), - m_mod); - globalFormat->setInitializer(format); - - Constant* const_int0 = Constant::getNullValue(IntegerType::get(32)); - std::vector const_ptr_21_indices; - const_ptr_21_indices.push_back(const_int0); - const_ptr_21_indices.push_back(const_int0); - m_fmtPtr = ConstantExpr::getGetElementPtr(globalFormat, - &const_ptr_21_indices[0], const_ptr_21_indices.size()); - } - - Function *func_printf = m_mod->getFunction("printf"); - if (!func_printf) - func_printf = declarePrintf(); - assert(func_printf); - std::vector vec = extractVector(val); - Value *dx = m_builder.CreateFPExt(vec[0], Type::DoubleTy, name("dx")); - Value *dy = m_builder.CreateFPExt(vec[1], Type::DoubleTy, name("dy")); - Value *dz = m_builder.CreateFPExt(vec[2], Type::DoubleTy, name("dz")); - Value *dw = m_builder.CreateFPExt(vec[3], Type::DoubleTy, name("dw")); - std::vector params; - params.push_back(m_fmtPtr); - params.push_back(dx); - params.push_back(dy); - params.push_back(dz); - params.push_back(dw); - CallInst *call = m_builder.CreateCall(func_printf, params.begin(), params.end(), - name("printf")); - call->setCallingConv(CallingConv::C); - call->setTailCall(true); -} - -llvm::Function * Instructions::declarePrintf() -{ - std::vector args; - ParamAttrsList *params = 0; - FunctionType* funcTy = FunctionType::get( - /*Result=*/IntegerType::get(32), - /*Params=*/args, - /*isVarArg=*/true); - Function* func_printf = new Function( - /*Type=*/funcTy, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"printf", m_mod); - func_printf->setCallingConv(CallingConv::C); - func_printf->setParamAttrs(params); - return func_printf; -} - - -llvm::Value * Instructions::sgt(llvm::Value *in1, llvm::Value *in2) -{ - Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f)); - Constant *const0f = Constant::getNullValue(Type::FloatTy); - - std::vector vec1 = extractVector(in1); - std::vector vec2 = extractVector(in2); - Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], name("xcmp")); - Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); - - Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], name("ycmp")); - Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); - - Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], name("zcmp")); - Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); - - Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], name("wcmp")); - Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); - - return vectorFromVals(x, y, z, w); -} -llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2) -{ - Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f)); - Constant *const0f = Constant::getNullValue(Type::FloatTy); - - std::vector vec1 = extractVector(in1); - std::vector vec2 = extractVector(in2); - - Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], vec2[0], name("xcmp")); - Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); - - Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], vec2[1], name("ycmp")); - Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); - - Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], vec2[2], name("zcmp")); - Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); - - Value *wcmp = m_builder.CreateFCmpOGE(vec1[3], vec2[3], name("wcmp")); - Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); - - return vectorFromVals(x, y, z, w); -} - - -llvm::Value * Instructions::slt(llvm::Value *in1, llvm::Value *in2) -{ - Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f)); - Constant *const0f = Constant::getNullValue(Type::FloatTy); - - std::vector vec1 = extractVector(in1); - std::vector vec2 = extractVector(in2); - - Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp")); - Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); - - Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp")); - Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); - - Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp")); - Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); - - Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp")); - Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); - - return vectorFromVals(x, y, z, w); -} - -llvm::Value * Instructions::cross(llvm::Value *in1, llvm::Value *in2) -{ - Value *x1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(0), - name("x1")); - Value *y1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(1), - name("y1")); - Value *z1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(2), - name("z1")); - - Value *x2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(0), - name("x2")); - Value *y2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(1), - name("y2")); - Value *z2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(2), - name("z2")); - Value *y1z2 = mul(y1, z2); - Value *z1y2 = mul(z1, y2); - - Value *z1x2 = mul(z1, x2); - Value *x1z2 = mul(x1, z2); - - Value *x1y2 = mul(x1, y2); - Value *y1x2 = mul(y1, x2); - - return vectorFromVals(sub(y1z2, z1y2), sub(z1x2, x1z2), sub(x1y2, y1x2)); -} - - -llvm::Value * Instructions::abs(llvm::Value *in) -{ - std::vector vec = extractVector(in); - Value *xabs = callFAbs(vec[0]); - Value *yabs = callFAbs(vec[1]); - Value *zabs = callFAbs(vec[2]); - Value *wabs = callFAbs(vec[3]); - return vectorFromVals(xabs, yabs, zabs, wabs); -} - -void Instructions::ifop(llvm::Value *in) -{ - BasicBlock *ifthen = new BasicBlock(name("ifthen"), m_func,0); - BasicBlock *ifend = new BasicBlock(name("ifthenend"), m_func,0); - - //BasicBlock *yblock = new BasicBlock(name("yblock"), m_func,0); - //BasicBlock *zblock = new BasicBlock(name("zblock"), m_func,0); - //BasicBlock *wblock = new BasicBlock(name("wblock"), m_func,0); - - Constant *float0 = Constant::getNullValue(Type::FloatTy); - - Value *x = m_builder.CreateExtractElement(in, m_storage->constantInt(0), - name("extractx")); - Value *xcmp = m_builder.CreateFCmpUNE(x, float0, name("xcmp")); - m_builder.CreateCondBr(xcmp, ifthen, ifend); - //m_builder.SetInsertPoint(yblock); - - m_builder.SetInsertPoint(ifthen); - m_ifStack.push(ifend); -} - -llvm::BasicBlock * Instructions::currentBlock() const -{ - return m_builder.GetInsertBlock(); -} - -void Instructions::elseop() -{ - assert(!m_ifStack.empty()); - BasicBlock *ifend = new BasicBlock(name("ifend"), m_func,0); - m_builder.CreateBr(ifend); - m_builder.SetInsertPoint(m_ifStack.top()); - currentBlock()->setName(name("ifelse")); - m_ifStack.pop(); - m_ifStack.push(ifend); -} - -void Instructions::endif() -{ - assert(!m_ifStack.empty()); - m_builder.CreateBr(m_ifStack.top()); - m_builder.SetInsertPoint(m_ifStack.top()); - m_ifStack.pop(); -} - -llvm::Value * Instructions::lerp(llvm::Value *in1, llvm::Value *in2, - llvm::Value *in3) -{ - llvm::Value *m = mul(in1, in2); - llvm::Value *vec1 = constVector(1.f, 1.f, 1.f, 1.f); - llvm::Value *s = sub(vec1, in1); - return add(m, mul(s, in3)); -} - -void Instructions::beginLoop() -{ - BasicBlock *begin = new BasicBlock(name("loop"), m_func,0); - BasicBlock *end = new BasicBlock(name("endloop"), m_func,0); - - m_builder.CreateBr(begin); - Loop loop; - loop.begin = begin; - loop.end = end; - m_builder.SetInsertPoint(begin); - m_loopStack.push(loop); -} - -void Instructions::endLoop() -{ - assert(!m_loopStack.empty()); - Loop loop = m_loopStack.top(); - m_builder.CreateBr(loop.begin); - loop.end->moveAfter(currentBlock()); - m_builder.SetInsertPoint(loop.end); - m_loopStack.pop(); -} - -void Instructions::brk() -{ - assert(!m_loopStack.empty()); - BasicBlock *unr = new BasicBlock(name("unreachable"), m_func,0); - m_builder.CreateBr(m_loopStack.top().end); - m_builder.SetInsertPoint(unr); -} - -llvm::Value * Instructions::trunc(llvm::Value *in) -{ - std::vector vec = extractVector(in); - Value *icastx = m_builder.CreateFPToSI(vec[0], IntegerType::get(32), - name("ftoix")); - Value *icasty = m_builder.CreateFPToSI(vec[1], IntegerType::get(32), - name("ftoiy")); - Value *icastz = m_builder.CreateFPToSI(vec[2], IntegerType::get(32), - name("ftoiz")); - Value *icastw = m_builder.CreateFPToSI(vec[3], IntegerType::get(32), - name("ftoiw")); - Value *fx = m_builder.CreateSIToFP(icastx, Type::FloatTy, - name("fx")); - Value *fy = m_builder.CreateSIToFP(icasty, Type::FloatTy, - name("fy")); - Value *fz = m_builder.CreateSIToFP(icastz, Type::FloatTy, - name("fz")); - Value *fw = m_builder.CreateSIToFP(icastw, Type::FloatTy, - name("fw")); - return vectorFromVals(fx, fy, fz, fw); -} - -void Instructions::end() -{ - m_builder.CreateRetVoid(); -} - -void Instructions::cal(int label, llvm::Value *input) -{ - std::vector params; - params.push_back(input); - llvm::Function *func = findFunction(label); - - m_builder.CreateCall(func, params.begin(), params.end()); -} - -llvm::Function * Instructions::declareFunc(int label) -{ - PointerType *vecPtr = PointerType::getUnqual(m_floatVecType); - std::vector args; - args.push_back(vecPtr); - args.push_back(vecPtr); - args.push_back(vecPtr); - args.push_back(vecPtr); - ParamAttrsList *params = 0; - FunctionType *funcType = FunctionType::get( - /*Result=*/Type::VoidTy, - /*Params=*/args, - /*isVarArg=*/false); - std::string name = createFuncName(label); - Function *func = new Function( - /*Type=*/funcType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/name.c_str(), m_mod); - func->setCallingConv(CallingConv::C); - func->setParamAttrs(params); - return func; -} - -void Instructions::bgnSub(unsigned label) -{ - llvm::Function *func = findFunction(label); - - Function::arg_iterator args = func->arg_begin(); - Value *ptr_INPUT = args++; - ptr_INPUT->setName("INPUT"); - m_storage->pushArguments(ptr_INPUT); - - llvm::BasicBlock *entry = new BasicBlock("entry", func, 0); - - m_func = func; - m_builder.SetInsertPoint(entry); -} - -void Instructions::endSub() -{ - m_func = 0; - m_builder.SetInsertPoint(0); -} - -llvm::Function * Instructions::findFunction(int label) -{ - llvm::Function *func = m_functions[label]; - if (!func) { - func = declareFunc(label); - m_functions[label] = func; - } - return func; -} - -llvm::Value * Instructions::constVector(float x, float y, float z, float w) -{ - std::vector vec(4); - vec[0] = ConstantFP::get(Type::FloatTy, APFloat(x)); - vec[1] = ConstantFP::get(Type::FloatTy, APFloat(y)); - vec[2] = ConstantFP::get(Type::FloatTy, APFloat(z)); - vec[3] = ConstantFP::get(Type::FloatTy, APFloat(w)); - return ConstantVector::get(m_floatVecType, vec); -} - - -std::vector Instructions::extractVector(llvm::Value *vec) -{ - std::vector elems(4); - elems[0] = m_builder.CreateExtractElement(vec, m_storage->constantInt(0), - name("x")); - elems[1] = m_builder.CreateExtractElement(vec, m_storage->constantInt(1), - name("y")); - elems[2] = m_builder.CreateExtractElement(vec, m_storage->constantInt(2), - name("z")); - elems[3] = m_builder.CreateExtractElement(vec, m_storage->constantInt(3), - name("w")); - return elems; -} - -llvm::Value * Instructions::cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) -{ - llvm::Function *func = m_mod->getFunction("cmp"); - assert(func); - - std::vector params; - params.push_back(in1); - params.push_back(in2); - params.push_back(in3); - CallInst *call = m_builder.CreateCall(func, params.begin(), params.end(), name("cmpres")); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::cos(llvm::Value *in) -{ -#if 0 - llvm::Function *func = m_mod->getFunction("vcos"); - assert(func); - - CallInst *call = m_builder.CreateCall(func, in, name("cosres")); - call->setTailCall(false); - return call; -#else - std::vector elems = extractVector(in); - Function *func = m_mod->getFunction("cosf"); - assert(func); - CallInst *cos = m_builder.CreateCall(func, elems[0], name("cosres")); - cos->setCallingConv(CallingConv::C); - cos->setTailCall(true); - return vectorFromVals(cos, cos, cos, cos); -#endif -} - -llvm::Value * Instructions::scs(llvm::Value *in) -{ - llvm::Function *func = m_mod->getFunction("scs"); - assert(func); - - CallInst *call = m_builder.CreateCall(func, in, name("scsres")); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::kilp(llvm::Value *in) -{ - llvm::Function *func = m_mod->getFunction("kilp"); - assert(func); - - CallInst *call = m_builder.CreateCall(func, in, name("kilpres")); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::sin(llvm::Value *in) -{ - llvm::Function *func = m_mod->getFunction("vsin"); - assert(func); - - CallInst *call = m_builder.CreateCall(func, in, name("sinres")); - call->setTailCall(false); - return call; -} -#endif //MESA_LLVM - - diff --git a/src/gallium/auxiliary/llvm/instructions.h b/src/gallium/auxiliary/llvm/instructions.h deleted file mode 100644 index 9ebc17dd8e..0000000000 --- a/src/gallium/auxiliary/llvm/instructions.h +++ /dev/null @@ -1,152 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin zack@tungstengraphics.com - */ - -#ifndef INSTRUCTIONS_H -#define INSTRUCTIONS_H - -#include -#include -#include -#include - -#include -#include - -namespace llvm { - class VectorType; - class Function; -} - -class Storage; - -class Instructions -{ -public: - Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block, - Storage *storage); - - llvm::BasicBlock *currentBlock() const; - - llvm::Value *abs(llvm::Value *in1); - llvm::Value *arl(llvm::Value *in1); - llvm::Value *add(llvm::Value *in1, llvm::Value *in2); - void beginLoop(); - void bgnSub(unsigned); - void brk(); - void cal(int label, llvm::Value *input); - llvm::Value *cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); - llvm::Value *cos(llvm::Value *in); - llvm::Value *cross(llvm::Value *in1, llvm::Value *in2); - llvm::Value *dp3(llvm::Value *in1, llvm::Value *in2); - llvm::Value *dp4(llvm::Value *in1, llvm::Value *in2); - llvm::Value *dph(llvm::Value *in1, llvm::Value *in2); - llvm::Value *dst(llvm::Value *in1, llvm::Value *in2); - void elseop(); - void endif(); - void endLoop(); - void end(); - void endSub(); - llvm::Value *ex2(llvm::Value *in); - llvm::Value *floor(llvm::Value *in); - llvm::Value *frc(llvm::Value *in); - void ifop(llvm::Value *in); - llvm::Value *kilp(llvm::Value *in); - llvm::Value *lerp(llvm::Value *in1, llvm::Value *in2, - llvm::Value *in3); - llvm::Value *lit(llvm::Value *in); - llvm::Value *lg2(llvm::Value *in); - llvm::Value *madd(llvm::Value *in1, llvm::Value *in2, - llvm::Value *in2); - llvm::Value *min(llvm::Value *in1, llvm::Value *in2); - llvm::Value *max(llvm::Value *in1, llvm::Value *in2); - llvm::Value *mul(llvm::Value *in1, llvm::Value *in2); - llvm::Value *pow(llvm::Value *in1, llvm::Value *in2); - llvm::Value *rcp(llvm::Value *in); - llvm::Value *rsq(llvm::Value *in); - llvm::Value *scs(llvm::Value *in); - llvm::Value *sge(llvm::Value *in1, llvm::Value *in2); - llvm::Value *sgt(llvm::Value *in1, llvm::Value *in2); - llvm::Value *sin(llvm::Value *in); - llvm::Value *slt(llvm::Value *in1, llvm::Value *in2); - llvm::Value *sub(llvm::Value *in1, llvm::Value *in2); - llvm::Value *trunc(llvm::Value *in); - - void printVector(llvm::Value *val); -private: - const char *name(const char *prefix); - - llvm::Value *callFAbs(llvm::Value *val); - llvm::Value *callFloor(llvm::Value *val); - llvm::Value *callFSqrt(llvm::Value *val); - llvm::Value *callFLog(llvm::Value *val); - llvm::Value *callPow(llvm::Value *val1, llvm::Value *val2); - - llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, - llvm::Value *z, llvm::Value *w=0); - - llvm::Value *constVector(float x, float y, float z, float w); - - llvm::Function *declarePrintf(); - llvm::Function *declareFunc(int label); - - llvm::Function *findFunction(int label); - - std::vector extractVector(llvm::Value *vec); -private: - llvm::Module *m_mod; - llvm::Function *m_func; - char m_name[32]; - llvm::LLVMFoldingBuilder m_builder; - int m_idx; - - llvm::VectorType *m_floatVecType; - - llvm::Function *m_llvmFSqrt; - llvm::Function *m_llvmFAbs; - llvm::Function *m_llvmPow; - llvm::Function *m_llvmFloor; - llvm::Function *m_llvmFlog; - llvm::Function *m_llvmLit; - - llvm::Constant *m_fmtPtr; - - std::stack m_ifStack; - struct Loop { - llvm::BasicBlock *begin; - llvm::BasicBlock *end; - }; - std::stack m_loopStack; - std::map m_functions; - Storage *m_storage; -}; - -#endif diff --git a/src/gallium/auxiliary/llvm/instructionssoa.cpp b/src/gallium/auxiliary/llvm/instructionssoa.cpp deleted file mode 100644 index a4d5046637..0000000000 --- a/src/gallium/auxiliary/llvm/instructionssoa.cpp +++ /dev/null @@ -1,121 +0,0 @@ -#include "instructionssoa.h" - -#include "storagesoa.h" - -#include - -using namespace llvm; - -InstructionsSoa::InstructionsSoa(llvm::Module *mod, llvm::Function *func, - llvm::BasicBlock *block, StorageSoa *storage) - : m_builder(block), - m_storage(storage), - m_idx(0) -{ -} - -const char * InstructionsSoa::name(const char *prefix) const -{ - ++m_idx; - snprintf(m_name, 32, "%s%d", prefix, m_idx); - return m_name; -} - -llvm::Value * InstructionsSoa::vectorFromVals(llvm::Value *x, llvm::Value *y, - llvm::Value *z, llvm::Value *w) -{ - VectorType *vectorType = VectorType::get(Type::FloatTy, 4); - Constant *constVector = Constant::getNullValue(vectorType); - Value *res = m_builder.CreateInsertElement(constVector, x, - m_storage->constantInt(0), - name("vecx")); - res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1), - name("vecxy")); - res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2), - name("vecxyz")); - if (w) - res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3), - name("vecxyzw")); - return res; -} - -std::vector InstructionsSoa::arl(const std::vector in) -{ - std::vector res(4); - - //Extract x's - llvm::Value *x1 = m_builder.CreateExtractElement(in[0], - m_storage->constantInt(0), - name("extractX")); - //cast it to an unsigned int - x1 = m_builder.CreateFPToUI(x1, IntegerType::get(32), name("x1IntCast")); - - res[0] = x1;//vectorFromVals(x1, x2, x3, x4); - //only x is valid. the others shouldn't be necessary - /* - res[1] = Constant::getNullValue(m_floatVecType); - res[2] = Constant::getNullValue(m_floatVecType); - res[3] = Constant::getNullValue(m_floatVecType); - */ - - return res; -} - - -std::vector InstructionsSoa::add(const std::vector in1, - const std::vector in2) -{ - std::vector res(4); - - res[0] = m_builder.CreateAdd(in1[0], in2[0], name("addx")); - res[1] = m_builder.CreateAdd(in1[1], in2[1], name("addy")); - res[2] = m_builder.CreateAdd(in1[2], in2[2], name("addz")); - res[3] = m_builder.CreateAdd(in1[3], in2[3], name("addw")); - - return res; -} - -std::vector InstructionsSoa::mul(const std::vector in1, - const std::vector in2) -{ - std::vector res(4); - - res[0] = m_builder.CreateMul(in1[0], in2[0], name("mulx")); - res[1] = m_builder.CreateMul(in1[1], in2[1], name("muly")); - res[2] = m_builder.CreateMul(in1[2], in2[2], name("mulz")); - res[3] = m_builder.CreateMul(in1[3], in2[3], name("mulw")); - - return res; -} - -void InstructionsSoa::end() -{ - m_builder.CreateRetVoid(); -} - -std::vector InstructionsSoa::madd(const std::vector in1, - const std::vector in2, - const std::vector in3) -{ - std::vector res = mul(in1, in2); - return add(res, in3); -} - -std::vector InstructionsSoa::extractVector(llvm::Value *vector) -{ - std::vector res(4); - res[0] = m_builder.CreateExtractElement(vector, - m_storage->constantInt(0), - name("extract1X")); - res[1] = m_builder.CreateExtractElement(vector, - m_storage->constantInt(1), - name("extract2X")); - res[2] = m_builder.CreateExtractElement(vector, - m_storage->constantInt(2), - name("extract3X")); - res[3] = m_builder.CreateExtractElement(vector, - m_storage->constantInt(3), - name("extract4X")); - - return res; -} diff --git a/src/gallium/auxiliary/llvm/instructionssoa.h b/src/gallium/auxiliary/llvm/instructionssoa.h deleted file mode 100644 index 4169dcbb2e..0000000000 --- a/src/gallium/auxiliary/llvm/instructionssoa.h +++ /dev/null @@ -1,74 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef INSTRUCTIONSSOA_H -#define INSTRUCTIONSSOA_H - -#include - -#include - -namespace llvm { - class Module; - class Function; - class BasicBlock; - class Value; -} -class StorageSoa; - -class InstructionsSoa -{ -public: - InstructionsSoa(llvm::Module *mod, llvm::Function *func, - llvm::BasicBlock *block, StorageSoa *storage); - - std::vector arl(const std::vector in); - - std::vector add(const std::vector in1, - const std::vector in2); - std::vector madd(const std::vector in1, - const std::vector in2, - const std::vector in3); - std::vector mul(const std::vector in1, - const std::vector in2); - void end(); - - std::vector extractVector(llvm::Value *vector); -private: - const char * name(const char *prefix) const; - llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, - llvm::Value *z, llvm::Value *w); -private: - llvm::LLVMFoldingBuilder m_builder; - StorageSoa *m_storage; -private: - mutable int m_idx; - mutable char m_name[32]; -}; - - -#endif diff --git a/src/gallium/auxiliary/llvm/llvm_builtins.c b/src/gallium/auxiliary/llvm/llvm_builtins.c deleted file mode 100644 index 4f98d754ba..0000000000 --- a/src/gallium/auxiliary/llvm/llvm_builtins.c +++ /dev/null @@ -1,115 +0,0 @@ -/*clang --emit-llvm llvm_builtins.c |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=gallivm_builtins.cpp -f -for=shader -funcname=createGallivmBuiltins*/ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin zack@tungstengraphics.com - */ -typedef __attribute__(( ocu_vector_type(4) )) float float4; - -extern float powf(float a, float b); - -inline float approx(float a, float b) -{ - if (b < -128.0f) b = -128.0f; - if (b > 128.0f) b = 128.0f; - if (a < 0) a = 0; - return powf(a, b); -} - -inline float4 lit(float4 tmp) -{ - float4 result; - result.x = 1.0; - result.w = 1.0; - if (tmp.x > 0) { - result.y = tmp.x; - result.z = approx(tmp.y, tmp.w); - } else { - result.y = 0; - result.z = 0; - } - return result; -} - -inline float4 cmp(float4 tmp0, float4 tmp1, float4 tmp2) -{ - float4 result; - - result.x = (tmp0.x < 0.0) ? tmp1.x : tmp2.x; - result.y = (tmp0.y < 0.0) ? tmp1.y : tmp2.y; - result.z = (tmp0.z < 0.0) ? tmp1.z : tmp2.z; - result.w = (tmp0.w < 0.0) ? tmp1.w : tmp2.w; - - return result; -} - -extern float cosf(float val); -extern float sinf(float val); - -inline float4 vcos(float4 val) -{ - float4 result; - printf("VEC IN is %f %f %f %f\n", val.x, val.y, val.z, val.w); - result.x = cosf(val.x); - result.y = cosf(val.x); - result.z = cosf(val.x); - result.w = cosf(val.x); - printf("VEC OUT is %f %f %f %f\n", result.x, result.y, result.z, result.w); - return result; -} - -inline float4 scs(float4 val) -{ - float4 result; - float tmp = val.x; - result.x = cosf(tmp); - result.y = sinf(tmp); - return result; -} - - -inline float4 vsin(float4 val) -{ - float4 result; - float tmp = val.x; - float res = sinf(tmp); - result.x = res; - result.y = res; - result.z = res; - result.w = res; - return result; -} - -inline int kilp(float4 val) -{ - if (val.x < 0 || val.y < 0 || val.z < 0 || val.w < 0) - return 1; - else - return 0; -} diff --git a/src/gallium/auxiliary/llvm/loweringpass.cpp b/src/gallium/auxiliary/llvm/loweringpass.cpp deleted file mode 100644 index 556dbec366..0000000000 --- a/src/gallium/auxiliary/llvm/loweringpass.cpp +++ /dev/null @@ -1,17 +0,0 @@ -#include "loweringpass.h" - -using namespace llvm; - -char LoweringPass::ID = 0; -RegisterPass X("lowering", "Lowering Pass"); - -LoweringPass::LoweringPass() - : ModulePass((intptr_t)&ID) -{ -} - -bool LoweringPass::runOnModule(Module &m) -{ - llvm::cerr << "Hello: " << m.getModuleIdentifier() << "\n"; - return false; -} diff --git a/src/gallium/auxiliary/llvm/loweringpass.h b/src/gallium/auxiliary/llvm/loweringpass.h deleted file mode 100644 index f62dcf6ba7..0000000000 --- a/src/gallium/auxiliary/llvm/loweringpass.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef LOWERINGPASS_H -#define LOWERINGPASS_H - -#include "llvm/Pass.h" -#include "llvm/Module.h" - -struct LoweringPass : public llvm::ModulePass -{ - static char ID; - LoweringPass(); - - virtual bool runOnModule(llvm::Module &m); -}; - -#endif diff --git a/src/gallium/auxiliary/llvm/storage.cpp b/src/gallium/auxiliary/llvm/storage.cpp deleted file mode 100644 index c4326de8c5..0000000000 --- a/src/gallium/auxiliary/llvm/storage.cpp +++ /dev/null @@ -1,364 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin zack@tungstengraphics.com - */ -#ifdef MESA_LLVM - -#include "storage.h" - -#include "gallivm_p.h" - -#include "pipe/p_shader_tokens.h" -#include -#include -#include - -#include -#include -#include -#include -#include - -using namespace llvm; - -Storage::Storage(llvm::BasicBlock *block, llvm::Value *input) - : m_block(block), - m_INPUT(input), - m_addrs(32), - m_idx(0) -{ - m_floatVecType = VectorType::get(Type::FloatTy, 4); - m_intVecType = VectorType::get(IntegerType::get(32), 4); - - m_undefFloatVec = UndefValue::get(m_floatVecType); - m_undefIntVec = UndefValue::get(m_intVecType); - m_extSwizzleVec = 0; - - m_numConsts = 0; -} - -//can only build vectors with all members in the [0, 9] range -llvm::Constant *Storage::shuffleMask(int vec) -{ - if (!m_extSwizzleVec) { - std::vector elems; - elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(0.f))); - elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(1.f))); - elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(0.f))); - elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(1.f))); - m_extSwizzleVec = ConstantVector::get(m_floatVecType, elems); - } - - if (m_intVecs.find(vec) != m_intVecs.end()) { - return m_intVecs[vec]; - } - int origVec = vec; - Constant* const_vec = 0; - if (origVec == 0) { - const_vec = Constant::getNullValue(m_intVecType); - } else { - int x = gallivm_x_swizzle(vec); - int y = gallivm_y_swizzle(vec); - int z = gallivm_z_swizzle(vec); - int w = gallivm_w_swizzle(vec); - std::vector elems; - elems.push_back(constantInt(x)); - elems.push_back(constantInt(y)); - elems.push_back(constantInt(z)); - elems.push_back(constantInt(w)); - const_vec = ConstantVector::get(m_intVecType, elems); - } - - m_intVecs[origVec] = const_vec; - return const_vec; -} - -llvm::ConstantInt *Storage::constantInt(int idx) -{ - if (m_constInts.find(idx) != m_constInts.end()) { - return m_constInts[idx]; - } - ConstantInt *const_int = ConstantInt::get(APInt(32, idx)); - m_constInts[idx] = const_int; - return const_int; -} - -llvm::Value *Storage::inputElement(int idx, llvm::Value *indIdx) -{ - Value *val = element(InputsArg, idx, indIdx); - LoadInst *load = new LoadInst(val, name("input"), false, m_block); - load->setAlignment(8); - - return load; -} - -llvm::Value *Storage::constElement(int idx, llvm::Value *indIdx) -{ - m_numConsts = ((idx + 1) > m_numConsts) ? (idx + 1) : m_numConsts; - - Value *elem = element(ConstsArg, idx, indIdx); - LoadInst *load = new LoadInst(elem, name("const"), false, m_block); - load->setAlignment(8); - return load; -} - -llvm::Value *Storage::shuffleVector(llvm::Value *vec, int shuffle) -{ - Constant *mask = shuffleMask(shuffle); - ShuffleVectorInst *res = - new ShuffleVectorInst(vec, m_extSwizzleVec, mask, - name("shuffle"), m_block); - return res; -} - - -llvm::Value *Storage::tempElement(int idx, llvm::Value *indIdx) -{ - Value *elem = element(TempsArg, idx, indIdx); - - LoadInst *load = new LoadInst(elem, name("temp"), false, m_block); - load->setAlignment(8); - - return load; -} - -void Storage::setTempElement(int idx, llvm::Value *val, int mask) -{ - if (mask != TGSI_WRITEMASK_XYZW) { - llvm::Value *templ = 0; - if (m_tempWriteMap[idx]) - templ = tempElement(idx); - val = maskWrite(val, mask, templ); - } - Value *elem = element(TempsArg, idx); - StoreInst *st = new StoreInst(val, elem, false, m_block); - st->setAlignment(8); - m_tempWriteMap[idx] = true; -} - -void Storage::setOutputElement(int dstIdx, llvm::Value *val, int mask) -{ - if (mask != TGSI_WRITEMASK_XYZW) { - llvm::Value *templ = 0; - if (m_destWriteMap[dstIdx]) - templ = outputElement(dstIdx); - val = maskWrite(val, mask, templ); - } - - Value *elem = element(DestsArg, dstIdx); - StoreInst *st = new StoreInst(val, elem, false, m_block); - st->setAlignment(8); - m_destWriteMap[dstIdx] = true; -} - -llvm::Value *Storage::maskWrite(llvm::Value *src, int mask, llvm::Value *templ) -{ - llvm::Value *dst = templ; - if (!dst) - dst = Constant::getNullValue(m_floatVecType); - if ((mask & TGSI_WRITEMASK_X)) { - llvm::Value *x = new ExtractElementInst(src, unsigned(0), - name("x"), m_block); - dst = new InsertElementInst(dst, x, unsigned(0), - name("dstx"), m_block); - } - if ((mask & TGSI_WRITEMASK_Y)) { - llvm::Value *y = new ExtractElementInst(src, unsigned(1), - name("y"), m_block); - dst = new InsertElementInst(dst, y, unsigned(1), - name("dsty"), m_block); - } - if ((mask & TGSI_WRITEMASK_Z)) { - llvm::Value *z = new ExtractElementInst(src, unsigned(2), - name("z"), m_block); - dst = new InsertElementInst(dst, z, unsigned(2), - name("dstz"), m_block); - } - if ((mask & TGSI_WRITEMASK_W)) { - llvm::Value *w = new ExtractElementInst(src, unsigned(3), - name("w"), m_block); - dst = new InsertElementInst(dst, w, unsigned(3), - name("dstw"), m_block); - } - return dst; -} - -const char * Storage::name(const char *prefix) -{ - ++m_idx; - snprintf(m_name, 32, "%s%d", prefix, m_idx); - return m_name; -} - -int Storage::numConsts() const -{ - return m_numConsts; -} - -llvm::Value * Storage::addrElement(int idx) const -{ - Value *ret = m_addrs[idx]; - if (!ret) - return m_undefFloatVec; - return ret; -} - -void Storage::setAddrElement(int idx, llvm::Value *val, int mask) -{ - if (mask != TGSI_WRITEMASK_XYZW) { - llvm::Value *templ = m_addrs[idx]; - val = maskWrite(val, mask, templ); - } - m_addrs[idx] = val; -} - -llvm::Value * Storage::extractIndex(llvm::Value *vec) -{ - llvm::Value *x = new ExtractElementInst(vec, unsigned(0), - name("x"), m_block); - return new FPToSIInst(x, IntegerType::get(32), name("intidx"), m_block); -} - -void Storage::setCurrentBlock(llvm::BasicBlock *block) -{ - m_block = block; -} - -llvm::Value * Storage::outputElement(int idx, llvm::Value *indIdx) -{ - Value *elem = element(DestsArg, idx, indIdx); - LoadInst *load = new LoadInst(elem, name("output"), false, m_block); - load->setAlignment(8); - - return load; -} - -llvm::Value * Storage::inputPtr() const -{ - return m_INPUT; -} - -void Storage::pushArguments(llvm::Value *input) -{ - m_argStack.push(m_INPUT); - - m_INPUT = input; -} - -void Storage::popArguments() -{ - m_INPUT = m_argStack.top(); - m_argStack.pop(); -} - -void Storage::pushTemps() -{ - m_extSwizzleVec = 0; -} - -void Storage::popTemps() -{ -} - -llvm::Value * Storage::immediateElement(int idx) -{ - return m_immediates[idx]; -} - -void Storage::addImmediate(float *val) -{ - std::vector vec(4); - vec[0] = ConstantFP::get(Type::FloatTy, APFloat(val[0])); - vec[1] = ConstantFP::get(Type::FloatTy, APFloat(val[1])); - vec[2] = ConstantFP::get(Type::FloatTy, APFloat(val[2])); - vec[3] = ConstantFP::get(Type::FloatTy, APFloat(val[3])); - m_immediates.push_back(ConstantVector::get(m_floatVecType, vec)); -} - - -llvm::Value * Storage::elemPtr(Args arg) -{ - std::vector indices; - indices.push_back(constantInt(0)); - indices.push_back(constantInt(static_cast(arg))); - GetElementPtrInst *getElem = new GetElementPtrInst(m_INPUT, - indices.begin(), - indices.end(), - name("input_ptr"), - m_block); - return new LoadInst(getElem, name("input_field"), false, m_block); -} - -llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx, - llvm::Value *indIdx ) -{ - GetElementPtrInst *getElem = 0; - - if (indIdx) { - getElem = new GetElementPtrInst(ptr, - BinaryOperator::create(Instruction::Add, - indIdx, - constantInt(idx), - name("add"), - m_block), - name("field"), - m_block); - } else { - getElem = new GetElementPtrInst(ptr, - constantInt(idx), - name("field"), - m_block); - } - return getElem; -} - -llvm::Value * Storage::element(Args arg, int idx, llvm::Value *indIdx ) -{ - Value *val = elemPtr(arg); - return elemIdx(val, idx, indIdx); -} - -void Storage::setKilElement(llvm::Value *val) -{ - std::vector indices; - indices.push_back(constantInt(0)); - indices.push_back(constantInt(static_cast(KilArg))); - GetElementPtrInst *elem = new GetElementPtrInst(m_INPUT, - indices.begin(), - indices.end(), - name("kil_ptr"), - m_block); - StoreInst *st = new StoreInst(val, elem, false, m_block); - st->setAlignment(8); -} - -#endif //MESA_LLVM - - diff --git a/src/gallium/auxiliary/llvm/storage.h b/src/gallium/auxiliary/llvm/storage.h deleted file mode 100644 index 8574f7554e..0000000000 --- a/src/gallium/auxiliary/llvm/storage.h +++ /dev/null @@ -1,133 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Zack Rusin zack@tungstengraphics.com - */ - -#ifndef STORAGE_H -#define STORAGE_H - -#include -#include -#include -#include - -namespace llvm { - class BasicBlock; - class Constant; - class ConstantInt; - class LoadInst; - class Value; - class VectorType; -} - -class Storage -{ -public: - Storage(llvm::BasicBlock *block, - llvm::Value *input); - - llvm::Value *inputPtr() const; - - void setCurrentBlock(llvm::BasicBlock *block); - - llvm::ConstantInt *constantInt(int); - llvm::Constant *shuffleMask(int vec); - llvm::Value *inputElement(int idx, llvm::Value *indIdx =0); - llvm::Value *constElement(int idx, llvm::Value *indIdx =0); - llvm::Value *outputElement(int idx, llvm::Value *indIdx =0); - llvm::Value *tempElement(int idx, llvm::Value *indIdx =0); - llvm::Value *immediateElement(int idx); - - void setOutputElement(int dstIdx, llvm::Value *val, int mask); - void setTempElement(int idx, llvm::Value *val, int mask); - - llvm::Value *addrElement(int idx) const; - void setAddrElement(int idx, llvm::Value *val, int mask); - - void setKilElement(llvm::Value *val); - - llvm::Value *shuffleVector(llvm::Value *vec, int shuffle); - - llvm::Value *extractIndex(llvm::Value *vec); - - int numConsts() const; - - void pushArguments(llvm::Value *input); - void popArguments(); - void pushTemps(); - void popTemps(); - - void addImmediate(float *val); - -private: - llvm::Value *maskWrite(llvm::Value *src, int mask, llvm::Value *templ); - const char *name(const char *prefix); - - enum Args { - DestsArg = 0, - InputsArg = 1, - TempsArg = 2, - ConstsArg = 3, - KilArg = 4 - }; - llvm::Value *elemPtr(Args arg); - llvm::Value *elemIdx(llvm::Value *ptr, int idx, - llvm::Value *indIdx = 0); - llvm::Value *element(Args arg, int idx, llvm::Value *indIdx = 0); - -private: - llvm::BasicBlock *m_block; - llvm::Value *m_INPUT; - - std::map m_constInts; - std::map m_intVecs; - std::vector m_addrs; - std::vector m_immediates; - - llvm::VectorType *m_floatVecType; - llvm::VectorType *m_intVecType; - - char m_name[32]; - int m_idx; - - int m_numConsts; - - std::map m_destWriteMap; - std::map m_tempWriteMap; - - llvm::Value *m_undefFloatVec; - llvm::Value *m_undefIntVec; - llvm::Value *m_extSwizzleVec; - - std::stack m_argStack; - std::stack > m_tempStack; -}; - -#endif diff --git a/src/gallium/auxiliary/llvm/storagesoa.cpp b/src/gallium/auxiliary/llvm/storagesoa.cpp deleted file mode 100644 index ed0674a96f..0000000000 --- a/src/gallium/auxiliary/llvm/storagesoa.cpp +++ /dev/null @@ -1,389 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "storagesoa.h" - -#include "gallivm_p.h" - -#include "pipe/p_shader_tokens.h" -#include "pipe/p_debug.h" - -#include -#include -#include - -#include -#include -#include -#include -#include - -using namespace llvm; - - -StorageSoa::StorageSoa(llvm::BasicBlock *block, - llvm::Value *input, - llvm::Value *output, - llvm::Value *consts, - llvm::Value *temps) - : m_block(block), - m_input(input), - m_output(output), - m_consts(consts), - m_temps(temps), - m_immediates(0), - m_idx(0) -{ -} - -void StorageSoa::addImmediate(float *vec) -{ - std::vector vals(4); - vals[0] = vec[0]; - vals[1] = vec[1]; - vals[2] = vec[2]; - vals[3] = vec[3]; - m_immediatesToFlush.push_back(vals); -} - -void StorageSoa::declareImmediates() -{ - if (m_immediatesToFlush.empty()) - return; - - VectorType *vectorType = VectorType::get(Type::FloatTy, 4); - ArrayType *vectorChannels = ArrayType::get(vectorType, 4); - ArrayType *arrayType = ArrayType::get(vectorChannels, m_immediatesToFlush.size()); - - m_immediates = new GlobalVariable( - /*Type=*/arrayType, - /*isConstant=*/false, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Initializer=*/0, // has initializer, specified below - /*Name=*/name("immediates"), - currentModule()); - - std::vector arrayVals; - for (unsigned int i = 0; i < m_immediatesToFlush.size(); ++i) { - std::vector vec = m_immediatesToFlush[i]; - std::vector vals(4); - std::vector channelArray; - - vals[0] = vec[0]; vals[1] = vec[0]; vals[2] = vec[0]; vals[3] = vec[0]; - llvm::Constant *xChannel = createConstGlobalVector(vals); - - vals[0] = vec[1]; vals[1] = vec[1]; vals[2] = vec[1]; vals[3] = vec[1]; - llvm::Constant *yChannel = createConstGlobalVector(vals); - - vals[0] = vec[2]; vals[1] = vec[2]; vals[2] = vec[2]; vals[3] = vec[2]; - llvm::Constant *zChannel = createConstGlobalVector(vals); - - vals[0] = vec[3]; vals[1] = vec[3]; vals[2] = vec[3]; vals[3] = vec[3]; - llvm::Constant *wChannel = createConstGlobalVector(vals); - channelArray.push_back(xChannel); - channelArray.push_back(yChannel); - channelArray.push_back(zChannel); - channelArray.push_back(wChannel); - Constant *constChannels = ConstantArray::get(vectorChannels, - channelArray); - arrayVals.push_back(constChannels); - } - Constant *constArray = ConstantArray::get(arrayType, arrayVals); - m_immediates->setInitializer(constArray); - - m_immediatesToFlush.clear(); -} - -llvm::Value *StorageSoa::addrElement(int idx) const -{ - std::map::const_iterator itr = m_addresses.find(idx); - if (itr == m_addresses.end()) { - debug_printf("Trying to access invalid shader 'address'\n"); - return 0; - } - llvm::Value * res = (*itr).second; - - res = new LoadInst(res, name("addr"), false, m_block); - - return res; -} - -std::vector StorageSoa::inputElement(llvm::Value *idx) -{ - std::vector res(4); - - res[0] = element(m_input, idx, 0); - res[1] = element(m_input, idx, 1); - res[2] = element(m_input, idx, 2); - res[3] = element(m_input, idx, 3); - - return res; -} - -std::vector StorageSoa::constElement(llvm::Value *idx) -{ - std::vector res(4); - llvm::Value *xChannel, *yChannel, *zChannel, *wChannel; - - xChannel = elementPointer(m_consts, idx, 0); - yChannel = elementPointer(m_consts, idx, 1); - zChannel = elementPointer(m_consts, idx, 2); - wChannel = elementPointer(m_consts, idx, 3); - - res[0] = alignedArrayLoad(xChannel); - res[1] = alignedArrayLoad(yChannel); - res[2] = alignedArrayLoad(zChannel); - res[3] = alignedArrayLoad(wChannel); - - return res; -} - -std::vector StorageSoa::outputElement(llvm::Value *idx) -{ - std::vector res(4); - - res[0] = element(m_output, idx, 0); - res[1] = element(m_output, idx, 1); - res[2] = element(m_output, idx, 2); - res[3] = element(m_output, idx, 3); - - return res; -} - -std::vector StorageSoa::tempElement(llvm::Value *idx) -{ - std::vector res(4); - - res[0] = element(m_temps, idx, 0); - res[1] = element(m_temps, idx, 1); - res[2] = element(m_temps, idx, 2); - res[3] = element(m_temps, idx, 3); - - return res; -} - -std::vector StorageSoa::immediateElement(llvm::Value *idx) -{ - std::vector res(4); - - res[0] = element(m_immediates, idx, 0); - res[1] = element(m_immediates, idx, 1); - res[2] = element(m_immediates, idx, 2); - res[3] = element(m_immediates, idx, 3); - - return res; -} - -llvm::Value * StorageSoa::elementPointer(llvm::Value *ptr, llvm::Value *index, - int channel) const -{ - std::vector indices; - if (m_immediates == ptr) - indices.push_back(constantInt(0)); - indices.push_back(index); - indices.push_back(constantInt(channel)); - - GetElementPtrInst *getElem = new GetElementPtrInst(ptr, - indices.begin(), - indices.end(), - name("ptr"), - m_block); - return getElem; -} - -llvm::Value * StorageSoa::element(llvm::Value *ptr, llvm::Value *index, - int channel) const -{ - llvm::Value *res = elementPointer(ptr, index, channel); - LoadInst *load = new LoadInst(res, name("element"), false, m_block); - //load->setAlignment(8); - return load; -} - -const char * StorageSoa::name(const char *prefix) const -{ - ++m_idx; - snprintf(m_name, 32, "%s%d", prefix, m_idx); - return m_name; -} - -llvm::ConstantInt * StorageSoa::constantInt(int idx) const -{ - if (m_constInts.find(idx) != m_constInts.end()) { - return m_constInts[idx]; - } - ConstantInt *constInt = ConstantInt::get(APInt(32, idx)); - m_constInts[idx] = constInt; - return constInt; -} - -llvm::Value *StorageSoa::alignedArrayLoad(llvm::Value *val) -{ - VectorType *vectorType = VectorType::get(Type::FloatTy, 4); - PointerType *vectorPtr = PointerType::get(vectorType, 0); - - CastInst *cast = new BitCastInst(val, vectorPtr, name("toVector"), m_block); - LoadInst *load = new LoadInst(cast, name("alignLoad"), false, m_block); - load->setAlignment(8); - return load; -} - -llvm::Module * StorageSoa::currentModule() const -{ - if (!m_block || !m_block->getParent()) - return 0; - - return m_block->getParent()->getParent(); -} - -llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector &vec) -{ - VectorType *vectorType = VectorType::get(Type::FloatTy, 4); - std::vector immValues; - ConstantFP *constx = ConstantFP::get(Type::FloatTy, APFloat(vec[0])); - ConstantFP *consty = ConstantFP::get(Type::FloatTy, APFloat(vec[1])); - ConstantFP *constz = ConstantFP::get(Type::FloatTy, APFloat(vec[2])); - ConstantFP *constw = ConstantFP::get(Type::FloatTy, APFloat(vec[3])); - immValues.push_back(constx); - immValues.push_back(consty); - immValues.push_back(constz); - immValues.push_back(constw); - Constant *constVector = ConstantVector::get(vectorType, immValues); - - return constVector; -} - -std::vector StorageSoa::load(Argument type, int idx, int swizzle, - llvm::Value *indIdx) -{ - std::vector val(4); - - //if we have an indirect index, always use that - // if not use the integer offset to create one - llvm::Value *realIndex = 0; - if (indIdx) - realIndex = indIdx; - else - realIndex = constantInt(idx); - debug_printf("XXXXXXXXX realIdx = %p, indIdx = %p\n", realIndex, indIdx); - - switch(type) { - case Input: - val = inputElement(realIndex); - break; - case Output: - val = outputElement(realIndex); - break; - case Temp: - val = tempElement(realIndex); - break; - case Const: - val = constElement(realIndex); - break; - case Immediate: - val = immediateElement(realIndex); - break; - case Address: - debug_printf("Address not handled in the load phase!\n"); - assert(0); - break; - } - if (!gallivm_is_swizzle(swizzle)) - return val; - - std::vector res(4); - - res[0] = val[gallivm_x_swizzle(swizzle)]; - res[1] = val[gallivm_y_swizzle(swizzle)]; - res[2] = val[gallivm_z_swizzle(swizzle)]; - res[3] = val[gallivm_w_swizzle(swizzle)]; - return res; -} - -void StorageSoa::store(Argument type, int idx, const std::vector &val, - int mask) -{ - llvm::Value *out = 0; - switch(type) { - case Output: - out = m_output; - break; - case Temp: - out = m_temps; - break; - case Input: - out = m_input; - break; - case Address: { - llvm::Value *addr = m_addresses[idx]; - if (!addr) { - addAddress(idx); - addr = m_addresses[idx]; - assert(addr); - } - new StoreInst(val[0], addr, false, m_block); - return; - break; - } - default: - debug_printf("Can't save output of this type: %d !\n", type); - assert(0); - break; - } - llvm::Value *realIndex = constantInt(idx); - if ((mask & TGSI_WRITEMASK_X)) { - llvm::Value *xChannel = elementPointer(out, realIndex, 0); - new StoreInst(val[0], xChannel, false, m_block); - } - if ((mask & TGSI_WRITEMASK_Y)) { - llvm::Value *yChannel = elementPointer(out, realIndex, 1); - new StoreInst(val[1], yChannel, false, m_block); - } - if ((mask & TGSI_WRITEMASK_Z)) { - llvm::Value *zChannel = elementPointer(out, realIndex, 2); - new StoreInst(val[2], zChannel, false, m_block); - } - if ((mask & TGSI_WRITEMASK_W)) { - llvm::Value *wChannel = elementPointer(out, realIndex, 3); - new StoreInst(val[3], wChannel, false, m_block); - } -} - -void StorageSoa::addAddress(int idx) -{ - GlobalVariable *val = new GlobalVariable( - /*Type=*/IntegerType::get(32), - /*isConstant=*/false, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Initializer=*/0, // has initializer, specified below - /*Name=*/name("address"), - currentModule()); - val->setInitializer(Constant::getNullValue(IntegerType::get(32))); - - debug_printf("adding to %d\n", idx); - m_addresses[idx] = val; -} diff --git a/src/gallium/auxiliary/llvm/storagesoa.h b/src/gallium/auxiliary/llvm/storagesoa.h deleted file mode 100644 index 6443351f27..0000000000 --- a/src/gallium/auxiliary/llvm/storagesoa.h +++ /dev/null @@ -1,111 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef STORAGESOA_H -#define STORAGESOA_H - -#include -#include -#include - -namespace llvm { - class BasicBlock; - class Constant; - class ConstantInt; - class GlobalVariable; - class LoadInst; - class Value; - class VectorType; - class Module; -} - -class StorageSoa -{ -public: - enum Argument { - Input, - Output, - Temp, - Const, - Immediate, - Address - }; -public: - StorageSoa(llvm::BasicBlock *block, - llvm::Value *input, - llvm::Value *output, - llvm::Value *consts, - llvm::Value *temps); - - - std::vector load(Argument type, int idx, int swizzle, - llvm::Value *indIdx =0); - void store(Argument type, int idx, const std::vector &val, - int mask); - - void addImmediate(float *vec); - void declareImmediates(); - - void addAddress(int idx); - - llvm::Value * addrElement(int idx) const; - - llvm::ConstantInt *constantInt(int) const; -private: - llvm::Value *elementPointer(llvm::Value *ptr, llvm::Value *indIdx, - int channel) const; - llvm::Value *element(llvm::Value *ptr, llvm::Value *idx, - int channel) const; - const char *name(const char *prefix) const; - llvm::Value *alignedArrayLoad(llvm::Value *val); - llvm::Module *currentModule() const; - llvm::Constant *createConstGlobalVector(const std::vector &vec); - - std::vector inputElement(llvm::Value *indIdx); - std::vector constElement(llvm::Value *indIdx); - std::vector outputElement(llvm::Value *indIdx); - std::vector tempElement(llvm::Value *indIdx); - std::vector immediateElement(llvm::Value *indIdx); -private: - llvm::BasicBlock *m_block; - - llvm::Value *m_input; - llvm::Value *m_output; - llvm::Value *m_consts; - llvm::Value *m_temps; - llvm::GlobalVariable *m_immediates; - - std::map m_addresses; - - std::vector > m_immediatesToFlush; - - mutable std::map m_constInts; - mutable char m_name[32]; - mutable int m_idx; -}; - -#endif diff --git a/src/gallium/auxiliary/llvm/tgsitollvm.cpp b/src/gallium/auxiliary/llvm/tgsitollvm.cpp deleted file mode 100644 index 2cb4acce32..0000000000 --- a/src/gallium/auxiliary/llvm/tgsitollvm.cpp +++ /dev/null @@ -1,1221 +0,0 @@ -#include "tgsitollvm.h" - -#include "gallivm.h" -#include "gallivm_p.h" - -#include "storage.h" -#include "instructions.h" -#include "storagesoa.h" -#include "instructionssoa.h" - -#include "pipe/p_shader_tokens.h" - -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/exec/tgsi_exec.h" -#include "tgsi/util/tgsi_util.h" -#include "tgsi/util/tgsi_build.h" -#include "tgsi/util/tgsi_dump.h" - - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - - -#include -#include -#include - -using namespace llvm; - -static inline FunctionType *vertexShaderFunctionType() -{ - //Function takes three arguments, - // the calling code has to make sure the types it will - // pass are castable to the following: - // [4 x <4 x float>] inputs, - // [4 x <4 x float>] output, - // [4 x [4 x float]] consts, - // [4 x <4 x float>] temps - - std::vector funcArgs; - VectorType *vectorType = VectorType::get(Type::FloatTy, 4); - ArrayType *vectorArray = ArrayType::get(vectorType, 4); - PointerType *vectorArrayPtr = PointerType::get(vectorArray, 0); - - ArrayType *floatArray = ArrayType::get(Type::FloatTy, 4); - ArrayType *constsArray = ArrayType::get(floatArray, 4); - PointerType *constsArrayPtr = PointerType::get(constsArray, 0); - - funcArgs.push_back(vectorArrayPtr);//inputs - funcArgs.push_back(vectorArrayPtr);//output - funcArgs.push_back(constsArrayPtr);//consts - funcArgs.push_back(vectorArrayPtr);//temps - - FunctionType *functionType = FunctionType::get( - /*Result=*/Type::VoidTy, - /*Params=*/funcArgs, - /*isVarArg=*/false); - - return functionType; -} - -static inline void -add_interpolator(struct gallivm_ir *ir, - struct gallivm_interpolate *interp) -{ - ir->interpolators[ir->num_interp] = *interp; - ++ir->num_interp; -} - -static void -translate_declaration(struct gallivm_ir *prog, - llvm::Module *module, - Storage *storage, - struct tgsi_full_declaration *decl, - struct tgsi_full_declaration *fd) -{ - if (decl->Declaration.File == TGSI_FILE_INPUT) { - unsigned first, last, mask; - uint interp_method; - - assert(decl->Declaration.Declare == TGSI_DECLARE_RANGE); - - first = decl->u.DeclarationRange.First; - last = decl->u.DeclarationRange.Last; - mask = decl->Declaration.UsageMask; - - /* Do not touch WPOS.xy */ - if (first == 0) { - mask &= ~TGSI_WRITEMASK_XY; - if (mask == TGSI_WRITEMASK_NONE) { - first++; - if (first > last) { - return; - } - } - } - - interp_method = decl->Interpolation.Interpolate; - - if (mask == TGSI_WRITEMASK_XYZW) { - unsigned i, j; - - for (i = first; i <= last; i++) { - for (j = 0; j < NUM_CHANNELS; j++) { - //interp( mach, i, j ); - struct gallivm_interpolate interp; - interp.type = interp_method; - interp.attrib = i; - interp.chan = j; - add_interpolator(prog, &interp); - } - } - } else { - unsigned i, j; - for( j = 0; j < NUM_CHANNELS; j++ ) { - if( mask & (1 << j) ) { - for( i = first; i <= last; i++ ) { - struct gallivm_interpolate interp; - interp.type = interp_method; - interp.attrib = i; - interp.chan = j; - add_interpolator(prog, &interp); - } - } - } - } - } -} - -static void -translate_declarationir(struct gallivm_ir *, - llvm::Module *, - StorageSoa *storage, - struct tgsi_full_declaration *decl, - struct tgsi_full_declaration *) -{ - if (decl->Declaration.File == TGSI_FILE_ADDRESS) { - int idx = decl->u.DeclarationRange.First; - storage->addAddress(idx); - } -} - -static void -translate_immediate(Storage *storage, - struct tgsi_full_immediate *imm) -{ - float vec[4]; - int i; - for (i = 0; i < imm->Immediate.Size - 1; ++i) { - switch (imm->Immediate.DataType) { - case TGSI_IMM_FLOAT32: - vec[i] = imm->u.ImmediateFloat32[i].Float; - break; - default: - assert(0); - } - } - storage->addImmediate(vec); -} - - -static void -translate_immediateir(StorageSoa *storage, - struct tgsi_full_immediate *imm) -{ - float vec[4]; - int i; - for (i = 0; i < imm->Immediate.Size - 1; ++i) { - switch (imm->Immediate.DataType) { - case TGSI_IMM_FLOAT32: - vec[i] = imm->u.ImmediateFloat32[i].Float; - break; - default: - assert(0); - } - } - storage->addImmediate(vec); -} - -static inline int -swizzleInt(struct tgsi_full_src_register *src) -{ - int swizzle = 0; - int start = 1000; - - for (int k = 0; k < 4; ++k) { - swizzle += tgsi_util_get_full_src_register_extswizzle(src, k) * start; - start /= 10; - } - return swizzle; -} - -static inline llvm::Value * -swizzleVector(llvm::Value *val, struct tgsi_full_src_register *src, - Storage *storage) -{ - int swizzle = swizzleInt(src); - - if (gallivm_is_swizzle(swizzle)) { - /*fprintf(stderr, "XXXXXXXX swizzle = %d\n", swizzle);*/ - val = storage->shuffleVector(val, swizzle); - } - return val; -} - -static void -translate_instruction(llvm::Module *module, - Storage *storage, - Instructions *instr, - struct tgsi_full_instruction *inst, - struct tgsi_full_instruction *fi, - unsigned instno) -{ - llvm::Value *inputs[4]; - inputs[0] = 0; - inputs[1] = 0; - inputs[2] = 0; - inputs[3] = 0; - - for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { - struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; - llvm::Value *val = 0; - llvm::Value *indIdx = 0; - - if (src->SrcRegister.Indirect) { - indIdx = storage->addrElement(src->SrcRegisterInd.Index); - indIdx = storage->extractIndex(indIdx); - } - if (src->SrcRegister.File == TGSI_FILE_CONSTANT) { - val = storage->constElement(src->SrcRegister.Index, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_INPUT) { - val = storage->inputElement(src->SrcRegister.Index, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) { - val = storage->tempElement(src->SrcRegister.Index); - } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) { - val = storage->outputElement(src->SrcRegister.Index, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { - val = storage->immediateElement(src->SrcRegister.Index); - } else { - fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File); - return; - } - - inputs[i] = swizzleVector(val, src, storage); - } - - /*if (inputs[0]) - instr->printVector(inputs[0]); - if (inputs[1]) - instr->printVector(inputs[1]);*/ - llvm::Value *out = 0; - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ARL: { - out = instr->arl(inputs[0]); - } - break; - case TGSI_OPCODE_MOV: { - out = inputs[0]; - } - break; - case TGSI_OPCODE_LIT: { - out = instr->lit(inputs[0]); - } - break; - case TGSI_OPCODE_RCP: { - out = instr->rcp(inputs[0]); - } - break; - case TGSI_OPCODE_RSQ: { - out = instr->rsq(inputs[0]); - } - break; - case TGSI_OPCODE_EXP: - break; - case TGSI_OPCODE_LOG: - break; - case TGSI_OPCODE_MUL: { - out = instr->mul(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_ADD: { - out = instr->add(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_DP3: { - out = instr->dp3(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_DP4: { - out = instr->dp4(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_DST: { - out = instr->dst(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_MIN: { - out = instr->min(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_MAX: { - out = instr->max(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SLT: { - out = instr->slt(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SGE: { - out = instr->sge(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_MAD: { - out = instr->madd(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_SUB: { - out = instr->sub(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_LERP: { - out = instr->lerp(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_CND: - break; - case TGSI_OPCODE_CND0: - break; - case TGSI_OPCODE_DOT2ADD: - break; - case TGSI_OPCODE_INDEX: - break; - case TGSI_OPCODE_NEGATE: - break; - case TGSI_OPCODE_FRAC: { - out = instr->frc(inputs[0]); - } - break; - case TGSI_OPCODE_CLAMP: - break; - case TGSI_OPCODE_FLOOR: { - out = instr->floor(inputs[0]); - } - break; - case TGSI_OPCODE_ROUND: - break; - case TGSI_OPCODE_EXPBASE2: { - out = instr->ex2(inputs[0]); - } - break; - case TGSI_OPCODE_LOGBASE2: { - out = instr->lg2(inputs[0]); - } - break; - case TGSI_OPCODE_POWER: { - out = instr->pow(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_CROSSPRODUCT: { - out = instr->cross(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_MULTIPLYMATRIX: - break; - case TGSI_OPCODE_ABS: { - out = instr->abs(inputs[0]); - } - break; - case TGSI_OPCODE_RCC: - break; - case TGSI_OPCODE_DPH: { - out = instr->dph(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_COS: { - out = instr->cos(inputs[0]); - } - break; - case TGSI_OPCODE_DDX: - break; - case TGSI_OPCODE_DDY: - break; - case TGSI_OPCODE_KILP: { - out = instr->kilp(inputs[0]); - storage->setKilElement(out); - return; - } - break; - case TGSI_OPCODE_PK2H: - break; - case TGSI_OPCODE_PK2US: - break; - case TGSI_OPCODE_PK4B: - break; - case TGSI_OPCODE_PK4UB: - break; - case TGSI_OPCODE_RFL: - break; - case TGSI_OPCODE_SEQ: - break; - case TGSI_OPCODE_SFL: - break; - case TGSI_OPCODE_SGT: { - out = instr->sgt(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_SIN: { - out = instr->sin(inputs[0]); - } - break; - case TGSI_OPCODE_SLE: - break; - case TGSI_OPCODE_SNE: - break; - case TGSI_OPCODE_STR: - break; - case TGSI_OPCODE_TEX: - break; - case TGSI_OPCODE_TXD: - break; - case TGSI_OPCODE_UP2H: - break; - case TGSI_OPCODE_UP2US: - break; - case TGSI_OPCODE_UP4B: - break; - case TGSI_OPCODE_UP4UB: - break; - case TGSI_OPCODE_X2D: - break; - case TGSI_OPCODE_ARA: - break; - case TGSI_OPCODE_ARR: - break; - case TGSI_OPCODE_BRA: - break; - case TGSI_OPCODE_CAL: { - instr->cal(inst->InstructionExtLabel.Label, storage->inputPtr()); - return; - } - break; - case TGSI_OPCODE_RET: { - instr->end(); - return; - } - break; - case TGSI_OPCODE_SSG: - break; - case TGSI_OPCODE_CMP: { - out = instr->cmp(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_SCS: { - out = instr->scs(inputs[0]); - } - break; - case TGSI_OPCODE_TXB: - break; - case TGSI_OPCODE_NRM: - break; - case TGSI_OPCODE_DIV: - break; - case TGSI_OPCODE_DP2: - break; - case TGSI_OPCODE_TXL: - break; - case TGSI_OPCODE_BRK: { - instr->brk(); - return; - } - break; - case TGSI_OPCODE_IF: { - instr->ifop(inputs[0]); - storage->setCurrentBlock(instr->currentBlock()); - return; //just update the state - } - break; - case TGSI_OPCODE_LOOP: - break; - case TGSI_OPCODE_REP: - break; - case TGSI_OPCODE_ELSE: { - instr->elseop(); - storage->setCurrentBlock(instr->currentBlock()); - return; //only state update - } - break; - case TGSI_OPCODE_ENDIF: { - instr->endif(); - storage->setCurrentBlock(instr->currentBlock()); - return; //just update the state - } - break; - case TGSI_OPCODE_ENDLOOP: - break; - case TGSI_OPCODE_ENDREP: - break; - case TGSI_OPCODE_PUSHA: - break; - case TGSI_OPCODE_POPA: - break; - case TGSI_OPCODE_CEIL: - break; - case TGSI_OPCODE_I2F: - break; - case TGSI_OPCODE_NOT: - break; - case TGSI_OPCODE_TRUNC: { - out = instr->trunc(inputs[0]); - } - break; - case TGSI_OPCODE_SHL: - break; - case TGSI_OPCODE_SHR: - break; - case TGSI_OPCODE_AND: - break; - case TGSI_OPCODE_OR: - break; - case TGSI_OPCODE_MOD: - break; - case TGSI_OPCODE_XOR: - break; - case TGSI_OPCODE_SAD: - break; - case TGSI_OPCODE_TXF: - break; - case TGSI_OPCODE_TXQ: - break; - case TGSI_OPCODE_CONT: - break; - case TGSI_OPCODE_EMIT: - break; - case TGSI_OPCODE_ENDPRIM: - break; - case TGSI_OPCODE_BGNLOOP2: { - instr->beginLoop(); - storage->setCurrentBlock(instr->currentBlock()); - return; - } - break; - case TGSI_OPCODE_BGNSUB: { - instr->bgnSub(instno); - storage->setCurrentBlock(instr->currentBlock()); - storage->pushTemps(); - return; - } - break; - case TGSI_OPCODE_ENDLOOP2: { - instr->endLoop(); - storage->setCurrentBlock(instr->currentBlock()); - return; - } - break; - case TGSI_OPCODE_ENDSUB: { - instr->endSub(); - storage->setCurrentBlock(instr->currentBlock()); - storage->popArguments(); - storage->popTemps(); - return; - } - break; - case TGSI_OPCODE_NOISE1: - break; - case TGSI_OPCODE_NOISE2: - break; - case TGSI_OPCODE_NOISE3: - break; - case TGSI_OPCODE_NOISE4: - break; - case TGSI_OPCODE_NOP: - break; - case TGSI_OPCODE_TEXBEM: - break; - case TGSI_OPCODE_TEXBEML: - break; - case TGSI_OPCODE_TEXREG2AR: - break; - case TGSI_OPCODE_TEXM3X2PAD: - break; - case TGSI_OPCODE_TEXM3X2TEX: - break; - case TGSI_OPCODE_TEXM3X3PAD: - break; - case TGSI_OPCODE_TEXM3X3TEX: - break; - case TGSI_OPCODE_TEXM3X3SPEC: - break; - case TGSI_OPCODE_TEXM3X3VSPEC: - break; - case TGSI_OPCODE_TEXREG2GB: - break; - case TGSI_OPCODE_TEXREG2RGB: - break; - case TGSI_OPCODE_TEXDP3TEX: - break; - case TGSI_OPCODE_TEXDP3: - break; - case TGSI_OPCODE_TEXM3X3: - break; - case TGSI_OPCODE_TEXM3X2DEPTH: - break; - case TGSI_OPCODE_TEXDEPTH: - break; - case TGSI_OPCODE_BEM: - break; - case TGSI_OPCODE_M4X3: - break; - case TGSI_OPCODE_M3X4: - break; - case TGSI_OPCODE_M3X3: - break; - case TGSI_OPCODE_M3X2: - break; - case TGSI_OPCODE_NRM4: - break; - case TGSI_OPCODE_CALLNZ: - break; - case TGSI_OPCODE_IFC: - break; - case TGSI_OPCODE_BREAKC: - break; - case TGSI_OPCODE_KIL: - break; - case TGSI_OPCODE_END: - instr->end(); - return; - break; - default: - fprintf(stderr, "ERROR: Unknown opcode %d\n", - inst->Instruction.Opcode); - assert(0); - break; - } - - if (!out) { - fprintf(stderr, "ERROR: unsupported opcode %d\n", - inst->Instruction.Opcode); - assert(!"Unsupported opcode"); - } - - /* # not sure if we need this */ - switch( inst->Instruction.Saturate ) { - case TGSI_SAT_NONE: - break; - case TGSI_SAT_ZERO_ONE: - /*TXT( "_SAT" );*/ - break; - case TGSI_SAT_MINUS_PLUS_ONE: - /*TXT( "_SAT[-1,1]" );*/ - break; - default: - assert( 0 ); - } - - /* store results */ - for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - - if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { - storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) { - storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) { - storage->setAddrElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else { - fprintf(stderr, "ERROR: unsupported LLVM destination!"); - assert(!"wrong destination"); - } - } -} - - -static void -translate_instructionir(llvm::Module *module, - StorageSoa *storage, - InstructionsSoa *instr, - struct tgsi_full_instruction *inst, - struct tgsi_full_instruction *fi, - unsigned instno) -{ - std::vector< std::vector > inputs(inst->Instruction.NumSrcRegs); - - for (int i = 0; i < inst->Instruction.NumSrcRegs; ++i) { - struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; - std::vector val; - llvm::Value *indIdx = 0; - int swizzle = swizzleInt(src); - - if (src->SrcRegister.Indirect) { - indIdx = storage->addrElement(src->SrcRegisterInd.Index); - } - if (src->SrcRegister.File == TGSI_FILE_CONSTANT) { - val = storage->load(StorageSoa::Const, - src->SrcRegister.Index, swizzle, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_INPUT) { - val = storage->load(StorageSoa::Input, - src->SrcRegister.Index, swizzle, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) { - val = storage->load(StorageSoa::Temp, - src->SrcRegister.Index, swizzle, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) { - val = storage->load(StorageSoa::Output, - src->SrcRegister.Index, swizzle, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { - val = storage->load(StorageSoa::Immediate, - src->SrcRegister.Index, swizzle, indIdx); - } else { - fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File); - return; - } - - inputs[i] = val; - } - - std::vector out(4); - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ARL: { - out = instr->arl(inputs[0]); - } - break; - case TGSI_OPCODE_MOV: { - out = inputs[0]; - } - break; - case TGSI_OPCODE_LIT: { - } - break; - case TGSI_OPCODE_RCP: { - } - break; - case TGSI_OPCODE_RSQ: { - } - break; - case TGSI_OPCODE_EXP: - break; - case TGSI_OPCODE_LOG: - break; - case TGSI_OPCODE_MUL: { - out = instr->mul(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_ADD: { - out = instr->add(inputs[0], inputs[1]); - } - break; - case TGSI_OPCODE_DP3: { - } - break; - case TGSI_OPCODE_DP4: { - } - break; - case TGSI_OPCODE_DST: { - } - break; - case TGSI_OPCODE_MIN: { - } - break; - case TGSI_OPCODE_MAX: { - } - break; - case TGSI_OPCODE_SLT: { - } - break; - case TGSI_OPCODE_SGE: { - } - break; - case TGSI_OPCODE_MAD: { - out = instr->madd(inputs[0], inputs[1], inputs[2]); - } - break; - case TGSI_OPCODE_SUB: { - } - break; - case TGSI_OPCODE_LERP: { - } - break; - case TGSI_OPCODE_CND: - break; - case TGSI_OPCODE_CND0: - break; - case TGSI_OPCODE_DOT2ADD: - break; - case TGSI_OPCODE_INDEX: - break; - case TGSI_OPCODE_NEGATE: - break; - case TGSI_OPCODE_FRAC: { - } - break; - case TGSI_OPCODE_CLAMP: - break; - case TGSI_OPCODE_FLOOR: { - } - break; - case TGSI_OPCODE_ROUND: - break; - case TGSI_OPCODE_EXPBASE2: { - } - break; - case TGSI_OPCODE_LOGBASE2: { - } - break; - case TGSI_OPCODE_POWER: { - } - break; - case TGSI_OPCODE_CROSSPRODUCT: { - } - break; - case TGSI_OPCODE_MULTIPLYMATRIX: - break; - case TGSI_OPCODE_ABS: { - } - break; - case TGSI_OPCODE_RCC: - break; - case TGSI_OPCODE_DPH: { - } - break; - case TGSI_OPCODE_COS: { - } - break; - case TGSI_OPCODE_DDX: - break; - case TGSI_OPCODE_DDY: - break; - case TGSI_OPCODE_KILP: { - } - break; - case TGSI_OPCODE_PK2H: - break; - case TGSI_OPCODE_PK2US: - break; - case TGSI_OPCODE_PK4B: - break; - case TGSI_OPCODE_PK4UB: - break; - case TGSI_OPCODE_RFL: - break; - case TGSI_OPCODE_SEQ: - break; - case TGSI_OPCODE_SFL: - break; - case TGSI_OPCODE_SGT: { - } - break; - case TGSI_OPCODE_SIN: { - } - break; - case TGSI_OPCODE_SLE: - break; - case TGSI_OPCODE_SNE: - break; - case TGSI_OPCODE_STR: - break; - case TGSI_OPCODE_TEX: - break; - case TGSI_OPCODE_TXD: - break; - case TGSI_OPCODE_UP2H: - break; - case TGSI_OPCODE_UP2US: - break; - case TGSI_OPCODE_UP4B: - break; - case TGSI_OPCODE_UP4UB: - break; - case TGSI_OPCODE_X2D: - break; - case TGSI_OPCODE_ARA: - break; - case TGSI_OPCODE_ARR: - break; - case TGSI_OPCODE_BRA: - break; - case TGSI_OPCODE_CAL: { - } - break; - case TGSI_OPCODE_RET: { - } - break; - case TGSI_OPCODE_SSG: - break; - case TGSI_OPCODE_CMP: { - } - break; - case TGSI_OPCODE_SCS: { - } - break; - case TGSI_OPCODE_TXB: - break; - case TGSI_OPCODE_NRM: - break; - case TGSI_OPCODE_DIV: - break; - case TGSI_OPCODE_DP2: - break; - case TGSI_OPCODE_TXL: - break; - case TGSI_OPCODE_BRK: { - } - break; - case TGSI_OPCODE_IF: { - } - break; - case TGSI_OPCODE_LOOP: - break; - case TGSI_OPCODE_REP: - break; - case TGSI_OPCODE_ELSE: { - } - break; - case TGSI_OPCODE_ENDIF: { - } - break; - case TGSI_OPCODE_ENDLOOP: - break; - case TGSI_OPCODE_ENDREP: - break; - case TGSI_OPCODE_PUSHA: - break; - case TGSI_OPCODE_POPA: - break; - case TGSI_OPCODE_CEIL: - break; - case TGSI_OPCODE_I2F: - break; - case TGSI_OPCODE_NOT: - break; - case TGSI_OPCODE_TRUNC: { - } - break; - case TGSI_OPCODE_SHL: - break; - case TGSI_OPCODE_SHR: - break; - case TGSI_OPCODE_AND: - break; - case TGSI_OPCODE_OR: - break; - case TGSI_OPCODE_MOD: - break; - case TGSI_OPCODE_XOR: - break; - case TGSI_OPCODE_SAD: - break; - case TGSI_OPCODE_TXF: - break; - case TGSI_OPCODE_TXQ: - break; - case TGSI_OPCODE_CONT: - break; - case TGSI_OPCODE_EMIT: - break; - case TGSI_OPCODE_ENDPRIM: - break; - case TGSI_OPCODE_BGNLOOP2: { - } - break; - case TGSI_OPCODE_BGNSUB: { - } - break; - case TGSI_OPCODE_ENDLOOP2: { - } - break; - case TGSI_OPCODE_ENDSUB: { - } - break; - case TGSI_OPCODE_NOISE1: - break; - case TGSI_OPCODE_NOISE2: - break; - case TGSI_OPCODE_NOISE3: - break; - case TGSI_OPCODE_NOISE4: - break; - case TGSI_OPCODE_NOP: - break; - case TGSI_OPCODE_TEXBEM: - break; - case TGSI_OPCODE_TEXBEML: - break; - case TGSI_OPCODE_TEXREG2AR: - break; - case TGSI_OPCODE_TEXM3X2PAD: - break; - case TGSI_OPCODE_TEXM3X2TEX: - break; - case TGSI_OPCODE_TEXM3X3PAD: - break; - case TGSI_OPCODE_TEXM3X3TEX: - break; - case TGSI_OPCODE_TEXM3X3SPEC: - break; - case TGSI_OPCODE_TEXM3X3VSPEC: - break; - case TGSI_OPCODE_TEXREG2GB: - break; - case TGSI_OPCODE_TEXREG2RGB: - break; - case TGSI_OPCODE_TEXDP3TEX: - break; - case TGSI_OPCODE_TEXDP3: - break; - case TGSI_OPCODE_TEXM3X3: - break; - case TGSI_OPCODE_TEXM3X2DEPTH: - break; - case TGSI_OPCODE_TEXDEPTH: - break; - case TGSI_OPCODE_BEM: - break; - case TGSI_OPCODE_M4X3: - break; - case TGSI_OPCODE_M3X4: - break; - case TGSI_OPCODE_M3X3: - break; - case TGSI_OPCODE_M3X2: - break; - case TGSI_OPCODE_NRM4: - break; - case TGSI_OPCODE_CALLNZ: - break; - case TGSI_OPCODE_IFC: - break; - case TGSI_OPCODE_BREAKC: - break; - case TGSI_OPCODE_KIL: - break; - case TGSI_OPCODE_END: - instr->end(); - return; - break; - default: - fprintf(stderr, "ERROR: Unknown opcode %d\n", - inst->Instruction.Opcode); - assert(0); - break; - } - - if (!out[0]) { - fprintf(stderr, "ERROR: unsupported opcode %d\n", - inst->Instruction.Opcode); - assert(!"Unsupported opcode"); - } - - /* store results */ - for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - - if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { - storage->store(StorageSoa::Output, - dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) { - storage->store(StorageSoa::Temp, - dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) { - storage->store(StorageSoa::Address, - dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else { - fprintf(stderr, "ERROR: unsupported LLVM destination!"); - assert(!"wrong destination"); - } - } -} - -llvm::Module * -tgsi_to_llvm(struct gallivm_ir *ir, const struct tgsi_token *tokens) -{ - llvm::Module *mod = new Module("shader"); - struct tgsi_parse_context parse; - struct tgsi_full_instruction fi; - struct tgsi_full_declaration fd; - unsigned instno = 0; - Function* shader = mod->getFunction("execute_shader"); - std::ostringstream stream; - if (ir->type == GALLIVM_VS) { - stream << "vs_shader"; - } else { - stream << "fs_shader"; - } - stream << ir->id; - std::string func_name = stream.str(); - shader->setName(func_name.c_str()); - - Function::arg_iterator args = shader->arg_begin(); - Value *ptr_INPUT = args++; - ptr_INPUT->setName("input"); - - BasicBlock *label_entry = new BasicBlock("entry", shader, 0); - - tgsi_parse_init(&parse, tokens); - - fi = tgsi_default_full_instruction(); - fd = tgsi_default_full_declaration(); - Storage storage(label_entry, ptr_INPUT); - Instructions instr(mod, shader, label_entry, &storage); - while(!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - translate_declaration(ir, mod, &storage, - &parse.FullToken.FullDeclaration, - &fd); - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - translate_immediate(&storage, - &parse.FullToken.FullImmediate); - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - translate_instruction(mod, &storage, &instr, - &parse.FullToken.FullInstruction, - &fi, instno); - ++instno; - break; - - default: - assert(0); - } - } - - tgsi_parse_free(&parse); - - ir->num_consts = storage.numConsts(); - return mod; -} - -llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, - const struct tgsi_token *tokens) -{ - llvm::Module *mod = new Module("shader"); - struct tgsi_parse_context parse; - struct tgsi_full_instruction fi; - struct tgsi_full_declaration fd; - unsigned instno = 0; - std::ostringstream stream; - if (ir->type == GALLIVM_VS) { - stream << "vs_shader"; - } else { - stream << "fs_shader"; - } - //stream << ir->id; - std::string func_name = stream.str(); - Function *shader = llvm::cast(mod->getOrInsertFunction( - func_name.c_str(), - vertexShaderFunctionType())); - - Function::arg_iterator args = shader->arg_begin(); - Value *input = args++; - input->setName("inputs"); - Value *output = args++; - output->setName("outputs"); - Value *consts = args++; - consts->setName("consts"); - Value *temps = args++; - temps->setName("temps"); - - BasicBlock *label_entry = new BasicBlock("entry", shader, 0); - - tgsi_parse_init(&parse, tokens); - - fi = tgsi_default_full_instruction(); - fd = tgsi_default_full_declaration(); - - StorageSoa storage(label_entry, input, output, consts, temps); - InstructionsSoa instr(mod, shader, label_entry, &storage); - - while(!tgsi_parse_end_of_tokens(&parse)) { - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - translate_declarationir(ir, mod, &storage, - &parse.FullToken.FullDeclaration, - &fd); - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - translate_immediateir(&storage, - &parse.FullToken.FullImmediate); - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - storage.declareImmediates(); - translate_instructionir(mod, &storage, &instr, - &parse.FullToken.FullInstruction, - &fi, instno); - ++instno; - break; - - default: - assert(0); - } - } - - tgsi_parse_free(&parse); - - return mod; -} diff --git a/src/gallium/auxiliary/llvm/tgsitollvm.h b/src/gallium/auxiliary/llvm/tgsitollvm.h deleted file mode 100644 index 7ada04d629..0000000000 --- a/src/gallium/auxiliary/llvm/tgsitollvm.h +++ /dev/null @@ -1,20 +0,0 @@ -#ifndef TGSITOLLVM_H -#define TGSITOLLVM_H - - -namespace llvm { - class Module; -} - -struct gallivm_ir; -struct tgsi_token; - - -llvm::Module * tgsi_to_llvm(struct gallivm_ir *ir, - const struct tgsi_token *tokens); - - -llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, - const struct tgsi_token *tokens); - -#endif -- cgit v1.2.3 From 0448dbd64a2ef217349f4ada4777d432bc82e46d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 18 Feb 2008 12:33:34 +0000 Subject: Update for llvm -> gallivm rename. --- configs/linux-llvm | 2 +- src/gallium/SConscript | 2 +- src/gallium/auxiliary/draw/draw_vs_llvm.c | 2 +- src/gallium/winsys/xlib/Makefile | 8 ++------ src/mesa/Makefile | 4 ++-- 5 files changed, 7 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/configs/linux-llvm b/configs/linux-llvm index a8889321a0..44e200e856 100644 --- a/configs/linux-llvm +++ b/configs/linux-llvm @@ -5,7 +5,7 @@ include $(TOP)/configs/linux CONFIG_NAME = linux-llvm -GALLIUM_AUXILIARY_DIRS += llvm +GALLIUM_AUXILIARY_DIRS += gallivm OPT_FLAGS = -g -ansi -pedantic DEFINES += -DDEBUG -DDEBUG_MATH -DMESA_LLVM=1 diff --git a/src/gallium/SConscript b/src/gallium/SConscript index a835f6d661..ea55ed2ed5 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -13,7 +13,7 @@ SConscript([ 'auxiliary/tgsi/SConscript', 'auxiliary/cso_cache/SConscript', 'auxiliary/draw/SConscript', - #'auxiliary/llvm/SConscript', + #'auxiliary/gallivm/SConscript', 'auxiliary/pipebuffer/SConscript', 'drivers/softpipe/SConscript', diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 44022b6e07..0fd557d667 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -40,7 +40,7 @@ #ifdef MESA_LLVM -#include "llvm/gallivm.h" +#include "gallivm/gallivm.h" struct draw_llvm_vertex_shader { struct draw_vertex_shader base; diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile index 2664ac47ce..17405388ee 100644 --- a/src/gallium/winsys/xlib/Makefile +++ b/src/gallium/winsys/xlib/Makefile @@ -38,10 +38,6 @@ CELL_LIB = $(TOP)/src/gallium/drivers/cell/ppu/libcell.a CELL_LIB_SPU = $(TOP)/src/gallium/drivers/cell/spu/g3d_spu.a endif -ifeq ($(CONFIG_NAME), linux-llvm) -LLVM_LIB = $(TOP)/src/gallium/auxiliary/llvm/libgallivm.a -endif - .SUFFIXES : .cpp @@ -69,13 +65,13 @@ STAND_ALONE_OBJECTS = \ $(STAND_ALONE_DRIVER_OBJECTS) # Make the GL library -$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(STAND_ALONE_OBJECTS) $(LLVM_LIB) $(PIPE_LIB) $(CELL_LIB) $(CELL_LIB_SPU) +$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(STAND_ALONE_OBJECTS) $(PIPE_LIB) $(CELL_LIB) $(CELL_LIB_SPU) $(TOP)/bin/mklib -o $(GL_LIB) \ -linker "$(CC)" \ -major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \ -install $(TOP)/$(LIB_DIR) \ $(MKLIB_OPTIONS) $(STAND_ALONE_OBJECTS) \ - --start-group $(PIPE_LIB) $(LLVM_LIB) --end-group $(CELL_LIB) $(CELL_LIB_SPU) $(GL_LIB_DEPS) + --start-group $(PIPE_LIB) --end-group $(CELL_LIB) $(CELL_LIB_SPU) $(GL_LIB_DEPS) ###################################################################### diff --git a/src/mesa/Makefile b/src/mesa/Makefile index 86a9cf0b88..2403223db2 100644 --- a/src/mesa/Makefile +++ b/src/mesa/Makefile @@ -67,13 +67,13 @@ stand-alone: depend subdirs $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME) $(TOP)/$(LIB_DIR)/$ osmesa-only: depend subdirs $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME) # Make the GL library -$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(STAND_ALONE_OBJECTS) $(PIPE_LIB) $(CELL_LIB) $(CELL_LIB_SPU) $(LLVM_LIB) +$(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(STAND_ALONE_OBJECTS) $(PIPE_LIB) $(CELL_LIB) $(CELL_LIB_SPU) @ $(TOP)/bin/mklib -o $(GL_LIB) \ -linker "$(CC)" \ -major $(GL_MAJOR) -minor $(GL_MINOR) -patch $(GL_TINY) \ -install $(TOP)/$(LIB_DIR) \ $(MKLIB_OPTIONS) $(STAND_ALONE_OBJECTS) \ - $(PIPE_LIB) $(CELL_LIB) $(CELL_LIB_SPU) $(LLVM_LIB) $(GL_LIB_DEPS) + $(PIPE_LIB) $(CELL_LIB) $(CELL_LIB_SPU) $(GL_LIB_DEPS) # Make the OSMesa library $(TOP)/$(LIB_DIR)/$(OSMESA_LIB_NAME): $(OSMESA_DRIVER_OBJECTS) \ -- cgit v1.2.3 From aceeb80d4f706980aaf71b8e098d4c6718d8ac90 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 18 Feb 2008 16:19:05 -0700 Subject: gallium: antialiased line drawing New draw/prim stage: draw_aaline. When installed, lines are replaced by textured quads to do antialiasing. The current user-defined fragment shader is modified to do a texture fetch and modulate fragment alpha. --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/draw_aaline.c | 832 ++++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_context.c | 22 + src/gallium/auxiliary/draw/draw_context.h | 21 +- src/gallium/auxiliary/draw/draw_prim.c | 7 + src/gallium/auxiliary/draw/draw_private.h | 12 +- src/gallium/auxiliary/draw/draw_validate.c | 8 +- src/gallium/auxiliary/tgsi/Makefile | 1 + src/gallium/drivers/softpipe/sp_context.c | 3 + src/gallium/drivers/softpipe/sp_state_derived.c | 17 +- 10 files changed, 914 insertions(+), 10 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_aaline.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index c56b63d85b..c8000cbe9c 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -4,6 +4,7 @@ include $(TOP)/configs/current LIBNAME = draw DRIVER_SOURCES = \ + draw_aaline.c \ draw_clip.c \ draw_vs_exec.c \ draw_vs_sse.c \ diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c new file mode 100644 index 0000000000..f1fee4f8ba --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -0,0 +1,832 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * AA line stage: AA lines are converted to texture mapped triangles. + * + * Authors: Brian Paul + */ + + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/util/tgsi_transform.h" +#include "tgsi/util/tgsi_dump.h" + +#include "draw_context.h" +#include "draw_private.h" + + +/** + * Max texture level for the alpha texture used for antialiasing + */ +#define MAX_TEXTURE_LEVEL 5 /* 32 x 32 */ + + +/** + * Subclass of pipe_shader_state to carry extra fragment shader info. + */ +struct aaline_fragment_shader +{ + struct pipe_shader_state state; + void *driver_fs; + void *aaline_fs; + void *aapoint_fs; /* not yet */ + void *sprite_fs; /* not yet */ +}; + + +/** + * Subclass of draw_stage + */ +struct aaline_stage +{ + struct draw_stage stage; + + float half_line_width; + + /** For AA lines, this is the vertex attrib slot for the new texcoords */ + uint tex_slot; + + void *sampler_cso; + struct pipe_texture *texture; + uint sampler_unit; + + + /* + * Currently bound state + */ + struct aaline_fragment_shader *fs; + struct { + void *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + } state; + + /* + * Driver interface/override functions + */ + void * (*driver_create_fs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*driver_bind_fs_state)(struct pipe_context *, void *); + void (*driver_delete_fs_state)(struct pipe_context *, void *); + + void (*driver_bind_sampler_state)(struct pipe_context *, unsigned, void *); + + void (*driver_set_sampler_texture)(struct pipe_context *, + unsigned sampler, + struct pipe_texture *); + + struct pipe_context *pipe; +}; + + + +/** + * Subclass of tgsi_transform_context, used for transforming the + * user's fragment shader to add the special AA instructions. + */ +struct aa_transform_context { + struct tgsi_transform_context base; + uint tempsUsed; /**< bitmask */ + int colorOutput; /**< which output is the primary color */ + int maxSampler; /**< max sampler index found */ + int maxInput, maxGeneric; /**< max input index found */ + int colorTemp, texTemp; /**< temp registers */ + boolean firstInstruction; +}; + + +/** + * TGSI declaration transform callback. + * Look for a free sampler, a free input attrib, and two free temp regs. + */ +static void +aa_transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + + if (decl->Declaration.File == TGSI_FILE_OUTPUT && + decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && + decl->Semantic.SemanticIndex == 0) { + aactx->colorOutput = decl->u.DeclarationRange.First; + } + else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { + if (decl->u.DeclarationRange.Last > aactx->maxSampler) + aactx->maxSampler = decl->u.DeclarationRange.Last + 1; + } + else if (decl->Declaration.File == TGSI_FILE_INPUT) { + if (decl->u.DeclarationRange.Last > aactx->maxInput) + aactx->maxInput = decl->u.DeclarationRange.Last; + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && + decl->Semantic.SemanticIndex > aactx->maxGeneric) { + aactx->maxGeneric = decl->Semantic.SemanticIndex; + } + } + else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + uint i; + for (i = decl->u.DeclarationRange.First; + i <= decl->u.DeclarationRange.Last; i++) { + aactx->tempsUsed |= (1 << i); + } + } + + ctx->emit_declaration(ctx, decl); +} + + +/** + * TGSI instruction transform callback. + * Replace writes to result.color w/ a temp reg. + * Upon END instruction, insert texture sampling code for antialiasing. + */ +static void +aa_transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + + if (aactx->firstInstruction) { + /* emit our new declarations before the first instruction */ + + struct tgsi_full_declaration decl; + uint i; + + /* find two free temp regs */ + for (i = 0; i < 32; i++) { + if ((aactx->tempsUsed & (1 << i)) == 0) { + /* found a free temp */ + if (aactx->colorTemp < 0) + aactx->colorTemp = i; + else if (aactx->texTemp < 0) + aactx->texTemp = i; + else + break; + } + } + assert(aactx->colorTemp >= 0); + assert(aactx->texTemp >= 0); + + /* declare new generic input/texcoord */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; + decl.Declaration.Interpolate = 1; + /* XXX this could be linear... */ + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->maxInput + 1; + ctx->emit_declaration(ctx, &decl); + + /* declare new sampler */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->maxSampler + 1; + ctx->emit_declaration(ctx, &decl); + + /* declare new temp regs */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->texTemp; + ctx->emit_declaration(ctx, &decl); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->colorTemp; + ctx->emit_declaration(ctx, &decl); + + aactx->firstInstruction = FALSE; + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_END && + aactx->colorOutput != -1) { + struct tgsi_full_instruction newInst; + + /* TEX */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_TEX; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->maxInput + 1; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->maxSampler + 1; + + ctx->emit_instruction(ctx, &newInst); + + /* MOV rgb */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MOV; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + ctx->emit_instruction(ctx, &newInst); + + /* MUL alpha */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->texTemp; + ctx->emit_instruction(ctx, &newInst); + + /* END */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_END; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 0; + ctx->emit_instruction(ctx, &newInst); + } + else { + /* Not an END instruction. + * Look for writes to result.color and replace with colorTemp reg. + */ + uint i; + + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + if (dst->DstRegister.File == TGSI_FILE_OUTPUT && + dst->DstRegister.Index == aactx->colorOutput) { + dst->DstRegister.File = TGSI_FILE_TEMPORARY; + dst->DstRegister.Index = aactx->colorTemp; + } + } + + ctx->emit_instruction(ctx, inst); + } +} + + +/** + * Generate the frag shader we'll use for drawing AA lines. + * This will be the user's shader plus some texture/modulate instructions. + */ +static void +generate_aaline_fs(struct aaline_stage *aaline) +{ + const struct pipe_shader_state *orig_fs = &aaline->fs->state; + struct draw_context *draw = aaline->stage.draw; + struct pipe_shader_state aaline_fs; + struct aa_transform_context transform; + +#define MAX 1000 + + aaline_fs = *orig_fs; /* copy to init */ + aaline_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + + memset(&transform, 0, sizeof(transform)); + transform.colorOutput = -1; + transform.maxSampler = -1; + transform.maxInput = -1; + transform.maxGeneric = -1; + transform.colorTemp = -1; + transform.texTemp = -1; + transform.firstInstruction = TRUE; + transform.base.transform_instruction = aa_transform_inst; + transform.base.transform_declaration = aa_transform_decl; + + tgsi_transform_shader(orig_fs->tokens, + (struct tgsi_token *) aaline_fs.tokens, + MAX, &transform.base); + +#if 0 /* DEBUG */ + tgsi_dump(orig_fs->tokens, 0); + tgsi_dump(aaline_fs.tokens, 0); +#endif + + aaline_fs.input_semantic_name[aaline_fs.num_inputs] = TGSI_SEMANTIC_GENERIC; + aaline_fs.input_semantic_index[aaline_fs.num_inputs] = transform.maxGeneric + 1; + aaline_fs.num_inputs++; + + aaline->fs->aaline_fs + = aaline->driver_create_fs_state(aaline->pipe, &aaline_fs); + + /* advertise the extra post-transform vertex attributes which will have + * the texcoords. + */ + draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_vp_outputs.semantic_index = transform.maxGeneric + 1; +} + + +/** + * Create the texture map we'll use for antialiasing the lines. + */ +static void +aaline_create_texture(struct aaline_stage *aaline) +{ + struct pipe_context *pipe = aaline->pipe; + struct pipe_texture texTemp; + uint level; + + memset(&texTemp, 0, sizeof(texTemp)); + texTemp.target = PIPE_TEXTURE_2D; + texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */ + texTemp.last_level = MAX_TEXTURE_LEVEL; + texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL; + texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL; + texTemp.depth[0] = 1; + texTemp.cpp = 1; + + aaline->texture = pipe->texture_create(pipe, &texTemp); + + /* Fill in mipmap images. + * Basically each level is solid opaque, except for the outermost + * texels which are zero. Special case the 1x1 and 2x2 levels. + */ + for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) { + struct pipe_surface *surface; + const uint size = aaline->texture->width[level]; + ubyte *data; + uint i, j; + + assert(aaline->texture->width[level] == aaline->texture->height[level]); + + surface = pipe->get_tex_surface(pipe, aaline->texture, 0, level, 0); + data = pipe_surface_map(surface); + + for (i = 0; i < size; i++) { + for (j = 0; j < size; j++) { + uint d; + if (size == 1) { + d = 255; + } + else if (size == 2) { + d = 200; /* tuneable */ + } + else if (i == 0 || j == 0 || i == size - 1 || j == size - 1) { + d = 0; + } + else { + d = 255; + } + data[i * surface->pitch + j] = d; + } + } + + /* unmap */ + pipe_surface_unmap(surface); + pipe_surface_reference(&surface, NULL); + } +} + + +/** + * Create the sampler CSO that'll be used for antialiasing. + * By using a mipmapped texture, we don't have to generate a different + * texture image for each line size. + */ +static void +aaline_create_sampler(struct aaline_stage *aaline) +{ + struct pipe_sampler_state sampler; + struct pipe_context *pipe = aaline->pipe; + + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR; + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.normalized_coords = 1; + sampler.min_lod = 0.0f; + sampler.max_lod = MAX_TEXTURE_LEVEL; + + aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler); +} + + +/** + * When we're about to draw our first AA line in a batch, this function is + * called to tell the driver to bind our modified fragment shader. + */ +static void +bind_aaline_fragment_shader(struct aaline_stage *aaline) +{ + if (!aaline->fs->aaline_fs) { + generate_aaline_fs(aaline); + } + aaline->driver_bind_fs_state(aaline->pipe, aaline->fs->aaline_fs); +} + + + +static INLINE struct aaline_stage * +aaline_stage( struct draw_stage *stage ) +{ + return (struct aaline_stage *) stage; +} + + +static void +passthrough_point(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->point(stage->next, header); +} + + +static void +passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + +/** + * Draw a wide line by drawing a quad, using geometry which will + * fullfill GL's antialiased line requirements. + */ +static void +aaline_line(struct draw_stage *stage, struct prim_header *header) +{ + const struct aaline_stage *aaline = aaline_stage(stage); + const float half_width = aaline->half_line_width; + struct prim_header tri; + struct vertex_header *v[8]; + uint texPos = aaline->tex_slot; + float *pos, *tex; + float dx = header->v[1]->data[0][0] - header->v[0]->data[0][0]; + float dy = header->v[1]->data[0][1] - header->v[0]->data[0][1]; + float a = atan2(dy, dx); + float c_a = cos(a), s_a = sin(a); + uint i; + + /* XXX the ends of lines aren't quite perfect yet, but probably passable */ + dx = 0.5 * half_width; + dy = half_width; + + /* allocate/dup new verts */ + for (i = 0; i < 8; i++) { + v[i] = dup_vert(stage, header->v[i/4], i); + } + + /* + * Quad strip for line from v0 to v1 (*=endpoints): + * + * 1 3 5 7 + * +---+---------------------+---+ + * | | + * | *v0 v1* | + * | | + * +---+---------------------+---+ + * 0 2 4 6 + */ + + /* new verts */ + pos = v[0]->data[0]; + pos[0] += (-dx * c_a - dy * s_a); + pos[1] += (-dx * s_a + dy * c_a); + + pos = v[1]->data[0]; + pos[0] += (-dx * c_a - -dy * s_a); + pos[1] += (-dx * s_a + -dy * c_a); + + pos = v[2]->data[0]; + pos[0] += ( dx * c_a - dy * s_a); + pos[1] += ( dx * s_a + dy * c_a); + + pos = v[3]->data[0]; + pos[0] += ( dx * c_a - -dy * s_a); + pos[1] += ( dx * s_a + -dy * c_a); + + pos = v[4]->data[0]; + pos[0] += (-dx * c_a - dy * s_a); + pos[1] += (-dx * s_a + dy * c_a); + + pos = v[5]->data[0]; + pos[0] += (-dx * c_a - -dy * s_a); + pos[1] += (-dx * s_a + -dy * c_a); + + pos = v[6]->data[0]; + pos[0] += ( dx * c_a - dy * s_a); + pos[1] += ( dx * s_a + dy * c_a); + + pos = v[7]->data[0]; + pos[0] += ( dx * c_a - -dy * s_a); + pos[1] += ( dx * s_a + -dy * c_a); + + /* new texcoords */ + tex = v[0]->data[texPos]; + ASSIGN_4V(tex, 0, 0, 0, 1); + + tex = v[1]->data[texPos]; + ASSIGN_4V(tex, 0, 1, 0, 1); + + tex = v[2]->data[texPos]; + ASSIGN_4V(tex, .5, 0, 0, 1); + + tex = v[3]->data[texPos]; + ASSIGN_4V(tex, .5, 1, 0, 1); + + tex = v[4]->data[texPos]; + ASSIGN_4V(tex, .5, 0, 0, 1); + + tex = v[5]->data[texPos]; + ASSIGN_4V(tex, .5, 1, 0, 1); + + tex = v[6]->data[texPos]; + ASSIGN_4V(tex, 1, 0, 0, 1); + + tex = v[7]->data[texPos]; + ASSIGN_4V(tex, 1, 1, 0, 1); + + /* emit 6 tris for the quad strip */ + tri.v[0] = v[2]; tri.v[1] = v[1]; tri.v[2] = v[0]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[3]; tri.v[1] = v[1]; tri.v[2] = v[2]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[4]; tri.v[1] = v[3]; tri.v[2] = v[2]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[5]; tri.v[1] = v[3]; tri.v[2] = v[4]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[6]; tri.v[1] = v[5]; tri.v[2] = v[4]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[7]; tri.v[1] = v[5]; tri.v[2] = v[6]; + stage->next->tri( stage->next, &tri ); +} + + +static void +aaline_first_line(struct draw_stage *stage, struct prim_header *header) +{ + auto struct aaline_stage *aaline = aaline_stage(stage); + struct draw_context *draw = stage->draw; + struct pipe_context *pipe = aaline->pipe; + + assert(draw->rasterizer->line_smooth); + + if (draw->rasterizer->line_width <= 3.0) + aaline->half_line_width = 1.5f; + else + aaline->half_line_width = 0.5f * draw->rasterizer->line_width; + + aaline->tex_slot = draw->num_vs_outputs; + assert(aaline->tex_slot > 0); /* output[0] is vertex pos */ + draw->extra_vp_outputs.slot = aaline->tex_slot; + + /* + * Bind our fragprog, sampler and texture + */ + bind_aaline_fragment_shader(aaline); + + aaline->driver_bind_sampler_state(pipe, aaline->sampler_unit, aaline->sampler_cso); + aaline->driver_set_sampler_texture(pipe, aaline->sampler_unit, aaline->texture); + + /* now really draw first line */ + stage->line = aaline_line; + stage->line(stage, header); +} + + +static void +aaline_flush(struct draw_stage *stage, unsigned flags) +{ + struct draw_context *draw = stage->draw; + struct aaline_stage *aaline = aaline_stage(stage); + struct pipe_context *pipe = aaline->pipe; + + stage->line = aaline_first_line; + stage->next->flush( stage->next, flags ); + + /* restore original frag shader */ + aaline->driver_bind_fs_state(pipe, aaline->fs->driver_fs); + + /* XXX restore original texture, sampler state */ + aaline->driver_bind_sampler_state(pipe, aaline->sampler_unit, + aaline->state.sampler[aaline->sampler_unit]); + aaline->driver_set_sampler_texture(pipe, aaline->sampler_unit, + aaline->state.texture[aaline->sampler_unit]); + + draw->extra_vp_outputs.slot = 0; +} + + +static void +aaline_reset_stipple_counter(struct draw_stage *stage) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +aaline_destroy(struct draw_stage *stage) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +static struct aaline_stage * +draw_aaline_stage(struct draw_context *draw) +{ + struct aaline_stage *aaline = CALLOC_STRUCT(aaline_stage); + + draw_alloc_temp_verts( &aaline->stage, 8 ); + + aaline->stage.draw = draw; + aaline->stage.next = NULL; + aaline->stage.point = passthrough_point; + aaline->stage.line = aaline_first_line; + aaline->stage.tri = passthrough_tri; + aaline->stage.flush = aaline_flush; + aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter; + aaline->stage.destroy = aaline_destroy; + + return aaline; +} + + +/* + * XXX temporary? solution to mapping a pipe_context to a aaline_stage. + */ + +#define MAX_CONTEXTS 10 + +static struct pipe_context *Pipe[MAX_CONTEXTS]; +static struct aaline_stage *Stage[MAX_CONTEXTS]; +static uint NumContexts; + +static void +add_aa_pipe_context(struct pipe_context *pipe, struct aaline_stage *aa) +{ + assert(NumContexts < MAX_CONTEXTS); + Pipe[NumContexts] = pipe; + Stage[NumContexts] = aa; + NumContexts++; +} + +static struct aaline_stage * +aaline_stage_from_pipe(struct pipe_context *pipe) +{ + uint i; + for (i = 0; i < NumContexts; i++) { + if (Pipe[i] == pipe) + return Stage[i]; + } + assert(0); + return NULL; +} + + +/** + * This function overrides the driver's create_fs_state() function and + * will typically be called by the state tracker. + */ +static void * +aaline_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + struct aaline_fragment_shader *aafs = CALLOC_STRUCT(aaline_fragment_shader); + + if (aafs) { + aafs->state = *fs; + + /* pass-through */ + aafs->driver_fs = aaline->driver_create_fs_state(aaline->pipe, fs); + } + + return aafs; +} + + +static void +aaline_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs; + /* save current */ + aaline->fs = aafs; + /* pass-through */ + aaline->driver_bind_fs_state(aaline->pipe, aafs->driver_fs); +} + + +static void +aaline_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs; + /* pass-through */ + aaline->driver_delete_fs_state(aaline->pipe, aafs->driver_fs); + FREE(aafs); +} + + +static void +aaline_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + /* save current */ + aaline->state.sampler[unit] = sampler; + /* pass-through */ + aaline->driver_bind_sampler_state(aaline->pipe, unit, sampler); +} + + +static void +aaline_set_sampler_texture(struct pipe_context *pipe, + unsigned sampler, struct pipe_texture *texture) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + /* save current */ + aaline->state.texture[sampler] = texture; + /* pass-through */ + aaline->driver_set_sampler_texture(aaline->pipe, sampler, texture); +} + + +/** + * Called by drivers that want to install this AA line prim stage + * into the draw module's pipeline. This will not be used if the + * hardware has native support for AA lines. + */ +void +draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) +{ + struct aaline_stage *aaline; + + /* + * Create / install AA line drawing / prim stage + */ + aaline = draw_aaline_stage( draw ); + assert(aaline); + draw->pipeline.aaline = &aaline->stage; + + aaline->pipe = pipe; + + /* create special texture, sampler state */ + aaline_create_texture(aaline); + aaline_create_sampler(aaline); + + /* save original driver functions */ + aaline->driver_create_fs_state = pipe->create_fs_state; + aaline->driver_bind_fs_state = pipe->bind_fs_state; + aaline->driver_delete_fs_state = pipe->delete_fs_state; + + aaline->driver_bind_sampler_state = pipe->bind_sampler_state; + aaline->driver_set_sampler_texture = pipe->set_sampler_texture; + + /* override the driver's functions */ + pipe->create_fs_state = aaline_create_fs_state; + pipe->bind_fs_state = aaline_bind_fs_state; + pipe->delete_fs_state = aaline_delete_fs_state; + + pipe->bind_sampler_state = aaline_bind_sampler_state; + pipe->set_sampler_texture = aaline_set_sampler_texture; + + add_aa_pipe_context(pipe, aaline); +} diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 4be3830316..a7f3b4aecb 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -103,6 +103,8 @@ void draw_destroy( struct draw_context *draw ) draw->pipeline.flatshade->destroy( draw->pipeline.flatshade ); draw->pipeline.cull->destroy( draw->pipeline.cull ); draw->pipeline.validate->destroy( draw->pipeline.validate ); + if (draw->pipeline.aaline) + draw->pipeline.aaline->destroy( draw->pipeline.aaline ); if (draw->pipeline.rasterize) draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); tgsi_exec_machine_free_data(&draw->machine); @@ -239,6 +241,26 @@ draw_convert_wide_lines(struct draw_context *draw, boolean enable) } +/** + * The draw module may sometimes generate vertices with extra attributes + * (such as texcoords for AA lines). The driver can call this function + * to find those attributes. + */ +int +draw_find_vs_output(struct draw_context *draw, + uint semantic_name, uint semantic_index) +{ + /* XXX there may be more than one extra vertex attrib. + * For example, simulated gl_FragCoord and gl_PointCoord. + */ + if (draw->extra_vp_outputs.semantic_name == semantic_name && + draw->extra_vp_outputs.semantic_index == semantic_index) { + return draw->extra_vp_outputs.slot; + } + return 0; +} + + /** * Allocate space for temporary post-transform vertices, such as for clipping. */ diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index ddeb184497..82035aa8ad 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -41,6 +41,7 @@ #include "pipe/p_state.h" +struct pipe_context; struct vertex_buffer; struct vertex_info; struct draw_context; @@ -93,6 +94,20 @@ void draw_convert_wide_points(struct draw_context *draw, boolean enable); void draw_convert_wide_lines(struct draw_context *draw, boolean enable); +boolean draw_use_sse(struct draw_context *draw); + +void +draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe); + + +int +draw_find_vs_output(struct draw_context *draw, + uint semantic_name, uint semantic_index); + + +/* + * Vertex shader functions + */ struct draw_vertex_shader * draw_create_vertex_shader(struct draw_context *draw, @@ -102,7 +117,11 @@ void draw_bind_vertex_shader(struct draw_context *draw, void draw_delete_vertex_shader(struct draw_context *draw, struct draw_vertex_shader *dvs); -boolean draw_use_sse(struct draw_context *draw); + + +/* + * Vertex data functions + */ void draw_set_vertex_buffer(struct draw_context *draw, unsigned attr, diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c index 51e2242719..dd9a848863 100644 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -121,11 +121,15 @@ static void draw_prim_queue_flush( struct draw_context *draw ) void draw_do_flush( struct draw_context *draw, unsigned flags ) { + static boolean flushing = FALSE; + if (0) debug_printf("Flushing with %d verts, %d prims\n", draw->vs.queue_nr, draw->pq.queue_nr ); + if (!flushing) { + flushing = TRUE; if (flags >= DRAW_FLUSH_SHADER_QUEUE) { if (draw->vs.queue_nr) @@ -146,6 +150,9 @@ void draw_do_flush( struct draw_context *draw, unsigned flags ) } } } + + flushing = FALSE; + } } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index bc11259cb2..dd75f9aefc 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -48,6 +48,7 @@ #include "tgsi/exec/tgsi_exec.h" +struct pipe_context; struct gallivm_prog; struct gallivm_cpu_engine; @@ -179,6 +180,7 @@ struct draw_context struct draw_stage *offset; struct draw_stage *unfilled; struct draw_stage *stipple; + struct draw_stage *aaline; struct draw_stage *wide; struct draw_stage *rasterize; } pipeline; @@ -212,9 +214,17 @@ struct draw_context unsigned nr_planes; boolean convert_wide_points; /**< convert wide points to tris? */ - boolean convert_wide_lines; /**< convert side lines to tris? */ + boolean convert_wide_lines; /**< convert wide lines to tris? */ boolean use_sse; + /* If a prim stage introduces new vertex attributes, they'll be stored here + */ + struct { + uint semantic_name; + uint semantic_index; + int slot; + } extra_vp_outputs; + unsigned reduced_prim; /** TGSI program interpreter runtime state */ diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index 4375ebabbc..97e40e372b 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -58,7 +58,13 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) * shorter pipelines for lines & points. */ - if ((draw->rasterizer->line_width != 1.0 && draw->convert_wide_lines) || + if (draw->rasterizer->line_smooth && draw->pipeline.aaline) { + draw->pipeline.aaline->next = next; + next = draw->pipeline.aaline; + } + + if ((draw->rasterizer->line_width != 1.0 && draw->convert_wide_lines + && !draw->rasterizer->line_smooth) || (draw->rasterizer->point_size != 1.0 && draw->convert_wide_points) || draw->rasterizer->point_sprite) { draw->pipeline.wide->next = next; diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index c10ab396d8..8bb62b2a0a 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -10,6 +10,7 @@ DRIVER_SOURCES = \ util/tgsi_build.c \ util/tgsi_dump.c \ util/tgsi_parse.c \ + util/tgsi_transform.c \ util/tgsi_util.c C_SOURCES = \ diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 5e98f190bb..254c6adca4 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -327,6 +327,9 @@ struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys, draw_set_rasterize_stage(softpipe->draw, softpipe->setup); } + /* enable aaline stage */ + draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); + sp_init_surface_functions(softpipe); return &softpipe->pipe; diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 9d8fd8b750..f9f2c5eaa8 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -44,7 +44,8 @@ * condition that users shouldn't hit anyway. */ static int -find_vs_output(const struct pipe_shader_state *vs, +find_vs_output(struct softpipe_context *sp, + const struct pipe_shader_state *vs, uint semantic_name, uint semantic_index) { @@ -54,7 +55,9 @@ find_vs_output(const struct pipe_shader_state *vs, vs->output_semantic_index[i] == semantic_index) return i; } - return 0; + + /* See if the draw module is introducing a new attribute... */ + return draw_find_vs_output(sp->draw, semantic_name, semantic_index); } @@ -111,24 +114,24 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) int src; switch (fs->input_semantic_name[i]) { case TGSI_SEMANTIC_POSITION: - src = find_vs_output(vs, TGSI_SEMANTIC_POSITION, 0); + src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_POSITION, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_POS, src); break; case TGSI_SEMANTIC_COLOR: - src = find_vs_output(vs, TGSI_SEMANTIC_COLOR, + src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_COLOR, fs->input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src); break; case TGSI_SEMANTIC_FOG: - src = find_vs_output(vs, TGSI_SEMANTIC_FOG, 0); + src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_FOG, 0); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); break; case TGSI_SEMANTIC_GENERIC: /* this includes texcoords and varying vars */ - src = find_vs_output(vs, TGSI_SEMANTIC_GENERIC, + src = find_vs_output(softpipe, vs, TGSI_SEMANTIC_GENERIC, fs->input_semantic_index[i]); draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src); break; @@ -138,7 +141,7 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) } } - softpipe->psize_slot = find_vs_output(vs, TGSI_SEMANTIC_PSIZE, 0); + softpipe->psize_slot = find_vs_output(softpipe, vs, TGSI_SEMANTIC_PSIZE, 0); if (softpipe->psize_slot > 0) { draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_CONSTANT, softpipe->psize_slot); -- cgit v1.2.3 From 478c14453b33b20724bcdb70cf1f54f4addb71ab Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 18 Feb 2008 16:50:07 -0700 Subject: gallium: remove the prototype/unused wide_line_aa() function --- src/gallium/auxiliary/draw/draw_wide_prims.c | 69 +--------------------------- 1 file changed, 1 insertion(+), 68 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_wide_prims.c b/src/gallium/auxiliary/draw/draw_wide_prims.c index 655774b155..1f8069bdca 100644 --- a/src/gallium/auxiliary/draw/draw_wide_prims.c +++ b/src/gallium/auxiliary/draw/draw_wide_prims.c @@ -161,70 +161,6 @@ static void wide_line( struct draw_stage *stage, } -/** - * Draw a wide line by drawing a quad, using geometry which will - * fullfill GL's antialiased line requirements. - */ -static void wide_line_aa(struct draw_stage *stage, - struct prim_header *header) -{ - const struct wide_stage *wide = wide_stage(stage); - const float half_width = wide->half_line_width; - struct prim_header tri; - struct vertex_header *v[4]; - float *pos; - float dx = header->v[1]->data[0][0] - header->v[0]->data[0][0]; - float dy = header->v[1]->data[0][1] - header->v[0]->data[0][1]; - const float len = (float) sqrt(dx * dx + dy * dy); - uint i; - - dx = dx * half_width / len; - dy = dy * half_width / len; - - /* allocate/dup new verts */ - for (i = 0; i < 4; i++) { - v[i] = dup_vert(stage, header->v[i/2], i); - } - - /* - * Quad for line from v0 to v1: - * - * 1 3 - * +-------------------------+ - * | | - * *v0 v1* - * | | - * +-------------------------+ - * 0 2 - */ - - pos = v[0]->data[0]; - pos[0] += dy; - pos[1] -= dx; - - pos = v[1]->data[0]; - pos[0] -= dy; - pos[1] += dx; - - pos = v[2]->data[0]; - pos[0] += dy; - pos[1] -= dx; - - pos = v[3]->data[0]; - pos[0] -= dy; - pos[1] += dx; - - tri.det = header->det; /* only the sign matters */ - - tri.v[0] = v[2]; tri.v[1] = v[1]; tri.v[2] = v[0]; - stage->next->tri( stage->next, &tri ); - - tri.v[0] = v[3]; tri.v[1] = v[1]; tri.v[2] = v[2]; - stage->next->tri( stage->next, &tri ); - -} - - /** * Set the vertex texcoords for sprite mode. * Coords may be left untouched or set to a right-side-up or upside-down @@ -379,10 +315,7 @@ static void wide_first_line( struct draw_stage *stage, wide->half_line_width = 0.5f * draw->rasterizer->line_width; if (draw->rasterizer->line_width != 1.0) { - if (draw->rasterizer->line_smooth) - wide->stage.line = wide_line_aa; - else - wide->stage.line = wide_line; + wide->stage.line = wide_line; } else { wide->stage.line = passthrough_line; -- cgit v1.2.3 From e773a813cf475e2a7ad79ea1ec698bf2530d0433 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 19 Feb 2008 10:50:39 +0900 Subject: Initial scons support to build gallivm. Not yet complete. --- SConstruct | 13 +++++++++++++ src/gallium/SConscript | 6 +++++- src/gallium/auxiliary/gallivm/SConscript | 16 ++++++++++++++++ 3 files changed, 34 insertions(+), 1 deletion(-) create mode 100644 src/gallium/auxiliary/gallivm/SConscript (limited to 'src/gallium/auxiliary') diff --git a/SConstruct b/SConstruct index 4fb51f2e6d..b20f88a303 100644 --- a/SConstruct +++ b/SConstruct @@ -32,6 +32,7 @@ import sys opts = Options('config.py') opts.Add(BoolOption('debug', 'build debug version', False)) opts.Add(BoolOption('dri', 'build dri drivers', False)) +opts.Add(BoolOption('llvm', 'use llvm', False)) opts.Add(EnumOption('machine', 'use machine-specific assembly code', 'x86', allowed_values=('generic', 'x86', 'x86-64'))) @@ -55,6 +56,7 @@ else: # replicate options values in local variables debug = env['debug'] dri = env['dri'] +llvm = env['llvm'] machine = env['machine'] # derived options @@ -66,6 +68,7 @@ Export([ 'debug', 'x86', 'dri', + 'llvm', 'platform', 'gcc', 'msvc', @@ -159,6 +162,14 @@ if dri: 'GLX_INDIRECT_RENDERING', ]) +# LLVM +if llvm: + # See also http://www.scons.org/wiki/UsingPkgConfig + env.ParseConfig('llvm-config --cflags --ldflags --libs') + env.Append(CPPDEFINES = ['MESA_LLVM']) + env.Append(CXXFLAGS = ['-Wno-long-long']) + + # libGL if 1: env.Append(LIBS = [ @@ -214,6 +225,8 @@ build_topdir = 'build' build_subdir = platform if dri: build_subdir += "-dri" +if llvm: + build_subdir += "-llvm" if x86: build_subdir += "-x86" if debug: diff --git a/src/gallium/SConscript b/src/gallium/SConscript index ea55ed2ed5..c505eee792 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -13,9 +13,13 @@ SConscript([ 'auxiliary/tgsi/SConscript', 'auxiliary/cso_cache/SConscript', 'auxiliary/draw/SConscript', - #'auxiliary/gallivm/SConscript', 'auxiliary/pipebuffer/SConscript', +]) + +if llvm: + SConscript(['auxiliary/gallivm/SConscript']) +SConscript([ 'drivers/softpipe/SConscript', 'drivers/i915simple/SConscript', 'drivers/i965simple/SConscript', diff --git a/src/gallium/auxiliary/gallivm/SConscript b/src/gallium/auxiliary/gallivm/SConscript new file mode 100644 index 0000000000..c0aa51b90a --- /dev/null +++ b/src/gallium/auxiliary/gallivm/SConscript @@ -0,0 +1,16 @@ +Import('*') + +gallivm = env.ConvenienceLibrary( + target = 'gallivm', + source = [ + 'gallivm.cpp', + 'gallivm_cpu.cpp', + 'instructions.cpp', + 'loweringpass.cpp', + 'tgsitollvm.cpp', + 'storage.cpp', + 'storagesoa.cpp', + 'instructionssoa.cpp', + ]) + +auxiliaries.insert(0, gallivm) -- cgit v1.2.3 From df8ab3140ce05599e1dc983ac211a30fc845d9b5 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 19 Feb 2008 11:49:48 +0900 Subject: Bring rtasm from mesa to gallium. --- configs/default | 2 +- src/gallium/SConscript | 1 + src/gallium/auxiliary/rtasm/Makefile | 20 + src/gallium/auxiliary/rtasm/SConscript | 11 + src/gallium/auxiliary/rtasm/execmem.c | 133 ++++ src/gallium/auxiliary/rtasm/execmem.h | 45 ++ src/gallium/auxiliary/rtasm/mm.c | 283 ++++++++ src/gallium/auxiliary/rtasm/mm.h | 89 +++ src/gallium/auxiliary/rtasm/x86sse.c | 1195 ++++++++++++++++++++++++++++++++ src/gallium/auxiliary/rtasm/x86sse.h | 256 +++++++ 10 files changed, 2034 insertions(+), 1 deletion(-) create mode 100644 src/gallium/auxiliary/rtasm/Makefile create mode 100644 src/gallium/auxiliary/rtasm/SConscript create mode 100644 src/gallium/auxiliary/rtasm/execmem.c create mode 100644 src/gallium/auxiliary/rtasm/execmem.h create mode 100644 src/gallium/auxiliary/rtasm/mm.c create mode 100644 src/gallium/auxiliary/rtasm/mm.h create mode 100644 src/gallium/auxiliary/rtasm/x86sse.c create mode 100644 src/gallium/auxiliary/rtasm/x86sse.h (limited to 'src/gallium/auxiliary') diff --git a/configs/default b/configs/default index 7f659725cb..c9be5ec3e3 100644 --- a/configs/default +++ b/configs/default @@ -68,7 +68,7 @@ PROGRAM_DIRS = demos redbook samples glsl xdemos # Gallium directories and -GALLIUM_AUXILIARY_DIRS = draw cso_cache pipebuffer tgsi util +GALLIUM_AUXILIARY_DIRS = draw cso_cache pipebuffer tgsi rtasm util GALLIUM_AUXILIARIES = $(foreach DIR,$(GALLIUM_AUXILIARY_DIRS),$(TOP)/src/gallium/auxiliary/$(DIR)/lib$(DIR).a) GALLIUM_DRIVER_DIRS = softpipe i915simple i965simple failover GALLIUM_DRIVERS = $(foreach DIR,$(GALLIUM_DRIVER_DIRS),$(TOP)/src/gallium/drivers/$(DIR)/lib$(DIR).a) diff --git a/src/gallium/SConscript b/src/gallium/SConscript index c505eee792..a08b4b830e 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -10,6 +10,7 @@ Export('auxiliaries') SConscript([ # NOTE: order matters! 'auxiliary/util/SConscript', + 'auxiliary/rtasm/SConscript', 'auxiliary/tgsi/SConscript', 'auxiliary/cso_cache/SConscript', 'auxiliary/draw/SConscript', diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile new file mode 100644 index 0000000000..b3b9934e10 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/Makefile @@ -0,0 +1,20 @@ + +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = rtasm + +DRIVER_SOURCES = \ + x86sse.c \ + mm.c \ + execmem.c + +C_SOURCES = \ + $(DRIVER_SOURCES) + +ASM_SOURCES = + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript new file mode 100644 index 0000000000..c5b1551786 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/SConscript @@ -0,0 +1,11 @@ +Import('*') + +rtasm = env.ConvenienceLibrary( + target = 'rtasm', + source = [ + 'x86sse.c', + 'mm.c', + 'execmem.c', + ]) + +auxiliaries.insert(0, rtasm) diff --git a/src/gallium/auxiliary/rtasm/execmem.c b/src/gallium/auxiliary/rtasm/execmem.c new file mode 100644 index 0000000000..c7c35f7ef2 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/execmem.c @@ -0,0 +1,133 @@ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * \file exemem.c + * Functions for allocating executable memory. + * + * \author Keith Whitwell + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_thread.h" + +#include "execmem.h" + + +#if defined(__linux__) + +/* + * Allocate a large block of memory which can hold code then dole it out + * in pieces by means of the generic memory manager code. +*/ + +#include +#include +#include "mm.h" + +#define EXEC_HEAP_SIZE (10*1024*1024) + +_glthread_DECLARE_STATIC_MUTEX(exec_mutex); + +static struct mem_block *exec_heap = NULL; +static unsigned char *exec_mem = NULL; + + +static void +init_heap(void) +{ + if (!exec_heap) + exec_heap = mmInit( 0, EXEC_HEAP_SIZE ); + + if (!exec_mem) + exec_mem = (unsigned char *) mmap(0, EXEC_HEAP_SIZE, + PROT_EXEC | PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +} + + +void * +_mesa_exec_malloc(size_t size) +{ + struct mem_block *block = NULL; + void *addr = NULL; + + _glthread_LOCK_MUTEX(exec_mutex); + + init_heap(); + + if (exec_heap) { + size = (size + 31) & ~31; + block = mmAllocMem( exec_heap, size, 32, 0 ); + } + + if (block) + addr = exec_mem + block->ofs; + else + debug_printf("_mesa_exec_malloc failed\n"); + + _glthread_UNLOCK_MUTEX(exec_mutex); + + return addr; +} + + +void +_mesa_exec_free(void *addr) +{ + _glthread_LOCK_MUTEX(exec_mutex); + + if (exec_heap) { + struct mem_block *block = mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem); + + if (block) + mmFreeMem(block); + } + + _glthread_UNLOCK_MUTEX(exec_mutex); +} + + +#else + +/* + * Just use regular memory. + */ + +void * +_mesa_exec_malloc(GLuint size) +{ + return _mesa_malloc( size ); +} + + +void +_mesa_exec_free(void *addr) +{ + _mesa_free(addr); +} + + +#endif diff --git a/src/gallium/auxiliary/rtasm/execmem.h b/src/gallium/auxiliary/rtasm/execmem.h new file mode 100644 index 0000000000..9fd4569165 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/execmem.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file exemem.c + * Functions for allocating executable memory. + * + * \author Keith Whitwell + */ + +#ifndef _EXECMEM_H_ +#define _EXECMEM_H_ + +#include "pipe/p_compiler.h" + + +extern void * +_mesa_exec_malloc( size_t size ); + + +extern void +_mesa_exec_free( void *addr ); + + +#endif diff --git a/src/gallium/auxiliary/rtasm/mm.c b/src/gallium/auxiliary/rtasm/mm.c new file mode 100644 index 0000000000..15f50491da --- /dev/null +++ b/src/gallium/auxiliary/rtasm/mm.c @@ -0,0 +1,283 @@ +/* + * GLX Hardware Device Driver common code + * Copyright (C) 1999 Wittawat Yamwong + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * WITTAWAT YAMWONG, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_util.h" +#include "pipe/p_debug.h" + +#include "mm.h" + + +void +mmDumpMemInfo(const struct mem_block *heap) +{ + debug_printf("Memory heap %p:\n", (void *)heap); + if (heap == 0) { + debug_printf(" heap == 0\n"); + } else { + const struct mem_block *p; + + for(p = heap->next; p != heap; p = p->next) { + debug_printf(" Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, + p->free ? 'F':'.', + p->reserved ? 'R':'.'); + } + + debug_printf("\nFree list:\n"); + + for(p = heap->next_free; p != heap; p = p->next_free) { + debug_printf(" FREE Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, + p->free ? 'F':'.', + p->reserved ? 'R':'.'); + } + + } + debug_printf("End of memory blocks\n"); +} + +struct mem_block * +mmInit(int ofs, int size) +{ + struct mem_block *heap, *block; + + if (size <= 0) + return NULL; + + heap = CALLOC_STRUCT(mem_block); + if (!heap) + return NULL; + + block = CALLOC_STRUCT(mem_block); + if (!block) { + FREE(heap); + return NULL; + } + + heap->next = block; + heap->prev = block; + heap->next_free = block; + heap->prev_free = block; + + block->heap = heap; + block->next = heap; + block->prev = heap; + block->next_free = heap; + block->prev_free = heap; + + block->ofs = ofs; + block->size = size; + block->free = 1; + + return heap; +} + + +static struct mem_block * +SliceBlock(struct mem_block *p, + int startofs, int size, + int reserved, int alignment) +{ + struct mem_block *newblock; + + /* break left [p, newblock, p->next], then p = newblock */ + if (startofs > p->ofs) { + newblock = CALLOC_STRUCT(mem_block); + if (!newblock) + return NULL; + newblock->ofs = startofs; + newblock->size = p->size - (startofs - p->ofs); + newblock->free = 1; + newblock->heap = p->heap; + + newblock->next = p->next; + newblock->prev = p; + p->next->prev = newblock; + p->next = newblock; + + newblock->next_free = p->next_free; + newblock->prev_free = p; + p->next_free->prev_free = newblock; + p->next_free = newblock; + + p->size -= newblock->size; + p = newblock; + } + + /* break right, also [p, newblock, p->next] */ + if (size < p->size) { + newblock = CALLOC_STRUCT(mem_block); + if (!newblock) + return NULL; + newblock->ofs = startofs + size; + newblock->size = p->size - size; + newblock->free = 1; + newblock->heap = p->heap; + + newblock->next = p->next; + newblock->prev = p; + p->next->prev = newblock; + p->next = newblock; + + newblock->next_free = p->next_free; + newblock->prev_free = p; + p->next_free->prev_free = newblock; + p->next_free = newblock; + + p->size = size; + } + + /* p = middle block */ + p->free = 0; + + /* Remove p from the free list: + */ + p->next_free->prev_free = p->prev_free; + p->prev_free->next_free = p->next_free; + + p->next_free = 0; + p->prev_free = 0; + + p->reserved = reserved; + return p; +} + + +struct mem_block * +mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) +{ + struct mem_block *p; + const int mask = (1 << align2)-1; + int startofs = 0; + int endofs; + + if (!heap || align2 < 0 || size <= 0) + return NULL; + + for (p = heap->next_free; p != heap; p = p->next_free) { + assert(p->free); + + startofs = (p->ofs + mask) & ~mask; + if ( startofs < startSearch ) { + startofs = startSearch; + } + endofs = startofs+size; + if (endofs <= (p->ofs+p->size)) + break; + } + + if (p == heap) + return NULL; + + assert(p->free); + p = SliceBlock(p,startofs,size,0,mask+1); + + return p; +} + + +struct mem_block * +mmFindBlock(struct mem_block *heap, int start) +{ + struct mem_block *p; + + for (p = heap->next; p != heap; p = p->next) { + if (p->ofs == start) + return p; + } + + return NULL; +} + + +static INLINE int +Join2Blocks(struct mem_block *p) +{ + /* XXX there should be some assertions here */ + + /* NOTE: heap->free == 0 */ + + if (p->free && p->next->free) { + struct mem_block *q = p->next; + + assert(p->ofs + p->size == q->ofs); + p->size += q->size; + + p->next = q->next; + q->next->prev = p; + + q->next_free->prev_free = q->prev_free; + q->prev_free->next_free = q->next_free; + + FREE(q); + return 1; + } + return 0; +} + +int +mmFreeMem(struct mem_block *b) +{ + if (!b) + return 0; + + if (b->free) { + debug_printf("block already free\n"); + return -1; + } + if (b->reserved) { + debug_printf("block is reserved\n"); + return -1; + } + + b->free = 1; + b->next_free = b->heap->next_free; + b->prev_free = b->heap; + b->next_free->prev_free = b; + b->prev_free->next_free = b; + + Join2Blocks(b); + if (b->prev != b->heap) + Join2Blocks(b->prev); + + return 0; +} + + +void +mmDestroy(struct mem_block *heap) +{ + struct mem_block *p; + + if (!heap) + return; + + for (p = heap->next; p != heap; ) { + struct mem_block *next = p->next; + FREE(p); + p = next; + } + + FREE(heap); +} diff --git a/src/gallium/auxiliary/rtasm/mm.h b/src/gallium/auxiliary/rtasm/mm.h new file mode 100644 index 0000000000..f469b18d3e --- /dev/null +++ b/src/gallium/auxiliary/rtasm/mm.h @@ -0,0 +1,89 @@ +/* + * GLX Hardware Device Driver common code + * Copyright (C) 1999 Wittawat Yamwong + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +/** + * Memory manager code. Primarily used by device drivers to manage texture + * heaps, etc. + */ + + +#ifndef MM_H +#define MM_H + + +struct mem_block { + struct mem_block *next, *prev; + struct mem_block *next_free, *prev_free; + struct mem_block *heap; + int ofs,size; + unsigned int free:1; + unsigned int reserved:1; +}; + + + +/** + * input: total size in bytes + * return: a heap pointer if OK, NULL if error + */ +extern struct mem_block *mmInit(int ofs, int size); + +/** + * Allocate 'size' bytes with 2^align2 bytes alignment, + * restrict the search to free memory after 'startSearch' + * depth and back buffers should be in different 4mb banks + * to get better page hits if possible + * input: size = size of block + * align2 = 2^align2 bytes alignment + * startSearch = linear offset from start of heap to begin search + * return: pointer to the allocated block, 0 if error + */ +extern struct mem_block *mmAllocMem(struct mem_block *heap, int size, int align2, + int startSearch); + +/** + * Free block starts at offset + * input: pointer to a block + * return: 0 if OK, -1 if error + */ +extern int mmFreeMem(struct mem_block *b); + +/** + * Free block starts at offset + * input: pointer to a heap, start offset + * return: pointer to a block + */ +extern struct mem_block *mmFindBlock(struct mem_block *heap, int start); + +/** + * destroy MM + */ +extern void mmDestroy(struct mem_block *mmInit); + +/** + * For debuging purpose. + */ +extern void mmDumpMemInfo(const struct mem_block *mmInit); + +#endif diff --git a/src/gallium/auxiliary/rtasm/x86sse.c b/src/gallium/auxiliary/rtasm/x86sse.c new file mode 100644 index 0000000000..fff6f77a6b --- /dev/null +++ b/src/gallium/auxiliary/rtasm/x86sse.c @@ -0,0 +1,1195 @@ +#if defined(__i386__) || defined(__386__) + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" + +#include "x86sse.h" + +#define DISASSEM 0 +#define X86_TWOB 0x0f + +static unsigned char *cptr( void (*label)() ) +{ + return (unsigned char *)(unsigned long)label; +} + + +static void do_realloc( struct x86_function *p ) +{ + if (p->size == 0) { + p->size = 1024; + p->store = _mesa_exec_malloc(p->size); + p->csr = p->store; + } + else { + unsigned used = p->csr - p->store; + unsigned char *tmp = p->store; + p->size *= 2; + p->store = _mesa_exec_malloc(p->size); + memcpy(p->store, tmp, used); + p->csr = p->store + used; + _mesa_exec_free(tmp); + } +} + +/* Emit bytes to the instruction stream: + */ +static unsigned char *reserve( struct x86_function *p, int bytes ) +{ + if (p->csr + bytes - p->store > p->size) + do_realloc(p); + + { + unsigned char *csr = p->csr; + p->csr += bytes; + return csr; + } +} + + + +static void emit_1b( struct x86_function *p, char b0 ) +{ + char *csr = (char *)reserve(p, 1); + *csr = b0; +} + +static void emit_1i( struct x86_function *p, int i0 ) +{ + int *icsr = (int *)reserve(p, sizeof(i0)); + *icsr = i0; +} + +static void emit_1ub( struct x86_function *p, unsigned char b0 ) +{ + unsigned char *csr = reserve(p, 1); + *csr++ = b0; +} + +static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 ) +{ + unsigned char *csr = reserve(p, 2); + *csr++ = b0; + *csr++ = b1; +} + +static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 ) +{ + unsigned char *csr = reserve(p, 3); + *csr++ = b0; + *csr++ = b1; + *csr++ = b2; +} + + +/* Build a modRM byte + possible displacement. No treatment of SIB + * indexing. BZZT - no way to encode an absolute address. + */ +static void emit_modrm( struct x86_function *p, + struct x86_reg reg, + struct x86_reg regmem ) +{ + unsigned char val = 0; + + assert(reg.mod == mod_REG); + + val |= regmem.mod << 6; /* mod field */ + val |= reg.idx << 3; /* reg field */ + val |= regmem.idx; /* r/m field */ + + emit_1ub(p, val); + + /* Oh-oh we've stumbled into the SIB thing. + */ + if (regmem.file == file_REG32 && + regmem.idx == reg_SP) { + emit_1ub(p, 0x24); /* simplistic! */ + } + + switch (regmem.mod) { + case mod_REG: + case mod_INDIRECT: + break; + case mod_DISP8: + emit_1b(p, regmem.disp); + break; + case mod_DISP32: + emit_1i(p, regmem.disp); + break; + default: + assert(0); + break; + } +} + + +static void emit_modrm_noreg( struct x86_function *p, + unsigned op, + struct x86_reg regmem ) +{ + struct x86_reg dummy = x86_make_reg(file_REG32, op); + emit_modrm(p, dummy, regmem); +} + +/* Many x86 instructions have two opcodes to cope with the situations + * where the destination is a register or memory reference + * respectively. This function selects the correct opcode based on + * the arguments presented. + */ +static void emit_op_modrm( struct x86_function *p, + unsigned char op_dst_is_reg, + unsigned char op_dst_is_mem, + struct x86_reg dst, + struct x86_reg src ) +{ + switch (dst.mod) { + case mod_REG: + emit_1ub(p, op_dst_is_reg); + emit_modrm(p, dst, src); + break; + case mod_INDIRECT: + case mod_DISP32: + case mod_DISP8: + assert(src.mod == mod_REG); + emit_1ub(p, op_dst_is_mem); + emit_modrm(p, src, dst); + break; + default: + assert(0); + break; + } +} + + + + + + + +/* Create and manipulate registers and regmem values: + */ +struct x86_reg x86_make_reg( enum x86_reg_file file, + enum x86_reg_name idx ) +{ + struct x86_reg reg; + + reg.file = file; + reg.idx = idx; + reg.mod = mod_REG; + reg.disp = 0; + + return reg; +} + +struct x86_reg x86_make_disp( struct x86_reg reg, + int disp ) +{ + assert(reg.file == file_REG32); + + if (reg.mod == mod_REG) + reg.disp = disp; + else + reg.disp += disp; + + if (reg.disp == 0) + reg.mod = mod_INDIRECT; + else if (reg.disp <= 127 && reg.disp >= -128) + reg.mod = mod_DISP8; + else + reg.mod = mod_DISP32; + + return reg; +} + +struct x86_reg x86_deref( struct x86_reg reg ) +{ + return x86_make_disp(reg, 0); +} + +struct x86_reg x86_get_base_reg( struct x86_reg reg ) +{ + return x86_make_reg( reg.file, reg.idx ); +} + +unsigned char *x86_get_label( struct x86_function *p ) +{ + return p->csr; +} + + + +/*********************************************************************** + * x86 instructions + */ + + +void x86_jcc( struct x86_function *p, + enum x86_cc cc, + unsigned char *label ) +{ + int offset = label - (x86_get_label(p) + 2); + + if (offset <= 127 && offset >= -128) { + emit_1ub(p, 0x70 + cc); + emit_1b(p, (char) offset); + } + else { + offset = label - (x86_get_label(p) + 6); + emit_2ub(p, 0x0f, 0x80 + cc); + emit_1i(p, offset); + } +} + +/* Always use a 32bit offset for forward jumps: + */ +unsigned char *x86_jcc_forward( struct x86_function *p, + enum x86_cc cc ) +{ + emit_2ub(p, 0x0f, 0x80 + cc); + emit_1i(p, 0); + return x86_get_label(p); +} + +unsigned char *x86_jmp_forward( struct x86_function *p) +{ + emit_1ub(p, 0xe9); + emit_1i(p, 0); + return x86_get_label(p); +} + +unsigned char *x86_call_forward( struct x86_function *p) +{ + emit_1ub(p, 0xe8); + emit_1i(p, 0); + return x86_get_label(p); +} + +/* Fixup offset from forward jump: + */ +void x86_fixup_fwd_jump( struct x86_function *p, + unsigned char *fixup ) +{ + *(int *)(fixup - 4) = x86_get_label(p) - fixup; +} + +void x86_jmp( struct x86_function *p, unsigned char *label) +{ + emit_1ub(p, 0xe9); + emit_1i(p, label - x86_get_label(p) - 4); +} + +#if 0 +/* This doesn't work once we start reallocating & copying the + * generated code on buffer fills, because the call is relative to the + * current pc. + */ +void x86_call( struct x86_function *p, void (*label)()) +{ + emit_1ub(p, 0xe8); + emit_1i(p, cptr(label) - x86_get_label(p) - 4); +} +#else +void x86_call( struct x86_function *p, struct x86_reg reg) +{ + emit_1ub(p, 0xff); + emit_modrm(p, reg, reg); +} +#endif + + +/* michal: + * Temporary. As I need immediate operands, and dont want to mess with the codegen, + * I load the immediate into general purpose register and use it. + */ +void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + assert(dst.mod == mod_REG); + emit_1ub(p, 0xb8 + dst.idx); + emit_1i(p, imm); +} + +void x86_push( struct x86_function *p, + struct x86_reg reg ) +{ + assert(reg.mod == mod_REG); + emit_1ub(p, 0x50 + reg.idx); + p->stack_offset += 4; +} + +void x86_pop( struct x86_function *p, + struct x86_reg reg ) +{ + assert(reg.mod == mod_REG); + emit_1ub(p, 0x58 + reg.idx); + p->stack_offset -= 4; +} + +void x86_inc( struct x86_function *p, + struct x86_reg reg ) +{ + assert(reg.mod == mod_REG); + emit_1ub(p, 0x40 + reg.idx); +} + +void x86_dec( struct x86_function *p, + struct x86_reg reg ) +{ + assert(reg.mod == mod_REG); + emit_1ub(p, 0x48 + reg.idx); +} + +void x86_ret( struct x86_function *p ) +{ + emit_1ub(p, 0xc3); +} + +void x86_sahf( struct x86_function *p ) +{ + emit_1ub(p, 0x9e); +} + +void x86_mov( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x8b, 0x89, dst, src ); +} + +void x86_xor( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x33, 0x31, dst, src ); +} + +void x86_cmp( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x3b, 0x39, dst, src ); +} + +void x86_lea( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_1ub(p, 0x8d); + emit_modrm( p, dst, src ); +} + +void x86_test( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_1ub(p, 0x85); + emit_modrm( p, dst, src ); +} + +void x86_add( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm(p, 0x03, 0x01, dst, src ); +} + +void x86_mul( struct x86_function *p, + struct x86_reg src ) +{ + assert (src.file == file_REG32 && src.mod == mod_REG); + emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src ); +} + +void x86_sub( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm(p, 0x2b, 0x29, dst, src ); +} + +void x86_or( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x0b, 0x09, dst, src ); +} + +void x86_and( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x23, 0x21, dst, src ); +} + + + +/*********************************************************************** + * SSE instructions + */ + + +void sse_movss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, 0xF3, X86_TWOB); + emit_op_modrm( p, 0x10, 0x11, dst, src ); +} + +void sse_movaps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x28, 0x29, dst, src ); +} + +void sse_movups( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x10, 0x11, dst, src ); +} + +void sse_movhps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.mod != mod_REG || src.mod != mod_REG); + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ +} + +void sse_movlps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.mod != mod_REG || src.mod != mod_REG); + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ +} + +void sse_maxps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x5F); + emit_modrm( p, dst, src ); +} + +void sse_maxss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x5F); + emit_modrm( p, dst, src ); +} + +void sse_divss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x5E); + emit_modrm( p, dst, src ); +} + +void sse_minps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x5D); + emit_modrm( p, dst, src ); +} + +void sse_subps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x5C); + emit_modrm( p, dst, src ); +} + +void sse_mulps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x59); + emit_modrm( p, dst, src ); +} + +void sse_mulss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x59); + emit_modrm( p, dst, src ); +} + +void sse_addps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x58); + emit_modrm( p, dst, src ); +} + +void sse_addss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x58); + emit_modrm( p, dst, src ); +} + +void sse_andnps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x55); + emit_modrm( p, dst, src ); +} + +void sse_andps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x54); + emit_modrm( p, dst, src ); +} + +void sse_rsqrtps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x52); + emit_modrm( p, dst, src ); +} + +void sse_rsqrtss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x52); + emit_modrm( p, dst, src ); + +} + +void sse_movhlps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.mod == mod_REG && src.mod == mod_REG); + emit_2ub(p, X86_TWOB, 0x12); + emit_modrm( p, dst, src ); +} + +void sse_movlhps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.mod == mod_REG && src.mod == mod_REG); + emit_2ub(p, X86_TWOB, 0x16); + emit_modrm( p, dst, src ); +} + +void sse_orps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x56); + emit_modrm( p, dst, src ); +} + +void sse_xorps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x57); + emit_modrm( p, dst, src ); +} + +void sse_cvtps2pi( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.file == file_MMX && + (src.file == file_XMM || src.mod != mod_REG)); + + p->need_emms = 1; + + emit_2ub(p, X86_TWOB, 0x2d); + emit_modrm( p, dst, src ); +} + + +/* Shufps can also be used to implement a reduced swizzle when dest == + * arg0. + */ +void sse_shufps( struct x86_function *p, + struct x86_reg dest, + struct x86_reg arg0, + unsigned char shuf) +{ + emit_2ub(p, X86_TWOB, 0xC6); + emit_modrm(p, dest, arg0); + emit_1ub(p, shuf); +} + +void sse_cmpps( struct x86_function *p, + struct x86_reg dest, + struct x86_reg arg0, + unsigned char cc) +{ + emit_2ub(p, X86_TWOB, 0xC2); + emit_modrm(p, dest, arg0); + emit_1ub(p, cc); +} + +void sse_pmovmskb( struct x86_function *p, + struct x86_reg dest, + struct x86_reg src) +{ + emit_3ub(p, 0x66, X86_TWOB, 0xD7); + emit_modrm(p, dest, src); +} + +/*********************************************************************** + * SSE2 instructions + */ + +/** + * Perform a reduced swizzle: + */ +void sse2_pshufd( struct x86_function *p, + struct x86_reg dest, + struct x86_reg arg0, + unsigned char shuf) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x70); + emit_modrm(p, dest, arg0); + emit_1ub(p, shuf); +} + +void sse2_cvttps2dq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); + emit_modrm( p, dst, src ); +} + +void sse2_cvtps2dq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x5B); + emit_modrm( p, dst, src ); +} + +void sse2_packssdw( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x6B); + emit_modrm( p, dst, src ); +} + +void sse2_packsswb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x63); + emit_modrm( p, dst, src ); +} + +void sse2_packuswb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x67); + emit_modrm( p, dst, src ); +} + +void sse2_rcpps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x53); + emit_modrm( p, dst, src ); +} + +void sse2_rcpss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x53); + emit_modrm( p, dst, src ); +} + +void sse2_movd( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, 0x66, X86_TWOB); + emit_op_modrm( p, 0x6e, 0x7e, dst, src ); +} + + + + +/*********************************************************************** + * x87 instructions + */ +void x87_fist( struct x86_function *p, struct x86_reg dst ) +{ + emit_1ub(p, 0xdb); + emit_modrm_noreg(p, 2, dst); +} + +void x87_fistp( struct x86_function *p, struct x86_reg dst ) +{ + emit_1ub(p, 0xdb); + emit_modrm_noreg(p, 3, dst); +} + +void x87_fild( struct x86_function *p, struct x86_reg arg ) +{ + emit_1ub(p, 0xdf); + emit_modrm_noreg(p, 0, arg); +} + +void x87_fldz( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xee); +} + + +void x87_fldcw( struct x86_function *p, struct x86_reg arg ) +{ + assert(arg.file == file_REG32); + assert(arg.mod != mod_REG); + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 5, arg); +} + +void x87_fld1( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xe8); +} + +void x87_fldl2e( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xea); +} + +void x87_fldln2( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xed); +} + +void x87_fwait( struct x86_function *p ) +{ + emit_1ub(p, 0x9b); +} + +void x87_fnclex( struct x86_function *p ) +{ + emit_2ub(p, 0xdb, 0xe2); +} + +void x87_fclex( struct x86_function *p ) +{ + x87_fwait(p); + x87_fnclex(p); +} + + +static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, + unsigned char dst0ub0, + unsigned char dst0ub1, + unsigned char arg0ub0, + unsigned char arg0ub1, + unsigned char argmem_noreg) +{ + assert(dst.file == file_x87); + + if (arg.file == file_x87) { + if (dst.idx == 0) + emit_2ub(p, dst0ub0, dst0ub1+arg.idx); + else if (arg.idx == 0) + emit_2ub(p, arg0ub0, arg0ub1+arg.idx); + else + assert(0); + } + else if (dst.idx == 0) { + assert(arg.file == file_REG32); + emit_1ub(p, 0xd8); + emit_modrm_noreg(p, argmem_noreg, arg); + } + else + assert(0); +} + +void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xc8, + 0xdc, 0xc8, + 4); +} + +void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xe0, + 0xdc, 0xe8, + 4); +} + +void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xe8, + 0xdc, 0xe0, + 5); +} + +void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xc0, + 0xdc, 0xc0, + 0); +} + +void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xf0, + 0xdc, 0xf8, + 6); +} + +void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xf8, + 0xdc, 0xf0, + 7); +} + +void x87_fmulp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xc8+dst.idx); +} + +void x87_fsubp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xe8+dst.idx); +} + +void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xe0+dst.idx); +} + +void x87_faddp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xc0+dst.idx); +} + +void x87_fdivp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xf8+dst.idx); +} + +void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xf0+dst.idx); +} + +void x87_fucom( struct x86_function *p, struct x86_reg arg ) +{ + assert(arg.file == file_x87); + emit_2ub(p, 0xdd, 0xe0+arg.idx); +} + +void x87_fucomp( struct x86_function *p, struct x86_reg arg ) +{ + assert(arg.file == file_x87); + emit_2ub(p, 0xdd, 0xe8+arg.idx); +} + +void x87_fucompp( struct x86_function *p ) +{ + emit_2ub(p, 0xda, 0xe9); +} + +void x87_fxch( struct x86_function *p, struct x86_reg arg ) +{ + assert(arg.file == file_x87); + emit_2ub(p, 0xd9, 0xc8+arg.idx); +} + +void x87_fabs( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xe1); +} + +void x87_fchs( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xe0); +} + +void x87_fcos( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xff); +} + + +void x87_fprndint( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfc); +} + +void x87_fscale( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfd); +} + +void x87_fsin( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfe); +} + +void x87_fsincos( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfb); +} + +void x87_fsqrt( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfa); +} + +void x87_fxtract( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xf4); +} + +/* st0 = (2^st0)-1 + * + * Restrictions: -1.0 <= st0 <= 1.0 + */ +void x87_f2xm1( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xf0); +} + +/* st1 = st1 * log2(st0); + * pop_stack; + */ +void x87_fyl2x( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xf1); +} + +/* st1 = st1 * log2(st0 + 1.0); + * pop_stack; + * + * A fast operation, with restrictions: -.29 < st0 < .29 + */ +void x87_fyl2xp1( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xf9); +} + + +void x87_fld( struct x86_function *p, struct x86_reg arg ) +{ + if (arg.file == file_x87) + emit_2ub(p, 0xd9, 0xc0 + arg.idx); + else { + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 0, arg); + } +} + +void x87_fst( struct x86_function *p, struct x86_reg dst ) +{ + if (dst.file == file_x87) + emit_2ub(p, 0xdd, 0xd0 + dst.idx); + else { + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 2, dst); + } +} + +void x87_fstp( struct x86_function *p, struct x86_reg dst ) +{ + if (dst.file == file_x87) + emit_2ub(p, 0xdd, 0xd8 + dst.idx); + else { + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 3, dst); + } +} + +void x87_fcom( struct x86_function *p, struct x86_reg dst ) +{ + if (dst.file == file_x87) + emit_2ub(p, 0xd8, 0xd0 + dst.idx); + else { + emit_1ub(p, 0xd8); + emit_modrm_noreg(p, 2, dst); + } +} + +void x87_fcomp( struct x86_function *p, struct x86_reg dst ) +{ + if (dst.file == file_x87) + emit_2ub(p, 0xd8, 0xd8 + dst.idx); + else { + emit_1ub(p, 0xd8); + emit_modrm_noreg(p, 3, dst); + } +} + + +void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_REG32); + + if (dst.idx == reg_AX && + dst.mod == mod_REG) + emit_2ub(p, 0xdf, 0xe0); + else { + emit_1ub(p, 0xdd); + emit_modrm_noreg(p, 7, dst); + } +} + + + + +/*********************************************************************** + * MMX instructions + */ + +void mmx_emms( struct x86_function *p ) +{ + assert(p->need_emms); + emit_2ub(p, 0x0f, 0x77); + p->need_emms = 0; +} + +void mmx_packssdw( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.file == file_MMX && + (src.file == file_MMX || src.mod != mod_REG)); + + p->need_emms = 1; + + emit_2ub(p, X86_TWOB, 0x6b); + emit_modrm( p, dst, src ); +} + +void mmx_packuswb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.file == file_MMX && + (src.file == file_MMX || src.mod != mod_REG)); + + p->need_emms = 1; + + emit_2ub(p, X86_TWOB, 0x67); + emit_modrm( p, dst, src ); +} + +void mmx_movd( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + p->need_emms = 1; + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x6e, 0x7e, dst, src ); +} + +void mmx_movq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + p->need_emms = 1; + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x6f, 0x7f, dst, src ); +} + + +/*********************************************************************** + * Helper functions + */ + + +/* Retreive a reference to one of the function arguments, taking into + * account any push/pop activity: + */ +struct x86_reg x86_fn_arg( struct x86_function *p, + unsigned arg ) +{ + return x86_make_disp(x86_make_reg(file_REG32, reg_SP), + p->stack_offset + arg * 4); /* ??? */ +} + + +void x86_init_func( struct x86_function *p ) +{ + p->size = 0; + p->store = NULL; + p->csr = p->store; +} + +void x86_init_func_size( struct x86_function *p, unsigned code_size ) +{ + p->size = code_size; + p->store = _mesa_exec_malloc(code_size); + p->csr = p->store; +} + +void x86_release_func( struct x86_function *p ) +{ + _mesa_exec_free(p->store); + p->store = NULL; + p->csr = NULL; + p->size = 0; +} + + +void (*x86_get_func( struct x86_function *p ))(void) +{ + if (DISASSEM && p->store) + _mesa_printf("disassemble %p %p\n", p->store, p->csr); + return (void (*)(void)) (unsigned long) p->store; +} + +#else + +void x86sse_dummy( void ) +{ +} + +#endif diff --git a/src/gallium/auxiliary/rtasm/x86sse.h b/src/gallium/auxiliary/rtasm/x86sse.h new file mode 100644 index 0000000000..c2aa416492 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/x86sse.h @@ -0,0 +1,256 @@ + +#ifndef _X86SSE_H_ +#define _X86SSE_H_ + +#if defined(__i386__) || defined(__386__) + +/* It is up to the caller to ensure that instructions issued are + * suitable for the host cpu. There are no checks made in this module + * for mmx/sse/sse2 support on the cpu. + */ +struct x86_reg { + unsigned file:3; + unsigned idx:3; + unsigned mod:2; /* mod_REG if this is just a register */ + int disp:24; /* only +/- 23bits of offset - should be enough... */ +}; + +struct x86_function { + unsigned size; + unsigned char *store; + unsigned char *csr; + unsigned stack_offset; + int need_emms; + const char *fn; +}; + +enum x86_reg_file { + file_REG32, + file_MMX, + file_XMM, + file_x87 +}; + +/* Values for mod field of modr/m byte + */ +enum x86_reg_mod { + mod_INDIRECT, + mod_DISP8, + mod_DISP32, + mod_REG +}; + +enum x86_reg_name { + reg_AX, + reg_CX, + reg_DX, + reg_BX, + reg_SP, + reg_BP, + reg_SI, + reg_DI +}; + + +enum x86_cc { + cc_O, /* overflow */ + cc_NO, /* not overflow */ + cc_NAE, /* not above or equal / carry */ + cc_AE, /* above or equal / not carry */ + cc_E, /* equal / zero */ + cc_NE /* not equal / not zero */ +}; + +enum sse_cc { + cc_Equal, + cc_LessThan, + cc_LessThanEqual, + cc_Unordered, + cc_NotEqual, + cc_NotLessThan, + cc_NotLessThanEqual, + cc_Ordered +}; + +#define cc_Z cc_E +#define cc_NZ cc_NE + +/* Begin/end/retreive function creation: + */ + + +void x86_init_func( struct x86_function *p ); +void x86_init_func_size( struct x86_function *p, unsigned code_size ); +void x86_release_func( struct x86_function *p ); +void (*x86_get_func( struct x86_function *p ))( void ); + + + +/* Create and manipulate registers and regmem values: + */ +struct x86_reg x86_make_reg( enum x86_reg_file file, + enum x86_reg_name idx ); + +struct x86_reg x86_make_disp( struct x86_reg reg, + int disp ); + +struct x86_reg x86_deref( struct x86_reg reg ); + +struct x86_reg x86_get_base_reg( struct x86_reg reg ); + + +/* Labels, jumps and fixup: + */ +unsigned char *x86_get_label( struct x86_function *p ); + +void x86_jcc( struct x86_function *p, + enum x86_cc cc, + unsigned char *label ); + +unsigned char *x86_jcc_forward( struct x86_function *p, + enum x86_cc cc ); + +unsigned char *x86_jmp_forward( struct x86_function *p); + +unsigned char *x86_call_forward( struct x86_function *p); + +void x86_fixup_fwd_jump( struct x86_function *p, + unsigned char *fixup ); + +void x86_jmp( struct x86_function *p, unsigned char *label ); + +/* void x86_call( struct x86_function *p, void (*label)() ); */ +void x86_call( struct x86_function *p, struct x86_reg reg); + +/* michal: + * Temporary. As I need immediate operands, and dont want to mess with the codegen, + * I load the immediate into general purpose register and use it. + */ +void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); + + +/* Macro for sse_shufps() and sse2_pshufd(): + */ +#define SHUF(_x,_y,_z,_w) (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6)) +#define SHUF_NOOP RSW(0,1,2,3) +#define GET_SHUF(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + +void mmx_emms( struct x86_function *p ); +void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + +void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, + unsigned char shuf ); +void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + +void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, + unsigned char cc ); +void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, + unsigned char shuf ); +void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); + +void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_dec( struct x86_function *p, struct x86_reg reg ); +void x86_inc( struct x86_function *p, struct x86_reg reg ); +void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_mul( struct x86_function *p, struct x86_reg src ); +void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_pop( struct x86_function *p, struct x86_reg reg ); +void x86_push( struct x86_function *p, struct x86_reg reg ); +void x86_ret( struct x86_function *p ); +void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_sahf( struct x86_function *p ); + +void x87_f2xm1( struct x86_function *p ); +void x87_fabs( struct x86_function *p ); +void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_faddp( struct x86_function *p, struct x86_reg dst ); +void x87_fchs( struct x86_function *p ); +void x87_fclex( struct x86_function *p ); +void x87_fcom( struct x86_function *p, struct x86_reg dst ); +void x87_fcomp( struct x86_function *p, struct x86_reg dst ); +void x87_fcos( struct x86_function *p ); +void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fdivp( struct x86_function *p, struct x86_reg dst ); +void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fdivrp( struct x86_function *p, struct x86_reg dst ); +void x87_fild( struct x86_function *p, struct x86_reg arg ); +void x87_fist( struct x86_function *p, struct x86_reg dst ); +void x87_fistp( struct x86_function *p, struct x86_reg dst ); +void x87_fld( struct x86_function *p, struct x86_reg arg ); +void x87_fld1( struct x86_function *p ); +void x87_fldcw( struct x86_function *p, struct x86_reg arg ); +void x87_fldl2e( struct x86_function *p ); +void x87_fldln2( struct x86_function *p ); +void x87_fldz( struct x86_function *p ); +void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fmulp( struct x86_function *p, struct x86_reg dst ); +void x87_fnclex( struct x86_function *p ); +void x87_fprndint( struct x86_function *p ); +void x87_fscale( struct x86_function *p ); +void x87_fsin( struct x86_function *p ); +void x87_fsincos( struct x86_function *p ); +void x87_fsqrt( struct x86_function *p ); +void x87_fst( struct x86_function *p, struct x86_reg dst ); +void x87_fstp( struct x86_function *p, struct x86_reg dst ); +void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fsubp( struct x86_function *p, struct x86_reg dst ); +void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fsubrp( struct x86_function *p, struct x86_reg dst ); +void x87_fxch( struct x86_function *p, struct x86_reg dst ); +void x87_fxtract( struct x86_function *p ); +void x87_fyl2x( struct x86_function *p ); +void x87_fyl2xp1( struct x86_function *p ); +void x87_fwait( struct x86_function *p ); +void x87_fnstsw( struct x86_function *p, struct x86_reg dst ); +void x87_fucompp( struct x86_function *p ); +void x87_fucomp( struct x86_function *p, struct x86_reg arg ); +void x87_fucom( struct x86_function *p, struct x86_reg arg ); + + + +/* Retreive a reference to one of the function arguments, taking into + * account any push/pop activity. Note - doesn't track explict + * manipulation of ESP by other instructions. + */ +struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg ); + +#endif +#endif -- cgit v1.2.3 From 39ea0308425ad04618061129c63c22ac0efb0692 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 19 Feb 2008 12:00:48 +0900 Subject: Rename rtasm files. --- src/gallium/auxiliary/rtasm/Makefile | 4 +- src/gallium/auxiliary/rtasm/SConscript | 4 +- src/gallium/auxiliary/rtasm/execmem.c | 133 --- src/gallium/auxiliary/rtasm/execmem.h | 45 - src/gallium/auxiliary/rtasm/rtasm_execmem.c | 134 +++ src/gallium/auxiliary/rtasm/rtasm_execmem.h | 45 + src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 1196 +++++++++++++++++++++++++++ src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 256 ++++++ src/gallium/auxiliary/rtasm/x86sse.c | 1195 -------------------------- src/gallium/auxiliary/rtasm/x86sse.h | 256 ------ 10 files changed, 1635 insertions(+), 1633 deletions(-) delete mode 100644 src/gallium/auxiliary/rtasm/execmem.c delete mode 100644 src/gallium/auxiliary/rtasm/execmem.h create mode 100644 src/gallium/auxiliary/rtasm/rtasm_execmem.c create mode 100644 src/gallium/auxiliary/rtasm/rtasm_execmem.h create mode 100644 src/gallium/auxiliary/rtasm/rtasm_x86sse.c create mode 100644 src/gallium/auxiliary/rtasm/rtasm_x86sse.h delete mode 100644 src/gallium/auxiliary/rtasm/x86sse.c delete mode 100644 src/gallium/auxiliary/rtasm/x86sse.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile index b3b9934e10..7c8ac60794 100644 --- a/src/gallium/auxiliary/rtasm/Makefile +++ b/src/gallium/auxiliary/rtasm/Makefile @@ -5,9 +5,9 @@ include $(TOP)/configs/current LIBNAME = rtasm DRIVER_SOURCES = \ + execmem.c \ x86sse.c \ - mm.c \ - execmem.c + mm.c C_SOURCES = \ $(DRIVER_SOURCES) diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript index c5b1551786..de8456e0ca 100644 --- a/src/gallium/auxiliary/rtasm/SConscript +++ b/src/gallium/auxiliary/rtasm/SConscript @@ -3,9 +3,9 @@ Import('*') rtasm = env.ConvenienceLibrary( target = 'rtasm', source = [ - 'x86sse.c', + 'rtasm_execmem.c', + 'rtasm_x86sse.c', 'mm.c', - 'execmem.c', ]) auxiliaries.insert(0, rtasm) diff --git a/src/gallium/auxiliary/rtasm/execmem.c b/src/gallium/auxiliary/rtasm/execmem.c deleted file mode 100644 index c7c35f7ef2..0000000000 --- a/src/gallium/auxiliary/rtasm/execmem.c +++ /dev/null @@ -1,133 +0,0 @@ -/************************************************************************** - * - * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - -/** - * \file exemem.c - * Functions for allocating executable memory. - * - * \author Keith Whitwell - */ - - -#include "pipe/p_compiler.h" -#include "pipe/p_thread.h" - -#include "execmem.h" - - -#if defined(__linux__) - -/* - * Allocate a large block of memory which can hold code then dole it out - * in pieces by means of the generic memory manager code. -*/ - -#include -#include -#include "mm.h" - -#define EXEC_HEAP_SIZE (10*1024*1024) - -_glthread_DECLARE_STATIC_MUTEX(exec_mutex); - -static struct mem_block *exec_heap = NULL; -static unsigned char *exec_mem = NULL; - - -static void -init_heap(void) -{ - if (!exec_heap) - exec_heap = mmInit( 0, EXEC_HEAP_SIZE ); - - if (!exec_mem) - exec_mem = (unsigned char *) mmap(0, EXEC_HEAP_SIZE, - PROT_EXEC | PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); -} - - -void * -_mesa_exec_malloc(size_t size) -{ - struct mem_block *block = NULL; - void *addr = NULL; - - _glthread_LOCK_MUTEX(exec_mutex); - - init_heap(); - - if (exec_heap) { - size = (size + 31) & ~31; - block = mmAllocMem( exec_heap, size, 32, 0 ); - } - - if (block) - addr = exec_mem + block->ofs; - else - debug_printf("_mesa_exec_malloc failed\n"); - - _glthread_UNLOCK_MUTEX(exec_mutex); - - return addr; -} - - -void -_mesa_exec_free(void *addr) -{ - _glthread_LOCK_MUTEX(exec_mutex); - - if (exec_heap) { - struct mem_block *block = mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem); - - if (block) - mmFreeMem(block); - } - - _glthread_UNLOCK_MUTEX(exec_mutex); -} - - -#else - -/* - * Just use regular memory. - */ - -void * -_mesa_exec_malloc(GLuint size) -{ - return _mesa_malloc( size ); -} - - -void -_mesa_exec_free(void *addr) -{ - _mesa_free(addr); -} - - -#endif diff --git a/src/gallium/auxiliary/rtasm/execmem.h b/src/gallium/auxiliary/rtasm/execmem.h deleted file mode 100644 index 9fd4569165..0000000000 --- a/src/gallium/auxiliary/rtasm/execmem.h +++ /dev/null @@ -1,45 +0,0 @@ -/************************************************************************** - * - * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN - * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN - * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \file exemem.c - * Functions for allocating executable memory. - * - * \author Keith Whitwell - */ - -#ifndef _EXECMEM_H_ -#define _EXECMEM_H_ - -#include "pipe/p_compiler.h" - - -extern void * -_mesa_exec_malloc( size_t size ); - - -extern void -_mesa_exec_free( void *addr ); - - -#endif diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c new file mode 100644 index 0000000000..cb13db2498 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -0,0 +1,134 @@ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * \file exemem.c + * Functions for allocating executable memory. + * + * \author Keith Whitwell + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_thread.h" + +#include "rtasm_execmem.h" + + +#if defined(__linux__) + +/* + * Allocate a large block of memory which can hold code then dole it out + * in pieces by means of the generic memory manager code. +*/ + +#include +#include +#include "mm.h" + +#define EXEC_HEAP_SIZE (10*1024*1024) + +_glthread_DECLARE_STATIC_MUTEX(exec_mutex); + +static struct mem_block *exec_heap = NULL; +static unsigned char *exec_mem = NULL; + + +static void +init_heap(void) +{ + if (!exec_heap) + exec_heap = mmInit( 0, EXEC_HEAP_SIZE ); + + if (!exec_mem) + exec_mem = (unsigned char *) mmap(0, EXEC_HEAP_SIZE, + PROT_EXEC | PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); +} + + +void * +rtasm_exec_malloc(size_t size) +{ + struct mem_block *block = NULL; + void *addr = NULL; + + _glthread_LOCK_MUTEX(exec_mutex); + + init_heap(); + + if (exec_heap) { + size = (size + 31) & ~31; + block = mmAllocMem( exec_heap, size, 32, 0 ); + } + + if (block) + addr = exec_mem + block->ofs; + else + debug_printf("rtasm_exec_malloc failed\n"); + + _glthread_UNLOCK_MUTEX(exec_mutex); + + return addr; +} + + +void +rtasm_exec_free(void *addr) +{ + _glthread_LOCK_MUTEX(exec_mutex); + + if (exec_heap) { + struct mem_block *block = mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem); + + if (block) + mmFreeMem(block); + } + + _glthread_UNLOCK_MUTEX(exec_mutex); +} + + +#else + +/* + * Just use regular memory. + */ + +void * +rtasm_exec_malloc(GLuint size) +{ + return MALLOC( size ); +} + + +void +rtasm_exec_free(void *addr) +{ + FREE(addr); +} + + +#endif diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.h b/src/gallium/auxiliary/rtasm/rtasm_execmem.h new file mode 100644 index 0000000000..155c6d34e0 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.h @@ -0,0 +1,45 @@ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file exemem.c + * Functions for allocating executable memory. + * + * \author Keith Whitwell + */ + +#ifndef _RTASM_EXECMEM_H_ +#define _RTASM_EXECMEM_H_ + +#include "pipe/p_compiler.h" + + +extern void * +rtasm_exec_malloc( size_t size ); + + +extern void +rtasm_exec_free( void *addr ); + + +#endif diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c new file mode 100644 index 0000000000..3c885a9fff --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -0,0 +1,1196 @@ +#if defined(__i386__) || defined(__386__) + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" + +#include "rtasm_execmem.h" +#include "rtasm_x86sse.h" + +#define DISASSEM 0 +#define X86_TWOB 0x0f + +static unsigned char *cptr( void (*label)() ) +{ + return (unsigned char *)(unsigned long)label; +} + + +static void do_realloc( struct x86_function *p ) +{ + if (p->size == 0) { + p->size = 1024; + p->store = rtasm_exec_malloc(p->size); + p->csr = p->store; + } + else { + unsigned used = p->csr - p->store; + unsigned char *tmp = p->store; + p->size *= 2; + p->store = rtasm_exec_malloc(p->size); + memcpy(p->store, tmp, used); + p->csr = p->store + used; + rtasm_exec_free(tmp); + } +} + +/* Emit bytes to the instruction stream: + */ +static unsigned char *reserve( struct x86_function *p, int bytes ) +{ + if (p->csr + bytes - p->store > p->size) + do_realloc(p); + + { + unsigned char *csr = p->csr; + p->csr += bytes; + return csr; + } +} + + + +static void emit_1b( struct x86_function *p, char b0 ) +{ + char *csr = (char *)reserve(p, 1); + *csr = b0; +} + +static void emit_1i( struct x86_function *p, int i0 ) +{ + int *icsr = (int *)reserve(p, sizeof(i0)); + *icsr = i0; +} + +static void emit_1ub( struct x86_function *p, unsigned char b0 ) +{ + unsigned char *csr = reserve(p, 1); + *csr++ = b0; +} + +static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 ) +{ + unsigned char *csr = reserve(p, 2); + *csr++ = b0; + *csr++ = b1; +} + +static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 ) +{ + unsigned char *csr = reserve(p, 3); + *csr++ = b0; + *csr++ = b1; + *csr++ = b2; +} + + +/* Build a modRM byte + possible displacement. No treatment of SIB + * indexing. BZZT - no way to encode an absolute address. + */ +static void emit_modrm( struct x86_function *p, + struct x86_reg reg, + struct x86_reg regmem ) +{ + unsigned char val = 0; + + assert(reg.mod == mod_REG); + + val |= regmem.mod << 6; /* mod field */ + val |= reg.idx << 3; /* reg field */ + val |= regmem.idx; /* r/m field */ + + emit_1ub(p, val); + + /* Oh-oh we've stumbled into the SIB thing. + */ + if (regmem.file == file_REG32 && + regmem.idx == reg_SP) { + emit_1ub(p, 0x24); /* simplistic! */ + } + + switch (regmem.mod) { + case mod_REG: + case mod_INDIRECT: + break; + case mod_DISP8: + emit_1b(p, regmem.disp); + break; + case mod_DISP32: + emit_1i(p, regmem.disp); + break; + default: + assert(0); + break; + } +} + + +static void emit_modrm_noreg( struct x86_function *p, + unsigned op, + struct x86_reg regmem ) +{ + struct x86_reg dummy = x86_make_reg(file_REG32, op); + emit_modrm(p, dummy, regmem); +} + +/* Many x86 instructions have two opcodes to cope with the situations + * where the destination is a register or memory reference + * respectively. This function selects the correct opcode based on + * the arguments presented. + */ +static void emit_op_modrm( struct x86_function *p, + unsigned char op_dst_is_reg, + unsigned char op_dst_is_mem, + struct x86_reg dst, + struct x86_reg src ) +{ + switch (dst.mod) { + case mod_REG: + emit_1ub(p, op_dst_is_reg); + emit_modrm(p, dst, src); + break; + case mod_INDIRECT: + case mod_DISP32: + case mod_DISP8: + assert(src.mod == mod_REG); + emit_1ub(p, op_dst_is_mem); + emit_modrm(p, src, dst); + break; + default: + assert(0); + break; + } +} + + + + + + + +/* Create and manipulate registers and regmem values: + */ +struct x86_reg x86_make_reg( enum x86_reg_file file, + enum x86_reg_name idx ) +{ + struct x86_reg reg; + + reg.file = file; + reg.idx = idx; + reg.mod = mod_REG; + reg.disp = 0; + + return reg; +} + +struct x86_reg x86_make_disp( struct x86_reg reg, + int disp ) +{ + assert(reg.file == file_REG32); + + if (reg.mod == mod_REG) + reg.disp = disp; + else + reg.disp += disp; + + if (reg.disp == 0) + reg.mod = mod_INDIRECT; + else if (reg.disp <= 127 && reg.disp >= -128) + reg.mod = mod_DISP8; + else + reg.mod = mod_DISP32; + + return reg; +} + +struct x86_reg x86_deref( struct x86_reg reg ) +{ + return x86_make_disp(reg, 0); +} + +struct x86_reg x86_get_base_reg( struct x86_reg reg ) +{ + return x86_make_reg( reg.file, reg.idx ); +} + +unsigned char *x86_get_label( struct x86_function *p ) +{ + return p->csr; +} + + + +/*********************************************************************** + * x86 instructions + */ + + +void x86_jcc( struct x86_function *p, + enum x86_cc cc, + unsigned char *label ) +{ + int offset = label - (x86_get_label(p) + 2); + + if (offset <= 127 && offset >= -128) { + emit_1ub(p, 0x70 + cc); + emit_1b(p, (char) offset); + } + else { + offset = label - (x86_get_label(p) + 6); + emit_2ub(p, 0x0f, 0x80 + cc); + emit_1i(p, offset); + } +} + +/* Always use a 32bit offset for forward jumps: + */ +unsigned char *x86_jcc_forward( struct x86_function *p, + enum x86_cc cc ) +{ + emit_2ub(p, 0x0f, 0x80 + cc); + emit_1i(p, 0); + return x86_get_label(p); +} + +unsigned char *x86_jmp_forward( struct x86_function *p) +{ + emit_1ub(p, 0xe9); + emit_1i(p, 0); + return x86_get_label(p); +} + +unsigned char *x86_call_forward( struct x86_function *p) +{ + emit_1ub(p, 0xe8); + emit_1i(p, 0); + return x86_get_label(p); +} + +/* Fixup offset from forward jump: + */ +void x86_fixup_fwd_jump( struct x86_function *p, + unsigned char *fixup ) +{ + *(int *)(fixup - 4) = x86_get_label(p) - fixup; +} + +void x86_jmp( struct x86_function *p, unsigned char *label) +{ + emit_1ub(p, 0xe9); + emit_1i(p, label - x86_get_label(p) - 4); +} + +#if 0 +/* This doesn't work once we start reallocating & copying the + * generated code on buffer fills, because the call is relative to the + * current pc. + */ +void x86_call( struct x86_function *p, void (*label)()) +{ + emit_1ub(p, 0xe8); + emit_1i(p, cptr(label) - x86_get_label(p) - 4); +} +#else +void x86_call( struct x86_function *p, struct x86_reg reg) +{ + emit_1ub(p, 0xff); + emit_modrm(p, reg, reg); +} +#endif + + +/* michal: + * Temporary. As I need immediate operands, and dont want to mess with the codegen, + * I load the immediate into general purpose register and use it. + */ +void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + assert(dst.mod == mod_REG); + emit_1ub(p, 0xb8 + dst.idx); + emit_1i(p, imm); +} + +void x86_push( struct x86_function *p, + struct x86_reg reg ) +{ + assert(reg.mod == mod_REG); + emit_1ub(p, 0x50 + reg.idx); + p->stack_offset += 4; +} + +void x86_pop( struct x86_function *p, + struct x86_reg reg ) +{ + assert(reg.mod == mod_REG); + emit_1ub(p, 0x58 + reg.idx); + p->stack_offset -= 4; +} + +void x86_inc( struct x86_function *p, + struct x86_reg reg ) +{ + assert(reg.mod == mod_REG); + emit_1ub(p, 0x40 + reg.idx); +} + +void x86_dec( struct x86_function *p, + struct x86_reg reg ) +{ + assert(reg.mod == mod_REG); + emit_1ub(p, 0x48 + reg.idx); +} + +void x86_ret( struct x86_function *p ) +{ + emit_1ub(p, 0xc3); +} + +void x86_sahf( struct x86_function *p ) +{ + emit_1ub(p, 0x9e); +} + +void x86_mov( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x8b, 0x89, dst, src ); +} + +void x86_xor( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x33, 0x31, dst, src ); +} + +void x86_cmp( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x3b, 0x39, dst, src ); +} + +void x86_lea( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_1ub(p, 0x8d); + emit_modrm( p, dst, src ); +} + +void x86_test( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_1ub(p, 0x85); + emit_modrm( p, dst, src ); +} + +void x86_add( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm(p, 0x03, 0x01, dst, src ); +} + +void x86_mul( struct x86_function *p, + struct x86_reg src ) +{ + assert (src.file == file_REG32 && src.mod == mod_REG); + emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src ); +} + +void x86_sub( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm(p, 0x2b, 0x29, dst, src ); +} + +void x86_or( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x0b, 0x09, dst, src ); +} + +void x86_and( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_op_modrm( p, 0x23, 0x21, dst, src ); +} + + + +/*********************************************************************** + * SSE instructions + */ + + +void sse_movss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, 0xF3, X86_TWOB); + emit_op_modrm( p, 0x10, 0x11, dst, src ); +} + +void sse_movaps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x28, 0x29, dst, src ); +} + +void sse_movups( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x10, 0x11, dst, src ); +} + +void sse_movhps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.mod != mod_REG || src.mod != mod_REG); + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ +} + +void sse_movlps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.mod != mod_REG || src.mod != mod_REG); + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ +} + +void sse_maxps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x5F); + emit_modrm( p, dst, src ); +} + +void sse_maxss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x5F); + emit_modrm( p, dst, src ); +} + +void sse_divss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x5E); + emit_modrm( p, dst, src ); +} + +void sse_minps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x5D); + emit_modrm( p, dst, src ); +} + +void sse_subps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x5C); + emit_modrm( p, dst, src ); +} + +void sse_mulps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x59); + emit_modrm( p, dst, src ); +} + +void sse_mulss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x59); + emit_modrm( p, dst, src ); +} + +void sse_addps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x58); + emit_modrm( p, dst, src ); +} + +void sse_addss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x58); + emit_modrm( p, dst, src ); +} + +void sse_andnps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x55); + emit_modrm( p, dst, src ); +} + +void sse_andps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x54); + emit_modrm( p, dst, src ); +} + +void sse_rsqrtps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x52); + emit_modrm( p, dst, src ); +} + +void sse_rsqrtss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x52); + emit_modrm( p, dst, src ); + +} + +void sse_movhlps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.mod == mod_REG && src.mod == mod_REG); + emit_2ub(p, X86_TWOB, 0x12); + emit_modrm( p, dst, src ); +} + +void sse_movlhps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.mod == mod_REG && src.mod == mod_REG); + emit_2ub(p, X86_TWOB, 0x16); + emit_modrm( p, dst, src ); +} + +void sse_orps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x56); + emit_modrm( p, dst, src ); +} + +void sse_xorps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x57); + emit_modrm( p, dst, src ); +} + +void sse_cvtps2pi( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.file == file_MMX && + (src.file == file_XMM || src.mod != mod_REG)); + + p->need_emms = 1; + + emit_2ub(p, X86_TWOB, 0x2d); + emit_modrm( p, dst, src ); +} + + +/* Shufps can also be used to implement a reduced swizzle when dest == + * arg0. + */ +void sse_shufps( struct x86_function *p, + struct x86_reg dest, + struct x86_reg arg0, + unsigned char shuf) +{ + emit_2ub(p, X86_TWOB, 0xC6); + emit_modrm(p, dest, arg0); + emit_1ub(p, shuf); +} + +void sse_cmpps( struct x86_function *p, + struct x86_reg dest, + struct x86_reg arg0, + unsigned char cc) +{ + emit_2ub(p, X86_TWOB, 0xC2); + emit_modrm(p, dest, arg0); + emit_1ub(p, cc); +} + +void sse_pmovmskb( struct x86_function *p, + struct x86_reg dest, + struct x86_reg src) +{ + emit_3ub(p, 0x66, X86_TWOB, 0xD7); + emit_modrm(p, dest, src); +} + +/*********************************************************************** + * SSE2 instructions + */ + +/** + * Perform a reduced swizzle: + */ +void sse2_pshufd( struct x86_function *p, + struct x86_reg dest, + struct x86_reg arg0, + unsigned char shuf) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x70); + emit_modrm(p, dest, arg0); + emit_1ub(p, shuf); +} + +void sse2_cvttps2dq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); + emit_modrm( p, dst, src ); +} + +void sse2_cvtps2dq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x5B); + emit_modrm( p, dst, src ); +} + +void sse2_packssdw( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x6B); + emit_modrm( p, dst, src ); +} + +void sse2_packsswb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x63); + emit_modrm( p, dst, src ); +} + +void sse2_packuswb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x67); + emit_modrm( p, dst, src ); +} + +void sse2_rcpps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x53); + emit_modrm( p, dst, src ); +} + +void sse2_rcpss( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0xF3, X86_TWOB, 0x53); + emit_modrm( p, dst, src ); +} + +void sse2_movd( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, 0x66, X86_TWOB); + emit_op_modrm( p, 0x6e, 0x7e, dst, src ); +} + + + + +/*********************************************************************** + * x87 instructions + */ +void x87_fist( struct x86_function *p, struct x86_reg dst ) +{ + emit_1ub(p, 0xdb); + emit_modrm_noreg(p, 2, dst); +} + +void x87_fistp( struct x86_function *p, struct x86_reg dst ) +{ + emit_1ub(p, 0xdb); + emit_modrm_noreg(p, 3, dst); +} + +void x87_fild( struct x86_function *p, struct x86_reg arg ) +{ + emit_1ub(p, 0xdf); + emit_modrm_noreg(p, 0, arg); +} + +void x87_fldz( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xee); +} + + +void x87_fldcw( struct x86_function *p, struct x86_reg arg ) +{ + assert(arg.file == file_REG32); + assert(arg.mod != mod_REG); + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 5, arg); +} + +void x87_fld1( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xe8); +} + +void x87_fldl2e( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xea); +} + +void x87_fldln2( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xed); +} + +void x87_fwait( struct x86_function *p ) +{ + emit_1ub(p, 0x9b); +} + +void x87_fnclex( struct x86_function *p ) +{ + emit_2ub(p, 0xdb, 0xe2); +} + +void x87_fclex( struct x86_function *p ) +{ + x87_fwait(p); + x87_fnclex(p); +} + + +static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, + unsigned char dst0ub0, + unsigned char dst0ub1, + unsigned char arg0ub0, + unsigned char arg0ub1, + unsigned char argmem_noreg) +{ + assert(dst.file == file_x87); + + if (arg.file == file_x87) { + if (dst.idx == 0) + emit_2ub(p, dst0ub0, dst0ub1+arg.idx); + else if (arg.idx == 0) + emit_2ub(p, arg0ub0, arg0ub1+arg.idx); + else + assert(0); + } + else if (dst.idx == 0) { + assert(arg.file == file_REG32); + emit_1ub(p, 0xd8); + emit_modrm_noreg(p, argmem_noreg, arg); + } + else + assert(0); +} + +void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xc8, + 0xdc, 0xc8, + 4); +} + +void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xe0, + 0xdc, 0xe8, + 4); +} + +void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xe8, + 0xdc, 0xe0, + 5); +} + +void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xc0, + 0xdc, 0xc0, + 0); +} + +void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xf0, + 0xdc, 0xf8, + 6); +} + +void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +{ + x87_arith_op(p, dst, arg, + 0xd8, 0xf8, + 0xdc, 0xf0, + 7); +} + +void x87_fmulp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xc8+dst.idx); +} + +void x87_fsubp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xe8+dst.idx); +} + +void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xe0+dst.idx); +} + +void x87_faddp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xc0+dst.idx); +} + +void x87_fdivp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xf8+dst.idx); +} + +void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_x87); + assert(dst.idx >= 1); + emit_2ub(p, 0xde, 0xf0+dst.idx); +} + +void x87_fucom( struct x86_function *p, struct x86_reg arg ) +{ + assert(arg.file == file_x87); + emit_2ub(p, 0xdd, 0xe0+arg.idx); +} + +void x87_fucomp( struct x86_function *p, struct x86_reg arg ) +{ + assert(arg.file == file_x87); + emit_2ub(p, 0xdd, 0xe8+arg.idx); +} + +void x87_fucompp( struct x86_function *p ) +{ + emit_2ub(p, 0xda, 0xe9); +} + +void x87_fxch( struct x86_function *p, struct x86_reg arg ) +{ + assert(arg.file == file_x87); + emit_2ub(p, 0xd9, 0xc8+arg.idx); +} + +void x87_fabs( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xe1); +} + +void x87_fchs( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xe0); +} + +void x87_fcos( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xff); +} + + +void x87_fprndint( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfc); +} + +void x87_fscale( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfd); +} + +void x87_fsin( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfe); +} + +void x87_fsincos( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfb); +} + +void x87_fsqrt( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xfa); +} + +void x87_fxtract( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xf4); +} + +/* st0 = (2^st0)-1 + * + * Restrictions: -1.0 <= st0 <= 1.0 + */ +void x87_f2xm1( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xf0); +} + +/* st1 = st1 * log2(st0); + * pop_stack; + */ +void x87_fyl2x( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xf1); +} + +/* st1 = st1 * log2(st0 + 1.0); + * pop_stack; + * + * A fast operation, with restrictions: -.29 < st0 < .29 + */ +void x87_fyl2xp1( struct x86_function *p ) +{ + emit_2ub(p, 0xd9, 0xf9); +} + + +void x87_fld( struct x86_function *p, struct x86_reg arg ) +{ + if (arg.file == file_x87) + emit_2ub(p, 0xd9, 0xc0 + arg.idx); + else { + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 0, arg); + } +} + +void x87_fst( struct x86_function *p, struct x86_reg dst ) +{ + if (dst.file == file_x87) + emit_2ub(p, 0xdd, 0xd0 + dst.idx); + else { + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 2, dst); + } +} + +void x87_fstp( struct x86_function *p, struct x86_reg dst ) +{ + if (dst.file == file_x87) + emit_2ub(p, 0xdd, 0xd8 + dst.idx); + else { + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 3, dst); + } +} + +void x87_fcom( struct x86_function *p, struct x86_reg dst ) +{ + if (dst.file == file_x87) + emit_2ub(p, 0xd8, 0xd0 + dst.idx); + else { + emit_1ub(p, 0xd8); + emit_modrm_noreg(p, 2, dst); + } +} + +void x87_fcomp( struct x86_function *p, struct x86_reg dst ) +{ + if (dst.file == file_x87) + emit_2ub(p, 0xd8, 0xd8 + dst.idx); + else { + emit_1ub(p, 0xd8); + emit_modrm_noreg(p, 3, dst); + } +} + + +void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) +{ + assert(dst.file == file_REG32); + + if (dst.idx == reg_AX && + dst.mod == mod_REG) + emit_2ub(p, 0xdf, 0xe0); + else { + emit_1ub(p, 0xdd); + emit_modrm_noreg(p, 7, dst); + } +} + + + + +/*********************************************************************** + * MMX instructions + */ + +void mmx_emms( struct x86_function *p ) +{ + assert(p->need_emms); + emit_2ub(p, 0x0f, 0x77); + p->need_emms = 0; +} + +void mmx_packssdw( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.file == file_MMX && + (src.file == file_MMX || src.mod != mod_REG)); + + p->need_emms = 1; + + emit_2ub(p, X86_TWOB, 0x6b); + emit_modrm( p, dst, src ); +} + +void mmx_packuswb( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + assert(dst.file == file_MMX && + (src.file == file_MMX || src.mod != mod_REG)); + + p->need_emms = 1; + + emit_2ub(p, X86_TWOB, 0x67); + emit_modrm( p, dst, src ); +} + +void mmx_movd( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + p->need_emms = 1; + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x6e, 0x7e, dst, src ); +} + +void mmx_movq( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + p->need_emms = 1; + emit_1ub(p, X86_TWOB); + emit_op_modrm( p, 0x6f, 0x7f, dst, src ); +} + + +/*********************************************************************** + * Helper functions + */ + + +/* Retreive a reference to one of the function arguments, taking into + * account any push/pop activity: + */ +struct x86_reg x86_fn_arg( struct x86_function *p, + unsigned arg ) +{ + return x86_make_disp(x86_make_reg(file_REG32, reg_SP), + p->stack_offset + arg * 4); /* ??? */ +} + + +void x86_init_func( struct x86_function *p ) +{ + p->size = 0; + p->store = NULL; + p->csr = p->store; +} + +void x86_init_func_size( struct x86_function *p, unsigned code_size ) +{ + p->size = code_size; + p->store = rtasm_exec_malloc(code_size); + p->csr = p->store; +} + +void x86_release_func( struct x86_function *p ) +{ + rtasm_exec_free(p->store); + p->store = NULL; + p->csr = NULL; + p->size = 0; +} + + +void (*x86_get_func( struct x86_function *p ))(void) +{ + if (DISASSEM && p->store) + debug_printf("disassemble %p %p\n", p->store, p->csr); + return (void (*)(void)) (unsigned long) p->store; +} + +#else + +void x86sse_dummy( void ) +{ +} + +#endif diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h new file mode 100644 index 0000000000..c2aa416492 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -0,0 +1,256 @@ + +#ifndef _X86SSE_H_ +#define _X86SSE_H_ + +#if defined(__i386__) || defined(__386__) + +/* It is up to the caller to ensure that instructions issued are + * suitable for the host cpu. There are no checks made in this module + * for mmx/sse/sse2 support on the cpu. + */ +struct x86_reg { + unsigned file:3; + unsigned idx:3; + unsigned mod:2; /* mod_REG if this is just a register */ + int disp:24; /* only +/- 23bits of offset - should be enough... */ +}; + +struct x86_function { + unsigned size; + unsigned char *store; + unsigned char *csr; + unsigned stack_offset; + int need_emms; + const char *fn; +}; + +enum x86_reg_file { + file_REG32, + file_MMX, + file_XMM, + file_x87 +}; + +/* Values for mod field of modr/m byte + */ +enum x86_reg_mod { + mod_INDIRECT, + mod_DISP8, + mod_DISP32, + mod_REG +}; + +enum x86_reg_name { + reg_AX, + reg_CX, + reg_DX, + reg_BX, + reg_SP, + reg_BP, + reg_SI, + reg_DI +}; + + +enum x86_cc { + cc_O, /* overflow */ + cc_NO, /* not overflow */ + cc_NAE, /* not above or equal / carry */ + cc_AE, /* above or equal / not carry */ + cc_E, /* equal / zero */ + cc_NE /* not equal / not zero */ +}; + +enum sse_cc { + cc_Equal, + cc_LessThan, + cc_LessThanEqual, + cc_Unordered, + cc_NotEqual, + cc_NotLessThan, + cc_NotLessThanEqual, + cc_Ordered +}; + +#define cc_Z cc_E +#define cc_NZ cc_NE + +/* Begin/end/retreive function creation: + */ + + +void x86_init_func( struct x86_function *p ); +void x86_init_func_size( struct x86_function *p, unsigned code_size ); +void x86_release_func( struct x86_function *p ); +void (*x86_get_func( struct x86_function *p ))( void ); + + + +/* Create and manipulate registers and regmem values: + */ +struct x86_reg x86_make_reg( enum x86_reg_file file, + enum x86_reg_name idx ); + +struct x86_reg x86_make_disp( struct x86_reg reg, + int disp ); + +struct x86_reg x86_deref( struct x86_reg reg ); + +struct x86_reg x86_get_base_reg( struct x86_reg reg ); + + +/* Labels, jumps and fixup: + */ +unsigned char *x86_get_label( struct x86_function *p ); + +void x86_jcc( struct x86_function *p, + enum x86_cc cc, + unsigned char *label ); + +unsigned char *x86_jcc_forward( struct x86_function *p, + enum x86_cc cc ); + +unsigned char *x86_jmp_forward( struct x86_function *p); + +unsigned char *x86_call_forward( struct x86_function *p); + +void x86_fixup_fwd_jump( struct x86_function *p, + unsigned char *fixup ); + +void x86_jmp( struct x86_function *p, unsigned char *label ); + +/* void x86_call( struct x86_function *p, void (*label)() ); */ +void x86_call( struct x86_function *p, struct x86_reg reg); + +/* michal: + * Temporary. As I need immediate operands, and dont want to mess with the codegen, + * I load the immediate into general purpose register and use it. + */ +void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); + + +/* Macro for sse_shufps() and sse2_pshufd(): + */ +#define SHUF(_x,_y,_z,_w) (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6)) +#define SHUF_NOOP RSW(0,1,2,3) +#define GET_SHUF(swz, idx) (((swz) >> ((idx)*2)) & 0x3) + +void mmx_emms( struct x86_function *p ); +void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + +void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, + unsigned char shuf ); +void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + +void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, + unsigned char cc ); +void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, + unsigned char shuf ); +void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); + +void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_dec( struct x86_function *p, struct x86_reg reg ); +void x86_inc( struct x86_function *p, struct x86_reg reg ); +void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_mul( struct x86_function *p, struct x86_reg src ); +void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_pop( struct x86_function *p, struct x86_reg reg ); +void x86_push( struct x86_function *p, struct x86_reg reg ); +void x86_ret( struct x86_function *p ); +void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void x86_sahf( struct x86_function *p ); + +void x87_f2xm1( struct x86_function *p ); +void x87_fabs( struct x86_function *p ); +void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_faddp( struct x86_function *p, struct x86_reg dst ); +void x87_fchs( struct x86_function *p ); +void x87_fclex( struct x86_function *p ); +void x87_fcom( struct x86_function *p, struct x86_reg dst ); +void x87_fcomp( struct x86_function *p, struct x86_reg dst ); +void x87_fcos( struct x86_function *p ); +void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fdivp( struct x86_function *p, struct x86_reg dst ); +void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fdivrp( struct x86_function *p, struct x86_reg dst ); +void x87_fild( struct x86_function *p, struct x86_reg arg ); +void x87_fist( struct x86_function *p, struct x86_reg dst ); +void x87_fistp( struct x86_function *p, struct x86_reg dst ); +void x87_fld( struct x86_function *p, struct x86_reg arg ); +void x87_fld1( struct x86_function *p ); +void x87_fldcw( struct x86_function *p, struct x86_reg arg ); +void x87_fldl2e( struct x86_function *p ); +void x87_fldln2( struct x86_function *p ); +void x87_fldz( struct x86_function *p ); +void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fmulp( struct x86_function *p, struct x86_reg dst ); +void x87_fnclex( struct x86_function *p ); +void x87_fprndint( struct x86_function *p ); +void x87_fscale( struct x86_function *p ); +void x87_fsin( struct x86_function *p ); +void x87_fsincos( struct x86_function *p ); +void x87_fsqrt( struct x86_function *p ); +void x87_fst( struct x86_function *p, struct x86_reg dst ); +void x87_fstp( struct x86_function *p, struct x86_reg dst ); +void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fsubp( struct x86_function *p, struct x86_reg dst ); +void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); +void x87_fsubrp( struct x86_function *p, struct x86_reg dst ); +void x87_fxch( struct x86_function *p, struct x86_reg dst ); +void x87_fxtract( struct x86_function *p ); +void x87_fyl2x( struct x86_function *p ); +void x87_fyl2xp1( struct x86_function *p ); +void x87_fwait( struct x86_function *p ); +void x87_fnstsw( struct x86_function *p, struct x86_reg dst ); +void x87_fucompp( struct x86_function *p ); +void x87_fucomp( struct x86_function *p, struct x86_reg arg ); +void x87_fucom( struct x86_function *p, struct x86_reg arg ); + + + +/* Retreive a reference to one of the function arguments, taking into + * account any push/pop activity. Note - doesn't track explict + * manipulation of ESP by other instructions. + */ +struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg ); + +#endif +#endif diff --git a/src/gallium/auxiliary/rtasm/x86sse.c b/src/gallium/auxiliary/rtasm/x86sse.c deleted file mode 100644 index fff6f77a6b..0000000000 --- a/src/gallium/auxiliary/rtasm/x86sse.c +++ /dev/null @@ -1,1195 +0,0 @@ -#if defined(__i386__) || defined(__386__) - -#include "pipe/p_compiler.h" -#include "pipe/p_debug.h" - -#include "x86sse.h" - -#define DISASSEM 0 -#define X86_TWOB 0x0f - -static unsigned char *cptr( void (*label)() ) -{ - return (unsigned char *)(unsigned long)label; -} - - -static void do_realloc( struct x86_function *p ) -{ - if (p->size == 0) { - p->size = 1024; - p->store = _mesa_exec_malloc(p->size); - p->csr = p->store; - } - else { - unsigned used = p->csr - p->store; - unsigned char *tmp = p->store; - p->size *= 2; - p->store = _mesa_exec_malloc(p->size); - memcpy(p->store, tmp, used); - p->csr = p->store + used; - _mesa_exec_free(tmp); - } -} - -/* Emit bytes to the instruction stream: - */ -static unsigned char *reserve( struct x86_function *p, int bytes ) -{ - if (p->csr + bytes - p->store > p->size) - do_realloc(p); - - { - unsigned char *csr = p->csr; - p->csr += bytes; - return csr; - } -} - - - -static void emit_1b( struct x86_function *p, char b0 ) -{ - char *csr = (char *)reserve(p, 1); - *csr = b0; -} - -static void emit_1i( struct x86_function *p, int i0 ) -{ - int *icsr = (int *)reserve(p, sizeof(i0)); - *icsr = i0; -} - -static void emit_1ub( struct x86_function *p, unsigned char b0 ) -{ - unsigned char *csr = reserve(p, 1); - *csr++ = b0; -} - -static void emit_2ub( struct x86_function *p, unsigned char b0, unsigned char b1 ) -{ - unsigned char *csr = reserve(p, 2); - *csr++ = b0; - *csr++ = b1; -} - -static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1, unsigned char b2 ) -{ - unsigned char *csr = reserve(p, 3); - *csr++ = b0; - *csr++ = b1; - *csr++ = b2; -} - - -/* Build a modRM byte + possible displacement. No treatment of SIB - * indexing. BZZT - no way to encode an absolute address. - */ -static void emit_modrm( struct x86_function *p, - struct x86_reg reg, - struct x86_reg regmem ) -{ - unsigned char val = 0; - - assert(reg.mod == mod_REG); - - val |= regmem.mod << 6; /* mod field */ - val |= reg.idx << 3; /* reg field */ - val |= regmem.idx; /* r/m field */ - - emit_1ub(p, val); - - /* Oh-oh we've stumbled into the SIB thing. - */ - if (regmem.file == file_REG32 && - regmem.idx == reg_SP) { - emit_1ub(p, 0x24); /* simplistic! */ - } - - switch (regmem.mod) { - case mod_REG: - case mod_INDIRECT: - break; - case mod_DISP8: - emit_1b(p, regmem.disp); - break; - case mod_DISP32: - emit_1i(p, regmem.disp); - break; - default: - assert(0); - break; - } -} - - -static void emit_modrm_noreg( struct x86_function *p, - unsigned op, - struct x86_reg regmem ) -{ - struct x86_reg dummy = x86_make_reg(file_REG32, op); - emit_modrm(p, dummy, regmem); -} - -/* Many x86 instructions have two opcodes to cope with the situations - * where the destination is a register or memory reference - * respectively. This function selects the correct opcode based on - * the arguments presented. - */ -static void emit_op_modrm( struct x86_function *p, - unsigned char op_dst_is_reg, - unsigned char op_dst_is_mem, - struct x86_reg dst, - struct x86_reg src ) -{ - switch (dst.mod) { - case mod_REG: - emit_1ub(p, op_dst_is_reg); - emit_modrm(p, dst, src); - break; - case mod_INDIRECT: - case mod_DISP32: - case mod_DISP8: - assert(src.mod == mod_REG); - emit_1ub(p, op_dst_is_mem); - emit_modrm(p, src, dst); - break; - default: - assert(0); - break; - } -} - - - - - - - -/* Create and manipulate registers and regmem values: - */ -struct x86_reg x86_make_reg( enum x86_reg_file file, - enum x86_reg_name idx ) -{ - struct x86_reg reg; - - reg.file = file; - reg.idx = idx; - reg.mod = mod_REG; - reg.disp = 0; - - return reg; -} - -struct x86_reg x86_make_disp( struct x86_reg reg, - int disp ) -{ - assert(reg.file == file_REG32); - - if (reg.mod == mod_REG) - reg.disp = disp; - else - reg.disp += disp; - - if (reg.disp == 0) - reg.mod = mod_INDIRECT; - else if (reg.disp <= 127 && reg.disp >= -128) - reg.mod = mod_DISP8; - else - reg.mod = mod_DISP32; - - return reg; -} - -struct x86_reg x86_deref( struct x86_reg reg ) -{ - return x86_make_disp(reg, 0); -} - -struct x86_reg x86_get_base_reg( struct x86_reg reg ) -{ - return x86_make_reg( reg.file, reg.idx ); -} - -unsigned char *x86_get_label( struct x86_function *p ) -{ - return p->csr; -} - - - -/*********************************************************************** - * x86 instructions - */ - - -void x86_jcc( struct x86_function *p, - enum x86_cc cc, - unsigned char *label ) -{ - int offset = label - (x86_get_label(p) + 2); - - if (offset <= 127 && offset >= -128) { - emit_1ub(p, 0x70 + cc); - emit_1b(p, (char) offset); - } - else { - offset = label - (x86_get_label(p) + 6); - emit_2ub(p, 0x0f, 0x80 + cc); - emit_1i(p, offset); - } -} - -/* Always use a 32bit offset for forward jumps: - */ -unsigned char *x86_jcc_forward( struct x86_function *p, - enum x86_cc cc ) -{ - emit_2ub(p, 0x0f, 0x80 + cc); - emit_1i(p, 0); - return x86_get_label(p); -} - -unsigned char *x86_jmp_forward( struct x86_function *p) -{ - emit_1ub(p, 0xe9); - emit_1i(p, 0); - return x86_get_label(p); -} - -unsigned char *x86_call_forward( struct x86_function *p) -{ - emit_1ub(p, 0xe8); - emit_1i(p, 0); - return x86_get_label(p); -} - -/* Fixup offset from forward jump: - */ -void x86_fixup_fwd_jump( struct x86_function *p, - unsigned char *fixup ) -{ - *(int *)(fixup - 4) = x86_get_label(p) - fixup; -} - -void x86_jmp( struct x86_function *p, unsigned char *label) -{ - emit_1ub(p, 0xe9); - emit_1i(p, label - x86_get_label(p) - 4); -} - -#if 0 -/* This doesn't work once we start reallocating & copying the - * generated code on buffer fills, because the call is relative to the - * current pc. - */ -void x86_call( struct x86_function *p, void (*label)()) -{ - emit_1ub(p, 0xe8); - emit_1i(p, cptr(label) - x86_get_label(p) - 4); -} -#else -void x86_call( struct x86_function *p, struct x86_reg reg) -{ - emit_1ub(p, 0xff); - emit_modrm(p, reg, reg); -} -#endif - - -/* michal: - * Temporary. As I need immediate operands, and dont want to mess with the codegen, - * I load the immediate into general purpose register and use it. - */ -void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) -{ - assert(dst.mod == mod_REG); - emit_1ub(p, 0xb8 + dst.idx); - emit_1i(p, imm); -} - -void x86_push( struct x86_function *p, - struct x86_reg reg ) -{ - assert(reg.mod == mod_REG); - emit_1ub(p, 0x50 + reg.idx); - p->stack_offset += 4; -} - -void x86_pop( struct x86_function *p, - struct x86_reg reg ) -{ - assert(reg.mod == mod_REG); - emit_1ub(p, 0x58 + reg.idx); - p->stack_offset -= 4; -} - -void x86_inc( struct x86_function *p, - struct x86_reg reg ) -{ - assert(reg.mod == mod_REG); - emit_1ub(p, 0x40 + reg.idx); -} - -void x86_dec( struct x86_function *p, - struct x86_reg reg ) -{ - assert(reg.mod == mod_REG); - emit_1ub(p, 0x48 + reg.idx); -} - -void x86_ret( struct x86_function *p ) -{ - emit_1ub(p, 0xc3); -} - -void x86_sahf( struct x86_function *p ) -{ - emit_1ub(p, 0x9e); -} - -void x86_mov( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_op_modrm( p, 0x8b, 0x89, dst, src ); -} - -void x86_xor( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_op_modrm( p, 0x33, 0x31, dst, src ); -} - -void x86_cmp( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_op_modrm( p, 0x3b, 0x39, dst, src ); -} - -void x86_lea( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_1ub(p, 0x8d); - emit_modrm( p, dst, src ); -} - -void x86_test( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_1ub(p, 0x85); - emit_modrm( p, dst, src ); -} - -void x86_add( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_op_modrm(p, 0x03, 0x01, dst, src ); -} - -void x86_mul( struct x86_function *p, - struct x86_reg src ) -{ - assert (src.file == file_REG32 && src.mod == mod_REG); - emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src ); -} - -void x86_sub( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_op_modrm(p, 0x2b, 0x29, dst, src ); -} - -void x86_or( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_op_modrm( p, 0x0b, 0x09, dst, src ); -} - -void x86_and( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_op_modrm( p, 0x23, 0x21, dst, src ); -} - - - -/*********************************************************************** - * SSE instructions - */ - - -void sse_movss( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_2ub(p, 0xF3, X86_TWOB); - emit_op_modrm( p, 0x10, 0x11, dst, src ); -} - -void sse_movaps( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_1ub(p, X86_TWOB); - emit_op_modrm( p, 0x28, 0x29, dst, src ); -} - -void sse_movups( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_1ub(p, X86_TWOB); - emit_op_modrm( p, 0x10, 0x11, dst, src ); -} - -void sse_movhps( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - assert(dst.mod != mod_REG || src.mod != mod_REG); - emit_1ub(p, X86_TWOB); - emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ -} - -void sse_movlps( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - assert(dst.mod != mod_REG || src.mod != mod_REG); - emit_1ub(p, X86_TWOB); - emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ -} - -void sse_maxps( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_2ub(p, X86_TWOB, 0x5F); - emit_modrm( p, dst, src ); -} - -void sse_maxss( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_3ub(p, 0xF3, X86_TWOB, 0x5F); - emit_modrm( p, dst, src ); -} - -void sse_divss( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_3ub(p, 0xF3, X86_TWOB, 0x5E); - emit_modrm( p, dst, src ); -} - -void sse_minps( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_2ub(p, X86_TWOB, 0x5D); - emit_modrm( p, dst, src ); -} - -void sse_subps( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_2ub(p, X86_TWOB, 0x5C); - emit_modrm( p, dst, src ); -} - -void sse_mulps( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_2ub(p, X86_TWOB, 0x59); - emit_modrm( p, dst, src ); -} - -void sse_mulss( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_3ub(p, 0xF3, X86_TWOB, 0x59); - emit_modrm( p, dst, src ); -} - -void sse_addps( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_2ub(p, X86_TWOB, 0x58); - emit_modrm( p, dst, src ); -} - -void sse_addss( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_3ub(p, 0xF3, X86_TWOB, 0x58); - emit_modrm( p, dst, src ); -} - -void sse_andnps( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_2ub(p, X86_TWOB, 0x55); - emit_modrm( p, dst, src ); -} - -void sse_andps( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_2ub(p, X86_TWOB, 0x54); - emit_modrm( p, dst, src ); -} - -void sse_rsqrtps( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_2ub(p, X86_TWOB, 0x52); - emit_modrm( p, dst, src ); -} - -void sse_rsqrtss( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_3ub(p, 0xF3, X86_TWOB, 0x52); - emit_modrm( p, dst, src ); - -} - -void sse_movhlps( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - assert(dst.mod == mod_REG && src.mod == mod_REG); - emit_2ub(p, X86_TWOB, 0x12); - emit_modrm( p, dst, src ); -} - -void sse_movlhps( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - assert(dst.mod == mod_REG && src.mod == mod_REG); - emit_2ub(p, X86_TWOB, 0x16); - emit_modrm( p, dst, src ); -} - -void sse_orps( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_2ub(p, X86_TWOB, 0x56); - emit_modrm( p, dst, src ); -} - -void sse_xorps( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_2ub(p, X86_TWOB, 0x57); - emit_modrm( p, dst, src ); -} - -void sse_cvtps2pi( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - assert(dst.file == file_MMX && - (src.file == file_XMM || src.mod != mod_REG)); - - p->need_emms = 1; - - emit_2ub(p, X86_TWOB, 0x2d); - emit_modrm( p, dst, src ); -} - - -/* Shufps can also be used to implement a reduced swizzle when dest == - * arg0. - */ -void sse_shufps( struct x86_function *p, - struct x86_reg dest, - struct x86_reg arg0, - unsigned char shuf) -{ - emit_2ub(p, X86_TWOB, 0xC6); - emit_modrm(p, dest, arg0); - emit_1ub(p, shuf); -} - -void sse_cmpps( struct x86_function *p, - struct x86_reg dest, - struct x86_reg arg0, - unsigned char cc) -{ - emit_2ub(p, X86_TWOB, 0xC2); - emit_modrm(p, dest, arg0); - emit_1ub(p, cc); -} - -void sse_pmovmskb( struct x86_function *p, - struct x86_reg dest, - struct x86_reg src) -{ - emit_3ub(p, 0x66, X86_TWOB, 0xD7); - emit_modrm(p, dest, src); -} - -/*********************************************************************** - * SSE2 instructions - */ - -/** - * Perform a reduced swizzle: - */ -void sse2_pshufd( struct x86_function *p, - struct x86_reg dest, - struct x86_reg arg0, - unsigned char shuf) -{ - emit_3ub(p, 0x66, X86_TWOB, 0x70); - emit_modrm(p, dest, arg0); - emit_1ub(p, shuf); -} - -void sse2_cvttps2dq( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); - emit_modrm( p, dst, src ); -} - -void sse2_cvtps2dq( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_3ub(p, 0x66, X86_TWOB, 0x5B); - emit_modrm( p, dst, src ); -} - -void sse2_packssdw( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_3ub(p, 0x66, X86_TWOB, 0x6B); - emit_modrm( p, dst, src ); -} - -void sse2_packsswb( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_3ub(p, 0x66, X86_TWOB, 0x63); - emit_modrm( p, dst, src ); -} - -void sse2_packuswb( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_3ub(p, 0x66, X86_TWOB, 0x67); - emit_modrm( p, dst, src ); -} - -void sse2_rcpps( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_2ub(p, X86_TWOB, 0x53); - emit_modrm( p, dst, src ); -} - -void sse2_rcpss( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_3ub(p, 0xF3, X86_TWOB, 0x53); - emit_modrm( p, dst, src ); -} - -void sse2_movd( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - emit_2ub(p, 0x66, X86_TWOB); - emit_op_modrm( p, 0x6e, 0x7e, dst, src ); -} - - - - -/*********************************************************************** - * x87 instructions - */ -void x87_fist( struct x86_function *p, struct x86_reg dst ) -{ - emit_1ub(p, 0xdb); - emit_modrm_noreg(p, 2, dst); -} - -void x87_fistp( struct x86_function *p, struct x86_reg dst ) -{ - emit_1ub(p, 0xdb); - emit_modrm_noreg(p, 3, dst); -} - -void x87_fild( struct x86_function *p, struct x86_reg arg ) -{ - emit_1ub(p, 0xdf); - emit_modrm_noreg(p, 0, arg); -} - -void x87_fldz( struct x86_function *p ) -{ - emit_2ub(p, 0xd9, 0xee); -} - - -void x87_fldcw( struct x86_function *p, struct x86_reg arg ) -{ - assert(arg.file == file_REG32); - assert(arg.mod != mod_REG); - emit_1ub(p, 0xd9); - emit_modrm_noreg(p, 5, arg); -} - -void x87_fld1( struct x86_function *p ) -{ - emit_2ub(p, 0xd9, 0xe8); -} - -void x87_fldl2e( struct x86_function *p ) -{ - emit_2ub(p, 0xd9, 0xea); -} - -void x87_fldln2( struct x86_function *p ) -{ - emit_2ub(p, 0xd9, 0xed); -} - -void x87_fwait( struct x86_function *p ) -{ - emit_1ub(p, 0x9b); -} - -void x87_fnclex( struct x86_function *p ) -{ - emit_2ub(p, 0xdb, 0xe2); -} - -void x87_fclex( struct x86_function *p ) -{ - x87_fwait(p); - x87_fnclex(p); -} - - -static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, - unsigned char dst0ub0, - unsigned char dst0ub1, - unsigned char arg0ub0, - unsigned char arg0ub1, - unsigned char argmem_noreg) -{ - assert(dst.file == file_x87); - - if (arg.file == file_x87) { - if (dst.idx == 0) - emit_2ub(p, dst0ub0, dst0ub1+arg.idx); - else if (arg.idx == 0) - emit_2ub(p, arg0ub0, arg0ub1+arg.idx); - else - assert(0); - } - else if (dst.idx == 0) { - assert(arg.file == file_REG32); - emit_1ub(p, 0xd8); - emit_modrm_noreg(p, argmem_noreg, arg); - } - else - assert(0); -} - -void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) -{ - x87_arith_op(p, dst, arg, - 0xd8, 0xc8, - 0xdc, 0xc8, - 4); -} - -void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) -{ - x87_arith_op(p, dst, arg, - 0xd8, 0xe0, - 0xdc, 0xe8, - 4); -} - -void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) -{ - x87_arith_op(p, dst, arg, - 0xd8, 0xe8, - 0xdc, 0xe0, - 5); -} - -void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) -{ - x87_arith_op(p, dst, arg, - 0xd8, 0xc0, - 0xdc, 0xc0, - 0); -} - -void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) -{ - x87_arith_op(p, dst, arg, - 0xd8, 0xf0, - 0xdc, 0xf8, - 6); -} - -void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) -{ - x87_arith_op(p, dst, arg, - 0xd8, 0xf8, - 0xdc, 0xf0, - 7); -} - -void x87_fmulp( struct x86_function *p, struct x86_reg dst ) -{ - assert(dst.file == file_x87); - assert(dst.idx >= 1); - emit_2ub(p, 0xde, 0xc8+dst.idx); -} - -void x87_fsubp( struct x86_function *p, struct x86_reg dst ) -{ - assert(dst.file == file_x87); - assert(dst.idx >= 1); - emit_2ub(p, 0xde, 0xe8+dst.idx); -} - -void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) -{ - assert(dst.file == file_x87); - assert(dst.idx >= 1); - emit_2ub(p, 0xde, 0xe0+dst.idx); -} - -void x87_faddp( struct x86_function *p, struct x86_reg dst ) -{ - assert(dst.file == file_x87); - assert(dst.idx >= 1); - emit_2ub(p, 0xde, 0xc0+dst.idx); -} - -void x87_fdivp( struct x86_function *p, struct x86_reg dst ) -{ - assert(dst.file == file_x87); - assert(dst.idx >= 1); - emit_2ub(p, 0xde, 0xf8+dst.idx); -} - -void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) -{ - assert(dst.file == file_x87); - assert(dst.idx >= 1); - emit_2ub(p, 0xde, 0xf0+dst.idx); -} - -void x87_fucom( struct x86_function *p, struct x86_reg arg ) -{ - assert(arg.file == file_x87); - emit_2ub(p, 0xdd, 0xe0+arg.idx); -} - -void x87_fucomp( struct x86_function *p, struct x86_reg arg ) -{ - assert(arg.file == file_x87); - emit_2ub(p, 0xdd, 0xe8+arg.idx); -} - -void x87_fucompp( struct x86_function *p ) -{ - emit_2ub(p, 0xda, 0xe9); -} - -void x87_fxch( struct x86_function *p, struct x86_reg arg ) -{ - assert(arg.file == file_x87); - emit_2ub(p, 0xd9, 0xc8+arg.idx); -} - -void x87_fabs( struct x86_function *p ) -{ - emit_2ub(p, 0xd9, 0xe1); -} - -void x87_fchs( struct x86_function *p ) -{ - emit_2ub(p, 0xd9, 0xe0); -} - -void x87_fcos( struct x86_function *p ) -{ - emit_2ub(p, 0xd9, 0xff); -} - - -void x87_fprndint( struct x86_function *p ) -{ - emit_2ub(p, 0xd9, 0xfc); -} - -void x87_fscale( struct x86_function *p ) -{ - emit_2ub(p, 0xd9, 0xfd); -} - -void x87_fsin( struct x86_function *p ) -{ - emit_2ub(p, 0xd9, 0xfe); -} - -void x87_fsincos( struct x86_function *p ) -{ - emit_2ub(p, 0xd9, 0xfb); -} - -void x87_fsqrt( struct x86_function *p ) -{ - emit_2ub(p, 0xd9, 0xfa); -} - -void x87_fxtract( struct x86_function *p ) -{ - emit_2ub(p, 0xd9, 0xf4); -} - -/* st0 = (2^st0)-1 - * - * Restrictions: -1.0 <= st0 <= 1.0 - */ -void x87_f2xm1( struct x86_function *p ) -{ - emit_2ub(p, 0xd9, 0xf0); -} - -/* st1 = st1 * log2(st0); - * pop_stack; - */ -void x87_fyl2x( struct x86_function *p ) -{ - emit_2ub(p, 0xd9, 0xf1); -} - -/* st1 = st1 * log2(st0 + 1.0); - * pop_stack; - * - * A fast operation, with restrictions: -.29 < st0 < .29 - */ -void x87_fyl2xp1( struct x86_function *p ) -{ - emit_2ub(p, 0xd9, 0xf9); -} - - -void x87_fld( struct x86_function *p, struct x86_reg arg ) -{ - if (arg.file == file_x87) - emit_2ub(p, 0xd9, 0xc0 + arg.idx); - else { - emit_1ub(p, 0xd9); - emit_modrm_noreg(p, 0, arg); - } -} - -void x87_fst( struct x86_function *p, struct x86_reg dst ) -{ - if (dst.file == file_x87) - emit_2ub(p, 0xdd, 0xd0 + dst.idx); - else { - emit_1ub(p, 0xd9); - emit_modrm_noreg(p, 2, dst); - } -} - -void x87_fstp( struct x86_function *p, struct x86_reg dst ) -{ - if (dst.file == file_x87) - emit_2ub(p, 0xdd, 0xd8 + dst.idx); - else { - emit_1ub(p, 0xd9); - emit_modrm_noreg(p, 3, dst); - } -} - -void x87_fcom( struct x86_function *p, struct x86_reg dst ) -{ - if (dst.file == file_x87) - emit_2ub(p, 0xd8, 0xd0 + dst.idx); - else { - emit_1ub(p, 0xd8); - emit_modrm_noreg(p, 2, dst); - } -} - -void x87_fcomp( struct x86_function *p, struct x86_reg dst ) -{ - if (dst.file == file_x87) - emit_2ub(p, 0xd8, 0xd8 + dst.idx); - else { - emit_1ub(p, 0xd8); - emit_modrm_noreg(p, 3, dst); - } -} - - -void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) -{ - assert(dst.file == file_REG32); - - if (dst.idx == reg_AX && - dst.mod == mod_REG) - emit_2ub(p, 0xdf, 0xe0); - else { - emit_1ub(p, 0xdd); - emit_modrm_noreg(p, 7, dst); - } -} - - - - -/*********************************************************************** - * MMX instructions - */ - -void mmx_emms( struct x86_function *p ) -{ - assert(p->need_emms); - emit_2ub(p, 0x0f, 0x77); - p->need_emms = 0; -} - -void mmx_packssdw( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - assert(dst.file == file_MMX && - (src.file == file_MMX || src.mod != mod_REG)); - - p->need_emms = 1; - - emit_2ub(p, X86_TWOB, 0x6b); - emit_modrm( p, dst, src ); -} - -void mmx_packuswb( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - assert(dst.file == file_MMX && - (src.file == file_MMX || src.mod != mod_REG)); - - p->need_emms = 1; - - emit_2ub(p, X86_TWOB, 0x67); - emit_modrm( p, dst, src ); -} - -void mmx_movd( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - p->need_emms = 1; - emit_1ub(p, X86_TWOB); - emit_op_modrm( p, 0x6e, 0x7e, dst, src ); -} - -void mmx_movq( struct x86_function *p, - struct x86_reg dst, - struct x86_reg src ) -{ - p->need_emms = 1; - emit_1ub(p, X86_TWOB); - emit_op_modrm( p, 0x6f, 0x7f, dst, src ); -} - - -/*********************************************************************** - * Helper functions - */ - - -/* Retreive a reference to one of the function arguments, taking into - * account any push/pop activity: - */ -struct x86_reg x86_fn_arg( struct x86_function *p, - unsigned arg ) -{ - return x86_make_disp(x86_make_reg(file_REG32, reg_SP), - p->stack_offset + arg * 4); /* ??? */ -} - - -void x86_init_func( struct x86_function *p ) -{ - p->size = 0; - p->store = NULL; - p->csr = p->store; -} - -void x86_init_func_size( struct x86_function *p, unsigned code_size ) -{ - p->size = code_size; - p->store = _mesa_exec_malloc(code_size); - p->csr = p->store; -} - -void x86_release_func( struct x86_function *p ) -{ - _mesa_exec_free(p->store); - p->store = NULL; - p->csr = NULL; - p->size = 0; -} - - -void (*x86_get_func( struct x86_function *p ))(void) -{ - if (DISASSEM && p->store) - _mesa_printf("disassemble %p %p\n", p->store, p->csr); - return (void (*)(void)) (unsigned long) p->store; -} - -#else - -void x86sse_dummy( void ) -{ -} - -#endif diff --git a/src/gallium/auxiliary/rtasm/x86sse.h b/src/gallium/auxiliary/rtasm/x86sse.h deleted file mode 100644 index c2aa416492..0000000000 --- a/src/gallium/auxiliary/rtasm/x86sse.h +++ /dev/null @@ -1,256 +0,0 @@ - -#ifndef _X86SSE_H_ -#define _X86SSE_H_ - -#if defined(__i386__) || defined(__386__) - -/* It is up to the caller to ensure that instructions issued are - * suitable for the host cpu. There are no checks made in this module - * for mmx/sse/sse2 support on the cpu. - */ -struct x86_reg { - unsigned file:3; - unsigned idx:3; - unsigned mod:2; /* mod_REG if this is just a register */ - int disp:24; /* only +/- 23bits of offset - should be enough... */ -}; - -struct x86_function { - unsigned size; - unsigned char *store; - unsigned char *csr; - unsigned stack_offset; - int need_emms; - const char *fn; -}; - -enum x86_reg_file { - file_REG32, - file_MMX, - file_XMM, - file_x87 -}; - -/* Values for mod field of modr/m byte - */ -enum x86_reg_mod { - mod_INDIRECT, - mod_DISP8, - mod_DISP32, - mod_REG -}; - -enum x86_reg_name { - reg_AX, - reg_CX, - reg_DX, - reg_BX, - reg_SP, - reg_BP, - reg_SI, - reg_DI -}; - - -enum x86_cc { - cc_O, /* overflow */ - cc_NO, /* not overflow */ - cc_NAE, /* not above or equal / carry */ - cc_AE, /* above or equal / not carry */ - cc_E, /* equal / zero */ - cc_NE /* not equal / not zero */ -}; - -enum sse_cc { - cc_Equal, - cc_LessThan, - cc_LessThanEqual, - cc_Unordered, - cc_NotEqual, - cc_NotLessThan, - cc_NotLessThanEqual, - cc_Ordered -}; - -#define cc_Z cc_E -#define cc_NZ cc_NE - -/* Begin/end/retreive function creation: - */ - - -void x86_init_func( struct x86_function *p ); -void x86_init_func_size( struct x86_function *p, unsigned code_size ); -void x86_release_func( struct x86_function *p ); -void (*x86_get_func( struct x86_function *p ))( void ); - - - -/* Create and manipulate registers and regmem values: - */ -struct x86_reg x86_make_reg( enum x86_reg_file file, - enum x86_reg_name idx ); - -struct x86_reg x86_make_disp( struct x86_reg reg, - int disp ); - -struct x86_reg x86_deref( struct x86_reg reg ); - -struct x86_reg x86_get_base_reg( struct x86_reg reg ); - - -/* Labels, jumps and fixup: - */ -unsigned char *x86_get_label( struct x86_function *p ); - -void x86_jcc( struct x86_function *p, - enum x86_cc cc, - unsigned char *label ); - -unsigned char *x86_jcc_forward( struct x86_function *p, - enum x86_cc cc ); - -unsigned char *x86_jmp_forward( struct x86_function *p); - -unsigned char *x86_call_forward( struct x86_function *p); - -void x86_fixup_fwd_jump( struct x86_function *p, - unsigned char *fixup ); - -void x86_jmp( struct x86_function *p, unsigned char *label ); - -/* void x86_call( struct x86_function *p, void (*label)() ); */ -void x86_call( struct x86_function *p, struct x86_reg reg); - -/* michal: - * Temporary. As I need immediate operands, and dont want to mess with the codegen, - * I load the immediate into general purpose register and use it. - */ -void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); - - -/* Macro for sse_shufps() and sse2_pshufd(): - */ -#define SHUF(_x,_y,_z,_w) (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6)) -#define SHUF_NOOP RSW(0,1,2,3) -#define GET_SHUF(swz, idx) (((swz) >> ((idx)*2)) & 0x3) - -void mmx_emms( struct x86_function *p ); -void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); - -void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, - unsigned char shuf ); -void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); - -void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, - unsigned char cc ); -void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, - unsigned char shuf ); -void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); - -void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void x86_dec( struct x86_function *p, struct x86_reg reg ); -void x86_inc( struct x86_function *p, struct x86_reg reg ); -void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void x86_mul( struct x86_function *p, struct x86_reg src ); -void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void x86_pop( struct x86_function *p, struct x86_reg reg ); -void x86_push( struct x86_function *p, struct x86_reg reg ); -void x86_ret( struct x86_function *p ); -void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -void x86_sahf( struct x86_function *p ); - -void x87_f2xm1( struct x86_function *p ); -void x87_fabs( struct x86_function *p ); -void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); -void x87_faddp( struct x86_function *p, struct x86_reg dst ); -void x87_fchs( struct x86_function *p ); -void x87_fclex( struct x86_function *p ); -void x87_fcom( struct x86_function *p, struct x86_reg dst ); -void x87_fcomp( struct x86_function *p, struct x86_reg dst ); -void x87_fcos( struct x86_function *p ); -void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); -void x87_fdivp( struct x86_function *p, struct x86_reg dst ); -void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); -void x87_fdivrp( struct x86_function *p, struct x86_reg dst ); -void x87_fild( struct x86_function *p, struct x86_reg arg ); -void x87_fist( struct x86_function *p, struct x86_reg dst ); -void x87_fistp( struct x86_function *p, struct x86_reg dst ); -void x87_fld( struct x86_function *p, struct x86_reg arg ); -void x87_fld1( struct x86_function *p ); -void x87_fldcw( struct x86_function *p, struct x86_reg arg ); -void x87_fldl2e( struct x86_function *p ); -void x87_fldln2( struct x86_function *p ); -void x87_fldz( struct x86_function *p ); -void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); -void x87_fmulp( struct x86_function *p, struct x86_reg dst ); -void x87_fnclex( struct x86_function *p ); -void x87_fprndint( struct x86_function *p ); -void x87_fscale( struct x86_function *p ); -void x87_fsin( struct x86_function *p ); -void x87_fsincos( struct x86_function *p ); -void x87_fsqrt( struct x86_function *p ); -void x87_fst( struct x86_function *p, struct x86_reg dst ); -void x87_fstp( struct x86_function *p, struct x86_reg dst ); -void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); -void x87_fsubp( struct x86_function *p, struct x86_reg dst ); -void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); -void x87_fsubrp( struct x86_function *p, struct x86_reg dst ); -void x87_fxch( struct x86_function *p, struct x86_reg dst ); -void x87_fxtract( struct x86_function *p ); -void x87_fyl2x( struct x86_function *p ); -void x87_fyl2xp1( struct x86_function *p ); -void x87_fwait( struct x86_function *p ); -void x87_fnstsw( struct x86_function *p, struct x86_reg dst ); -void x87_fucompp( struct x86_function *p ); -void x87_fucomp( struct x86_function *p, struct x86_reg arg ); -void x87_fucom( struct x86_function *p, struct x86_reg arg ); - - - -/* Retreive a reference to one of the function arguments, taking into - * account any push/pop activity. Note - doesn't track explict - * manipulation of ESP by other instructions. - */ -struct x86_reg x86_fn_arg( struct x86_function *p, unsigned arg ); - -#endif -#endif -- cgit v1.2.3 From d2f6c9ab10656f6ecda131a6785a60565026d249 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 19 Feb 2008 12:05:32 +0900 Subject: Add copyright headers to all rtasm source files. --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 23 +++++++++++++++++++++++ src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 28 +++++++++++++++++++++++++--- 2 files changed, 48 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 3c885a9fff..b332192a62 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -1,3 +1,26 @@ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #if defined(__i386__) || defined(__386__) #include "pipe/p_compiler.h" diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index c2aa416492..e4576001bf 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -1,6 +1,28 @@ - -#ifndef _X86SSE_H_ -#define _X86SSE_H_ +/************************************************************************** + * + * Copyright (C) 1999-2005 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef _RTASM_X86SSE_H_ +#define _RTASM_X86SSE_H_ #if defined(__i386__) || defined(__386__) -- cgit v1.2.3 From 17158c2f00f5bee29ec8239367fd5498f22e4a91 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 19 Feb 2008 12:24:42 +0900 Subject: Move mm.c code into util module. Using the u_ prefix to distingish the c source files that support gallium interfaces and those that have really no relation with gallium itself. --- src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 302 +----------------------- src/gallium/auxiliary/rtasm/Makefile | 5 +- src/gallium/auxiliary/rtasm/SConscript | 3 +- src/gallium/auxiliary/rtasm/mm.c | 283 ---------------------- src/gallium/auxiliary/rtasm/mm.h | 89 ------- src/gallium/auxiliary/rtasm/rtasm_execmem.c | 2 +- src/gallium/auxiliary/util/Makefile | 3 +- src/gallium/auxiliary/util/SConscript | 1 + src/gallium/auxiliary/util/u_mm.c | 283 ++++++++++++++++++++++ src/gallium/auxiliary/util/u_mm.h | 91 +++++++ 10 files changed, 382 insertions(+), 680 deletions(-) delete mode 100644 src/gallium/auxiliary/rtasm/mm.c delete mode 100644 src/gallium/auxiliary/rtasm/mm.h create mode 100644 src/gallium/auxiliary/util/u_mm.c create mode 100644 src/gallium/auxiliary/util/u_mm.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index 969aab51b5..983a105347 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -1,7 +1,6 @@ /************************************************************************** * * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * Copyright 1999 Wittawat Yamwong * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -40,6 +39,7 @@ #include "pipe/p_debug.h" #include "pipe/p_thread.h" #include "pipe/p_util.h" +#include "util/u_mm.h" #include "pb_buffer.h" #include "pb_bufmgr.h" @@ -50,306 +50,6 @@ #define SUPER(__derived) (&(__derived)->base) -struct mem_block -{ - struct mem_block *next, *prev; - struct mem_block *next_free, *prev_free; - struct mem_block *heap; - int ofs, size; - unsigned int free:1; - unsigned int reserved:1; -}; - - -#ifdef DEBUG -/** - * For debugging purposes. - */ -static void -mmDumpMemInfo(const struct mem_block *heap) -{ - debug_printf("Memory heap %p:\n", (void *)heap); - if (heap == 0) { - debug_printf(" heap == 0\n"); - } else { - const struct mem_block *p; - - for(p = heap->next; p != heap; p = p->next) { - debug_printf(" Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, - p->free ? 'F':'.', - p->reserved ? 'R':'.'); - } - - debug_printf("\nFree list:\n"); - - for(p = heap->next_free; p != heap; p = p->next_free) { - debug_printf(" FREE Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, - p->free ? 'F':'.', - p->reserved ? 'R':'.'); - } - - } - debug_printf("End of memory blocks\n"); -} -#endif - - -/** - * input: total size in bytes - * return: a heap pointer if OK, NULL if error - */ -static struct mem_block * -mmInit(int ofs, int size) -{ - struct mem_block *heap, *block; - - if (size <= 0) - return NULL; - - heap = CALLOC_STRUCT(mem_block); - if (!heap) - return NULL; - - block = CALLOC_STRUCT(mem_block); - if (!block) { - FREE(heap); - return NULL; - } - - heap->next = block; - heap->prev = block; - heap->next_free = block; - heap->prev_free = block; - - block->heap = heap; - block->next = heap; - block->prev = heap; - block->next_free = heap; - block->prev_free = heap; - - block->ofs = ofs; - block->size = size; - block->free = 1; - - return heap; -} - - -static struct mem_block * -SliceBlock(struct mem_block *p, - int startofs, int size, - int reserved, int alignment) -{ - struct mem_block *newblock; - - /* break left [p, newblock, p->next], then p = newblock */ - if (startofs > p->ofs) { - newblock = CALLOC_STRUCT(mem_block); - if (!newblock) - return NULL; - newblock->ofs = startofs; - newblock->size = p->size - (startofs - p->ofs); - newblock->free = 1; - newblock->heap = p->heap; - - newblock->next = p->next; - newblock->prev = p; - p->next->prev = newblock; - p->next = newblock; - - newblock->next_free = p->next_free; - newblock->prev_free = p; - p->next_free->prev_free = newblock; - p->next_free = newblock; - - p->size -= newblock->size; - p = newblock; - } - - /* break right, also [p, newblock, p->next] */ - if (size < p->size) { - newblock = CALLOC_STRUCT(mem_block); - if (!newblock) - return NULL; - newblock->ofs = startofs + size; - newblock->size = p->size - size; - newblock->free = 1; - newblock->heap = p->heap; - - newblock->next = p->next; - newblock->prev = p; - p->next->prev = newblock; - p->next = newblock; - - newblock->next_free = p->next_free; - newblock->prev_free = p; - p->next_free->prev_free = newblock; - p->next_free = newblock; - - p->size = size; - } - - /* p = middle block */ - p->free = 0; - - /* Remove p from the free list: - */ - p->next_free->prev_free = p->prev_free; - p->prev_free->next_free = p->next_free; - - p->next_free = 0; - p->prev_free = 0; - - p->reserved = reserved; - return p; -} - - -/** - * Allocate 'size' bytes with 2^align2 bytes alignment, - * restrict the search to free memory after 'startSearch' - * depth and back buffers should be in different 4mb banks - * to get better page hits if possible - * input: size = size of block - * align2 = 2^align2 bytes alignment - * startSearch = linear offset from start of heap to begin search - * return: pointer to the allocated block, 0 if error - */ -static struct mem_block * -mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) -{ - struct mem_block *p; - const int mask = (1 << align2)-1; - int startofs = 0; - int endofs; - - if (!heap || align2 < 0 || size <= 0) - return NULL; - - for (p = heap->next_free; p != heap; p = p->next_free) { - assert(p->free); - - startofs = (p->ofs + mask) & ~mask; - if ( startofs < startSearch ) { - startofs = startSearch; - } - endofs = startofs+size; - if (endofs <= (p->ofs+p->size)) - break; - } - - if (p == heap) - return NULL; - - assert(p->free); - p = SliceBlock(p,startofs,size,0,mask+1); - - return p; -} - - -#if 0 -/** - * Free block starts at offset - * input: pointer to a heap, start offset - * return: pointer to a block - */ -static struct mem_block * -mmFindBlock(struct mem_block *heap, int start) -{ - struct mem_block *p; - - for (p = heap->next; p != heap; p = p->next) { - if (p->ofs == start) - return p; - } - - return NULL; -} -#endif - - -static INLINE int -Join2Blocks(struct mem_block *p) -{ - /* XXX there should be some assertions here */ - - /* NOTE: heap->free == 0 */ - - if (p->free && p->next->free) { - struct mem_block *q = p->next; - - assert(p->ofs + p->size == q->ofs); - p->size += q->size; - - p->next = q->next; - q->next->prev = p; - - q->next_free->prev_free = q->prev_free; - q->prev_free->next_free = q->next_free; - - FREE(q); - return 1; - } - return 0; -} - - -/** - * Free block starts at offset - * input: pointer to a block - * return: 0 if OK, -1 if error - */ -static int -mmFreeMem(struct mem_block *b) -{ - if (!b) - return 0; - - if (b->free) { - debug_printf("block already free\n"); - return -1; - } - if (b->reserved) { - debug_printf("block is reserved\n"); - return -1; - } - - b->free = 1; - b->next_free = b->heap->next_free; - b->prev_free = b->heap; - b->next_free->prev_free = b; - b->prev_free->next_free = b; - - Join2Blocks(b); - if (b->prev != b->heap) - Join2Blocks(b->prev); - - return 0; -} - - -/** - * destroy MM - */ -static void -mmDestroy(struct mem_block *heap) -{ - struct mem_block *p; - - if (!heap) - return; - - for (p = heap->next; p != heap; ) { - struct mem_block *next = p->next; - FREE(p); - p = next; - } - - FREE(heap); -} - - struct mm_pb_manager { struct pb_manager base; diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile index 7c8ac60794..edfae2a204 100644 --- a/src/gallium/auxiliary/rtasm/Makefile +++ b/src/gallium/auxiliary/rtasm/Makefile @@ -5,9 +5,8 @@ include $(TOP)/configs/current LIBNAME = rtasm DRIVER_SOURCES = \ - execmem.c \ - x86sse.c \ - mm.c + rtasm_execmem.c \ + rtasm_x86sse.c C_SOURCES = \ $(DRIVER_SOURCES) diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript index de8456e0ca..6eca1fe4c0 100644 --- a/src/gallium/auxiliary/rtasm/SConscript +++ b/src/gallium/auxiliary/rtasm/SConscript @@ -4,8 +4,7 @@ rtasm = env.ConvenienceLibrary( target = 'rtasm', source = [ 'rtasm_execmem.c', - 'rtasm_x86sse.c', - 'mm.c', + 'rtasm_x86sse.c' ]) auxiliaries.insert(0, rtasm) diff --git a/src/gallium/auxiliary/rtasm/mm.c b/src/gallium/auxiliary/rtasm/mm.c deleted file mode 100644 index 15f50491da..0000000000 --- a/src/gallium/auxiliary/rtasm/mm.c +++ /dev/null @@ -1,283 +0,0 @@ -/* - * GLX Hardware Device Driver common code - * Copyright (C) 1999 Wittawat Yamwong - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * WITTAWAT YAMWONG, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE - * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - */ - - -#include "pipe/p_compiler.h" -#include "pipe/p_util.h" -#include "pipe/p_debug.h" - -#include "mm.h" - - -void -mmDumpMemInfo(const struct mem_block *heap) -{ - debug_printf("Memory heap %p:\n", (void *)heap); - if (heap == 0) { - debug_printf(" heap == 0\n"); - } else { - const struct mem_block *p; - - for(p = heap->next; p != heap; p = p->next) { - debug_printf(" Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, - p->free ? 'F':'.', - p->reserved ? 'R':'.'); - } - - debug_printf("\nFree list:\n"); - - for(p = heap->next_free; p != heap; p = p->next_free) { - debug_printf(" FREE Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, - p->free ? 'F':'.', - p->reserved ? 'R':'.'); - } - - } - debug_printf("End of memory blocks\n"); -} - -struct mem_block * -mmInit(int ofs, int size) -{ - struct mem_block *heap, *block; - - if (size <= 0) - return NULL; - - heap = CALLOC_STRUCT(mem_block); - if (!heap) - return NULL; - - block = CALLOC_STRUCT(mem_block); - if (!block) { - FREE(heap); - return NULL; - } - - heap->next = block; - heap->prev = block; - heap->next_free = block; - heap->prev_free = block; - - block->heap = heap; - block->next = heap; - block->prev = heap; - block->next_free = heap; - block->prev_free = heap; - - block->ofs = ofs; - block->size = size; - block->free = 1; - - return heap; -} - - -static struct mem_block * -SliceBlock(struct mem_block *p, - int startofs, int size, - int reserved, int alignment) -{ - struct mem_block *newblock; - - /* break left [p, newblock, p->next], then p = newblock */ - if (startofs > p->ofs) { - newblock = CALLOC_STRUCT(mem_block); - if (!newblock) - return NULL; - newblock->ofs = startofs; - newblock->size = p->size - (startofs - p->ofs); - newblock->free = 1; - newblock->heap = p->heap; - - newblock->next = p->next; - newblock->prev = p; - p->next->prev = newblock; - p->next = newblock; - - newblock->next_free = p->next_free; - newblock->prev_free = p; - p->next_free->prev_free = newblock; - p->next_free = newblock; - - p->size -= newblock->size; - p = newblock; - } - - /* break right, also [p, newblock, p->next] */ - if (size < p->size) { - newblock = CALLOC_STRUCT(mem_block); - if (!newblock) - return NULL; - newblock->ofs = startofs + size; - newblock->size = p->size - size; - newblock->free = 1; - newblock->heap = p->heap; - - newblock->next = p->next; - newblock->prev = p; - p->next->prev = newblock; - p->next = newblock; - - newblock->next_free = p->next_free; - newblock->prev_free = p; - p->next_free->prev_free = newblock; - p->next_free = newblock; - - p->size = size; - } - - /* p = middle block */ - p->free = 0; - - /* Remove p from the free list: - */ - p->next_free->prev_free = p->prev_free; - p->prev_free->next_free = p->next_free; - - p->next_free = 0; - p->prev_free = 0; - - p->reserved = reserved; - return p; -} - - -struct mem_block * -mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) -{ - struct mem_block *p; - const int mask = (1 << align2)-1; - int startofs = 0; - int endofs; - - if (!heap || align2 < 0 || size <= 0) - return NULL; - - for (p = heap->next_free; p != heap; p = p->next_free) { - assert(p->free); - - startofs = (p->ofs + mask) & ~mask; - if ( startofs < startSearch ) { - startofs = startSearch; - } - endofs = startofs+size; - if (endofs <= (p->ofs+p->size)) - break; - } - - if (p == heap) - return NULL; - - assert(p->free); - p = SliceBlock(p,startofs,size,0,mask+1); - - return p; -} - - -struct mem_block * -mmFindBlock(struct mem_block *heap, int start) -{ - struct mem_block *p; - - for (p = heap->next; p != heap; p = p->next) { - if (p->ofs == start) - return p; - } - - return NULL; -} - - -static INLINE int -Join2Blocks(struct mem_block *p) -{ - /* XXX there should be some assertions here */ - - /* NOTE: heap->free == 0 */ - - if (p->free && p->next->free) { - struct mem_block *q = p->next; - - assert(p->ofs + p->size == q->ofs); - p->size += q->size; - - p->next = q->next; - q->next->prev = p; - - q->next_free->prev_free = q->prev_free; - q->prev_free->next_free = q->next_free; - - FREE(q); - return 1; - } - return 0; -} - -int -mmFreeMem(struct mem_block *b) -{ - if (!b) - return 0; - - if (b->free) { - debug_printf("block already free\n"); - return -1; - } - if (b->reserved) { - debug_printf("block is reserved\n"); - return -1; - } - - b->free = 1; - b->next_free = b->heap->next_free; - b->prev_free = b->heap; - b->next_free->prev_free = b; - b->prev_free->next_free = b; - - Join2Blocks(b); - if (b->prev != b->heap) - Join2Blocks(b->prev); - - return 0; -} - - -void -mmDestroy(struct mem_block *heap) -{ - struct mem_block *p; - - if (!heap) - return; - - for (p = heap->next; p != heap; ) { - struct mem_block *next = p->next; - FREE(p); - p = next; - } - - FREE(heap); -} diff --git a/src/gallium/auxiliary/rtasm/mm.h b/src/gallium/auxiliary/rtasm/mm.h deleted file mode 100644 index f469b18d3e..0000000000 --- a/src/gallium/auxiliary/rtasm/mm.h +++ /dev/null @@ -1,89 +0,0 @@ -/* - * GLX Hardware Device Driver common code - * Copyright (C) 1999 Wittawat Yamwong - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * the rights to use, copy, modify, merge, publish, distribute, sublicense, - * and/or sell copies of the Software, and to permit persons to whom the - * Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included - * in all copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL - * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE - * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - -/** - * Memory manager code. Primarily used by device drivers to manage texture - * heaps, etc. - */ - - -#ifndef MM_H -#define MM_H - - -struct mem_block { - struct mem_block *next, *prev; - struct mem_block *next_free, *prev_free; - struct mem_block *heap; - int ofs,size; - unsigned int free:1; - unsigned int reserved:1; -}; - - - -/** - * input: total size in bytes - * return: a heap pointer if OK, NULL if error - */ -extern struct mem_block *mmInit(int ofs, int size); - -/** - * Allocate 'size' bytes with 2^align2 bytes alignment, - * restrict the search to free memory after 'startSearch' - * depth and back buffers should be in different 4mb banks - * to get better page hits if possible - * input: size = size of block - * align2 = 2^align2 bytes alignment - * startSearch = linear offset from start of heap to begin search - * return: pointer to the allocated block, 0 if error - */ -extern struct mem_block *mmAllocMem(struct mem_block *heap, int size, int align2, - int startSearch); - -/** - * Free block starts at offset - * input: pointer to a block - * return: 0 if OK, -1 if error - */ -extern int mmFreeMem(struct mem_block *b); - -/** - * Free block starts at offset - * input: pointer to a heap, start offset - * return: pointer to a block - */ -extern struct mem_block *mmFindBlock(struct mem_block *heap, int start); - -/** - * destroy MM - */ -extern void mmDestroy(struct mem_block *mmInit); - -/** - * For debuging purpose. - */ -extern void mmDumpMemInfo(const struct mem_block *mmInit); - -#endif diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c index cb13db2498..9c78fa5626 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -46,7 +46,7 @@ #include #include -#include "mm.h" +#include "util/u_mm.h" #define EXEC_HEAP_SIZE (10*1024*1024) diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index b8cb148c4f..7cc2aa44f9 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -7,7 +7,8 @@ LIBNAME = util DRIVER_SOURCES = \ p_debug.c \ p_tile.c \ - p_util.c + p_util.c \ + u_mm.c C_SOURCES = \ $(DRIVER_SOURCES) diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index b126cf44d6..4717941434 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -6,6 +6,7 @@ util = env.ConvenienceLibrary( 'p_debug.c', 'p_tile.c', 'p_util.c', + 'u_mm.c', ]) auxiliaries.insert(0, util) diff --git a/src/gallium/auxiliary/util/u_mm.c b/src/gallium/auxiliary/util/u_mm.c new file mode 100644 index 0000000000..b49ae074e0 --- /dev/null +++ b/src/gallium/auxiliary/util/u_mm.c @@ -0,0 +1,283 @@ +/************************************************************************** + * + * Copyright (C) 1999 Wittawat Yamwong + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * WITTAWAT YAMWONG, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_compiler.h" +#include "pipe/p_util.h" +#include "pipe/p_debug.h" + +#include "util/u_mm.h" + + +void +mmDumpMemInfo(const struct mem_block *heap) +{ + debug_printf("Memory heap %p:\n", (void *)heap); + if (heap == 0) { + debug_printf(" heap == 0\n"); + } else { + const struct mem_block *p; + + for(p = heap->next; p != heap; p = p->next) { + debug_printf(" Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, + p->free ? 'F':'.', + p->reserved ? 'R':'.'); + } + + debug_printf("\nFree list:\n"); + + for(p = heap->next_free; p != heap; p = p->next_free) { + debug_printf(" FREE Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size, + p->free ? 'F':'.', + p->reserved ? 'R':'.'); + } + + } + debug_printf("End of memory blocks\n"); +} + +struct mem_block * +mmInit(int ofs, int size) +{ + struct mem_block *heap, *block; + + if (size <= 0) + return NULL; + + heap = CALLOC_STRUCT(mem_block); + if (!heap) + return NULL; + + block = CALLOC_STRUCT(mem_block); + if (!block) { + FREE(heap); + return NULL; + } + + heap->next = block; + heap->prev = block; + heap->next_free = block; + heap->prev_free = block; + + block->heap = heap; + block->next = heap; + block->prev = heap; + block->next_free = heap; + block->prev_free = heap; + + block->ofs = ofs; + block->size = size; + block->free = 1; + + return heap; +} + + +static struct mem_block * +SliceBlock(struct mem_block *p, + int startofs, int size, + int reserved, int alignment) +{ + struct mem_block *newblock; + + /* break left [p, newblock, p->next], then p = newblock */ + if (startofs > p->ofs) { + newblock = CALLOC_STRUCT(mem_block); + if (!newblock) + return NULL; + newblock->ofs = startofs; + newblock->size = p->size - (startofs - p->ofs); + newblock->free = 1; + newblock->heap = p->heap; + + newblock->next = p->next; + newblock->prev = p; + p->next->prev = newblock; + p->next = newblock; + + newblock->next_free = p->next_free; + newblock->prev_free = p; + p->next_free->prev_free = newblock; + p->next_free = newblock; + + p->size -= newblock->size; + p = newblock; + } + + /* break right, also [p, newblock, p->next] */ + if (size < p->size) { + newblock = CALLOC_STRUCT(mem_block); + if (!newblock) + return NULL; + newblock->ofs = startofs + size; + newblock->size = p->size - size; + newblock->free = 1; + newblock->heap = p->heap; + + newblock->next = p->next; + newblock->prev = p; + p->next->prev = newblock; + p->next = newblock; + + newblock->next_free = p->next_free; + newblock->prev_free = p; + p->next_free->prev_free = newblock; + p->next_free = newblock; + + p->size = size; + } + + /* p = middle block */ + p->free = 0; + + /* Remove p from the free list: + */ + p->next_free->prev_free = p->prev_free; + p->prev_free->next_free = p->next_free; + + p->next_free = 0; + p->prev_free = 0; + + p->reserved = reserved; + return p; +} + + +struct mem_block * +mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) +{ + struct mem_block *p; + const int mask = (1 << align2)-1; + int startofs = 0; + int endofs; + + if (!heap || align2 < 0 || size <= 0) + return NULL; + + for (p = heap->next_free; p != heap; p = p->next_free) { + assert(p->free); + + startofs = (p->ofs + mask) & ~mask; + if ( startofs < startSearch ) { + startofs = startSearch; + } + endofs = startofs+size; + if (endofs <= (p->ofs+p->size)) + break; + } + + if (p == heap) + return NULL; + + assert(p->free); + p = SliceBlock(p,startofs,size,0,mask+1); + + return p; +} + + +struct mem_block * +mmFindBlock(struct mem_block *heap, int start) +{ + struct mem_block *p; + + for (p = heap->next; p != heap; p = p->next) { + if (p->ofs == start) + return p; + } + + return NULL; +} + + +static INLINE int +Join2Blocks(struct mem_block *p) +{ + /* XXX there should be some assertions here */ + + /* NOTE: heap->free == 0 */ + + if (p->free && p->next->free) { + struct mem_block *q = p->next; + + assert(p->ofs + p->size == q->ofs); + p->size += q->size; + + p->next = q->next; + q->next->prev = p; + + q->next_free->prev_free = q->prev_free; + q->prev_free->next_free = q->next_free; + + FREE(q); + return 1; + } + return 0; +} + +int +mmFreeMem(struct mem_block *b) +{ + if (!b) + return 0; + + if (b->free) { + debug_printf("block already free\n"); + return -1; + } + if (b->reserved) { + debug_printf("block is reserved\n"); + return -1; + } + + b->free = 1; + b->next_free = b->heap->next_free; + b->prev_free = b->heap; + b->next_free->prev_free = b; + b->prev_free->next_free = b; + + Join2Blocks(b); + if (b->prev != b->heap) + Join2Blocks(b->prev); + + return 0; +} + + +void +mmDestroy(struct mem_block *heap) +{ + struct mem_block *p; + + if (!heap) + return; + + for (p = heap->next; p != heap; ) { + struct mem_block *next = p->next; + FREE(p); + p = next; + } + + FREE(heap); +} diff --git a/src/gallium/auxiliary/util/u_mm.h b/src/gallium/auxiliary/util/u_mm.h new file mode 100644 index 0000000000..b226b101cb --- /dev/null +++ b/src/gallium/auxiliary/util/u_mm.h @@ -0,0 +1,91 @@ +/************************************************************************** + * + * Copyright (C) 1999 Wittawat Yamwong + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * KEITH WHITWELL, OR ANY OTHER CONTRIBUTORS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE + * OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * @file + * Memory manager code. Primarily used by device drivers to manage texture + * heaps, etc. + */ + + +#ifndef _U_MM_H_ +#define _U_MM_H_ + + +struct mem_block { + struct mem_block *next, *prev; + struct mem_block *next_free, *prev_free; + struct mem_block *heap; + int ofs,size; + unsigned int free:1; + unsigned int reserved:1; +}; + + + +/** + * input: total size in bytes + * return: a heap pointer if OK, NULL if error + */ +extern struct mem_block *mmInit(int ofs, int size); + +/** + * Allocate 'size' bytes with 2^align2 bytes alignment, + * restrict the search to free memory after 'startSearch' + * depth and back buffers should be in different 4mb banks + * to get better page hits if possible + * input: size = size of block + * align2 = 2^align2 bytes alignment + * startSearch = linear offset from start of heap to begin search + * return: pointer to the allocated block, 0 if error + */ +extern struct mem_block *mmAllocMem(struct mem_block *heap, int size, int align2, + int startSearch); + +/** + * Free block starts at offset + * input: pointer to a block + * return: 0 if OK, -1 if error + */ +extern int mmFreeMem(struct mem_block *b); + +/** + * Free block starts at offset + * input: pointer to a heap, start offset + * return: pointer to a block + */ +extern struct mem_block *mmFindBlock(struct mem_block *heap, int start); + +/** + * destroy MM + */ +extern void mmDestroy(struct mem_block *mmInit); + +/** + * For debuging purpose. + */ +extern void mmDumpMemInfo(const struct mem_block *mmInit); + +#endif -- cgit v1.2.3 From f430d95a36d55141cd9ef911aab70364ce4a4108 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 19 Feb 2008 12:52:28 +0900 Subject: Use gallium's rtasm module. --- src/gallium/auxiliary/draw/draw_private.h | 2 +- src/gallium/auxiliary/draw/draw_vf.c | 10 +++------- src/gallium/auxiliary/draw/draw_vf_sse.c | 2 +- src/gallium/auxiliary/draw/draw_vs_sse.c | 2 +- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 2 +- src/gallium/drivers/softpipe/sp_fs_sse.c | 2 +- src/mesa/state_tracker/st_program.h | 1 - 7 files changed, 8 insertions(+), 13 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index dd75f9aefc..6c7e860861 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -44,7 +44,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "x86/rtasm/x86sse.h" +#include "rtasm/rtasm_x86sse.h" #include "tgsi/exec/tgsi_exec.h" diff --git a/src/gallium/auxiliary/draw/draw_vf.c b/src/gallium/auxiliary/draw/draw_vf.c index dc3a5ecd21..901ff20a7e 100644 --- a/src/gallium/auxiliary/draw/draw_vf.c +++ b/src/gallium/auxiliary/draw/draw_vf.c @@ -30,6 +30,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_util.h" +#include "rtasm/rtasm_execmem.h" #include "draw_vf.h" @@ -37,11 +38,6 @@ #define DRAW_VF_DBG 0 -/* TODO: remove this */ -extern void -_mesa_exec_free( void *addr ); - - static boolean match_fastpath( struct draw_vertex_fetch *vf, const struct draw_vf_fastpath *fp) { @@ -414,12 +410,12 @@ void draw_vf_destroy( struct draw_vertex_fetch *vf ) FREE(fp->attr); /* KW: At the moment, fp->func is constrained to be allocated by - * _mesa_exec_alloc(), as the hardwired fastpaths in + * rtasm_exec_alloc(), as the hardwired fastpaths in * t_vertex_generic.c are handled specially. It would be nice * to unify them, but this probably won't change until this * module gets another overhaul. */ - //_mesa_exec_free((void *) fp->func); + //rtasm_exec_free((void *) fp->func); FREE(fp); } diff --git a/src/gallium/auxiliary/draw/draw_vf_sse.c b/src/gallium/auxiliary/draw/draw_vf_sse.c index 1ad2ae756d..1e889deeea 100644 --- a/src/gallium/auxiliary/draw/draw_vf_sse.c +++ b/src/gallium/auxiliary/draw/draw_vf_sse.c @@ -35,7 +35,7 @@ #if defined(USE_SSE_ASM) -#include "x86/rtasm/x86sse.h" +#include "rtasm/rtasm_x86sse.h" #include "x86/common_x86_asm.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 27bc66812c..11ef0c503d 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -41,7 +41,7 @@ #include "draw_private.h" #include "draw_context.h" -#include "x86/rtasm/x86sse.h" +#include "rtasm/rtasm_x86sse.h" #include "tgsi/exec/tgsi_sse2.h" diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 62c6a69c63..29a7f842ed 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -32,7 +32,7 @@ #include "tgsi_exec.h" #include "tgsi_sse2.h" -#include "x86/rtasm/x86sse.h" +#include "rtasm/rtasm_x86sse.h" #if defined(__i386__) || defined(__386__) diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index d90066e025..b18772f4e6 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -42,7 +42,7 @@ #if defined(__i386__) || defined(__386__) -#include "x86/rtasm/x86sse.h" +#include "rtasm/rtasm_x86sse.h" /* Surely this should be defined somewhere in a tgsi header: */ diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index ea1dde4a7a..25cf3e94a8 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -36,7 +36,6 @@ #include "mtypes.h" #include "pipe/p_shader_tokens.h" -#include "x86/rtasm/x86sse.h" #define ST_MAX_SHADER_TOKENS 1024 -- cgit v1.2.3 From 90b2beb661f630966788a6e909dc759c99e38973 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 19 Feb 2008 13:27:13 +0900 Subject: Simplify makefile boilerplate code. Don't define ASM_SOURCES variable globally -- reserve that variable to be defined locally by makefiles, together with C_SOURCES and CPP_SOURCES. --- configs/beos | 4 ++-- configs/default | 2 +- configs/freebsd-dri | 2 +- configs/freebsd-dri-amd64 | 4 ++-- configs/freebsd-dri-x86 | 4 ++-- configs/linux-directfb | 4 ++-- configs/linux-dri | 2 +- configs/linux-dri-ppc | 2 +- configs/linux-dri-x86 | 4 ++-- configs/linux-dri-x86-64 | 4 ++-- configs/linux-dri-xcb | 2 +- configs/linux-icc | 4 ++-- configs/linux-icc-static | 4 ++-- configs/linux-indirect | 2 +- configs/linux-solo | 2 +- configs/linux-solo-x86 | 4 ++-- configs/linux-sparc | 4 ++-- configs/linux-x86 | 4 ++-- configs/linux-x86-64 | 4 ++-- configs/linux-x86-glide | 4 ++-- configs/sunos5-gcc | 4 ++-- src/gallium/auxiliary/cso_cache/Makefile | 7 +------ src/gallium/auxiliary/draw/Makefile | 7 +------ src/gallium/auxiliary/pipebuffer/Makefile | 8 +------- src/gallium/auxiliary/rtasm/Makefile | 8 +------- src/gallium/auxiliary/tgsi/Makefile | 8 +------- src/gallium/auxiliary/util/Makefile | 8 +------- src/gallium/drivers/failover/Makefile | 9 +-------- src/gallium/drivers/i915simple/Makefile | 9 +-------- src/gallium/drivers/i965simple/Makefile | 25 +++++++------------------ src/gallium/drivers/softpipe/Makefile | 9 +-------- src/gallium/winsys/dri/Makefile.template | 5 +++-- src/glx/x11/Makefile | 6 +++--- src/mesa/sources | 6 +++--- 34 files changed, 60 insertions(+), 125 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/configs/beos b/configs/beos index 2b74af739d..c6e972789a 100644 --- a/configs/beos +++ b/configs/beos @@ -26,8 +26,8 @@ ifeq ($(CPU), x86) -DUSE_3DNOW_ASM \ -DUSE_SSE_ASM - ASM_SOURCES = $(X86_SOURCES) - ASM_API = $(X86_API) + MESA_ASM_SOURCES = $(X86_SOURCES) + GLAPI_ASM_SOURCES = $(X86_API) CC = gcc CXX = g++ diff --git a/configs/default b/configs/default index c9be5ec3e3..48ddd29282 100644 --- a/configs/default +++ b/configs/default @@ -51,7 +51,7 @@ OSMESA_LIB_NAME = lib$(OSMESA_LIB).so # Optional assembly language optimization files for libGL -ASM_SOURCES = +MESA_ASM_SOURCES = # GLw widget sources (Append "GLwMDrawA.c" here and add -lXm to GLW_LIB_DEPS in # order to build the Motif widget too) diff --git a/configs/freebsd-dri b/configs/freebsd-dri index 67d253b869..6fc1abbc80 100644 --- a/configs/freebsd-dri +++ b/configs/freebsd-dri @@ -22,7 +22,7 @@ CFLAGS = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) -Wmissing-prototypes -std=c99 - CXXFLAGS = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(DEFINES) -Wall -ansi -pedantic $(ASM_FLAGS) $(X11_INCLUDES) -ASM_SOURCES = +MESA_ASM_SOURCES = # Library/program dependencies LIBDRM_CFLAGS = `pkg-config --cflags libdrm` diff --git a/configs/freebsd-dri-amd64 b/configs/freebsd-dri-amd64 index 39341b9701..bb6c361398 100644 --- a/configs/freebsd-dri-amd64 +++ b/configs/freebsd-dri-amd64 @@ -6,5 +6,5 @@ include $(TOP)/configs/freebsd-dri CONFIG_NAME = freebsd-dri-x86-64 ASM_FLAGS = -DUSE_X86_64_ASM -ASM_SOURCES = $(X86-64_SOURCES) -ASM_API = $(X86-64_API) +MESA_ASM_SOURCES = $(X86-64_SOURCES) +GLAPI_ASM_SOURCES = $(X86-64_API) diff --git a/configs/freebsd-dri-x86 b/configs/freebsd-dri-x86 index af0d27ff47..9475437fc5 100644 --- a/configs/freebsd-dri-x86 +++ b/configs/freebsd-dri-x86 @@ -9,5 +9,5 @@ CONFIG_NAME = freebsd-dri-x86 PIC_FLAGS = ASM_FLAGS = -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM -ASM_SOURCES = $(X86_SOURCES) -ASM_API = $(X86_API) +MESA_ASM_SOURCES = $(X86_SOURCES) +GLAPI_ASM_SOURCES = $(X86_API) diff --git a/configs/linux-directfb b/configs/linux-directfb index dff27f7850..2ed94fe275 100644 --- a/configs/linux-directfb +++ b/configs/linux-directfb @@ -17,8 +17,8 @@ HAVE_X86 = $(shell uname -m | grep 'i[3-6]86' >/dev/null && echo yes) ifeq ($(HAVE_X86), yes) CFLAGS += -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM CXXFLAGS += -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM - ASM_SOURCES = $(X86_SOURCES) - ASM_API = $(X86_API) + MESA_ASM_SOURCES = $(X86_SOURCES) + GLAPI_ASM_SOURCES = $(X86_API) endif # Directories diff --git a/configs/linux-dri b/configs/linux-dri index c45b600013..67e60cbd4c 100644 --- a/configs/linux-dri +++ b/configs/linux-dri @@ -33,7 +33,7 @@ CFLAGS = -Wall -Wmissing-prototypes -std=c99 -ffast-math \ CXXFLAGS = -Wall $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) -ASM_SOURCES = +MESA_ASM_SOURCES = # Library/program dependencies EXTRA_LIB_PATH=-L/usr/X11R6/lib diff --git a/configs/linux-dri-ppc b/configs/linux-dri-ppc index fb87688065..a3a3ca83cb 100644 --- a/configs/linux-dri-ppc +++ b/configs/linux-dri-ppc @@ -9,7 +9,7 @@ OPT_FLAGS = -Os -mcpu=603 PIC_FLAGS = -fPIC ASM_FLAGS = -DUSE_PPC_ASM -DUSE_VMX_ASM -ASM_SOURCES = $(PPC_SOURCES) +MESA_ASM_SOURCES = $(PPC_SOURCES) # Build only the drivers for cards that exist on PowerPC. At some point MGA # will be added, but not yet. diff --git a/configs/linux-dri-x86 b/configs/linux-dri-x86 index b196004e58..ec8242dd68 100644 --- a/configs/linux-dri-x86 +++ b/configs/linux-dri-x86 @@ -12,6 +12,6 @@ PIC_FLAGS = ARCH_FLAGS = -m32 ASM_FLAGS = -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM -ASM_SOURCES = $(X86_SOURCES) -ASM_API = $(X86_API) +MESA_ASM_SOURCES = $(X86_SOURCES) +GLAPI_ASM_SOURCES = $(X86_API) diff --git a/configs/linux-dri-x86-64 b/configs/linux-dri-x86-64 index 821ab3e336..bb56de375a 100644 --- a/configs/linux-dri-x86-64 +++ b/configs/linux-dri-x86-64 @@ -8,8 +8,8 @@ CONFIG_NAME = linux-dri-x86-64 ARCH_FLAGS = -m64 ASM_FLAGS = -DUSE_X86_64_ASM -ASM_SOURCES = $(X86-64_SOURCES) -ASM_API = $(X86-64_API) +MESA_ASM_SOURCES = $(X86-64_SOURCES) +GLAPI_ASM_SOURCES = $(X86-64_API) LIB_DIR = lib64 diff --git a/configs/linux-dri-xcb b/configs/linux-dri-xcb index ea4bdf1864..fbf9b9b268 100644 --- a/configs/linux-dri-xcb +++ b/configs/linux-dri-xcb @@ -33,7 +33,7 @@ CFLAGS = -Wall -Wmissing-prototypes $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) \ CXXFLAGS = -Wall $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) -ASM_SOURCES = +MESA_ASM_SOURCES = # Library/program dependencies EXTRA_LIB_PATH=$(shell pkg-config --libs-only-L x11) diff --git a/configs/linux-icc b/configs/linux-icc index 978a45af70..d90a1dab3d 100644 --- a/configs/linux-icc +++ b/configs/linux-icc @@ -16,7 +16,7 @@ GL_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXext -lm -lpthread GLUT_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GLU_LIB) -l$(GL_LIB) -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm APP_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -lm -ASM_SOURCES = $(X86_SOURCES) -ASM_API = $(X86_API) +MESA_ASM_SOURCES = $(X86_SOURCES) +GLAPI_ASM_SOURCES = $(X86_API) diff --git a/configs/linux-icc-static b/configs/linux-icc-static index 0c957568c2..384db3bfe4 100644 --- a/configs/linux-icc-static +++ b/configs/linux-icc-static @@ -23,5 +23,5 @@ GL_LIB_DEPS = GLUT_LIB_DEPS = APP_LIB_DEPS = -L$(TOP)/$(LIB_DIR) -l$(GLUT_LIB) -l$(GLU_LIB) -l$(GL_LIB) -L/usr/X11R6/lib -lX11 -lXmu -lXt -lXi -lm -lpthread -lcxa -lunwind -ASM_SOURCES = $(X86_SOURCES) -ASM_API = $(X86_API) +MESA_ASM_SOURCES = $(X86_SOURCES) +GLAPI_ASM_SOURCES = $(X86_API) diff --git a/configs/linux-indirect b/configs/linux-indirect index bd33345ed7..0c4805ea87 100644 --- a/configs/linux-indirect +++ b/configs/linux-indirect @@ -34,7 +34,7 @@ CFLAGS = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) \ CXXFLAGS = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) -ASM_SOURCES = +MESA_ASM_SOURCES = # Library/program dependencies EXTRA_LIB_PATH=-L/usr/X11R6/lib diff --git a/configs/linux-solo b/configs/linux-solo index d49b972228..3145e12775 100644 --- a/configs/linux-solo +++ b/configs/linux-solo @@ -33,7 +33,7 @@ CFLAGS = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) \ CXXFLAGS = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) -ASM_SOURCES = +MESA_ASM_SOURCES = # Library/program dependencies DRI_LIB_DEPS = -lm -lpthread -lexpat -ldl -L$(TOP)/$(LIB_DIR) $(PCIACCESS_LIB) diff --git a/configs/linux-solo-x86 b/configs/linux-solo-x86 index 13cab37658..5f5aa09c82 100644 --- a/configs/linux-solo-x86 +++ b/configs/linux-solo-x86 @@ -9,5 +9,5 @@ CONFIG_NAME = linux-solo-x86 PIC_FLAGS = ASM_FLAGS = -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM -ASM_SOURCES = $(X86_SOURCES) -ASM_API = $(X86_API) +MESA_ASM_SOURCES = $(X86_SOURCES) +GLAPI_ASM_SOURCES = $(X86_API) diff --git a/configs/linux-sparc b/configs/linux-sparc index 9925afc19b..346d438c28 100644 --- a/configs/linux-sparc +++ b/configs/linux-sparc @@ -5,5 +5,5 @@ include $(TOP)/configs/linux CONFIG_NAME = linux-sparc #ASM_FLAGS = -DUSE_SPARC_ASM -#ASM_SOURCES = $(SPARC_SOURCES) -#ASM_API = $(SPARC_API) +#MESA_ASM_SOURCES = $(SPARC_SOURCES) +#GLAPI_ASM_SOURCES = $(SPARC_API) diff --git a/configs/linux-x86 b/configs/linux-x86 index 18fa06101d..a4cf4e8d62 100644 --- a/configs/linux-x86 +++ b/configs/linux-x86 @@ -5,5 +5,5 @@ include $(TOP)/configs/linux CONFIG_NAME = linux-x86 ASM_FLAGS = -DUSE_X86_ASM -DUSE_MMX_ASM -DUSE_3DNOW_ASM -DUSE_SSE_ASM -ASM_SOURCES = $(X86_SOURCES) -ASM_API = $(X86_API) +MESA_ASM_SOURCES = $(X86_SOURCES) +GLAPI_ASM_SOURCES = $(X86_API) diff --git a/configs/linux-x86-64 b/configs/linux-x86-64 index 67c0391836..c2441e09d0 100644 --- a/configs/linux-x86-64 +++ b/configs/linux-x86-64 @@ -6,8 +6,8 @@ CONFIG_NAME = linux-x86-64 ARCH_FLAGS = -m64 -ASM_SOURCES = $(X86-64_SOURCES) -ASM_API = $(X86-64_API) +MESA_ASM_SOURCES = $(X86-64_SOURCES) +GLAPI_ASM_SOURCES = $(X86-64_API) ASM_FLAGS = -DUSE_X86_64_ASM LIB_DIR = lib64 diff --git a/configs/linux-x86-glide b/configs/linux-x86-glide index f2f8aeea60..b963fbdc66 100644 --- a/configs/linux-x86-glide +++ b/configs/linux-x86-glide @@ -15,8 +15,8 @@ CXXFLAGS = -Wall -O3 -ansi -pedantic -fPIC -D_POSIX_SOURCE -D_POSIX_C_SOURCE=199 GLUT_CFLAGS = -fexceptions -ASM_SOURCES = $(X86_SOURCES) -ASM_API = $(X86_API) +MESA_ASM_SOURCES = $(X86_SOURCES) +GLAPI_ASM_SOURCES = $(X86_API) # Library/program dependencies GL_LIB_DEPS = -L/usr/X11R6/lib -lX11 -lXext -L/usr/local/glide/lib -lglide3x -lm -lpthread diff --git a/configs/sunos5-gcc b/configs/sunos5-gcc index 77b293c545..3fa13d0496 100644 --- a/configs/sunos5-gcc +++ b/configs/sunos5-gcc @@ -16,8 +16,8 @@ ARCH_FLAGS ?= DEFINES = -D_REENTRANT -DUSE_XSHM -ASM_SOURCES = $(SPARC_SOURCES) -ASM_API = $(SPARC_API) +MESA_ASM_SOURCES = $(SPARC_SOURCES) +GLAPI_ASM_SOURCES = $(SPARC_API) ASM_FLAGS = -DUSE_SPARC_ASM CFLAGS = $(WARN_FLAGS) $(OPT_FLAGS) $(PIC_FLAGS) $(ARCH_FLAGS) $(DEFINES) \ diff --git a/src/gallium/auxiliary/cso_cache/Makefile b/src/gallium/auxiliary/cso_cache/Makefile index 8248b097fd..3e49266163 100644 --- a/src/gallium/auxiliary/cso_cache/Makefile +++ b/src/gallium/auxiliary/cso_cache/Makefile @@ -3,15 +3,10 @@ include $(TOP)/configs/current LIBNAME = cso_cache -DRIVER_SOURCES = \ +C_SOURCES = \ cso_cache.c \ cso_hash.c -C_SOURCES = \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index c8000cbe9c..1ee9eca0ca 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -3,7 +3,7 @@ include $(TOP)/configs/current LIBNAME = draw -DRIVER_SOURCES = \ +C_SOURCES = \ draw_aaline.c \ draw_clip.c \ draw_vs_exec.c \ @@ -29,11 +29,6 @@ DRIVER_SOURCES = \ draw_vf_sse.c \ draw_wide_prims.c -C_SOURCES = \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile index 588629e870..a9fa518c67 100644 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -1,10 +1,9 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = pipebuffer -DRIVER_SOURCES = \ +C_SOURCES = \ pb_buffer_fenced.c \ pb_buffer_malloc.c \ pb_bufmgr_fenced.c \ @@ -12,11 +11,6 @@ DRIVER_SOURCES = \ pb_bufmgr_pool.c \ pb_winsys.c -C_SOURCES = \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile index edfae2a204..bc339d2aa6 100644 --- a/src/gallium/auxiliary/rtasm/Makefile +++ b/src/gallium/auxiliary/rtasm/Makefile @@ -1,18 +1,12 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = rtasm -DRIVER_SOURCES = \ +C_SOURCES = \ rtasm_execmem.c \ rtasm_x86sse.c -C_SOURCES = \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index 8bb62b2a0a..71f64b747c 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -1,10 +1,9 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = tgsi -DRIVER_SOURCES = \ +C_SOURCES = \ exec/tgsi_exec.c \ exec/tgsi_sse2.c \ util/tgsi_build.c \ @@ -13,11 +12,6 @@ DRIVER_SOURCES = \ util/tgsi_transform.c \ util/tgsi_util.c -C_SOURCES = \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 7cc2aa44f9..906a46d6b4 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -1,20 +1,14 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = util -DRIVER_SOURCES = \ +C_SOURCES = \ p_debug.c \ p_tile.c \ p_util.c \ u_mm.c -C_SOURCES = \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/failover/Makefile b/src/gallium/drivers/failover/Makefile index 14389bd055..f08b8df07a 100644 --- a/src/gallium/drivers/failover/Makefile +++ b/src/gallium/drivers/failover/Makefile @@ -1,20 +1,13 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = failover -DRIVER_SOURCES = \ +C_SOURCES = \ fo_state.c \ fo_state_emit.c \ fo_context.c -C_SOURCES = \ - $(COMMON_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/i915simple/Makefile b/src/gallium/drivers/i915simple/Makefile index ee22ba86f9..2a75f5d57c 100644 --- a/src/gallium/drivers/i915simple/Makefile +++ b/src/gallium/drivers/i915simple/Makefile @@ -1,10 +1,9 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = i915simple -DRIVER_SOURCES = \ +C_SOURCES = \ i915_blit.c \ i915_clear.c \ i915_flush.c \ @@ -26,12 +25,6 @@ DRIVER_SOURCES = \ i915_fpc_translate.c \ i915_surface.c -C_SOURCES = \ - $(COMMON_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/i965simple/Makefile b/src/gallium/drivers/i965simple/Makefile index 1dec1f9749..cc8580836c 100644 --- a/src/gallium/drivers/i965simple/Makefile +++ b/src/gallium/drivers/i965simple/Makefile @@ -1,14 +1,13 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = i965simple -DRIVER_SOURCES = \ - brw_blit.c \ - brw_flush.c \ - brw_strings.c \ - brw_surface.c \ +C_SOURCES = \ + brw_blit.c \ + brw_flush.c \ + brw_strings.c \ + brw_surface.c \ brw_cc.c \ brw_clip.c \ brw_clip_line.c \ @@ -31,8 +30,8 @@ DRIVER_SOURCES = \ brw_sf.c \ brw_sf_emit.c \ brw_sf_state.c \ - brw_shader_info.c \ - brw_state.c \ + brw_shader_info.c \ + brw_state.c \ brw_state_batch.c \ brw_state_cache.c \ brw_state_pool.c \ @@ -51,16 +50,6 @@ DRIVER_SOURCES = \ brw_wm_state.c \ brw_wm_surface_state.c -C_SOURCES = \ - $(COMMON_SOURCES) \ - $(COMMON_BM_SOURCES) \ - $(MINIGLX_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - -DRIVER_DEFINES = -I. - include ../../Makefile.template symlinks: diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 5479daf8ea..539ffb77f5 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -1,10 +1,9 @@ - TOP = ../../../.. include $(TOP)/configs/current LIBNAME = softpipe -DRIVER_SOURCES = \ +C_SOURCES = \ sp_fs_exec.c \ sp_fs_sse.c \ sp_fs_llvm.c \ @@ -41,12 +40,6 @@ DRIVER_SOURCES = \ sp_tile_cache.c \ sp_surface.c -C_SOURCES = \ - $(COMMON_SOURCES) \ - $(DRIVER_SOURCES) - -ASM_SOURCES = - include ../../Makefile.template symlinks: diff --git a/src/gallium/winsys/dri/Makefile.template b/src/gallium/winsys/dri/Makefile.template index 65a93bd53e..3bc1fdd4d4 100644 --- a/src/gallium/winsys/dri/Makefile.template +++ b/src/gallium/winsys/dri/Makefile.template @@ -25,8 +25,9 @@ WINOBJ= WINLIB= INCLUDES = $(SHARED_INCLUDES) $(EXPAT_INCLUDES) -OBJECTS = $(C_SOURCES:.c=.o) \ - $(ASM_SOURCES:.S=.o) +OBJECTS = \ + $(C_SOURCES:.c=.o) \ + $(ASM_SOURCES:.S=.o) else # miniglx diff --git a/src/glx/x11/Makefile b/src/glx/x11/Makefile index 5f74fcff06..b404727f08 100644 --- a/src/glx/x11/Makefile +++ b/src/glx/x11/Makefile @@ -35,7 +35,7 @@ SOURCES = \ include $(TOP)/src/mesa/sources -MESA_ASM_API = $(addprefix $(TOP)/src/mesa/, $(ASM_API)) +MESA_GLAPI_ASM_SOURCES = $(addprefix $(TOP)/src/mesa/, $(GLAPI_ASM_SOURCES)) MESA_GLAPI_SOURCES = $(addprefix $(TOP)/src/mesa/, $(GLAPI_SOURCES)) MESA_GLAPI_OBJECTS = $(addprefix $(TOP)/src/mesa/, $(GLAPI_OBJECTS)) @@ -70,11 +70,11 @@ $(TOP)/$(LIB_DIR)/$(GL_LIB_NAME): $(OBJECTS) Makefile -install $(TOP)/$(LIB_DIR) $(GL_LIB_DEPS) $(OBJECTS) -depend: $(SOURCES) $(MESA_GLAPI_SOURCES) $(MESA_ASM_API) Makefile +depend: $(SOURCES) $(MESA_GLAPI_SOURCES) $(MESA_GLAPI_ASM_SOURCES) Makefile rm -f depend touch depend $(MKDEP) $(MKDEP_OPTIONS) $(INCLUDES) $(SOURCES) \ - $(MESA_GLAPI_SOURCES) $(MESA_ASM_API) + $(MESA_GLAPI_SOURCES) $(MESA_GLAPI_ASM_SOURCES) # Emacs tags diff --git a/src/mesa/sources b/src/mesa/sources index 0d185fd5f3..9e56694893 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -320,7 +320,7 @@ FBDEV_DRIVER_SOURCES = \ ALL_SOURCES = \ $(GLAPI_SOURCES) \ $(SOLO_SOURCES) \ - $(ASM_SOURCES) \ + $(MESA_ASM_SOURCES) \ $(COMMON_DRIVER_SOURCES)\ $(X11_DRIVER_SOURCES) \ $(FBDEV_DRIVER_SOURCES) \ @@ -353,11 +353,11 @@ CORE_SOURCES = \ SOLO_OBJECTS = \ $(SOLO_SOURCES:.c=.o) \ - $(ASM_SOURCES:.S=.o) + $(MESA_ASM_SOURCES:.S=.o) GLAPI_OBJECTS = \ $(GLAPI_SOURCES:.c=.o) \ - $(ASM_API:.S=.o) + $(GLAPI_ASM_SOURCES:.S=.o) CORE_OBJECTS = $(SOLO_OBJECTS) $(GLAPI_OBJECTS) -- cgit v1.2.3 From b0eef0dc2557febea7d425fee1f9c2da382898a6 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 19 Feb 2008 13:41:38 +0900 Subject: Add run-time cpu capabilities detection stubs. --- src/gallium/auxiliary/draw/draw_vf_sse.c | 6 ++-- src/gallium/auxiliary/rtasm/Makefile | 1 + src/gallium/auxiliary/rtasm/SConscript | 1 + src/gallium/auxiliary/rtasm/rtasm_cpu.c | 50 ++++++++++++++++++++++++++++++++ src/gallium/auxiliary/rtasm/rtasm_cpu.h | 42 +++++++++++++++++++++++++++ 5 files changed, 97 insertions(+), 3 deletions(-) create mode 100644 src/gallium/auxiliary/rtasm/rtasm_cpu.c create mode 100644 src/gallium/auxiliary/rtasm/rtasm_cpu.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vf_sse.c b/src/gallium/auxiliary/draw/draw_vf_sse.c index 1e889deeea..6076f9849d 100644 --- a/src/gallium/auxiliary/draw/draw_vf_sse.c +++ b/src/gallium/auxiliary/draw/draw_vf_sse.c @@ -35,8 +35,8 @@ #if defined(USE_SSE_ASM) +#include "rtasm/rtasm_cpu.h" #include "rtasm/rtasm_x86sse.h" -#include "x86/common_x86_asm.h" #define X 0 @@ -576,7 +576,7 @@ void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ) { struct x86_program p; - if (!cpu_has_xmm) { + if (!rtasm_cpu_has_sse()) { vf->codegen_emit = NULL; return; } @@ -586,7 +586,7 @@ void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ) p.vf = vf; p.inputs_safe = 0; /* for now */ p.outputs_safe = 1; /* for now */ - p.have_sse2 = cpu_has_xmm2; + p.have_sse2 = rtasm_cpu_has_sse2(); p.identity = x86_make_reg(file_XMM, 6); p.chan0 = x86_make_reg(file_XMM, 7); diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile index bc339d2aa6..9b972f8f86 100644 --- a/src/gallium/auxiliary/rtasm/Makefile +++ b/src/gallium/auxiliary/rtasm/Makefile @@ -4,6 +4,7 @@ include $(TOP)/configs/current LIBNAME = rtasm C_SOURCES = \ + rtasm_cpu.c \ rtasm_execmem.c \ rtasm_x86sse.c diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript index 6eca1fe4c0..ac41a4f212 100644 --- a/src/gallium/auxiliary/rtasm/SConscript +++ b/src/gallium/auxiliary/rtasm/SConscript @@ -3,6 +3,7 @@ Import('*') rtasm = env.ConvenienceLibrary( target = 'rtasm', source = [ + 'rtasm_cpu.c', 'rtasm_execmem.c', 'rtasm_x86sse.c' ]) diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c new file mode 100644 index 0000000000..eb3359750b --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c @@ -0,0 +1,50 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "rtasm_cpu.h" + + +int rtasm_cpu_has_sse(void) +{ + /* FIXME: actually detect this at run-time */ +#if defined(__i386__) || defined(__386__) + return 1; +#else + return 0; +#endif +} + +int rtasm_cpu_has_sse2(void) +{ + /* FIXME: actually detect this at run-time */ +#if defined(__i386__) || defined(__386__) + return 1; +#else + return 0; +#endif +} diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.h b/src/gallium/auxiliary/rtasm/rtasm_cpu.h new file mode 100644 index 0000000000..ebc71634fd --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.h @@ -0,0 +1,42 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Runtime detection of CPU capabilities. + */ + +#ifndef _RTASM_CPU_H_ +#define _RTASM_CPU_H_ + + +int rtasm_cpu_has_sse(void); + +int rtasm_cpu_has_sse2(void); + + +#endif /* _RTASM_CPU_H_ */ -- cgit v1.2.3 From 5d78212d752e021555356bbb9cc5993ad6d9e847 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 19 Feb 2008 14:00:16 +0900 Subject: Bring in ppc spe rtasm into gallium's rtasm module. Moving files since these are not being used outside gallium. --- src/gallium/auxiliary/rtasm/Makefile | 3 +- src/gallium/auxiliary/rtasm/SConscript | 3 +- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 386 +++++++++++++++++++++++ src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 314 ++++++++++++++++++ src/gallium/drivers/cell/ppu/cell_context.h | 2 +- src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | 2 +- src/mesa/ppc/rtasm/spe_asm.c | 385 ---------------------- src/mesa/ppc/rtasm/spe_asm.h | 314 ------------------ src/mesa/sources | 1 - 9 files changed, 706 insertions(+), 704 deletions(-) create mode 100644 src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c create mode 100644 src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h delete mode 100644 src/mesa/ppc/rtasm/spe_asm.c delete mode 100644 src/mesa/ppc/rtasm/spe_asm.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile index 9b972f8f86..39b8a4dbd7 100644 --- a/src/gallium/auxiliary/rtasm/Makefile +++ b/src/gallium/auxiliary/rtasm/Makefile @@ -6,7 +6,8 @@ LIBNAME = rtasm C_SOURCES = \ rtasm_cpu.c \ rtasm_execmem.c \ - rtasm_x86sse.c + rtasm_x86sse.c \ + rtasm_ppc_spe.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript index ac41a4f212..8ea25922aa 100644 --- a/src/gallium/auxiliary/rtasm/SConscript +++ b/src/gallium/auxiliary/rtasm/SConscript @@ -5,7 +5,8 @@ rtasm = env.ConvenienceLibrary( source = [ 'rtasm_cpu.c', 'rtasm_execmem.c', - 'rtasm_x86sse.c' + 'rtasm_x86sse.c', + 'rtasm_ppc_spe.c', ]) auxiliaries.insert(0, rtasm) diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c new file mode 100644 index 0000000000..95a2d6fcbb --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -0,0 +1,386 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file + * Real-time assembly generation interface for Cell B.E. SPEs. + * + * \author Ian Romanick + */ + +#include "pipe/p_compiler.h" +#include "pipe/p_util.h" +#include "rtasm_ppc_spe.h" + +#ifdef GALLIUM_CELL +/** + * SPE instruction types + * + * There are 6 primary instruction encodings used on the Cell's SPEs. Each of + * the following unions encodes one type. + * + * \bug + * If, at some point, we start generating SPE code from a little-endian host + * these unions will not work. + */ +/*@{*/ +/** + * Encode one output register with two input registers + */ +union spe_inst_RR { + uint32_t bits; + struct { + unsigned op:11; + unsigned rB:7; + unsigned rA:7; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with three input registers + */ +union spe_inst_RRR { + uint32_t bits; + struct { + unsigned op:4; + unsigned rT:7; + unsigned rB:7; + unsigned rA:7; + unsigned rC:7; + } inst; +}; + + +/** + * Encode one output register with one input reg. and a 7-bit signed immed + */ +union spe_inst_RI7 { + uint32_t bits; + struct { + unsigned op:11; + unsigned i7:7; + unsigned rA:7; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with one input reg. and an 8-bit signed immed + */ +union spe_inst_RI8 { + uint32_t bits; + struct { + unsigned op:10; + unsigned i8:8; + unsigned rA:7; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with one input reg. and a 10-bit signed immed + */ +union spe_inst_RI10 { + uint32_t bits; + struct { + unsigned op:8; + unsigned i10:10; + unsigned rA:7; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with a 16-bit signed immediate + */ +union spe_inst_RI16 { + uint32_t bits; + struct { + unsigned op:9; + unsigned i16:16; + unsigned rT:7; + } inst; +}; + + +/** + * Encode one output register with a 18-bit signed immediate + */ +union spe_inst_RI18 { + uint32_t bits; + struct { + unsigned op:7; + unsigned i18:18; + unsigned rT:7; + } inst; +}; +/*@}*/ + + +static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, unsigned rB) +{ + union spe_inst_RR inst; + inst.inst.op = op; + inst.inst.rB = rB; + inst.inst.rA = rA; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + +static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, unsigned rB, unsigned rC) +{ + union spe_inst_RRR inst; + inst.inst.op = op; + inst.inst.rT = rT; + inst.inst.rB = rB; + inst.inst.rA = rA; + inst.inst.rC = rC; + *p->csr = inst.bits; + p->csr++; +} + + +static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, int imm) +{ + union spe_inst_RI7 inst; + inst.inst.op = op; + inst.inst.i7 = imm; + inst.inst.rA = rA; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + + +static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, int imm) +{ + union spe_inst_RI8 inst; + inst.inst.op = op; + inst.inst.i8 = imm; + inst.inst.rA = rA; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + + +static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, int imm) +{ + union spe_inst_RI10 inst; + inst.inst.op = op; + inst.inst.i10 = imm; + inst.inst.rA = rA; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + +static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, + int imm) +{ + union spe_inst_RI16 inst; + inst.inst.op = op; + inst.inst.i16 = imm; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + +static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, + int imm) +{ + union spe_inst_RI18 inst; + inst.inst.op = op; + inst.inst.i18 = imm; + inst.inst.rT = rT; + *p->csr = inst.bits; + p->csr++; +} + + + + +#define EMIT_(_name, _op) \ +void _name (struct spe_function *p, unsigned rT) \ +{ \ + emit_RR(p, _op, rT, 0, 0); \ +} + +#define EMIT_R(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA) \ +{ \ + emit_RR(p, _op, rT, rA, 0); \ +} + +#define EMIT_RR(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) \ +{ \ + emit_RR(p, _op, rT, rA, rB); \ +} + +#define EMIT_RRR(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB, unsigned rC) \ +{ \ + emit_RRR(p, _op, rT, rA, rB, rC); \ +} + +#define EMIT_RI7(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +{ \ + emit_RI7(p, _op, rT, rA, imm); \ +} + +#define EMIT_RI8(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +{ \ + emit_RI8(p, _op, rT, rA, 155 - imm); \ +} + +#define EMIT_RI10(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +{ \ + emit_RI10(p, _op, rT, rA, imm); \ +} + +#define EMIT_RI16(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, int imm) \ +{ \ + emit_RI16(p, _op, rT, imm); \ +} + +#define EMIT_RI18(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, int imm) \ +{ \ + emit_RI18(p, _op, rT, imm); \ +} + +#define EMIT_I16(_name, _op) \ +void _name (struct spe_function *p, int imm) \ +{ \ + emit_RI16(p, _op, 0, imm); \ +} + +#include "rtasm_ppc_spe.h" + + +/* + */ +void spe_init_func(struct spe_function *p, unsigned code_size) +{ + p->store = align_malloc(code_size, 16); + p->csr = p->store; +} + + +void spe_release_func(struct spe_function *p) +{ + align_free(p->store); + p->store = NULL; + p->csr = NULL; +} + + +void spe_bi(struct spe_function *p, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4)); +} + +void spe_iret(struct spe_function *p, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4)); +} + +void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d, + int e) +{ + emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4)); +} + +void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d, + int e) +{ + emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4)); +} + +void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, + int e) +{ + emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4)); +} + +void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4)); +} + +void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4)); +} + +void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +{ + emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4)); +} + + +/* Hint-for-branch instructions + */ +#if 0 +hbr; +hbra; +hbrr; +#endif + + +/* Control instructions + */ +#if 0 +stop; +EMIT_RR (spe_stopd, 0x140); +EMIT_ (spe_lnop, 0x001); +EMIT_ (spe_nop, 0x201); +sync; +EMIT_ (spe_dsync, 0x003); +EMIT_R (spe_mfspr, 0x00c); +EMIT_R (spe_mtspr, 0x10c); +#endif + +#endif /* GALLIUM_CELL */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h new file mode 100644 index 0000000000..10ce44b3a0 --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -0,0 +1,314 @@ +/* + * (C) Copyright IBM Corporation 2008 + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * \file + * Real-time assembly generation interface for Cell B.E. SPEs. + * + * \author Ian Romanick + */ + +#ifndef RTASM_PPC_SPE_H +#define RTASM_PPC_SPE_H + +struct spe_function { + /** + * + */ + uint32_t *store; + uint32_t *csr; + const char *fn; +}; + +extern void spe_init_func(struct spe_function *p, unsigned code_size); +extern void spe_release_func(struct spe_function *p); + +#endif /* RTASM_PPC_SPE_H */ + +#ifndef EMIT_ +#define EMIT_(name, _op) \ + extern void _name (struct spe_function *p, unsigned rT) +#define EMIT_R(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA) +#define EMIT_RR(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + unsigned rB) +#define EMIT_RRR(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + unsigned rB, unsigned rC) +#define EMIT_RI7(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + int imm) +#define EMIT_RI8(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + int imm) +#define EMIT_RI10(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + int imm) +#define EMIT_RI16(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, int imm) +#define EMIT_RI18(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, int imm) +#define EMIT_I16(_name, _op) \ + extern void _name (struct spe_function *p, int imm) +#define UNDEF_EMIT_MACROS +#endif /* EMIT_ */ + + +/* Memory load / store instructions + */ +EMIT_RI10(spe_lqd, 0x034); +EMIT_RR (spe_lqx, 0x1c4); +EMIT_RI16(spe_lqa, 0x061); +EMIT_RI16(spe_lqr, 0x067); +EMIT_RI10(spe_stqd, 0x024); +EMIT_RR (spe_stqx, 0x144); +EMIT_RI16(spe_stqa, 0x041); +EMIT_RI16(spe_stqr, 0x047); +EMIT_RI7 (spe_cbd, 0x1f4); +EMIT_RR (spe_cbx, 0x1d4); +EMIT_RI7 (spe_chd, 0x1f5); +EMIT_RI7 (spe_chx, 0x1d5); +EMIT_RI7 (spe_cwd, 0x1f6); +EMIT_RI7 (spe_cwx, 0x1d6); +EMIT_RI7 (spe_cdd, 0x1f7); +EMIT_RI7 (spe_cdx, 0x1d7); + + +/* Constant formation instructions + */ +EMIT_RI16(spe_ilh, 0x083); +EMIT_RI16(spe_ilhu, 0x082); +EMIT_RI16(spe_il, 0x081); +EMIT_RI18(spe_ila, 0x021); +EMIT_RI16(spe_iohl, 0x0c1); +EMIT_RI16(spe_fsmbi, 0x0c5); + + +/* Integer and logical instructions + */ +EMIT_RR (spe_ah, 0x0c8); +EMIT_RI10(spe_ahi, 0x01d); +EMIT_RR (spe_a, 0x0c0); +EMIT_RI10(spe_ai, 0x01c); +EMIT_RR (spe_sfh, 0x048); +EMIT_RI10(spe_sfhi, 0x00d); +EMIT_RR (spe_sf, 0x040); +EMIT_RI10(spe_sfi, 0x00c); +EMIT_RR (spe_addx, 0x340); +EMIT_RR (spe_cg, 0x0c2); +EMIT_RR (spe_cgx, 0x342); +EMIT_RR (spe_sfx, 0x341); +EMIT_RR (spe_bg, 0x042); +EMIT_RR (spe_bgx, 0x343); +EMIT_RR (spe_mpy, 0x3c4); +EMIT_RR (spe_mpyu, 0x3cc); +EMIT_RI10(spe_mpyi, 0x074); +EMIT_RI10(spe_mpyui, 0x075); +EMIT_RRR (spe_mpya, 0x00c); +EMIT_RR (spe_mpyh, 0x3c5); +EMIT_RR (spe_mpys, 0x3c7); +EMIT_RR (spe_mpyhh, 0x3c6); +EMIT_RR (spe_mpyhha, 0x346); +EMIT_RR (spe_mpyhhu, 0x3ce); +EMIT_RR (spe_mpyhhau, 0x34e); +EMIT_R (spe_clz, 0x2a5); +EMIT_R (spe_cntb, 0x2b4); +EMIT_R (spe_fsmb, 0x1b6); +EMIT_R (spe_fsmh, 0x1b5); +EMIT_R (spe_fsm, 0x1b4); +EMIT_R (spe_gbb, 0x1b2); +EMIT_R (spe_gbh, 0x1b1); +EMIT_R (spe_gb, 0x1b0); +EMIT_RR (spe_avgb, 0x0d3); +EMIT_RR (spe_absdb, 0x053); +EMIT_RR (spe_sumb, 0x253); +EMIT_R (spe_xsbh, 0x2b6); +EMIT_R (spe_xshw, 0x2ae); +EMIT_R (spe_xswd, 0x2a6); +EMIT_RR (spe_and, 0x0c1); +EMIT_RR (spe_andc, 0x2c1); +EMIT_RI10(spe_andbi, 0x016); +EMIT_RI10(spe_andhi, 0x015); +EMIT_RI10(spe_andi, 0x014); +EMIT_RR (spe_or, 0x041); +EMIT_RR (spe_orc, 0x2c9); +EMIT_RI10(spe_orbi, 0x006); +EMIT_RI10(spe_orhi, 0x005); +EMIT_RI10(spe_ori, 0x004); +EMIT_R (spe_orx, 0x1f0); +EMIT_RR (spe_xor, 0x241); +EMIT_RI10(spe_xorbi, 0x026); +EMIT_RI10(spe_xorhi, 0x025); +EMIT_RI10(spe_xori, 0x024); +EMIT_RR (spe_nand, 0x0c9); +EMIT_RR (spe_nor, 0x049); +EMIT_RR (spe_eqv, 0x249); +EMIT_RRR (spe_selb, 0x008); +EMIT_RRR (spe_shufb, 0x00b); + + +/* Shift and rotate instructions + */ +EMIT_RR (spe_shlh, 0x05f); +EMIT_RI7 (spe_shlhi, 0x07f); +EMIT_RR (spe_shl, 0x05b); +EMIT_RI7 (spe_shli, 0x07b); +EMIT_RR (spe_shlqbi, 0x1db); +EMIT_RI7 (spe_shlqbii, 0x1fb); +EMIT_RR (spe_shlqby, 0x1df); +EMIT_RI7 (spe_shlqbyi, 0x1ff); +EMIT_RR (spe_shlqbybi, 0x1cf); +EMIT_RR (spe_roth, 0x05c); +EMIT_RI7 (spe_rothi, 0x07c); +EMIT_RR (spe_rot, 0x058); +EMIT_RI7 (spe_roti, 0x078); +EMIT_RR (spe_rotqby, 0x1dc); +EMIT_RI7 (spe_rotqbyi, 0x1fc); +EMIT_RR (spe_rotqbybi, 0x1cc); +EMIT_RR (spe_rotqbi, 0x1d8); +EMIT_RI7 (spe_rotqbii, 0x1f8); +EMIT_RR (spe_rothm, 0x05d); +EMIT_RI7 (spe_rothmi, 0x07d); +EMIT_RR (spe_rotm, 0x059); +EMIT_RI7 (spe_rotmi, 0x079); +EMIT_RR (spe_rotqmby, 0x1dd); +EMIT_RI7 (spe_rotqmbyi, 0x1fd); +EMIT_RR (spe_rotqmbybi, 0x1cd); +EMIT_RR (spe_rotqmbi, 0x1c9); +EMIT_RI7 (spe_rotqmbii, 0x1f9); +EMIT_RR (spe_rotmah, 0x05e); +EMIT_RI7 (spe_rotmahi, 0x07e); +EMIT_RR (spe_rotma, 0x05a); +EMIT_RI7 (spe_rotmai, 0x07a); + + +/* Compare, branch, and halt instructions + */ +EMIT_RR (spe_heq, 0x3d8); +EMIT_RI10(spe_heqi, 0x07f); +EMIT_RR (spe_hgt, 0x258); +EMIT_RI10(spe_hgti, 0x04f); +EMIT_RR (spe_hlgt, 0x2d8); +EMIT_RI10(spe_hlgti, 0x05f); +EMIT_RR (spe_ceqb, 0x3d0); +EMIT_RI10(spe_ceqbi, 0x07e); +EMIT_RR (spe_ceqh, 0x3c8); +EMIT_RI10(spe_ceqhi, 0x07d); +EMIT_RR (spe_ceq, 0x3c0); +EMIT_RI10(spe_ceqi, 0x07c); +EMIT_RR (spe_cgtb, 0x250); +EMIT_RI10(spe_cgtbi, 0x04e); +EMIT_RR (spe_cgth, 0x248); +EMIT_RI10(spe_cgthi, 0x04d); +EMIT_RR (spe_cgt, 0x240); +EMIT_RI10(spe_cgti, 0x04c); +EMIT_RR (spe_clgtb, 0x2d0); +EMIT_RI10(spe_clgtbi, 0x05e); +EMIT_RR (spe_clgth, 0x2c8); +EMIT_RI10(spe_clgthi, 0x05d); +EMIT_RR (spe_clgt, 0x2c0); +EMIT_RI10(spe_clgti, 0x05c); +EMIT_I16 (spe_br, 0x064); +EMIT_I16 (spe_bra, 0x060); +EMIT_RI16(spe_brsl, 0x066); +EMIT_RI16(spe_brasl, 0x062); +EMIT_RI16(spe_brnz, 0x042); +EMIT_RI16(spe_brz, 0x040); +EMIT_RI16(spe_brhnz, 0x046); +EMIT_RI16(spe_brhz, 0x044); + +extern void spe_bi(struct spe_function *p, unsigned rA, int d, int e); +extern void spe_iret(struct spe_function *p, unsigned rA, int d, int e); +extern void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); +extern void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, + int d, int e); + + +/* Floating-point instructions + */ +EMIT_RR (spe_fa, 0x2c4); +EMIT_RR (spe_dfa, 0x2cc); +EMIT_RR (spe_fs, 0x2c5); +EMIT_RR (spe_dfs, 0x2cd); +EMIT_RR (spe_fm, 0x2c6); +EMIT_RR (spe_dfm, 0x2ce); +EMIT_RRR (spe_fma, 0x00e); +EMIT_RR (spe_dfma, 0x35c); +EMIT_RRR (spe_fnms, 0x00d); +EMIT_RR (spe_dfnms, 0x35e); +EMIT_RRR (spe_fms, 0x00f); +EMIT_RR (spe_dfms, 0x35d); +EMIT_RR (spe_dfnma, 0x35f); +EMIT_R (spe_frest, 0x1b8); +EMIT_R (spe_frsqest, 0x1b9); +EMIT_RR (spe_fi, 0x3d4); +EMIT_RI8 (spe_csflt, 0x1da); +EMIT_RI8 (spe_cflts, 0x1d8); +EMIT_RI8 (spe_cuflt, 0x1db); +EMIT_RI8 (spe_cfltu, 0x1d9); +EMIT_R (spe_frds, 0x3b9); +EMIT_R (spe_fesd, 0x3b8); +EMIT_RR (spe_dfceq, 0x3c3); +EMIT_RR (spe_dfcmeq, 0x3cb); +EMIT_RR (spe_dfcgt, 0x2c3); +EMIT_RR (spe_dfcmgt, 0x2cb); +EMIT_RI7 (spe_dftsv, 0x3bf); +EMIT_RR (spe_fceq, 0x3c2); +EMIT_RR (spe_fcmeq, 0x3ca); +EMIT_RR (spe_fcgt, 0x2c2); +EMIT_RR (spe_fcmgt, 0x2ca); +EMIT_R (spe_fscrwr, 0x3ba); +EMIT_ (spe_fscrrd, 0x398); + + +/* Channel instructions + */ +EMIT_R (spe_rdch, 0x00d); +EMIT_R (spe_rdchcnt, 0x00f); +EMIT_R (spe_wrch, 0x10d); + + +#ifdef UNDEF_EMIT_MACROS +#undef EMIT_ +#undef EMIT_R +#undef EMIT_RR +#undef EMIT_RRR +#undef EMIT_RI7 +#undef EMIT_RI8 +#undef EMIT_RI10 +#undef EMIT_RI16 +#undef EMIT_RI18 +#undef EMIT_I16 +#undef UNDEF_EMIT_MACROS +#endif /* EMIT_ */ diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 91f8e542a2..3b687bb868 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -36,7 +36,7 @@ #include "draw/draw_vbuf.h" #include "cell_winsys.h" #include "cell/common.h" -#include "ppc/rtasm/spe_asm.h" +#include "rtasm/rtasm_ppc_spe.h" struct cell_vbuf_render; diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index f10689a959..9cf74bab47 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -31,7 +31,7 @@ #include "../auxiliary/draw/draw_private.h" #include "cell_context.h" -#include "ppc/rtasm/spe_asm.h" +#include "rtasm/rtasm_ppc_spe.h" typedef uint64_t register_mask; diff --git a/src/mesa/ppc/rtasm/spe_asm.c b/src/mesa/ppc/rtasm/spe_asm.c deleted file mode 100644 index 1037637250..0000000000 --- a/src/mesa/ppc/rtasm/spe_asm.c +++ /dev/null @@ -1,385 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \file spe_asm.c - * Real-time assembly generation interface for Cell B.E. SPEs. - * - * \author Ian Romanick - */ -#ifdef GALLIUM_CELL -#include -#include -#include "spe_asm.h" - -/** - * SPE instruction types - * - * There are 6 primary instruction encodings used on the Cell's SPEs. Each of - * the following unions encodes one type. - * - * \bug - * If, at some point, we start generating SPE code from a little-endian host - * these unions will not work. - */ -/*@{*/ -/** - * Encode one output register with two input registers - */ -union spe_inst_RR { - uint32_t bits; - struct { - unsigned op:11; - unsigned rB:7; - unsigned rA:7; - unsigned rT:7; - } inst; -}; - - -/** - * Encode one output register with three input registers - */ -union spe_inst_RRR { - uint32_t bits; - struct { - unsigned op:4; - unsigned rT:7; - unsigned rB:7; - unsigned rA:7; - unsigned rC:7; - } inst; -}; - - -/** - * Encode one output register with one input reg. and a 7-bit signed immed - */ -union spe_inst_RI7 { - uint32_t bits; - struct { - unsigned op:11; - unsigned i7:7; - unsigned rA:7; - unsigned rT:7; - } inst; -}; - - -/** - * Encode one output register with one input reg. and an 8-bit signed immed - */ -union spe_inst_RI8 { - uint32_t bits; - struct { - unsigned op:10; - unsigned i8:8; - unsigned rA:7; - unsigned rT:7; - } inst; -}; - - -/** - * Encode one output register with one input reg. and a 10-bit signed immed - */ -union spe_inst_RI10 { - uint32_t bits; - struct { - unsigned op:8; - unsigned i10:10; - unsigned rA:7; - unsigned rT:7; - } inst; -}; - - -/** - * Encode one output register with a 16-bit signed immediate - */ -union spe_inst_RI16 { - uint32_t bits; - struct { - unsigned op:9; - unsigned i16:16; - unsigned rT:7; - } inst; -}; - - -/** - * Encode one output register with a 18-bit signed immediate - */ -union spe_inst_RI18 { - uint32_t bits; - struct { - unsigned op:7; - unsigned i18:18; - unsigned rT:7; - } inst; -}; -/*@}*/ - - -static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, unsigned rB) -{ - union spe_inst_RR inst; - inst.inst.op = op; - inst.inst.rB = rB; - inst.inst.rA = rA; - inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; -} - - -static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, unsigned rB, unsigned rC) -{ - union spe_inst_RRR inst; - inst.inst.op = op; - inst.inst.rT = rT; - inst.inst.rB = rB; - inst.inst.rA = rA; - inst.inst.rC = rC; - *p->csr = inst.bits; - p->csr++; -} - - -static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm) -{ - union spe_inst_RI7 inst; - inst.inst.op = op; - inst.inst.i7 = imm; - inst.inst.rA = rA; - inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; -} - - - -static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm) -{ - union spe_inst_RI8 inst; - inst.inst.op = op; - inst.inst.i8 = imm; - inst.inst.rA = rA; - inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; -} - - - -static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm) -{ - union spe_inst_RI10 inst; - inst.inst.op = op; - inst.inst.i10 = imm; - inst.inst.rA = rA; - inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; -} - - -static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, - int imm) -{ - union spe_inst_RI16 inst; - inst.inst.op = op; - inst.inst.i16 = imm; - inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; -} - - -static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, - int imm) -{ - union spe_inst_RI18 inst; - inst.inst.op = op; - inst.inst.i18 = imm; - inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; -} - - - - -#define EMIT_(_name, _op) \ -void _name (struct spe_function *p, unsigned rT) \ -{ \ - emit_RR(p, _op, rT, 0, 0); \ -} - -#define EMIT_R(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA) \ -{ \ - emit_RR(p, _op, rT, rA, 0); \ -} - -#define EMIT_RR(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) \ -{ \ - emit_RR(p, _op, rT, rA, rB); \ -} - -#define EMIT_RRR(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB, unsigned rC) \ -{ \ - emit_RRR(p, _op, rT, rA, rB, rC); \ -} - -#define EMIT_RI7(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ -{ \ - emit_RI7(p, _op, rT, rA, imm); \ -} - -#define EMIT_RI8(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ -{ \ - emit_RI8(p, _op, rT, rA, 155 - imm); \ -} - -#define EMIT_RI10(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ -{ \ - emit_RI10(p, _op, rT, rA, imm); \ -} - -#define EMIT_RI16(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, int imm) \ -{ \ - emit_RI16(p, _op, rT, imm); \ -} - -#define EMIT_RI18(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, int imm) \ -{ \ - emit_RI18(p, _op, rT, imm); \ -} - -#define EMIT_I16(_name, _op) \ -void _name (struct spe_function *p, int imm) \ -{ \ - emit_RI16(p, _op, 0, imm); \ -} - -#include "spe_asm.h" - - -/* - */ -void spe_init_func(struct spe_function *p, unsigned code_size) -{ - p->store = _mesa_align_malloc(code_size, 16); - p->csr = p->store; -} - - -void spe_release_func(struct spe_function *p) -{ - _mesa_align_free(p->store); - p->store = NULL; - p->csr = NULL; -} - - -void spe_bi(struct spe_function *p, unsigned rA, int d, int e) -{ - emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4)); -} - -void spe_iret(struct spe_function *p, unsigned rA, int d, int e) -{ - emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4)); -} - -void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d, - int e) -{ - emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4)); -} - -void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d, - int e) -{ - emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4)); -} - -void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, - int e) -{ - emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4)); -} - -void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) -{ - emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4)); -} - -void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) -{ - emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4)); -} - -void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) -{ - emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4)); -} - - -/* Hint-for-branch instructions - */ -#if 0 -hbr; -hbra; -hbrr; -#endif - - -/* Control instructions - */ -#if 0 -stop; -EMIT_RR (spe_stopd, 0x140); -EMIT_ (spe_lnop, 0x001); -EMIT_ (spe_nop, 0x201); -sync; -EMIT_ (spe_dsync, 0x003); -EMIT_R (spe_mfspr, 0x00c); -EMIT_R (spe_mtspr, 0x10c); -#endif - -#endif /* GALLIUM_CELL */ diff --git a/src/mesa/ppc/rtasm/spe_asm.h b/src/mesa/ppc/rtasm/spe_asm.h deleted file mode 100644 index 6d69ae655d..0000000000 --- a/src/mesa/ppc/rtasm/spe_asm.h +++ /dev/null @@ -1,314 +0,0 @@ -/* - * (C) Copyright IBM Corporation 2008 - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - -/** - * \file spe_asm.h - * Real-time assembly generation interface for Cell B.E. SPEs. - * - * \author Ian Romanick - */ - -#ifndef SPE_ASM_H -#define SPE_ASM_H - -struct spe_function { - /** - * - */ - uint32_t *store; - uint32_t *csr; - const char *fn; -}; - -extern void spe_init_func(struct spe_function *p, unsigned code_size); -extern void spe_release_func(struct spe_function *p); - -#endif /* SPE_ASM_H */ - -#ifndef EMIT_ -#define EMIT_(name, _op) \ - extern void _name (struct spe_function *p, unsigned rT) -#define EMIT_R(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA) -#define EMIT_RR(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - unsigned rB) -#define EMIT_RRR(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - unsigned rB, unsigned rC) -#define EMIT_RI7(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm) -#define EMIT_RI8(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm) -#define EMIT_RI10(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm) -#define EMIT_RI16(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, int imm) -#define EMIT_RI18(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, int imm) -#define EMIT_I16(_name, _op) \ - extern void _name (struct spe_function *p, int imm) -#define UNDEF_EMIT_MACROS -#endif /* EMIT_ */ - - -/* Memory load / store instructions - */ -EMIT_RI10(spe_lqd, 0x034); -EMIT_RR (spe_lqx, 0x1c4); -EMIT_RI16(spe_lqa, 0x061); -EMIT_RI16(spe_lqr, 0x067); -EMIT_RI10(spe_stqd, 0x024); -EMIT_RR (spe_stqx, 0x144); -EMIT_RI16(spe_stqa, 0x041); -EMIT_RI16(spe_stqr, 0x047); -EMIT_RI7 (spe_cbd, 0x1f4); -EMIT_RR (spe_cbx, 0x1d4); -EMIT_RI7 (spe_chd, 0x1f5); -EMIT_RI7 (spe_chx, 0x1d5); -EMIT_RI7 (spe_cwd, 0x1f6); -EMIT_RI7 (spe_cwx, 0x1d6); -EMIT_RI7 (spe_cdd, 0x1f7); -EMIT_RI7 (spe_cdx, 0x1d7); - - -/* Constant formation instructions - */ -EMIT_RI16(spe_ilh, 0x083); -EMIT_RI16(spe_ilhu, 0x082); -EMIT_RI16(spe_il, 0x081); -EMIT_RI18(spe_ila, 0x021); -EMIT_RI16(spe_iohl, 0x0c1); -EMIT_RI16(spe_fsmbi, 0x0c5); - - -/* Integer and logical instructions - */ -EMIT_RR (spe_ah, 0x0c8); -EMIT_RI10(spe_ahi, 0x01d); -EMIT_RR (spe_a, 0x0c0); -EMIT_RI10(spe_ai, 0x01c); -EMIT_RR (spe_sfh, 0x048); -EMIT_RI10(spe_sfhi, 0x00d); -EMIT_RR (spe_sf, 0x040); -EMIT_RI10(spe_sfi, 0x00c); -EMIT_RR (spe_addx, 0x340); -EMIT_RR (spe_cg, 0x0c2); -EMIT_RR (spe_cgx, 0x342); -EMIT_RR (spe_sfx, 0x341); -EMIT_RR (spe_bg, 0x042); -EMIT_RR (spe_bgx, 0x343); -EMIT_RR (spe_mpy, 0x3c4); -EMIT_RR (spe_mpyu, 0x3cc); -EMIT_RI10(spe_mpyi, 0x074); -EMIT_RI10(spe_mpyui, 0x075); -EMIT_RRR (spe_mpya, 0x00c); -EMIT_RR (spe_mpyh, 0x3c5); -EMIT_RR (spe_mpys, 0x3c7); -EMIT_RR (spe_mpyhh, 0x3c6); -EMIT_RR (spe_mpyhha, 0x346); -EMIT_RR (spe_mpyhhu, 0x3ce); -EMIT_RR (spe_mpyhhau, 0x34e); -EMIT_R (spe_clz, 0x2a5); -EMIT_R (spe_cntb, 0x2b4); -EMIT_R (spe_fsmb, 0x1b6); -EMIT_R (spe_fsmh, 0x1b5); -EMIT_R (spe_fsm, 0x1b4); -EMIT_R (spe_gbb, 0x1b2); -EMIT_R (spe_gbh, 0x1b1); -EMIT_R (spe_gb, 0x1b0); -EMIT_RR (spe_avgb, 0x0d3); -EMIT_RR (spe_absdb, 0x053); -EMIT_RR (spe_sumb, 0x253); -EMIT_R (spe_xsbh, 0x2b6); -EMIT_R (spe_xshw, 0x2ae); -EMIT_R (spe_xswd, 0x2a6); -EMIT_RR (spe_and, 0x0c1); -EMIT_RR (spe_andc, 0x2c1); -EMIT_RI10(spe_andbi, 0x016); -EMIT_RI10(spe_andhi, 0x015); -EMIT_RI10(spe_andi, 0x014); -EMIT_RR (spe_or, 0x041); -EMIT_RR (spe_orc, 0x2c9); -EMIT_RI10(spe_orbi, 0x006); -EMIT_RI10(spe_orhi, 0x005); -EMIT_RI10(spe_ori, 0x004); -EMIT_R (spe_orx, 0x1f0); -EMIT_RR (spe_xor, 0x241); -EMIT_RI10(spe_xorbi, 0x026); -EMIT_RI10(spe_xorhi, 0x025); -EMIT_RI10(spe_xori, 0x024); -EMIT_RR (spe_nand, 0x0c9); -EMIT_RR (spe_nor, 0x049); -EMIT_RR (spe_eqv, 0x249); -EMIT_RRR (spe_selb, 0x008); -EMIT_RRR (spe_shufb, 0x00b); - - -/* Shift and rotate instructions - */ -EMIT_RR (spe_shlh, 0x05f); -EMIT_RI7 (spe_shlhi, 0x07f); -EMIT_RR (spe_shl, 0x05b); -EMIT_RI7 (spe_shli, 0x07b); -EMIT_RR (spe_shlqbi, 0x1db); -EMIT_RI7 (spe_shlqbii, 0x1fb); -EMIT_RR (spe_shlqby, 0x1df); -EMIT_RI7 (spe_shlqbyi, 0x1ff); -EMIT_RR (spe_shlqbybi, 0x1cf); -EMIT_RR (spe_roth, 0x05c); -EMIT_RI7 (spe_rothi, 0x07c); -EMIT_RR (spe_rot, 0x058); -EMIT_RI7 (spe_roti, 0x078); -EMIT_RR (spe_rotqby, 0x1dc); -EMIT_RI7 (spe_rotqbyi, 0x1fc); -EMIT_RR (spe_rotqbybi, 0x1cc); -EMIT_RR (spe_rotqbi, 0x1d8); -EMIT_RI7 (spe_rotqbii, 0x1f8); -EMIT_RR (spe_rothm, 0x05d); -EMIT_RI7 (spe_rothmi, 0x07d); -EMIT_RR (spe_rotm, 0x059); -EMIT_RI7 (spe_rotmi, 0x079); -EMIT_RR (spe_rotqmby, 0x1dd); -EMIT_RI7 (spe_rotqmbyi, 0x1fd); -EMIT_RR (spe_rotqmbybi, 0x1cd); -EMIT_RR (spe_rotqmbi, 0x1c9); -EMIT_RI7 (spe_rotqmbii, 0x1f9); -EMIT_RR (spe_rotmah, 0x05e); -EMIT_RI7 (spe_rotmahi, 0x07e); -EMIT_RR (spe_rotma, 0x05a); -EMIT_RI7 (spe_rotmai, 0x07a); - - -/* Compare, branch, and halt instructions - */ -EMIT_RR (spe_heq, 0x3d8); -EMIT_RI10(spe_heqi, 0x07f); -EMIT_RR (spe_hgt, 0x258); -EMIT_RI10(spe_hgti, 0x04f); -EMIT_RR (spe_hlgt, 0x2d8); -EMIT_RI10(spe_hlgti, 0x05f); -EMIT_RR (spe_ceqb, 0x3d0); -EMIT_RI10(spe_ceqbi, 0x07e); -EMIT_RR (spe_ceqh, 0x3c8); -EMIT_RI10(spe_ceqhi, 0x07d); -EMIT_RR (spe_ceq, 0x3c0); -EMIT_RI10(spe_ceqi, 0x07c); -EMIT_RR (spe_cgtb, 0x250); -EMIT_RI10(spe_cgtbi, 0x04e); -EMIT_RR (spe_cgth, 0x248); -EMIT_RI10(spe_cgthi, 0x04d); -EMIT_RR (spe_cgt, 0x240); -EMIT_RI10(spe_cgti, 0x04c); -EMIT_RR (spe_clgtb, 0x2d0); -EMIT_RI10(spe_clgtbi, 0x05e); -EMIT_RR (spe_clgth, 0x2c8); -EMIT_RI10(spe_clgthi, 0x05d); -EMIT_RR (spe_clgt, 0x2c0); -EMIT_RI10(spe_clgti, 0x05c); -EMIT_I16 (spe_br, 0x064); -EMIT_I16 (spe_bra, 0x060); -EMIT_RI16(spe_brsl, 0x066); -EMIT_RI16(spe_brasl, 0x062); -EMIT_RI16(spe_brnz, 0x042); -EMIT_RI16(spe_brz, 0x040); -EMIT_RI16(spe_brhnz, 0x046); -EMIT_RI16(spe_brhz, 0x044); - -extern void spe_bi(struct spe_function *p, unsigned rA, int d, int e); -extern void spe_iret(struct spe_function *p, unsigned rA, int d, int e); -extern void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, - int d, int e); -extern void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, - int d, int e); -extern void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, - int d, int e); -extern void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, - int d, int e); -extern void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, - int d, int e); -extern void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, - int d, int e); - - -/* Floating-point instructions - */ -EMIT_RR (spe_fa, 0x2c4); -EMIT_RR (spe_dfa, 0x2cc); -EMIT_RR (spe_fs, 0x2c5); -EMIT_RR (spe_dfs, 0x2cd); -EMIT_RR (spe_fm, 0x2c6); -EMIT_RR (spe_dfm, 0x2ce); -EMIT_RRR (spe_fma, 0x00e); -EMIT_RR (spe_dfma, 0x35c); -EMIT_RRR (spe_fnms, 0x00d); -EMIT_RR (spe_dfnms, 0x35e); -EMIT_RRR (spe_fms, 0x00f); -EMIT_RR (spe_dfms, 0x35d); -EMIT_RR (spe_dfnma, 0x35f); -EMIT_R (spe_frest, 0x1b8); -EMIT_R (spe_frsqest, 0x1b9); -EMIT_RR (spe_fi, 0x3d4); -EMIT_RI8 (spe_csflt, 0x1da); -EMIT_RI8 (spe_cflts, 0x1d8); -EMIT_RI8 (spe_cuflt, 0x1db); -EMIT_RI8 (spe_cfltu, 0x1d9); -EMIT_R (spe_frds, 0x3b9); -EMIT_R (spe_fesd, 0x3b8); -EMIT_RR (spe_dfceq, 0x3c3); -EMIT_RR (spe_dfcmeq, 0x3cb); -EMIT_RR (spe_dfcgt, 0x2c3); -EMIT_RR (spe_dfcmgt, 0x2cb); -EMIT_RI7 (spe_dftsv, 0x3bf); -EMIT_RR (spe_fceq, 0x3c2); -EMIT_RR (spe_fcmeq, 0x3ca); -EMIT_RR (spe_fcgt, 0x2c2); -EMIT_RR (spe_fcmgt, 0x2ca); -EMIT_R (spe_fscrwr, 0x3ba); -EMIT_ (spe_fscrrd, 0x398); - - -/* Channel instructions - */ -EMIT_R (spe_rdch, 0x00d); -EMIT_R (spe_rdchcnt, 0x00f); -EMIT_R (spe_wrch, 0x10d); - - -#ifdef UNDEF_EMIT_MACROS -#undef EMIT_ -#undef EMIT_R -#undef EMIT_RR -#undef EMIT_RRR -#undef EMIT_RI7 -#undef EMIT_RI8 -#undef EMIT_RI10 -#undef EMIT_RI16 -#undef EMIT_RI18 -#undef EMIT_I16 -#undef UNDEF_EMIT_MACROS -#endif /* EMIT_ */ diff --git a/src/mesa/sources b/src/mesa/sources index 9e56694893..f0bf7b31fb 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -246,7 +246,6 @@ ASM_C_SOURCES = \ x86/rtasm/x86sse.c \ sparc/sparc.c \ ppc/common_ppc.c \ - ppc/rtasm/spe_asm.c \ x86-64/x86-64.c X86_SOURCES = \ -- cgit v1.2.3 From b9da3791c934e05b82063a8c79c423a0a8e29a94 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 19 Feb 2008 15:07:53 +0900 Subject: Remove src/mesa and src/mesa/main from gallium source include paths. --- SConstruct | 2 - src/gallium/Makefile.template | 2 - src/gallium/auxiliary/draw/draw_vf_sse.c | 3 +- src/gallium/auxiliary/pipebuffer/linked_list.h | 91 ------ .../auxiliary/pipebuffer/pb_buffer_fenced.c | 3 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 3 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c | 3 +- src/gallium/auxiliary/util/u_double_list.h | 91 ++++++ src/gallium/auxiliary/util/u_simple_list.h | 197 +++++++++++++ src/gallium/drivers/i915simple/i915_debug.c | 2 - src/gallium/include/pipe/p_thread.h | 327 +++++++++++++++++++-- src/gallium/winsys/dri/intel/SConscript | 2 +- src/gallium/winsys/dri/intel/intel_batchpool.c | 15 +- src/gallium/winsys/xlib/SConscript | 6 + src/gallium/winsys/xlib/brw_aub.c | 16 +- src/mesa/SConscript | 7 + src/mesa/x86/common_x86_asm.S | 2 +- 17 files changed, 620 insertions(+), 152 deletions(-) delete mode 100644 src/gallium/auxiliary/pipebuffer/linked_list.h create mode 100644 src/gallium/auxiliary/util/u_double_list.h create mode 100644 src/gallium/auxiliary/util/u_simple_list.h (limited to 'src/gallium/auxiliary') diff --git a/SConstruct b/SConstruct index b20f88a303..6cb07302e8 100644 --- a/SConstruct +++ b/SConstruct @@ -109,8 +109,6 @@ else: # Includes env.Append(CPPPATH = [ '#/include', - '#/src/mesa', - '#/src/mesa/main', '#/src/gallium/include', '#/src/gallium/auxiliary', '#/src/gallium/drivers', diff --git a/src/gallium/Makefile.template b/src/gallium/Makefile.template index 6698212e77..4e462b5c97 100644 --- a/src/gallium/Makefile.template +++ b/src/gallium/Makefile.template @@ -18,8 +18,6 @@ INCLUDES = \ -I$(TOP)/src/gallium/include \ -I$(TOP)/src/gallium/auxiliary \ -I$(TOP)/src/gallium/drivers \ - -I$(TOP)/src/mesa \ - -I$(TOP)/src/mesa/main \ -I$(TOP)/include \ $(DRIVER_INCLUDES) diff --git a/src/gallium/auxiliary/draw/draw_vf_sse.c b/src/gallium/auxiliary/draw/draw_vf_sse.c index 6076f9849d..aff4ffd985 100644 --- a/src/gallium/auxiliary/draw/draw_vf_sse.c +++ b/src/gallium/auxiliary/draw/draw_vf_sse.c @@ -26,9 +26,8 @@ */ -#include "simple_list.h" - #include "pipe/p_compiler.h" +#include "util/u_simple_list.h" #include "draw_vf.h" diff --git a/src/gallium/auxiliary/pipebuffer/linked_list.h b/src/gallium/auxiliary/pipebuffer/linked_list.h deleted file mode 100644 index e99817fb13..0000000000 --- a/src/gallium/auxiliary/pipebuffer/linked_list.h +++ /dev/null @@ -1,91 +0,0 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND. USA. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - **************************************************************************/ - -/** - * \file - * List macros heavily inspired by the Linux kernel - * list handling. No list looping yet. - * - * Is not threadsafe, so common operations need to - * be protected using an external mutex. - */ - -#ifndef LINKED_LIST_H_ -#define LINKED_LIST_H_ - - -#include - - -struct list_head -{ - struct list_head *prev; - struct list_head *next; -}; - - -#define LIST_INITHEAD(__item) \ - do { \ - (__item)->prev = (__item); \ - (__item)->next = (__item); \ - } while (0) - -#define LIST_ADD(__item, __list) \ - do { \ - (__item)->prev = (__list); \ - (__item)->next = (__list)->next; \ - (__list)->next->prev = (__item); \ - (__list)->next = (__item); \ - } while (0) - -#define LIST_ADDTAIL(__item, __list) \ - do { \ - (__item)->next = (__list); \ - (__item)->prev = (__list)->prev; \ - (__list)->prev->next = (__item); \ - (__list)->prev = (__item); \ - } while(0) - -#define LIST_DEL(__item) \ - do { \ - (__item)->prev->next = (__item)->next; \ - (__item)->next->prev = (__item)->prev; \ - } while(0) - -#define LIST_DELINIT(__item) \ - do { \ - (__item)->prev->next = (__item)->next; \ - (__item)->next->prev = (__item)->prev; \ - (__item)->next = (__item); \ - (__item)->prev = (__item); \ - } while(0) - -#define LIST_ENTRY(__type, __item, __field) \ - ((__type *)(((char *)(__item)) - offsetof(__type, __field))) - - -#endif /*LINKED_LIST_H_*/ diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index bc85c4b19f..e2ee72ed1f 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -34,13 +34,12 @@ */ -#include "linked_list.h" - #include "pipe/p_compiler.h" #include "pipe/p_debug.h" #include "pipe/p_winsys.h" #include "pipe/p_thread.h" #include "pipe/p_util.h" +#include "util/u_double_list.h" #include "pb_buffer.h" #include "pb_buffer_fenced.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index 983a105347..ff4fd123f3 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -33,12 +33,11 @@ */ -#include "linked_list.h" - #include "pipe/p_defines.h" #include "pipe/p_debug.h" #include "pipe/p_thread.h" #include "pipe/p_util.h" +#include "util/u_double_list.h" #include "util/u_mm.h" #include "pb_buffer.h" #include "pb_bufmgr.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c index beb145b7cb..528e9528f6 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -35,13 +35,12 @@ */ -#include "linked_list.h" - #include "pipe/p_compiler.h" #include "pipe/p_debug.h" #include "pipe/p_thread.h" #include "pipe/p_defines.h" #include "pipe/p_util.h" +#include "util/u_double_list.h" #include "pb_buffer.h" #include "pb_bufmgr.h" diff --git a/src/gallium/auxiliary/util/u_double_list.h b/src/gallium/auxiliary/util/u_double_list.h new file mode 100644 index 0000000000..8cb10c9820 --- /dev/null +++ b/src/gallium/auxiliary/util/u_double_list.h @@ -0,0 +1,91 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND. USA. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + **************************************************************************/ + +/** + * \file + * List macros heavily inspired by the Linux kernel + * list handling. No list looping yet. + * + * Is not threadsafe, so common operations need to + * be protected using an external mutex. + */ + +#ifndef _U_DOUBLE_LIST_H_ +#define _U_DOUBLE_LIST_H_ + + +#include + + +struct list_head +{ + struct list_head *prev; + struct list_head *next; +}; + + +#define LIST_INITHEAD(__item) \ + do { \ + (__item)->prev = (__item); \ + (__item)->next = (__item); \ + } while (0) + +#define LIST_ADD(__item, __list) \ + do { \ + (__item)->prev = (__list); \ + (__item)->next = (__list)->next; \ + (__list)->next->prev = (__item); \ + (__list)->next = (__item); \ + } while (0) + +#define LIST_ADDTAIL(__item, __list) \ + do { \ + (__item)->next = (__list); \ + (__item)->prev = (__list)->prev; \ + (__list)->prev->next = (__item); \ + (__list)->prev = (__item); \ + } while(0) + +#define LIST_DEL(__item) \ + do { \ + (__item)->prev->next = (__item)->next; \ + (__item)->next->prev = (__item)->prev; \ + } while(0) + +#define LIST_DELINIT(__item) \ + do { \ + (__item)->prev->next = (__item)->next; \ + (__item)->next->prev = (__item)->prev; \ + (__item)->next = (__item); \ + (__item)->prev = (__item); \ + } while(0) + +#define LIST_ENTRY(__type, __item, __field) \ + ((__type *)(((char *)(__item)) - offsetof(__type, __field))) + + +#endif /*_U_DOUBLE_LIST_H_*/ diff --git a/src/gallium/auxiliary/util/u_simple_list.h b/src/gallium/auxiliary/util/u_simple_list.h new file mode 100644 index 0000000000..f5f43b0faa --- /dev/null +++ b/src/gallium/auxiliary/util/u_simple_list.h @@ -0,0 +1,197 @@ +/** + * \file simple_list.h + * Simple macros for type-safe, intrusive lists. + * + * Intended to work with a list sentinal which is created as an empty + * list. Insert & delete are O(1). + * + * \author + * (C) 1997, Keith Whitwell + */ + +/* + * Mesa 3-D graphics library + * Version: 3.5 + * + * Copyright (C) 1999-2001 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +#ifndef _U_SIMPLE_LIST_H_ +#define _U_SIMPLE_LIST_H_ + +/** + * Remove an element from list. + * + * \param elem element to remove. + */ +#define remove_from_list(elem) \ +do { \ + (elem)->next->prev = (elem)->prev; \ + (elem)->prev->next = (elem)->next; \ +} while (0) + +/** + * Insert an element to the list head. + * + * \param list list. + * \param elem element to insert. + */ +#define insert_at_head(list, elem) \ +do { \ + (elem)->prev = list; \ + (elem)->next = (list)->next; \ + (list)->next->prev = elem; \ + (list)->next = elem; \ +} while(0) + +/** + * Insert an element to the list tail. + * + * \param list list. + * \param elem element to insert. + */ +#define insert_at_tail(list, elem) \ +do { \ + (elem)->next = list; \ + (elem)->prev = (list)->prev; \ + (list)->prev->next = elem; \ + (list)->prev = elem; \ +} while(0) + +/** + * Move an element to the list head. + * + * \param list list. + * \param elem element to move. + */ +#define move_to_head(list, elem) \ +do { \ + remove_from_list(elem); \ + insert_at_head(list, elem); \ +} while (0) + +/** + * Move an element to the list tail. + * + * \param list list. + * \param elem element to move. + */ +#define move_to_tail(list, elem) \ +do { \ + remove_from_list(elem); \ + insert_at_tail(list, elem); \ +} while (0) + +/** + * Make a empty list empty. + * + * \param sentinal list (sentinal element). + */ +#define make_empty_list(sentinal) \ +do { \ + (sentinal)->next = sentinal; \ + (sentinal)->prev = sentinal; \ +} while (0) + +/** + * Get list first element. + * + * \param list list. + * + * \return pointer to first element. + */ +#define first_elem(list) ((list)->next) + +/** + * Get list last element. + * + * \param list list. + * + * \return pointer to last element. + */ +#define last_elem(list) ((list)->prev) + +/** + * Get next element. + * + * \param elem element. + * + * \return pointer to next element. + */ +#define next_elem(elem) ((elem)->next) + +/** + * Get previous element. + * + * \param elem element. + * + * \return pointer to previous element. + */ +#define prev_elem(elem) ((elem)->prev) + +/** + * Test whether element is at end of the list. + * + * \param list list. + * \param elem element. + * + * \return non-zero if element is at end of list, or zero otherwise. + */ +#define at_end(list, elem) ((elem) == (list)) + +/** + * Test if a list is empty. + * + * \param list list. + * + * \return non-zero if list empty, or zero otherwise. + */ +#define is_empty_list(list) ((list)->next == (list)) + +/** + * Walk through the elements of a list. + * + * \param ptr pointer to the current element. + * \param list list. + * + * \note It should be followed by a { } block or a single statement, as in a \c + * for loop. + */ +#define foreach(ptr, list) \ + for( ptr=(list)->next ; ptr!=list ; ptr=(ptr)->next ) + +/** + * Walk through the elements of a list. + * + * Same as #foreach but lets you unlink the current value during a list + * traversal. Useful for freeing a list, element by element. + * + * \param ptr pointer to the current element. + * \param t temporary pointer. + * \param list list. + * + * \note It should be followed by a { } block or a single statement, as in a \c + * for loop. + */ +#define foreach_s(ptr, t, list) \ + for(ptr=(list)->next,t=(ptr)->next; list != ptr; ptr=t, t=(t)->next) + +#endif /* _U_SIMPLE_LIST_H_ */ diff --git a/src/gallium/drivers/i915simple/i915_debug.c b/src/gallium/drivers/i915simple/i915_debug.c index 94db44e1aa..78102dbac2 100644 --- a/src/gallium/drivers/i915simple/i915_debug.c +++ b/src/gallium/drivers/i915simple/i915_debug.c @@ -25,8 +25,6 @@ * **************************************************************************/ -//#include "imports.h" - #include "i915_reg.h" #include "i915_context.h" #include "i915_winsys.h" diff --git a/src/gallium/include/pipe/p_thread.h b/src/gallium/include/pipe/p_thread.h index cd432c547c..4325abc951 100644 --- a/src/gallium/include/pipe/p_thread.h +++ b/src/gallium/include/pipe/p_thread.h @@ -1,54 +1,317 @@ /************************************************************************** * + * Copyright 1999-2006 Brian Paul * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * **************************************************************************/ -#ifndef P_THREAD_H -#define P_THREAD_H +/** + * @file + * Thread + * + * Initial version by John Stone (j.stone@acm.org) (johns@cs.umr.edu) + * and Christoph Poliwoda (poliwoda@volumegraphics.com) + * Revised by Keith Whitwell + * Adapted for new gl dispatcher by Brian Paul + * + * + * + * DOCUMENTATION + * + * This thread module exports the following types: + * _glthread_TSD Thread-specific data area + * _glthread_Thread Thread datatype + * _glthread_Mutex Mutual exclusion lock + * + * Macros: + * _glthread_DECLARE_STATIC_MUTEX(name) Declare a non-local mutex + * _glthread_INIT_MUTEX(name) Initialize a mutex + * _glthread_LOCK_MUTEX(name) Lock a mutex + * _glthread_UNLOCK_MUTEX(name) Unlock a mutex + * + * Functions: + * _glthread_GetID(v) Get integer thread ID + * _glthread_InitTSD() Initialize thread-specific data + * _glthread_GetTSD() Get thread-specific data + * _glthread_SetTSD() Set thread-specific data + * + * If this file is accidentally included by a non-threaded build, + * it should not cause the build to fail, or otherwise cause problems. + * In general, it should only be included when needed however. + */ + +#ifndef _P_THREAD_H_ +#define _P_THREAD_H_ + + +#if defined(USE_MGL_NAMESPACE) +#define _glapi_Dispatch _mglapi_Dispatch +#endif + + + +#if (defined(PTHREADS) || defined(SOLARIS_THREADS) ||\ + defined(WIN32_THREADS) || defined(USE_XTHREADS) || defined(BEOS_THREADS)) \ + && !defined(THREADS) +# define THREADS +#endif + +#ifdef VMS +#include +#endif + +/* + * POSIX threads. This should be your choice in the Unix world + * whenever possible. When building with POSIX threads, be sure + * to enable any compiler flags which will cause the MT-safe + * libc (if one exists) to be used when linking, as well as any + * header macros for MT-safe errno, etc. For Solaris, this is the -mt + * compiler flag. On Solaris with gcc, use -D_REENTRANT to enable + * proper compiling for MT-safe libc etc. + */ +#if defined(PTHREADS) +#include /* POSIX threads headers */ + +typedef struct { + pthread_key_t key; + int initMagic; +} _glthread_TSD; + +typedef pthread_t _glthread_Thread; + +typedef pthread_mutex_t _glthread_Mutex; + +#define _glthread_DECLARE_STATIC_MUTEX(name) \ + static _glthread_Mutex name = PTHREAD_MUTEX_INITIALIZER + +#define _glthread_INIT_MUTEX(name) \ + pthread_mutex_init(&(name), NULL) + +#define _glthread_DESTROY_MUTEX(name) \ + pthread_mutex_destroy(&(name)) + +#define _glthread_LOCK_MUTEX(name) \ + (void) pthread_mutex_lock(&(name)) + +#define _glthread_UNLOCK_MUTEX(name) \ + (void) pthread_mutex_unlock(&(name)) + +#endif /* PTHREADS */ + + + + +/* + * Solaris threads. Use only up to Solaris 2.4. + * Solaris 2.5 and higher provide POSIX threads. + * Be sure to compile with -mt on the Solaris compilers, or + * use -D_REENTRANT if using gcc. + */ +#ifdef SOLARIS_THREADS +#include + +typedef struct { + thread_key_t key; + mutex_t keylock; + int initMagic; +} _glthread_TSD; + +typedef thread_t _glthread_Thread; + +typedef mutex_t _glthread_Mutex; + +/* XXX need to really implement mutex-related macros */ +#define _glthread_DECLARE_STATIC_MUTEX(name) static _glthread_Mutex name = 0 +#define _glthread_INIT_MUTEX(name) (void) name +#define _glthread_DESTROY_MUTEX(name) (void) name +#define _glthread_LOCK_MUTEX(name) (void) name +#define _glthread_UNLOCK_MUTEX(name) (void) name + +#endif /* SOLARIS_THREADS */ + + + + +/* + * Windows threads. Should work with Windows NT and 95. + * IMPORTANT: Link with multithreaded runtime library when THREADS are + * used! + */ +#ifdef WIN32_THREADS +#include + +typedef struct { + DWORD key; + int initMagic; +} _glthread_TSD; + +typedef HANDLE _glthread_Thread; + +typedef CRITICAL_SECTION _glthread_Mutex; + +#define _glthread_DECLARE_STATIC_MUTEX(name) /*static*/ _glthread_Mutex name = {0,0,0,0,0,0} +#define _glthread_INIT_MUTEX(name) InitializeCriticalSection(&name) +#define _glthread_DESTROY_MUTEX(name) DeleteCriticalSection(&name) +#define _glthread_LOCK_MUTEX(name) EnterCriticalSection(&name) +#define _glthread_UNLOCK_MUTEX(name) LeaveCriticalSection(&name) + +#endif /* WIN32_THREADS */ + + -#include "p_compiler.h" /* - * XXX: We should come up with a system-independent thread definitions. - * XXX: Patching glthread defs for now. + * XFree86 has its own thread wrapper, Xthreads.h + * We wrap it again for GL. */ +#ifdef USE_XTHREADS +#include + +typedef struct { + xthread_key_t key; + int initMagic; +} _glthread_TSD; + +typedef xthread_t _glthread_Thread; + +typedef xmutex_rec _glthread_Mutex; + +#ifdef XMUTEX_INITIALIZER +#define _glthread_DECLARE_STATIC_MUTEX(name) \ + static _glthread_Mutex name = XMUTEX_INITIALIZER +#else +#define _glthread_DECLARE_STATIC_MUTEX(name) \ + static _glthread_Mutex name +#endif + +#define _glthread_INIT_MUTEX(name) \ + xmutex_init(&(name)) + +#define _glthread_DESTROY_MUTEX(name) \ + xmutex_clear(&(name)) + +#define _glthread_LOCK_MUTEX(name) \ + (void) xmutex_lock(&(name)) + +#define _glthread_UNLOCK_MUTEX(name) \ + (void) xmutex_unlock(&(name)) + +#endif /* USE_XTHREADS */ + + + +/* + * BeOS threads. R5.x required. + */ +#ifdef BEOS_THREADS + +#include +#include + +typedef struct { + int32 key; + int initMagic; +} _glthread_TSD; + +typedef thread_id _glthread_Thread; + +/* Use Benaphore, aka speeder semaphore */ +typedef struct { + int32 lock; + sem_id sem; +} benaphore; +typedef benaphore _glthread_Mutex; + +#define _glthread_DECLARE_STATIC_MUTEX(name) static _glthread_Mutex name = { 0, 0 } +#define _glthread_INIT_MUTEX(name) name.sem = create_sem(0, #name"_benaphore"), name.lock = 0 +#define _glthread_DESTROY_MUTEX(name) delete_sem(name.sem), name.lock = 0 +#define _glthread_LOCK_MUTEX(name) if (name.sem == 0) _glthread_INIT_MUTEX(name); \ + if (atomic_add(&(name.lock), 1) >= 1) acquire_sem(name.sem) +#define _glthread_UNLOCK_MUTEX(name) if (atomic_add(&(name.lock), -1) > 1) release_sem(name.sem) + +#endif /* BEOS_THREADS */ + + + +#ifndef THREADS + +/* + * THREADS not defined + */ + +typedef unsigned _glthread_TSD; + +typedef unsigned _glthread_Thread; + +typedef unsigned _glthread_Mutex; + +#define _glthread_DECLARE_STATIC_MUTEX(name) static _glthread_Mutex name = 0 + +#define _glthread_INIT_MUTEX(name) (void) name + +#define _glthread_DESTROY_MUTEX(name) (void) name + +#define _glthread_LOCK_MUTEX(name) (void) name + +#define _glthread_UNLOCK_MUTEX(name) (void) name + +#endif /* THREADS */ + + + +/* + * Platform independent thread specific data API. + */ + +extern unsigned long +_glthread_GetID(void); + + +extern void +_glthread_InitTSD(_glthread_TSD *); + + +extern void * +_glthread_GetTSD(_glthread_TSD *); -#ifndef __MSC__ -#include "glapi/glthread.h" +extern void +_glthread_SetTSD(_glthread_TSD *, void *); -#else /* __MSC__ */ +#if defined(GLX_USE_TLS) -typedef int _glthread_Mutex; +extern __thread struct _glapi_table * _glapi_tls_Dispatch + __attribute__((tls_model("initial-exec"))); -#define _glthread_INIT_MUTEX( M ) ((void) (M)) -#define _glthread_LOCK_MUTEX( M ) ((void) (M)) -#define _glthread_UNLOCK_MUTEX( M ) ((void) (M)) +#define GET_DISPATCH() _glapi_tls_Dispatch -#define sched_yield() ((void) 0) +#elif !defined(GL_CALL) +# if defined(THREADS) +# define GET_DISPATCH() \ + ((__builtin_expect( _glapi_Dispatch != NULL, 1 )) \ + ? _glapi_Dispatch : _glapi_get_dispatch()) +# else +# define GET_DISPATCH() _glapi_Dispatch +# endif /* defined(THREADS) */ +#endif /* ndef GL_CALL */ -#endif /* __MSC__ */ -#endif /* P_THREAD_H */ +#endif /* _P_THREAD_H_ */ diff --git a/src/gallium/winsys/dri/intel/SConscript b/src/gallium/winsys/dri/intel/SConscript index 525ba580e8..0ad19d42a8 100644 --- a/src/gallium/winsys/dri/intel/SConscript +++ b/src/gallium/winsys/dri/intel/SConscript @@ -35,5 +35,5 @@ drivers = [ env.SharedLibrary( target ='i915tex_dri.so', source = sources, - LIBS = mesa + drivers + auxiliaries + env['LIBS'], + LIBS = drivers + mesa + auxiliaries + env['LIBS'], ) \ No newline at end of file diff --git a/src/gallium/winsys/dri/intel/intel_batchpool.c b/src/gallium/winsys/dri/intel/intel_batchpool.c index 33b56817f6..ce154c7b88 100644 --- a/src/gallium/winsys/dri/intel/intel_batchpool.c +++ b/src/gallium/winsys/dri/intel/intel_batchpool.c @@ -36,9 +36,12 @@ #include #include +#include #include -#include "imports.h" -#include "glthread.h" + +#include "pipe/p_compiler.h" +#include "pipe/p_thread.h" + #include "dri_bufpool.h" #include "dri_bufmgr.h" #include "intel_batchpool.h" @@ -196,7 +199,7 @@ pool_create(struct _DriBufferPool *pool, _glthread_LOCK_MUTEX(p->mutex); if (p->numFree == 0) - pool_checkFree(p, GL_TRUE); + pool_checkFree(p, TRUE); if (p->numFree == 0) { fprintf(stderr, "Out of fixed size buffer objects\n"); @@ -278,7 +281,7 @@ pool_map(struct _DriBufferPool *pool, void *private, unsigned flags, return -EBUSY; } - buf->mapped = GL_TRUE; + buf->mapped = TRUE; *virtual = (unsigned char *) p->virtual + buf->start; _glthread_UNLOCK_MUTEX(p->mutex); return 0; @@ -361,7 +364,7 @@ pool_validate(struct _DriBufferPool *pool, void *private) BBuf *buf = (BBuf *) private; BPool *p = buf->parent; _glthread_LOCK_MUTEX(p->mutex); - buf->unfenced = GL_TRUE; + buf->unfenced = TRUE; _glthread_UNLOCK_MUTEX(p->mutex); return 0; } @@ -379,7 +382,7 @@ pool_takedown(struct _DriBufferPool *pool) while ((p->numFree < p->numTot) && p->numDelayed) { _glthread_UNLOCK_MUTEX(p->mutex); sched_yield(); - pool_checkFree(p, GL_TRUE); + pool_checkFree(p, TRUE); _glthread_LOCK_MUTEX(p->mutex); } diff --git a/src/gallium/winsys/xlib/SConscript b/src/gallium/winsys/xlib/SConscript index f8aa5ef945..c38b5be52c 100644 --- a/src/gallium/winsys/xlib/SConscript +++ b/src/gallium/winsys/xlib/SConscript @@ -3,6 +3,12 @@ Import('*') +env = env.Clone() + +env.Append(CPPPATH = [ + '#/src/mesa', + '#/src/mesa/main', +]) sources = [ 'glxapi.c', diff --git a/src/gallium/winsys/xlib/brw_aub.c b/src/gallium/winsys/xlib/brw_aub.c index 541d50c6e4..10eedd8402 100644 --- a/src/gallium/winsys/xlib/brw_aub.c +++ b/src/gallium/winsys/xlib/brw_aub.c @@ -29,11 +29,13 @@ * Keith Whitwell */ +#include +#include #include "brw_aub.h" #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "imports.h" -//#include "intel_winsys.h" +#include "pipe/p_util.h" +#include "pipe/p_debug.h" struct brw_aubfile { @@ -350,9 +352,9 @@ struct brw_aubfile *brw_aubfile_create( void ) i++; - if (_mesa_getenv("INTEL_AUBFILE")) { - val = snprintf(filename, sizeof(filename), "%s%d.aub", _mesa_getenv("INTEL_AUBFILE"), i%4); - _mesa_printf("--> Aub file: %s\n", filename); + if (getenv("INTEL_AUBFILE")) { + val = snprintf(filename, sizeof(filename), "%s%d.aub", getenv("INTEL_AUBFILE"), i%4); + debug_printf("--> Aub file: %s\n", filename); aubfile->file = fopen(filename, "w"); } else { @@ -360,12 +362,12 @@ struct brw_aubfile *brw_aubfile_create( void ) if (val < 0 || val > sizeof(filename)) strcpy(filename, "default.aub"); - _mesa_printf("--> Aub file: %s\n", filename); + debug_printf("--> Aub file: %s\n", filename); aubfile->file = fopen(filename, "w"); } if (!aubfile->file) { - _mesa_printf("couldn't open aubfile\n"); + debug_printf("couldn't open aubfile\n"); exit(1); } diff --git a/src/mesa/SConscript b/src/mesa/SConscript index a828133580..db18c61fac 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -4,6 +4,13 @@ Import('*') +env = env.Clone() + +# Includes +env.Append(CPPPATH = [ + '#/src/mesa', + '#/src/mesa/main', +]) ####################################################################### # Core sources diff --git a/src/mesa/x86/common_x86_asm.S b/src/mesa/x86/common_x86_asm.S index ef3cc9eb59..09c86b05ba 100644 --- a/src/mesa/x86/common_x86_asm.S +++ b/src/mesa/x86/common_x86_asm.S @@ -39,7 +39,7 @@ * in there will break the build on some platforms. */ -#include "matypes.h" +#include "assyntax.h" #include "common_x86_features.h" SEG_TEXT -- cgit v1.2.3 From b62f0ddd09f8ef9c400feca321412b3f0bc77e63 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 19 Feb 2008 18:56:55 +0900 Subject: Portability fixes. Eliminate C99/C++ constructs. (We should actually disable gcc C99 syntax options if we are serious about portability.) --- src/gallium/auxiliary/cso_cache/cso_hash.c | 35 ++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 12 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c index 0338cb3b47..b40217c524 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -158,11 +158,14 @@ static void cso_data_rehash(struct cso_hash_data *hash, int hint) while (firstNode != e) { unsigned h = firstNode->key; struct cso_node *lastNode = firstNode; + struct cso_node *afterLastNode; + struct cso_node **beforeFirstNode; + while (lastNode->next != e && lastNode->next->key == h) lastNode = lastNode->next; - struct cso_node *afterLastNode = lastNode->next; - struct cso_node **beforeFirstNode = &hash->buckets[h % hash->numBuckets]; + afterLastNode = lastNode->next; + beforeFirstNode = &hash->buckets[h % hash->numBuckets]; while (*beforeFirstNode != e) beforeFirstNode = &(*beforeFirstNode)->next; lastNode->next = *beforeFirstNode; @@ -222,10 +225,12 @@ struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, { cso_data_might_grow(hash->data.d); - struct cso_node **nextNode = cso_hash_find_node(hash, key); - struct cso_node *node = cso_hash_create_node(hash, key, data, nextNode); - struct cso_hash_iter iter = {hash, node}; - return iter; + { + struct cso_node **nextNode = cso_hash_find_node(hash, key); + struct cso_node *node = cso_hash_create_node(hash, key, data, nextNode); + struct cso_hash_iter iter = {hash, node}; + return iter; + } } struct cso_hash * cso_hash_create(void) @@ -290,6 +295,10 @@ static struct cso_node *cso_hash_data_next(struct cso_node *node) struct cso_node *e; struct cso_hash_data *d; } a; + int start; + struct cso_node **bucket; + int n; + a.next = node->next; if (!a.next) { fprintf(stderr, "iterating beyond the last element\n"); @@ -298,9 +307,9 @@ static struct cso_node *cso_hash_data_next(struct cso_node *node) if (a.next->next) return a.next; - int start = (node->key % a.d->numBuckets) + 1; - struct cso_node **bucket = a.d->buckets + start; - int n = a.d->numBuckets - start; + start = (node->key % a.d->numBuckets) + 1; + bucket = a.d->buckets + start; + n = a.d->numBuckets - start; while (n--) { if (*bucket != a.e) return *bucket; @@ -316,19 +325,21 @@ static struct cso_node *cso_hash_data_prev(struct cso_node *node) struct cso_node *e; struct cso_hash_data *d; } a; + int start; + struct cso_node *sentinel; + struct cso_node **bucket; a.e = node; while (a.e->next) a.e = a.e->next; - int start; if (node == a.e) start = a.d->numBuckets - 1; else start = node->key % a.d->numBuckets; - struct cso_node *sentinel = node; - struct cso_node **bucket = a.d->buckets + start; + sentinel = node; + bucket = a.d->buckets + start; while (start >= 0) { if (*bucket != sentinel) { struct cso_node *prev = *bucket; -- cgit v1.2.3 From 5480a6bc13a555f99a89fc801cfe153182697dda Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 19 Feb 2008 18:57:25 +0900 Subject: Fix windows build. --- src/gallium/auxiliary/rtasm/rtasm_execmem.c | 3 ++- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 2 +- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 2 +- 3 files changed, 4 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c index 9c78fa5626..300c1c2d9d 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -33,6 +33,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" #include "pipe/p_thread.h" +#include "pipe/p_util.h" #include "rtasm_execmem.h" @@ -118,7 +119,7 @@ rtasm_exec_free(void *addr) */ void * -rtasm_exec_malloc(GLuint size) +rtasm_exec_malloc(size_t size) { return MALLOC( size ); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index b332192a62..dcbf76f600 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -21,7 +21,7 @@ * **************************************************************************/ -#if defined(__i386__) || defined(__386__) +#if defined(__i386__) || defined(__386__) || defined(i386) #include "pipe/p_compiler.h" #include "pipe/p_debug.h" diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index e4576001bf..606b41eb35 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -24,7 +24,7 @@ #ifndef _RTASM_X86SSE_H_ #define _RTASM_X86SSE_H_ -#if defined(__i386__) || defined(__386__) +#if defined(__i386__) || defined(__386__) || defined(i386) /* It is up to the caller to ensure that instructions issued are * suitable for the host cpu. There are no checks made in this module -- cgit v1.2.3 From 228f6b978804268a482718e762ebfccbba784949 Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 20 Feb 2008 15:32:41 -0700 Subject: gallium: re-fix some msvc warnings --- src/gallium/auxiliary/draw/draw_aaline.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c index f1fee4f8ba..73a02a32e4 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -140,14 +140,14 @@ aa_transform_decl(struct tgsi_transform_context *ctx, aactx->colorOutput = decl->u.DeclarationRange.First; } else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { - if (decl->u.DeclarationRange.Last > aactx->maxSampler) + if ((int) decl->u.DeclarationRange.Last > aactx->maxSampler) aactx->maxSampler = decl->u.DeclarationRange.Last + 1; } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - if (decl->u.DeclarationRange.Last > aactx->maxInput) + if ((int) decl->u.DeclarationRange.Last > aactx->maxInput) aactx->maxInput = decl->u.DeclarationRange.Last; - if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && - decl->Semantic.SemanticIndex > aactx->maxGeneric) { + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && + (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { aactx->maxGeneric = decl->Semantic.SemanticIndex; } } @@ -393,7 +393,7 @@ aaline_create_texture(struct aaline_stage *aaline) for (i = 0; i < size; i++) { for (j = 0; j < size; j++) { - uint d; + ubyte d; if (size == 1) { d = 255; } @@ -494,12 +494,12 @@ aaline_line(struct draw_stage *stage, struct prim_header *header) float *pos, *tex; float dx = header->v[1]->data[0][0] - header->v[0]->data[0][0]; float dy = header->v[1]->data[0][1] - header->v[0]->data[0][1]; - float a = atan2(dy, dx); - float c_a = cos(a), s_a = sin(a); + double a = atan2(dy, dx); + float c_a = (float) cos(a), s_a = (float) sin(a); uint i; /* XXX the ends of lines aren't quite perfect yet, but probably passable */ - dx = 0.5 * half_width; + dx = 0.5F * half_width; dy = half_width; /* allocate/dup new verts */ -- cgit v1.2.3 From 1eaf7b775ba0dacff8a3debd7c0f260970e5a61d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 15 Feb 2008 18:54:00 +0000 Subject: tgsi: print debug messages on failure to codegenerate --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 29a7f842ed..779b901f2b 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -2356,11 +2356,17 @@ tgsi_emit_sse2_fs( ok = emit_instruction( func, &parse.FullToken.FullInstruction ); + + if (!ok) { + debug_printf("failed to translate tgsi opcode %d\n", + parse.FullToken.FullInstruction.Instruction.Opcode ); + } break; case TGSI_TOKEN_TYPE_IMMEDIATE: /* XXX implement this */ ok = 0; + debug_printf("failed to emit immediate value\n"); break; default: -- cgit v1.2.3 From 30479ef11004c9498c4ef09048efc56227f104cc Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 15 Feb 2008 18:56:41 +0000 Subject: draw: vertex cache rework Take a baby step to straightening out vertex paths. --- src/gallium/auxiliary/draw/draw_context.c | 8 ++-- src/gallium/auxiliary/draw/draw_prim.c | 2 + src/gallium/auxiliary/draw/draw_private.h | 13 ++++-- src/gallium/auxiliary/draw/draw_vertex_cache.c | 60 ++++++++++++++----------- src/gallium/auxiliary/draw/draw_vertex_shader.c | 7 +-- 5 files changed, 53 insertions(+), 37 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index a7f3b4aecb..b90a9f474d 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -72,10 +72,10 @@ struct draw_context *draw_create( void ) { uint i; const unsigned size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f; - char *tmp = align_malloc(Elements(draw->vcache.vertex) * size, 16); + char *tmp = align_malloc(Elements(draw->vs.queue) * size, 16); - for (i = 0; i < Elements(draw->vcache.vertex); i++) - draw->vcache.vertex[i] = (struct vertex_header *)(tmp + i * size); + for (i = 0; i < Elements(draw->vs.queue); i++) + draw->vs.queue[i].vertex = (struct vertex_header *)(tmp + i * size); } draw->shader_queue_flush = draw_vertex_shader_queue_flush; @@ -108,7 +108,7 @@ void draw_destroy( struct draw_context *draw ) if (draw->pipeline.rasterize) draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); tgsi_exec_machine_free_data(&draw->machine); - align_free( draw->vcache.vertex[0] ); /* Frees all the vertices. */ + align_free( draw->vs.queue[0].vertex ); /* Frees all the vertices. */ FREE( draw ); } diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c index dd9a848863..7d6cd43410 100644 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -114,6 +114,7 @@ static void draw_prim_queue_flush( struct draw_context *draw ) } draw->pq.queue_nr = 0; + draw->vs.post_nr = 0; draw_vertex_cache_unreference( draw ); } @@ -226,6 +227,7 @@ static void do_triangle( struct draw_context *draw, { struct prim_header *prim = get_queued_prim( draw, 3 ); +// _mesa_printf("tri %d %d %d\n", i0, i1, i2); prim->reset_line_stipple = 1; prim->edgeflags = ~0; prim->pad = 0; diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 6c7e860861..492c152ff9 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -120,7 +120,7 @@ struct draw_stage }; -#define PRIM_QUEUE_LENGTH 16 +#define PRIM_QUEUE_LENGTH 32 #define VCACHE_SIZE 32 #define VCACHE_OVERFLOW 4 #define VS_QUEUE_LENGTH (VCACHE_SIZE + VCACHE_OVERFLOW + 1) /* can never fill up */ @@ -244,8 +244,12 @@ struct draw_context */ struct { unsigned referenced; /**< bitfield */ - unsigned idx[VCACHE_SIZE + VCACHE_OVERFLOW]; - struct vertex_header *vertex[VCACHE_SIZE + VCACHE_OVERFLOW]; + + struct { + unsigned in; /* client array element */ + unsigned out; /* index in vs queue/array */ + } idx[VCACHE_SIZE + VCACHE_OVERFLOW]; + unsigned overflow; /** To find space in the vertex cache: */ @@ -258,9 +262,10 @@ struct draw_context struct { struct { unsigned elt; /**< index into the user's vertex arrays */ - struct vertex_header *dest; /**< points into vcache.vertex[] array */ + struct vertex_header *vertex; } queue[VS_QUEUE_LENGTH]; unsigned queue_nr; + unsigned post_nr; } vs; /** diff --git a/src/gallium/auxiliary/draw/draw_vertex_cache.c b/src/gallium/auxiliary/draw/draw_vertex_cache.c index 44427999cc..53f8bbec44 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_cache.c +++ b/src/gallium/auxiliary/draw/draw_vertex_cache.c @@ -41,7 +41,7 @@ void draw_vertex_cache_invalidate( struct draw_context *draw ) assert(draw->vs.queue_nr == 0); assert(draw->vcache.referenced == 0); - memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx)); +// memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx)); } @@ -62,43 +62,51 @@ static struct vertex_header *get_vertex( struct draw_context *draw, assert(slot < 32); /* so we don't exceed the bitfield size below */ - /* Cache miss? - */ - if (draw->vcache.idx[slot] != i) { - - /* If slot is in use, use the overflow area: + if (draw->vcache.referenced & (1<vcache.referenced & (1 << slot)) { - slot = VCACHE_SIZE + draw->vcache.overflow++; + if (draw->vcache.idx[slot].in == i) { +// _mesa_printf("HIT %d %d\n", slot, i); + assert(draw->vcache.idx[slot].out < draw->vs.queue_nr); + return draw->vs.queue[draw->vcache.idx[slot].out].vertex; } + /* Otherwise a collision + */ + slot = VCACHE_SIZE + draw->vcache.overflow++; +// _mesa_printf("XXX %d --> %d\n", i, slot); + } + + /* Deal with the cache miss: + */ + { + unsigned out; + assert(slot < Elements(draw->vcache.idx)); - draw->vcache.idx[slot] = i; +// _mesa_printf("NEW %d %d\n", slot, i); + draw->vcache.idx[slot].in = i; + draw->vcache.idx[slot].out = out = draw->vs.queue_nr++; + draw->vcache.referenced |= (1 << slot); + /* Add to vertex shader queue: */ assert(draw->vs.queue_nr < VS_QUEUE_LENGTH); - draw->vs.queue[draw->vs.queue_nr].dest = draw->vcache.vertex[slot]; - draw->vs.queue[draw->vs.queue_nr].elt = i; - draw->vs.queue_nr++; + + draw->vs.queue[out].elt = i; + draw->vs.queue[out].vertex->clipmask = 0; + draw->vs.queue[out].vertex->edgeflag = 1; /*XXX use user's edge flag! */ + draw->vs.queue[out].vertex->pad = 0; + draw->vs.queue[out].vertex->vertex_id = UNDEFINED_VERTEX_ID; /* Need to set the vertex's edge flag here. If we're being called * by do_ef_triangle(), that function needs edge flag info! */ - draw->vcache.vertex[slot]->clipmask = 0; - draw->vcache.vertex[slot]->edgeflag = 1; /*XXX use user's edge flag! */ - draw->vcache.vertex[slot]->pad = 0; - draw->vcache.vertex[slot]->vertex_id = UNDEFINED_VERTEX_ID; - } - - /* primitive flushing may have cleared the bitfield but did not - * clear the idx[] array values. Set the bit now. This fixes a - * bug found when drawing long triangle fans. - */ - draw->vcache.referenced |= (1 << slot); - return draw->vcache.vertex[slot]; + return draw->vs.queue[draw->vcache.idx[slot].out].vertex; + } } @@ -130,8 +138,8 @@ void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw ) { unsigned i; - for (i = 0; i < Elements(draw->vcache.vertex); i++) - draw->vcache.vertex[i]->vertex_id = UNDEFINED_VERTEX_ID; + for (i = 0; i < draw->vs.post_nr; i++) + draw->vs.queue[i].vertex->vertex_id = UNDEFINED_VERTEX_ID; } diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c index f68f6e3244..5d2f5c9c43 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c @@ -55,7 +55,7 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) */ shader->prepare( shader, draw ); -// fprintf(stderr, " q(%d) ", draw->vs.queue_nr ); +// fprintf(stderr, "%s %d\n", __FUNCTION__, draw->vs.queue_nr ); /* run vertex shader on vertex cache entries, four per invokation */ for (i = 0; i < draw->vs.queue_nr; i += 4) { @@ -65,12 +65,12 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) for (j = 0; j < n; j++) { elts[j] = draw->vs.queue[i + j].elt; - dests[j] = draw->vs.queue[i + j].dest; + dests[j] = draw->vs.queue[i + j].vertex; } for ( ; j < 4; j++) { elts[j] = elts[0]; - dests[j] = dests[0]; + dests[j] = draw->vs.queue[i + j].vertex; } assert(n > 0); @@ -79,6 +79,7 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) shader->run(shader, draw, elts, n, dests); } + draw->vs.post_nr = draw->vs.queue_nr; draw->vs.queue_nr = 0; } -- cgit v1.2.3 From eb4dc2dd5ed62e6ccb55ccc2bc13f6a2f3fc1f76 Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 21 Feb 2008 16:18:05 -0700 Subject: gallium: new AA point drawing stage AA points are drawn by converting the point to a quad, then modifying the user's fragment shader to compute a coverage value. The final fragment color's alpha is modulated by the coverage value. Fragments outside the point's radius are killed. --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/draw_aapoint.c | 875 +++++++++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_context.c | 2 + src/gallium/auxiliary/draw/draw_context.h | 3 + src/gallium/auxiliary/draw/draw_private.h | 1 + src/gallium/auxiliary/draw/draw_validate.c | 5 + src/gallium/drivers/softpipe/sp_context.c | 6 +- 7 files changed, 892 insertions(+), 1 deletion(-) create mode 100644 src/gallium/auxiliary/draw/draw_aapoint.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 1ee9eca0ca..b7e71ed3ec 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -5,6 +5,7 @@ LIBNAME = draw C_SOURCES = \ draw_aaline.c \ + draw_aapoint.c \ draw_clip.c \ draw_vs_exec.c \ draw_vs_sse.c \ diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c new file mode 100644 index 0000000000..43119cc70b --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_aapoint.c @@ -0,0 +1,875 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * AA point stage: AA points are converted to quads and rendered with a + * special fragment shader. Another approach would be to use a texture + * map image of a point, but experiments indicate the quality isn't nearly + * as good as this approach. + * + * Note: this looks a lot like draw_aaline.c but there's actually little + * if any code that can be shared. + * + * Authors: Brian Paul + */ + + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/util/tgsi_transform.h" +#include "tgsi/util/tgsi_dump.h" + +#include "draw_context.h" +#include "draw_private.h" + + +/* + * Enabling NORMALIZE might give _slightly_ better results. + * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or + * d=x*x+y*y. Since we're working with a unit circle, the later seems + * close enough and saves some costly instructions. + */ +#define NORMALIZE 0 + + +/** + * Subclass of pipe_shader_state to carry extra fragment shader info. + */ +struct aapoint_fragment_shader +{ + struct pipe_shader_state state; + void *driver_fs; /**< the regular shader */ + void *aapoint_fs; /**< the aa point-augmented shader */ +}; + + +/** + * Subclass of draw_stage + */ +struct aapoint_stage +{ + struct draw_stage stage; + + int psize_slot; + float radius; + + /** this is the vertex attrib slot for the new texcoords */ + uint tex_slot; + + /* + * Currently bound state + */ + struct aapoint_fragment_shader *fs; + + /* + * Driver interface/override functions + */ + void * (*driver_create_fs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*driver_bind_fs_state)(struct pipe_context *, void *); + void (*driver_delete_fs_state)(struct pipe_context *, void *); + + struct pipe_context *pipe; +}; + + + +/** + * Subclass of tgsi_transform_context, used for transforming the + * user's fragment shader to add the special AA instructions. + */ +struct aa_transform_context { + struct tgsi_transform_context base; + uint tempsUsed; /**< bitmask */ + int colorOutput; /**< which output is the primary color */ + int maxInput, maxGeneric; /**< max input index found */ + int tmp0, colorTemp; /**< temp registers */ + boolean firstInstruction; +}; + + +/** + * TGSI declaration transform callback. + * Look for two free temp regs and available input reg for new texcoords. + */ +static void +aa_transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + + if (decl->Declaration.File == TGSI_FILE_OUTPUT && + decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && + decl->Semantic.SemanticIndex == 0) { + aactx->colorOutput = decl->u.DeclarationRange.First; + } + else if (decl->Declaration.File == TGSI_FILE_INPUT) { + if ((int) decl->u.DeclarationRange.Last > aactx->maxInput) + aactx->maxInput = decl->u.DeclarationRange.Last; + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && + (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { + aactx->maxGeneric = decl->Semantic.SemanticIndex; + } + } + else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + uint i; + for (i = decl->u.DeclarationRange.First; + i <= decl->u.DeclarationRange.Last; i++) { + aactx->tempsUsed |= (1 << i); + } + } + + ctx->emit_declaration(ctx, decl); +} + + +/** + * TGSI instruction transform callback. + * Replace writes to result.color w/ a temp reg. + * Upon END instruction, insert texture sampling code for antialiasing. + */ +static void +aa_transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + struct tgsi_full_instruction newInst; + + if (aactx->firstInstruction) { + /* emit our new declarations before the first instruction */ + + struct tgsi_full_declaration decl; + const int texInput = aactx->maxInput + 1; + int tmp0; + uint i; + + /* find two free temp regs */ + for (i = 0; i < 32; i++) { + if ((aactx->tempsUsed & (1 << i)) == 0) { + /* found a free temp */ + if (aactx->tmp0 < 0) + aactx->tmp0 = i; + else if (aactx->colorTemp < 0) + aactx->colorTemp = i; + else + break; + } + } + + assert(aactx->colorTemp != aactx->tmp0); + + tmp0 = aactx->tmp0; + + /* declare new generic input/texcoord */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; + decl.Declaration.Interpolate = 1; + /* XXX this could be linear... */ + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = texInput; + ctx->emit_declaration(ctx, &decl); + + /* declare new temp regs */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = tmp0; + ctx->emit_declaration(ctx, &decl); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->colorTemp; + ctx->emit_declaration(ctx, &decl); + + aactx->firstInstruction = FALSE; + + + /* + * Emit code to compute fragment coverage, kill if outside point radius + * + * Temp reg0 usage: + * t0.x = distance of fragment from center point + * t0.y = boolean, is t0.x > 1 ? + * t0.z = temporary for computing 1/(1-k) value + * t0.w = final coverage value + */ + + /* MUL t0.xy, tex, tex; # compute x^2, y^2 */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + ctx->emit_instruction(ctx, &newInst); + + /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_ADD; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + ctx->emit_instruction(ctx, &newInst); + +#if NORMALIZE /* OPTIONAL normalization of length */ + /* RSQ t0.x, t0.x; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_RSQ; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + ctx->emit_instruction(ctx, &newInst); + + /* RCP t0.x, t0.x; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_RCP; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + ctx->emit_instruction(ctx, &newInst); +#endif + + /* SGT t0.y, t0.xxxx, t0.wwww; # bool b = d > 1 (NOTE t0.w == 1) */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SGT; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + ctx->emit_instruction(ctx, &newInst); + + /* KILP -t0.yyyy; # if b, KILL */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_KILP; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + ctx->emit_instruction(ctx, &newInst); + + /* SGT t0.y, t0.x, tex.z; # bool b = distance > k */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SGT; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* IF t0.y # if b then */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_IF; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + ctx->emit_instruction(ctx, &newInst); + + { + /* compute coverage factor = (1-d)/(1-k) */ + + /* SUB t0.z, tex.w, tex.z; # m = 1 - k */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SUB; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* RCP t0.z, t0.z; # t0.z = 1 / m */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_RCP; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* SUB t0.x, 1, t0.x; # d = 1 - d */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SUB; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + ctx->emit_instruction(ctx, &newInst); + + /* MUL t0.w, t0.x, t0.z; # coverage = d * m */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + } + + /* ELSE */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_ELSE; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 0; + ctx->emit_instruction(ctx, &newInst); + + { + /* MOV t0.w, tex.w; # coverage = 1.0 */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MOV; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + ctx->emit_instruction(ctx, &newInst); + } + + /* ENDIF */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_ENDIF; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 0; + ctx->emit_instruction(ctx, &newInst); + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_END) { + /* add alpha modulation code at tail of program */ + + /* MOV result.color.xyz, colorTemp; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MOV; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + ctx->emit_instruction(ctx, &newInst); + + /* MUL result.color.w, colorTemp, tmp0.w; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->tmp0; + ctx->emit_instruction(ctx, &newInst); + } + else { + /* Not an END instruction. + * Look for writes to result.color and replace with colorTemp reg. + */ + uint i; + + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + if (dst->DstRegister.File == TGSI_FILE_OUTPUT && + dst->DstRegister.Index == aactx->colorOutput) { + dst->DstRegister.File = TGSI_FILE_TEMPORARY; + dst->DstRegister.Index = aactx->colorTemp; + } + } + } + + ctx->emit_instruction(ctx, inst); +} + + +/** + * Generate the frag shader we'll use for drawing AA lines. + * This will be the user's shader plus some texture/modulate instructions. + */ +static void +generate_aapoint_fs(struct aapoint_stage *aapoint) +{ + const struct pipe_shader_state *orig_fs = &aapoint->fs->state; + struct draw_context *draw = aapoint->stage.draw; + struct pipe_shader_state aapoint_fs; + struct aa_transform_context transform; + +#define MAX 1000 + + aapoint_fs = *orig_fs; /* copy to init */ + aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + + memset(&transform, 0, sizeof(transform)); + transform.colorOutput = -1; + transform.maxInput = -1; + transform.maxGeneric = -1; + transform.colorTemp = -1; + transform.tmp0 = -1; + transform.firstInstruction = TRUE; + transform.base.transform_instruction = aa_transform_inst; + transform.base.transform_declaration = aa_transform_decl; + + tgsi_transform_shader(orig_fs->tokens, + (struct tgsi_token *) aapoint_fs.tokens, + MAX, &transform.base); + +#if 0 /* DEBUG */ + tgsi_dump(orig_fs->tokens, 0); + tgsi_dump(aapoint_fs.tokens, 0); +#endif + + aapoint_fs.input_semantic_name[aapoint_fs.num_inputs] = TGSI_SEMANTIC_GENERIC; + aapoint_fs.input_semantic_index[aapoint_fs.num_inputs] = transform.maxGeneric + 1; + aapoint_fs.num_inputs++; + + aapoint->fs->aapoint_fs + = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs); + + /* advertise the extra post-transform vertex attributes which will have + * the texcoords. + */ + draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_vp_outputs.semantic_index = transform.maxGeneric + 1; +} + + +/** + * When we're about to draw our first AA line in a batch, this function is + * called to tell the driver to bind our modified fragment shader. + */ +static void +bind_aapoint_fragment_shader(struct aapoint_stage *aapoint) +{ + if (!aapoint->fs->aapoint_fs) { + generate_aapoint_fs(aapoint); + } + aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs); +} + + + +static INLINE struct aapoint_stage * +aapoint_stage( struct draw_stage *stage ) +{ + return (struct aapoint_stage *) stage; +} + + +static void +passthrough_line(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->line(stage->next, header); +} + + +static void +passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + +/** + * Draw an AA point by drawing a quad. + */ +static void +aapoint_point(struct draw_stage *stage, struct prim_header *header) +{ + const struct aapoint_stage *aapoint = aapoint_stage(stage); + struct prim_header tri; + struct vertex_header *v[4]; + uint texPos = aapoint->tex_slot; + float radius, *pos, *tex; + uint i; + float k; + + if (aapoint->psize_slot >= 0) { + radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0]; + } + else { + radius = aapoint->radius; + } + + /* + * Note: the texcoords (generic attrib, really) we use are special: + * The S and T components simply vary from -1 to +1. + * The R component is k, below. + * The Q component is 1.0 and will used as a handy constant in the + * fragment shader. + */ + + /* + * k is the threshold distance from the point's center at which + * we begin alpha attenuation (the coverage value). + * Operating within a unit circle, we'll compute the fragment's + * distance 'd' from the center point using the texcoords. + * IF d > 1.0 THEN + * KILL fragment + * ELSE IF d > k THEN + * compute coverage in [0,1] proportional to d in [k, 1]. + * ELSE + * coverage = 1.0; // full coverage + * ENDIF + */ + +#if !NORMALIZE + k = 1.0 / radius; + k = 1.0 - 2.0 * k + k * k; +#else + k = 1.0 - 1.0 / radius; +#endif + + /* allocate/dup new verts */ + for (i = 0; i < 4; i++) { + v[i] = dup_vert(stage, header->v[0], i); + } + + /* new verts */ + pos = v[0]->data[0]; + pos[0] -= radius; + pos[1] -= radius; + + pos = v[1]->data[0]; + pos[0] += radius; + pos[1] -= radius; + + pos = v[2]->data[0]; + pos[0] += radius; + pos[1] += radius; + + pos = v[3]->data[0]; + pos[0] -= radius; + pos[1] += radius; + + /* new texcoords */ + tex = v[0]->data[texPos]; + ASSIGN_4V(tex, -1, -1, k, 1); + + tex = v[1]->data[texPos]; + ASSIGN_4V(tex, 1, -1, k, 1); + + tex = v[2]->data[texPos]; + ASSIGN_4V(tex, 1, 1, k, 1); + + tex = v[3]->data[texPos]; + ASSIGN_4V(tex, -1, 1, k, 1); + + /* emit 2 tris for the quad strip */ + tri.v[0] = v[0]; + tri.v[1] = v[1]; + tri.v[2] = v[2]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[0]; + tri.v[1] = v[2]; + tri.v[2] = v[3]; + stage->next->tri( stage->next, &tri ); +} + + +static void +aapoint_first_point(struct draw_stage *stage, struct prim_header *header) +{ + auto struct aapoint_stage *aapoint = aapoint_stage(stage); + struct draw_context *draw = stage->draw; + + assert(draw->rasterizer->point_smooth); + + if (draw->rasterizer->point_size <= 2.0) + aapoint->radius = 1.0; + else + aapoint->radius = 0.5f * draw->rasterizer->point_size; + + aapoint->tex_slot = draw->num_vs_outputs; + assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */ + draw->extra_vp_outputs.slot = aapoint->tex_slot; + + /* + * Bind our fragprog. + */ + bind_aapoint_fragment_shader(aapoint); + + /* find psize slot in post-transform vertex */ + aapoint->psize_slot = -1; + if (draw->rasterizer->point_size_per_vertex) { + /* find PSIZ vertex output */ + const struct draw_vertex_shader *vs = draw->vertex_shader; + uint i; + for (i = 0; i < vs->state->num_outputs; i++) { + if (vs->state->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { + aapoint->psize_slot = i; + break; + } + } + } + + /* now really draw first line */ + stage->point = aapoint_point; + stage->point(stage, header); +} + + +static void +aapoint_flush(struct draw_stage *stage, unsigned flags) +{ + struct draw_context *draw = stage->draw; + struct aapoint_stage *aapoint = aapoint_stage(stage); + struct pipe_context *pipe = aapoint->pipe; + + stage->point = aapoint_first_point; + stage->next->flush( stage->next, flags ); + + /* restore original frag shader */ + aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs); + + draw->extra_vp_outputs.slot = 0; +} + + +static void +aapoint_reset_stipple_counter(struct draw_stage *stage) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +aapoint_destroy(struct draw_stage *stage) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +static struct aapoint_stage * +draw_aapoint_stage(struct draw_context *draw) +{ + struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage); + + draw_alloc_temp_verts( &aapoint->stage, 4 ); + + aapoint->stage.draw = draw; + aapoint->stage.next = NULL; + aapoint->stage.point = aapoint_first_point; + aapoint->stage.line = passthrough_line; + aapoint->stage.tri = passthrough_tri; + aapoint->stage.flush = aapoint_flush; + aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter; + aapoint->stage.destroy = aapoint_destroy; + + return aapoint; +} + + +/* + * XXX temporary? solution to mapping a pipe_context to a aapoint_stage. + */ + +#define MAX_CONTEXTS 10 + +static struct pipe_context *Pipe[MAX_CONTEXTS]; +static struct aapoint_stage *Stage[MAX_CONTEXTS]; +static uint NumContexts; + +static void +add_aa_pipe_context(struct pipe_context *pipe, struct aapoint_stage *aa) +{ + assert(NumContexts < MAX_CONTEXTS); + Pipe[NumContexts] = pipe; + Stage[NumContexts] = aa; + NumContexts++; +} + +static struct aapoint_stage * +aapoint_stage_from_pipe(struct pipe_context *pipe) +{ + uint i; + for (i = 0; i < NumContexts; i++) { + if (Pipe[i] == pipe) + return Stage[i]; + } + assert(0); + return NULL; +} + + +/** + * This function overrides the driver's create_fs_state() function and + * will typically be called by the state tracker. + */ +static void * +aapoint_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); + struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader); + + if (aafs) { + aafs->state = *fs; + + /* pass-through */ + aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs); + } + + return aafs; +} + + +static void +aapoint_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); + struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs; + /* save current */ + aapoint->fs = aafs; + /* pass-through */ + aapoint->driver_bind_fs_state(aapoint->pipe, aafs->driver_fs); +} + + +static void +aapoint_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); + struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs; + /* pass-through */ + aapoint->driver_delete_fs_state(aapoint->pipe, aafs->driver_fs); + FREE(aafs); +} + + +/** + * Called by drivers that want to install this AA point prim stage + * into the draw module's pipeline. This will not be used if the + * hardware has native support for AA points. + */ +void +draw_install_aapoint_stage(struct draw_context *draw, + struct pipe_context *pipe) +{ + struct aapoint_stage *aapoint; + + /* + * Create / install AA point drawing / prim stage + */ + aapoint = draw_aapoint_stage( draw ); + assert(aapoint); + draw->pipeline.aapoint = &aapoint->stage; + + aapoint->pipe = pipe; + + /* save original driver functions */ + aapoint->driver_create_fs_state = pipe->create_fs_state; + aapoint->driver_bind_fs_state = pipe->bind_fs_state; + aapoint->driver_delete_fs_state = pipe->delete_fs_state; + + /* override the driver's functions */ + pipe->create_fs_state = aapoint_create_fs_state; + pipe->bind_fs_state = aapoint_bind_fs_state; + pipe->delete_fs_state = aapoint_delete_fs_state; + + add_aa_pipe_context(pipe, aapoint); +} diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index b90a9f474d..c28e78d33a 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -105,6 +105,8 @@ void draw_destroy( struct draw_context *draw ) draw->pipeline.validate->destroy( draw->pipeline.validate ); if (draw->pipeline.aaline) draw->pipeline.aaline->destroy( draw->pipeline.aaline ); + if (draw->pipeline.aapoint) + draw->pipeline.aapoint->destroy( draw->pipeline.aapoint ); if (draw->pipeline.rasterize) draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); tgsi_exec_machine_free_data(&draw->machine); diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 82035aa8ad..43b58bc056 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -99,6 +99,9 @@ boolean draw_use_sse(struct draw_context *draw); void draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe); +void +draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe); + int draw_find_vs_output(struct draw_context *draw, diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 492c152ff9..8c469e74b7 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -180,6 +180,7 @@ struct draw_context struct draw_stage *offset; struct draw_stage *unfilled; struct draw_stage *stipple; + struct draw_stage *aapoint; struct draw_stage *aaline; struct draw_stage *wide; struct draw_stage *rasterize; diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index 97e40e372b..5167ef1e75 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -63,6 +63,11 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) next = draw->pipeline.aaline; } + if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) { + draw->pipeline.aapoint->next = next; + next = draw->pipeline.aapoint; + } + if ((draw->rasterizer->line_width != 1.0 && draw->convert_wide_lines && !draw->rasterizer->line_smooth) || (draw->rasterizer->point_size != 1.0 && draw->convert_wide_points) || diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 316020cba6..de4c6c18e7 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -328,8 +328,12 @@ struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys, draw_set_rasterize_stage(softpipe->draw, softpipe->setup); } - /* enable aaline stage */ + /* plug in AA line/point stages */ draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); + draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe); + + /* sp_prim_setup can do wide points (don't convert to quads) */ + draw_convert_wide_points(softpipe->draw, FALSE); sp_init_surface_functions(softpipe); -- cgit v1.2.3 From 446bfc32a83008e0865ec869bc80b920c907f10f Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 21 Feb 2008 16:56:32 -0700 Subject: gallium: new draw stage for polygon stipple. For hardware without native polygon stipple. Create a 32x32 alpha texture that encodes the stipple pattern. Modify the user's fragment program to sample the texture (with gl_FragCoord) and kill the fragment according to the texel value. Temporarily enabled in softpipe driver, replacing the sp_quad_stipple.c step. --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/draw_context.h | 3 + src/gallium/auxiliary/draw/draw_private.h | 1 + src/gallium/auxiliary/draw/draw_pstipple.c | 717 +++++++++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_validate.c | 6 + src/gallium/drivers/softpipe/sp_context.c | 5 + src/gallium/drivers/softpipe/sp_context.h | 7 + src/gallium/drivers/softpipe/sp_quad.c | 2 + 8 files changed, 742 insertions(+) create mode 100644 src/gallium/auxiliary/draw/draw_pstipple.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index b7e71ed3ec..c9980f0b83 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -16,6 +16,7 @@ C_SOURCES = \ draw_flatshade.c \ draw_offset.c \ draw_prim.c \ + draw_pstipple.c \ draw_stipple.c \ draw_twoside.c \ draw_unfilled.c \ diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 43b58bc056..c25301f71d 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -102,6 +102,9 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe); void draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe); +void +draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe); + int draw_find_vs_output(struct draw_context *draw, diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 8c469e74b7..6abced139b 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -182,6 +182,7 @@ struct draw_context struct draw_stage *stipple; struct draw_stage *aapoint; struct draw_stage *aaline; + struct draw_stage *pstipple; struct draw_stage *wide; struct draw_stage *rasterize; } pipeline; diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c new file mode 100644 index 0000000000..4048abf856 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -0,0 +1,717 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Polygon stipple stage: implement polygon stipple with texture map and + * fragment program. The fragment program samples the texture and does + * a fragment kill for the stipple-failing fragments. + * + * Authors: Brian Paul + */ + + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/util/tgsi_transform.h" +#include "tgsi/util/tgsi_dump.h" + +#include "draw_context.h" +#include "draw_private.h" + + + +/** + * Subclass of pipe_shader_state to carry extra fragment shader info. + */ +struct pstip_fragment_shader +{ + struct pipe_shader_state state; + void *driver_fs; + void *pstip_fs; +}; + + +/** + * Subclass of draw_stage + */ +struct pstip_stage +{ + struct draw_stage stage; + + void *sampler_cso; + struct pipe_texture *texture; + uint sampler_unit; + + /* + * Currently bound state + */ + struct pstip_fragment_shader *fs; + struct { + void *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + const struct pipe_poly_stipple *stipple; + } state; + + /* + * Driver interface/override functions + */ + void * (*driver_create_fs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*driver_bind_fs_state)(struct pipe_context *, void *); + void (*driver_delete_fs_state)(struct pipe_context *, void *); + + void (*driver_bind_sampler_state)(struct pipe_context *, unsigned, void *); + + void (*driver_set_sampler_texture)(struct pipe_context *, + unsigned sampler, + struct pipe_texture *); + + void (*driver_set_polygon_stipple)(struct pipe_context *, + const struct pipe_poly_stipple *); + + struct pipe_context *pipe; +}; + + + +/** + * Subclass of tgsi_transform_context, used for transforming the + * user's fragment shader to add the special AA instructions. + */ +struct pstip_transform_context { + struct tgsi_transform_context base; + uint tempsUsed; /**< bitmask */ + int wincoordInput; + int maxInput; + int maxSampler; /**< max sampler index found */ + int texTemp; /**< temp registers */ + int numImmed; + boolean firstInstruction; +}; + + +/** + * TGSI declaration transform callback. + * Look for a free sampler, a free input attrib, and two free temp regs. + */ +static void +pstip_transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + + if (decl->Declaration.File == TGSI_FILE_SAMPLER) { + if ((int) decl->u.DeclarationRange.Last > pctx->maxSampler) + pctx->maxSampler = (int) decl->u.DeclarationRange.Last; + } + else if (decl->Declaration.File == TGSI_FILE_INPUT) { + pctx->maxInput = MAX2(pctx->maxInput, decl->u.DeclarationRange.Last); + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) + pctx->wincoordInput = (int) decl->u.DeclarationRange.First; + } + else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + uint i; + for (i = decl->u.DeclarationRange.First; + i <= decl->u.DeclarationRange.Last; i++) { + pctx->tempsUsed |= (1 << i); + } + } + + ctx->emit_declaration(ctx, decl); +} + + +static void +pstip_transform_immed(struct tgsi_transform_context *ctx, + struct tgsi_full_immediate *immed) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + pctx->numImmed++; +} + + +/** + * TGSI instruction transform callback. + * Replace writes to result.color w/ a temp reg. + * Upon END instruction, insert texture sampling code for antialiasing. + */ +static void +pstip_transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + + if (pctx->firstInstruction) { + /* emit our new declarations before the first instruction */ + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction newInst; + uint i; + int wincoordInput; + const int sampler = pctx->maxSampler + 1; + + if (pctx->wincoordInput < 0) + wincoordInput = pctx->maxInput + 1; + else + wincoordInput = pctx->wincoordInput; + + /* find one free temp reg */ + for (i = 0; i < 32; i++) { + if ((pctx->tempsUsed & (1 << i)) == 0) { + /* found a free temp */ + if (pctx->texTemp < 0) + pctx->texTemp = i; + else + break; + } + } + assert(pctx->texTemp >= 0); + + if (pctx->wincoordInput < 0) { + /* declare new position input reg */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION; + decl.Semantic.SemanticIndex = 0; + decl.Declaration.Interpolate = 1; + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? */ + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = wincoordInput; + ctx->emit_declaration(ctx, &decl); + } + + /* declare new sampler */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = sampler; + ctx->emit_declaration(ctx, &decl); + + /* declare new temp regs */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = pctx->texTemp; + ctx->emit_declaration(ctx, &decl); + + /* emit immediate = {1/32, 1/32, 1, 1} + * The index/position of this immediate will be pctx->numImmed + */ + { + static const float value[4] = { 1.0/32, 1.0/32, 1.0, 1.0 }; + struct tgsi_full_immediate immed; + uint size = 4; + immed = tgsi_default_full_immediate(); + immed.Immediate.Size = 1 + size; /* one for the token itself */ + immed.u.ImmediateFloat32 = (struct tgsi_immediate_float32 *) value; + ctx->emit_immediate(ctx, &immed); + } + + pctx->firstInstruction = FALSE; + + + /* + * Insert new MUL/TEX/KILP instructions at start of program + * Take gl_FragCoord, divide by 32 (stipple size), sample the + * texture and kill fragment if needed. + * + * We'd like to use non-normalized texcoords to index into a RECT + * texture, but we can only use GL_REPEAT wrap mode with normalized + * texcoords. Darn. + */ + + /* MUL texTemp, INPUT[wincoord], 1/32; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = wincoordInput; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_IMMEDIATE; + newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->numImmed; + ctx->emit_instruction(ctx, &newInst); + + /* TEX texTemp, texTemp, sampler; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_TEX; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + newInst.FullSrcRegisters[1].SrcRegister.Index = sampler; + ctx->emit_instruction(ctx, &newInst); + + /* KILP texTemp; # if texTemp < 0, KILL fragment */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_KILP; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; + newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + ctx->emit_instruction(ctx, &newInst); + } + + /* emit this instruction */ + ctx->emit_instruction(ctx, inst); +} + + +/** + * Generate the frag shader we'll use for doing polygon stipple. + * This will be the user's shader prefixed with a TEX and KIL instruction. + */ +static void +generate_pstip_fs(struct pstip_stage *pstip) +{ + const struct pipe_shader_state *orig_fs = &pstip->fs->state; + /*struct draw_context *draw = pstip->stage.draw;*/ + struct pipe_shader_state pstip_fs; + struct pstip_transform_context transform; + +#define MAX 1000 + + pstip_fs = *orig_fs; /* copy to init */ + pstip_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + + memset(&transform, 0, sizeof(transform)); + transform.wincoordInput = -1; + transform.maxInput = -1; + transform.maxSampler = -1; + transform.texTemp = -1; + transform.firstInstruction = TRUE; + transform.base.transform_instruction = pstip_transform_inst; + transform.base.transform_declaration = pstip_transform_decl; + transform.base.transform_immediate = pstip_transform_immed; + + tgsi_transform_shader(orig_fs->tokens, + (struct tgsi_token *) pstip_fs.tokens, + MAX, &transform.base); + +#if 1 /* DEBUG */ + tgsi_dump(orig_fs->tokens, 0); + tgsi_dump(pstip_fs.tokens, 0); +#endif + + pstip->sampler_unit = transform.maxSampler + 1; + + if (transform.wincoordInput < 0) { + pstip_fs.input_semantic_name[pstip_fs.num_inputs] = TGSI_SEMANTIC_POSITION; + pstip_fs.input_semantic_index[pstip_fs.num_inputs] = transform.maxInput; + pstip_fs.num_inputs++; + } + + pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs); +} + + +/** + * Load texture image with current stipple pattern. + */ +static void +pstip_update_texture(struct pstip_stage *pstip) +{ + static const uint bit31 = 1 << 31; + struct pipe_context *pipe = pstip->pipe; + struct pipe_surface *surface; + const uint *stipple = pstip->state.stipple->stipple; + uint i, j; + ubyte *data; + + surface = pipe->get_tex_surface(pipe, pstip->texture, 0, 0, 0); + data = pipe_surface_map(surface); + + /* + * Load alpha texture. + * Note: 0 means keep the fragment, 255 means kill it. + * We'll negate the texel value and use KILP which kills if value + * is negative. + */ + for (i = 0; i < 32; i++) { + for (j = 0; j < 32; j++) { + if (stipple[i] & (bit31 >> j)) { + /* fragment "on" */ + data[i * surface->pitch + j] = 0; + } + else { + /* fragment "off" */ + data[i * surface->pitch + j] = 255; + } + } + } + + /* unmap */ + pipe_surface_unmap(surface); + pipe_surface_reference(&surface, NULL); + pipe->texture_update(pipe, pstip->texture); +} + + +/** + * Create the texture map we'll use for stippling. + */ +static void +pstip_create_texture(struct pstip_stage *pstip) +{ + struct pipe_context *pipe = pstip->pipe; + struct pipe_texture texTemp; + + memset(&texTemp, 0, sizeof(texTemp)); + texTemp.target = PIPE_TEXTURE_2D; + texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */ + texTemp.last_level = 0; + texTemp.width[0] = 32; + texTemp.height[0] = 32; + texTemp.depth[0] = 1; + texTemp.cpp = 1; + + pstip->texture = pipe->texture_create(pipe, &texTemp); + + //pstip_update_texture(pstip); +} + + +/** + * Create the sampler CSO that'll be used for antialiasing. + * By using a mipmapped texture, we don't have to generate a different + * texture image for each line size. + */ +static void +pstip_create_sampler(struct pstip_stage *pstip) +{ + struct pipe_sampler_state sampler; + struct pipe_context *pipe = pstip->pipe; + + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.normalized_coords = 1; + sampler.min_lod = 0.0f; + sampler.max_lod = 0.0f; + + pstip->sampler_cso = pipe->create_sampler_state(pipe, &sampler); +} + + +/** + * When we're about to draw our first AA line in a batch, this function is + * called to tell the driver to bind our modified fragment shader. + */ +static void +bind_pstip_fragment_shader(struct pstip_stage *pstip) +{ + if (!pstip->fs->pstip_fs) { + generate_pstip_fs(pstip); + } + pstip->driver_bind_fs_state(pstip->pipe, pstip->fs->pstip_fs); +} + + + +static INLINE struct pstip_stage * +pstip_stage( struct draw_stage *stage ) +{ + return (struct pstip_stage *) stage; +} + + +static void +passthrough_point(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->point(stage->next, header); +} + + +static void +passthrough_line(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->line(stage->next, header); +} + + +static void +passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + + +static void +pstip_first_tri(struct draw_stage *stage, struct prim_header *header) +{ + struct pstip_stage *pstip = pstip_stage(stage); + struct draw_context *draw = stage->draw; + struct pipe_context *pipe = pstip->pipe; + + assert(draw->rasterizer->poly_stipple_enable); + + /* + * Bind our fragprog, sampler and texture + */ + bind_pstip_fragment_shader(pstip); + + pstip->driver_bind_sampler_state(pipe, pstip->sampler_unit, pstip->sampler_cso); + pstip->driver_set_sampler_texture(pipe, pstip->sampler_unit, pstip->texture); + + /* now really draw first line */ + stage->tri = passthrough_tri; + stage->tri(stage, header); +} + + +static void +pstip_flush(struct draw_stage *stage, unsigned flags) +{ + /*struct draw_context *draw = stage->draw;*/ + struct pstip_stage *pstip = pstip_stage(stage); + struct pipe_context *pipe = pstip->pipe; + + stage->tri = pstip_first_tri; + stage->next->flush( stage->next, flags ); + + /* restore original frag shader */ + pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs); + + /* XXX restore original texture, sampler state */ + pstip->driver_bind_sampler_state(pipe, pstip->sampler_unit, + pstip->state.sampler[pstip->sampler_unit]); + pstip->driver_set_sampler_texture(pipe, pstip->sampler_unit, + pstip->state.texture[pstip->sampler_unit]); +} + + +static void +pstip_reset_stipple_counter(struct draw_stage *stage) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +pstip_destroy(struct draw_stage *stage) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +static struct pstip_stage * +draw_pstip_stage(struct draw_context *draw) +{ + struct pstip_stage *pstip = CALLOC_STRUCT(pstip_stage); + + draw_alloc_temp_verts( &pstip->stage, 8 ); + + pstip->stage.draw = draw; + pstip->stage.next = NULL; + pstip->stage.point = passthrough_point; + pstip->stage.line = passthrough_line; + pstip->stage.tri = pstip_first_tri; + pstip->stage.flush = pstip_flush; + pstip->stage.reset_stipple_counter = pstip_reset_stipple_counter; + pstip->stage.destroy = pstip_destroy; + + return pstip; +} + + +/* + * XXX temporary? solution to mapping a pipe_context to a pstip_stage. + */ + +#define MAX_CONTEXTS 10 + +static struct pipe_context *Pipe[MAX_CONTEXTS]; +static struct pstip_stage *Stage[MAX_CONTEXTS]; +static uint NumContexts; + +static void +add_pstip_pipe_context(struct pipe_context *pipe, struct pstip_stage *pstip) +{ + assert(NumContexts < MAX_CONTEXTS); + Pipe[NumContexts] = pipe; + Stage[NumContexts] = pstip; + NumContexts++; +} + +static struct pstip_stage * +pstip_stage_from_pipe(struct pipe_context *pipe) +{ + uint i; + for (i = 0; i < NumContexts; i++) { + if (Pipe[i] == pipe) + return Stage[i]; + } + assert(0); + return NULL; +} + + +/** + * This function overrides the driver's create_fs_state() function and + * will typically be called by the state tracker. + */ +static void * +pstip_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = CALLOC_STRUCT(pstip_fragment_shader); + + if (aafs) { + aafs->state = *fs; + + /* pass-through */ + aafs->driver_fs = pstip->driver_create_fs_state(pstip->pipe, fs); + } + + return aafs; +} + + +static void +pstip_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs; + /* save current */ + pstip->fs = aafs; + /* pass-through */ + pstip->driver_bind_fs_state(pstip->pipe, aafs->driver_fs); +} + + +static void +pstip_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs; + /* pass-through */ + pstip->driver_delete_fs_state(pstip->pipe, aafs->driver_fs); + FREE(aafs); +} + + +static void +pstip_bind_sampler_state(struct pipe_context *pipe, + unsigned unit, void *sampler) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + /* save current */ + pstip->state.sampler[unit] = sampler; + /* pass-through */ + pstip->driver_bind_sampler_state(pstip->pipe, unit, sampler); +} + + +static void +pstip_set_sampler_texture(struct pipe_context *pipe, + unsigned sampler, struct pipe_texture *texture) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + /* save current */ + pstip->state.texture[sampler] = texture; + /* pass-through */ + pstip->driver_set_sampler_texture(pstip->pipe, sampler, texture); +} + + +static void +pstip_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + /* save current */ + pstip->state.stipple = stipple; + /* pass-through */ + pstip->driver_set_polygon_stipple(pstip->pipe, stipple); + + pstip_update_texture(pstip); +} + + + +/** + * Called by drivers that want to install this AA line prim stage + * into the draw module's pipeline. This will not be used if the + * hardware has native support for AA lines. + */ +void +draw_install_pstipple_stage(struct draw_context *draw, + struct pipe_context *pipe) +{ + struct pstip_stage *pstip; + + /* + * Create / install AA line drawing / prim stage + */ + pstip = draw_pstip_stage( draw ); + assert(pstip); + draw->pipeline.pstipple = &pstip->stage; + + pstip->pipe = pipe; + + /* create special texture, sampler state */ + pstip_create_texture(pstip); + pstip_create_sampler(pstip); + + /* save original driver functions */ + pstip->driver_create_fs_state = pipe->create_fs_state; + pstip->driver_bind_fs_state = pipe->bind_fs_state; + pstip->driver_delete_fs_state = pipe->delete_fs_state; + + pstip->driver_bind_sampler_state = pipe->bind_sampler_state; + pstip->driver_set_sampler_texture = pipe->set_sampler_texture; + pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple; + + /* override the driver's functions */ + pipe->create_fs_state = pstip_create_fs_state; + pipe->bind_fs_state = pstip_bind_fs_state; + pipe->delete_fs_state = pstip_delete_fs_state; + + pipe->bind_sampler_state = pstip_bind_sampler_state; + pipe->set_sampler_texture = pstip_set_sampler_texture; + pipe->set_polygon_stipple = pstip_set_polygon_stipple; + + add_pstip_pipe_context(pipe, pstip); +} diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index 5167ef1e75..efd6793f2b 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -82,6 +82,12 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) precalc_flat = 1; /* only needed for lines really */ } + if (draw->rasterizer->poly_stipple_enable + && draw->pipeline.pstipple) { + draw->pipeline.pstipple->next = next; + next = draw->pipeline.pstipple; + } + if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) { draw->pipeline.unfilled->next = next; diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index de4c6c18e7..2cdf3c75bf 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -332,6 +332,11 @@ struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys, draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe); +#if USE_DRAW_STAGE_PSTIPPLE + /* Do polygon stipple w/ texture map + frag prog? */ + draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe); +#endif + /* sp_prim_setup can do wide points (don't convert to quads) */ draw_convert_wide_points(softpipe->draw, FALSE); diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index a50cee7648..feeafc7084 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -39,6 +39,13 @@ #include "sp_quad.h" +/** + * This is a temporary variable for testing draw-stage polygon stipple. + * If zero, do stipple in sp_quad_stipple.c + */ +#define USE_DRAW_STAGE_PSTIPPLE 1 + + struct softpipe_winsys; struct softpipe_vbuf_render; struct draw_context; diff --git a/src/gallium/drivers/softpipe/sp_quad.c b/src/gallium/drivers/softpipe/sp_quad.c index 6bd468a51c..15b5594547 100644 --- a/src/gallium/drivers/softpipe/sp_quad.c +++ b/src/gallium/drivers/softpipe/sp_quad.c @@ -112,7 +112,9 @@ sp_build_quad_pipeline(struct softpipe_context *sp) sp_push_quad_first( sp, sp->quad.earlyz ); } +#if !USE_DRAW_STAGE_PSTIPPLE if (sp->rasterizer->poly_stipple_enable) { sp_push_quad_first( sp, sp->quad.polygon_stipple ); } +#endif } -- cgit v1.2.3 From 57060bc1fa82e4e93d2affafecd98219be2f991f Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 20 Feb 2008 22:10:27 +0100 Subject: gallium: Silence compiler warnings on Windows. --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index dcbf76f600..4d33950e99 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -25,6 +25,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" +#include "pipe/p_pointer.h" #include "rtasm_execmem.h" #include "rtasm_x86sse.h" @@ -34,7 +35,7 @@ static unsigned char *cptr( void (*label)() ) { - return (unsigned char *)(unsigned long)label; + return (unsigned char *) label; } @@ -46,7 +47,7 @@ static void do_realloc( struct x86_function *p ) p->csr = p->store; } else { - unsigned used = p->csr - p->store; + uintptr_t used = pointer_to_uintptr( p->csr ) - pointer_to_uintptr( p->store ); unsigned char *tmp = p->store; p->size *= 2; p->store = rtasm_exec_malloc(p->size); @@ -60,7 +61,7 @@ static void do_realloc( struct x86_function *p ) */ static unsigned char *reserve( struct x86_function *p, int bytes ) { - if (p->csr + bytes - p->store > p->size) + if (p->csr + bytes - p->store > (int) p->size) do_realloc(p); { @@ -135,7 +136,7 @@ static void emit_modrm( struct x86_function *p, case mod_INDIRECT: break; case mod_DISP8: - emit_1b(p, regmem.disp); + emit_1b(p, (char) regmem.disp); break; case mod_DISP32: emit_1i(p, regmem.disp); @@ -251,14 +252,14 @@ void x86_jcc( struct x86_function *p, enum x86_cc cc, unsigned char *label ) { - int offset = label - (x86_get_label(p) + 2); + intptr_t offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 2); if (offset <= 127 && offset >= -128) { emit_1ub(p, 0x70 + cc); emit_1b(p, (char) offset); } else { - offset = label - (x86_get_label(p) + 6); + offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 6); emit_2ub(p, 0x0f, 0x80 + cc); emit_1i(p, offset); } @@ -293,13 +294,13 @@ unsigned char *x86_call_forward( struct x86_function *p) void x86_fixup_fwd_jump( struct x86_function *p, unsigned char *fixup ) { - *(int *)(fixup - 4) = x86_get_label(p) - fixup; + *(int *)(fixup - 4) = pointer_to_intptr( x86_get_label(p) ) - pointer_to_intptr( fixup ); } void x86_jmp( struct x86_function *p, unsigned char *label) { emit_1ub(p, 0xe9); - emit_1i(p, label - x86_get_label(p) - 4); + emit_1i(p, pointer_to_intptr( label ) - pointer_to_intptr( x86_get_label(p) ) - 4); } #if 0 @@ -1207,7 +1208,7 @@ void (*x86_get_func( struct x86_function *p ))(void) { if (DISASSEM && p->store) debug_printf("disassemble %p %p\n", p->store, p->csr); - return (void (*)(void)) (unsigned long) p->store; + return (void (*)(void)) p->store; } #else -- cgit v1.2.3 From 2cf860b866d80595b7287d6991dc96abc3ca8dd3 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 22 Feb 2008 16:22:41 +0900 Subject: gallium: MSVC fixes. --- src/gallium/auxiliary/cso_cache/cso_hash.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c index b40217c524..e65d331a0b 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -134,7 +134,7 @@ static void cso_data_rehash(struct cso_hash_data *hash, int hint) hint = countBits(-hint); if (hint < MinNumBits) hint = MinNumBits; - hash->userNumBits = hint; + hash->userNumBits = (short)hint; while (primeForNumBits(hint) < (hash->size >> 1)) ++hint; } else if (hint < MinNumBits) { @@ -147,7 +147,7 @@ static void cso_data_rehash(struct cso_hash_data *hash, int hint) int oldNumBuckets = hash->numBuckets; int i = 0; - hash->numBits = hint; + hash->numBits = (short)hint; hash->numBuckets = primeForNumBits(hint); hash->buckets = malloc(sizeof(struct cso_node*) * hash->numBuckets); for (i = 0; i < hash->numBuckets; ++i) @@ -241,7 +241,7 @@ struct cso_hash * cso_hash_create(void) hash->data.d->buckets = 0; hash->data.d->size = 0; hash->data.d->nodeSize = sizeof(struct cso_node); - hash->data.d->userNumBits = MinNumBits; + hash->data.d->userNumBits = (short)MinNumBits; hash->data.d->numBits = 0; hash->data.d->numBuckets = 0; -- cgit v1.2.3 From 6c597238b2e168b63738ac8cc9167c1d09185aad Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 22 Feb 2008 17:22:32 +0900 Subject: gallium: Add cso convenience routine (from Keith's patch). --- src/gallium/auxiliary/cso_cache/cso_cache.c | 20 ++++++++++++++++++++ src/gallium/auxiliary/cso_cache/cso_hash.h | 9 +++++++++ 2 files changed, 29 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index 9e77e0774d..776ce6bacf 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -132,6 +132,26 @@ cso_find_state(struct cso_cache *sc, return cso_hash_find(hash, hash_key); } + +void *cso_hash_find_data_from_template( struct cso_hash *hash, + unsigned hash_key, + void *templ, + int size ) +{ + struct cso_hash_iter iter = cso_hash_find(hash, hash_key); + while (!cso_hash_iter_is_null(iter)) { + void *iter_data = cso_hash_iter_data(iter); + if (!memcmp(iter_data, templ, size)) { + /* Return the payload: + */ + return (unsigned char *)iter_data + size; + } + iter = cso_hash_iter_next(iter); + } + return NULL; +} + + struct cso_hash_iter cso_find_state_template(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type, void *templ) diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h index b4aa111860..ffd99beba9 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.h +++ b/src/gallium/auxiliary/cso_cache/cso_hash.h @@ -59,4 +59,13 @@ void *cso_hash_iter_data(struct cso_hash_iter iter); struct cso_hash_iter cso_hash_iter_next(struct cso_hash_iter iter); struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter); + +/* KW: a convenience routine: + */ +void *cso_hash_find_data_from_template( struct cso_hash *hash, + unsigned hash_key, + void *templ, + int size ); + + #endif -- cgit v1.2.3 From 901b03e84dce21f4241375da179b2199a3162e0c Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 22 Feb 2008 18:28:30 +0900 Subject: gallium: Wrap decls in extern "C". --- src/gallium/auxiliary/cso_cache/cso_cache.h | 12 ++++++++++++ src/gallium/auxiliary/cso_cache/cso_hash.h | 10 ++++++++++ 2 files changed, 22 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h index 116e2eaa2c..58664cdd94 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.h +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -37,8 +37,15 @@ #include "pipe/p_state.h" +#ifdef __cplusplus +extern "C" { +#endif + + struct cso_hash; +struct cso_hash_iter; + struct cso_cache { struct cso_hash *blend_hash; struct cso_hash *depth_stencil_hash; @@ -104,4 +111,9 @@ struct cso_hash_iter cso_find_state_template(struct cso_cache *sc, void * cso_take_state(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type); + +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h index ffd99beba9..2e8b69675c 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.h +++ b/src/gallium/auxiliary/cso_cache/cso_hash.h @@ -33,6 +33,12 @@ #ifndef CSO_HASH_H #define CSO_HASH_H + +#ifdef __cplusplus +extern "C" { +#endif + + struct cso_hash; struct cso_node; @@ -68,4 +74,8 @@ void *cso_hash_find_data_from_template( struct cso_hash *hash, int size ); +#ifdef __cplusplus +} +#endif + #endif -- cgit v1.2.3 From f1bef2cba2e6e4c0988b05306dd1435b8c0df3d5 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 22 Feb 2008 18:45:20 +0900 Subject: gallium: Countour MSVC's pickyness for structures returned by value. --- src/gallium/auxiliary/cso_cache/cso_cache.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h index 58664cdd94..b4f4f3ae41 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.h +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -36,16 +36,14 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" +#include "cso_hash.h" /* for cso_hash_iter, as MSVC requires structures returned by value to be fully defined */ + #ifdef __cplusplus extern "C" { #endif -struct cso_hash; - -struct cso_hash_iter; - struct cso_cache { struct cso_hash *blend_hash; struct cso_hash *depth_stencil_hash; -- cgit v1.2.3 From e9276efafe43219d7af548ce7f5d2440e19836b0 Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 22 Feb 2008 17:22:10 -0700 Subject: gallium: fix bug in which wide point stage overrode the aapoint stage Also, simplify the logic a bit. --- src/gallium/auxiliary/draw/draw_validate.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index efd6793f2b..3a19dd4cd7 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -45,6 +45,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) struct draw_stage *next = draw->pipeline.rasterize; int need_det = 0; int precalc_flat = 0; + boolean wide_lines, wide_points; /* Set the validate's next stage to the rasterize stage, so that it * can be found later if needed for flushing. @@ -68,9 +69,18 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) next = draw->pipeline.aapoint; } - if ((draw->rasterizer->line_width != 1.0 && draw->convert_wide_lines - && !draw->rasterizer->line_smooth) || - (draw->rasterizer->point_size != 1.0 && draw->convert_wide_points) || + /* drawing wide lines? */ + wide_lines = (draw->rasterizer->line_width != 1.0 + && draw->convert_wide_lines + && !draw->rasterizer->line_smooth); + + /* drawing large points? */ + wide_points = (draw->rasterizer->point_size != 1.0 + && draw->convert_wide_points + && !draw->pipeline.aapoint); + + if (wide_lines || + wide_points || draw->rasterizer->point_sprite) { draw->pipeline.wide->next = next; next = draw->pipeline.wide; -- cgit v1.2.3 From e8de5c70e3370e9112a5facc870075eea60c4c46 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 23 Feb 2008 14:14:54 +0900 Subject: Bring in several forgotten MSVC fixes. --- src/gallium/auxiliary/pipebuffer/pb_buffer.h | 1 + src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 4 ++-- src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 2 +- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 4 +++- src/gallium/drivers/softpipe/sp_fs_exec.c | 4 ++-- src/gallium/drivers/softpipe/sp_fs_llvm.c | 2 +- src/gallium/include/pipe/p_compiler.h | 8 ++++++++ 7 files changed, 18 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index 97beb5f72a..f5b5f4052f 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -166,6 +166,7 @@ static INLINE void pb_destroy(struct pb_buffer *buf) { assert(buf); + assert(buf->vtbl); buf->vtbl->destroy(buf); } diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index ff4fd123f3..66256f3fa7 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -192,8 +192,8 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr, } /* Some sanity checks */ - assert(0 <= mm_buf->block->ofs && mm_buf->block->ofs < mm->size); - assert(size <= mm_buf->block->size && mm_buf->block->ofs + mm_buf->block->size <= mm->size); + assert(0 <= (unsigned)mm_buf->block->ofs && (unsigned)mm_buf->block->ofs < mm->size); + assert(size <= (unsigned)mm_buf->block->size && (unsigned)mm_buf->block->ofs + (unsigned)mm_buf->block->size <= mm->size); _glthread_UNLOCK_MUTEX(mm->mutex); return SUPER(mm_buf); diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index d7b18dc9c5..ac52441400 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -2455,7 +2455,7 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) /* execute instructions, until pc is set to -1 */ while (pc != -1) { - assert(pc < mach->NumInstructions); + assert(pc < (int) mach->NumInstructions); exec_instruction( mach, mach->Instructions + pc, &pc ); } diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index b5c54847e0..ff74e6117c 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -299,7 +299,8 @@ static const char *TGSI_SEMANTICS[] = "SEMANTIC_BCOLOR", "SEMANTIC_FOG", "SEMANTIC_PSIZE", - "SEMANTIC_GENERIC," + "SEMANTIC_GENERIC", + "SEMANTIC_NORMAL" }; static const char *TGSI_SEMANTICS_SHORT[] = @@ -310,6 +311,7 @@ static const char *TGSI_SEMANTICS_SHORT[] = "FOG", "PSIZE", "GENERIC", + "NORMAL" }; static const char *TGSI_IMMS[] = diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 8cb0534342..d5bd7a702f 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -81,7 +81,7 @@ sp_setup_pos_vector(const struct tgsi_interp_coef *coef, static void -exec_prepare( struct sp_fragment_shader *base, +exec_prepare( const struct sp_fragment_shader *base, struct tgsi_exec_machine *machine, struct tgsi_sampler *samplers ) { @@ -98,7 +98,7 @@ exec_prepare( struct sp_fragment_shader *base, * interface: */ static unsigned -exec_run( struct sp_fragment_shader *base, +exec_run( const struct sp_fragment_shader *base, struct tgsi_exec_machine *machine, struct quad_header *quad ) { diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c index 22da471453..34b2b7d4e2 100644 --- a/src/gallium/drivers/softpipe/sp_fs_llvm.c +++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c @@ -146,7 +146,7 @@ shade_quad_llvm(struct quad_stage *qs, unsigned -run_llvm_fs( struct sp_fragment_shader *base, +run_llvm_fs( const struct sp_fragment_shader *base, struct foo *machine ) { } diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index ab527f2afe..91f3d2ac2d 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -42,6 +42,14 @@ #endif +#if defined(__MSC__) + +/* Avoid 'expression is always true' warning */ +#pragma warning(disable: 4296) + +#endif /* __MSC__ */ + + typedef unsigned int uint; typedef unsigned char ubyte; typedef unsigned char boolean; -- cgit v1.2.3 From e9bb63c8e20361597463b2f7f88d84fe2770c8b9 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 24 Feb 2008 02:16:28 +0900 Subject: gallium: MSVC fixes. --- src/gallium/auxiliary/draw/draw_aapoint.c | 6 +++--- src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 2 ++ src/gallium/drivers/softpipe/sp_state.h | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c index 43119cc70b..27a81f3e90 100644 --- a/src/gallium/auxiliary/draw/draw_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_aapoint.c @@ -616,10 +616,10 @@ aapoint_point(struct draw_stage *stage, struct prim_header *header) */ #if !NORMALIZE - k = 1.0 / radius; - k = 1.0 - 2.0 * k + k * k; + k = 1.0f / radius; + k = 1.0f - 2.0f * k + k * k; #else - k = 1.0 - 1.0 / radius; + k = 1.0f - 1.0f / radius; #endif /* allocate/dup new verts */ diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index e2ee72ed1f..6e217eb2e0 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -285,7 +285,9 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) /* Wait on outstanding fences */ while (fenced_list->numDelayed) { _glthread_UNLOCK_MUTEX(fenced_list->mutex); +#ifndef __MSC__ sched_yield(); +#endif _fenced_buffer_list_check_free(fenced_list, 1); _glthread_LOCK_MUTEX(fenced_list->mutex); } diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index ef8cf67d4c..6ff31e601f 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -63,14 +63,14 @@ struct tgsi_exec_machine; struct sp_fragment_shader { struct pipe_shader_state shader; - void (*prepare)( struct sp_fragment_shader *shader, + void (*prepare)( const struct sp_fragment_shader *shader, struct tgsi_exec_machine *machine, struct tgsi_sampler *samplers); /* Run the shader - this interface will get cleaned up in the * future: */ - unsigned (*run)( struct sp_fragment_shader *shader, + unsigned (*run)( const struct sp_fragment_shader *shader, struct tgsi_exec_machine *machine, struct quad_header *quad ); -- cgit v1.2.3 From 1d77d6caf647424f9c1c481145be0465e96c9e3e Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 23 Feb 2008 16:15:29 -0700 Subject: gallium: added new tgsi_scan.c / tgsi_scan_shader() function Used to get information about registers, instructions used in a shader. --- src/gallium/auxiliary/tgsi/Makefile | 1 + src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 117 ++++++++++++++++++++++++++++ src/gallium/auxiliary/tgsi/util/tgsi_scan.h | 57 ++++++++++++++ 3 files changed, 175 insertions(+) create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_scan.c create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_scan.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index 71f64b747c..5555639b70 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -9,6 +9,7 @@ C_SOURCES = \ util/tgsi_build.c \ util/tgsi_dump.c \ util/tgsi_parse.c \ + util/tgsi_scan.c \ util/tgsi_transform.c \ util/tgsi_util.c diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c new file mode 100644 index 0000000000..4b99ac37cc --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c @@ -0,0 +1,117 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI program scan utility. + * Used to determine which registers and instructions are used by a shader. + * + * Authors: Brian Paul + */ + + +#include "tgsi_scan.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_build.h" + + + + +/** + */ +void +tgsi_scan_shader(const struct tgsi_token *tokens, + struct tgsi_shader_info *info) +{ + uint procType; + struct tgsi_parse_context parse; + + memset(info, 0, sizeof(*info)); + + /** + ** Setup to begin parsing input shader + **/ + if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) { + debug_printf("tgsi_parse_init() failed in tgsi_scan_shader()!\n"); + return; + } + procType = parse.FullHeader.Processor.Processor; + assert(procType == TGSI_PROCESSOR_FRAGMENT || + procType == TGSI_PROCESSOR_VERTEX || + procType == TGSI_PROCESSOR_GEOMETRY); + + + /** + ** Loop over incoming program tokens/instructions + */ + while( !tgsi_parse_end_of_tokens( &parse ) ) { + + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *fullinst + = &parse.FullToken.FullInstruction; + + assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST); + info->opcode_count[fullinst->Instruction.Opcode]++; + } + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + { + struct tgsi_full_declaration *fulldecl + = &parse.FullToken.FullDeclaration; + uint file = fulldecl->Declaration.File; + uint i; + for (i = fulldecl->u.DeclarationRange.First; + i <= fulldecl->u.DeclarationRange.Last; + i++) { + info->file_mask[file] |= (1 << i); + info->file_count[file]++; + + /* special case */ + if (procType == TGSI_PROCESSOR_FRAGMENT && + file == TGSI_FILE_OUTPUT && + fulldecl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) { + info->writes_z = TRUE; + } + } + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + info->immediate_count++; + break; + + default: + assert( 0 ); + } + } + + tgsi_parse_free (&parse); +} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h new file mode 100644 index 0000000000..757446437c --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h @@ -0,0 +1,57 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_SCAN_H +#define TGSI_SCAN_H + + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" + + +/** + * Shader summary info + */ +struct tgsi_shader_info +{ + uint file_mask[TGSI_FILE_COUNT]; /**< bitmask of declared registers */ + uint file_count[TGSI_FILE_COUNT]; /**< number of declared registers */ + + uint immediate_count; /**< number of immediates declared */ + + uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ + + boolean writes_z; /**< does fragment shader write Z value? */ +}; + + +extern void +tgsi_scan_shader(const struct tgsi_token *tokens, + struct tgsi_shader_info *info); + + +#endif /* TGSI_SCAN_H */ -- cgit v1.2.3 From 7aadb475e58b91f3c17c2a70f6700225e9ef25ed Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 24 Feb 2008 02:46:46 +0900 Subject: gallium: Fix MSVC warnings. --- src/gallium/auxiliary/draw/draw_pstipple.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index 4048abf856..1ab04cd959 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -133,7 +133,7 @@ pstip_transform_decl(struct tgsi_transform_context *ctx, pctx->maxSampler = (int) decl->u.DeclarationRange.Last; } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - pctx->maxInput = MAX2(pctx->maxInput, decl->u.DeclarationRange.Last); + pctx->maxInput = MAX2(pctx->maxInput, (int) decl->u.DeclarationRange.Last); if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) pctx->wincoordInput = (int) decl->u.DeclarationRange.First; } @@ -332,7 +332,7 @@ generate_pstip_fs(struct pstip_stage *pstip) if (transform.wincoordInput < 0) { pstip_fs.input_semantic_name[pstip_fs.num_inputs] = TGSI_SEMANTIC_POSITION; - pstip_fs.input_semantic_index[pstip_fs.num_inputs] = transform.maxInput; + pstip_fs.input_semantic_index[pstip_fs.num_inputs] = (ubyte)transform.maxInput; pstip_fs.num_inputs++; } -- cgit v1.2.3 From fdcb9260eea8f9b9deaeeade2a46cffbf3dcaa59 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 24 Feb 2008 17:58:05 +0900 Subject: Add new files. --- src/gallium/auxiliary/draw/SConscript | 3 +++ src/gallium/auxiliary/tgsi/SConscript | 2 ++ src/gallium/drivers/softpipe/SConscript | 3 +++ 3 files changed, 8 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 8e3a8caa74..3302dc44f7 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -3,6 +3,8 @@ Import('*') draw = env.ConvenienceLibrary( target = 'draw', source = [ + 'draw_aaline.c', + 'draw_aapoint.c', 'draw_clip.c', 'draw_vs_exec.c', 'draw_vs_sse.c', @@ -13,6 +15,7 @@ draw = env.ConvenienceLibrary( 'draw_flatshade.c', 'draw_offset.c', 'draw_prim.c', + 'draw_pstipple.c', 'draw_stipple.c', 'draw_twoside.c', 'draw_unfilled.c', diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript index 8464bfe944..4632dcc072 100644 --- a/src/gallium/auxiliary/tgsi/SConscript +++ b/src/gallium/auxiliary/tgsi/SConscript @@ -8,6 +8,8 @@ tgsi = env.ConvenienceLibrary( 'util/tgsi_build.c', 'util/tgsi_dump.c', 'util/tgsi_parse.c', + 'util/tgsi_scan.c', + 'util/tgsi_transform.c', 'util/tgsi_util.c', ]) diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index d581ee8d3c..4c1a6d5df0 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -5,6 +5,9 @@ env = env.Clone() softpipe = env.ConvenienceLibrary( target = 'softpipe', source = [ + 'sp_fs_exec.c', + 'sp_fs_sse.c', + 'sp_fs_llvm.c', 'sp_clear.c', 'sp_context.c', 'sp_draw_arrays.c', -- cgit v1.2.3 From b75706764b4cf18f3b41bf4a97d82b3c528064d8 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 25 Feb 2008 15:18:33 +0900 Subject: Add Zack's comments about CSOs. --- src/gallium/auxiliary/cso_cache/cso_cache.h | 51 ++++++++++++++++++++++++++--- 1 file changed, 46 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h index b4f4f3ae41..3a005a376e 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.h +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,9 +25,48 @@ * **************************************************************************/ - /* - * Authors: - * Zack Rusin + /** + * @file + * Constant State Object (CSO) cache. + * + * The basic idea is that the states are created via the + * create_state/bind_state/delete_state semantics. The driver is expected to + * perform as much of the Gallium state translation to whatever its internal + * representation is during the create call. Gallium then has a caching + * mechanism where it stores the created states. When the pipeline needs an + * actual state change, a bind call is issued. In the bind call the driver + * gets its already translated representation. + * + * Those semantics mean that the driver doesn't do the repeated translations + * of states on every frame, but only once, when a new state is actually + * created. + * + * Even on hardware that doesn't do any kind of state cache, it makes the + * driver look a lot neater, plus it avoids all the redundant state + * translations on every frame. + * + * Currently our constant state objects are: + * - alpha test + * - blend + * - depth stencil + * - fragment shader + * - rasterizer (old setup) + * - sampler + * - vertex shader + * + * Things that are not constant state objects include: + * - blend_color + * - clip_state + * - clear_color_state + * - constant_buffer + * - feedback_state + * - framebuffer_state + * - polygon_stipple + * - scissor_state + * - texture_state + * - viewport_state + * + * @author Zack Rusin */ #ifndef CSO_CACHE_H @@ -36,7 +75,9 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "cso_hash.h" /* for cso_hash_iter, as MSVC requires structures returned by value to be fully defined */ +/* cso_hash.h is necessary for cso_hash_iter, as MSVC requires structures + * returned by value to be fully defined */ +#include "cso_hash.h" #ifdef __cplusplus -- cgit v1.2.3 From 2a0675eb75b8ca52efab739218bf93922bf884b5 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 25 Feb 2008 16:39:39 +0900 Subject: Replace standand library functions by portable ones. --- src/gallium/auxiliary/cso_cache/cso_cache.c | 6 +++-- src/gallium/auxiliary/cso_cache/cso_hash.c | 34 ++++++++++++++--------------- 2 files changed, 20 insertions(+), 20 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index 776ce6bacf..9c32e94124 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -28,6 +28,8 @@ /* Authors: Zack Rusin */ +#include "pipe/p_util.h" + #include "cso_cache.h" #include "cso_hash.h" @@ -176,7 +178,7 @@ void * cso_take_state(struct cso_cache *sc, struct cso_cache *cso_cache_create(void) { - struct cso_cache *sc = malloc(sizeof(struct cso_cache)); + struct cso_cache *sc = MALLOC_STRUCT(cso_cache); sc->blend_hash = cso_hash_create(); sc->sampler_hash = cso_hash_create(); @@ -197,5 +199,5 @@ void cso_cache_delete(struct cso_cache *sc) cso_hash_delete(sc->rasterizer_hash); cso_hash_delete(sc->fs_hash); cso_hash_delete(sc->vs_hash); - free(sc); + FREE(sc); } diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c index e65d331a0b..208fc58502 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -30,12 +30,10 @@ * Zack Rusin */ -#include "cso_hash.h" +#include "pipe/p_debug.h" +#include "pipe/p_util.h" -#include -#include -#include -#include +#include "cso_hash.h" #define MAX(a, b) ((a > b) ? (a) : (b)) @@ -98,7 +96,7 @@ struct cso_hash { static void *cso_data_allocate_node(struct cso_hash_data *hash) { - return malloc(hash->nodeSize); + return MALLOC(hash->nodeSize); } static void cso_data_free_node(struct cso_node *node) @@ -107,10 +105,10 @@ static void cso_data_free_node(struct cso_node *node) * Need to cast value ptr to original cso type, then free the * driver-specific data hanging off of it. For example: struct cso_sampler *csamp = (struct cso_sampler *) node->value; - free(csamp->data); + FREE(csamp->data); */ - free(node->value); - free(node); + FREE(node->value); + FREE(node); } static struct cso_node * @@ -149,7 +147,7 @@ static void cso_data_rehash(struct cso_hash_data *hash, int hint) hash->numBits = (short)hint; hash->numBuckets = primeForNumBits(hint); - hash->buckets = malloc(sizeof(struct cso_node*) * hash->numBuckets); + hash->buckets = MALLOC(sizeof(struct cso_node*) * hash->numBuckets); for (i = 0; i < hash->numBuckets; ++i) hash->buckets[i] = e; @@ -173,7 +171,7 @@ static void cso_data_rehash(struct cso_hash_data *hash, int hint) firstNode = afterLastNode; } } - free(oldBuckets); + FREE(oldBuckets); } } @@ -235,8 +233,8 @@ struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, struct cso_hash * cso_hash_create(void) { - struct cso_hash *hash = malloc(sizeof(struct cso_hash)); - hash->data.d = malloc(sizeof(struct cso_hash_data)); + struct cso_hash *hash = MALLOC_STRUCT(cso_hash); + hash->data.d = MALLOC_STRUCT(cso_hash_data); hash->data.d->fakeNext = 0; hash->data.d->buckets = 0; hash->data.d->size = 0; @@ -261,9 +259,9 @@ void cso_hash_delete(struct cso_hash *hash) cur = next; } } - free(hash->data.d->buckets); - free(hash->data.d); - free(hash); + FREE(hash->data.d->buckets); + FREE(hash->data.d); + FREE(hash); } struct cso_hash_iter cso_hash_find(struct cso_hash *hash, @@ -301,7 +299,7 @@ static struct cso_node *cso_hash_data_next(struct cso_node *node) a.next = node->next; if (!a.next) { - fprintf(stderr, "iterating beyond the last element\n"); + debug_printf("iterating beyond the last element\n"); return 0; } if (a.next->next) @@ -352,7 +350,7 @@ static struct cso_node *cso_hash_data_prev(struct cso_node *node) --bucket; --start; } - fprintf(stderr, "iterating backward beyond first element\n"); + debug_printf("iterating backward beyond first element\n"); return a.e; } -- cgit v1.2.3 From d6c7f7e314ee9f034402c919d142bf6ba9844ec9 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 25 Feb 2008 14:46:42 -0700 Subject: gallium: modify draw_find_vs_output() to search vertex shader outputs This simplifies drivers using the draw module and removes the last dependency on vertex-shader "internals". Since the draw module is producing the post-transformed vertices, it makes sense to ask it where specific vertex attributes are located. This could also simplify some things in the state tracker code for selection, feedback, rasterpos... --- src/gallium/auxiliary/draw/draw_context.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index c28e78d33a..6e5e4d64d1 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -244,14 +244,32 @@ draw_convert_wide_lines(struct draw_context *draw, boolean enable) /** - * The draw module may sometimes generate vertices with extra attributes - * (such as texcoords for AA lines). The driver can call this function - * to find those attributes. + * Ask the draw module for the location/slot of the given vertex attribute in + * a post-transformed vertex. + * + * With this function, drivers that use the draw module should have no reason + * to track the current vertex shader. + * + * Note that the draw module may sometimes generate vertices with extra + * attributes (such as texcoords for AA lines). The driver can call this + * function to find those attributes. + * + * Zero is returned if the attribute is not found since this is + * a don't care / undefined situtation. Returning -1 would be a bit more + * work for the drivers. */ int draw_find_vs_output(struct draw_context *draw, uint semantic_name, uint semantic_index) { + const struct pipe_shader_state *vs = &draw->vertex_shader->state; + uint i; + for (i = 0; i < vs->num_outputs; i++) { + if (vs->output_semantic_name[i] == semantic_name && + vs->output_semantic_index[i] == semantic_index) + return i; + } + /* XXX there may be more than one extra vertex attrib. * For example, simulated gl_FragCoord and gl_PointCoord. */ -- cgit v1.2.3 From cc0cf1154b6288d49d7a9dfd18fb666331dd7334 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 25 Feb 2008 15:34:46 -0700 Subject: gallium: fix bad ptr assignment --- src/gallium/auxiliary/draw/draw_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 6e5e4d64d1..7dd1c6f6fa 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -262,7 +262,7 @@ int draw_find_vs_output(struct draw_context *draw, uint semantic_name, uint semantic_index) { - const struct pipe_shader_state *vs = &draw->vertex_shader->state; + const struct pipe_shader_state *vs = draw->vertex_shader->state; uint i; for (i = 0; i < vs->num_outputs; i++) { if (vs->output_semantic_name[i] == semantic_name && -- cgit v1.2.3 From 08a5f49644c4bfc62291c49718f2d18e58527d1d Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 25 Feb 2008 18:56:03 -0700 Subject: gallium: rewrite AA point fragment shader to use a CMP instruction instead of IF/ELSE/ENDIF Allows the shader to work on i915 hardware. --- src/gallium/auxiliary/draw/draw_aapoint.c | 196 +++++++++++++++--------------- 1 file changed, 96 insertions(+), 100 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c index 27a81f3e90..cae6fcd4d2 100644 --- a/src/gallium/auxiliary/draw/draw_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_aapoint.c @@ -222,7 +222,7 @@ aa_transform_inst(struct tgsi_transform_context *ctx, * * Temp reg0 usage: * t0.x = distance of fragment from center point - * t0.y = boolean, is t0.x > 1 ? + * t0.y = boolean, is t0.x > 1.0, also misc temp usage * t0.z = temporary for computing 1/(1-k) value * t0.w = final coverage value */ @@ -313,9 +313,73 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; ctx->emit_instruction(ctx, &newInst); - /* SGT t0.y, t0.x, tex.z; # bool b = distance > k */ + + /* compute coverage factor = (1-d)/(1-k) */ + + /* SUB t0.z, tex.w, tex.z; # m = 1 - k */ newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_SGT; + newInst.Instruction.Opcode = TGSI_OPCODE_SUB; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* RCP t0.z, t0.z; # t0.z = 1 / m */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_RCP; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* SUB t0.y, 1, t0.x; # d = 1 - d */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SUB; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + ctx->emit_instruction(ctx, &newInst); + + /* MUL t0.w, t0.y, t0.z; # coverage = d * m */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SLE; newInst.Instruction.NumDstRegs = 1; newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; newInst.FullDstRegisters[0].DstRegister.Index = tmp0; @@ -329,111 +393,40 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z; ctx->emit_instruction(ctx, &newInst); - /* IF t0.y # if b then */ + /* CMP t0.w, -t0.y, tex.w, t0.w; + * # if -t0.y < 0 then + * t0.w = 1 + * else + * t0.w = t0.w + */ newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_IF; - newInst.Instruction.NumDstRegs = 0; - newInst.Instruction.NumSrcRegs = 1; + newInst.Instruction.Opcode = TGSI_OPCODE_CMP; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 3; newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[2].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; ctx->emit_instruction(ctx, &newInst); - { - /* compute coverage factor = (1-d)/(1-k) */ - - /* SUB t0.z, tex.w, tex.z; # m = 1 - k */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_SUB; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; - newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; - ctx->emit_instruction(ctx, &newInst); - - /* RCP t0.z, t0.z; # t0.z = 1 / m */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_RCP; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; - newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z; - ctx->emit_instruction(ctx, &newInst); - - /* SUB t0.x, 1, t0.x; # d = 1 - d */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_SUB; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; - newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - ctx->emit_instruction(ctx, &newInst); - - /* MUL t0.w, t0.x, t0.z; # coverage = d * m */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_MUL; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; - newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_X; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z; - ctx->emit_instruction(ctx, &newInst); - } - - /* ELSE */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_ELSE; - newInst.Instruction.NumDstRegs = 0; - newInst.Instruction.NumSrcRegs = 0; - ctx->emit_instruction(ctx, &newInst); - - { - /* MOV t0.w, tex.w; # coverage = 1.0 */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_MOV; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; - newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; - ctx->emit_instruction(ctx, &newInst); - } - - /* ENDIF */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_ENDIF; - newInst.Instruction.NumDstRegs = 0; - newInst.Instruction.NumSrcRegs = 0; - ctx->emit_instruction(ctx, &newInst); } if (inst->Instruction.Opcode == TGSI_OPCODE_END) { @@ -516,7 +509,7 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) (struct tgsi_token *) aapoint_fs.tokens, MAX, &transform.base); -#if 0 /* DEBUG */ +#if 1 /* DEBUG */ tgsi_dump(orig_fs->tokens, 0); tgsi_dump(aapoint_fs.tokens, 0); #endif @@ -613,6 +606,9 @@ aapoint_point(struct draw_stage *stage, struct prim_header *header) * ELSE * coverage = 1.0; // full coverage * ENDIF + * + * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to + * avoid using IF/ELSE/ENDIF TGSI opcodes. */ #if !NORMALIZE -- cgit v1.2.3 From 6abb82da7e676384e7e2c9732307b23f8ed7157d Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 25 Feb 2008 22:03:58 -0500 Subject: implement deleting of driver side cached state in cso's --- src/gallium/auxiliary/cso_cache/cso_cache.c | 88 +++++++++++++++++++++++++++++ src/gallium/auxiliary/cso_cache/cso_cache.h | 14 +++++ src/gallium/auxiliary/cso_cache/cso_hash.h | 1 - src/mesa/state_tracker/st_cache.c | 13 ++++- 4 files changed, 114 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index 9c32e94124..9aa1a64272 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -190,9 +190,96 @@ struct cso_cache *cso_cache_create(void) return sc; } +void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, + void (*func)(void *state, void *user_data), void *user_data) +{ + struct cso_hash *hash = 0; + struct cso_hash_iter iter; + + switch (type) { + case CSO_BLEND: + hash = sc->blend_hash; + break; + case CSO_SAMPLER: + hash = sc->sampler_hash; + break; + case CSO_DEPTH_STENCIL_ALPHA: + hash = sc->depth_stencil_hash; + break; + case CSO_RASTERIZER: + hash = sc->rasterizer_hash; + break; + case CSO_FRAGMENT_SHADER: + hash = sc->fs_hash; + break; + case CSO_VERTEX_SHADER: + hash = sc->vs_hash; + break; + } + + iter = cso_hash_first_node(hash); + while (!cso_hash_iter_is_null(iter)) { + void *state = cso_hash_iter_data(iter); + if (state) { + func(state, user_data); + } + iter = cso_hash_iter_next(iter); + } +} + +static void delete_blend_state(void *state, void *user_data) +{ + struct cso_blend *cso = (struct cso_blend *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + +static void delete_depth_stencil_state(void *state, void *pipe) +{ + struct cso_depth_stencil_alpha *cso = (struct cso_depth_stencil_alpha *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + +static void delete_sampler_state(void *state, void *pipe) +{ + struct cso_sampler *cso = (struct cso_sampler *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + +static void delete_rasterizer_state(void *state, void *pipe) +{ + struct cso_rasterizer *cso = (struct cso_rasterizer *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + +static void delete_fs_state(void *state, void *pipe) +{ + struct cso_fragment_shader *cso = (struct cso_fragment_shader *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + +static void delete_vs_state(void *state, void *pipe) +{ + struct cso_vertex_shader *cso = (struct cso_vertex_shader *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + void cso_cache_delete(struct cso_cache *sc) { assert(sc); + /* delete driver data */ + cso_for_each_state(sc, CSO_BLEND, delete_blend_state, 0); + cso_for_each_state(sc, CSO_DEPTH_STENCIL_ALPHA, delete_depth_stencil_state, 0); + cso_for_each_state(sc, CSO_FRAGMENT_SHADER, delete_fs_state, 0); + cso_for_each_state(sc, CSO_VERTEX_SHADER, delete_vs_state, 0); + cso_for_each_state(sc, CSO_RASTERIZER, delete_rasterizer_state, 0); + cso_for_each_state(sc, CSO_SAMPLER, delete_sampler_state, 0); + cso_hash_delete(sc->blend_hash); cso_hash_delete(sc->sampler_hash); cso_hash_delete(sc->depth_stencil_hash); @@ -201,3 +288,4 @@ void cso_cache_delete(struct cso_cache *sc) cso_hash_delete(sc->vs_hash); FREE(sc); } + diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h index 3a005a376e..f3bd4623c9 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.h +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -97,31 +97,43 @@ struct cso_cache { struct cso_blend { struct pipe_blend_state state; void *data; + void (*delete_state)(void *, void *); + void *context; }; struct cso_depth_stencil_alpha { struct pipe_depth_stencil_alpha_state state; void *data; + void (*delete_state)(void *, void *); + void *context; }; struct cso_rasterizer { struct pipe_rasterizer_state state; void *data; + void (*delete_state)(void *, void *); + void *context; }; struct cso_fragment_shader { struct pipe_shader_state state; void *data; + void (*delete_state)(void *, void *); + void *context; }; struct cso_vertex_shader { struct pipe_shader_state state; void *data; + void (*delete_state)(void *, void *); + void *context; }; struct cso_sampler { struct pipe_sampler_state state; void *data; + void (*delete_state)(void *, void *); + void *context; }; @@ -147,6 +159,8 @@ struct cso_hash_iter cso_find_state(struct cso_cache *sc, struct cso_hash_iter cso_find_state_template(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type, void *templ); +void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, + void (*func)(void *state, void *user_data), void *user_data); void * cso_take_state(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type); diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h index 2e8b69675c..86c62c027a 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.h +++ b/src/gallium/auxiliary/cso_cache/cso_hash.h @@ -38,7 +38,6 @@ extern "C" { #endif - struct cso_hash; struct cso_node; diff --git a/src/mesa/state_tracker/st_cache.c b/src/mesa/state_tracker/st_cache.c index 2979e7fae5..78f282302e 100644 --- a/src/mesa/state_tracker/st_cache.c +++ b/src/mesa/state_tracker/st_cache.c @@ -63,6 +63,8 @@ const struct cso_blend * st_cached_blend_state(struct st_context *st, cso->data = st->pipe->create_blend_state(st->pipe, &cso->state); if (!cso->data) cso->data = &cso->state; + cso->delete_state = st->pipe->delete_blend_state; + cso->context = st->pipe; iter = cso_insert_state(st->cache, hash_key, CSO_BLEND, cso); } return ((struct cso_blend *)cso_hash_iter_data(iter)); @@ -82,6 +84,8 @@ st_cached_sampler_state(struct st_context *st, cso->data = st->pipe->create_sampler_state(st->pipe, &cso->state); if (!cso->data) cso->data = &cso->state; + cso->delete_state = st->pipe->delete_sampler_state; + cso->context = st->pipe; iter = cso_insert_state(st->cache, hash_key, CSO_SAMPLER, cso); } return (struct cso_sampler*)(cso_hash_iter_data(iter)); @@ -103,6 +107,8 @@ st_cached_depth_stencil_alpha_state(struct st_context *st, cso->data = st->pipe->create_depth_stencil_alpha_state(st->pipe, &cso->state); if (!cso->data) cso->data = &cso->state; + cso->delete_state = st->pipe->delete_depth_stencil_alpha_state; + cso->context = st->pipe; iter = cso_insert_state(st->cache, hash_key, CSO_DEPTH_STENCIL_ALPHA, cso); } return (struct cso_depth_stencil_alpha*)(cso_hash_iter_data(iter)); @@ -123,6 +129,8 @@ const struct cso_rasterizer* st_cached_rasterizer_state( cso->data = st->pipe->create_rasterizer_state(st->pipe, &cso->state); if (!cso->data) cso->data = &cso->state; + cso->delete_state = st->pipe->delete_rasterizer_state; + cso->context = st->pipe; iter = cso_insert_state(st->cache, hash_key, CSO_RASTERIZER, cso); } return (struct cso_rasterizer*)(cso_hash_iter_data(iter)); @@ -143,6 +151,8 @@ st_cached_fs_state(struct st_context *st, cso->data = st->pipe->create_fs_state(st->pipe, &cso->state); if (!cso->data) cso->data = &cso->state; + cso->delete_state = st->pipe->delete_fs_state; + cso->context = st->pipe; iter = cso_insert_state(st->cache, hash_key, CSO_FRAGMENT_SHADER, cso); } return (struct cso_fragment_shader*)(cso_hash_iter_data(iter)); @@ -163,8 +173,9 @@ st_cached_vs_state(struct st_context *st, cso->data = st->pipe->create_vs_state(st->pipe, &cso->state); if (!cso->data) cso->data = &cso->state; + cso->delete_state = st->pipe->delete_vs_state; + cso->context = st->pipe; iter = cso_insert_state(st->cache, hash_key, CSO_VERTEX_SHADER, cso); } return (struct cso_vertex_shader*)(cso_hash_iter_data(iter)); } - -- cgit v1.2.3 From bf1c2f3602038440ffacf7ae494cb4e9bacc9bb9 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 26 Feb 2008 00:15:55 -0500 Subject: hide cso cache definition and add some initial code for size limiting the caches --- src/gallium/auxiliary/cso_cache/cso_cache.c | 22 ++++++++++++++++++++++ src/gallium/auxiliary/cso_cache/cso_cache.h | 28 +++++++++++----------------- 2 files changed, 33 insertions(+), 17 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index 9aa1a64272..1b870c8e41 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -33,6 +33,17 @@ #include "cso_cache.h" #include "cso_hash.h" + +struct cso_cache { + struct cso_hash *blend_hash; + struct cso_hash *depth_stencil_hash; + struct cso_hash *fs_hash; + struct cso_hash *vs_hash; + struct cso_hash *rasterizer_hash; + struct cso_hash *sampler_hash; + int max_size; +}; + #if 1 static unsigned hash_key(const void *key, unsigned key_size) { @@ -180,6 +191,7 @@ struct cso_cache *cso_cache_create(void) { struct cso_cache *sc = MALLOC_STRUCT(cso_cache); + sc->max_size = 4096; sc->blend_hash = cso_hash_create(); sc->sampler_hash = cso_hash_create(); sc->depth_stencil_hash = cso_hash_create(); @@ -289,3 +301,13 @@ void cso_cache_delete(struct cso_cache *sc) FREE(sc); } +void cso_set_maximum_cache_size(struct cso_cache *sc, int number) +{ + sc->max_size = number; +} + +int cso_maximum_cache_size(const struct cso_cache *sc) +{ + return sc->max_size; +} + diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h index f3bd4623c9..72a6b9d488 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.h +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -28,7 +28,7 @@ /** * @file * Constant State Object (CSO) cache. - * + * * The basic idea is that the states are created via the * create_state/bind_state/delete_state semantics. The driver is expected to * perform as much of the Gallium state translation to whatever its internal @@ -36,15 +36,15 @@ * mechanism where it stores the created states. When the pipeline needs an * actual state change, a bind call is issued. In the bind call the driver * gets its already translated representation. - * + * * Those semantics mean that the driver doesn't do the repeated translations * of states on every frame, but only once, when a new state is actually * created. - * + * * Even on hardware that doesn't do any kind of state cache, it makes the * driver look a lot neater, plus it avoids all the redundant state * translations on every frame. - * + * * Currently our constant state objects are: * - alpha test * - blend @@ -53,7 +53,7 @@ * - rasterizer (old setup) * - sampler * - vertex shader - * + * * Things that are not constant state objects include: * - blend_color * - clip_state @@ -65,7 +65,7 @@ * - scissor_state * - texture_state * - viewport_state - * + * * @author Zack Rusin */ @@ -75,24 +75,16 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" -/* cso_hash.h is necessary for cso_hash_iter, as MSVC requires structures +/* cso_hash.h is necessary for cso_hash_iter, as MSVC requires structures * returned by value to be fully defined */ -#include "cso_hash.h" +#include "cso_hash.h" #ifdef __cplusplus extern "C" { #endif - -struct cso_cache { - struct cso_hash *blend_hash; - struct cso_hash *depth_stencil_hash; - struct cso_hash *fs_hash; - struct cso_hash *vs_hash; - struct cso_hash *rasterizer_hash; - struct cso_hash *sampler_hash; -}; +struct cso_cache; struct cso_blend { struct pipe_blend_state state; @@ -164,6 +156,8 @@ void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, void * cso_take_state(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type); +void cso_set_maximum_cache_size(struct cso_cache *sc, int number); +int cso_maximum_cache_size(const struct cso_cache *sc); #ifdef __cplusplus } -- cgit v1.2.3 From e7985105695a18c29c13deb2b8f40c15eef72ee6 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 26 Feb 2008 00:18:54 -0500 Subject: add an explicit callback decleration and use it to silence warnings --- src/gallium/auxiliary/cso_cache/cso_cache.c | 2 +- src/gallium/auxiliary/cso_cache/cso_cache.h | 4 +++- src/mesa/state_tracker/st_cache.c | 12 ++++++------ 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index 1b870c8e41..f31042bce1 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -203,7 +203,7 @@ struct cso_cache *cso_cache_create(void) } void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, - void (*func)(void *state, void *user_data), void *user_data) + cso_state_callback func, void *user_data) { struct cso_hash *hash = 0; struct cso_hash_iter iter; diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h index 72a6b9d488..3b0fe100b8 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.h +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -138,6 +138,8 @@ enum cso_cache_type { CSO_VERTEX_SHADER }; +typedef void (*cso_state_callback)(void *, void *); + unsigned cso_construct_key(void *item, int item_size); struct cso_cache *cso_cache_create(void); @@ -152,7 +154,7 @@ struct cso_hash_iter cso_find_state_template(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type, void *templ); void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, - void (*func)(void *state, void *user_data), void *user_data); + cso_state_callback func, void *user_data); void * cso_take_state(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type); diff --git a/src/mesa/state_tracker/st_cache.c b/src/mesa/state_tracker/st_cache.c index 78f282302e..7ee4fadc37 100644 --- a/src/mesa/state_tracker/st_cache.c +++ b/src/mesa/state_tracker/st_cache.c @@ -63,7 +63,7 @@ const struct cso_blend * st_cached_blend_state(struct st_context *st, cso->data = st->pipe->create_blend_state(st->pipe, &cso->state); if (!cso->data) cso->data = &cso->state; - cso->delete_state = st->pipe->delete_blend_state; + cso->delete_state = (cso_state_callback)st->pipe->delete_blend_state; cso->context = st->pipe; iter = cso_insert_state(st->cache, hash_key, CSO_BLEND, cso); } @@ -84,7 +84,7 @@ st_cached_sampler_state(struct st_context *st, cso->data = st->pipe->create_sampler_state(st->pipe, &cso->state); if (!cso->data) cso->data = &cso->state; - cso->delete_state = st->pipe->delete_sampler_state; + cso->delete_state = (cso_state_callback)st->pipe->delete_sampler_state; cso->context = st->pipe; iter = cso_insert_state(st->cache, hash_key, CSO_SAMPLER, cso); } @@ -107,7 +107,7 @@ st_cached_depth_stencil_alpha_state(struct st_context *st, cso->data = st->pipe->create_depth_stencil_alpha_state(st->pipe, &cso->state); if (!cso->data) cso->data = &cso->state; - cso->delete_state = st->pipe->delete_depth_stencil_alpha_state; + cso->delete_state = (cso_state_callback)st->pipe->delete_depth_stencil_alpha_state; cso->context = st->pipe; iter = cso_insert_state(st->cache, hash_key, CSO_DEPTH_STENCIL_ALPHA, cso); } @@ -129,7 +129,7 @@ const struct cso_rasterizer* st_cached_rasterizer_state( cso->data = st->pipe->create_rasterizer_state(st->pipe, &cso->state); if (!cso->data) cso->data = &cso->state; - cso->delete_state = st->pipe->delete_rasterizer_state; + cso->delete_state = (cso_state_callback)st->pipe->delete_rasterizer_state; cso->context = st->pipe; iter = cso_insert_state(st->cache, hash_key, CSO_RASTERIZER, cso); } @@ -151,7 +151,7 @@ st_cached_fs_state(struct st_context *st, cso->data = st->pipe->create_fs_state(st->pipe, &cso->state); if (!cso->data) cso->data = &cso->state; - cso->delete_state = st->pipe->delete_fs_state; + cso->delete_state = (cso_state_callback)st->pipe->delete_fs_state; cso->context = st->pipe; iter = cso_insert_state(st->cache, hash_key, CSO_FRAGMENT_SHADER, cso); } @@ -173,7 +173,7 @@ st_cached_vs_state(struct st_context *st, cso->data = st->pipe->create_vs_state(st->pipe, &cso->state); if (!cso->data) cso->data = &cso->state; - cso->delete_state = st->pipe->delete_vs_state; + cso->delete_state = (cso_state_callback)st->pipe->delete_vs_state; cso->context = st->pipe; iter = cso_insert_state(st->cache, hash_key, CSO_VERTEX_SHADER, cso); } -- cgit v1.2.3 From 7838aaffdb9d34427ebcb73aac585c85d9622018 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 26 Feb 2008 01:48:01 -0500 Subject: implement cache limits for cso by default set to 4096, which might be on the large side --- src/gallium/auxiliary/cso_cache/cso_cache.c | 144 ++++++++++++++++++++-------- src/gallium/auxiliary/cso_cache/cso_hash.c | 5 + src/gallium/auxiliary/cso_cache/cso_hash.h | 4 +- 3 files changed, 110 insertions(+), 43 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index f31042bce1..a6e8469b44 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -127,12 +127,106 @@ static int _cso_size_for_type(enum cso_cache_type type) return 0; } + +static void delete_blend_state(void *state, void *data) +{ + struct cso_blend *cso = (struct cso_blend *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + +static void delete_depth_stencil_state(void *state, void *data) +{ + struct cso_depth_stencil_alpha *cso = (struct cso_depth_stencil_alpha *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + +static void delete_sampler_state(void *state, void *data) +{ + struct cso_sampler *cso = (struct cso_sampler *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + +static void delete_rasterizer_state(void *state, void *data) +{ + struct cso_rasterizer *cso = (struct cso_rasterizer *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + +static void delete_fs_state(void *state, void *data) +{ + struct cso_fragment_shader *cso = (struct cso_fragment_shader *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + +static void delete_vs_state(void *state, void *data) +{ + struct cso_vertex_shader *cso = (struct cso_vertex_shader *)state; + if (cso->delete_state && cso->data != &cso->state) + cso->delete_state(cso->context, cso->data); +} + + +static INLINE void delete_cso(void *state, enum cso_cache_type type) +{ + switch (type) { + case CSO_BLEND: { + delete_blend_state(state, 0); + } + break; + case CSO_SAMPLER: { + delete_sampler_state(state, 0); + } + break; + case CSO_DEPTH_STENCIL_ALPHA: { + delete_depth_stencil_state(state, 0); + } + break; + case CSO_RASTERIZER: { + delete_rasterizer_state(state, 0); + } + break; + case CSO_FRAGMENT_SHADER: { + delete_fs_state(state, 0); + } + break; + case CSO_VERTEX_SHADER: { + delete_vs_state(state, 0); + } + break; + } + free(state); +} + +static INLINE void sanitize_hash(struct cso_hash *hash, enum cso_cache_type type, + int max_size) +{ + /* if we're approach the maximum size, remove fourth of the entries + * otherwise every subsequent call will go through the same */ + int max_entries = (max_size > cso_hash_size(hash)) ? max_size : cso_hash_size(hash); + int to_remove = (max_size < max_entries) * max_entries/4; + while (to_remove) { + /*remove elements until we're good */ + /*fixme: currently we pick the nodes to remove at random*/ + struct cso_hash_iter iter = cso_hash_first_node(hash); + void *cso = cso_hash_take(hash, cso_hash_iter_key(iter)); + delete_cso(cso, type); + --to_remove; + } +} + struct cso_hash_iter cso_insert_state(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type, void *state) { struct cso_hash *hash = _cso_hash_for_type(sc, type); + sanitize_hash(hash, type, sc->max_size); + return cso_hash_insert(hash, hash_key, state); } @@ -239,48 +333,6 @@ void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, } } -static void delete_blend_state(void *state, void *user_data) -{ - struct cso_blend *cso = (struct cso_blend *)state; - if (cso->delete_state && cso->data != &cso->state) - cso->delete_state(cso->context, cso->data); -} - -static void delete_depth_stencil_state(void *state, void *pipe) -{ - struct cso_depth_stencil_alpha *cso = (struct cso_depth_stencil_alpha *)state; - if (cso->delete_state && cso->data != &cso->state) - cso->delete_state(cso->context, cso->data); -} - -static void delete_sampler_state(void *state, void *pipe) -{ - struct cso_sampler *cso = (struct cso_sampler *)state; - if (cso->delete_state && cso->data != &cso->state) - cso->delete_state(cso->context, cso->data); -} - -static void delete_rasterizer_state(void *state, void *pipe) -{ - struct cso_rasterizer *cso = (struct cso_rasterizer *)state; - if (cso->delete_state && cso->data != &cso->state) - cso->delete_state(cso->context, cso->data); -} - -static void delete_fs_state(void *state, void *pipe) -{ - struct cso_fragment_shader *cso = (struct cso_fragment_shader *)state; - if (cso->delete_state && cso->data != &cso->state) - cso->delete_state(cso->context, cso->data); -} - -static void delete_vs_state(void *state, void *pipe) -{ - struct cso_vertex_shader *cso = (struct cso_vertex_shader *)state; - if (cso->delete_state && cso->data != &cso->state) - cso->delete_state(cso->context, cso->data); -} - void cso_cache_delete(struct cso_cache *sc) { assert(sc); @@ -304,6 +356,14 @@ void cso_cache_delete(struct cso_cache *sc) void cso_set_maximum_cache_size(struct cso_cache *sc, int number) { sc->max_size = number; + + sanitize_hash(sc->blend_hash, CSO_BLEND, sc->max_size); + sanitize_hash(sc->depth_stencil_hash, CSO_DEPTH_STENCIL_ALPHA, + sc->max_size); + sanitize_hash(sc->fs_hash, CSO_FRAGMENT_SHADER, sc->max_size); + sanitize_hash(sc->vs_hash, CSO_VERTEX_SHADER, sc->max_size); + sanitize_hash(sc->rasterizer_hash, CSO_RASTERIZER, sc->max_size); + sanitize_hash(sc->sampler_hash, CSO_SAMPLER, sc->max_size); } int cso_maximum_cache_size(const struct cso_cache *sc) diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c index 208fc58502..b3b4d667d2 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -395,3 +395,8 @@ struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash) struct cso_hash_iter iter = {hash, cso_data_first_node(hash->data.d)}; return iter; } + +int cso_hash_size(struct cso_hash *hash) +{ + return hash->data.d->size; +} diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h index 86c62c027a..d5bca9d591 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.h +++ b/src/gallium/auxiliary/cso_cache/cso_hash.h @@ -47,7 +47,9 @@ struct cso_hash_iter { }; struct cso_hash *cso_hash_create(void); -void cso_hash_delete(struct cso_hash *hash); +void cso_hash_delete(struct cso_hash *hash); + +int cso_hash_size(struct cso_hash *hash); struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, unsigned key, void *data); -- cgit v1.2.3 From ad6bb870de6103ed240fa1f9f828bd13a4401a9a Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 26 Feb 2008 11:49:25 +0100 Subject: gallium: Fix build on Windows. --- src/gallium/auxiliary/cso_cache/cso_cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index a6e8469b44..b427b509f8 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -199,7 +199,7 @@ static INLINE void delete_cso(void *state, enum cso_cache_type type) } break; } - free(state); + FREE(state); } static INLINE void sanitize_hash(struct cso_hash *hash, enum cso_cache_type type, -- cgit v1.2.3 From 232a41e19faa860f083e414cb1eb38c0617e9241 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 26 Feb 2008 08:41:42 -0700 Subject: gallium: disable debug code --- src/gallium/auxiliary/draw/draw_aapoint.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c index cae6fcd4d2..d16adb2505 100644 --- a/src/gallium/auxiliary/draw/draw_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_aapoint.c @@ -509,7 +509,7 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) (struct tgsi_token *) aapoint_fs.tokens, MAX, &transform.base); -#if 1 /* DEBUG */ +#if 0 /* DEBUG */ tgsi_dump(orig_fs->tokens, 0); tgsi_dump(aapoint_fs.tokens, 0); #endif -- cgit v1.2.3 From 1410b7bb509ef37c41043b173bc1047257483af0 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 26 Feb 2008 10:12:17 -0700 Subject: gallium: collect more shader info in tgsi_scan_shader() Now getting input/output semantic info so we can eventually remove those fields from pipe_shader_state. --- src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 20 ++++++++++++++++++++ src/gallium/auxiliary/tgsi/util/tgsi_scan.h | 11 +++++++++++ 2 files changed, 31 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c index 4b99ac37cc..a1cc681492 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c @@ -69,6 +69,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens, */ while( !tgsi_parse_end_of_tokens( &parse ) ) { + info->num_tokens++; + tgsi_parse_token( &parse ); switch( parse.FullToken.Token.Type ) { @@ -91,9 +93,27 @@ tgsi_scan_shader(const struct tgsi_token *tokens, for (i = fulldecl->u.DeclarationRange.First; i <= fulldecl->u.DeclarationRange.Last; i++) { + + /* only first 32 regs will appear in this bitfield */ info->file_mask[file] |= (1 << i); info->file_count[file]++; + if (file == TGSI_FILE_INPUT) { + info->input_semantic_name[info->num_inputs] + = fulldecl->Semantic.SemanticName; + info->input_semantic_index[info->num_inputs] + = fulldecl->Semantic.SemanticIndex; + info->num_inputs++; + } + + if (file == TGSI_FILE_OUTPUT) { + info->output_semantic_name[info->num_outputs] + = fulldecl->Semantic.SemanticName; + info->output_semantic_index[info->num_outputs] + = fulldecl->Semantic.SemanticIndex; + info->num_outputs++; + } + /* special case */ if (procType == TGSI_PROCESSOR_FRAGMENT && file == TGSI_FILE_OUTPUT && diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h index 757446437c..dc6dfd6d0b 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h @@ -30,6 +30,7 @@ #include "pipe/p_util.h" +#include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" @@ -38,6 +39,16 @@ */ struct tgsi_shader_info { + uint num_tokens; + + /* XXX eventually remove the corresponding fields from pipe_shader_state: */ + ubyte num_inputs; + ubyte num_outputs; + ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */ + ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; + ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ + ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; + uint file_mask[TGSI_FILE_COUNT]; /**< bitmask of declared registers */ uint file_count[TGSI_FILE_COUNT]; /**< number of declared registers */ -- cgit v1.2.3 From 36aa9cf781440ce685930586cbf53248cf9c0dc2 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 26 Feb 2008 20:32:42 +0100 Subject: gallium: Print texture target for short dumps. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index ff74e6117c..59be14a748 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -664,6 +664,19 @@ static const char *TGSI_TEXTURES[] = "TEXTURE_SHADOWRECT" }; +static const char *TGSI_TEXTURES_SHORT[] = +{ + "UNKNOWN", + "1D", + "2D", + "3D", + "CUBE", + "RECT", + "SHADOW1D", + "SHADOW2D", + "SHADOWRECT" +}; + static const char *TGSI_SRC_REGISTER_EXTS[] = { "SRC_REGISTER_EXT_TYPE_SWZ", @@ -1037,6 +1050,11 @@ dump_instruction_short( first_reg = FALSE; } + if (inst->InstructionExtTexture.Texture != TGSI_TEXTURE_UNKNOWN) { + TXT( ", " ); + ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES_SHORT ); + } + switch( inst->Instruction.Opcode ) { case TGSI_OPCODE_IF: case TGSI_OPCODE_ELSE: -- cgit v1.2.3 From 5e29aab1752c3e07ae2ebde4cb00e6550dab0eb2 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 26 Feb 2008 14:29:35 -0700 Subject: gallium: replace draw_convert_wide_points() with draw_wide_point_threshold() Specifying a threshold size is a bit more flexible, and allows the option of converting even 1-pixel points to triangles (set threshold=0). Also, remove 0.25 pixel bias in wide_point(). --- src/gallium/auxiliary/draw/draw_context.c | 10 +++++----- src/gallium/auxiliary/draw/draw_private.h | 2 +- src/gallium/auxiliary/draw/draw_validate.c | 23 +++++++++++++---------- src/gallium/auxiliary/draw/draw_wide_prims.c | 7 ++++--- src/gallium/drivers/softpipe/sp_context.c | 3 --- 5 files changed, 23 insertions(+), 22 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 7dd1c6f6fa..48f00946ea 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -80,7 +80,7 @@ struct draw_context *draw_create( void ) draw->shader_queue_flush = draw_vertex_shader_queue_flush; - draw->convert_wide_points = TRUE; + draw->wide_point_threshold = 1000000.0; /* infinity */ draw->convert_wide_lines = TRUE; draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ @@ -220,14 +220,14 @@ draw_set_mapped_constant_buffer(struct draw_context *draw, /** - * Tells the draw module whether to convert wide points (size != 1) - * into triangles. + * Tells the draw module to draw points with triangles if their size + * is greater than this threshold. */ void -draw_convert_wide_points(struct draw_context *draw, boolean enable) +draw_wide_point_threshold(struct draw_context *draw, float threshold) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->convert_wide_points = enable; + draw->wide_point_threshold = threshold; } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 6abced139b..03e3d3a66b 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -215,7 +215,7 @@ struct draw_context float plane[12][4]; unsigned nr_planes; - boolean convert_wide_points; /**< convert wide points to tris? */ + float wide_point_threshold; /**< convert pnts to tris if larger than this */ boolean convert_wide_lines; /**< convert wide lines to tris? */ boolean use_sse; diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index 3a19dd4cd7..ded7d10c08 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -52,6 +52,19 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) */ stage->next = next; + /* drawing wide lines? */ + wide_lines = (draw->rasterizer->line_width != 1.0 + && draw->convert_wide_lines + && !draw->rasterizer->line_smooth); + + /* drawing large points? */ + if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) + wide_points = FALSE; + else if (draw->rasterizer->point_size > draw->wide_point_threshold) + wide_points = TRUE; + else + wide_points = FALSE; + /* * NOTE: we build up the pipeline in end-to-start order. * @@ -69,16 +82,6 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) next = draw->pipeline.aapoint; } - /* drawing wide lines? */ - wide_lines = (draw->rasterizer->line_width != 1.0 - && draw->convert_wide_lines - && !draw->rasterizer->line_smooth); - - /* drawing large points? */ - wide_points = (draw->rasterizer->point_size != 1.0 - && draw->convert_wide_points - && !draw->pipeline.aapoint); - if (wide_lines || wide_points || draw->rasterizer->point_sprite) { diff --git a/src/gallium/auxiliary/draw/draw_wide_prims.c b/src/gallium/auxiliary/draw/draw_wide_prims.c index 1f8069bdca..d967810dd4 100644 --- a/src/gallium/auxiliary/draw/draw_wide_prims.c +++ b/src/gallium/auxiliary/draw/draw_wide_prims.c @@ -219,8 +219,8 @@ static void wide_point( struct draw_stage *stage, half_size = wide->half_point_size; } - left_adj = -half_size + 0.25f; - right_adj = half_size + 0.25f; + left_adj = -half_size; /* + 0.25f;*/ + right_adj = half_size; /* + 0.25f;*/ pos0[0] += left_adj; pos0[1] -= half_size; @@ -266,7 +266,8 @@ static void wide_first_point( struct draw_stage *stage, wide->half_point_size = 0.5f * draw->rasterizer->point_size; - if (draw->rasterizer->point_size != 1.0) { + /* XXX we won't know the real size if it's computed by the vertex shader! */ + if (draw->rasterizer->point_size > draw->wide_point_threshold) { stage->point = wide_point; } else { diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 2cdf3c75bf..6a884327e0 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -337,9 +337,6 @@ struct pipe_context *softpipe_create( struct pipe_winsys *pipe_winsys, draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe); #endif - /* sp_prim_setup can do wide points (don't convert to quads) */ - draw_convert_wide_points(softpipe->draw, FALSE); - sp_init_surface_functions(softpipe); return &softpipe->pipe; -- cgit v1.2.3 From d4a4bed6638e0156324ff9b270f2248c4b5275bb Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 26 Feb 2008 14:30:41 -0700 Subject: gallium: updated prototype (missed in prev commit) --- src/gallium/auxiliary/draw/draw_context.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index c25301f71d..d6685f479b 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -90,6 +90,8 @@ void draw_set_rasterizer_state( struct draw_context *draw, void draw_set_rasterize_stage( struct draw_context *draw, struct draw_stage *stage ); +void draw_wide_point_threshold(struct draw_context *draw, float threshold); + void draw_convert_wide_points(struct draw_context *draw, boolean enable); void draw_convert_wide_lines(struct draw_context *draw, boolean enable); -- cgit v1.2.3 From 8902ce06e85f48a436ee3794c77f7abf59f56594 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 27 Feb 2008 14:06:07 +0900 Subject: gallium: Use stricter types. VC++ won't silently convert a pointer to a function with typed pointer arguments to one with void pointer arguments. --- src/gallium/auxiliary/cso_cache/cso_cache.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h index 3b0fe100b8..44ee128a4a 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.h +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -88,44 +88,44 @@ struct cso_cache; struct cso_blend { struct pipe_blend_state state; - void *data; - void (*delete_state)(void *, void *); - void *context; + void *data; + void (*delete_state)(struct pipe_context *, void *); + struct pipe_context *context; }; struct cso_depth_stencil_alpha { struct pipe_depth_stencil_alpha_state state; void *data; - void (*delete_state)(void *, void *); - void *context; + void (*delete_state)(struct pipe_context *, void *); + struct pipe_context *context; }; struct cso_rasterizer { struct pipe_rasterizer_state state; void *data; - void (*delete_state)(void *, void *); - void *context; + void (*delete_state)(struct pipe_context *, void *); + struct pipe_context *context; }; struct cso_fragment_shader { struct pipe_shader_state state; void *data; - void (*delete_state)(void *, void *); - void *context; + void (*delete_state)(struct pipe_context *, void *); + struct pipe_context *context; }; struct cso_vertex_shader { struct pipe_shader_state state; void *data; - void (*delete_state)(void *, void *); - void *context; + void (*delete_state)(struct pipe_context *, void *); + struct pipe_context *context; }; struct cso_sampler { struct pipe_sampler_state state; void *data; - void (*delete_state)(void *, void *); - void *context; + void (*delete_state)(struct pipe_context *, void *); + struct pipe_context *context; }; -- cgit v1.2.3 From 9a8a5d7c2fe7f32c8d15bc0a77f86e1f2f995ffe Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 27 Feb 2008 16:42:15 +0900 Subject: gallium: Replace // comments. --- src/gallium/auxiliary/gallivm/gallivm.h | 2 +- src/gallium/auxiliary/gallivm/gallivm_p.h | 6 +++--- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 2 +- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h | 8 ++++---- src/gallium/auxiliary/tgsi/util/tgsi_build.h | 8 ++++---- src/gallium/auxiliary/tgsi/util/tgsi_dump.h | 8 ++++---- src/gallium/auxiliary/tgsi/util/tgsi_parse.h | 8 ++++---- src/gallium/auxiliary/tgsi/util/tgsi_util.h | 8 ++++---- 8 files changed, 25 insertions(+), 25 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/gallivm.h b/src/gallium/auxiliary/gallivm/gallivm.h index 92da4bca7f..57912a952f 100644 --- a/src/gallium/auxiliary/gallivm/gallivm.h +++ b/src/gallium/auxiliary/gallivm/gallivm.h @@ -97,7 +97,7 @@ void gallivm_cpu_engine_delete(struct gallivm_cpu_engine *ee); #endif /* MESA_LLVM */ #if defined __cplusplus -} // extern "C" +} #endif #endif diff --git a/src/gallium/auxiliary/gallivm/gallivm_p.h b/src/gallium/auxiliary/gallivm/gallivm_p.h index cfe7b1901b..ebf3e11cd5 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_p.h +++ b/src/gallium/auxiliary/gallivm/gallivm_p.h @@ -32,7 +32,7 @@ struct gallivm_ir { int num_components; int num_consts; - //FIXME: this might not be enough for some shaders + /* FIXME: this might not be enough for some shaders */ struct gallivm_interpolate interpolators[32*4]; int num_interp; }; @@ -46,7 +46,7 @@ struct gallivm_prog { int num_consts; - //FIXME: this might not be enough for some shaders + /* FIXME: this might not be enough for some shaders */ struct gallivm_interpolate interpolators[32*4]; int num_interp; }; @@ -104,7 +104,7 @@ static INLINE int gallivm_w_swizzle(int swizzle) #endif /* MESA_LLVM */ #if defined __cplusplus -} // extern "C" +} #endif #endif diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 779b901f2b..57ccd86be4 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -1268,7 +1268,7 @@ emit_store( break; case TGSI_SAT_ZERO_ONE: -// assert( 0 ); + /* assert( 0 ); */ break; case TGSI_SAT_MINUS_PLUS_ONE: diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h index 9bee371766..63b8ef3911 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h @@ -3,7 +3,7 @@ #if defined __cplusplus extern "C" { -#endif // defined __cplusplus +#endif struct tgsi_token; struct x86_function; @@ -19,8 +19,8 @@ tgsi_emit_sse2_fs( struct x86_function *function ); #if defined __cplusplus -} // extern "C" -#endif // defined __cplusplus +} +#endif -#endif // !defined TGSI_SSE2_H +#endif /* TGSI_SSE2_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.h b/src/gallium/auxiliary/tgsi/util/tgsi_build.h index 116c78abf3..607860e7fc 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.h @@ -3,7 +3,7 @@ #if defined __cplusplus extern "C" { -#endif // defined __cplusplus +#endif /* * version @@ -313,8 +313,8 @@ tgsi_build_dst_register_ext_modulate( struct tgsi_header *header ); #if defined __cplusplus -} // extern "C" -#endif // defined __cplusplus +} +#endif -#endif // !defined TGSI_BUILD_H +#endif /* TGSI_BUILD_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h index 1adc9db251..b983b38226 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h @@ -3,7 +3,7 @@ #if defined __cplusplus extern "C" { -#endif // defined __cplusplus +#endif #define TGSI_DUMP_VERBOSE 1 #define TGSI_DUMP_NO_IGNORED 2 @@ -21,8 +21,8 @@ tgsi_dump_str( unsigned flags ); #if defined __cplusplus -} // extern "C" -#endif // defined __cplusplus +} +#endif -#endif // !defined TGSI_DUMP_H +#endif /* TGSI_DUMP_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h index 9372da8d5d..5ccb5bfcf6 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h @@ -3,7 +3,7 @@ #if defined __cplusplus extern "C" { -#endif // defined __cplusplus +#endif struct tgsi_full_version { @@ -114,8 +114,8 @@ tgsi_parse_token( struct tgsi_parse_context *ctx ); #if defined __cplusplus -} // extern "C" -#endif // defined __cplusplus +} +#endif -#endif // !defined TGSI_PARSE_H +#endif /* TGSI_PARSE_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.h b/src/gallium/auxiliary/tgsi/util/tgsi_util.h index ef14446f0e..45f5f0be0e 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_util.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_util.h @@ -3,7 +3,7 @@ #if defined __cplusplus extern "C" { -#endif // defined __cplusplus +#endif void * tgsi_align_128bit( @@ -63,8 +63,8 @@ tgsi_util_set_full_src_register_sign_mode( unsigned sign_mode ); #if defined __cplusplus -} // extern "C" -#endif // defined __cplusplus +} +#endif -#endif // !defined TGSI_UTIL_H +#endif /* TGSI_UTIL_H */ -- cgit v1.2.3 From d6229d7f1fda03d3c73998505b0facf6e3d5b882 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 27 Feb 2008 18:39:57 +0900 Subject: gallium: Make headers C++ friendly. --- src/gallium/auxiliary/pipebuffer/pb_buffer.h | 11 ++++++++++- src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h | 11 ++++++++++- src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 11 ++++++++++- 3 files changed, 30 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index f5b5f4052f..4b09c80b2a 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -37,7 +37,7 @@ * There is no obligation of a winsys driver to use this library. And a pipe * driver should be completly agnostic about it. * - * \author José Fonseca + * \author Jos� Fonseca */ #ifndef PB_BUFFER_H_ @@ -50,6 +50,11 @@ #include "pipe/p_inlines.h" +#ifdef __cplusplus +extern "C" { +#endif + + struct pb_vtbl; /** @@ -200,4 +205,8 @@ void pb_init_winsys(struct pipe_winsys *winsys); +#ifdef __cplusplus +} +#endif + #endif /*PB_BUFFER_H_*/ diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h index c40b9c75e1..50d5891bdb 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h @@ -44,7 +44,7 @@ * Between the handle's destruction, and the fence signalling, the buffer is * stored in a fenced buffer list. * - * \author José Fonseca + * \author José Fonseca */ #ifndef PB_BUFFER_FENCED_H_ @@ -54,6 +54,11 @@ #include "pipe/p_debug.h" +#ifdef __cplusplus +extern "C" { +#endif + + struct pipe_winsys; struct pipe_buffer; struct pipe_fence_handle; @@ -114,4 +119,8 @@ buffer_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence); +#ifdef __cplusplus +} +#endif + #endif /*PB_BUFFER_FENCED_H_*/ diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 1ddf784c97..0cf8e92e37 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -43,7 +43,7 @@ * - the fenced buffer manager, which will delay buffer destruction until the * the moment the card finishing processing it. * - * \author José Fonseca + * \author José Fonseca */ #ifndef PB_BUFMGR_H_ @@ -53,6 +53,11 @@ #include +#ifdef __cplusplus +extern "C" { +#endif + + struct pb_desc; struct pipe_buffer; struct pipe_winsys; @@ -123,4 +128,8 @@ fenced_bufmgr_create(struct pb_manager *provider, struct pipe_winsys *winsys); +#ifdef __cplusplus +} +#endif + #endif /*PB_BUFMGR_H_*/ -- cgit v1.2.3 From 31358282d4bd5a9708dba7be059dcff02233b4e1 Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 27 Feb 2008 09:15:15 -0700 Subject: gallium: better debug messages --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 57ccd86be4..0da3b0bdb7 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -2268,7 +2268,7 @@ tgsi_emit_sse2( &parse.FullToken.FullInstruction ); if (!ok) { - debug_printf("failed to translate tgsi opcode %d\n", + debug_printf("failed to translate tgsi opcode %d to SSE\n", parse.FullToken.FullInstruction.Instruction.Opcode ); } break; @@ -2276,7 +2276,7 @@ tgsi_emit_sse2( case TGSI_TOKEN_TYPE_IMMEDIATE: /* XXX implement this */ ok = 0; - debug_printf("failed to emit immediate value\n"); + debug_printf("failed to emit immediate value to SSE\n"); break; default: @@ -2358,7 +2358,7 @@ tgsi_emit_sse2_fs( &parse.FullToken.FullInstruction ); if (!ok) { - debug_printf("failed to translate tgsi opcode %d\n", + debug_printf("failed to translate tgsi opcode %d to SSE\n", parse.FullToken.FullInstruction.Instruction.Opcode ); } break; @@ -2366,7 +2366,7 @@ tgsi_emit_sse2_fs( case TGSI_TOKEN_TYPE_IMMEDIATE: /* XXX implement this */ ok = 0; - debug_printf("failed to emit immediate value\n"); + debug_printf("failed to emit immediate value to SSE\n"); break; default: -- cgit v1.2.3 From 6f715dcc219071e574e363a9db4365c9c31ebbd3 Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 27 Feb 2008 14:21:12 -0700 Subject: gallium: remove pipe_context->texture_create/release/get_tex_surface() These functions are now per-screen, not per-context. --- src/gallium/auxiliary/draw/draw_aaline.c | 5 ++- src/gallium/auxiliary/draw/draw_pstipple.c | 6 ++- src/gallium/drivers/cell/ppu/cell_texture.c | 30 ------------- src/gallium/drivers/failover/fo_context.c | 4 +- src/gallium/drivers/i915simple/i915_texture.c | 28 ------------ src/gallium/drivers/i965simple/brw_tex_layout.c | 30 ------------- src/gallium/drivers/softpipe/sp_texture.c | 60 ++++++------------------- src/gallium/drivers/softpipe/sp_tile_cache.c | 3 +- src/gallium/include/pipe/p_context.h | 18 +------- src/gallium/include/pipe/p_inlines.h | 18 +++----- src/mesa/state_tracker/st_cb_drawpixels.c | 8 ++-- src/mesa/state_tracker/st_cb_fbo.c | 9 ++-- src/mesa/state_tracker/st_cb_texture.c | 28 ++++++------ src/mesa/state_tracker/st_gen_mipmap.c | 7 +-- src/mesa/state_tracker/st_texture.c | 16 ++++--- 15 files changed, 71 insertions(+), 199 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c index 73a02a32e4..be6cfd3b6a 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -362,6 +362,7 @@ static void aaline_create_texture(struct aaline_stage *aaline) { struct pipe_context *pipe = aaline->pipe; + struct pipe_screen *screen = pipe->screen; struct pipe_texture texTemp; uint level; @@ -374,7 +375,7 @@ aaline_create_texture(struct aaline_stage *aaline) texTemp.depth[0] = 1; texTemp.cpp = 1; - aaline->texture = pipe->texture_create(pipe, &texTemp); + aaline->texture = screen->texture_create(screen, &texTemp); /* Fill in mipmap images. * Basically each level is solid opaque, except for the outermost @@ -388,7 +389,7 @@ aaline_create_texture(struct aaline_stage *aaline) assert(aaline->texture->width[level] == aaline->texture->height[level]); - surface = pipe->get_tex_surface(pipe, aaline->texture, 0, level, 0); + surface = screen->get_tex_surface(screen, aaline->texture, 0, level, 0); data = pipe_surface_map(surface); for (i = 0; i < size; i++) { diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index 1ab04cd959..efc88bf038 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -348,12 +348,13 @@ pstip_update_texture(struct pstip_stage *pstip) { static const uint bit31 = 1 << 31; struct pipe_context *pipe = pstip->pipe; + struct pipe_screen *screen = pipe->screen; struct pipe_surface *surface; const uint *stipple = pstip->state.stipple->stipple; uint i, j; ubyte *data; - surface = pipe->get_tex_surface(pipe, pstip->texture, 0, 0, 0); + surface = screen->get_tex_surface(screen, pstip->texture, 0, 0, 0); data = pipe_surface_map(surface); /* @@ -389,6 +390,7 @@ static void pstip_create_texture(struct pstip_stage *pstip) { struct pipe_context *pipe = pstip->pipe; + struct pipe_screen *screen = pipe->screen; struct pipe_texture texTemp; memset(&texTemp, 0, sizeof(texTemp)); @@ -400,7 +402,7 @@ pstip_create_texture(struct pstip_stage *pstip) texTemp.depth[0] = 1; texTemp.cpp = 1; - pstip->texture = pipe->texture_create(pipe, &texTemp); + pstip->texture = screen->texture_create(screen, &texTemp); //pstip_update_texture(pstip); } diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index e6398a85fa..28cadad6ed 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -79,14 +79,6 @@ cell_texture_layout(struct cell_texture * spt) } -static struct pipe_texture * -cell_texture_create(struct pipe_context *pipe, - const struct pipe_texture *templat) -{ - return pipe->screen->texture_create(pipe->screen, templat); - -} - static struct pipe_texture * cell_texture_create_screen(struct pipe_screen *screen, const struct pipe_texture *templat) @@ -116,13 +108,6 @@ cell_texture_create_screen(struct pipe_screen *screen, } -static void -cell_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) -{ - return pipe->screen->texture_release(pipe->screen, pt); -} - - static void cell_texture_release_screen(struct pipe_screen *screen, struct pipe_texture **pt) @@ -157,18 +142,6 @@ cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) } -/** - * Called via pipe->get_tex_surface() - */ -static struct pipe_surface * -cell_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - return pipe->screen->get_tex_surface(pipe->screen, pt, face, level, zslice); -} - - static struct pipe_surface * cell_get_tex_surface_screen(struct pipe_screen *screen, struct pipe_texture *pt, @@ -294,10 +267,7 @@ cell_update_texture_mapping(struct cell_context *cell) void cell_init_texture_functions(struct cell_context *cell) { - cell->pipe.texture_create = cell_texture_create; - cell->pipe.texture_release = cell_texture_release; cell->pipe.texture_update = cell_texture_update; - cell->pipe.get_tex_surface = cell_get_tex_surface; } void diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index f559cc0d47..afc0d7eb1e 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -143,10 +143,12 @@ struct pipe_context *failover_create( struct pipe_context *hw, failover->pipe.surface_copy = hw->surface_copy; failover->pipe.surface_fill = hw->surface_fill; +#if 0 failover->pipe.texture_create = hw->texture_create; failover->pipe.texture_release = hw->texture_release; - failover->pipe.texture_update = hw->texture_update; failover->pipe.get_tex_surface = hw->get_tex_surface; +#endif + failover->pipe.texture_update = hw->texture_update; failover->pipe.flush = hw->flush; diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 3c9509dee3..9cdf3418a9 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -511,14 +511,6 @@ i915_texture_create_screen(struct pipe_screen *screen, } -static struct pipe_texture * -i915_texture_create(struct pipe_context *pipe, - const struct pipe_texture *templat) -{ - return pipe->screen->texture_create(pipe->screen, templat); -} - - static void i915_texture_release_screen(struct pipe_screen *screen, struct pipe_texture **pt) @@ -550,13 +542,6 @@ i915_texture_release_screen(struct pipe_screen *screen, } -static void -i915_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) -{ - i915_texture_release_screen(pipe->screen, pt); -} - - static void i915_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) { @@ -606,26 +591,13 @@ i915_get_tex_surface_screen(struct pipe_screen *screen, } -static struct pipe_surface * -i915_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - return i915_get_tex_surface_screen(pipe->screen, pt, face, level, zslice); -} - - void i915_init_texture_functions(struct i915_context *i915) { - i915->pipe.texture_create = i915_texture_create; - i915->pipe.texture_release = i915_texture_release; i915->pipe.texture_update = i915_texture_update; - i915->pipe.get_tex_surface = i915_get_tex_surface; } - void i915_init_screen_texture_functions(struct pipe_screen *screen) { diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 9753c50143..b24ac87c37 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -298,14 +298,6 @@ static boolean brw_miptree_layout(struct brw_texture *tex) } -static struct pipe_texture * -brw_texture_create(struct pipe_context *pipe, - const struct pipe_texture *templat) -{ - return pipe->screen->texture_create(pipe->screen, templat); -} - - static struct pipe_texture * brw_texture_create_screen(struct pipe_screen *screen, const struct pipe_texture *templat) @@ -333,13 +325,6 @@ brw_texture_create_screen(struct pipe_screen *screen, } -static void -brw_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) -{ - pipe->screen->texture_release(pipe->screen, pt); -} - - static void brw_texture_release_screen(struct pipe_screen *screen, struct pipe_texture **pt) @@ -379,18 +364,6 @@ brw_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) } -/* - * XXX note: same as code in sp_surface.c - */ -static struct pipe_surface * -brw_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - return pipe->screen->get_tex_surface(pipe->screen, pt, face, level, zslice); -} - - static struct pipe_surface * brw_get_tex_surface_screen(struct pipe_screen *screen, struct pipe_texture *pt, @@ -433,10 +406,7 @@ brw_get_tex_surface_screen(struct pipe_screen *screen, void brw_init_texture_functions(struct brw_context *brw) { - brw->pipe.texture_create = brw_texture_create; - brw->pipe.texture_release = brw_texture_release; brw->pipe.texture_update = brw_texture_update; - brw->pipe.get_tex_surface = brw_get_tex_surface; } diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index f0e8350a4a..7c02765313 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -80,15 +80,6 @@ softpipe_texture_layout(struct softpipe_texture * spt) } -/* XXX temporary */ -static struct pipe_texture * -softpipe_texture_create(struct pipe_context *pipe, - const struct pipe_texture *templat) -{ - return pipe->screen->texture_create(pipe->screen, templat); -} - - static struct pipe_texture * softpipe_texture_create_screen(struct pipe_screen *screen, const struct pipe_texture *templat) @@ -119,14 +110,6 @@ softpipe_texture_create_screen(struct pipe_screen *screen, } -/* XXX temporary */ -static void -softpipe_texture_release(struct pipe_context *pipe, struct pipe_texture **pt) -{ - return pipe->screen->texture_release(pipe->screen, pt); -} - - static void softpipe_texture_release_screen(struct pipe_screen *screen, struct pipe_texture **pt) @@ -153,33 +136,6 @@ softpipe_texture_release_screen(struct pipe_screen *screen, } -static void -softpipe_texture_update(struct pipe_context *pipe, - struct pipe_texture *texture) -{ - struct softpipe_context *softpipe = softpipe_context(pipe); - uint unit; - for (unit = 0; unit < PIPE_MAX_SAMPLERS; unit++) { - if (softpipe->texture[unit] == texture) { - sp_flush_tile_cache(softpipe, softpipe->tex_cache[unit]); - } - } -} - - -/** - * Called via pipe->get_tex_surface() - */ -/* XXX temporary */ -static struct pipe_surface * -softpipe_get_tex_surface(struct pipe_context *pipe, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - return pipe->screen->get_tex_surface(pipe->screen, pt, face, level, zslice); -} - - static struct pipe_surface * softpipe_get_tex_surface_screen(struct pipe_screen *screen, struct pipe_texture *pt, @@ -217,14 +173,24 @@ softpipe_get_tex_surface_screen(struct pipe_screen *screen, } +static void +softpipe_texture_update(struct pipe_context *pipe, + struct pipe_texture *texture) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint unit; + for (unit = 0; unit < PIPE_MAX_SAMPLERS; unit++) { + if (softpipe->texture[unit] == texture) { + sp_flush_tile_cache(softpipe, softpipe->tex_cache[unit]); + } + } +} + void softpipe_init_texture_funcs( struct softpipe_context *softpipe ) { - softpipe->pipe.texture_create = softpipe_texture_create; - softpipe->pipe.texture_release = softpipe_texture_release; softpipe->pipe.texture_update = softpipe_texture_update; - softpipe->pipe.get_tex_surface = softpipe_get_tex_surface; } diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 0ff93c5527..4caf2dd3fc 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -489,6 +489,7 @@ sp_get_cached_tile_tex(struct pipe_context *pipe, struct softpipe_tile_cache *tc, int x, int y, int z, int face, int level) { + struct pipe_screen *screen = pipe->screen; /* tile pos in framebuffer: */ const int tile_x = x & ~(TILE_SIZE - 1); const int tile_y = y & ~(TILE_SIZE - 1); @@ -514,7 +515,7 @@ sp_get_cached_tile_tex(struct pipe_context *pipe, if (tc->tex_surf_map) pipe_surface_unmap(tc->tex_surf); - tc->tex_surf = pipe->get_tex_surface(pipe, tc->texture, face, level, z); + tc->tex_surf = screen->get_tex_surface(screen, tc->texture, face, level, z); tc->tex_surf_map = pipe_surface_map(tc->tex_surf); tc->tex_face = face; diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index bb345df153..d0f25d7d46 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -189,30 +189,16 @@ struct pipe_context { struct pipe_surface *ps, unsigned clearValue); - - /* - * Texture functions - * XXX these are moving to pipe_screen... - */ - struct pipe_texture * (*texture_create)(struct pipe_context *pipe, - const struct pipe_texture *templat); - - void (*texture_release)(struct pipe_context *pipe, - struct pipe_texture **pt); - /** * Called when texture data is changed. * Note: we could pass some hints about which mip levels or cube faces * have changed... + * XXX this may go away - could pass a 'write' flag to get_tex_surface() */ void (*texture_update)(struct pipe_context *pipe, struct pipe_texture *texture); - /** Get a surface which is a "view" into a texture */ - struct pipe_surface *(*get_tex_surface)(struct pipe_context *pipe, - struct pipe_texture *texture, - unsigned face, unsigned level, - unsigned zslice); + /* Flush rendering: */ diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h index a7e97fcd7d..274f76a383 100644 --- a/src/gallium/include/pipe/p_inlines.h +++ b/src/gallium/include/pipe/p_inlines.h @@ -107,15 +107,9 @@ pipe_texture_reference(struct pipe_texture **ptr, pt->refcount++; if (*ptr) { - struct pipe_context *pipe = (*ptr)->pipe; - /* XXX temporary mess here */ - if (pipe) { - pipe->texture_release(pipe, ptr); - } - else { - struct pipe_screen *screen = (*ptr)->screen; - screen->texture_release(screen, ptr); - } + struct pipe_screen *screen = (*ptr)->screen; + assert(screen); + screen->texture_release(screen, ptr); assert(!*ptr); } @@ -127,10 +121,10 @@ pipe_texture_reference(struct pipe_texture **ptr, static INLINE void pipe_texture_release(struct pipe_texture **ptr) { - struct pipe_context *pipe; + struct pipe_screen *screen; assert(ptr); - pipe = (*ptr)->pipe; - pipe->texture_release(pipe, ptr); + screen = (*ptr)->screen; + screen->texture_release(screen, ptr); *ptr = NULL; } diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 0f2c6307dd..ff236adc5c 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -456,6 +456,7 @@ make_texture(struct st_context *st, { GLcontext *ctx = st->ctx; struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; const struct gl_texture_format *mformat; struct pipe_texture *pt; enum pipe_format pipeFormat; @@ -493,7 +494,7 @@ make_texture(struct st_context *st, /* we'll do pixel transfer in a fragment shader */ ctx->_ImageTransferState = 0x0; - surface = pipe->get_tex_surface(pipe, pt, 0, 0, 0); + surface = screen->get_tex_surface(screen, pt, 0, 0, 0); /* map texture surface */ dest = pipe_surface_map(surface); @@ -1031,7 +1032,7 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height, printf("st_Bitmap (sourcing from PBO not implemented yet)\n"); } - surface = pipe->get_tex_surface(pipe, pt, 0, 0, 0); + surface = screen->get_tex_surface(screen, pt, 0, 0, 0); /* map texture surface */ dest = pipe_surface_map(surface); @@ -1207,6 +1208,7 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, { struct st_context *st = ctx->st; struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; struct st_renderbuffer *rbRead; struct st_vertex_program *stvp; struct st_fragment_program *stfp; @@ -1248,7 +1250,7 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, if (!pt) return; - psTex = pipe->get_tex_surface(pipe, pt, 0, 0, 0); + psTex = screen->get_tex_surface(screen, pt, 0, 0, 0); if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { srcy = ctx->DrawBuffer->Height - srcy - height; diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 781425b546..5384252a8e 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -307,6 +307,7 @@ st_render_texture(GLcontext *ctx, struct st_renderbuffer *strb; struct gl_renderbuffer *rb; struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; struct pipe_texture *pt; assert(!att->Renderbuffer); @@ -332,10 +333,10 @@ st_render_texture(GLcontext *ctx, rb->Height = pt->height[att->TextureLevel]; /* the renderbuffer's surface is inside the texture */ - strb->surface = pipe->get_tex_surface(pipe, pt, - att->CubeMapFace, - att->TextureLevel, - att->Zoffset); + strb->surface = screen->get_tex_surface(screen, pt, + att->CubeMapFace, + att->TextureLevel, + att->Zoffset); assert(strb->surface); init_renderbuffer_bits(strb, pt->format); diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index f5f956f6ea..1ba3173312 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -144,12 +144,11 @@ st_NewTextureObject(GLcontext * ctx, GLuint name, GLenum target) static void st_DeleteTextureObject(GLcontext *ctx, - struct gl_texture_object *texObj) + struct gl_texture_object *texObj) { struct st_texture_object *stObj = st_texture_object(texObj); - if (stObj->pt) - ctx->st->pipe->texture_release(ctx->st->pipe, &stObj->pt); + pipe_texture_release(&stObj->pt); _mesa_delete_texture_object(ctx, texObj); } @@ -163,7 +162,7 @@ st_FreeTextureImageData(GLcontext * ctx, struct gl_texture_image *texImage) DBG("%s\n", __FUNCTION__); if (stImage->pt) { - ctx->st->pipe->texture_release(ctx->st->pipe, &stImage->pt); + pipe_texture_release(&stImage->pt); } if (texImage->Data) { @@ -537,7 +536,7 @@ st_TexImage(GLcontext * ctx, * Release any old malloced memory. */ if (stImage->pt) { - ctx->st->pipe->texture_release(ctx->st->pipe, &stImage->pt); + pipe_texture_release(&stImage->pt); assert(!texImage->Data); } else if (texImage->Data) { @@ -556,7 +555,7 @@ st_TexImage(GLcontext * ctx, stImage->face, stImage->level)) { DBG("release it\n"); - ctx->st->pipe->texture_release(ctx->st->pipe, &stObj->pt); + pipe_texture_release(&stObj->pt); assert(!stObj->pt); } @@ -1025,6 +1024,7 @@ fallback_copy_texsubimage(GLcontext *ctx, GLsizei width, GLsizei height) { struct pipe_context *pipe = ctx->st->pipe; + struct pipe_screen *screen = pipe->screen; const uint face = texture_face(target); struct pipe_texture *pt = stImage->pt; struct pipe_surface *src_surf, *dest_surf; @@ -1042,8 +1042,7 @@ fallback_copy_texsubimage(GLcontext *ctx, src_surf = strb->surface; - dest_surf = pipe->get_tex_surface(pipe, pt, - face, level, destZ); + dest_surf = screen->get_tex_surface(screen, pt, face, level, destZ); /* buffer for one row */ data = (GLfloat *) malloc(width * 4 * sizeof(GLfloat)); @@ -1096,6 +1095,7 @@ do_copy_texsubimage(GLcontext *ctx, struct gl_framebuffer *fb = ctx->ReadBuffer; struct st_renderbuffer *strb; struct pipe_context *pipe = ctx->st->pipe; + struct pipe_screen *screen = pipe->screen; struct pipe_surface *dest_surface; uint dest_format, src_format; uint do_flip = FALSE; @@ -1126,8 +1126,8 @@ do_copy_texsubimage(GLcontext *ctx, src_format = strb->surface->format; dest_format = stImage->pt->format; - dest_surface = pipe->get_tex_surface(pipe, stImage->pt, stImage->face, - stImage->level, destZ); + dest_surface = screen->get_tex_surface(screen, stImage->pt, stImage->face, + stImage->level, destZ); if (src_format == dest_format && ctx->_ImageTransferState == 0x0 && @@ -1352,7 +1352,7 @@ copy_image_data_to_texture(struct st_context *st, stImage->face ); - st->pipe->texture_release(st->pipe, &stImage->pt); + pipe_texture_release(&stImage->pt); } else { assert(stImage->base.Data != NULL); @@ -1408,7 +1408,7 @@ st_finalize_texture(GLcontext *ctx, */ if (firstImage->base.Border) { if (stObj->pt) { - ctx->st->pipe->texture_release(ctx->st->pipe, &stObj->pt); + pipe_texture_release(&stObj->pt); } return GL_FALSE; } @@ -1424,7 +1424,7 @@ st_finalize_texture(GLcontext *ctx, firstImage->pt->last_level >= stObj->lastLevel) { if (stObj->pt) - ctx->st->pipe->texture_release(ctx->st->pipe, &stObj->pt); + pipe_texture_release(&stObj->pt); pipe_texture_reference(&stObj->pt, firstImage->pt); } @@ -1450,7 +1450,7 @@ st_finalize_texture(GLcontext *ctx, stObj->pt->depth[0] != firstImage->base.Depth || stObj->pt->cpp != cpp || stObj->pt->compressed != firstImage->base.IsCompressed)) { - ctx->st->pipe->texture_release(ctx->st->pipe, &stObj->pt); + pipe_texture_release(&stObj->pt); } diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 2b16310602..243dc0b1d0 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -276,7 +276,7 @@ st_render_mipmap(struct st_context *st, /* * Setup framebuffer / dest surface */ - fb.cbufs[0] = pipe->get_tex_surface(pipe, pt, face, dstLevel, zslice); + fb.cbufs[0] = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); pipe->set_framebuffer_state(pipe, &fb); /* @@ -325,6 +325,7 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, struct gl_texture_object *texObj) { struct pipe_context *pipe = ctx->st->pipe; + struct pipe_screen *screen = pipe->screen; struct pipe_winsys *ws = pipe->winsys; struct pipe_texture *pt = st_get_texobj_texture(texObj); const uint baseLevel = texObj->BaseLevel; @@ -345,8 +346,8 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, const ubyte *srcData; ubyte *dstData; - srcSurf = pipe->get_tex_surface(pipe, pt, face, srcLevel, zslice); - dstSurf = pipe->get_tex_surface(pipe, pt, face, dstLevel, zslice); + srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice); + dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); srcData = (ubyte *) ws->buffer_map(ws, srcSurf->buffer, PIPE_BUFFER_USAGE_CPU_READ) diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index ad284170e4..c2b0aa8a4a 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -77,6 +77,7 @@ st_texture_create(struct st_context *st, GLuint compress_byte) { struct pipe_texture pt, *newtex; + struct pipe_screen *screen = st->pipe->screen; assert(target <= PIPE_TEXTURE_CUBE); @@ -96,7 +97,7 @@ st_texture_create(struct st_context *st, pt.compressed = compress_byte ? 1 : 0; pt.cpp = pt.compressed ? compress_byte : st_sizeof_format(format); - newtex = st->pipe->texture_create(st->pipe, &pt); + newtex = screen->texture_create(screen, &pt); assert(!newtex || newtex->refcount == 1); @@ -183,11 +184,12 @@ GLubyte * st_texture_image_map(struct st_context *st, struct st_texture_image *stImage, GLuint zoffset) { + struct pipe_screen *screen = st->pipe->screen; struct pipe_texture *pt = stImage->pt; DBG("%s \n", __FUNCTION__); - stImage->surface = st->pipe->get_tex_surface(st->pipe, pt, stImage->face, - stImage->level, zoffset); + stImage->surface = screen->get_tex_surface(screen, pt, stImage->face, + stImage->level, zoffset); return pipe_surface_map(stImage->surface); } @@ -239,6 +241,7 @@ st_texture_image_data(struct pipe_context *pipe, void *src, GLuint src_row_pitch, GLuint src_image_pitch) { + struct pipe_screen *screen = pipe->screen; GLuint depth = dst->depth[level]; GLuint i; GLuint height = 0; @@ -251,7 +254,7 @@ st_texture_image_data(struct pipe_context *pipe, if(dst->compressed) height /= 4; - dst_surface = pipe->get_tex_surface(pipe, dst, face, level, i); + dst_surface = screen->get_tex_surface(screen, dst, face, level, i); st_surface_data(pipe, dst_surface, 0, 0, /* dstx, dsty */ @@ -275,6 +278,7 @@ st_texture_image_copy(struct pipe_context *pipe, struct pipe_texture *src, GLuint face) { + struct pipe_screen *screen = pipe->screen; GLuint width = dst->width[dstLevel]; GLuint height = dst->height[dstLevel]; GLuint depth = dst->depth[dstLevel]; @@ -299,8 +303,8 @@ st_texture_image_copy(struct pipe_context *pipe, assert(src->width[srcLevel] == width); assert(src->height[srcLevel] == height); - dst_surface = pipe->get_tex_surface(pipe, dst, face, dstLevel, i); - src_surface = pipe->get_tex_surface(pipe, src, face, srcLevel, i); + dst_surface = screen->get_tex_surface(screen, dst, face, dstLevel, i); + src_surface = screen->get_tex_surface(screen, src, face, srcLevel, i); pipe->surface_copy(pipe, FALSE, -- cgit v1.2.3 From fb40c5a9c7dc91c03f80780e0a09be0cade98705 Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 27 Feb 2008 15:06:04 -0700 Subject: gallium: added uses_kill field to tgsi_shader_info --- src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 3 +++ src/gallium/auxiliary/tgsi/util/tgsi_scan.h | 1 + 2 files changed, 4 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c index a1cc681492..a973aeb62f 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c @@ -133,5 +133,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens, } } + info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] || + info->opcode_count[TGSI_OPCODE_KILP]); + tgsi_parse_free (&parse); } diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h index dc6dfd6d0b..d10d300c4d 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h @@ -57,6 +57,7 @@ struct tgsi_shader_info uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ boolean writes_z; /**< does fragment shader write Z value? */ + boolean uses_kill; /**< KIL or KILP instruction used? */ }; -- cgit v1.2.3 From 3197ad5a56ee94773f974ac727b316c5adfe1b6f Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 27 Feb 2008 15:47:24 -0700 Subject: gallium: added file_max[] array to tgsi_shader_info Records the highest index of a declared register. --- src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 5 ++++- src/gallium/auxiliary/tgsi/util/tgsi_scan.h | 1 + 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c index a973aeb62f..8c886edc65 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c @@ -46,10 +46,12 @@ void tgsi_scan_shader(const struct tgsi_token *tokens, struct tgsi_shader_info *info) { - uint procType; + uint procType, i; struct tgsi_parse_context parse; memset(info, 0, sizeof(*info)); + for (i = 0; i < TGSI_FILE_COUNT; i++) + info->file_max[i] = -1; /** ** Setup to begin parsing input shader @@ -97,6 +99,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, /* only first 32 regs will appear in this bitfield */ info->file_mask[file] |= (1 << i); info->file_count[file]++; + info->file_max[file] = MAX2(info->file_max[file], i); if (file == TGSI_FILE_INPUT) { info->input_semantic_name[info->num_inputs] diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h index d10d300c4d..563ee900fa 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h @@ -51,6 +51,7 @@ struct tgsi_shader_info uint file_mask[TGSI_FILE_COUNT]; /**< bitmask of declared registers */ uint file_count[TGSI_FILE_COUNT]; /**< number of declared registers */ + int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */ uint immediate_count; /**< number of immediates declared */ -- cgit v1.2.3 From 679b6cf0a0e662513c8d7732049c44916e0e9e86 Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 27 Feb 2008 16:00:04 -0700 Subject: gallium: include p_compiler.h instead of p_util.h --- src/gallium/auxiliary/tgsi/util/tgsi_scan.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h index 563ee900fa..0530bc6b51 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h @@ -29,7 +29,7 @@ #define TGSI_SCAN_H -#include "pipe/p_util.h" +#include "pipe/p_compiler.h" #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" -- cgit v1.2.3 From 7df26d76d2a37e9e828296bfbcf7cec04bfbe233 Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 27 Feb 2008 16:01:35 -0700 Subject: gallium: include p_util.h --- src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c index 8c886edc65..b7bbff1689 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c @@ -37,6 +37,7 @@ #include "tgsi/util/tgsi_parse.h" #include "tgsi/util/tgsi_build.h" +#include "pipe/p_util.h" -- cgit v1.2.3 From cddeca51adf0d2b736a223e47b60f6ef3be85bff Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 27 Feb 2008 16:02:58 -0700 Subject: gallium: remove dependencies on pipe_shader_state's semantic info Use tgsi_scan_shader() to populate a tgsi_shader_info struct and use that instead. --- src/gallium/auxiliary/draw/draw_aaline.c | 2 ++ src/gallium/auxiliary/draw/draw_aapoint.c | 6 ++++-- src/gallium/auxiliary/draw/draw_clip.c | 8 ++++---- src/gallium/auxiliary/draw/draw_context.c | 8 ++++---- src/gallium/auxiliary/draw/draw_flatshade.c | 8 ++++---- src/gallium/auxiliary/draw/draw_private.h | 3 +++ src/gallium/auxiliary/draw/draw_pstipple.c | 2 ++ src/gallium/auxiliary/draw/draw_twoside.c | 12 ++++++------ src/gallium/auxiliary/draw/draw_vertex_fetch.c | 2 +- src/gallium/auxiliary/draw/draw_vertex_shader.c | 17 +++++++++-------- src/gallium/auxiliary/draw/draw_vs_exec.c | 2 +- src/gallium/auxiliary/draw/draw_vs_sse.c | 2 +- src/gallium/auxiliary/draw/draw_wide_prims.c | 8 ++++---- 13 files changed, 45 insertions(+), 35 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c index be6cfd3b6a..51140388f0 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -340,9 +340,11 @@ generate_aaline_fs(struct aaline_stage *aaline) tgsi_dump(aaline_fs.tokens, 0); #endif +#if 1 /* XXX remove */ aaline_fs.input_semantic_name[aaline_fs.num_inputs] = TGSI_SEMANTIC_GENERIC; aaline_fs.input_semantic_index[aaline_fs.num_inputs] = transform.maxGeneric + 1; aaline_fs.num_inputs++; +#endif aaline->fs->aaline_fs = aaline->driver_create_fs_state(aaline->pipe, &aaline_fs); diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c index d16adb2505..d48a416899 100644 --- a/src/gallium/auxiliary/draw/draw_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_aapoint.c @@ -514,9 +514,11 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) tgsi_dump(aapoint_fs.tokens, 0); #endif +#if 1 /* XXX remove */ aapoint_fs.input_semantic_name[aapoint_fs.num_inputs] = TGSI_SEMANTIC_GENERIC; aapoint_fs.input_semantic_index[aapoint_fs.num_inputs] = transform.maxGeneric + 1; aapoint_fs.num_inputs++; +#endif aapoint->fs->aapoint_fs = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs); @@ -694,8 +696,8 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header) /* find PSIZ vertex output */ const struct draw_vertex_shader *vs = draw->vertex_shader; uint i; - for (i = 0; i < vs->state->num_outputs; i++) { - if (vs->state->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { aapoint->psize_slot = i; break; } diff --git a/src/gallium/auxiliary/draw/draw_clip.c b/src/gallium/auxiliary/draw/draw_clip.c index e3051507ea..200152ecab 100644 --- a/src/gallium/auxiliary/draw/draw_clip.c +++ b/src/gallium/auxiliary/draw/draw_clip.c @@ -409,13 +409,13 @@ clip_init_state( struct draw_stage *stage ) clipper->flat = stage->draw->rasterizer->flatshade ? TRUE : FALSE; if (clipper->flat) { - const struct pipe_shader_state *vs = stage->draw->vertex_shader->state; + const struct draw_vertex_shader *vs = stage->draw->vertex_shader; uint i; clipper->num_color_attribs = 0; - for (i = 0; i < vs->num_outputs; i++) { - if (vs->output_semantic_name[i] == TGSI_SEMANTIC_COLOR || - vs->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR || + vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { clipper->color_attribs[clipper->num_color_attribs++] = i; } } diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 48f00946ea..64ada8ce04 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -262,11 +262,11 @@ int draw_find_vs_output(struct draw_context *draw, uint semantic_name, uint semantic_index) { - const struct pipe_shader_state *vs = draw->vertex_shader->state; + const struct draw_vertex_shader *vs = draw->vertex_shader; uint i; - for (i = 0; i < vs->num_outputs; i++) { - if (vs->output_semantic_name[i] == semantic_name && - vs->output_semantic_index[i] == semantic_index) + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == semantic_name && + vs->info.output_semantic_index[i] == semantic_index) return i; } diff --git a/src/gallium/auxiliary/draw/draw_flatshade.c b/src/gallium/auxiliary/draw/draw_flatshade.c index 4398abbc60..ccad71d695 100644 --- a/src/gallium/auxiliary/draw/draw_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_flatshade.c @@ -128,14 +128,14 @@ static void flatshade_point( struct draw_stage *stage, static void flatshade_init_state( struct draw_stage *stage ) { struct flat_stage *flat = flat_stage(stage); - const struct pipe_shader_state *vs = stage->draw->vertex_shader->state; + const struct draw_vertex_shader *vs = stage->draw->vertex_shader; uint i; /* Find which vertex shader outputs are colors, make a list */ flat->num_color_attribs = 0; - for (i = 0; i < vs->num_outputs; i++) { - if (vs->output_semantic_name[i] == TGSI_SEMANTIC_COLOR || - vs->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR || + vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { flat->color_attribs[flat->num_color_attribs++] = i; } } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 03e3d3a66b..e988e71d23 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -46,6 +46,7 @@ #include "rtasm/rtasm_x86sse.h" #include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_scan.h" struct pipe_context; @@ -134,6 +135,8 @@ struct draw_vertex_shader { */ const struct pipe_shader_state *state; + struct tgsi_shader_info info; + void (*prepare)( struct draw_vertex_shader *shader, struct draw_context *draw ); diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index efc88bf038..f6200aa820 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -330,11 +330,13 @@ generate_pstip_fs(struct pstip_stage *pstip) pstip->sampler_unit = transform.maxSampler + 1; +#if 1 /* XXX remove */ if (transform.wincoordInput < 0) { pstip_fs.input_semantic_name[pstip_fs.num_inputs] = TGSI_SEMANTIC_POSITION; pstip_fs.input_semantic_index[pstip_fs.num_inputs] = (ubyte)transform.maxInput; pstip_fs.num_inputs++; } +#endif pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs); } diff --git a/src/gallium/auxiliary/draw/draw_twoside.c b/src/gallium/auxiliary/draw/draw_twoside.c index 1c38957987..3debaac282 100644 --- a/src/gallium/auxiliary/draw/draw_twoside.c +++ b/src/gallium/auxiliary/draw/draw_twoside.c @@ -119,7 +119,7 @@ static void twoside_first_tri( struct draw_stage *stage, struct prim_header *header ) { struct twoside_stage *twoside = twoside_stage(stage); - const struct pipe_shader_state *vs = stage->draw->vertex_shader->state; + const struct draw_vertex_shader *vs = stage->draw->vertex_shader; uint i; twoside->attrib_front0 = 0; @@ -128,15 +128,15 @@ static void twoside_first_tri( struct draw_stage *stage, twoside->attrib_back1 = 0; /* Find which vertex shader outputs are front/back colors */ - for (i = 0; i < vs->num_outputs; i++) { - if (vs->output_semantic_name[i] == TGSI_SEMANTIC_COLOR) { - if (vs->output_semantic_index[i] == 0) + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR) { + if (vs->info.output_semantic_index[i] == 0) twoside->attrib_front0 = i; else twoside->attrib_front1 = i; } - if (vs->output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { - if (vs->output_semantic_index[i] == 0) + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { + if (vs->info.output_semantic_index[i] == 0) twoside->attrib_back0 = i; else twoside->attrib_back1 = i; diff --git a/src/gallium/auxiliary/draw/draw_vertex_fetch.c b/src/gallium/auxiliary/draw/draw_vertex_fetch.c index e13df04605..cb8cdd04a3 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_fetch.c +++ b/src/gallium/auxiliary/draw/draw_vertex_fetch.c @@ -473,7 +473,7 @@ void draw_update_vertex_fetch( struct draw_context *draw ) if (!draw->vertex_shader) return; - nr_attrs = draw->vertex_shader->state->num_inputs; + nr_attrs = draw->vertex_shader->info.num_inputs; for (i = 0; i < nr_attrs; i++) { unsigned buf = draw->vertex_element[i].vertex_buffer_index; diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c index 5d2f5c9c43..1e95355555 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c @@ -91,15 +91,16 @@ draw_create_vertex_shader(struct draw_context *draw, struct draw_vertex_shader *vs; vs = draw_create_vs_llvm( draw, shader ); - if (vs) - return vs; + if (!vs) { + vs = draw_create_vs_sse( draw, shader ); + if (!vs) { + vs = draw_create_vs_exec( draw, shader ); + } + } + assert(vs); - vs = draw_create_vs_sse( draw, shader ); - if (vs) - return vs; + tgsi_scan_shader(shader->tokens, &vs->info); - vs = draw_create_vs_exec( draw, shader ); - assert(vs); return vs; } @@ -111,7 +112,7 @@ draw_bind_vertex_shader(struct draw_context *draw, draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); draw->vertex_shader = dvs; - draw->num_vs_outputs = dvs->state->num_outputs; + draw->num_vs_outputs = dvs->info.num_outputs; tgsi_exec_machine_init(&draw->machine); diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 8588879400..583812aadd 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -103,7 +103,7 @@ vs_exec_run( struct draw_vertex_shader *shader, const float *trans = draw->viewport.translate; assert(count <= 4); - assert(draw->vertex_shader->state->output_semantic_name[0] + assert(draw->vertex_shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION); machine->Consts = (float (*)[4]) draw->user.constants; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 11ef0c503d..0b8bc2bf14 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -119,7 +119,7 @@ vs_sse_run( struct draw_vertex_shader *base, const float *trans = draw->viewport.translate; assert(count <= 4); - assert(draw->vertex_shader->state->output_semantic_name[0] + assert(draw->vertex_shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION); /* Consts does not require 16 byte alignment. */ diff --git a/src/gallium/auxiliary/draw/draw_wide_prims.c b/src/gallium/auxiliary/draw/draw_wide_prims.c index d967810dd4..d6bff110b4 100644 --- a/src/gallium/auxiliary/draw/draw_wide_prims.c +++ b/src/gallium/auxiliary/draw/draw_wide_prims.c @@ -278,8 +278,8 @@ static void wide_first_point( struct draw_stage *stage, /* find vertex shader texcoord outputs */ const struct draw_vertex_shader *vs = draw->vertex_shader; uint i, j = 0; - for (i = 0; i < vs->state->num_outputs; i++) { - if (vs->state->output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { wide->texcoord_slot[j] = i; wide->texcoord_mode[j] = draw->rasterizer->sprite_coord_mode[j]; j++; @@ -294,8 +294,8 @@ static void wide_first_point( struct draw_stage *stage, /* find PSIZ vertex output */ const struct draw_vertex_shader *vs = draw->vertex_shader; uint i; - for (i = 0; i < vs->state->num_outputs; i++) { - if (vs->state->output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { wide->psize_slot = i; break; } -- cgit v1.2.3 From 1774b177b858f9f87d00e54b0bf00e9634e375e9 Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 27 Feb 2008 18:46:54 -0700 Subject: gallium: added draw_num_vs_outputs() to query number of post-transform vertex attribs --- src/gallium/auxiliary/draw/draw_context.c | 13 +++++++++++++ src/gallium/auxiliary/draw/draw_context.h | 4 ++++ src/gallium/drivers/softpipe/sp_state_derived.c | 4 ++-- 3 files changed, 19 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 64ada8ce04..3500c34811 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -281,6 +281,19 @@ draw_find_vs_output(struct draw_context *draw, } +/** + * Return number of vertex shader outputs. + */ +uint +draw_num_vs_outputs(struct draw_context *draw) +{ + uint count = draw->vertex_shader->info.num_outputs; + if (draw->extra_vp_outputs.slot >= 0) + count++; + return count; +} + + /** * Allocate space for temporary post-transform vertices, such as for clipping. */ diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index d6685f479b..99bfef55f4 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -112,6 +112,10 @@ int draw_find_vs_output(struct draw_context *draw, uint semantic_name, uint semantic_index); +uint +draw_num_vs_outputs(struct draw_context *draw); + + /* * Vertex shader functions diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index aa6e329116..eafbaed4b9 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -61,7 +61,6 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) if (vinfo->num_attribs == 0) { /* compute vertex layout now */ - const struct pipe_shader_state *vs = &softpipe->vs->shader; const struct sp_fragment_shader *spfs = softpipe->fs; const enum interp_mode colorInterp = softpipe->rasterizer->flatshade ? INTERP_CONSTANT : INTERP_LINEAR; @@ -74,7 +73,8 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; vinfo_vbuf->num_attribs = 0; draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); - vinfo_vbuf->size = 4 * vs->num_outputs + /* size in dwords or floats */ + vinfo_vbuf->size = 4 * draw_num_vs_outputs(softpipe->draw) + sizeof(struct vertex_header) / 4; } -- cgit v1.2.3 From 510bc3535c4af68db71e5ffd19f3e21e10ec6004 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 28 Feb 2008 11:23:01 +0900 Subject: gallium: Fix sign/unsign comparison. --- src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c index b7bbff1689..cf6de1e82f 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c @@ -100,7 +100,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, /* only first 32 regs will appear in this bitfield */ info->file_mask[file] |= (1 << i); info->file_count[file]++; - info->file_max[file] = MAX2(info->file_max[file], i); + info->file_max[file] = MAX2(info->file_max[file], (int)i); if (file == TGSI_FILE_INPUT) { info->input_semantic_name[info->num_inputs] -- cgit v1.2.3 From e280bd50cc46ddbf5ac7fbaafc934d1048d77ba2 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 28 Feb 2008 21:25:54 +0900 Subject: gallium: Fix MSVC warnings. --- src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c index cf6de1e82f..ea4a72967d 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c @@ -104,17 +104,17 @@ tgsi_scan_shader(const struct tgsi_token *tokens, if (file == TGSI_FILE_INPUT) { info->input_semantic_name[info->num_inputs] - = fulldecl->Semantic.SemanticName; + = (ubyte)fulldecl->Semantic.SemanticName; info->input_semantic_index[info->num_inputs] - = fulldecl->Semantic.SemanticIndex; + = (ubyte)fulldecl->Semantic.SemanticIndex; info->num_inputs++; } if (file == TGSI_FILE_OUTPUT) { info->output_semantic_name[info->num_outputs] - = fulldecl->Semantic.SemanticName; + = (ubyte)fulldecl->Semantic.SemanticName; info->output_semantic_index[info->num_outputs] - = fulldecl->Semantic.SemanticIndex; + = (ubyte)fulldecl->Semantic.SemanticIndex; info->num_outputs++; } -- cgit v1.2.3 From a1a13954885cd469faab49633b5386e5c889e3df Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 28 Feb 2008 17:49:22 -0700 Subject: gallium: split draw_wide_prim stage into separate point/line stages. This fixes a validation/code-path problem. Enabling the stage for the sake of wide points also inadvertantly caused wide lines to be converted to tris when we actually want them passed through, such as for the AA line stage. This is just cleaner now. Also, replace draw_convert_wide_lines() with draw_wide_line_threshold() as was done for points. Allows for 1-pixel lines to be converted too if needed. --- src/gallium/auxiliary/draw/Makefile | 4 +++- src/gallium/auxiliary/draw/SConscript | 3 ++- src/gallium/auxiliary/draw/draw_context.c | 17 ++++++++++------- src/gallium/auxiliary/draw/draw_context.h | 4 +--- src/gallium/auxiliary/draw/draw_private.h | 8 +++++--- src/gallium/auxiliary/draw/draw_validate.c | 16 +++++++++------- 6 files changed, 30 insertions(+), 22 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index c9980f0b83..2daa1636f3 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -29,7 +29,9 @@ C_SOURCES = \ draw_vf.c \ draw_vf_generic.c \ draw_vf_sse.c \ - draw_wide_prims.c + draw_wide_line.c \ + draw_wide_point.c + include ../../Makefile.template diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 3302dc44f7..c18dcb2927 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -28,7 +28,8 @@ draw = env.ConvenienceLibrary( 'draw_vf.c', 'draw_vf_generic.c', 'draw_vf_sse.c', - 'draw_wide_prims.c', + 'draw_wide_point.c', + 'draw_wide_line.c' ]) auxiliaries.insert(0, draw) diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 3500c34811..428b6209e0 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -48,7 +48,8 @@ struct draw_context *draw_create( void ) #endif /* create pipeline stages */ - draw->pipeline.wide = draw_wide_stage( draw ); + draw->pipeline.wide_line = draw_wide_line_stage( draw ); + draw->pipeline.wide_point = draw_wide_point_stage( draw ); draw->pipeline.stipple = draw_stipple_stage( draw ); draw->pipeline.unfilled = draw_unfilled_stage( draw ); draw->pipeline.twoside = draw_twoside_stage( draw ); @@ -80,8 +81,9 @@ struct draw_context *draw_create( void ) draw->shader_queue_flush = draw_vertex_shader_queue_flush; + /* these defaults are oriented toward the needs of softpipe */ draw->wide_point_threshold = 1000000.0; /* infinity */ - draw->convert_wide_lines = TRUE; + draw->wide_line_threshold = 1.0; draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ @@ -94,7 +96,8 @@ struct draw_context *draw_create( void ) void draw_destroy( struct draw_context *draw ) { - draw->pipeline.wide->destroy( draw->pipeline.wide ); + draw->pipeline.wide_line->destroy( draw->pipeline.wide_line ); + draw->pipeline.wide_point->destroy( draw->pipeline.wide_point ); draw->pipeline.stipple->destroy( draw->pipeline.stipple ); draw->pipeline.unfilled->destroy( draw->pipeline.unfilled ); draw->pipeline.twoside->destroy( draw->pipeline.twoside ); @@ -232,14 +235,14 @@ draw_wide_point_threshold(struct draw_context *draw, float threshold) /** - * Tells the draw module whether to convert wide lines (width != 1) - * into triangles. + * Tells the draw module to draw lines with triangles if their width + * is greater than this threshold. */ void -draw_convert_wide_lines(struct draw_context *draw, boolean enable) +draw_wide_line_threshold(struct draw_context *draw, float threshold) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->convert_wide_lines = enable; + draw->wide_line_threshold = threshold; } diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 99bfef55f4..ab87b4127c 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -92,9 +92,7 @@ void draw_set_rasterize_stage( struct draw_context *draw, void draw_wide_point_threshold(struct draw_context *draw, float threshold); -void draw_convert_wide_points(struct draw_context *draw, boolean enable); - -void draw_convert_wide_lines(struct draw_context *draw, boolean enable); +void draw_wide_line_threshold(struct draw_context *draw, float threshold); boolean draw_use_sse(struct draw_context *draw); diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index e988e71d23..c732d723a7 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -186,7 +186,8 @@ struct draw_context struct draw_stage *aapoint; struct draw_stage *aaline; struct draw_stage *pstipple; - struct draw_stage *wide; + struct draw_stage *wide_line; + struct draw_stage *wide_point; struct draw_stage *rasterize; } pipeline; @@ -219,7 +220,7 @@ struct draw_context unsigned nr_planes; float wide_point_threshold; /**< convert pnts to tris if larger than this */ - boolean convert_wide_lines; /**< convert wide lines to tris? */ + float wide_line_threshold; /**< convert lines to tris if wider than this */ boolean use_sse; /* If a prim stage introduces new vertex attributes, they'll be stored here @@ -304,7 +305,8 @@ extern struct draw_stage *draw_clip_stage( struct draw_context *context ); extern struct draw_stage *draw_flatshade_stage( struct draw_context *context ); extern struct draw_stage *draw_cull_stage( struct draw_context *context ); extern struct draw_stage *draw_stipple_stage( struct draw_context *context ); -extern struct draw_stage *draw_wide_stage( struct draw_context *context ); +extern struct draw_stage *draw_wide_line_stage( struct draw_context *context ); +extern struct draw_stage *draw_wide_point_stage( struct draw_context *context ); extern struct draw_stage *draw_validate_stage( struct draw_context *context ); diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index ded7d10c08..084eee9b6e 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -53,8 +53,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) stage->next = next; /* drawing wide lines? */ - wide_lines = (draw->rasterizer->line_width != 1.0 - && draw->convert_wide_lines + wide_lines = (draw->rasterizer->line_width > draw->wide_line_threshold && !draw->rasterizer->line_smooth); /* drawing large points? */ @@ -82,11 +81,14 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) next = draw->pipeline.aapoint; } - if (wide_lines || - wide_points || - draw->rasterizer->point_sprite) { - draw->pipeline.wide->next = next; - next = draw->pipeline.wide; + if (wide_lines) { + draw->pipeline.wide_line->next = next; + next = draw->pipeline.wide_line; + } + + if (wide_points || draw->rasterizer->point_sprite) { + draw->pipeline.wide_point->next = next; + next = draw->pipeline.wide_point; } if (draw->rasterizer->line_stipple_enable) { -- cgit v1.2.3 From b233b1e2dc2fca2f45b3cb5df167e3675b8cc1fb Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 28 Feb 2008 17:54:42 -0700 Subject: gallium: new wide point/line stages (missed in prev commit) --- src/gallium/auxiliary/draw/draw_wide_line.c | 187 +++++++++++++++++++ src/gallium/auxiliary/draw/draw_wide_point.c | 257 +++++++++++++++++++++++++++ 2 files changed, 444 insertions(+) create mode 100644 src/gallium/auxiliary/draw/draw_wide_line.c create mode 100644 src/gallium/auxiliary/draw/draw_wide_point.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_wide_line.c b/src/gallium/auxiliary/draw/draw_wide_line.c new file mode 100644 index 0000000000..946a983f00 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_wide_line.c @@ -0,0 +1,187 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "draw_private.h" + + +struct wideline_stage { + struct draw_stage stage; + + float half_line_width; +}; + + + +static INLINE struct wideline_stage *wideline_stage( struct draw_stage *stage ) +{ + return (struct wideline_stage *)stage; +} + + +static void wideline_point( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->point( stage->next, header ); +} + + +static void wideline_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->tri(stage->next, header); +} + + +/** + * Draw a wide line by drawing a quad (two triangles). + * XXX need to disable polygon stipple. + */ +static void wideline_line( struct draw_stage *stage, + struct prim_header *header ) +{ + /*const struct wideline_stage *wide = wideline_stage(stage);*/ + const float half_width = 0.5f * stage->draw->rasterizer->line_width; + + struct prim_header tri; + + struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); + struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); + struct vertex_header *v2 = dup_vert(stage, header->v[1], 2); + struct vertex_header *v3 = dup_vert(stage, header->v[1], 3); + + float *pos0 = v0->data[0]; + float *pos1 = v1->data[0]; + float *pos2 = v2->data[0]; + float *pos3 = v3->data[0]; + + const float dx = FABSF(pos0[0] - pos2[0]); + const float dy = FABSF(pos0[1] - pos2[1]); + + /* + * Draw wide line as a quad (two tris) by "stretching" the line along + * X or Y. + * We need to tweak coords in several ways to be conformant here. + */ + + if (dx > dy) { + /* x-major line */ + pos0[1] = pos0[1] - half_width - 0.25f; + pos1[1] = pos1[1] + half_width - 0.25f; + pos2[1] = pos2[1] - half_width - 0.25f; + pos3[1] = pos3[1] + half_width - 0.25f; + if (pos0[0] < pos2[0]) { + /* left to right line */ + pos0[0] -= 0.5f; + pos1[0] -= 0.5f; + pos2[0] -= 0.5f; + pos3[0] -= 0.5f; + } + else { + /* right to left line */ + pos0[0] += 0.5f; + pos1[0] += 0.5f; + pos2[0] += 0.5f; + pos3[0] += 0.5f; + } + } + else { + /* y-major line */ + pos0[0] = pos0[0] - half_width + 0.25f; + pos1[0] = pos1[0] + half_width + 0.25f; + pos2[0] = pos2[0] - half_width + 0.25f; + pos3[0] = pos3[0] + half_width + 0.25f; + if (pos0[1] < pos2[1]) { + /* top to bottom line */ + pos0[1] -= 0.5f; + pos1[1] -= 0.5f; + pos2[1] -= 0.5f; + pos3[1] -= 0.5f; + } + else { + /* bottom to top line */ + pos0[1] += 0.5f; + pos1[1] += 0.5f; + pos2[1] += 0.5f; + pos3[1] += 0.5f; + } + } + + tri.det = header->det; /* only the sign matters */ + tri.v[0] = v0; + tri.v[1] = v2; + tri.v[2] = v3; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v0; + tri.v[1] = v3; + tri.v[2] = v1; + stage->next->tri( stage->next, &tri ); +} + + +static void wideline_flush( struct draw_stage *stage, unsigned flags ) +{ + stage->next->flush( stage->next, flags ); +} + + +static void wideline_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void wideline_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +struct draw_stage *draw_wide_line_stage( struct draw_context *draw ) +{ + struct wideline_stage *wide = CALLOC_STRUCT(wideline_stage); + + draw_alloc_temp_verts( &wide->stage, 4 ); + + wide->stage.draw = draw; + wide->stage.next = NULL; + wide->stage.point = wideline_point; + wide->stage.line = wideline_line; + wide->stage.tri = wideline_tri; + wide->stage.flush = wideline_flush; + wide->stage.reset_stipple_counter = wideline_reset_stipple_counter; + wide->stage.destroy = wideline_destroy; + + return &wide->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_wide_point.c b/src/gallium/auxiliary/draw/draw_wide_point.c new file mode 100644 index 0000000000..8f877a102a --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_wide_point.c @@ -0,0 +1,257 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "draw_private.h" + + +struct widepoint_stage { + struct draw_stage stage; + + float half_point_size; + + uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS]; + uint texcoord_mode[PIPE_MAX_SHADER_OUTPUTS]; + uint num_texcoords; + + int psize_slot; +}; + + + +static INLINE struct widepoint_stage * +widepoint_stage( struct draw_stage *stage ) +{ + return (struct widepoint_stage *)stage; +} + + +static void passthrough_point( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->point( stage->next, header ); +} + +static void widepoint_line( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->line(stage->next, header); +} + +static void widepoint_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->tri(stage->next, header); +} + + +/** + * Set the vertex texcoords for sprite mode. + * Coords may be left untouched or set to a right-side-up or upside-down + * orientation. + */ +static void set_texcoords(const struct widepoint_stage *wide, + struct vertex_header *v, const float tc[4]) +{ + uint i; + for (i = 0; i < wide->num_texcoords; i++) { + if (wide->texcoord_mode[i] != PIPE_SPRITE_COORD_NONE) { + uint j = wide->texcoord_slot[i]; + v->data[j][0] = tc[0]; + if (wide->texcoord_mode[i] == PIPE_SPRITE_COORD_LOWER_LEFT) + v->data[j][1] = 1.0f - tc[1]; + else + v->data[j][1] = tc[1]; + v->data[j][2] = tc[2]; + v->data[j][3] = tc[3]; + } + } +} + + +/* If there are lots of sprite points (and why wouldn't there be?) it + * would probably be more sensible to change hardware setup to + * optimize this rather than doing the whole thing in software like + * this. + */ +static void widepoint_point( struct draw_stage *stage, + struct prim_header *header ) +{ + const struct widepoint_stage *wide = widepoint_stage(stage); + const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite; + float half_size; + float left_adj, right_adj; + + struct prim_header tri; + + /* four dups of original vertex */ + struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); + struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); + struct vertex_header *v2 = dup_vert(stage, header->v[0], 2); + struct vertex_header *v3 = dup_vert(stage, header->v[0], 3); + + float *pos0 = v0->data[0]; + float *pos1 = v1->data[0]; + float *pos2 = v2->data[0]; + float *pos3 = v3->data[0]; + + /* point size is either per-vertex or fixed size */ + if (wide->psize_slot >= 0) { + half_size = 0.5f * header->v[0]->data[wide->psize_slot][0]; + } + else { + half_size = wide->half_point_size; + } + + left_adj = -half_size; /* + 0.25f;*/ + right_adj = half_size; /* + 0.25f;*/ + + pos0[0] += left_adj; + pos0[1] -= half_size; + + pos1[0] += left_adj; + pos1[1] += half_size; + + pos2[0] += right_adj; + pos2[1] -= half_size; + + pos3[0] += right_adj; + pos3[1] += half_size; + + if (sprite) { + static const float tex00[4] = { 0, 0, 0, 1 }; + static const float tex01[4] = { 0, 1, 0, 1 }; + static const float tex11[4] = { 1, 1, 0, 1 }; + static const float tex10[4] = { 1, 0, 0, 1 }; + set_texcoords( wide, v0, tex00 ); + set_texcoords( wide, v1, tex01 ); + set_texcoords( wide, v2, tex10 ); + set_texcoords( wide, v3, tex11 ); + } + + tri.det = header->det; /* only the sign matters */ + tri.v[0] = v0; + tri.v[1] = v2; + tri.v[2] = v3; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v0; + tri.v[1] = v3; + tri.v[2] = v1; + stage->next->tri( stage->next, &tri ); +} + + +static void widepoint_first_point( struct draw_stage *stage, + struct prim_header *header ) +{ + struct widepoint_stage *wide = widepoint_stage(stage); + struct draw_context *draw = stage->draw; + + wide->half_point_size = 0.5f * draw->rasterizer->point_size; + + /* XXX we won't know the real size if it's computed by the vertex shader! */ + if (draw->rasterizer->point_size > draw->wide_point_threshold) { + stage->point = widepoint_point; + } + else { + stage->point = passthrough_point; + } + + if (draw->rasterizer->point_sprite) { + /* find vertex shader texcoord outputs */ + const struct draw_vertex_shader *vs = draw->vertex_shader; + uint i, j = 0; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { + wide->texcoord_slot[j] = i; + wide->texcoord_mode[j] = draw->rasterizer->sprite_coord_mode[j]; + j++; + } + } + wide->num_texcoords = j; + } + + wide->psize_slot = -1; + if (draw->rasterizer->point_size_per_vertex) { + /* find PSIZ vertex output */ + const struct draw_vertex_shader *vs = draw->vertex_shader; + uint i; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { + wide->psize_slot = i; + break; + } + } + } + + stage->point( stage, header ); +} + + +static void widepoint_flush( struct draw_stage *stage, unsigned flags ) +{ + stage->point = widepoint_first_point; + stage->next->flush( stage->next, flags ); +} + + +static void widepoint_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void widepoint_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +struct draw_stage *draw_wide_point_stage( struct draw_context *draw ) +{ + struct widepoint_stage *wide = CALLOC_STRUCT(widepoint_stage); + + draw_alloc_temp_verts( &wide->stage, 4 ); + + wide->stage.draw = draw; + wide->stage.next = NULL; + wide->stage.point = widepoint_first_point; + wide->stage.line = widepoint_line; + wide->stage.tri = widepoint_tri; + wide->stage.flush = widepoint_flush; + wide->stage.reset_stipple_counter = widepoint_reset_stipple_counter; + wide->stage.destroy = widepoint_destroy; + + return &wide->stage; +} -- cgit v1.2.3 From 78220aea864c36038f8425ad9d8467d2a2bdea58 Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 29 Feb 2008 10:07:44 -0700 Subject: gallium: remove the ugly pipe->draw stage lookup code in aaline/point/pstipple stages Added a void *draw ptr to pipe_context. Probably look for a better solution someday. --- src/gallium/auxiliary/draw/draw_aaline.c | 32 ++++-------------------------- src/gallium/auxiliary/draw/draw_aapoint.c | 32 ++++-------------------------- src/gallium/auxiliary/draw/draw_pstipple.c | 32 ++++-------------------------- src/gallium/include/pipe/p_context.h | 1 + 4 files changed, 13 insertions(+), 84 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c index 51140388f0..7660e56fe6 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -691,35 +691,11 @@ draw_aaline_stage(struct draw_context *draw) } -/* - * XXX temporary? solution to mapping a pipe_context to a aaline_stage. - */ - -#define MAX_CONTEXTS 10 - -static struct pipe_context *Pipe[MAX_CONTEXTS]; -static struct aaline_stage *Stage[MAX_CONTEXTS]; -static uint NumContexts; - -static void -add_aa_pipe_context(struct pipe_context *pipe, struct aaline_stage *aa) -{ - assert(NumContexts < MAX_CONTEXTS); - Pipe[NumContexts] = pipe; - Stage[NumContexts] = aa; - NumContexts++; -} - static struct aaline_stage * aaline_stage_from_pipe(struct pipe_context *pipe) { - uint i; - for (i = 0; i < NumContexts; i++) { - if (Pipe[i] == pipe) - return Stage[i]; - } - assert(0); - return NULL; + struct draw_context *draw = (struct draw_context *) pipe->draw; + return aaline_stage(draw->pipeline.aaline); } @@ -802,6 +778,8 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) { struct aaline_stage *aaline; + pipe->draw = (void *) draw; + /* * Create / install AA line drawing / prim stage */ @@ -830,6 +808,4 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) pipe->bind_sampler_state = aaline_bind_sampler_state; pipe->set_sampler_texture = aaline_set_sampler_texture; - - add_aa_pipe_context(pipe, aaline); } diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c index d48a416899..70f696475f 100644 --- a/src/gallium/auxiliary/draw/draw_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_aapoint.c @@ -762,35 +762,11 @@ draw_aapoint_stage(struct draw_context *draw) } -/* - * XXX temporary? solution to mapping a pipe_context to a aapoint_stage. - */ - -#define MAX_CONTEXTS 10 - -static struct pipe_context *Pipe[MAX_CONTEXTS]; -static struct aapoint_stage *Stage[MAX_CONTEXTS]; -static uint NumContexts; - -static void -add_aa_pipe_context(struct pipe_context *pipe, struct aapoint_stage *aa) -{ - assert(NumContexts < MAX_CONTEXTS); - Pipe[NumContexts] = pipe; - Stage[NumContexts] = aa; - NumContexts++; -} - static struct aapoint_stage * aapoint_stage_from_pipe(struct pipe_context *pipe) { - uint i; - for (i = 0; i < NumContexts; i++) { - if (Pipe[i] == pipe) - return Stage[i]; - } - assert(0); - return NULL; + struct draw_context *draw = (struct draw_context *) pipe->draw; + return aapoint_stage(draw->pipeline.aapoint); } @@ -850,6 +826,8 @@ draw_install_aapoint_stage(struct draw_context *draw, { struct aapoint_stage *aapoint; + pipe->draw = (void *) draw; + /* * Create / install AA point drawing / prim stage */ @@ -868,6 +846,4 @@ draw_install_aapoint_stage(struct draw_context *draw, pipe->create_fs_state = aapoint_create_fs_state; pipe->bind_fs_state = aapoint_bind_fs_state; pipe->delete_fs_state = aapoint_delete_fs_state; - - add_aa_pipe_context(pipe, aapoint); } diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index f6200aa820..2cfeb813b3 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -559,35 +559,11 @@ draw_pstip_stage(struct draw_context *draw) } -/* - * XXX temporary? solution to mapping a pipe_context to a pstip_stage. - */ - -#define MAX_CONTEXTS 10 - -static struct pipe_context *Pipe[MAX_CONTEXTS]; -static struct pstip_stage *Stage[MAX_CONTEXTS]; -static uint NumContexts; - -static void -add_pstip_pipe_context(struct pipe_context *pipe, struct pstip_stage *pstip) -{ - assert(NumContexts < MAX_CONTEXTS); - Pipe[NumContexts] = pipe; - Stage[NumContexts] = pstip; - NumContexts++; -} - static struct pstip_stage * pstip_stage_from_pipe(struct pipe_context *pipe) { - uint i; - for (i = 0; i < NumContexts; i++) { - if (Pipe[i] == pipe) - return Stage[i]; - } - assert(0); - return NULL; + struct draw_context *draw = (struct draw_context *) pipe->draw; + return pstip_stage(draw->pipeline.pstipple); } @@ -686,6 +662,8 @@ draw_install_pstipple_stage(struct draw_context *draw, { struct pstip_stage *pstip; + pipe->draw = (void *) draw; + /* * Create / install AA line drawing / prim stage */ @@ -716,6 +694,4 @@ draw_install_pstipple_stage(struct draw_context *draw, pipe->bind_sampler_state = pstip_bind_sampler_state; pipe->set_sampler_texture = pstip_set_sampler_texture; pipe->set_polygon_stipple = pstip_set_polygon_stipple; - - add_pstip_pipe_context(pipe, pstip); } diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index d0f25d7d46..1501b52f3e 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -56,6 +56,7 @@ struct pipe_context { struct pipe_screen *screen; void *priv; /** context private data (for DRI for example) */ + void *draw; /** private, for draw module (temporary? */ void (*destroy)( struct pipe_context * ); -- cgit v1.2.3 From a41b77f4fedc7d956d8cb44547d812b5449f8641 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 29 Feb 2008 11:09:02 -0700 Subject: gallium: added pipe_get/put_tile_z() functions --- src/gallium/auxiliary/util/p_tile.c | 124 ++++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/p_tile.h | 14 ++++ 2 files changed, 138 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index 3f795a3898..287d783981 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -697,3 +697,127 @@ pipe_put_tile_rgba(struct pipe_context *pipe, FREE(packed); } + + +/** + * Get a block of Z values, converted to 32-bit range. + */ +void +pipe_get_tile_z(struct pipe_context *pipe, + struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + uint *z) +{ + const uint dstStride = w; + uint *pDest = z; + uint i, j; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + switch (ps->format) { + case PIPE_FORMAT_Z32_UNORM: + { + const uint *pSrc + = (const uint *) pipe_surface_map(ps) + (y * ps->pitch + x); + for (i = 0; i < h; i++) { + memcpy(pDest, pSrc, 4 * w); + pDest += dstStride; + pSrc += ps->pitch; + } + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + { + const uint *pSrc + = (const uint *) pipe_surface_map(ps) + (y * ps->pitch + x); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 24-bit Z to 32-bit Z */ + pDest[j] = (pSrc[j] << 8) | (pSrc[j] & 0xff); + } + pDest += dstStride; + pSrc += ps->pitch; + } + } + break; + case PIPE_FORMAT_Z16_UNORM: + { + const ushort *pSrc + = (const ushort *) pipe_surface_map(ps) + (y * ps->pitch + x); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 16-bit Z to 32-bit Z */ + pDest[j] = (pSrc[j] << 16) | pSrc[j]; + } + pDest += dstStride; + pSrc += ps->pitch; + } + } + break; + default: + assert(0); + } + + pipe_surface_unmap(ps); +} + + +void +pipe_put_tile_z(struct pipe_context *pipe, + struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const uint *zSrc) +{ + const uint srcStride = w; + const uint *pSrc = zSrc; + uint i, j; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + switch (ps->format) { + case PIPE_FORMAT_Z32_UNORM: + { + uint *pDest = (uint *) pipe_surface_map(ps) + (y * ps->pitch + x); + for (i = 0; i < h; i++) { + memcpy(pDest, pSrc, 4 * w); + pDest += ps->pitch; + pSrc += srcStride; + } + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + { + uint *pDest = (uint *) pipe_surface_map(ps) + (y * ps->pitch + x); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 32-bit Z to 24-bit Z (0 stencil) */ + pDest[j] = pSrc[j] >> 8; + } + pDest += ps->pitch; + pSrc += srcStride; + } + } + break; + case PIPE_FORMAT_Z16_UNORM: + { + ushort *pDest = (ushort *) pipe_surface_map(ps) + (y * ps->pitch + x); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 32-bit Z to 16-bit Z */ + pDest[j] = pSrc[j] >> 16; + } + pDest += ps->pitch; + pSrc += srcStride; + } + } + break; + default: + assert(0); + } + + pipe_surface_unmap(ps); +} + + diff --git a/src/gallium/auxiliary/util/p_tile.h b/src/gallium/auxiliary/util/p_tile.h index cd8124bf11..318b6d11a6 100644 --- a/src/gallium/auxiliary/util/p_tile.h +++ b/src/gallium/auxiliary/util/p_tile.h @@ -78,4 +78,18 @@ pipe_put_tile_rgba(struct pipe_context *pipe, uint x, uint y, uint w, uint h, const float *p); + +extern void +pipe_get_tile_z(struct pipe_context *pipe, + struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + uint *z); + +extern void +pipe_put_tile_z(struct pipe_context *pipe, + struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const uint *z); + + #endif -- cgit v1.2.3 From 2a121e8e2289d2ca136f511c5a6ef049f9c99ec4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 29 Feb 2008 11:37:12 -0700 Subject: gallium: tweak coords for wide lines --- src/gallium/auxiliary/draw/draw_wide_line.c | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_wide_line.c b/src/gallium/auxiliary/draw/draw_wide_line.c index 946a983f00..9a168ce8bd 100644 --- a/src/gallium/auxiliary/draw/draw_wide_line.c +++ b/src/gallium/auxiliary/draw/draw_wide_line.c @@ -86,7 +86,10 @@ static void wideline_line( struct draw_stage *stage, const float dx = FABSF(pos0[0] - pos2[0]); const float dy = FABSF(pos0[1] - pos2[1]); - + + /* small tweak to meet GL specification */ + const float bias = 0.125f; + /* * Draw wide line as a quad (two tris) by "stretching" the line along * X or Y. @@ -95,10 +98,10 @@ static void wideline_line( struct draw_stage *stage, if (dx > dy) { /* x-major line */ - pos0[1] = pos0[1] - half_width - 0.25f; - pos1[1] = pos1[1] + half_width - 0.25f; - pos2[1] = pos2[1] - half_width - 0.25f; - pos3[1] = pos3[1] + half_width - 0.25f; + pos0[1] = pos0[1] - half_width - bias; + pos1[1] = pos1[1] + half_width - bias; + pos2[1] = pos2[1] - half_width - bias; + pos3[1] = pos3[1] + half_width - bias; if (pos0[0] < pos2[0]) { /* left to right line */ pos0[0] -= 0.5f; @@ -116,10 +119,10 @@ static void wideline_line( struct draw_stage *stage, } else { /* y-major line */ - pos0[0] = pos0[0] - half_width + 0.25f; - pos1[0] = pos1[0] + half_width + 0.25f; - pos2[0] = pos2[0] - half_width + 0.25f; - pos3[0] = pos3[0] + half_width + 0.25f; + pos0[0] = pos0[0] - half_width + bias; + pos1[0] = pos1[0] + half_width + bias; + pos2[0] = pos2[0] - half_width + bias; + pos3[0] = pos3[0] + half_width + bias; if (pos0[1] < pos2[1]) { /* top to bottom line */ pos0[1] -= 0.5f; -- cgit v1.2.3 From b8ee90e05a98d5a167c1fdb5a8fc3bc0cb4a6a78 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 29 Feb 2008 12:21:42 -0700 Subject: gallium: need precalc_flat=1 for wide lines --- src/gallium/auxiliary/draw/draw_validate.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index 084eee9b6e..b43295b586 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -84,6 +84,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) if (wide_lines) { draw->pipeline.wide_line->next = next; next = draw->pipeline.wide_line; + precalc_flat = 1; } if (wide_points || draw->rasterizer->point_sprite) { -- cgit v1.2.3 From 6da943d204eeb488895933c45e174042cb69c92d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 29 Feb 2008 12:22:04 -0700 Subject: gallium: point rast coord tweak --- src/gallium/auxiliary/draw/draw_wide_point.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_wide_point.c b/src/gallium/auxiliary/draw/draw_wide_point.c index 8f877a102a..65bd50f2b8 100644 --- a/src/gallium/auxiliary/draw/draw_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_wide_point.c @@ -109,7 +109,7 @@ static void widepoint_point( struct draw_stage *stage, const struct widepoint_stage *wide = widepoint_stage(stage); const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite; float half_size; - float left_adj, right_adj; + float left_adj, right_adj, bot_adj, top_adj; struct prim_header tri; @@ -124,6 +124,8 @@ static void widepoint_point( struct draw_stage *stage, float *pos2 = v2->data[0]; float *pos3 = v3->data[0]; + const float xbias = 0.0, ybias = -0.125; + /* point size is either per-vertex or fixed size */ if (wide->psize_slot >= 0) { half_size = 0.5f * header->v[0]->data[wide->psize_slot][0]; @@ -132,20 +134,22 @@ static void widepoint_point( struct draw_stage *stage, half_size = wide->half_point_size; } - left_adj = -half_size; /* + 0.25f;*/ - right_adj = half_size; /* + 0.25f;*/ + left_adj = -half_size + xbias; + right_adj = half_size + xbias; + bot_adj = half_size + ybias; + top_adj = -half_size + ybias; pos0[0] += left_adj; - pos0[1] -= half_size; + pos0[1] += top_adj; pos1[0] += left_adj; - pos1[1] += half_size; + pos1[1] += bot_adj; pos2[0] += right_adj; - pos2[1] -= half_size; + pos2[1] += top_adj; pos3[0] += right_adj; - pos3[1] += half_size; + pos3[1] += bot_adj; if (sprite) { static const float tex00[4] = { 0, 0, 0, 1 }; -- cgit v1.2.3 From 5240cebb23f4862f4f7458a1b397957e4460b527 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 29 Feb 2008 13:00:17 -0700 Subject: gallium: fix line emit order for unfilled tris A tri drawn with GL_LINE_LOOP and GL_POLYGON w/ fillmode=GL_LINE should produce the same results when line stipple is enabled. Results are correct now. --- src/gallium/auxiliary/draw/draw_unfilled.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_unfilled.c b/src/gallium/auxiliary/draw/draw_unfilled.c index 8777cfdfc8..4d718d514c 100644 --- a/src/gallium/auxiliary/draw/draw_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_unfilled.c @@ -101,9 +101,9 @@ static void lines( struct draw_stage *stage, assert(((header->edgeflags & 0x4) >> 2) == header->v[2]->edgeflag); #endif + if (header->edgeflags & 0x4) line( stage, v2, v0 ); if (header->edgeflags & 0x1) line( stage, v0, v1 ); if (header->edgeflags & 0x2) line( stage, v1, v2 ); - if (header->edgeflags & 0x4) line( stage, v2, v0 ); } -- cgit v1.2.3 From e884c7ed9a14aabaa86f6710c594d20812ed11d9 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sat, 1 Mar 2008 08:04:21 -0500 Subject: start implementing start of bultins --- src/gallium/auxiliary/gallivm/Makefile | 7 +- src/gallium/auxiliary/gallivm/instructions.cpp | 10 ++ src/gallium/auxiliary/gallivm/instructionssoa.cpp | 116 ++++++++++++++++++++++ src/gallium/auxiliary/gallivm/instructionssoa.h | 14 ++- src/gallium/auxiliary/gallivm/soabuiltins.c | 58 +++++++++++ src/gallium/auxiliary/gallivm/storagesoa.cpp | 28 +++--- src/gallium/auxiliary/gallivm/storagesoa.h | 15 +-- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 38 ++----- src/gallium/include/pipe/p_shader_tokens.h | 20 ++-- 9 files changed, 239 insertions(+), 67 deletions(-) create mode 100644 src/gallium/auxiliary/gallivm/soabuiltins.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile index 39fac6ea4a..c24e19e062 100644 --- a/src/gallium/auxiliary/gallivm/Makefile +++ b/src/gallium/auxiliary/gallivm/Makefile @@ -15,7 +15,7 @@ GALLIVM_SOURCES = \ storagesoa.cpp \ instructionssoa.cpp -INC_SOURCES = gallivm_builtins.cpp +INC_SOURCES = gallivm_builtins.cpp gallivmsoabuiltins.cpp CPP_SOURCES = \ $(GALLIVM_SOURCES) @@ -65,8 +65,10 @@ depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES) gallivm_builtins.cpp: llvm_builtins.c - clang --emit-llvm $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=$@ -f -for=shader -funcname=createGallivmBuiltins + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=$@ -f -for=shader -funcname=createGallivmBuiltins +gallivmsoabuiltins.cpp: soabuiltins.c + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-module -o=$@ -f -for=shader -funcname=createSoaBuiltins # Emacs tags tags: @@ -78,6 +80,7 @@ clean: -rm -f *.o */*.o *~ *.so *~ server/*.o -rm -f depend depend.bak -rm -f gallivm_builtins.cpp + -rm -f gallivmsoabuiltins.cpp symlinks: diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index 55d39fa5f1..8919491792 100644 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -42,6 +42,7 @@ #include #include #include +#include #include #include @@ -51,6 +52,15 @@ using namespace llvm; #include "gallivm_builtins.cpp" +#if 0 + +llvm::Value *arrayFromChannels(std::vector &vals) +{ + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + ArrayType *vectorArray = ArrayType::get(vectorType, 4); +} +#endif + static inline std::string createFuncName(int label) { std::ostringstream stream; diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index a4d5046637..5739cf0cde 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -2,9 +2,28 @@ #include "storagesoa.h" +#include "pipe/p_shader_tokens.h" + +#include #include +#include +#include +#include +#include +#include + +#include + +/* disable some warnings. this file is autogenerated */ +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wunused-variable" +#endif using namespace llvm; +#include "gallivmsoabuiltins.cpp" +#if defined(__GNUC__) +#pragma GCC diagnostic warning "-Wunused-variable" +#endif InstructionsSoa::InstructionsSoa(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block, StorageSoa *storage) @@ -12,6 +31,8 @@ InstructionsSoa::InstructionsSoa(llvm::Module *mod, llvm::Function *func, m_storage(storage), m_idx(0) { + createFunctionMap(); + createBuiltins(); } const char * InstructionsSoa::name(const char *prefix) const @@ -119,3 +140,98 @@ std::vector InstructionsSoa::extractVector(llvm::Value *vector) return res; } + +void InstructionsSoa::createFunctionMap() +{ + m_functionsMap[TGSI_OPCODE_DP3] = "dp3"; +} + +llvm::Function * InstructionsSoa::function(int op) +{ + if (m_functions.find(op) != m_functions.end()) + return m_functions[op]; + + std::string name = m_functionsMap[op]; + + llvm::Function *originalFunc = m_builtins->getFunction(name); + llvm::Function *func = CloneFunction(originalFunc); + currentModule()->getFunctionList().push_back(func); + std::cout << "Func parent is "<getParent() + <<", cur is "<setParent(currentModule()); + m_functions[op] = func; + return func; +} + +llvm::Module * InstructionsSoa::currentModule() const +{ + BasicBlock *block = m_builder.GetInsertBlock(); + if (!block || !block->getParent()) + return 0; + + return block->getParent()->getParent(); +} + +void InstructionsSoa::createBuiltins() +{ + m_builtins = createSoaBuiltins(); +} + +std::vector InstructionsSoa::dp3(const std::vector in1, + const std::vector in2) +{ + llvm::Function *func = function(TGSI_OPCODE_DP3); + std::vector params; + + llvm::Value *tmp = allocaTemp(); + params.push_back(tmp); + + params.push_back(in1[0]); + params.push_back(in1[1]); + params.push_back(in1[2]); + params.push_back(in1[3]); + params.push_back(in2[0]); + params.push_back(in2[1]); + params.push_back(in2[2]); + params.push_back(in2[3]); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end(), + name("dp3")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + + std::vector indices; + indices.push_back(m_storage->constantInt(0)); + indices.push_back(m_storage->constantInt(0)); + + GetElementPtrInst *getElem = new GetElementPtrInst(tmp, + indices.begin(), + indices.end(), + name("allocaPtr"), + m_builder.GetInsertBlock()); + indices = std::vector(); + indices.push_back(m_storage->constantInt(0)); + GetElementPtrInst *xElemPtr = new GetElementPtrInst(getElem, + m_storage->constantInt(0), + name("xPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *yElemPtr = new GetElementPtrInst(getElem, + m_storage->constantInt(1), + name("yPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *zElemPtr = new GetElementPtrInst(getElem, + m_storage->constantInt(2), + name("zPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *wElemPtr = new GetElementPtrInst(getElem, + m_storage->constantInt(3), + name("wPtr"), + m_builder.GetInsertBlock()); + + std::vector res(4); + res[0] = new LoadInst(xElemPtr); + res[1] = new LoadInst(yElemPtr); + res[2] = new LoadInst(zElemPtr); + res[3] = new LoadInst(wElemPtr); + + return res; +} diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h index 4169dcbb2e..5c26687150 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.h +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h @@ -30,6 +30,7 @@ #include +#include #include namespace llvm { @@ -47,9 +48,10 @@ public: llvm::BasicBlock *block, StorageSoa *storage); std::vector arl(const std::vector in); - std::vector add(const std::vector in1, const std::vector in2); + std::vector dp3(const std::vector in1, + const std::vector in2); std::vector madd(const std::vector in1, const std::vector in2, const std::vector in3); @@ -62,9 +64,19 @@ private: const char * name(const char *prefix) const; llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, llvm::Value *z, llvm::Value *w); + void createFunctionMap(); + void createBuiltins(); + llvm::Function *function(int); + llvm::Module *currentModule() const; + llvm::Value *allocaTemp(); private: llvm::LLVMFoldingBuilder m_builder; StorageSoa *m_storage; + + std::map m_functionsMap; + std::map m_functions; + llvm::Module *m_builtins; + private: mutable int m_idx; mutable char m_name[32]; diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c new file mode 100644 index 0000000000..0b428a750f --- /dev/null +++ b/src/gallium/auxiliary/gallivm/soabuiltins.c @@ -0,0 +1,58 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * This file is compiled with clang into the LLVM bitcode + * + * Authors: + * Zack Rusin zack@tungstengraphics.com + */ +typedef __attribute__(( ocu_vector_type(4) )) float float4; + +void dp3(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) + + (tmp0z * tmp1z); + + res[0] = dot; + res[1] = dot; + res[2] = dot; + res[3] = dot; +} + +#if 0 +void yo(float4 *out, float4 *in) +{ + float4 res[4]; + + dp3(res, in[0], in[1], in[2], in[3], + in[4], in[5], in[6], in[7]); + out[1] = res[1]; +} +#endif diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp index ed0674a96f..bb6fe3d7e1 100644 --- a/src/gallium/auxiliary/gallivm/storagesoa.cpp +++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp @@ -277,7 +277,7 @@ llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector &v return constVector; } -std::vector StorageSoa::load(Argument type, int idx, int swizzle, +std::vector StorageSoa::load(enum tgsi_file_type type, int idx, int swizzle, llvm::Value *indIdx) { std::vector val(4); @@ -292,25 +292,29 @@ std::vector StorageSoa::load(Argument type, int idx, int swizzle, debug_printf("XXXXXXXXX realIdx = %p, indIdx = %p\n", realIndex, indIdx); switch(type) { - case Input: + case TGSI_FILE_INPUT: val = inputElement(realIndex); break; - case Output: + case TGSI_FILE_OUTPUT: val = outputElement(realIndex); break; - case Temp: + case TGSI_FILE_TEMPORARY: val = tempElement(realIndex); break; - case Const: + case TGSI_FILE_CONSTANT: val = constElement(realIndex); break; - case Immediate: + case TGSI_FILE_IMMEDIATE: val = immediateElement(realIndex); break; - case Address: + case TGSI_FILE_ADDRESS: debug_printf("Address not handled in the load phase!\n"); assert(0); break; + default: + debug_printf("Unknown load!\n"); + assert(0); + break; } if (!gallivm_is_swizzle(swizzle)) return val; @@ -324,21 +328,21 @@ std::vector StorageSoa::load(Argument type, int idx, int swizzle, return res; } -void StorageSoa::store(Argument type, int idx, const std::vector &val, +void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector &val, int mask) { llvm::Value *out = 0; switch(type) { - case Output: + case TGSI_FILE_OUTPUT: out = m_output; break; - case Temp: + case TGSI_FILE_TEMPORARY: out = m_temps; break; - case Input: + case TGSI_FILE_INPUT: out = m_input; break; - case Address: { + case TGSI_FILE_ADDRESS: { llvm::Value *addr = m_addresses[idx]; if (!addr) { addAddress(idx); diff --git a/src/gallium/auxiliary/gallivm/storagesoa.h b/src/gallium/auxiliary/gallivm/storagesoa.h index 6443351f27..ae2fc7c6ae 100644 --- a/src/gallium/auxiliary/gallivm/storagesoa.h +++ b/src/gallium/auxiliary/gallivm/storagesoa.h @@ -28,6 +28,8 @@ #ifndef STORAGESOA_H #define STORAGESOA_H +#include + #include #include #include @@ -45,15 +47,6 @@ namespace llvm { class StorageSoa { -public: - enum Argument { - Input, - Output, - Temp, - Const, - Immediate, - Address - }; public: StorageSoa(llvm::BasicBlock *block, llvm::Value *input, @@ -62,9 +55,9 @@ public: llvm::Value *temps); - std::vector load(Argument type, int idx, int swizzle, + std::vector load(enum tgsi_file_type type, int idx, int swizzle, llvm::Value *indIdx =0); - void store(Argument type, int idx, const std::vector &val, + void store(enum tgsi_file_type type, int idx, const std::vector &val, int mask); void addImmediate(float *vec); diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 2cb4acce32..a52ee26434 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -708,25 +708,9 @@ translate_instructionir(llvm::Module *module, if (src->SrcRegister.Indirect) { indIdx = storage->addrElement(src->SrcRegisterInd.Index); } - if (src->SrcRegister.File == TGSI_FILE_CONSTANT) { - val = storage->load(StorageSoa::Const, - src->SrcRegister.Index, swizzle, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_INPUT) { - val = storage->load(StorageSoa::Input, - src->SrcRegister.Index, swizzle, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) { - val = storage->load(StorageSoa::Temp, - src->SrcRegister.Index, swizzle, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) { - val = storage->load(StorageSoa::Output, - src->SrcRegister.Index, swizzle, indIdx); - } else if (src->SrcRegister.File == TGSI_FILE_IMMEDIATE) { - val = storage->load(StorageSoa::Immediate, - src->SrcRegister.Index, swizzle, indIdx); - } else { - fprintf(stderr, "ERROR: not supported llvm source %d\n", src->SrcRegister.File); - return; - } + + val = storage->load((enum tgsi_file_type)src->SrcRegister.File, + src->SrcRegister.Index, swizzle, indIdx); inputs[i] = val; } @@ -763,6 +747,7 @@ translate_instructionir(llvm::Module *module, } break; case TGSI_OPCODE_DP3: { + out = instr->dp3(inputs[0], inputs[1]); } break; case TGSI_OPCODE_DP4: { @@ -1067,19 +1052,8 @@ translate_instructionir(llvm::Module *module, for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { - storage->store(StorageSoa::Output, - dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) { - storage->store(StorageSoa::Temp, - dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) { - storage->store(StorageSoa::Address, - dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - } else { - fprintf(stderr, "ERROR: unsupported LLVM destination!"); - assert(!"wrong destination"); - } + storage->store((enum tgsi_file_type)dst->DstRegister.File, + dst->DstRegister.Index, out, dst->DstRegister.WriteMask); } } diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 0a6145a6bf..b110f01291 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -42,15 +42,17 @@ struct tgsi_token unsigned Extended : 1; /* BOOL */ }; -#define TGSI_FILE_NULL 0 -#define TGSI_FILE_CONSTANT 1 -#define TGSI_FILE_INPUT 2 -#define TGSI_FILE_OUTPUT 3 -#define TGSI_FILE_TEMPORARY 4 -#define TGSI_FILE_SAMPLER 5 -#define TGSI_FILE_ADDRESS 6 -#define TGSI_FILE_IMMEDIATE 7 -#define TGSI_FILE_COUNT 8 /**< how many TGSI_FILE_ types */ +enum tgsi_file_type { + TGSI_FILE_NULL =0, + TGSI_FILE_CONSTANT =1, + TGSI_FILE_INPUT =2, + TGSI_FILE_OUTPUT =3, + TGSI_FILE_TEMPORARY =4, + TGSI_FILE_SAMPLER =5, + TGSI_FILE_ADDRESS =6, + TGSI_FILE_IMMEDIATE =7, + TGSI_FILE_COUNT /**< how many TGSI_FILE_ types */ +}; #define TGSI_DECLARE_RANGE 0 -- cgit v1.2.3 From 17f543fc4529ca4ce7f73a840ed0fb50d1fec925 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sat, 1 Mar 2008 08:32:31 -0500 Subject: make the first builtin work (dp3) --- src/gallium/auxiliary/gallivm/gallivm.cpp | 10 +++++- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 41 +++++++++++++---------- 2 files changed, 33 insertions(+), 18 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp index d14bb3b99a..b6f641a3f8 100644 --- a/src/gallium/auxiliary/gallivm/gallivm.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm.cpp @@ -306,11 +306,19 @@ struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir) { struct gallivm_prog *prog = (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog)); + + std::cout << "Before optimizations:"<module->dump(); + std::cout<<"-------------------------------"<module); llvm::Module *mod = llvm::CloneModule(ir->module); prog->num_consts = ir->num_consts; memcpy(prog->interpolators, ir->interpolators, sizeof(prog->interpolators)); prog->num_interp = ir->num_interp; - + /* Run optimization passes over it */ PassManager passes; passes.add(new TargetData(mod)); diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index 5739cf0cde..3fcfce8ce0 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -158,6 +158,7 @@ llvm::Function * InstructionsSoa::function(int op) currentModule()->getFunctionList().push_back(func); std::cout << "Func parent is "<getParent() <<", cur is "<dump(); //func->setParent(currentModule()); m_functions[op] = func; return func; @@ -184,8 +185,15 @@ std::vector InstructionsSoa::dp3(const std::vector i std::vector params; llvm::Value *tmp = allocaTemp(); - params.push_back(tmp); - + std::vector indices; + indices.push_back(m_storage->constantInt(0)); + indices.push_back(m_storage->constantInt(0)); + GetElementPtrInst *getElem = new GetElementPtrInst(tmp, + indices.begin(), + indices.end(), + name("allocaPtr"), + m_builder.GetInsertBlock()); + params.push_back(getElem); params.push_back(in1[0]); params.push_back(in1[1]); params.push_back(in1[2]); @@ -194,20 +202,10 @@ std::vector InstructionsSoa::dp3(const std::vector i params.push_back(in2[1]); params.push_back(in2[2]); params.push_back(in2[3]); - CallInst *call = m_builder.CreateCall(func, params.begin(), params.end(), - name("dp3")); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); call->setCallingConv(CallingConv::C); call->setTailCall(false); - std::vector indices; - indices.push_back(m_storage->constantInt(0)); - indices.push_back(m_storage->constantInt(0)); - - GetElementPtrInst *getElem = new GetElementPtrInst(tmp, - indices.begin(), - indices.end(), - name("allocaPtr"), - m_builder.GetInsertBlock()); indices = std::vector(); indices.push_back(m_storage->constantInt(0)); GetElementPtrInst *xElemPtr = new GetElementPtrInst(getElem, @@ -228,10 +226,19 @@ std::vector InstructionsSoa::dp3(const std::vector i m_builder.GetInsertBlock()); std::vector res(4); - res[0] = new LoadInst(xElemPtr); - res[1] = new LoadInst(yElemPtr); - res[2] = new LoadInst(zElemPtr); - res[3] = new LoadInst(wElemPtr); + res[0] = new LoadInst(xElemPtr, name("xRes"), false, m_builder.GetInsertBlock()); + res[1] = new LoadInst(yElemPtr, name("yRes"), false, m_builder.GetInsertBlock()); + res[2] = new LoadInst(zElemPtr, name("zRes"), false, m_builder.GetInsertBlock()); + res[3] = new LoadInst(wElemPtr, name("wRes"), false, m_builder.GetInsertBlock()); return res; } + +llvm::Value * InstructionsSoa::allocaTemp() +{ + VectorType *vector = VectorType::get(Type::FloatTy, 4); + ArrayType *vecArray = ArrayType::get(vector, 4); + AllocaInst *alloca = new AllocaInst(vecArray, name("tmpRes"), + m_builder.GetInsertBlock()); + return alloca; +} -- cgit v1.2.3 From a9c40f833ead8459788b86603c7f2b94632b1109 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sat, 1 Mar 2008 09:50:41 -0500 Subject: refactor code calling builtins and implement dp4 --- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 116 +++++++++++++++++----- src/gallium/auxiliary/gallivm/instructionssoa.h | 12 +++ src/gallium/auxiliary/gallivm/soabuiltins.c | 14 +++ src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 1 + 4 files changed, 116 insertions(+), 27 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index 3fcfce8ce0..89d513afd0 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -144,6 +144,7 @@ std::vector InstructionsSoa::extractVector(llvm::Value *vector) void InstructionsSoa::createFunctionMap() { m_functionsMap[TGSI_OPCODE_DP3] = "dp3"; + m_functionsMap[TGSI_OPCODE_DP4] = "dp4"; } llvm::Function * InstructionsSoa::function(int op) @@ -182,45 +183,42 @@ std::vector InstructionsSoa::dp3(const std::vector i const std::vector in2) { llvm::Function *func = function(TGSI_OPCODE_DP3); - std::vector params; + return callBuiltin(func, in1, in2); +} + +llvm::Value * InstructionsSoa::allocaTemp() +{ + VectorType *vector = VectorType::get(Type::FloatTy, 4); + ArrayType *vecArray = ArrayType::get(vector, 4); + AllocaInst *alloca = new AllocaInst(vecArray, name("tmpRes"), + m_builder.GetInsertBlock()); - llvm::Value *tmp = allocaTemp(); std::vector indices; indices.push_back(m_storage->constantInt(0)); indices.push_back(m_storage->constantInt(0)); - GetElementPtrInst *getElem = new GetElementPtrInst(tmp, + GetElementPtrInst *getElem = new GetElementPtrInst(alloca, indices.begin(), indices.end(), name("allocaPtr"), m_builder.GetInsertBlock()); - params.push_back(getElem); - params.push_back(in1[0]); - params.push_back(in1[1]); - params.push_back(in1[2]); - params.push_back(in1[3]); - params.push_back(in2[0]); - params.push_back(in2[1]); - params.push_back(in2[2]); - params.push_back(in2[3]); - CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); + return getElem; +} - indices = std::vector(); - indices.push_back(m_storage->constantInt(0)); - GetElementPtrInst *xElemPtr = new GetElementPtrInst(getElem, +std::vector InstructionsSoa::allocaToResult(llvm::Value *allocaPtr) +{ + GetElementPtrInst *xElemPtr = new GetElementPtrInst(allocaPtr, m_storage->constantInt(0), name("xPtr"), m_builder.GetInsertBlock()); - GetElementPtrInst *yElemPtr = new GetElementPtrInst(getElem, + GetElementPtrInst *yElemPtr = new GetElementPtrInst(allocaPtr, m_storage->constantInt(1), name("yPtr"), m_builder.GetInsertBlock()); - GetElementPtrInst *zElemPtr = new GetElementPtrInst(getElem, + GetElementPtrInst *zElemPtr = new GetElementPtrInst(allocaPtr, m_storage->constantInt(2), name("zPtr"), m_builder.GetInsertBlock()); - GetElementPtrInst *wElemPtr = new GetElementPtrInst(getElem, + GetElementPtrInst *wElemPtr = new GetElementPtrInst(allocaPtr, m_storage->constantInt(3), name("wPtr"), m_builder.GetInsertBlock()); @@ -234,11 +232,75 @@ std::vector InstructionsSoa::dp3(const std::vector i return res; } -llvm::Value * InstructionsSoa::allocaTemp() +std::vector InstructionsSoa::dp4(const std::vector in1, + const std::vector in2) { - VectorType *vector = VectorType::get(Type::FloatTy, 4); - ArrayType *vecArray = ArrayType::get(vector, 4); - AllocaInst *alloca = new AllocaInst(vecArray, name("tmpRes"), - m_builder.GetInsertBlock()); - return alloca; + llvm::Function *func = function(TGSI_OPCODE_DP4); + return callBuiltin(func, in1, in2); +} + +std::vector InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector in1) +{ + std::vector params; + + llvm::Value *allocaPtr = allocaTemp(); + params.push_back(allocaPtr); + params.push_back(in1[0]); + params.push_back(in1[1]); + params.push_back(in1[2]); + params.push_back(in1[3]); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + + return allocaToResult(allocaPtr); +} + +std::vector InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector in1, + const std::vector in2) +{ + std::vector params; + + llvm::Value *allocaPtr = allocaTemp(); + params.push_back(allocaPtr); + params.push_back(in1[0]); + params.push_back(in1[1]); + params.push_back(in1[2]); + params.push_back(in1[3]); + params.push_back(in2[0]); + params.push_back(in2[1]); + params.push_back(in2[2]); + params.push_back(in2[3]); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + + return allocaToResult(allocaPtr); +} + +std::vector InstructionsSoa::callBuiltin(llvm::Function *func, const std::vector in1, + const std::vector in2, + const std::vector in3) +{ + std::vector params; + + llvm::Value *allocaPtr = allocaTemp(); + params.push_back(allocaPtr); + params.push_back(in1[0]); + params.push_back(in1[1]); + params.push_back(in1[2]); + params.push_back(in1[3]); + params.push_back(in2[0]); + params.push_back(in2[1]); + params.push_back(in2[2]); + params.push_back(in2[3]); + params.push_back(in3[0]); + params.push_back(in3[1]); + params.push_back(in3[2]); + params.push_back(in3[3]); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end()); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + + return allocaToResult(allocaPtr); } diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h index 5c26687150..3ef51dcaff 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.h +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h @@ -52,6 +52,8 @@ public: const std::vector in2); std::vector dp3(const std::vector in1, const std::vector in2); + std::vector dp4(const std::vector in1, + const std::vector in2); std::vector madd(const std::vector in1, const std::vector in2, const std::vector in3); @@ -69,6 +71,16 @@ private: llvm::Function *function(int); llvm::Module *currentModule() const; llvm::Value *allocaTemp(); + std::vector allocaToResult(llvm::Value *allocaPtr); + std::vector callBuiltin(llvm::Function *func, + const std::vector in1); + std::vector callBuiltin(llvm::Function *func, + const std::vector in1, + const std::vector in2); + std::vector callBuiltin(llvm::Function *func, + const std::vector in1, + const std::vector in2, + const std::vector in3); private: llvm::LLVMFoldingBuilder m_builder; StorageSoa *m_storage; diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c index 0b428a750f..24c14e1b69 100644 --- a/src/gallium/auxiliary/gallivm/soabuiltins.c +++ b/src/gallium/auxiliary/gallivm/soabuiltins.c @@ -46,6 +46,20 @@ void dp3(float4 *res, res[3] = dot; } + +void dp4(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + float4 dot = (tmp0x * tmp1x) + (tmp0y * tmp1y) + + (tmp0z * tmp1z) + (tmp0w * tmp1w); + + res[0] = dot; + res[1] = dot; + res[2] = dot; + res[3] = dot; +} + #if 0 void yo(float4 *out, float4 *in) { diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index a52ee26434..3f65865a5a 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -751,6 +751,7 @@ translate_instructionir(llvm::Module *module, } break; case TGSI_OPCODE_DP4: { + out = instr->dp4(inputs[0], inputs[1]); } break; case TGSI_OPCODE_DST: { -- cgit v1.2.3 From 800d13df726b9f82f796c86fe7ae6d18231820ec Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 3 Mar 2008 17:44:04 +0100 Subject: draw: add fetch for bgra ubyte surfaces --- src/gallium/auxiliary/draw/draw_vertex_fetch.c | 44 +++++++++++++++++--------- 1 file changed, 29 insertions(+), 15 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vertex_fetch.c b/src/gallium/auxiliary/draw/draw_vertex_fetch.c index cb8cdd04a3..b56d85396d 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_fetch.c +++ b/src/gallium/auxiliary/draw/draw_vertex_fetch.c @@ -54,7 +54,7 @@ fetch_##NAME(const void *ptr, float *attrib) \ int i; \ \ for (i = 0; i < SZ; i++) { \ - attrib[i] = CVT; \ + attrib[i] = CVT(i); \ } \ \ for (; i < 4; i++) { \ @@ -62,24 +62,24 @@ fetch_##NAME(const void *ptr, float *attrib) \ } \ } -#define CVT_64_FLOAT (float) ((double *) ptr)[i] -#define CVT_32_FLOAT ((float *) ptr)[i] +#define CVT_64_FLOAT(i) (float) ((double *) ptr)[i] +#define CVT_32_FLOAT(i) ((float *) ptr)[i] -#define CVT_8_USCALED (float) ((unsigned char *) ptr)[i] -#define CVT_16_USCALED (float) ((unsigned short *) ptr)[i] -#define CVT_32_USCALED (float) ((unsigned int *) ptr)[i] +#define CVT_8_USCALED(i) (float) ((unsigned char *) ptr)[i] +#define CVT_16_USCALED(i) (float) ((unsigned short *) ptr)[i] +#define CVT_32_USCALED(i) (float) ((unsigned int *) ptr)[i] -#define CVT_8_SSCALED (float) ((char *) ptr)[i] -#define CVT_16_SSCALED (float) ((short *) ptr)[i] -#define CVT_32_SSCALED (float) ((int *) ptr)[i] +#define CVT_8_SSCALED(i) (float) ((char *) ptr)[i] +#define CVT_16_SSCALED(i) (float) ((short *) ptr)[i] +#define CVT_32_SSCALED(i) (float) ((int *) ptr)[i] -#define CVT_8_UNORM (float) ((unsigned char *) ptr)[i] / 255.0f -#define CVT_16_UNORM (float) ((unsigned short *) ptr)[i] / 65535.0f -#define CVT_32_UNORM (float) ((unsigned int *) ptr)[i] / 4294967295.0f +#define CVT_8_UNORM(i) (float) ((unsigned char *) ptr)[i] / 255.0f +#define CVT_16_UNORM(i) (float) ((unsigned short *) ptr)[i] / 65535.0f +#define CVT_32_UNORM(i) (float) ((unsigned int *) ptr)[i] / 4294967295.0f -#define CVT_8_SNORM (float) ((char *) ptr)[i] / 127.0f -#define CVT_16_SNORM (float) ((short *) ptr)[i] / 32767.0f -#define CVT_32_SNORM (float) ((int *) ptr)[i] / 2147483647.0f +#define CVT_8_SNORM(i) (float) ((char *) ptr)[i] / 127.0f +#define CVT_16_SNORM(i) (float) ((short *) ptr)[i] / 32767.0f +#define CVT_32_SNORM(i) (float) ((int *) ptr)[i] / 2147483647.0f FETCH_ATTRIB( R64G64B64A64_FLOAT, 4, CVT_64_FLOAT ) FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT ) @@ -156,6 +156,16 @@ FETCH_ATTRIB( A8R8G8B8_UNORM, 4, CVT_8_UNORM ) +static void +fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib) +{ + attrib[2] = CVT_8_UNORM(0); + attrib[1] = CVT_8_UNORM(1); + attrib[0] = CVT_8_UNORM(2); + attrib[3] = CVT_8_UNORM(3); +} + + static fetch_func get_fetch_func( enum pipe_format format ) { #if 0 @@ -296,6 +306,10 @@ static fetch_func get_fetch_func( enum pipe_format format ) case PIPE_FORMAT_A8R8G8B8_UNORM: return fetch_A8R8G8B8_UNORM; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + return fetch_B8G8R8A8_UNORM; + case 0: return NULL; /* not sure why this is needed */ -- cgit v1.2.3 From 689e1c5d501eb2f557f85dd3279ac5d91e53b0ad Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 3 Mar 2008 17:49:38 +0100 Subject: win32: don't prepend all debug with gallium3d --- src/gallium/auxiliary/util/p_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index b9607a6ba7..0da3b66a4d 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -43,7 +43,7 @@ void debug_vprintf(const char *format, va_list ap) { #ifdef WIN32 - EngDebugPrint("Gallium3D: ", (PCHAR)format, ap); + EngDebugPrint("", (PCHAR)format, ap); #else vfprintf(stderr, format, ap); #endif -- cgit v1.2.3 From 19cc2e363185b50d19e4f257dd3558896b9b49d3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 4 Mar 2008 17:55:02 +0100 Subject: draw: dont' compute clipmask or apply viewport when not clipping (rename bypass_clipping to coords_in_window_space? --- src/gallium/auxiliary/draw/draw_vs_exec.c | 38 +++++++++++++++++++------------ 1 file changed, 24 insertions(+), 14 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 583812aadd..514303e0ea 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -132,20 +132,30 @@ vs_exec_run( struct draw_vertex_shader *shader, z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); - vOut[j]->edgeflag = 1; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; + if (!draw->rasterizer->bypass_clipping) { + vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); + vOut[j]->edgeflag = 1; + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + vOut[j]->data[0][0] = x * scale[0] + trans[0]; + vOut[j]->data[0][1] = y * scale[1] + trans[1]; + vOut[j]->data[0][2] = z * scale[2] + trans[2]; + vOut[j]->data[0][3] = w; + } + else { + vOut[j]->clipmask = 0; + vOut[j]->edgeflag = 1; + vOut[j]->data[0][0] = x; + vOut[j]->data[0][1] = y; + vOut[j]->data[0][2] = z; + vOut[j]->data[0][3] = w; + } /* Remaining attributes are packed into sequential post-transform * vertex attrib slots. -- cgit v1.2.3 From b1922de9f3478869c6788ef4e954c06c20e7aa9c Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 5 Mar 2008 11:38:21 +0100 Subject: gallium: Use custom vsnprintf in WINDDK. EngDebugPrint does not handle float point arguments, so we need to use our own vsnprintf implementation. --- src/gallium/auxiliary/util/SConscript | 1 + src/gallium/auxiliary/util/p_debug.c | 20 +- src/gallium/auxiliary/util/u_snprintf.c | 1482 +++++++++++++++++++++++++++++++ 3 files changed, 1502 insertions(+), 1 deletion(-) create mode 100644 src/gallium/auxiliary/util/u_snprintf.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 4717941434..642088b962 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -7,6 +7,7 @@ util = env.ConvenienceLibrary( 'p_tile.c', 'p_util.c', 'u_mm.c', + 'u_snprintf.c', ]) auxiliaries.insert(0, util) diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 0da3b66a4d..7b3c030956 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -40,10 +40,28 @@ #include "pipe/p_compiler.h" +#ifdef WIN32 +static INLINE void +rpl_EngDebugPrint(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + EngDebugPrint("", (PCHAR)format, ap); + va_end(ap); +} + +int rpl_vsnprintf(char *, size_t, const char *, va_list); +#endif + + void debug_vprintf(const char *format, va_list ap) { #ifdef WIN32 - EngDebugPrint("", (PCHAR)format, ap); + /* EngDebugPrint does not handle float point arguments, so we need to use + * our own vsnprintf implementation */ + char buf[512 + 1]; + rpl_vsnprintf(buf, sizeof(buf), format, ap); + rpl_EngDebugPrint("%s", buf); #else vfprintf(stderr, format, ap); #endif diff --git a/src/gallium/auxiliary/util/u_snprintf.c b/src/gallium/auxiliary/util/u_snprintf.c new file mode 100644 index 0000000000..5e19a2ddb2 --- /dev/null +++ b/src/gallium/auxiliary/util/u_snprintf.c @@ -0,0 +1,1482 @@ +/* + * Copyright (c) 1995 Patrick Powell. + * + * This code is based on code written by Patrick Powell . + * It may be used for any purpose as long as this notice remains intact on all + * source code distributions. + */ + +/* + * Copyright (c) 2008 Holger Weiss. + * + * This version of the code is maintained by Holger Weiss . + * My changes to the code may freely be used, modified and/or redistributed for + * any purpose. It would be nice if additions and fixes to this file (including + * trivial code cleanups) would be sent back in order to let me include them in + * the version available at . + * However, this is not a requirement for using or redistributing (possibly + * modified) versions of this file, nor is leaving this notice intact mandatory. + */ + +/* + * History + * + * 2008-01-20 Holger Weiss for C99-snprintf 1.1: + * + * Fixed the detection of infinite floating point values on IRIX (and + * possibly other systems) and applied another few minor cleanups. + * + * 2008-01-06 Holger Weiss for C99-snprintf 1.0: + * + * Added a lot of new features, fixed many bugs, and incorporated various + * improvements done by Andrew Tridgell , Russ Allbery + * , Hrvoje Niksic , Damien Miller + * , and others for the Samba, INN, Wget, and OpenSSH + * projects. The additions include: support the "e", "E", "g", "G", and + * "F" conversion specifiers (and use conversion style "f" or "F" for the + * still unsupported "a" and "A" specifiers); support the "hh", "ll", "j", + * "t", and "z" length modifiers; support the "#" flag and the (non-C99) + * "'" flag; use localeconv(3) (if available) to get both the current + * locale's decimal point character and the separator between groups of + * digits; fix the handling of various corner cases of field width and + * precision specifications; fix various floating point conversion bugs; + * handle infinite and NaN floating point values; don't attempt to write to + * the output buffer (which may be NULL) if a size of zero was specified; + * check for integer overflow of the field width, precision, and return + * values and during the floating point conversion; use the OUTCHAR() macro + * instead of a function for better performance; provide asprintf(3) and + * vasprintf(3) functions; add new test cases. The replacement functions + * have been renamed to use an "rpl_" prefix, the function calls in the + * main project (and in this file) must be redefined accordingly for each + * replacement function which is needed (by using Autoconf or other means). + * Various other minor improvements have been applied and the coding style + * was cleaned up for consistency. + * + * 2007-07-23 Holger Weiss for Mutt 1.5.13: + * + * C99 compliant snprintf(3) and vsnprintf(3) functions return the number + * of characters that would have been written to a sufficiently sized + * buffer (excluding the '\0'). The original code simply returned the + * length of the resulting output string, so that's been fixed. + * + * 1998-03-05 Michael Elkins for Mutt 0.90.8: + * + * The original code assumed that both snprintf(3) and vsnprintf(3) were + * missing. Some systems only have snprintf(3) but not vsnprintf(3), so + * the code is now broken down under HAVE_SNPRINTF and HAVE_VSNPRINTF. + * + * 1998-01-27 Thomas Roessler for Mutt 0.89i: + * + * The PGP code was using unsigned hexadecimal formats. Unfortunately, + * unsigned formats simply didn't work. + * + * 1997-10-22 Brandon Long for Mutt 0.87.1: + * + * Ok, added some minimal floating point support, which means this probably + * requires libm on most operating systems. Don't yet support the exponent + * (e,E) and sigfig (g,G). Also, fmtint() was pretty badly broken, it just + * wasn't being exercised in ways which showed it, so that's been fixed. + * Also, formatted the code to Mutt conventions, and removed dead code left + * over from the original. Also, there is now a builtin-test, run with: + * gcc -DTEST_SNPRINTF -o snprintf snprintf.c -lm && ./snprintf + * + * 2996-09-15 Brandon Long for Mutt 0.43: + * + * This was ugly. It is still ugly. I opted out of floating point + * numbers, but the formatter understands just about everything from the + * normal C string format, at least as far as I can tell from the Solaris + * 2.5 printf(3S) man page. + */ + +/* + * ToDo + * + * - Add wide character support. + * - Add support for "%a" and "%A" conversions. + * - Create test routines which predefine the expected results. Our test cases + * usually expose bugs in system implementations rather than in ours :-) + */ + +/* + * Usage + * + * 1) The following preprocessor macros should be defined to 1 if the feature or + * file in question is available on the target system (by using Autoconf or + * other means), though basic functionality should be available as long as + * HAVE_STDARG_H and HAVE_STDLIB_H are defined correctly: + * + * HAVE_VSNPRINTF + * HAVE_SNPRINTF + * HAVE_VASPRINTF + * HAVE_ASPRINTF + * HAVE_STDARG_H + * HAVE_STDDEF_H + * HAVE_STDINT_H + * HAVE_STDLIB_H + * HAVE_INTTYPES_H + * HAVE_LOCALE_H + * HAVE_LOCALECONV + * HAVE_LCONV_DECIMAL_POINT + * HAVE_LCONV_THOUSANDS_SEP + * HAVE_LONG_DOUBLE + * HAVE_LONG_LONG_INT + * HAVE_UNSIGNED_LONG_LONG_INT + * HAVE_INTMAX_T + * HAVE_UINTMAX_T + * HAVE_UINTPTR_T + * HAVE_PTRDIFF_T + * HAVE_VA_COPY + * HAVE___VA_COPY + * + * 2) The calls to the functions which should be replaced must be redefined + * throughout the project files (by using Autoconf or other means): + * + * #define vsnprintf rpl_vsnprintf + * #define snprintf rpl_snprintf + * #define vasprintf rpl_vasprintf + * #define asprintf rpl_asprintf + * + * 3) The required replacement functions should be declared in some header file + * included throughout the project files: + * + * #if HAVE_CONFIG_H + * #include + * #endif + * #if HAVE_STDARG_H + * #include + * #if !HAVE_VSNPRINTF + * int rpl_vsnprintf(char *, size_t, const char *, va_list); + * #endif + * #if !HAVE_SNPRINTF + * int rpl_snprintf(char *, size_t, const char *, ...); + * #endif + * #if !HAVE_VASPRINTF + * int rpl_vasprintf(char **, const char *, va_list); + * #endif + * #if !HAVE_ASPRINTF + * int rpl_asprintf(char **, const char *, ...); + * #endif + * #endif + * + * Autoconf macros for handling step 1 and step 2 are available at + * . + */ + +#if HAVE_CONFIG_H +#include +#else +#ifdef WIN32 +#define vsnprintf rpl_vsnprintf +#define snprintf rpl_snprintf +#define HAVE_VSNPRINTF 0 +#define HAVE_SNPRINTF 0 +#define HAVE_VASPRINTF 1 /* not needed */ +#define HAVE_ASPRINTF 1 /* not needed */ +#define HAVE_STDARG_H 1 +#define HAVE_STDDEF_H 1 +#define HAVE_STDINT_H 0 +#define HAVE_STDLIB_H 1 +#define HAVE_INTTYPES_H 0 +#define HAVE_LOCALE_H 0 +#define HAVE_LOCALECONV 0 +#define HAVE_LCONV_DECIMAL_POINT 0 +#define HAVE_LCONV_THOUSANDS_SEP 0 +#define HAVE_LONG_DOUBLE 0 +#define HAVE_LONG_LONG_INT 1 +#define HAVE_UNSIGNED_LONG_LONG_INT 1 +#define HAVE_INTMAX_T 0 +#define HAVE_UINTMAX_T 0 +#define HAVE_UINTPTR_T 1 +#define HAVE_PTRDIFF_T 1 +#define HAVE_VA_COPY 0 +#define HAVE___VA_COPY 0 +#else +#define HAVE_VSNPRINTF 1 +#define HAVE_SNPRINTF 1 +#define HAVE_VASPRINTF 1 +#define HAVE_ASPRINTF 1 +#endif +#endif /* HAVE_CONFIG_H */ + +#if !HAVE_SNPRINTF || !HAVE_VSNPRINTF || !HAVE_ASPRINTF || !HAVE_VASPRINTF +#include /* For NULL, size_t, vsnprintf(3), and vasprintf(3). */ +#ifdef VA_START +#undef VA_START +#endif /* defined(VA_START) */ +#ifdef VA_SHIFT +#undef VA_SHIFT +#endif /* defined(VA_SHIFT) */ +#if HAVE_STDARG_H +#include +#define VA_START(ap, last) va_start(ap, last) +#define VA_SHIFT(ap, value, type) /* No-op for ANSI C. */ +#else /* Assume is available. */ +#include +#define VA_START(ap, last) va_start(ap) /* "last" is ignored. */ +#define VA_SHIFT(ap, value, type) value = va_arg(ap, type) +#endif /* HAVE_STDARG_H */ + +#if !HAVE_VASPRINTF +#if HAVE_STDLIB_H +#include /* For malloc(3). */ +#endif /* HAVE_STDLIB_H */ +#ifdef VA_COPY +#undef VA_COPY +#endif /* defined(VA_COPY) */ +#ifdef VA_END_COPY +#undef VA_END_COPY +#endif /* defined(VA_END_COPY) */ +#if HAVE_VA_COPY +#define VA_COPY(dest, src) va_copy(dest, src) +#define VA_END_COPY(ap) va_end(ap) +#elif HAVE___VA_COPY +#define VA_COPY(dest, src) __va_copy(dest, src) +#define VA_END_COPY(ap) va_end(ap) +#else +#define VA_COPY(dest, src) (void)mymemcpy(&dest, &src, sizeof(va_list)) +#define VA_END_COPY(ap) /* No-op. */ +#define NEED_MYMEMCPY 1 +static void *mymemcpy(void *, void *, size_t); +#endif /* HAVE_VA_COPY */ +#endif /* !HAVE_VASPRINTF */ + +#if !HAVE_VSNPRINTF +#include /* For ERANGE and errno. */ +#include /* For *_MAX. */ +#if HAVE_INTTYPES_H +#include /* For intmax_t (if not defined in ). */ +#endif /* HAVE_INTTYPES_H */ +#if HAVE_LOCALE_H +#include /* For localeconv(3). */ +#endif /* HAVE_LOCALE_H */ +#if HAVE_STDDEF_H +#include /* For ptrdiff_t. */ +#endif /* HAVE_STDDEF_H */ +#if HAVE_STDINT_H +#include /* For intmax_t. */ +#endif /* HAVE_STDINT_H */ + +/* Support for unsigned long long int. We may also need ULLONG_MAX. */ +#ifndef ULONG_MAX /* We may need ULONG_MAX as a fallback. */ +#ifdef UINT_MAX +#define ULONG_MAX UINT_MAX +#else +#define ULONG_MAX INT_MAX +#endif /* defined(UINT_MAX) */ +#endif /* !defined(ULONG_MAX) */ +#ifdef ULLONG +#undef ULLONG +#endif /* defined(ULLONG) */ +#if HAVE_UNSIGNED_LONG_LONG_INT +#define ULLONG unsigned long long int +#ifndef ULLONG_MAX +#define ULLONG_MAX ULONG_MAX +#endif /* !defined(ULLONG_MAX) */ +#else +#define ULLONG unsigned long int +#ifdef ULLONG_MAX +#undef ULLONG_MAX +#endif /* defined(ULLONG_MAX) */ +#define ULLONG_MAX ULONG_MAX +#endif /* HAVE_LONG_LONG_INT */ + +/* Support for uintmax_t. We also need UINTMAX_MAX. */ +#ifdef UINTMAX_T +#undef UINTMAX_T +#endif /* defined(UINTMAX_T) */ +#if HAVE_UINTMAX_T || defined(uintmax_t) +#define UINTMAX_T uintmax_t +#ifndef UINTMAX_MAX +#define UINTMAX_MAX ULLONG_MAX +#endif /* !defined(UINTMAX_MAX) */ +#else +#define UINTMAX_T ULLONG +#ifdef UINTMAX_MAX +#undef UINTMAX_MAX +#endif /* defined(UINTMAX_MAX) */ +#define UINTMAX_MAX ULLONG_MAX +#endif /* HAVE_UINTMAX_T || defined(uintmax_t) */ + +/* Support for long double. */ +#ifndef LDOUBLE +#if HAVE_LONG_DOUBLE +#define LDOUBLE long double +#else +#define LDOUBLE double +#endif /* HAVE_LONG_DOUBLE */ +#endif /* !defined(LDOUBLE) */ + +/* Support for long long int. */ +#ifndef LLONG +#if HAVE_LONG_LONG_INT +#define LLONG long long int +#else +#define LLONG long int +#endif /* HAVE_LONG_LONG_INT */ +#endif /* !defined(LLONG) */ + +/* Support for intmax_t. */ +#ifndef INTMAX_T +#if HAVE_INTMAX_T || defined(intmax_t) +#define INTMAX_T intmax_t +#else +#define INTMAX_T LLONG +#endif /* HAVE_INTMAX_T || defined(intmax_t) */ +#endif /* !defined(INTMAX_T) */ + +/* Support for uintptr_t. */ +#ifndef UINTPTR_T +#if HAVE_UINTPTR_T || defined(uintptr_t) +#define UINTPTR_T uintptr_t +#else +#define UINTPTR_T unsigned long int +#endif /* HAVE_UINTPTR_T || defined(uintptr_t) */ +#endif /* !defined(UINTPTR_T) */ + +/* Support for ptrdiff_t. */ +#ifndef PTRDIFF_T +#if HAVE_PTRDIFF_T || defined(ptrdiff_t) +#define PTRDIFF_T ptrdiff_t +#else +#define PTRDIFF_T long int +#endif /* HAVE_PTRDIFF_T || defined(ptrdiff_t) */ +#endif /* !defined(PTRDIFF_T) */ + +/* + * We need an unsigned integer type corresponding to ptrdiff_t (cf. C99: + * 7.19.6.1, 7). However, we'll simply use PTRDIFF_T and convert it to an + * unsigned type if necessary. This should work just fine in practice. + */ +#ifndef UPTRDIFF_T +#define UPTRDIFF_T PTRDIFF_T +#endif /* !defined(UPTRDIFF_T) */ + +/* + * We need a signed integer type corresponding to size_t (cf. C99: 7.19.6.1, 7). + * However, we'll simply use size_t and convert it to a signed type if + * necessary. This should work just fine in practice. + */ +#ifndef SSIZE_T +#define SSIZE_T size_t +#endif /* !defined(SSIZE_T) */ + +/* Either ERANGE or E2BIG should be available everywhere. */ +#ifndef ERANGE +#define ERANGE E2BIG +#endif /* !defined(ERANGE) */ +#ifndef EOVERFLOW +#define EOVERFLOW ERANGE +#endif /* !defined(EOVERFLOW) */ + +/* + * Buffer size to hold the octal string representation of UINT128_MAX without + * nul-termination ("3777777777777777777777777777777777777777777"). + */ +#ifdef MAX_CONVERT_LENGTH +#undef MAX_CONVERT_LENGTH +#endif /* defined(MAX_CONVERT_LENGTH) */ +#define MAX_CONVERT_LENGTH 43 + +/* Format read states. */ +#define PRINT_S_DEFAULT 0 +#define PRINT_S_FLAGS 1 +#define PRINT_S_WIDTH 2 +#define PRINT_S_DOT 3 +#define PRINT_S_PRECISION 4 +#define PRINT_S_MOD 5 +#define PRINT_S_CONV 6 + +/* Format flags. */ +#define PRINT_F_MINUS (1 << 0) +#define PRINT_F_PLUS (1 << 1) +#define PRINT_F_SPACE (1 << 2) +#define PRINT_F_NUM (1 << 3) +#define PRINT_F_ZERO (1 << 4) +#define PRINT_F_QUOTE (1 << 5) +#define PRINT_F_UP (1 << 6) +#define PRINT_F_UNSIGNED (1 << 7) +#define PRINT_F_TYPE_G (1 << 8) +#define PRINT_F_TYPE_E (1 << 9) + +/* Conversion flags. */ +#define PRINT_C_CHAR 1 +#define PRINT_C_SHORT 2 +#define PRINT_C_LONG 3 +#define PRINT_C_LLONG 4 +#define PRINT_C_LDOUBLE 5 +#define PRINT_C_SIZE 6 +#define PRINT_C_PTRDIFF 7 +#define PRINT_C_INTMAX 8 + +#ifndef MAX +#define MAX(x, y) ((x >= y) ? x : y) +#endif /* !defined(MAX) */ +#ifndef CHARTOINT +#define CHARTOINT(ch) (ch - '0') +#endif /* !defined(CHARTOINT) */ +#ifndef ISDIGIT +#define ISDIGIT(ch) ('0' <= (unsigned char)ch && (unsigned char)ch <= '9') +#endif /* !defined(ISDIGIT) */ +#ifndef ISNAN +#define ISNAN(x) (x != x) +#endif /* !defined(ISNAN) */ +#ifndef ISINF +#define ISINF(x) (x != 0.0 && x + x == x) +#endif /* !defined(ISINF) */ + +#ifdef OUTCHAR +#undef OUTCHAR +#endif /* defined(OUTCHAR) */ +#define OUTCHAR(str, len, size, ch) \ +do { \ + if (len + 1 < size) \ + str[len] = ch; \ + (len)++; \ +} while (/* CONSTCOND */ 0) + +static void fmtstr(char *, size_t *, size_t, const char *, int, int, int); +static void fmtint(char *, size_t *, size_t, INTMAX_T, int, int, int, int); +static void fmtflt(char *, size_t *, size_t, LDOUBLE, int, int, int, int *); +static void printsep(char *, size_t *, size_t); +static int getnumsep(int); +static int getexponent(LDOUBLE); +static int convert(UINTMAX_T, char *, size_t, int, int); +static UINTMAX_T cast(LDOUBLE); +static UINTMAX_T myround(LDOUBLE); +static LDOUBLE mypow10(int); + +extern int errno; + +int +rpl_vsnprintf(char *str, size_t size, const char *format, va_list args) +{ + LDOUBLE fvalue; + INTMAX_T value; + unsigned char cvalue; + const char *strvalue; + INTMAX_T *intmaxptr; + PTRDIFF_T *ptrdiffptr; + SSIZE_T *sizeptr; + LLONG *llongptr; + long int *longptr; + int *intptr; + short int *shortptr; + signed char *charptr; + size_t len = 0; + int overflow = 0; + int base = 0; + int cflags = 0; + int flags = 0; + int width = 0; + int precision = -1; + int state = PRINT_S_DEFAULT; + char ch = *format++; + + /* + * C99 says: "If `n' is zero, nothing is written, and `s' may be a null + * pointer." (7.19.6.5, 2) We're forgiving and allow a NULL pointer + * even if a size larger than zero was specified. At least NetBSD's + * snprintf(3) does the same, as well as other versions of this file. + * (Though some of these versions will write to a non-NULL buffer even + * if a size of zero was specified, which violates the standard.) + */ + if (str == NULL && size != 0) + size = 0; + + while (ch != '\0') + switch (state) { + case PRINT_S_DEFAULT: + if (ch == '%') + state = PRINT_S_FLAGS; + else + OUTCHAR(str, len, size, ch); + ch = *format++; + break; + case PRINT_S_FLAGS: + switch (ch) { + case '-': + flags |= PRINT_F_MINUS; + ch = *format++; + break; + case '+': + flags |= PRINT_F_PLUS; + ch = *format++; + break; + case ' ': + flags |= PRINT_F_SPACE; + ch = *format++; + break; + case '#': + flags |= PRINT_F_NUM; + ch = *format++; + break; + case '0': + flags |= PRINT_F_ZERO; + ch = *format++; + break; + case '\'': /* SUSv2 flag (not in C99). */ + flags |= PRINT_F_QUOTE; + ch = *format++; + break; + default: + state = PRINT_S_WIDTH; + break; + } + break; + case PRINT_S_WIDTH: + if (ISDIGIT(ch)) { + ch = CHARTOINT(ch); + if (width > (INT_MAX - ch) / 10) { + overflow = 1; + goto out; + } + width = 10 * width + ch; + ch = *format++; + } else if (ch == '*') { + /* + * C99 says: "A negative field width argument is + * taken as a `-' flag followed by a positive + * field width." (7.19.6.1, 5) + */ + if ((width = va_arg(args, int)) < 0) { + flags |= PRINT_F_MINUS; + width = -width; + } + ch = *format++; + state = PRINT_S_DOT; + } else + state = PRINT_S_DOT; + break; + case PRINT_S_DOT: + if (ch == '.') { + state = PRINT_S_PRECISION; + ch = *format++; + } else + state = PRINT_S_MOD; + break; + case PRINT_S_PRECISION: + if (precision == -1) + precision = 0; + if (ISDIGIT(ch)) { + ch = CHARTOINT(ch); + if (precision > (INT_MAX - ch) / 10) { + overflow = 1; + goto out; + } + precision = 10 * precision + ch; + ch = *format++; + } else if (ch == '*') { + /* + * C99 says: "A negative precision argument is + * taken as if the precision were omitted." + * (7.19.6.1, 5) + */ + if ((precision = va_arg(args, int)) < 0) + precision = -1; + ch = *format++; + state = PRINT_S_MOD; + } else + state = PRINT_S_MOD; + break; + case PRINT_S_MOD: + switch (ch) { + case 'h': + ch = *format++; + if (ch == 'h') { /* It's a char. */ + ch = *format++; + cflags = PRINT_C_CHAR; + } else + cflags = PRINT_C_SHORT; + break; + case 'l': + ch = *format++; + if (ch == 'l') { /* It's a long long. */ + ch = *format++; + cflags = PRINT_C_LLONG; + } else + cflags = PRINT_C_LONG; + break; + case 'L': + cflags = PRINT_C_LDOUBLE; + ch = *format++; + break; + case 'j': + cflags = PRINT_C_INTMAX; + ch = *format++; + break; + case 't': + cflags = PRINT_C_PTRDIFF; + ch = *format++; + break; + case 'z': + cflags = PRINT_C_SIZE; + ch = *format++; + break; + } + state = PRINT_S_CONV; + break; + case PRINT_S_CONV: + switch (ch) { + case 'd': + /* FALLTHROUGH */ + case 'i': + switch (cflags) { + case PRINT_C_CHAR: + value = (signed char)va_arg(args, int); + break; + case PRINT_C_SHORT: + value = (short int)va_arg(args, int); + break; + case PRINT_C_LONG: + value = va_arg(args, long int); + break; + case PRINT_C_LLONG: + value = va_arg(args, LLONG); + break; + case PRINT_C_SIZE: + value = va_arg(args, SSIZE_T); + break; + case PRINT_C_INTMAX: + value = va_arg(args, INTMAX_T); + break; + case PRINT_C_PTRDIFF: + value = va_arg(args, PTRDIFF_T); + break; + default: + value = va_arg(args, int); + break; + } + fmtint(str, &len, size, value, 10, width, + precision, flags); + break; + case 'X': + flags |= PRINT_F_UP; + /* FALLTHROUGH */ + case 'x': + base = 16; + /* FALLTHROUGH */ + case 'o': + if (base == 0) + base = 8; + /* FALLTHROUGH */ + case 'u': + if (base == 0) + base = 10; + flags |= PRINT_F_UNSIGNED; + switch (cflags) { + case PRINT_C_CHAR: + value = (unsigned char)va_arg(args, + unsigned int); + break; + case PRINT_C_SHORT: + value = (unsigned short int)va_arg(args, + unsigned int); + break; + case PRINT_C_LONG: + value = va_arg(args, unsigned long int); + break; + case PRINT_C_LLONG: + value = va_arg(args, ULLONG); + break; + case PRINT_C_SIZE: + value = va_arg(args, size_t); + break; + case PRINT_C_INTMAX: + value = va_arg(args, UINTMAX_T); + break; + case PRINT_C_PTRDIFF: + value = va_arg(args, UPTRDIFF_T); + break; + default: + value = va_arg(args, unsigned int); + break; + } + fmtint(str, &len, size, value, base, width, + precision, flags); + break; + case 'A': + /* Not yet supported, we'll use "%F". */ + /* FALLTHROUGH */ + case 'F': + flags |= PRINT_F_UP; + case 'a': + /* Not yet supported, we'll use "%f". */ + /* FALLTHROUGH */ + case 'f': + if (cflags == PRINT_C_LDOUBLE) + fvalue = va_arg(args, LDOUBLE); + else + fvalue = va_arg(args, double); + fmtflt(str, &len, size, fvalue, width, + precision, flags, &overflow); + if (overflow) + goto out; + break; + case 'E': + flags |= PRINT_F_UP; + /* FALLTHROUGH */ + case 'e': + flags |= PRINT_F_TYPE_E; + if (cflags == PRINT_C_LDOUBLE) + fvalue = va_arg(args, LDOUBLE); + else + fvalue = va_arg(args, double); + fmtflt(str, &len, size, fvalue, width, + precision, flags, &overflow); + if (overflow) + goto out; + break; + case 'G': + flags |= PRINT_F_UP; + /* FALLTHROUGH */ + case 'g': + flags |= PRINT_F_TYPE_G; + if (cflags == PRINT_C_LDOUBLE) + fvalue = va_arg(args, LDOUBLE); + else + fvalue = va_arg(args, double); + /* + * If the precision is zero, it is treated as + * one (cf. C99: 7.19.6.1, 8). + */ + if (precision == 0) + precision = 1; + fmtflt(str, &len, size, fvalue, width, + precision, flags, &overflow); + if (overflow) + goto out; + break; + case 'c': + cvalue = va_arg(args, int); + OUTCHAR(str, len, size, cvalue); + break; + case 's': + strvalue = va_arg(args, char *); + fmtstr(str, &len, size, strvalue, width, + precision, flags); + break; + case 'p': + /* + * C99 says: "The value of the pointer is + * converted to a sequence of printing + * characters, in an implementation-defined + * manner." (C99: 7.19.6.1, 8) + */ + if ((strvalue = va_arg(args, void *)) == NULL) + /* + * We use the glibc format. BSD prints + * "0x0", SysV "0". + */ + fmtstr(str, &len, size, "(nil)", width, + -1, flags); + else { + /* + * We use the BSD/glibc format. SysV + * omits the "0x" prefix (which we emit + * using the PRINT_F_NUM flag). + */ + flags |= PRINT_F_NUM; + flags |= PRINT_F_UNSIGNED; + fmtint(str, &len, size, + (UINTPTR_T)strvalue, 16, width, + precision, flags); + } + break; + case 'n': + switch (cflags) { + case PRINT_C_CHAR: + charptr = va_arg(args, signed char *); + *charptr = len; + break; + case PRINT_C_SHORT: + shortptr = va_arg(args, short int *); + *shortptr = len; + break; + case PRINT_C_LONG: + longptr = va_arg(args, long int *); + *longptr = len; + break; + case PRINT_C_LLONG: + llongptr = va_arg(args, LLONG *); + *llongptr = len; + break; + case PRINT_C_SIZE: + /* + * C99 says that with the "z" length + * modifier, "a following `n' conversion + * specifier applies to a pointer to a + * signed integer type corresponding to + * size_t argument." (7.19.6.1, 7) + */ + sizeptr = va_arg(args, SSIZE_T *); + *sizeptr = len; + break; + case PRINT_C_INTMAX: + intmaxptr = va_arg(args, INTMAX_T *); + *intmaxptr = len; + break; + case PRINT_C_PTRDIFF: + ptrdiffptr = va_arg(args, PTRDIFF_T *); + *ptrdiffptr = len; + break; + default: + intptr = va_arg(args, int *); + *intptr = len; + break; + } + break; + case '%': /* Print a "%" character verbatim. */ + OUTCHAR(str, len, size, ch); + break; + default: /* Skip other characters. */ + break; + } + ch = *format++; + state = PRINT_S_DEFAULT; + base = cflags = flags = width = 0; + precision = -1; + break; + } +out: + if (len < size) + str[len] = '\0'; + else if (size > 0) + str[size - 1] = '\0'; + + if (overflow || len >= INT_MAX) { + errno = overflow ? EOVERFLOW : ERANGE; + return -1; + } + return (int)len; +} + +static void +fmtstr(char *str, size_t *len, size_t size, const char *value, int width, + int precision, int flags) +{ + int padlen, strln; /* Amount to pad. */ + int noprecision = (precision == -1); + + if (value == NULL) /* We're forgiving. */ + value = "(null)"; + + /* If a precision was specified, don't read the string past it. */ + for (strln = 0; value[strln] != '\0' && + (noprecision || strln < precision); strln++) + continue; + + if ((padlen = width - strln) < 0) + padlen = 0; + if (flags & PRINT_F_MINUS) /* Left justify. */ + padlen = -padlen; + + while (padlen > 0) { /* Leading spaces. */ + OUTCHAR(str, *len, size, ' '); + padlen--; + } + while (*value != '\0' && (noprecision || precision-- > 0)) { + OUTCHAR(str, *len, size, *value); + value++; + } + while (padlen < 0) { /* Trailing spaces. */ + OUTCHAR(str, *len, size, ' '); + padlen++; + } +} + +static void +fmtint(char *str, size_t *len, size_t size, INTMAX_T value, int base, int width, + int precision, int flags) +{ + UINTMAX_T uvalue; + char iconvert[MAX_CONVERT_LENGTH]; + char sign = 0; + char hexprefix = 0; + int spadlen = 0; /* Amount to space pad. */ + int zpadlen = 0; /* Amount to zero pad. */ + int pos; + int separators = (flags & PRINT_F_QUOTE); + int noprecision = (precision == -1); + + if (flags & PRINT_F_UNSIGNED) + uvalue = value; + else { + uvalue = (value >= 0) ? value : -value; + if (value < 0) + sign = '-'; + else if (flags & PRINT_F_PLUS) /* Do a sign. */ + sign = '+'; + else if (flags & PRINT_F_SPACE) + sign = ' '; + } + + pos = convert(uvalue, iconvert, sizeof(iconvert), base, + flags & PRINT_F_UP); + + if (flags & PRINT_F_NUM && uvalue != 0) { + /* + * C99 says: "The result is converted to an `alternative form'. + * For `o' conversion, it increases the precision, if and only + * if necessary, to force the first digit of the result to be a + * zero (if the value and precision are both 0, a single 0 is + * printed). For `x' (or `X') conversion, a nonzero result has + * `0x' (or `0X') prefixed to it." (7.19.6.1, 6) + */ + switch (base) { + case 8: + if (precision <= pos) + precision = pos + 1; + break; + case 16: + hexprefix = (flags & PRINT_F_UP) ? 'X' : 'x'; + break; + } + } + + if (separators) /* Get the number of group separators we'll print. */ + separators = getnumsep(pos); + + zpadlen = precision - pos - separators; + spadlen = width /* Minimum field width. */ + - separators /* Number of separators. */ + - MAX(precision, pos) /* Number of integer digits. */ + - ((sign != 0) ? 1 : 0) /* Will we print a sign? */ + - ((hexprefix != 0) ? 2 : 0); /* Will we print a prefix? */ + + if (zpadlen < 0) + zpadlen = 0; + if (spadlen < 0) + spadlen = 0; + + /* + * C99 says: "If the `0' and `-' flags both appear, the `0' flag is + * ignored. For `d', `i', `o', `u', `x', and `X' conversions, if a + * precision is specified, the `0' flag is ignored." (7.19.6.1, 6) + */ + if (flags & PRINT_F_MINUS) /* Left justify. */ + spadlen = -spadlen; + else if (flags & PRINT_F_ZERO && noprecision) { + zpadlen += spadlen; + spadlen = 0; + } + while (spadlen > 0) { /* Leading spaces. */ + OUTCHAR(str, *len, size, ' '); + spadlen--; + } + if (sign != 0) /* Sign. */ + OUTCHAR(str, *len, size, sign); + if (hexprefix != 0) { /* A "0x" or "0X" prefix. */ + OUTCHAR(str, *len, size, '0'); + OUTCHAR(str, *len, size, hexprefix); + } + while (zpadlen > 0) { /* Leading zeros. */ + OUTCHAR(str, *len, size, '0'); + zpadlen--; + } + while (pos > 0) { /* The actual digits. */ + pos--; + OUTCHAR(str, *len, size, iconvert[pos]); + if (separators > 0 && pos > 0 && pos % 3 == 0) + printsep(str, len, size); + } + while (spadlen < 0) { /* Trailing spaces. */ + OUTCHAR(str, *len, size, ' '); + spadlen++; + } +} + +static void +fmtflt(char *str, size_t *len, size_t size, LDOUBLE fvalue, int width, + int precision, int flags, int *overflow) +{ + LDOUBLE ufvalue; + UINTMAX_T intpart; + UINTMAX_T fracpart; + UINTMAX_T mask; + const char *infnan = NULL; + char iconvert[MAX_CONVERT_LENGTH]; + char fconvert[MAX_CONVERT_LENGTH]; + char econvert[4]; /* "e-12" (without nul-termination). */ + char esign = 0; + char sign = 0; + int leadfraczeros = 0; + int exponent = 0; + int emitpoint = 0; + int omitzeros = 0; + int omitcount = 0; + int padlen = 0; + int epos = 0; + int fpos = 0; + int ipos = 0; + int separators = (flags & PRINT_F_QUOTE); + int estyle = (flags & PRINT_F_TYPE_E); +#if HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT + struct lconv *lc = localeconv(); +#endif /* HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT */ + + /* + * AIX' man page says the default is 0, but C99 and at least Solaris' + * and NetBSD's man pages say the default is 6, and sprintf(3) on AIX + * defaults to 6. + */ + if (precision == -1) + precision = 6; + + if (fvalue < 0.0) + sign = '-'; + else if (flags & PRINT_F_PLUS) /* Do a sign. */ + sign = '+'; + else if (flags & PRINT_F_SPACE) + sign = ' '; + + if (ISNAN(fvalue)) + infnan = (flags & PRINT_F_UP) ? "NAN" : "nan"; + else if (ISINF(fvalue)) + infnan = (flags & PRINT_F_UP) ? "INF" : "inf"; + + if (infnan != NULL) { + if (sign != 0) + iconvert[ipos++] = sign; + while (*infnan != '\0') + iconvert[ipos++] = *infnan++; + fmtstr(str, len, size, iconvert, width, ipos, flags); + return; + } + + /* "%e" (or "%E") or "%g" (or "%G") conversion. */ + if (flags & PRINT_F_TYPE_E || flags & PRINT_F_TYPE_G) { + if (flags & PRINT_F_TYPE_G) { + /* + * For "%g" (and "%G") conversions, the precision + * specifies the number of significant digits, which + * includes the digits in the integer part. The + * conversion will or will not be using "e-style" (like + * "%e" or "%E" conversions) depending on the precision + * and on the exponent. However, the exponent can be + * affected by rounding the converted value, so we'll + * leave this decision for later. Until then, we'll + * assume that we're going to do an "e-style" conversion + * (in order to get the exponent calculated). For + * "e-style", the precision must be decremented by one. + */ + precision--; + /* + * For "%g" (and "%G") conversions, trailing zeros are + * removed from the fractional portion of the result + * unless the "#" flag was specified. + */ + if (!(flags & PRINT_F_NUM)) + omitzeros = 1; + } + exponent = getexponent(fvalue); + estyle = 1; + } + +again: + /* + * Sorry, we only support 9, 19, or 38 digits (that is, the number of + * digits of the 32-bit, the 64-bit, or the 128-bit UINTMAX_MAX value + * minus one) past the decimal point due to our conversion method. + */ + switch (sizeof(UINTMAX_T)) { + case 16: + if (precision > 38) + precision = 38; + break; + case 8: + if (precision > 19) + precision = 19; + break; + default: + if (precision > 9) + precision = 9; + break; + } + + ufvalue = (fvalue >= 0.0) ? fvalue : -fvalue; + if (estyle) /* We want exactly one integer digit. */ + ufvalue /= mypow10(exponent); + + if ((intpart = cast(ufvalue)) == UINTMAX_MAX) { + *overflow = 1; + return; + } + + /* + * Factor of ten with the number of digits needed for the fractional + * part. For example, if the precision is 3, the mask will be 1000. + */ + mask = mypow10(precision); + /* + * We "cheat" by converting the fractional part to integer by + * multiplying by a factor of ten. + */ + if ((fracpart = myround(mask * (ufvalue - intpart))) >= mask) { + /* + * For example, ufvalue = 2.99962, intpart = 2, and mask = 1000 + * (because precision = 3). Now, myround(1000 * 0.99962) will + * return 1000. So, the integer part must be incremented by one + * and the fractional part must be set to zero. + */ + intpart++; + fracpart = 0; + if (estyle && intpart == 10) { + /* + * The value was rounded up to ten, but we only want one + * integer digit if using "e-style". So, the integer + * part must be set to one and the exponent must be + * incremented by one. + */ + intpart = 1; + exponent++; + } + } + + /* + * Now that we know the real exponent, we can check whether or not to + * use "e-style" for "%g" (and "%G") conversions. If we don't need + * "e-style", the precision must be adjusted and the integer and + * fractional parts must be recalculated from the original value. + * + * C99 says: "Let P equal the precision if nonzero, 6 if the precision + * is omitted, or 1 if the precision is zero. Then, if a conversion + * with style `E' would have an exponent of X: + * + * - if P > X >= -4, the conversion is with style `f' (or `F') and + * precision P - (X + 1). + * + * - otherwise, the conversion is with style `e' (or `E') and precision + * P - 1." (7.19.6.1, 8) + * + * Note that we had decremented the precision by one. + */ + if (flags & PRINT_F_TYPE_G && estyle && + precision + 1 > exponent && exponent >= -4) { + precision -= exponent; + estyle = 0; + goto again; + } + + if (estyle) { + if (exponent < 0) { + exponent = -exponent; + esign = '-'; + } else + esign = '+'; + + /* + * Convert the exponent. The sizeof(econvert) is 4. So, the + * econvert buffer can hold e.g. "e+99" and "e-99". We don't + * support an exponent which contains more than two digits. + * Therefore, the following stores are safe. + */ + epos = convert(exponent, econvert, 2, 10, 0); + /* + * C99 says: "The exponent always contains at least two digits, + * and only as many more digits as necessary to represent the + * exponent." (7.19.6.1, 8) + */ + if (epos == 1) + econvert[epos++] = '0'; + econvert[epos++] = esign; + econvert[epos++] = (flags & PRINT_F_UP) ? 'E' : 'e'; + } + + /* Convert the integer part and the fractional part. */ + ipos = convert(intpart, iconvert, sizeof(iconvert), 10, 0); + if (fracpart != 0) /* convert() would return 1 if fracpart == 0. */ + fpos = convert(fracpart, fconvert, sizeof(fconvert), 10, 0); + + leadfraczeros = precision - fpos; + + if (omitzeros) { + if (fpos > 0) /* Omit trailing fractional part zeros. */ + while (omitcount < fpos && fconvert[omitcount] == '0') + omitcount++; + else { /* The fractional part is zero, omit it completely. */ + omitcount = precision; + leadfraczeros = 0; + } + precision -= omitcount; + } + + /* + * Print a decimal point if either the fractional part is non-zero + * and/or the "#" flag was specified. + */ + if (precision > 0 || flags & PRINT_F_NUM) + emitpoint = 1; + if (separators) /* Get the number of group separators we'll print. */ + separators = getnumsep(ipos); + + padlen = width /* Minimum field width. */ + - ipos /* Number of integer digits. */ + - epos /* Number of exponent characters. */ + - precision /* Number of fractional digits. */ + - separators /* Number of group separators. */ + - (emitpoint ? 1 : 0) /* Will we print a decimal point? */ + - ((sign != 0) ? 1 : 0); /* Will we print a sign character? */ + + if (padlen < 0) + padlen = 0; + + /* + * C99 says: "If the `0' and `-' flags both appear, the `0' flag is + * ignored." (7.19.6.1, 6) + */ + if (flags & PRINT_F_MINUS) /* Left justifty. */ + padlen = -padlen; + else if (flags & PRINT_F_ZERO && padlen > 0) { + if (sign != 0) { /* Sign. */ + OUTCHAR(str, *len, size, sign); + sign = 0; + } + while (padlen > 0) { /* Leading zeros. */ + OUTCHAR(str, *len, size, '0'); + padlen--; + } + } + while (padlen > 0) { /* Leading spaces. */ + OUTCHAR(str, *len, size, ' '); + padlen--; + } + if (sign != 0) /* Sign. */ + OUTCHAR(str, *len, size, sign); + while (ipos > 0) { /* Integer part. */ + ipos--; + OUTCHAR(str, *len, size, iconvert[ipos]); + if (separators > 0 && ipos > 0 && ipos % 3 == 0) + printsep(str, len, size); + } + if (emitpoint) { /* Decimal point. */ +#if HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT + if (lc->decimal_point != NULL && *lc->decimal_point != '\0') + OUTCHAR(str, *len, size, *lc->decimal_point); + else /* We'll always print some decimal point character. */ +#endif /* HAVE_LOCALECONV && HAVE_LCONV_DECIMAL_POINT */ + OUTCHAR(str, *len, size, '.'); + } + while (leadfraczeros > 0) { /* Leading fractional part zeros. */ + OUTCHAR(str, *len, size, '0'); + leadfraczeros--; + } + while (fpos > omitcount) { /* The remaining fractional part. */ + fpos--; + OUTCHAR(str, *len, size, fconvert[fpos]); + } + while (epos > 0) { /* Exponent. */ + epos--; + OUTCHAR(str, *len, size, econvert[epos]); + } + while (padlen < 0) { /* Trailing spaces. */ + OUTCHAR(str, *len, size, ' '); + padlen++; + } +} + +static void +printsep(char *str, size_t *len, size_t size) +{ +#if HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP + struct lconv *lc = localeconv(); + int i; + + if (lc->thousands_sep != NULL) + for (i = 0; lc->thousands_sep[i] != '\0'; i++) + OUTCHAR(str, *len, size, lc->thousands_sep[i]); + else +#endif /* HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP */ + OUTCHAR(str, *len, size, ','); +} + +static int +getnumsep(int digits) +{ + int separators = (digits - ((digits % 3 == 0) ? 1 : 0)) / 3; +#if HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP + int strln; + struct lconv *lc = localeconv(); + + /* We support an arbitrary separator length (including zero). */ + if (lc->thousands_sep != NULL) { + for (strln = 0; lc->thousands_sep[strln] != '\0'; strln++) + continue; + separators *= strln; + } +#endif /* HAVE_LOCALECONV && HAVE_LCONV_THOUSANDS_SEP */ + return separators; +} + +static int +getexponent(LDOUBLE value) +{ + LDOUBLE tmp = (value >= 0.0) ? value : -value; + int exponent = 0; + + /* + * We check for 99 > exponent > -99 in order to work around possible + * endless loops which could happen (at least) in the second loop (at + * least) if we're called with an infinite value. However, we checked + * for infinity before calling this function using our ISINF() macro, so + * this might be somewhat paranoid. + */ + while (tmp < 1.0 && tmp > 0.0 && --exponent > -99) + tmp *= 10; + while (tmp >= 10.0 && ++exponent < 99) + tmp /= 10; + + return exponent; +} + +static int +convert(UINTMAX_T value, char *buf, size_t size, int base, int caps) +{ + const char *digits = caps ? "0123456789ABCDEF" : "0123456789abcdef"; + size_t pos = 0; + + /* We return an unterminated buffer with the digits in reverse order. */ + do { + buf[pos++] = digits[value % base]; + value /= base; + } while (value != 0 && pos < size); + + return (int)pos; +} + +static UINTMAX_T +cast(LDOUBLE value) +{ + UINTMAX_T result; + + /* + * We check for ">=" and not for ">" because if UINTMAX_MAX cannot be + * represented exactly as an LDOUBLE value (but is less than LDBL_MAX), + * it may be increased to the nearest higher representable value for the + * comparison (cf. C99: 6.3.1.4, 2). It might then equal the LDOUBLE + * value although converting the latter to UINTMAX_T would overflow. + */ + if (value >= UINTMAX_MAX) + return UINTMAX_MAX; + + result = value; + /* + * At least on NetBSD/sparc64 3.0.2 and 4.99.30, casting long double to + * an integer type converts e.g. 1.9 to 2 instead of 1 (which violates + * the standard). Sigh. + */ + return (result <= value) ? result : result - 1; +} + +static UINTMAX_T +myround(LDOUBLE value) +{ + UINTMAX_T intpart = cast(value); + + return ((value -= intpart) < 0.5) ? intpart : intpart + 1; +} + +static LDOUBLE +mypow10(int exponent) +{ + LDOUBLE result = 1; + + while (exponent > 0) { + result *= 10; + exponent--; + } + while (exponent < 0) { + result /= 10; + exponent++; + } + return result; +} +#endif /* !HAVE_VSNPRINTF */ + +#if !HAVE_VASPRINTF +#if NEED_MYMEMCPY +void * +mymemcpy(void *dst, void *src, size_t len) +{ + const char *from = src; + char *to = dst; + + /* No need for optimization, we use this only to replace va_copy(3). */ + while (len-- > 0) + *to++ = *from++; + return dst; +} +#endif /* NEED_MYMEMCPY */ + +int +rpl_vasprintf(char **ret, const char *format, va_list ap) +{ + size_t size; + int len; + va_list aq; + + VA_COPY(aq, ap); + len = vsnprintf(NULL, 0, format, aq); + VA_END_COPY(aq); + if (len < 0 || (*ret = malloc(size = len + 1)) == NULL) + return -1; + return vsnprintf(*ret, size, format, ap); +} +#endif /* !HAVE_VASPRINTF */ + +#if !HAVE_SNPRINTF +#if HAVE_STDARG_H +int +rpl_snprintf(char *str, size_t size, const char *format, ...) +#else +int +rpl_snprintf(va_alist) va_dcl +#endif /* HAVE_STDARG_H */ +{ +#if !HAVE_STDARG_H + char *str; + size_t size; + char *format; +#endif /* HAVE_STDARG_H */ + va_list ap; + int len; + + VA_START(ap, format); + VA_SHIFT(ap, str, char *); + VA_SHIFT(ap, size, size_t); + VA_SHIFT(ap, format, const char *); + len = vsnprintf(str, size, format, ap); + va_end(ap); + return len; +} +#endif /* !HAVE_SNPRINTF */ + +#if !HAVE_ASPRINTF +#if HAVE_STDARG_H +int +rpl_asprintf(char **ret, const char *format, ...) +#else +int +rpl_asprintf(va_alist) va_dcl +#endif /* HAVE_STDARG_H */ +{ +#if !HAVE_STDARG_H + char **ret; + char *format; +#endif /* HAVE_STDARG_H */ + va_list ap; + int len; + + VA_START(ap, format); + VA_SHIFT(ap, ret, char **); + VA_SHIFT(ap, format, const char *); + len = vasprintf(ret, format, ap); + va_end(ap); + return len; +} +#endif /* !HAVE_ASPRINTF */ +#else /* Dummy declaration to avoid empty translation unit warnings. */ +int main(void); +#endif /* !HAVE_SNPRINTF || !HAVE_VSNPRINTF || !HAVE_ASPRINTF || [...] */ + + +/* vim: set joinspaces textwidth=80: */ -- cgit v1.2.3 From 4528287e040415c2071012d02f20979ff995c754 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 5 Mar 2008 10:50:14 +0100 Subject: gallium: michel's patch to rework texture/sampler binding interface Bind all the samplers/textures at once rather than piecemeal. This is easier for drivers to understand. --- src/gallium/auxiliary/draw/draw_aaline.c | 54 +- src/gallium/auxiliary/draw/draw_pstipple.c | 51 +- src/gallium/drivers/failover/fo_context.h | 7 +- src/gallium/drivers/failover/fo_state.c | 59 +- src/gallium/drivers/failover/fo_state_emit.c | 18 +- src/gallium/drivers/i915simple/i915_context.h | 675 +++++----- src/gallium/drivers/i915simple/i915_state.c | 46 +- src/gallium/drivers/i915simple/i915_state_emit.c | 6 - .../drivers/i915simple/i915_state_sampler.c | 6 +- src/gallium/drivers/i965simple/brw_context.h | 1365 ++++++++++---------- src/gallium/drivers/i965simple/brw_state.c | 896 ++++++------- .../drivers/i965simple/brw_wm_sampler_state.c | 3 +- .../drivers/i965simple/brw_wm_surface_state.c | 2 +- src/gallium/drivers/softpipe/sp_context.c | 486 +++---- src/gallium/drivers/softpipe/sp_context.h | 3 + src/gallium/drivers/softpipe/sp_quad_fs.c | 417 +++--- src/gallium/drivers/softpipe/sp_state.h | 390 +++--- src/gallium/drivers/softpipe/sp_state_sampler.c | 56 +- src/gallium/drivers/softpipe/sp_texture.c | 404 +++--- src/gallium/include/pipe/p_context.h | 8 +- src/mesa/state_tracker/st_atom_sampler.c | 16 +- src/mesa/state_tracker/st_atom_texture.c | 15 +- src/mesa/state_tracker/st_cb_drawpixels.c | 11 +- src/mesa/state_tracker/st_context.h | 5 +- src/mesa/state_tracker/st_gen_mipmap.c | 11 +- 25 files changed, 2561 insertions(+), 2449 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c index 7660e56fe6..3ec73b0800 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -78,7 +78,8 @@ struct aaline_stage void *sampler_cso; struct pipe_texture *texture; - uint sampler_unit; + uint num_samplers; + uint num_textures; /* @@ -98,11 +99,10 @@ struct aaline_stage void (*driver_bind_fs_state)(struct pipe_context *, void *); void (*driver_delete_fs_state)(struct pipe_context *, void *); - void (*driver_bind_sampler_state)(struct pipe_context *, unsigned, void *); - - void (*driver_set_sampler_texture)(struct pipe_context *, - unsigned sampler, - struct pipe_texture *); + void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, + void **); + void (*driver_set_sampler_textures)(struct pipe_context *, unsigned, + struct pipe_texture **); struct pipe_context *pipe; }; @@ -607,6 +607,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) auto struct aaline_stage *aaline = aaline_stage(stage); struct draw_context *draw = stage->draw; struct pipe_context *pipe = aaline->pipe; + uint num = MAX2(aaline->num_textures, aaline->num_samplers); assert(draw->rasterizer->line_smooth); @@ -624,8 +625,11 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) */ bind_aaline_fragment_shader(aaline); - aaline->driver_bind_sampler_state(pipe, aaline->sampler_unit, aaline->sampler_cso); - aaline->driver_set_sampler_texture(pipe, aaline->sampler_unit, aaline->texture); + aaline->state.sampler[num] = aaline->sampler_cso; + aaline->state.texture[num] = aaline->texture; + + aaline->driver_bind_sampler_states(pipe, num + 1, aaline->state.sampler); + aaline->driver_set_sampler_textures(pipe, num + 1, aaline->state.texture); /* now really draw first line */ stage->line = aaline_line; @@ -647,10 +651,10 @@ aaline_flush(struct draw_stage *stage, unsigned flags) aaline->driver_bind_fs_state(pipe, aaline->fs->driver_fs); /* XXX restore original texture, sampler state */ - aaline->driver_bind_sampler_state(pipe, aaline->sampler_unit, - aaline->state.sampler[aaline->sampler_unit]); - aaline->driver_set_sampler_texture(pipe, aaline->sampler_unit, - aaline->state.texture[aaline->sampler_unit]); + aaline->driver_bind_sampler_states(pipe, aaline->num_samplers, + aaline->state.sampler); + aaline->driver_set_sampler_textures(pipe, aaline->num_textures, + aaline->state.texture); draw->extra_vp_outputs.slot = 0; } @@ -745,26 +749,28 @@ aaline_delete_fs_state(struct pipe_context *pipe, void *fs) static void -aaline_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) +aaline_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); /* save current */ - aaline->state.sampler[unit] = sampler; + memcpy(aaline->state.sampler, sampler, num * sizeof(void *)); + aaline->num_samplers = num; /* pass-through */ - aaline->driver_bind_sampler_state(aaline->pipe, unit, sampler); + aaline->driver_bind_sampler_states(aaline->pipe, num, sampler); } static void -aaline_set_sampler_texture(struct pipe_context *pipe, - unsigned sampler, struct pipe_texture *texture) +aaline_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); /* save current */ - aaline->state.texture[sampler] = texture; + memcpy(aaline->state.texture, texture, num * sizeof(struct pipe_texture *)); + aaline->num_textures = num; /* pass-through */ - aaline->driver_set_sampler_texture(aaline->pipe, sampler, texture); + aaline->driver_set_sampler_textures(aaline->pipe, num, texture); } @@ -798,14 +804,14 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) aaline->driver_bind_fs_state = pipe->bind_fs_state; aaline->driver_delete_fs_state = pipe->delete_fs_state; - aaline->driver_bind_sampler_state = pipe->bind_sampler_state; - aaline->driver_set_sampler_texture = pipe->set_sampler_texture; + aaline->driver_bind_sampler_states = pipe->bind_sampler_states; + aaline->driver_set_sampler_textures = pipe->set_sampler_textures; /* override the driver's functions */ pipe->create_fs_state = aaline_create_fs_state; pipe->bind_fs_state = aaline_bind_fs_state; pipe->delete_fs_state = aaline_delete_fs_state; - pipe->bind_sampler_state = aaline_bind_sampler_state; - pipe->set_sampler_texture = aaline_set_sampler_texture; + pipe->bind_sampler_states = aaline_bind_sampler_states; + pipe->set_sampler_textures = aaline_set_sampler_textures; } diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index 2cfeb813b3..894b136f2c 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -68,7 +68,8 @@ struct pstip_stage void *sampler_cso; struct pipe_texture *texture; - uint sampler_unit; + uint num_samplers; + uint num_textures; /* * Currently bound state @@ -88,11 +89,10 @@ struct pstip_stage void (*driver_bind_fs_state)(struct pipe_context *, void *); void (*driver_delete_fs_state)(struct pipe_context *, void *); - void (*driver_bind_sampler_state)(struct pipe_context *, unsigned, void *); + void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, void **); - void (*driver_set_sampler_texture)(struct pipe_context *, - unsigned sampler, - struct pipe_texture *); + void (*driver_set_sampler_textures)(struct pipe_context *, unsigned, + struct pipe_texture **); void (*driver_set_polygon_stipple)(struct pipe_context *, const struct pipe_poly_stipple *); @@ -328,8 +328,6 @@ generate_pstip_fs(struct pstip_stage *pstip) tgsi_dump(pstip_fs.tokens, 0); #endif - pstip->sampler_unit = transform.maxSampler + 1; - #if 1 /* XXX remove */ if (transform.wincoordInput < 0) { pstip_fs.input_semantic_name[pstip_fs.num_inputs] = TGSI_SEMANTIC_POSITION; @@ -486,6 +484,7 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) struct pstip_stage *pstip = pstip_stage(stage); struct draw_context *draw = stage->draw; struct pipe_context *pipe = pstip->pipe; + uint num = MAX2(pstip->num_textures, pstip->num_samplers); assert(draw->rasterizer->poly_stipple_enable); @@ -494,8 +493,8 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) */ bind_pstip_fragment_shader(pstip); - pstip->driver_bind_sampler_state(pipe, pstip->sampler_unit, pstip->sampler_cso); - pstip->driver_set_sampler_texture(pipe, pstip->sampler_unit, pstip->texture); + pstip->driver_bind_sampler_states(pipe, num + 1, pstip->state.sampler); + pstip->driver_set_sampler_textures(pipe, num + 1, pstip->state.texture); /* now really draw first line */ stage->tri = passthrough_tri; @@ -517,10 +516,10 @@ pstip_flush(struct draw_stage *stage, unsigned flags) pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs); /* XXX restore original texture, sampler state */ - pstip->driver_bind_sampler_state(pipe, pstip->sampler_unit, - pstip->state.sampler[pstip->sampler_unit]); - pstip->driver_set_sampler_texture(pipe, pstip->sampler_unit, - pstip->state.texture[pstip->sampler_unit]); + pstip->driver_bind_sampler_states(pipe, pstip->num_samplers, + pstip->state.sampler); + pstip->driver_set_sampler_textures(pipe, pstip->num_textures, + pstip->state.texture); } @@ -613,26 +612,28 @@ pstip_delete_fs_state(struct pipe_context *pipe, void *fs) static void -pstip_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) +pstip_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); /* save current */ - pstip->state.sampler[unit] = sampler; + memcpy(pstip->state.sampler, sampler, num * sizeof(void *)); + pstip->num_samplers = num; /* pass-through */ - pstip->driver_bind_sampler_state(pstip->pipe, unit, sampler); + pstip->driver_bind_sampler_states(pstip->pipe, num, sampler); } static void -pstip_set_sampler_texture(struct pipe_context *pipe, - unsigned sampler, struct pipe_texture *texture) +pstip_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); /* save current */ - pstip->state.texture[sampler] = texture; + memcpy(pstip->state.texture, texture, num * sizeof(struct pipe_texture *)); + pstip->num_textures = num; /* pass-through */ - pstip->driver_set_sampler_texture(pstip->pipe, sampler, texture); + pstip->driver_set_sampler_textures(pstip->pipe, num, texture); } @@ -682,8 +683,8 @@ draw_install_pstipple_stage(struct draw_context *draw, pstip->driver_bind_fs_state = pipe->bind_fs_state; pstip->driver_delete_fs_state = pipe->delete_fs_state; - pstip->driver_bind_sampler_state = pipe->bind_sampler_state; - pstip->driver_set_sampler_texture = pipe->set_sampler_texture; + pstip->driver_bind_sampler_states = pipe->bind_sampler_states; + pstip->driver_set_sampler_textures = pipe->set_sampler_textures; pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple; /* override the driver's functions */ @@ -691,7 +692,7 @@ draw_install_pstipple_stage(struct draw_context *draw, pipe->bind_fs_state = pstip_bind_fs_state; pipe->delete_fs_state = pstip_delete_fs_state; - pipe->bind_sampler_state = pstip_bind_sampler_state; - pipe->set_sampler_texture = pstip_set_sampler_texture; + pipe->bind_sampler_states = pstip_bind_sampler_states; + pipe->set_sampler_textures = pstip_set_sampler_textures; pipe->set_polygon_stipple = pstip_set_polygon_stipple; } diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h index 1dc87291c9..8f3ad3ee79 100644 --- a/src/gallium/drivers/failover/fo_context.h +++ b/src/gallium/drivers/failover/fo_context.h @@ -87,12 +87,15 @@ struct failover_context { struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; + void *sw_sampler_state[PIPE_MAX_SAMPLERS]; + void *hw_sampler_state[PIPE_MAX_SAMPLERS]; + unsigned dirty; - unsigned dirty_sampler; - unsigned dirty_texture; unsigned dirty_vertex_buffer; unsigned dirty_vertex_element; + unsigned num_samplers; + unsigned num_textures; unsigned mode; struct pipe_context *hw; diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index 0fc5568da1..11eec2714e 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -28,6 +28,8 @@ /* Authors: Keith Whitwell */ +#include "pipe/p_inlines.h" + #include "fo_context.h" @@ -322,18 +324,27 @@ failover_create_sampler_state(struct pipe_context *pipe, } static void -failover_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) +failover_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) { struct failover_context *failover = failover_context(pipe); struct fo_state *state = (struct fo_state*)sampler; - failover->sampler[unit] = state; + uint i; + assert(num <= PIPE_MAX_SAMPLERS); + /* Check for no-op */ + if (num == failover->num_samplers && + !memcmp(failover->sampler, sampler, num * sizeof(void *))) + return; + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + failover->sw_sampler_state[i] = i < num ? state[i].sw_state : NULL; + failover->hw_sampler_state[i] = i < num ? state[i].hw_state : NULL; + } failover->dirty |= FO_NEW_SAMPLER; - failover->dirty_sampler |= (1<sw->bind_sampler_state(failover->sw, unit, - state->sw_state); - failover->hw->bind_sampler_state(failover->hw, unit, - state->hw_state); + failover->num_samplers = num; + failover->sw->bind_sampler_states(failover->sw, num, + failover->sw_sampler_state); + failover->hw->bind_sampler_states(failover->hw, num, + failover->hw_sampler_state); } static void @@ -351,17 +362,29 @@ failover_delete_sampler_state(struct pipe_context *pipe, void *sampler) static void -failover_set_sampler_texture(struct pipe_context *pipe, - unsigned unit, - struct pipe_texture *texture) +failover_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) { struct failover_context *failover = failover_context(pipe); - - failover->texture[unit] = texture; + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == failover->num_textures && + !memcmp(failover->texture, texture, num * sizeof(struct pipe_texture *))) + return; + for (i = 0; i < num; i++) + pipe_texture_reference((struct pipe_texture **) &failover->texture[i], + texture[i]); + for (i = num; i < failover->num_textures; i++) + pipe_texture_reference((struct pipe_texture **) &failover->texture[i], + NULL); failover->dirty |= FO_NEW_TEXTURE; - failover->dirty_texture |= (1<sw->set_sampler_texture( failover->sw, unit, texture ); - failover->hw->set_sampler_texture( failover->hw, unit, texture ); + failover->num_textures = num; + failover->sw->set_sampler_textures( failover->sw, num, texture ); + failover->hw->set_sampler_textures( failover->hw, num, texture ); } @@ -429,7 +452,7 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.bind_blend_state = failover_bind_blend_state; failover->pipe.delete_blend_state = failover_delete_blend_state; failover->pipe.create_sampler_state = failover_create_sampler_state; - failover->pipe.bind_sampler_state = failover_bind_sampler_state; + failover->pipe.bind_sampler_states = failover_bind_sampler_states; failover->pipe.delete_sampler_state = failover_delete_sampler_state; failover->pipe.create_depth_stencil_alpha_state = failover_create_depth_stencil_state; failover->pipe.bind_depth_stencil_alpha_state = failover_bind_depth_stencil_state; @@ -449,7 +472,7 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.set_framebuffer_state = failover_set_framebuffer_state; failover->pipe.set_polygon_stipple = failover_set_polygon_stipple; failover->pipe.set_scissor_state = failover_set_scissor_state; - failover->pipe.set_sampler_texture = failover_set_sampler_texture; + failover->pipe.set_sampler_textures = failover_set_sampler_textures; failover->pipe.set_viewport_state = failover_set_viewport_state; failover->pipe.set_vertex_buffer = failover_set_vertex_buffer; failover->pipe.set_vertex_element = failover_set_vertex_element; diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c index c663dd4947..3de931e04e 100644 --- a/src/gallium/drivers/failover/fo_state_emit.c +++ b/src/gallium/drivers/failover/fo_state_emit.c @@ -94,21 +94,13 @@ failover_state_emit( struct failover_context *failover ) failover->sw->set_viewport_state( failover->sw, &failover->viewport ); if (failover->dirty & FO_NEW_SAMPLER) { - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - if (failover->dirty_sampler & (1<sw->bind_sampler_state( failover->sw, i, - failover->sampler[i]->sw_state ); - } - } + failover->sw->bind_sampler_states( failover->sw, failover->num_samplers, + failover->sw_sampler_state ); } if (failover->dirty & FO_NEW_TEXTURE) { - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - if (failover->dirty_texture & (1<sw->set_sampler_texture( failover->sw, i, - failover->texture[i] ); - } - } + failover->sw->set_sampler_textures( failover->sw, failover->num_textures, + failover->texture ); } if (failover->dirty & FO_NEW_VERTEX_BUFFER) { @@ -132,6 +124,4 @@ failover_state_emit( struct failover_context *failover ) failover->dirty = 0; failover->dirty_vertex_element = 0; failover->dirty_vertex_buffer = 0; - failover->dirty_texture = 0; - failover->dirty_sampler = 0; } diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 6401112f83..746f18ba38 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -1,336 +1,339 @@ - /************************************************************************** - * - * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef I915_CONTEXT_H -#define I915_CONTEXT_H - - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "draw/draw_vertex.h" - -#include "tgsi/util/tgsi_scan.h" - - -#define I915_TEX_UNITS 8 - -#define I915_DYNAMIC_MODES4 0 -#define I915_DYNAMIC_DEPTHSCALE_0 1 /* just the header */ -#define I915_DYNAMIC_DEPTHSCALE_1 2 -#define I915_DYNAMIC_IAB 3 -#define I915_DYNAMIC_BC_0 4 /* just the header */ -#define I915_DYNAMIC_BC_1 5 -#define I915_DYNAMIC_BFO_0 6 -#define I915_DYNAMIC_BFO_1 7 -#define I915_DYNAMIC_STP_0 8 -#define I915_DYNAMIC_STP_1 9 -#define I915_DYNAMIC_SC_ENA_0 10 -#define I915_DYNAMIC_SC_RECT_0 11 -#define I915_DYNAMIC_SC_RECT_1 12 -#define I915_DYNAMIC_SC_RECT_2 13 -#define I915_MAX_DYNAMIC 14 - - -#define I915_IMMEDIATE_S0 0 -#define I915_IMMEDIATE_S1 1 -#define I915_IMMEDIATE_S2 2 -#define I915_IMMEDIATE_S3 3 -#define I915_IMMEDIATE_S4 4 -#define I915_IMMEDIATE_S5 5 -#define I915_IMMEDIATE_S6 6 -#define I915_IMMEDIATE_S7 7 -#define I915_MAX_IMMEDIATE 8 - -/* These must mach the order of LI0_STATE_* bits, as they will be used - * to generate hardware packets: - */ -#define I915_CACHE_STATIC 0 -#define I915_CACHE_DYNAMIC 1 /* handled specially */ -#define I915_CACHE_SAMPLER 2 -#define I915_CACHE_MAP 3 -#define I915_CACHE_PROGRAM 4 -#define I915_CACHE_CONSTANTS 5 -#define I915_MAX_CACHE 6 - -#define I915_MAX_CONSTANT 32 - - -/** See constant_flags[] below */ -#define I915_CONSTFLAG_USER 0x1f - - -/** - * Subclass of pipe_shader_state - */ -struct i915_fragment_shader -{ - struct pipe_shader_state state; - - struct tgsi_shader_info info; - - uint *program; - uint program_len; - - /** - * constants introduced during translation. - * These are placed at the end of the constant buffer and grow toward - * the beginning (eg: slot 31, 30 29, ...) - * User-provided constants start at 0. - * This allows both types of constants to co-exist (until there's too many) - * and doesn't require regenerating/changing the fragment program to - * shuffle constants around. - */ - uint num_constants; - float constants[I915_MAX_CONSTANT][4]; - - /** - * Status of each constant - * if I915_CONSTFLAG_PARAM, the value must be taken from the corresponding - * slot of the user's constant buffer. (set by pipe->set_constant_buffer()) - * Else, the bitmask indicates which components are occupied by immediates. - */ - ubyte constant_flags[I915_MAX_CONSTANT]; -}; - - -struct i915_cache_context; - -/* Use to calculate differences between state emitted to hardware and - * current driver-calculated state. - */ -struct i915_state -{ - unsigned immediate[I915_MAX_IMMEDIATE]; - unsigned dynamic[I915_MAX_DYNAMIC]; - - float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; - /** number of constants passed in through a constant buffer */ - uint num_user_constants[PIPE_SHADER_TYPES]; - - /* texture sampler state */ - unsigned sampler[I915_TEX_UNITS][3]; - unsigned sampler_enable_flags; - unsigned sampler_enable_nr; - - /* texture image buffers */ - unsigned texbuffer[I915_TEX_UNITS][2]; - - /** Describes the current hardware vertex layout */ - struct vertex_info vertex_info; - - unsigned id; /* track lost context events */ -}; - -struct i915_blend_state { - unsigned iab; - unsigned modes4; - unsigned LIS5; - unsigned LIS6; -}; - -struct i915_depth_stencil_state { - unsigned stencil_modes4; - unsigned bfo[2]; - unsigned stencil_LIS5; - unsigned depth_LIS6; -}; - -struct i915_rasterizer_state { - int light_twoside : 1; - unsigned st; - enum interp_mode color_interp; - - unsigned LIS4; - unsigned LIS7; - unsigned sc[1]; - - const struct pipe_rasterizer_state *templ; - - union { float f; unsigned u; } ds[2]; -}; - -struct i915_sampler_state { - unsigned state[3]; - const struct pipe_sampler_state *templ; -}; - - -struct i915_texture { - struct pipe_texture base; - - /* Derived from the above: - */ - unsigned pitch; - unsigned depth_pitch; /* per-image on i945? */ - unsigned total_height; - - unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; - - /* Explicitly store the offset of each image for each cube face or - * depth value. Pretty much have to accept that hardware formats - * are going to be so diverse that there is no unified way to - * compute the offsets of depth/cube images within a mipmap level, - * so have to store them as a lookup table: - */ - unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ - - /* Includes image offset tables: - */ - unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; - - /* The data is held here: - */ - struct pipe_buffer *buffer; -}; - -struct i915_context -{ - struct pipe_context pipe; - struct i915_winsys *winsys; - struct draw_context *draw; - - /* The most recent drawing state as set by the driver: - */ - const struct i915_blend_state *blend; - const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS]; - const struct i915_depth_stencil_state *depth_stencil; - const struct i915_rasterizer_state *rasterizer; - - struct i915_fragment_shader *fs; - - struct pipe_blend_color blend_color; - struct pipe_clip_state clip; - struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; - struct pipe_framebuffer_state framebuffer; - struct pipe_poly_stipple poly_stipple; - struct pipe_scissor_state scissor; - struct i915_texture *texture[PIPE_MAX_SAMPLERS]; - struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; - - unsigned dirty; - - unsigned *batch_start; - - /** Vertex buffer */ - struct pipe_buffer *vbo; - - struct i915_state current; - unsigned hardware_dirty; - - unsigned debug; -}; - -/* A flag for each state_tracker state object: - */ -#define I915_NEW_VIEWPORT 0x1 -#define I915_NEW_RASTERIZER 0x2 -#define I915_NEW_FS 0x4 -#define I915_NEW_BLEND 0x8 -#define I915_NEW_CLIP 0x10 -#define I915_NEW_SCISSOR 0x20 -#define I915_NEW_STIPPLE 0x40 -#define I915_NEW_FRAMEBUFFER 0x80 -#define I915_NEW_ALPHA_TEST 0x100 -#define I915_NEW_DEPTH_STENCIL 0x200 -#define I915_NEW_SAMPLER 0x400 -#define I915_NEW_TEXTURE 0x800 -#define I915_NEW_CONSTANTS 0x1000 -#define I915_NEW_VBO 0x2000 -#define I915_NEW_VS 0x4000 - - -/* Driver's internally generated state flags: - */ -#define I915_NEW_VERTEX_FORMAT 0x10000 - - -/* Dirty flags for hardware emit - */ -#define I915_HW_STATIC (1<set_constant_buffer()) + * Else, the bitmask indicates which components are occupied by immediates. + */ + ubyte constant_flags[I915_MAX_CONSTANT]; +}; + + +struct i915_cache_context; + +/* Use to calculate differences between state emitted to hardware and + * current driver-calculated state. + */ +struct i915_state +{ + unsigned immediate[I915_MAX_IMMEDIATE]; + unsigned dynamic[I915_MAX_DYNAMIC]; + + float constants[PIPE_SHADER_TYPES][I915_MAX_CONSTANT][4]; + /** number of constants passed in through a constant buffer */ + uint num_user_constants[PIPE_SHADER_TYPES]; + + /* texture sampler state */ + unsigned sampler[I915_TEX_UNITS][3]; + unsigned sampler_enable_flags; + unsigned sampler_enable_nr; + + /* texture image buffers */ + unsigned texbuffer[I915_TEX_UNITS][2]; + + /** Describes the current hardware vertex layout */ + struct vertex_info vertex_info; + + unsigned id; /* track lost context events */ +}; + +struct i915_blend_state { + unsigned iab; + unsigned modes4; + unsigned LIS5; + unsigned LIS6; +}; + +struct i915_depth_stencil_state { + unsigned stencil_modes4; + unsigned bfo[2]; + unsigned stencil_LIS5; + unsigned depth_LIS6; +}; + +struct i915_rasterizer_state { + int light_twoside : 1; + unsigned st; + enum interp_mode color_interp; + + unsigned LIS4; + unsigned LIS7; + unsigned sc[1]; + + const struct pipe_rasterizer_state *templ; + + union { float f; unsigned u; } ds[2]; +}; + +struct i915_sampler_state { + unsigned state[3]; + const struct pipe_sampler_state *templ; +}; + + +struct i915_texture { + struct pipe_texture base; + + /* Derived from the above: + */ + unsigned pitch; + unsigned depth_pitch; /* per-image on i945? */ + unsigned total_height; + + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + + /* Explicitly store the offset of each image for each cube face or + * depth value. Pretty much have to accept that hardware formats + * are going to be so diverse that there is no unified way to + * compute the offsets of depth/cube images within a mipmap level, + * so have to store them as a lookup table: + */ + unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ + + /* Includes image offset tables: + */ + unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; +}; + +struct i915_context +{ + struct pipe_context pipe; + struct i915_winsys *winsys; + struct draw_context *draw; + + /* The most recent drawing state as set by the driver: + */ + const struct i915_blend_state *blend; + const struct i915_sampler_state *sampler[PIPE_MAX_SAMPLERS]; + const struct i915_depth_stencil_state *depth_stencil; + const struct i915_rasterizer_state *rasterizer; + + struct i915_fragment_shader *fs; + + struct pipe_blend_color blend_color; + struct pipe_clip_state clip; + struct pipe_constant_buffer constants[PIPE_SHADER_TYPES]; + struct pipe_framebuffer_state framebuffer; + struct pipe_poly_stipple poly_stipple; + struct pipe_scissor_state scissor; + struct i915_texture *texture[PIPE_MAX_SAMPLERS]; + struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; + + unsigned dirty; + + unsigned num_samplers; + unsigned num_textures; + + unsigned *batch_start; + + /** Vertex buffer */ + struct pipe_buffer *vbo; + + struct i915_state current; + unsigned hardware_dirty; + + unsigned debug; +}; + +/* A flag for each state_tracker state object: + */ +#define I915_NEW_VIEWPORT 0x1 +#define I915_NEW_RASTERIZER 0x2 +#define I915_NEW_FS 0x4 +#define I915_NEW_BLEND 0x8 +#define I915_NEW_CLIP 0x10 +#define I915_NEW_SCISSOR 0x20 +#define I915_NEW_STIPPLE 0x40 +#define I915_NEW_FRAMEBUFFER 0x80 +#define I915_NEW_ALPHA_TEST 0x100 +#define I915_NEW_DEPTH_STENCIL 0x200 +#define I915_NEW_SAMPLER 0x400 +#define I915_NEW_TEXTURE 0x800 +#define I915_NEW_CONSTANTS 0x1000 +#define I915_NEW_VBO 0x2000 +#define I915_NEW_VS 0x4000 + + +/* Driver's internally generated state flags: + */ +#define I915_NEW_VERTEX_FORMAT 0x10000 + + +/* Dirty flags for hardware emit + */ +#define I915_HW_STATIC (1<sampler[unit] = (const struct i915_sampler_state*)sampler; + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == i915->num_samplers && + !memcmp(i915->sampler, sampler, num * sizeof(void *))) + return; + + memcpy(i915->sampler, sampler, num * sizeof(void *)); + memset(&i915->sampler[num], 0, (PIPE_MAX_SAMPLERS - num) * sizeof(void *)); + + i915->num_samplers = num; i915->dirty |= I915_NEW_SAMPLER; } @@ -526,14 +535,29 @@ static void i915_set_constant_buffer(struct pipe_context *pipe, } -static void i915_set_sampler_texture(struct pipe_context *pipe, - unsigned sampler, - struct pipe_texture *texture) +static void i915_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) { struct i915_context *i915 = i915_context(pipe); + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == i915->num_textures && + !memcmp(i915->texture, texture, num * sizeof(struct pipe_texture *))) + return; + + for (i = 0; i < num; i++) + pipe_texture_reference((struct pipe_texture **) &i915->texture[i], + texture[i]); + + for (i = num; i < i915->num_textures; i++) + pipe_texture_reference((struct pipe_texture **) &i915->texture[i], + NULL); - pipe_texture_reference((struct pipe_texture **) &i915->texture[sampler], - texture); + i915->num_textures = num; i915->dirty |= I915_NEW_TEXTURE; } @@ -691,7 +715,7 @@ i915_init_state_functions( struct i915_context *i915 ) i915->pipe.delete_blend_state = i915_delete_blend_state; i915->pipe.create_sampler_state = i915_create_sampler_state; - i915->pipe.bind_sampler_state = i915_bind_sampler_state; + i915->pipe.bind_sampler_states = i915_bind_sampler_states; i915->pipe.delete_sampler_state = i915_delete_sampler_state; i915->pipe.create_depth_stencil_alpha_state = i915_create_depth_stencil_state; @@ -715,7 +739,7 @@ i915_init_state_functions( struct i915_context *i915 ) i915->pipe.set_polygon_stipple = i915_set_polygon_stipple; i915->pipe.set_scissor_state = i915_set_scissor_state; - i915->pipe.set_sampler_texture = i915_set_sampler_texture; + i915->pipe.set_sampler_textures = i915_set_sampler_textures; i915->pipe.set_viewport_state = i915_set_viewport_state; i915->pipe.set_vertex_buffer = i915_set_vertex_buffer; i915->pipe.set_vertex_element = i915_set_vertex_element; diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c index 6bbaac4e34..a7498d22b7 100644 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -267,12 +267,6 @@ i915_emit_hardware_state(struct i915_context *i915 ) /* 2 + I915_TEX_UNITS*3 dwords, I915_TEX_UNITS relocs */ if (i915->hardware_dirty & (I915_HW_MAP | I915_HW_SAMPLER)) { - /* XXX: we were refering to sampler state - * (current.sampler_enable_nr) below, but only checking - * I915_HW_MAP above. Should probably calculate the enabled - * flags separately - but there will be further rework of - * state so perhaps not necessary yet. - */ const uint nr = i915->current.sampler_enable_nr; if (nr) { const uint enabled = i915->current.sampler_enable_flags; diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c index 9c1a5bbbd6..9dbb1b1b23 100644 --- a/src/gallium/drivers/i915simple/i915_state_sampler.c +++ b/src/gallium/drivers/i915simple/i915_state_sampler.c @@ -106,7 +106,8 @@ void i915_update_samplers( struct i915_context *i915 ) i915->current.sampler_enable_nr = 0; i915->current.sampler_enable_flags = 0x0; - for (unit = 0; unit < I915_TEX_UNITS; unit++) { + for (unit = 0; unit < i915->num_textures && unit < i915->num_samplers; + unit++) { /* determine unit enable/disable by looking for a bound texture */ /* could also examine the fragment program? */ if (i915->texture[unit]) { @@ -219,7 +220,8 @@ i915_update_textures(struct i915_context *i915) { uint unit; - for (unit = 0; unit < I915_TEX_UNITS; unit++) { + for (unit = 0; unit < i915->num_textures && unit < i915->num_samplers; + unit++) { /* determine unit enable/disable by looking for a bound texture */ /* could also examine the fragment program? */ if (i915->texture[unit]) { diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h index 4da3a8cffc..b83a13c3b6 100644 --- a/src/gallium/drivers/i965simple/brw_context.h +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -1,681 +1,684 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - - -#ifndef BRWCONTEXT_INC -#define BRWCONTEXT_INC - - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_state.h" - -#include "tgsi/util/tgsi_scan.h" - -#include "brw_structs.h" -#include "brw_winsys.h" - - -/* Glossary: - * - * URB - uniform resource buffer. A mid-sized buffer which is - * partitioned between the fixed function units and used for passing - * values (vertices, primitives, constants) between them. - * - * CURBE - constant URB entry. An urb region (entry) used to hold - * constant values which the fixed function units can be instructed to - * preload into the GRF when spawining a thread. - * - * VUE - vertex URB entry. An urb entry holding a vertex and usually - * a vertex header. The header contains control information and - * things like primitive type, Begin/end flags and clip codes. - * - * PUE - primitive URB entry. An urb entry produced by the setup (SF) - * unit holding rasterization and interpolation parameters. - * - * GRF - general register file. One of several register files - * addressable by programmed threads. The inputs (r0, payload, curbe, - * urb) of the thread are preloaded to this area before the thread is - * spawned. The registers are individually 8 dwords wide and suitable - * for general usage. Registers holding thread input values are not - * special and may be overwritten. - * - * MRF - message register file. Threads communicate (and terminate) - * by sending messages. Message parameters are placed in contigous - * MRF registers. All program output is via these messages. URB - * entries are populated by sending a message to the shared URB - * function containing the new data, together with a control word, - * often an unmodified copy of R0. - * - * R0 - GRF register 0. Typically holds control information used when - * sending messages to other threads. - * - * EU or GEN4 EU: The name of the programmable subsystem of the - * i965 hardware. Threads are executed by the EU, the registers - * described above are part of the EU architecture. - * - * Fixed function units: - * - * CS - Command streamer. Notional first unit, little software - * interaction. Holds the URB entries used for constant data, ie the - * CURBEs. - * - * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of - * this unit is responsible for pulling vertices out of vertex buffers - * in vram and injecting them into the processing pipe as VUEs. If - * enabled, it first passes them to a VS thread which is a good place - * for the driver to implement any active vertex shader. - * - * GS - Geometry Shader. This corresponds to a new DX10 concept. If - * enabled, incoming strips etc are passed to GS threads in individual - * line/triangle/point units. The GS thread may perform arbitary - * computation and emit whatever primtives with whatever vertices it - * chooses. This makes GS an excellent place to implement GL's - * unfilled polygon modes, though of course it is capable of much - * more. Additionally, GS is used to translate away primitives not - * handled by latter units, including Quads and Lineloops. - * - * CS - Clipper. Mesa's clipping algorithms are imported to run on - * this unit. The fixed function part performs cliptesting against - * the 6 fixed clipplanes and makes descisions on whether or not the - * incoming primitive needs to be passed to a thread for clipping. - * User clip planes are handled via cooperation with the VS thread. - * - * SF - Strips Fans or Setup: Triangles are prepared for - * rasterization. Interpolation coefficients are calculated. - * Flatshading and two-side lighting usually performed here. - * - * WM - Windower. Interpolation of vertex attributes performed here. - * Fragment shader implemented here. SIMD aspects of EU taken full - * advantage of, as pixels are processed in blocks of 16. - * - * CC - Color Calculator. No EU threads associated with this unit. - * Handles blending and (presumably) depth and stencil testing. - */ - -#define BRW_MAX_CURBE (32*16) - -struct brw_context; -struct brw_winsys; - - -/* Raised when we receive new state across the pipe interface: - */ -#define BRW_NEW_VIEWPORT 0x1 -#define BRW_NEW_RASTERIZER 0x2 -#define BRW_NEW_FS 0x4 -#define BRW_NEW_BLEND 0x8 -#define BRW_NEW_CLIP 0x10 -#define BRW_NEW_SCISSOR 0x20 -#define BRW_NEW_STIPPLE 0x40 -#define BRW_NEW_FRAMEBUFFER 0x80 -#define BRW_NEW_ALPHA_TEST 0x100 -#define BRW_NEW_DEPTH_STENCIL 0x200 -#define BRW_NEW_SAMPLER 0x400 -#define BRW_NEW_TEXTURE 0x800 -#define BRW_NEW_CONSTANTS 0x1000 -#define BRW_NEW_VBO 0x2000 -#define BRW_NEW_VS 0x4000 - -/* Raised for other internal events: - */ -#define BRW_NEW_URB_FENCE 0x10000 -#define BRW_NEW_PSP 0x20000 -#define BRW_NEW_CURBE_OFFSETS 0x40000 -#define BRW_NEW_REDUCED_PRIMITIVE 0x80000 -#define BRW_NEW_PRIMITIVE 0x100000 -#define BRW_NEW_SCENE 0x200000 -#define BRW_NEW_SF_LINKAGE 0x400000 - -extern int BRW_DEBUG; - -#define DEBUG_TEXTURE 0x1 -#define DEBUG_STATE 0x2 -#define DEBUG_IOCTL 0x4 -#define DEBUG_PRIMS 0x8 -#define DEBUG_VERTS 0x10 -#define DEBUG_FALLBACKS 0x20 -#define DEBUG_VERBOSE 0x40 -#define DEBUG_DRI 0x80 -#define DEBUG_DMA 0x100 -#define DEBUG_SANITY 0x200 -#define DEBUG_SYNC 0x400 -#define DEBUG_SLEEP 0x800 -#define DEBUG_PIXEL 0x1000 -#define DEBUG_STATS 0x2000 -#define DEBUG_TILE 0x4000 -#define DEBUG_SINGLE_THREAD 0x8000 -#define DEBUG_WM 0x10000 -#define DEBUG_URB 0x20000 -#define DEBUG_VS 0x40000 -#define DEBUG_BATCH 0x80000 -#define DEBUG_BUFMGR 0x100000 -#define DEBUG_BLIT 0x200000 -#define DEBUG_REGION 0x400000 -#define DEBUG_MIPTREE 0x800000 - -#define DBG(...) do { \ - if (BRW_DEBUG & FILE_DEBUG_FLAG) \ - brw->pipe.winsys->printf(brw->pipe.winsys, __VA_ARGS__); \ -} while(0) - -#define PRINT(...) do { \ - brw->pipe.winsys->printf(brw->pipe.winsys, __VA_ARGS__); \ -} while(0) - -struct brw_state_flags { - unsigned cache; - unsigned brw; -}; - - -struct brw_vertex_program { - struct pipe_shader_state program; - struct tgsi_shader_info info; - int id; -}; - - -struct brw_fragment_program { - struct pipe_shader_state program; - struct tgsi_shader_info info; - - boolean UsesDepth; /* XXX add this to tgsi_shader_info? */ - int id; -}; - - -struct pipe_setup_linkage { - struct { - unsigned vp_output:5; - unsigned interp_mode:4; - unsigned bf_vp_output:5; - } fp_input[PIPE_MAX_SHADER_INPUTS]; - - unsigned fp_input_count:5; - unsigned max_vp_output:5; -}; - - - -struct brw_texture { - struct pipe_texture base; - - /* Derived from the above: - */ - unsigned pitch; - unsigned depth_pitch; /* per-image on i945? */ - unsigned total_height; - - unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; - - /* Explicitly store the offset of each image for each cube face or - * depth value. Pretty much have to accept that hardware formats - * are going to be so diverse that there is no unified way to - * compute the offsets of depth/cube images within a mipmap level, - * so have to store them as a lookup table: - */ - unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ - - /* Includes image offset tables: - */ - unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; - - /* The data is held here: - */ - struct pipe_buffer *buffer; -}; - -/* Data about a particular attempt to compile a program. Note that - * there can be many of these, each in a different GL state - * corresponding to a different brw_wm_prog_key struct, with different - * compiled programs: - */ -/* Data about a particular attempt to compile a program. Note that - * there can be many of these, each in a different GL state - * corresponding to a different brw_wm_prog_key struct, with different - * compiled programs: - */ - -struct brw_wm_prog_data { - unsigned curb_read_length; - unsigned urb_read_length; - - unsigned first_curbe_grf; - unsigned total_grf; - unsigned total_scratch; - - /* Internally generated constants for the CURBE. These are loaded - * ahead of the data from the constant buffer. - */ - const float internal_const[8]; - unsigned nr_internal_consts; - unsigned max_const; - - boolean error; -}; - -struct brw_sf_prog_data { - unsigned urb_read_length; - unsigned total_grf; - - /* Each vertex may have upto 12 attributes, 4 components each, - * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11 - * rows. - * - * Actually we use 4 for each, so call it 12 rows. - */ - unsigned urb_entry_size; -}; - -struct brw_clip_prog_data { - unsigned curb_read_length; /* user planes? */ - unsigned clip_mode; - unsigned urb_read_length; - unsigned total_grf; -}; - -struct brw_gs_prog_data { - unsigned urb_read_length; - unsigned total_grf; -}; - -struct brw_vs_prog_data { - unsigned curb_read_length; - unsigned urb_read_length; - unsigned total_grf; - unsigned outputs_written; - - unsigned inputs_read; - - unsigned max_const; - - float imm_buf[PIPE_MAX_CONSTANT][4]; - unsigned num_imm; - unsigned num_consts; - - /* Used for calculating urb partitions: - */ - unsigned urb_entry_size; -}; - - -#define BRW_MAX_TEX_UNIT 8 -#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 - -/* Create a fixed sized struct for caching binding tables: - */ -struct brw_surface_binding_table { - unsigned surf_ss_offset[BRW_WM_MAX_SURF]; -}; - - -struct brw_cache; - -struct brw_mem_pool { - struct pipe_buffer *buffer; - - unsigned size; - unsigned offset; /* offset of first free byte */ - - struct brw_context *brw; -}; - -struct brw_cache_item { - unsigned hash; - unsigned key_size; /* for variable-sized keys */ - const void *key; - - unsigned offset; /* offset within pool's buffer */ - unsigned data_size; - - struct brw_cache_item *next; -}; - - - -struct brw_cache { - unsigned id; - - const char *name; - - struct brw_context *brw; - struct brw_mem_pool *pool; - - struct brw_cache_item **items; - unsigned size, n_items; - - unsigned key_size; /* for fixed-size keys */ - unsigned aux_size; - - unsigned last_addr; /* offset of active item */ -}; - - - - -/* Considered adding a member to this struct to document which flags - * an update might raise so that ordering of the state atoms can be - * checked or derived at runtime. Dropped the idea in favor of having - * a debug mode where the state is monitored for flags which are - * raised that have already been tested against. - */ -struct brw_tracked_state { - struct brw_state_flags dirty; - void (*update)( struct brw_context *brw ); -}; - - -/* Flags for brw->state.cache. - */ -#define CACHE_NEW_CC_VP (1< 32. Wouldn't life - * be easier if C allowed arrays of packed elements? - */ -#define ATTRIB_BIT_DWORDS ((PIPE_ATTRIB_MAX+31)/32) - - - - -struct brw_vertex_info { - unsigned varying; /* varying:1[PIPE_ATTRIB_MAX] */ - unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_ATTRIB_MAX] */ -}; - - - - - -struct brw_context -{ - struct pipe_context pipe; - struct brw_winsys *winsys; - - unsigned primitive; - unsigned reduced_primitive; - - boolean emit_state_always; - - struct { - struct brw_state_flags dirty; - } state; - - - struct { - const struct pipe_blend_state *Blend; - const struct pipe_depth_stencil_alpha_state *DepthStencil; - const struct pipe_poly_stipple *PolygonStipple; - const struct pipe_rasterizer_state *Raster; - const struct pipe_sampler_state *Samplers[PIPE_MAX_SAMPLERS]; - const struct brw_vertex_program *VertexProgram; - const struct brw_fragment_program *FragmentProgram; - - struct pipe_clip_state Clip; - struct pipe_blend_color BlendColor; - struct pipe_scissor_state Scissor; - struct pipe_viewport_state Viewport; - struct pipe_framebuffer_state FrameBuffer; - - const struct pipe_constant_buffer *Constants[2]; - const struct brw_texture *Texture[PIPE_MAX_SAMPLERS]; - } attribs; - - struct brw_mem_pool pool[BRW_MAX_POOL]; - struct brw_cache cache[BRW_MAX_CACHE]; - struct brw_cached_batch_item *cached_batch_items; - - struct { - - /* Arrays with buffer objects to copy non-bufferobj arrays into - * for upload: - */ - const struct pipe_vertex_buffer *vbo_array[PIPE_ATTRIB_MAX]; - - struct brw_vertex_element_state inputs[PIPE_ATTRIB_MAX]; - -#define BRW_NR_UPLOAD_BUFS 17 -#define BRW_UPLOAD_INIT_SIZE (128*1024) - - /* Summary of size and varying of active arrays, so we can check - * for changes to this state: - */ - struct brw_vertex_info info; - } vb; - - - unsigned hardware_dirty; - unsigned dirty; - unsigned pci_id; - /* BRW_NEW_URB_ALLOCATIONS: - */ - struct { - unsigned vsize; /* vertex size plus header in urb registers */ - unsigned csize; /* constant buffer size in urb registers */ - unsigned sfsize; /* setup data size in urb registers */ - - boolean constrained; - - unsigned nr_vs_entries; - unsigned nr_gs_entries; - unsigned nr_clip_entries; - unsigned nr_sf_entries; - unsigned nr_cs_entries; - -/* unsigned vs_size; */ -/* unsigned gs_size; */ -/* unsigned clip_size; */ -/* unsigned sf_size; */ -/* unsigned cs_size; */ - - unsigned vs_start; - unsigned gs_start; - unsigned clip_start; - unsigned sf_start; - unsigned cs_start; - } urb; - - - /* BRW_NEW_CURBE_OFFSETS: - */ - struct { - unsigned wm_start; - unsigned wm_size; - unsigned clip_start; - unsigned clip_size; - unsigned vs_start; - unsigned vs_size; - unsigned total_size; - - unsigned gs_offset; - - float *last_buf; - unsigned last_bufsz; - } curbe; - - struct { - struct brw_vs_prog_data *prog_data; - - unsigned prog_gs_offset; - unsigned state_gs_offset; - } vs; - - struct { - struct brw_gs_prog_data *prog_data; - - boolean prog_active; - unsigned prog_gs_offset; - unsigned state_gs_offset; - } gs; - - struct { - struct brw_clip_prog_data *prog_data; - - unsigned prog_gs_offset; - unsigned vp_gs_offset; - unsigned state_gs_offset; - } clip; - - - struct { - struct brw_sf_prog_data *prog_data; - - struct pipe_setup_linkage linkage; - - unsigned prog_gs_offset; - unsigned vp_gs_offset; - unsigned state_gs_offset; - } sf; - - struct { - struct brw_wm_prog_data *prog_data; - -// struct brw_wm_compiler *compile_data; - - - /** - * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER - * cache - */ - struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; - - unsigned render_surf; - unsigned nr_surfaces; - - unsigned max_threads; - struct pipe_buffer *scratch_buffer; - unsigned scratch_buffer_size; - - unsigned sampler_count; - unsigned sampler_gs_offset; - - struct brw_surface_binding_table bind; - unsigned bind_ss_offset; - - unsigned prog_gs_offset; - unsigned state_gs_offset; - } wm; - - - struct { - unsigned vp_gs_offset; - unsigned state_gs_offset; - } cc; - - - /* Used to give every program string a unique id - */ - unsigned program_id; -}; - - -#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) - - -/*====================================================================== - * brw_vtbl.c - */ -void brw_do_flush( struct brw_context *brw, - unsigned flags ); - - -/*====================================================================== - * brw_state.c - */ -void brw_validate_state(struct brw_context *brw); -void brw_init_state(struct brw_context *brw); -void brw_destroy_state(struct brw_context *brw); - - -/*====================================================================== - * brw_tex.c - */ -void brwUpdateTextureState( struct brw_context *brw ); - - -/* brw_urb.c - */ -void brw_upload_urb_fence(struct brw_context *brw); - -void brw_upload_constant_buffer_state(struct brw_context *brw); - -void brw_init_surface_functions(struct brw_context *brw); -void brw_init_state_functions(struct brw_context *brw); -void brw_init_flush_functions(struct brw_context *brw); -void brw_init_string_functions(struct brw_context *brw); - -/*====================================================================== - * Inline conversion functions. These are better-typed than the - * macros used previously: - */ -static inline struct brw_context * -brw_context( struct pipe_context *ctx ) -{ - return (struct brw_context *)ctx; -} - -#endif - +/* + Copyright (C) Intel Corp. 2006. All Rights Reserved. + Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to + develop this 3D driver. + + Permission is hereby granted, free of charge, to any person obtaining + a copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sublicense, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial + portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **********************************************************************/ + /* + * Authors: + * Keith Whitwell + */ + + +#ifndef BRWCONTEXT_INC +#define BRWCONTEXT_INC + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_state.h" + +#include "tgsi/util/tgsi_scan.h" + +#include "brw_structs.h" +#include "brw_winsys.h" + + +/* Glossary: + * + * URB - uniform resource buffer. A mid-sized buffer which is + * partitioned between the fixed function units and used for passing + * values (vertices, primitives, constants) between them. + * + * CURBE - constant URB entry. An urb region (entry) used to hold + * constant values which the fixed function units can be instructed to + * preload into the GRF when spawining a thread. + * + * VUE - vertex URB entry. An urb entry holding a vertex and usually + * a vertex header. The header contains control information and + * things like primitive type, Begin/end flags and clip codes. + * + * PUE - primitive URB entry. An urb entry produced by the setup (SF) + * unit holding rasterization and interpolation parameters. + * + * GRF - general register file. One of several register files + * addressable by programmed threads. The inputs (r0, payload, curbe, + * urb) of the thread are preloaded to this area before the thread is + * spawned. The registers are individually 8 dwords wide and suitable + * for general usage. Registers holding thread input values are not + * special and may be overwritten. + * + * MRF - message register file. Threads communicate (and terminate) + * by sending messages. Message parameters are placed in contigous + * MRF registers. All program output is via these messages. URB + * entries are populated by sending a message to the shared URB + * function containing the new data, together with a control word, + * often an unmodified copy of R0. + * + * R0 - GRF register 0. Typically holds control information used when + * sending messages to other threads. + * + * EU or GEN4 EU: The name of the programmable subsystem of the + * i965 hardware. Threads are executed by the EU, the registers + * described above are part of the EU architecture. + * + * Fixed function units: + * + * CS - Command streamer. Notional first unit, little software + * interaction. Holds the URB entries used for constant data, ie the + * CURBEs. + * + * VF/VS - Vertex Fetch / Vertex Shader. The fixed function part of + * this unit is responsible for pulling vertices out of vertex buffers + * in vram and injecting them into the processing pipe as VUEs. If + * enabled, it first passes them to a VS thread which is a good place + * for the driver to implement any active vertex shader. + * + * GS - Geometry Shader. This corresponds to a new DX10 concept. If + * enabled, incoming strips etc are passed to GS threads in individual + * line/triangle/point units. The GS thread may perform arbitary + * computation and emit whatever primtives with whatever vertices it + * chooses. This makes GS an excellent place to implement GL's + * unfilled polygon modes, though of course it is capable of much + * more. Additionally, GS is used to translate away primitives not + * handled by latter units, including Quads and Lineloops. + * + * CS - Clipper. Mesa's clipping algorithms are imported to run on + * this unit. The fixed function part performs cliptesting against + * the 6 fixed clipplanes and makes descisions on whether or not the + * incoming primitive needs to be passed to a thread for clipping. + * User clip planes are handled via cooperation with the VS thread. + * + * SF - Strips Fans or Setup: Triangles are prepared for + * rasterization. Interpolation coefficients are calculated. + * Flatshading and two-side lighting usually performed here. + * + * WM - Windower. Interpolation of vertex attributes performed here. + * Fragment shader implemented here. SIMD aspects of EU taken full + * advantage of, as pixels are processed in blocks of 16. + * + * CC - Color Calculator. No EU threads associated with this unit. + * Handles blending and (presumably) depth and stencil testing. + */ + +#define BRW_MAX_CURBE (32*16) + +struct brw_context; +struct brw_winsys; + + +/* Raised when we receive new state across the pipe interface: + */ +#define BRW_NEW_VIEWPORT 0x1 +#define BRW_NEW_RASTERIZER 0x2 +#define BRW_NEW_FS 0x4 +#define BRW_NEW_BLEND 0x8 +#define BRW_NEW_CLIP 0x10 +#define BRW_NEW_SCISSOR 0x20 +#define BRW_NEW_STIPPLE 0x40 +#define BRW_NEW_FRAMEBUFFER 0x80 +#define BRW_NEW_ALPHA_TEST 0x100 +#define BRW_NEW_DEPTH_STENCIL 0x200 +#define BRW_NEW_SAMPLER 0x400 +#define BRW_NEW_TEXTURE 0x800 +#define BRW_NEW_CONSTANTS 0x1000 +#define BRW_NEW_VBO 0x2000 +#define BRW_NEW_VS 0x4000 + +/* Raised for other internal events: + */ +#define BRW_NEW_URB_FENCE 0x10000 +#define BRW_NEW_PSP 0x20000 +#define BRW_NEW_CURBE_OFFSETS 0x40000 +#define BRW_NEW_REDUCED_PRIMITIVE 0x80000 +#define BRW_NEW_PRIMITIVE 0x100000 +#define BRW_NEW_SCENE 0x200000 +#define BRW_NEW_SF_LINKAGE 0x400000 + +extern int BRW_DEBUG; + +#define DEBUG_TEXTURE 0x1 +#define DEBUG_STATE 0x2 +#define DEBUG_IOCTL 0x4 +#define DEBUG_PRIMS 0x8 +#define DEBUG_VERTS 0x10 +#define DEBUG_FALLBACKS 0x20 +#define DEBUG_VERBOSE 0x40 +#define DEBUG_DRI 0x80 +#define DEBUG_DMA 0x100 +#define DEBUG_SANITY 0x200 +#define DEBUG_SYNC 0x400 +#define DEBUG_SLEEP 0x800 +#define DEBUG_PIXEL 0x1000 +#define DEBUG_STATS 0x2000 +#define DEBUG_TILE 0x4000 +#define DEBUG_SINGLE_THREAD 0x8000 +#define DEBUG_WM 0x10000 +#define DEBUG_URB 0x20000 +#define DEBUG_VS 0x40000 +#define DEBUG_BATCH 0x80000 +#define DEBUG_BUFMGR 0x100000 +#define DEBUG_BLIT 0x200000 +#define DEBUG_REGION 0x400000 +#define DEBUG_MIPTREE 0x800000 + +#define DBG(...) do { \ + if (BRW_DEBUG & FILE_DEBUG_FLAG) \ + brw->pipe.winsys->printf(brw->pipe.winsys, __VA_ARGS__); \ +} while(0) + +#define PRINT(...) do { \ + brw->pipe.winsys->printf(brw->pipe.winsys, __VA_ARGS__); \ +} while(0) + +struct brw_state_flags { + unsigned cache; + unsigned brw; +}; + + +struct brw_vertex_program { + struct pipe_shader_state program; + struct tgsi_shader_info info; + int id; +}; + + +struct brw_fragment_program { + struct pipe_shader_state program; + struct tgsi_shader_info info; + + boolean UsesDepth; /* XXX add this to tgsi_shader_info? */ + int id; +}; + + +struct pipe_setup_linkage { + struct { + unsigned vp_output:5; + unsigned interp_mode:4; + unsigned bf_vp_output:5; + } fp_input[PIPE_MAX_SHADER_INPUTS]; + + unsigned fp_input_count:5; + unsigned max_vp_output:5; +}; + + + +struct brw_texture { + struct pipe_texture base; + + /* Derived from the above: + */ + unsigned pitch; + unsigned depth_pitch; /* per-image on i945? */ + unsigned total_height; + + unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; + + /* Explicitly store the offset of each image for each cube face or + * depth value. Pretty much have to accept that hardware formats + * are going to be so diverse that there is no unified way to + * compute the offsets of depth/cube images within a mipmap level, + * so have to store them as a lookup table: + */ + unsigned *image_offset[PIPE_MAX_TEXTURE_LEVELS]; /**< array [depth] of offsets */ + + /* Includes image offset tables: + */ + unsigned level_offset[PIPE_MAX_TEXTURE_LEVELS]; + + /* The data is held here: + */ + struct pipe_buffer *buffer; +}; + +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ +/* Data about a particular attempt to compile a program. Note that + * there can be many of these, each in a different GL state + * corresponding to a different brw_wm_prog_key struct, with different + * compiled programs: + */ + +struct brw_wm_prog_data { + unsigned curb_read_length; + unsigned urb_read_length; + + unsigned first_curbe_grf; + unsigned total_grf; + unsigned total_scratch; + + /* Internally generated constants for the CURBE. These are loaded + * ahead of the data from the constant buffer. + */ + const float internal_const[8]; + unsigned nr_internal_consts; + unsigned max_const; + + boolean error; +}; + +struct brw_sf_prog_data { + unsigned urb_read_length; + unsigned total_grf; + + /* Each vertex may have upto 12 attributes, 4 components each, + * except WPOS which requires only 2. (11*4 + 2) == 44 ==> 11 + * rows. + * + * Actually we use 4 for each, so call it 12 rows. + */ + unsigned urb_entry_size; +}; + +struct brw_clip_prog_data { + unsigned curb_read_length; /* user planes? */ + unsigned clip_mode; + unsigned urb_read_length; + unsigned total_grf; +}; + +struct brw_gs_prog_data { + unsigned urb_read_length; + unsigned total_grf; +}; + +struct brw_vs_prog_data { + unsigned curb_read_length; + unsigned urb_read_length; + unsigned total_grf; + unsigned outputs_written; + + unsigned inputs_read; + + unsigned max_const; + + float imm_buf[PIPE_MAX_CONSTANT][4]; + unsigned num_imm; + unsigned num_consts; + + /* Used for calculating urb partitions: + */ + unsigned urb_entry_size; +}; + + +#define BRW_MAX_TEX_UNIT 8 +#define BRW_WM_MAX_SURF BRW_MAX_TEX_UNIT + 1 + +/* Create a fixed sized struct for caching binding tables: + */ +struct brw_surface_binding_table { + unsigned surf_ss_offset[BRW_WM_MAX_SURF]; +}; + + +struct brw_cache; + +struct brw_mem_pool { + struct pipe_buffer *buffer; + + unsigned size; + unsigned offset; /* offset of first free byte */ + + struct brw_context *brw; +}; + +struct brw_cache_item { + unsigned hash; + unsigned key_size; /* for variable-sized keys */ + const void *key; + + unsigned offset; /* offset within pool's buffer */ + unsigned data_size; + + struct brw_cache_item *next; +}; + + + +struct brw_cache { + unsigned id; + + const char *name; + + struct brw_context *brw; + struct brw_mem_pool *pool; + + struct brw_cache_item **items; + unsigned size, n_items; + + unsigned key_size; /* for fixed-size keys */ + unsigned aux_size; + + unsigned last_addr; /* offset of active item */ +}; + + + + +/* Considered adding a member to this struct to document which flags + * an update might raise so that ordering of the state atoms can be + * checked or derived at runtime. Dropped the idea in favor of having + * a debug mode where the state is monitored for flags which are + * raised that have already been tested against. + */ +struct brw_tracked_state { + struct brw_state_flags dirty; + void (*update)( struct brw_context *brw ); +}; + + +/* Flags for brw->state.cache. + */ +#define CACHE_NEW_CC_VP (1< 32. Wouldn't life + * be easier if C allowed arrays of packed elements? + */ +#define ATTRIB_BIT_DWORDS ((PIPE_ATTRIB_MAX+31)/32) + + + + +struct brw_vertex_info { + unsigned varying; /* varying:1[PIPE_ATTRIB_MAX] */ + unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_ATTRIB_MAX] */ +}; + + + + + +struct brw_context +{ + struct pipe_context pipe; + struct brw_winsys *winsys; + + unsigned primitive; + unsigned reduced_primitive; + + boolean emit_state_always; + + struct { + struct brw_state_flags dirty; + } state; + + + struct { + const struct pipe_blend_state *Blend; + const struct pipe_depth_stencil_alpha_state *DepthStencil; + const struct pipe_poly_stipple *PolygonStipple; + const struct pipe_rasterizer_state *Raster; + const struct pipe_sampler_state *Samplers[PIPE_MAX_SAMPLERS]; + const struct brw_vertex_program *VertexProgram; + const struct brw_fragment_program *FragmentProgram; + + struct pipe_clip_state Clip; + struct pipe_blend_color BlendColor; + struct pipe_scissor_state Scissor; + struct pipe_viewport_state Viewport; + struct pipe_framebuffer_state FrameBuffer; + + const struct pipe_constant_buffer *Constants[2]; + const struct brw_texture *Texture[PIPE_MAX_SAMPLERS]; + } attribs; + + unsigned num_samplers; + unsigned num_textures; + + struct brw_mem_pool pool[BRW_MAX_POOL]; + struct brw_cache cache[BRW_MAX_CACHE]; + struct brw_cached_batch_item *cached_batch_items; + + struct { + + /* Arrays with buffer objects to copy non-bufferobj arrays into + * for upload: + */ + const struct pipe_vertex_buffer *vbo_array[PIPE_ATTRIB_MAX]; + + struct brw_vertex_element_state inputs[PIPE_ATTRIB_MAX]; + +#define BRW_NR_UPLOAD_BUFS 17 +#define BRW_UPLOAD_INIT_SIZE (128*1024) + + /* Summary of size and varying of active arrays, so we can check + * for changes to this state: + */ + struct brw_vertex_info info; + } vb; + + + unsigned hardware_dirty; + unsigned dirty; + unsigned pci_id; + /* BRW_NEW_URB_ALLOCATIONS: + */ + struct { + unsigned vsize; /* vertex size plus header in urb registers */ + unsigned csize; /* constant buffer size in urb registers */ + unsigned sfsize; /* setup data size in urb registers */ + + boolean constrained; + + unsigned nr_vs_entries; + unsigned nr_gs_entries; + unsigned nr_clip_entries; + unsigned nr_sf_entries; + unsigned nr_cs_entries; + +/* unsigned vs_size; */ +/* unsigned gs_size; */ +/* unsigned clip_size; */ +/* unsigned sf_size; */ +/* unsigned cs_size; */ + + unsigned vs_start; + unsigned gs_start; + unsigned clip_start; + unsigned sf_start; + unsigned cs_start; + } urb; + + + /* BRW_NEW_CURBE_OFFSETS: + */ + struct { + unsigned wm_start; + unsigned wm_size; + unsigned clip_start; + unsigned clip_size; + unsigned vs_start; + unsigned vs_size; + unsigned total_size; + + unsigned gs_offset; + + float *last_buf; + unsigned last_bufsz; + } curbe; + + struct { + struct brw_vs_prog_data *prog_data; + + unsigned prog_gs_offset; + unsigned state_gs_offset; + } vs; + + struct { + struct brw_gs_prog_data *prog_data; + + boolean prog_active; + unsigned prog_gs_offset; + unsigned state_gs_offset; + } gs; + + struct { + struct brw_clip_prog_data *prog_data; + + unsigned prog_gs_offset; + unsigned vp_gs_offset; + unsigned state_gs_offset; + } clip; + + + struct { + struct brw_sf_prog_data *prog_data; + + struct pipe_setup_linkage linkage; + + unsigned prog_gs_offset; + unsigned vp_gs_offset; + unsigned state_gs_offset; + } sf; + + struct { + struct brw_wm_prog_data *prog_data; + +// struct brw_wm_compiler *compile_data; + + + /** + * Array of sampler state uploaded at sampler_gs_offset of BRW_SAMPLER + * cache + */ + struct brw_sampler_state sampler[BRW_MAX_TEX_UNIT]; + + unsigned render_surf; + unsigned nr_surfaces; + + unsigned max_threads; + struct pipe_buffer *scratch_buffer; + unsigned scratch_buffer_size; + + unsigned sampler_count; + unsigned sampler_gs_offset; + + struct brw_surface_binding_table bind; + unsigned bind_ss_offset; + + unsigned prog_gs_offset; + unsigned state_gs_offset; + } wm; + + + struct { + unsigned vp_gs_offset; + unsigned state_gs_offset; + } cc; + + + /* Used to give every program string a unique id + */ + unsigned program_id; +}; + + +#define BRW_PACKCOLOR8888(r,g,b,a) ((r<<24) | (g<<16) | (b<<8) | a) + + +/*====================================================================== + * brw_vtbl.c + */ +void brw_do_flush( struct brw_context *brw, + unsigned flags ); + + +/*====================================================================== + * brw_state.c + */ +void brw_validate_state(struct brw_context *brw); +void brw_init_state(struct brw_context *brw); +void brw_destroy_state(struct brw_context *brw); + + +/*====================================================================== + * brw_tex.c + */ +void brwUpdateTextureState( struct brw_context *brw ); + + +/* brw_urb.c + */ +void brw_upload_urb_fence(struct brw_context *brw); + +void brw_upload_constant_buffer_state(struct brw_context *brw); + +void brw_init_surface_functions(struct brw_context *brw); +void brw_init_state_functions(struct brw_context *brw); +void brw_init_flush_functions(struct brw_context *brw); +void brw_init_string_functions(struct brw_context *brw); + +/*====================================================================== + * Inline conversion functions. These are better-typed than the + * macros used previously: + */ +static inline struct brw_context * +brw_context( struct pipe_context *ctx ) +{ + return (struct brw_context *)ctx; +} + +#endif + diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index 6744a8aa4f..f5efe9fc06 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -1,434 +1,462 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Zack Rusin - * Keith Whitwell - */ - - -#include "pipe/p_winsys.h" -#include "pipe/p_util.h" -#include "pipe/p_inlines.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_dump.h" - -#include "brw_context.h" -#include "brw_defines.h" -#include "brw_state.h" -#include "brw_draw.h" - - -#define DUP( TYPE, VAL ) \ -do { \ - struct TYPE *x = malloc(sizeof(*x)); \ - memcpy(x, VAL, sizeof(*x) ); \ - return x; \ -} while (0) - -/************************************************************************ - * Blend - */ -static void * -brw_create_blend_state(struct pipe_context *pipe, - const struct pipe_blend_state *blend) -{ - DUP( pipe_blend_state, blend ); -} - -static void brw_bind_blend_state(struct pipe_context *pipe, - void *blend) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Blend = (struct pipe_blend_state*)blend; - brw->state.dirty.brw |= BRW_NEW_BLEND; -} - - -static void brw_delete_blend_state(struct pipe_context *pipe, void *blend) -{ - free(blend); -} - -static void brw_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.BlendColor = *blend_color; - - brw->state.dirty.brw |= BRW_NEW_BLEND; -} - -/************************************************************************ - * Sampler - */ - -static void * -brw_create_sampler_state(struct pipe_context *pipe, - const struct pipe_sampler_state *sampler) -{ - DUP( pipe_sampler_state, sampler ); -} - -static void brw_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Samplers[unit] = sampler; - brw->state.dirty.brw |= BRW_NEW_SAMPLER; -} - -static void brw_delete_sampler_state(struct pipe_context *pipe, - void *sampler) -{ - free(sampler); -} - - -/************************************************************************ - * Depth stencil - */ - -static void * -brw_create_depth_stencil_state(struct pipe_context *pipe, - const struct pipe_depth_stencil_alpha_state *depth_stencil) -{ - DUP( pipe_depth_stencil_alpha_state, depth_stencil ); -} - -static void brw_bind_depth_stencil_state(struct pipe_context *pipe, - void *depth_stencil) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.DepthStencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; - - brw->state.dirty.brw |= BRW_NEW_DEPTH_STENCIL; -} - -static void brw_delete_depth_stencil_state(struct pipe_context *pipe, - void *depth_stencil) -{ - free(depth_stencil); -} - -/************************************************************************ - * Scissor - */ -static void brw_set_scissor_state( struct pipe_context *pipe, - const struct pipe_scissor_state *scissor ) -{ - struct brw_context *brw = brw_context(pipe); - - memcpy( &brw->attribs.Scissor, scissor, sizeof(*scissor) ); - brw->state.dirty.brw |= BRW_NEW_SCISSOR; -} - - -/************************************************************************ - * Stipple - */ - -static void brw_set_polygon_stipple( struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple ) -{ -} - - -/************************************************************************ - * Fragment shader - */ - -static void * brw_create_fs_state(struct pipe_context *pipe, - const struct pipe_shader_state *shader) -{ - struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program); - - /* XXX: Do I have to duplicate the tokens as well?? - */ - brw_fp->program = *shader; - brw_fp->id = brw_context(pipe)->program_id++; - - tgsi_scan_shader(shader->tokens, &brw_fp->info); - -#if 0 - brw_shader_info(shader->tokens, - &brw_fp->info2); -#endif - - tgsi_dump(shader->tokens, 0); - - - return (void *)brw_fp; -} - -static void brw_bind_fs_state(struct pipe_context *pipe, void *shader) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.FragmentProgram = (struct brw_fragment_program *)shader; - brw->state.dirty.brw |= BRW_NEW_FS; -} - -static void brw_delete_fs_state(struct pipe_context *pipe, void *shader) -{ - FREE(shader); -} - - -/************************************************************************ - * Vertex shader and other TNL state - */ - -static void *brw_create_vs_state(struct pipe_context *pipe, - const struct pipe_shader_state *shader) -{ - struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program); - - /* XXX: Do I have to duplicate the tokens as well?? - */ - brw_vp->program = *shader; - brw_vp->id = brw_context(pipe)->program_id++; - - tgsi_scan_shader(shader->tokens, &brw_vp->info); - -#if 0 - brw_shader_info(shader->tokens, - &brw_vp->info2); -#endif - tgsi_dump(shader->tokens, 0); - - return (void *)brw_vp; -} - -static void brw_bind_vs_state(struct pipe_context *pipe, void *vs) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.VertexProgram = (struct brw_vertex_program *)vs; - brw->state.dirty.brw |= BRW_NEW_VS; - - debug_printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n"); -} - -static void brw_delete_vs_state(struct pipe_context *pipe, void *shader) -{ - FREE(shader); -} - - -static void brw_set_clip_state( struct pipe_context *pipe, - const struct pipe_clip_state *clip ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Clip = *clip; -} - - -static void brw_set_viewport_state( struct pipe_context *pipe, - const struct pipe_viewport_state *viewport ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Viewport = *viewport; /* struct copy */ - brw->state.dirty.brw |= BRW_NEW_VIEWPORT; - - /* pass the viewport info to the draw module */ - //draw_set_viewport_state(brw->draw, viewport); -} - - -static void brw_set_vertex_buffer( struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_buffer *buffer ) -{ - struct brw_context *brw = brw_context(pipe); - brw->vb.vbo_array[index] = buffer; -} - -static void brw_set_vertex_element(struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_element *element) -{ - /* flush ? */ - struct brw_context *brw = brw_context(pipe); - - assert(index < PIPE_ATTRIB_MAX); - struct brw_vertex_element_state el; - memset(&el, 0, sizeof(el)); - - el.ve0.src_offset = element->src_offset; - el.ve0.src_format = brw_translate_surface_format(element->src_format); - el.ve0.valid = 1; - el.ve0.vertex_buffer_index = element->vertex_buffer_index; - - el.ve1.dst_offset = index * 4; - - el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; - - switch (element->nr_components) { - case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; - case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; - case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; - break; - } - - brw->vb.inputs[index] = el; -} - - - -/************************************************************************ - * Constant buffers - */ - -static void brw_set_constant_buffer(struct pipe_context *pipe, - uint shader, uint index, - const struct pipe_constant_buffer *buf) -{ - struct brw_context *brw = brw_context(pipe); - - assert(buf == 0 || index == 0); - - brw->attribs.Constants[shader] = buf; - brw->state.dirty.brw |= BRW_NEW_CONSTANTS; -} - - -/************************************************************************ - * Texture surfaces - */ - - -static void brw_set_sampler_texture(struct pipe_context *pipe, - unsigned unit, - struct pipe_texture *texture) -{ - struct brw_context *brw = brw_context(pipe); - - pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[unit], - texture); - - brw->state.dirty.brw |= BRW_NEW_TEXTURE; -} - - -/************************************************************************ - * Render targets, etc - */ - -static void brw_set_framebuffer_state(struct pipe_context *pipe, - const struct pipe_framebuffer_state *fb) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.FrameBuffer = *fb; /* struct copy */ - - brw->state.dirty.brw |= BRW_NEW_FRAMEBUFFER; -} - - - -/************************************************************************ - * Rasterizer state - */ - -static void * -brw_create_rasterizer_state(struct pipe_context *pipe, - const struct pipe_rasterizer_state *rasterizer) -{ - DUP(pipe_rasterizer_state, rasterizer); -} - -static void brw_bind_rasterizer_state( struct pipe_context *pipe, - void *setup ) -{ - struct brw_context *brw = brw_context(pipe); - - brw->attribs.Raster = (struct pipe_rasterizer_state *)setup; - - /* Also pass-through to draw module: - */ - //draw_set_rasterizer_state(brw->draw, setup); - - brw->state.dirty.brw |= BRW_NEW_RASTERIZER; -} - -static void brw_delete_rasterizer_state(struct pipe_context *pipe, - void *setup) -{ - free(setup); -} - - - -void -brw_init_state_functions( struct brw_context *brw ) -{ - brw->pipe.create_blend_state = brw_create_blend_state; - brw->pipe.bind_blend_state = brw_bind_blend_state; - brw->pipe.delete_blend_state = brw_delete_blend_state; - - brw->pipe.create_sampler_state = brw_create_sampler_state; - brw->pipe.bind_sampler_state = brw_bind_sampler_state; - brw->pipe.delete_sampler_state = brw_delete_sampler_state; - - brw->pipe.create_depth_stencil_alpha_state = brw_create_depth_stencil_state; - brw->pipe.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state; - brw->pipe.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state; - - brw->pipe.create_rasterizer_state = brw_create_rasterizer_state; - brw->pipe.bind_rasterizer_state = brw_bind_rasterizer_state; - brw->pipe.delete_rasterizer_state = brw_delete_rasterizer_state; - brw->pipe.create_fs_state = brw_create_fs_state; - brw->pipe.bind_fs_state = brw_bind_fs_state; - brw->pipe.delete_fs_state = brw_delete_fs_state; - brw->pipe.create_vs_state = brw_create_vs_state; - brw->pipe.bind_vs_state = brw_bind_vs_state; - brw->pipe.delete_vs_state = brw_delete_vs_state; - - brw->pipe.set_blend_color = brw_set_blend_color; - brw->pipe.set_clip_state = brw_set_clip_state; - brw->pipe.set_constant_buffer = brw_set_constant_buffer; - brw->pipe.set_framebuffer_state = brw_set_framebuffer_state; - -// brw->pipe.set_feedback_state = brw_set_feedback_state; -// brw->pipe.set_feedback_buffer = brw_set_feedback_buffer; - - brw->pipe.set_polygon_stipple = brw_set_polygon_stipple; - brw->pipe.set_scissor_state = brw_set_scissor_state; - brw->pipe.set_sampler_texture = brw_set_sampler_texture; - brw->pipe.set_viewport_state = brw_set_viewport_state; - brw->pipe.set_vertex_buffer = brw_set_vertex_buffer; - brw->pipe.set_vertex_element = brw_set_vertex_element; -} +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Zack Rusin + * Keith Whitwell + */ + + +#include "pipe/p_winsys.h" +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_dump.h" + +#include "brw_context.h" +#include "brw_defines.h" +#include "brw_state.h" +#include "brw_draw.h" + + +#define DUP( TYPE, VAL ) \ +do { \ + struct TYPE *x = malloc(sizeof(*x)); \ + memcpy(x, VAL, sizeof(*x) ); \ + return x; \ +} while (0) + +/************************************************************************ + * Blend + */ +static void * +brw_create_blend_state(struct pipe_context *pipe, + const struct pipe_blend_state *blend) +{ + DUP( pipe_blend_state, blend ); +} + +static void brw_bind_blend_state(struct pipe_context *pipe, + void *blend) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Blend = (struct pipe_blend_state*)blend; + brw->state.dirty.brw |= BRW_NEW_BLEND; +} + + +static void brw_delete_blend_state(struct pipe_context *pipe, void *blend) +{ + free(blend); +} + +static void brw_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.BlendColor = *blend_color; + + brw->state.dirty.brw |= BRW_NEW_BLEND; +} + +/************************************************************************ + * Sampler + */ + +static void * +brw_create_sampler_state(struct pipe_context *pipe, + const struct pipe_sampler_state *sampler) +{ + DUP( pipe_sampler_state, sampler ); +} + +static void brw_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct brw_context *brw = brw_context(pipe); + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == brw->num_samplers && + !memcmp(brw->attribs.Samplers, sampler, num * sizeof(void *))) + return; + + memcpy(brw->attribs.Samplers, sampler, num * sizeof(void *)); + memset(&brw->attribs.Samplers[num], 0, (PIPE_MAX_SAMPLERS - num) * + sizeof(void *)); + + brw->num_samplers = num; + + brw->state.dirty.brw |= BRW_NEW_SAMPLER; +} + +static void brw_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + free(sampler); +} + + +/************************************************************************ + * Depth stencil + */ + +static void * +brw_create_depth_stencil_state(struct pipe_context *pipe, + const struct pipe_depth_stencil_alpha_state *depth_stencil) +{ + DUP( pipe_depth_stencil_alpha_state, depth_stencil ); +} + +static void brw_bind_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.DepthStencil = (const struct pipe_depth_stencil_alpha_state *)depth_stencil; + + brw->state.dirty.brw |= BRW_NEW_DEPTH_STENCIL; +} + +static void brw_delete_depth_stencil_state(struct pipe_context *pipe, + void *depth_stencil) +{ + free(depth_stencil); +} + +/************************************************************************ + * Scissor + */ +static void brw_set_scissor_state( struct pipe_context *pipe, + const struct pipe_scissor_state *scissor ) +{ + struct brw_context *brw = brw_context(pipe); + + memcpy( &brw->attribs.Scissor, scissor, sizeof(*scissor) ); + brw->state.dirty.brw |= BRW_NEW_SCISSOR; +} + + +/************************************************************************ + * Stipple + */ + +static void brw_set_polygon_stipple( struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple ) +{ +} + + +/************************************************************************ + * Fragment shader + */ + +static void * brw_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *shader) +{ + struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program); + + /* XXX: Do I have to duplicate the tokens as well?? + */ + brw_fp->program = *shader; + brw_fp->id = brw_context(pipe)->program_id++; + + tgsi_scan_shader(shader->tokens, &brw_fp->info); + +#if 0 + brw_shader_info(shader->tokens, + &brw_fp->info2); +#endif + + tgsi_dump(shader->tokens, 0); + + + return (void *)brw_fp; +} + +static void brw_bind_fs_state(struct pipe_context *pipe, void *shader) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.FragmentProgram = (struct brw_fragment_program *)shader; + brw->state.dirty.brw |= BRW_NEW_FS; +} + +static void brw_delete_fs_state(struct pipe_context *pipe, void *shader) +{ + FREE(shader); +} + + +/************************************************************************ + * Vertex shader and other TNL state + */ + +static void *brw_create_vs_state(struct pipe_context *pipe, + const struct pipe_shader_state *shader) +{ + struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program); + + /* XXX: Do I have to duplicate the tokens as well?? + */ + brw_vp->program = *shader; + brw_vp->id = brw_context(pipe)->program_id++; + + tgsi_scan_shader(shader->tokens, &brw_vp->info); + +#if 0 + brw_shader_info(shader->tokens, + &brw_vp->info2); +#endif + tgsi_dump(shader->tokens, 0); + + return (void *)brw_vp; +} + +static void brw_bind_vs_state(struct pipe_context *pipe, void *vs) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.VertexProgram = (struct brw_vertex_program *)vs; + brw->state.dirty.brw |= BRW_NEW_VS; + + debug_printf("YYYYYYYYYYYYY BINDING VERTEX SHADER\n"); +} + +static void brw_delete_vs_state(struct pipe_context *pipe, void *shader) +{ + FREE(shader); +} + + +static void brw_set_clip_state( struct pipe_context *pipe, + const struct pipe_clip_state *clip ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Clip = *clip; +} + + +static void brw_set_viewport_state( struct pipe_context *pipe, + const struct pipe_viewport_state *viewport ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Viewport = *viewport; /* struct copy */ + brw->state.dirty.brw |= BRW_NEW_VIEWPORT; + + /* pass the viewport info to the draw module */ + //draw_set_viewport_state(brw->draw, viewport); +} + + +static void brw_set_vertex_buffer( struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_buffer *buffer ) +{ + struct brw_context *brw = brw_context(pipe); + brw->vb.vbo_array[index] = buffer; +} + +static void brw_set_vertex_element(struct pipe_context *pipe, + unsigned index, + const struct pipe_vertex_element *element) +{ + /* flush ? */ + struct brw_context *brw = brw_context(pipe); + + assert(index < PIPE_ATTRIB_MAX); + struct brw_vertex_element_state el; + memset(&el, 0, sizeof(el)); + + el.ve0.src_offset = element->src_offset; + el.ve0.src_format = brw_translate_surface_format(element->src_format); + el.ve0.valid = 1; + el.ve0.vertex_buffer_index = element->vertex_buffer_index; + + el.ve1.dst_offset = index * 4; + + el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; + + switch (element->nr_components) { + case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; + case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; + case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; + break; + } + + brw->vb.inputs[index] = el; +} + + + +/************************************************************************ + * Constant buffers + */ + +static void brw_set_constant_buffer(struct pipe_context *pipe, + uint shader, uint index, + const struct pipe_constant_buffer *buf) +{ + struct brw_context *brw = brw_context(pipe); + + assert(buf == 0 || index == 0); + + brw->attribs.Constants[shader] = buf; + brw->state.dirty.brw |= BRW_NEW_CONSTANTS; +} + + +/************************************************************************ + * Texture surfaces + */ + + +static void brw_set_sampler_textures(struct pipe_context *pipe, + unsigned num, + struct pipe_texture **texture) +{ + struct brw_context *brw = brw_context(pipe); + uint i; + + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == brw->num_textures && + !memcmp(brw->attribs.Texture, texture, num * + sizeof(struct pipe_texture *))) + return; + + for (i = 0; i < num; i++) + pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], + texture[i]); + + for (i = num; i < brw->num_textures; i++) + pipe_texture_reference((struct pipe_texture **) &brw->attribs.Texture[i], + NULL); + + brw->num_textures = num; + + brw->state.dirty.brw |= BRW_NEW_TEXTURE; +} + + +/************************************************************************ + * Render targets, etc + */ + +static void brw_set_framebuffer_state(struct pipe_context *pipe, + const struct pipe_framebuffer_state *fb) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.FrameBuffer = *fb; /* struct copy */ + + brw->state.dirty.brw |= BRW_NEW_FRAMEBUFFER; +} + + + +/************************************************************************ + * Rasterizer state + */ + +static void * +brw_create_rasterizer_state(struct pipe_context *pipe, + const struct pipe_rasterizer_state *rasterizer) +{ + DUP(pipe_rasterizer_state, rasterizer); +} + +static void brw_bind_rasterizer_state( struct pipe_context *pipe, + void *setup ) +{ + struct brw_context *brw = brw_context(pipe); + + brw->attribs.Raster = (struct pipe_rasterizer_state *)setup; + + /* Also pass-through to draw module: + */ + //draw_set_rasterizer_state(brw->draw, setup); + + brw->state.dirty.brw |= BRW_NEW_RASTERIZER; +} + +static void brw_delete_rasterizer_state(struct pipe_context *pipe, + void *setup) +{ + free(setup); +} + + + +void +brw_init_state_functions( struct brw_context *brw ) +{ + brw->pipe.create_blend_state = brw_create_blend_state; + brw->pipe.bind_blend_state = brw_bind_blend_state; + brw->pipe.delete_blend_state = brw_delete_blend_state; + + brw->pipe.create_sampler_state = brw_create_sampler_state; + brw->pipe.bind_sampler_states = brw_bind_sampler_states; + brw->pipe.delete_sampler_state = brw_delete_sampler_state; + + brw->pipe.create_depth_stencil_alpha_state = brw_create_depth_stencil_state; + brw->pipe.bind_depth_stencil_alpha_state = brw_bind_depth_stencil_state; + brw->pipe.delete_depth_stencil_alpha_state = brw_delete_depth_stencil_state; + + brw->pipe.create_rasterizer_state = brw_create_rasterizer_state; + brw->pipe.bind_rasterizer_state = brw_bind_rasterizer_state; + brw->pipe.delete_rasterizer_state = brw_delete_rasterizer_state; + brw->pipe.create_fs_state = brw_create_fs_state; + brw->pipe.bind_fs_state = brw_bind_fs_state; + brw->pipe.delete_fs_state = brw_delete_fs_state; + brw->pipe.create_vs_state = brw_create_vs_state; + brw->pipe.bind_vs_state = brw_bind_vs_state; + brw->pipe.delete_vs_state = brw_delete_vs_state; + + brw->pipe.set_blend_color = brw_set_blend_color; + brw->pipe.set_clip_state = brw_set_clip_state; + brw->pipe.set_constant_buffer = brw_set_constant_buffer; + brw->pipe.set_framebuffer_state = brw_set_framebuffer_state; + +// brw->pipe.set_feedback_state = brw_set_feedback_state; +// brw->pipe.set_feedback_buffer = brw_set_feedback_buffer; + + brw->pipe.set_polygon_stipple = brw_set_polygon_stipple; + brw->pipe.set_scissor_state = brw_set_scissor_state; + brw->pipe.set_sampler_textures = brw_set_sampler_textures; + brw->pipe.set_viewport_state = brw_set_viewport_state; + brw->pipe.set_vertex_buffer = brw_set_vertex_buffer; + brw->pipe.set_vertex_element = brw_set_vertex_element; +} diff --git a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c index de42ffc5b1..ff5ba7e7c7 100644 --- a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c @@ -235,7 +235,8 @@ static void upload_wm_samplers(struct brw_context *brw) unsigned sampler_count = 0; /* BRW_NEW_SAMPLER */ - for (unit = 0; unit < BRW_MAX_TEX_UNIT; unit++) { + for (unit = 0; unit < brw->num_textures && unit < brw->num_samplers; + unit++) { /* determine unit enable/disable by looking for a bound texture */ if (brw->attribs.Texture[unit]) { const struct pipe_sampler_state *sampler = brw->attribs.Samplers[unit]; diff --git a/src/gallium/drivers/i965simple/brw_wm_surface_state.c b/src/gallium/drivers/i965simple/brw_wm_surface_state.c index d16d919bce..853c743ccf 100644 --- a/src/gallium/drivers/i965simple/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_surface_state.c @@ -237,7 +237,7 @@ static void upload_wm_surfaces(struct brw_context *brw ) /* BRW_NEW_TEXTURE */ - for (i = 0; i < BRW_MAX_TEX_UNIT; i++) { + for (i = 0; i < brw->num_textures && i < brw->num_samplers; i++) { const struct brw_texture *texUnit = brw->attribs.Texture[i]; if (texUnit && diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index fa16ed94e8..316ae552b8 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -1,243 +1,243 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Author: - * Keith Whitwell - */ - -#include "draw/draw_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/p_util.h" -#include "sp_clear.h" -#include "sp_context.h" -#include "sp_flush.h" -#include "sp_prim_setup.h" -#include "sp_prim_vbuf.h" -#include "sp_state.h" -#include "sp_surface.h" -#include "sp_tile_cache.h" -#include "sp_texture.h" -#include "sp_winsys.h" -#include "sp_query.h" - - - -/** - * Map any drawing surfaces which aren't already mapped - */ -void -softpipe_map_surfaces(struct softpipe_context *sp) -{ - unsigned i; - - for (i = 0; i < sp->framebuffer.num_cbufs; i++) { - sp_tile_cache_map_surfaces(sp->cbuf_cache[i]); - } - - sp_tile_cache_map_surfaces(sp->zsbuf_cache); -} - - -/** - * Unmap any mapped drawing surfaces - */ -void -softpipe_unmap_surfaces(struct softpipe_context *sp) -{ - uint i; - - for (i = 0; i < sp->framebuffer.num_cbufs; i++) - sp_flush_tile_cache(sp, sp->cbuf_cache[i]); - sp_flush_tile_cache(sp, sp->zsbuf_cache); - - for (i = 0; i < sp->framebuffer.num_cbufs; i++) { - sp_tile_cache_unmap_surfaces(sp->cbuf_cache[i]); - } - sp_tile_cache_unmap_surfaces(sp->zsbuf_cache); -} - - -static void softpipe_destroy( struct pipe_context *pipe ) -{ - struct softpipe_context *softpipe = softpipe_context( pipe ); - struct pipe_winsys *ws = pipe->winsys; - uint i; - - draw_destroy( softpipe->draw ); - - softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple ); - softpipe->quad.earlyz->destroy( softpipe->quad.earlyz ); - softpipe->quad.shade->destroy( softpipe->quad.shade ); - softpipe->quad.alpha_test->destroy( softpipe->quad.alpha_test ); - softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); - softpipe->quad.stencil_test->destroy( softpipe->quad.stencil_test ); - softpipe->quad.occlusion->destroy( softpipe->quad.occlusion ); - softpipe->quad.coverage->destroy( softpipe->quad.coverage ); - softpipe->quad.bufloop->destroy( softpipe->quad.bufloop ); - softpipe->quad.blend->destroy( softpipe->quad.blend ); - softpipe->quad.colormask->destroy( softpipe->quad.colormask ); - softpipe->quad.output->destroy( softpipe->quad.output ); - - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) - sp_destroy_tile_cache(softpipe->cbuf_cache[i]); - sp_destroy_tile_cache(softpipe->zsbuf_cache); - - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - sp_destroy_tile_cache(softpipe->tex_cache[i]); - - for (i = 0; i < Elements(softpipe->constants); i++) { - if (softpipe->constants[i].buffer) { - pipe_buffer_reference(ws, &softpipe->constants[i].buffer, NULL); - } - } - - FREE( softpipe ); -} - - -struct pipe_context * -softpipe_create( struct pipe_screen *screen, - struct pipe_winsys *pipe_winsys, - struct softpipe_winsys *softpipe_winsys ) -{ - struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); - uint i; - -#if defined(__i386__) || defined(__386__) - softpipe->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL; -#else - softpipe->use_sse = FALSE; -#endif - - softpipe->dump_fs = GETENV( "GALLIUM_DUMP_FS" ) != NULL; - - softpipe->pipe.winsys = pipe_winsys; - softpipe->pipe.screen = screen; - softpipe->pipe.destroy = softpipe_destroy; - - /* state setters */ - softpipe->pipe.create_blend_state = softpipe_create_blend_state; - softpipe->pipe.bind_blend_state = softpipe_bind_blend_state; - softpipe->pipe.delete_blend_state = softpipe_delete_blend_state; - - softpipe->pipe.create_sampler_state = softpipe_create_sampler_state; - softpipe->pipe.bind_sampler_state = softpipe_bind_sampler_state; - softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state; - - softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state; - softpipe->pipe.bind_depth_stencil_alpha_state = softpipe_bind_depth_stencil_state; - softpipe->pipe.delete_depth_stencil_alpha_state = softpipe_delete_depth_stencil_state; - - softpipe->pipe.create_rasterizer_state = softpipe_create_rasterizer_state; - softpipe->pipe.bind_rasterizer_state = softpipe_bind_rasterizer_state; - softpipe->pipe.delete_rasterizer_state = softpipe_delete_rasterizer_state; - - softpipe->pipe.create_fs_state = softpipe_create_fs_state; - softpipe->pipe.bind_fs_state = softpipe_bind_fs_state; - softpipe->pipe.delete_fs_state = softpipe_delete_fs_state; - - softpipe->pipe.create_vs_state = softpipe_create_vs_state; - softpipe->pipe.bind_vs_state = softpipe_bind_vs_state; - softpipe->pipe.delete_vs_state = softpipe_delete_vs_state; - - softpipe->pipe.set_blend_color = softpipe_set_blend_color; - softpipe->pipe.set_clip_state = softpipe_set_clip_state; - softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer; - softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state; - softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple; - softpipe->pipe.set_scissor_state = softpipe_set_scissor_state; - softpipe->pipe.set_sampler_texture = softpipe_set_sampler_texture; - softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; - - softpipe->pipe.set_vertex_buffer = softpipe_set_vertex_buffer; - softpipe->pipe.set_vertex_element = softpipe_set_vertex_element; - - softpipe->pipe.draw_arrays = softpipe_draw_arrays; - softpipe->pipe.draw_elements = softpipe_draw_elements; - - softpipe->pipe.clear = softpipe_clear; - softpipe->pipe.flush = softpipe_flush; - - softpipe_init_query_funcs( softpipe ); - softpipe_init_texture_funcs( softpipe ); - - /* - * Alloc caches for accessing drawing surfaces and textures. - * Must be before quad stage setup! - */ - for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) - softpipe->cbuf_cache[i] = sp_create_tile_cache(); - softpipe->zsbuf_cache = sp_create_tile_cache(); - - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - softpipe->tex_cache[i] = sp_create_tile_cache(); - - - /* setup quad rendering stages */ - softpipe->quad.polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); - softpipe->quad.earlyz = sp_quad_earlyz_stage(softpipe); - softpipe->quad.shade = sp_quad_shade_stage(softpipe); - softpipe->quad.alpha_test = sp_quad_alpha_test_stage(softpipe); - softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); - softpipe->quad.stencil_test = sp_quad_stencil_test_stage(softpipe); - softpipe->quad.occlusion = sp_quad_occlusion_stage(softpipe); - softpipe->quad.coverage = sp_quad_coverage_stage(softpipe); - softpipe->quad.bufloop = sp_quad_bufloop_stage(softpipe); - softpipe->quad.blend = sp_quad_blend_stage(softpipe); - softpipe->quad.colormask = sp_quad_colormask_stage(softpipe); - softpipe->quad.output = sp_quad_output_stage(softpipe); - - softpipe->winsys = softpipe_winsys; - - /* - * Create drawing context and plug our rendering stage into it. - */ - softpipe->draw = draw_create(); - assert(softpipe->draw); - softpipe->setup = sp_draw_render_stage(softpipe); - - if (GETENV( "SP_VBUF" ) != NULL) { - sp_init_vbuf(softpipe); - } - else { - draw_set_rasterize_stage(softpipe->draw, softpipe->setup); - } - - /* plug in AA line/point stages */ - draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); - draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe); - -#if USE_DRAW_STAGE_PSTIPPLE - /* Do polygon stipple w/ texture map + frag prog? */ - draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe); -#endif - - sp_init_surface_functions(softpipe); - - return &softpipe->pipe; -} +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Author: + * Keith Whitwell + */ + +#include "draw/draw_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "sp_clear.h" +#include "sp_context.h" +#include "sp_flush.h" +#include "sp_prim_setup.h" +#include "sp_prim_vbuf.h" +#include "sp_state.h" +#include "sp_surface.h" +#include "sp_tile_cache.h" +#include "sp_texture.h" +#include "sp_winsys.h" +#include "sp_query.h" + + + +/** + * Map any drawing surfaces which aren't already mapped + */ +void +softpipe_map_surfaces(struct softpipe_context *sp) +{ + unsigned i; + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) { + sp_tile_cache_map_surfaces(sp->cbuf_cache[i]); + } + + sp_tile_cache_map_surfaces(sp->zsbuf_cache); +} + + +/** + * Unmap any mapped drawing surfaces + */ +void +softpipe_unmap_surfaces(struct softpipe_context *sp) +{ + uint i; + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) + sp_flush_tile_cache(sp, sp->cbuf_cache[i]); + sp_flush_tile_cache(sp, sp->zsbuf_cache); + + for (i = 0; i < sp->framebuffer.num_cbufs; i++) { + sp_tile_cache_unmap_surfaces(sp->cbuf_cache[i]); + } + sp_tile_cache_unmap_surfaces(sp->zsbuf_cache); +} + + +static void softpipe_destroy( struct pipe_context *pipe ) +{ + struct softpipe_context *softpipe = softpipe_context( pipe ); + struct pipe_winsys *ws = pipe->winsys; + uint i; + + draw_destroy( softpipe->draw ); + + softpipe->quad.polygon_stipple->destroy( softpipe->quad.polygon_stipple ); + softpipe->quad.earlyz->destroy( softpipe->quad.earlyz ); + softpipe->quad.shade->destroy( softpipe->quad.shade ); + softpipe->quad.alpha_test->destroy( softpipe->quad.alpha_test ); + softpipe->quad.depth_test->destroy( softpipe->quad.depth_test ); + softpipe->quad.stencil_test->destroy( softpipe->quad.stencil_test ); + softpipe->quad.occlusion->destroy( softpipe->quad.occlusion ); + softpipe->quad.coverage->destroy( softpipe->quad.coverage ); + softpipe->quad.bufloop->destroy( softpipe->quad.bufloop ); + softpipe->quad.blend->destroy( softpipe->quad.blend ); + softpipe->quad.colormask->destroy( softpipe->quad.colormask ); + softpipe->quad.output->destroy( softpipe->quad.output ); + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + sp_destroy_tile_cache(softpipe->cbuf_cache[i]); + sp_destroy_tile_cache(softpipe->zsbuf_cache); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + sp_destroy_tile_cache(softpipe->tex_cache[i]); + + for (i = 0; i < Elements(softpipe->constants); i++) { + if (softpipe->constants[i].buffer) { + pipe_buffer_reference(ws, &softpipe->constants[i].buffer, NULL); + } + } + + FREE( softpipe ); +} + + +struct pipe_context * +softpipe_create( struct pipe_screen *screen, + struct pipe_winsys *pipe_winsys, + struct softpipe_winsys *softpipe_winsys ) +{ + struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); + uint i; + +#if defined(__i386__) || defined(__386__) + softpipe->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL; +#else + softpipe->use_sse = FALSE; +#endif + + softpipe->dump_fs = GETENV( "GALLIUM_DUMP_FS" ) != NULL; + + softpipe->pipe.winsys = pipe_winsys; + softpipe->pipe.screen = screen; + softpipe->pipe.destroy = softpipe_destroy; + + /* state setters */ + softpipe->pipe.create_blend_state = softpipe_create_blend_state; + softpipe->pipe.bind_blend_state = softpipe_bind_blend_state; + softpipe->pipe.delete_blend_state = softpipe_delete_blend_state; + + softpipe->pipe.create_sampler_state = softpipe_create_sampler_state; + softpipe->pipe.bind_sampler_states = softpipe_bind_sampler_states; + softpipe->pipe.delete_sampler_state = softpipe_delete_sampler_state; + + softpipe->pipe.create_depth_stencil_alpha_state = softpipe_create_depth_stencil_state; + softpipe->pipe.bind_depth_stencil_alpha_state = softpipe_bind_depth_stencil_state; + softpipe->pipe.delete_depth_stencil_alpha_state = softpipe_delete_depth_stencil_state; + + softpipe->pipe.create_rasterizer_state = softpipe_create_rasterizer_state; + softpipe->pipe.bind_rasterizer_state = softpipe_bind_rasterizer_state; + softpipe->pipe.delete_rasterizer_state = softpipe_delete_rasterizer_state; + + softpipe->pipe.create_fs_state = softpipe_create_fs_state; + softpipe->pipe.bind_fs_state = softpipe_bind_fs_state; + softpipe->pipe.delete_fs_state = softpipe_delete_fs_state; + + softpipe->pipe.create_vs_state = softpipe_create_vs_state; + softpipe->pipe.bind_vs_state = softpipe_bind_vs_state; + softpipe->pipe.delete_vs_state = softpipe_delete_vs_state; + + softpipe->pipe.set_blend_color = softpipe_set_blend_color; + softpipe->pipe.set_clip_state = softpipe_set_clip_state; + softpipe->pipe.set_constant_buffer = softpipe_set_constant_buffer; + softpipe->pipe.set_framebuffer_state = softpipe_set_framebuffer_state; + softpipe->pipe.set_polygon_stipple = softpipe_set_polygon_stipple; + softpipe->pipe.set_scissor_state = softpipe_set_scissor_state; + softpipe->pipe.set_sampler_textures = softpipe_set_sampler_textures; + softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; + + softpipe->pipe.set_vertex_buffer = softpipe_set_vertex_buffer; + softpipe->pipe.set_vertex_element = softpipe_set_vertex_element; + + softpipe->pipe.draw_arrays = softpipe_draw_arrays; + softpipe->pipe.draw_elements = softpipe_draw_elements; + + softpipe->pipe.clear = softpipe_clear; + softpipe->pipe.flush = softpipe_flush; + + softpipe_init_query_funcs( softpipe ); + softpipe_init_texture_funcs( softpipe ); + + /* + * Alloc caches for accessing drawing surfaces and textures. + * Must be before quad stage setup! + */ + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) + softpipe->cbuf_cache[i] = sp_create_tile_cache(); + softpipe->zsbuf_cache = sp_create_tile_cache(); + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + softpipe->tex_cache[i] = sp_create_tile_cache(); + + + /* setup quad rendering stages */ + softpipe->quad.polygon_stipple = sp_quad_polygon_stipple_stage(softpipe); + softpipe->quad.earlyz = sp_quad_earlyz_stage(softpipe); + softpipe->quad.shade = sp_quad_shade_stage(softpipe); + softpipe->quad.alpha_test = sp_quad_alpha_test_stage(softpipe); + softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe); + softpipe->quad.stencil_test = sp_quad_stencil_test_stage(softpipe); + softpipe->quad.occlusion = sp_quad_occlusion_stage(softpipe); + softpipe->quad.coverage = sp_quad_coverage_stage(softpipe); + softpipe->quad.bufloop = sp_quad_bufloop_stage(softpipe); + softpipe->quad.blend = sp_quad_blend_stage(softpipe); + softpipe->quad.colormask = sp_quad_colormask_stage(softpipe); + softpipe->quad.output = sp_quad_output_stage(softpipe); + + softpipe->winsys = softpipe_winsys; + + /* + * Create drawing context and plug our rendering stage into it. + */ + softpipe->draw = draw_create(); + assert(softpipe->draw); + softpipe->setup = sp_draw_render_stage(softpipe); + + if (GETENV( "SP_VBUF" ) != NULL) { + sp_init_vbuf(softpipe); + } + else { + draw_set_rasterize_stage(softpipe->draw, softpipe->setup); + } + + /* plug in AA line/point stages */ + draw_install_aaline_stage(softpipe->draw, &softpipe->pipe); + draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe); + +#if USE_DRAW_STAGE_PSTIPPLE + /* Do polygon stipple w/ texture map + frag prog? */ + draw_install_pstipple_stage(softpipe->draw, &softpipe->pipe); +#endif + + sp_init_surface_functions(softpipe); + + return &softpipe->pipe; +} diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index feeafc7084..19e6cfaf02 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -81,6 +81,9 @@ struct softpipe_context { struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; unsigned dirty; + unsigned num_samplers; + unsigned num_textures; + /* Counter for occlusion queries. Note this supports overlapping * queries. */ diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 1fbb2e38c4..9198198db3 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -1,208 +1,209 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Vertices are just an array of floats, with all the attributes - * packed. We currently assume a layout like: - * - * attr[0][0..3] - window position - * attr[1..n][0..3] - remaining attributes. - * - * Attributes are assumed to be 4 floats wide but are packed so that - * all the enabled attributes run contiguously. - */ - -#include "pipe/p_util.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" - -#include "sp_context.h" -#include "sp_state.h" -#include "sp_headers.h" -#include "sp_quad.h" -#include "sp_texture.h" -#include "sp_tex_sample.h" - - -struct quad_shade_stage -{ - struct quad_stage stage; - struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS]; - struct tgsi_exec_machine machine; - struct tgsi_exec_vector *inputs, *outputs; - int colorOutSlot, depthOutSlot; -}; - - -/** cast wrapper */ -static INLINE struct quad_shade_stage * -quad_shade_stage(struct quad_stage *qs) -{ - return (struct quad_shade_stage *) qs; -} - - - -/** - * Execute fragment shader for the four fragments in the quad. - */ -static void -shade_quad( - struct quad_stage *qs, - struct quad_header *quad ) -{ - struct quad_shade_stage *qss = quad_shade_stage( qs ); - struct softpipe_context *softpipe = qs->softpipe; - struct tgsi_exec_machine *machine = &qss->machine; - - /* Consts do not require 16 byte alignment. */ - machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; - - machine->InterpCoefs = quad->coef; - - /* run shader */ - quad->mask &= softpipe->fs->run( softpipe->fs, - &qss->machine, - quad ); - - /* store result color */ - if (qss->colorOutSlot >= 0) { - /* XXX need to handle multiple color outputs someday */ - assert(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] - == TGSI_SEMANTIC_COLOR); - memcpy( - quad->outputs.color, - &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0], - sizeof( quad->outputs.color ) ); - } - - /* - * XXX the following code for updating quad->outputs.depth - * isn't really needed if we did early z testing. - */ - - /* store result Z */ - if (qss->depthOutSlot >= 0) { - /* output[slot] is new Z */ - uint i; - for (i = 0; i < 4; i++) { - quad->outputs.depth[i] = machine->Outputs[0].xyzw[2].f[i]; - } - } - else { - /* copy input Z (which was interpolated by the executor) to output Z */ - uint i; - for (i = 0; i < 4; i++) { - quad->outputs.depth[i] = machine->Inputs[0].xyzw[2].f[i]; - /* XXX not sure the above line is always correct. The following - * might be better: - quad->outputs.depth[i] = machine->QuadPos.xyzw[2].f[i]; - */ - } - } - - /* shader may cull fragments */ - if( quad->mask ) { - qs->next->run( qs->next, quad ); - } -} - -/** - * Per-primitive (or per-begin?) setup - */ -static void shade_begin(struct quad_stage *qs) -{ - struct quad_shade_stage *qss = quad_shade_stage(qs); - struct softpipe_context *softpipe = qs->softpipe; - unsigned i; - - /* set TGSI sampler state that varies */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - qss->samplers[i].state = softpipe->sampler[i]; - qss->samplers[i].texture = softpipe->texture[i]; - } - - /* find output slots for depth, color */ - qss->colorOutSlot = -1; - qss->depthOutSlot = -1; - for (i = 0; i < qss->stage.softpipe->fs->info.num_outputs; i++) { - switch (qss->stage.softpipe->fs->info.output_semantic_name[i]) { - case TGSI_SEMANTIC_POSITION: - qss->depthOutSlot = i; - break; - case TGSI_SEMANTIC_COLOR: - qss->colorOutSlot = i; - break; - } - } - - softpipe->fs->prepare( softpipe->fs, - &qss->machine, - qss->samplers ); - - qs->next->begin(qs->next); -} - - -static void shade_destroy(struct quad_stage *qs) -{ - struct quad_shade_stage *qss = (struct quad_shade_stage *) qs; - - tgsi_exec_machine_free_data(&qss->machine); - FREE( qss->inputs ); - FREE( qss->outputs ); - FREE( qs ); -} - - -struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ) -{ - struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage); - uint i; - - /* allocate storage for program inputs/outputs, aligned to 16 bytes */ - qss->inputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->inputs) + 16); - qss->outputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->outputs) + 16); - qss->machine.Inputs = align16(qss->inputs); - qss->machine.Outputs = align16(qss->outputs); - - qss->stage.softpipe = softpipe; - qss->stage.begin = shade_begin; - qss->stage.run = shade_quad; - qss->stage.destroy = shade_destroy; - - /* set TGSI sampler state that's constant */ - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { - assert(softpipe->tex_cache[i]); - qss->samplers[i].get_samples = sp_get_samples; - qss->samplers[i].pipe = &softpipe->pipe; - qss->samplers[i].cache = softpipe->tex_cache[i]; - } - - tgsi_exec_machine_init( &qss->machine ); - - return &qss->stage; -} +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Vertices are just an array of floats, with all the attributes + * packed. We currently assume a layout like: + * + * attr[0][0..3] - window position + * attr[1..n][0..3] - remaining attributes. + * + * Attributes are assumed to be 4 floats wide but are packed so that + * all the enabled attributes run contiguously. + */ + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "sp_texture.h" +#include "sp_tex_sample.h" + + +struct quad_shade_stage +{ + struct quad_stage stage; + struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS]; + struct tgsi_exec_machine machine; + struct tgsi_exec_vector *inputs, *outputs; + int colorOutSlot, depthOutSlot; +}; + + +/** cast wrapper */ +static INLINE struct quad_shade_stage * +quad_shade_stage(struct quad_stage *qs) +{ + return (struct quad_shade_stage *) qs; +} + + + +/** + * Execute fragment shader for the four fragments in the quad. + */ +static void +shade_quad( + struct quad_stage *qs, + struct quad_header *quad ) +{ + struct quad_shade_stage *qss = quad_shade_stage( qs ); + struct softpipe_context *softpipe = qs->softpipe; + struct tgsi_exec_machine *machine = &qss->machine; + + /* Consts do not require 16 byte alignment. */ + machine->Consts = softpipe->mapped_constants[PIPE_SHADER_FRAGMENT]; + + machine->InterpCoefs = quad->coef; + + /* run shader */ + quad->mask &= softpipe->fs->run( softpipe->fs, + &qss->machine, + quad ); + + /* store result color */ + if (qss->colorOutSlot >= 0) { + /* XXX need to handle multiple color outputs someday */ + assert(qss->stage.softpipe->fs->info.output_semantic_name[qss->colorOutSlot] + == TGSI_SEMANTIC_COLOR); + memcpy( + quad->outputs.color, + &machine->Outputs[qss->colorOutSlot].xyzw[0].f[0], + sizeof( quad->outputs.color ) ); + } + + /* + * XXX the following code for updating quad->outputs.depth + * isn't really needed if we did early z testing. + */ + + /* store result Z */ + if (qss->depthOutSlot >= 0) { + /* output[slot] is new Z */ + uint i; + for (i = 0; i < 4; i++) { + quad->outputs.depth[i] = machine->Outputs[0].xyzw[2].f[i]; + } + } + else { + /* copy input Z (which was interpolated by the executor) to output Z */ + uint i; + for (i = 0; i < 4; i++) { + quad->outputs.depth[i] = machine->Inputs[0].xyzw[2].f[i]; + /* XXX not sure the above line is always correct. The following + * might be better: + quad->outputs.depth[i] = machine->QuadPos.xyzw[2].f[i]; + */ + } + } + + /* shader may cull fragments */ + if( quad->mask ) { + qs->next->run( qs->next, quad ); + } +} + +/** + * Per-primitive (or per-begin?) setup + */ +static void shade_begin(struct quad_stage *qs) +{ + struct quad_shade_stage *qss = quad_shade_stage(qs); + struct softpipe_context *softpipe = qs->softpipe; + unsigned i; + unsigned num = MAX2(softpipe->num_textures, softpipe->num_samplers); + + /* set TGSI sampler state that varies */ + for (i = 0; i < num; i++) { + qss->samplers[i].state = softpipe->sampler[i]; + qss->samplers[i].texture = softpipe->texture[i]; + } + + /* find output slots for depth, color */ + qss->colorOutSlot = -1; + qss->depthOutSlot = -1; + for (i = 0; i < qss->stage.softpipe->fs->info.num_outputs; i++) { + switch (qss->stage.softpipe->fs->info.output_semantic_name[i]) { + case TGSI_SEMANTIC_POSITION: + qss->depthOutSlot = i; + break; + case TGSI_SEMANTIC_COLOR: + qss->colorOutSlot = i; + break; + } + } + + softpipe->fs->prepare( softpipe->fs, + &qss->machine, + qss->samplers ); + + qs->next->begin(qs->next); +} + + +static void shade_destroy(struct quad_stage *qs) +{ + struct quad_shade_stage *qss = (struct quad_shade_stage *) qs; + + tgsi_exec_machine_free_data(&qss->machine); + FREE( qss->inputs ); + FREE( qss->outputs ); + FREE( qs ); +} + + +struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ) +{ + struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage); + uint i; + + /* allocate storage for program inputs/outputs, aligned to 16 bytes */ + qss->inputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->inputs) + 16); + qss->outputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->outputs) + 16); + qss->machine.Inputs = align16(qss->inputs); + qss->machine.Outputs = align16(qss->outputs); + + qss->stage.softpipe = softpipe; + qss->stage.begin = shade_begin; + qss->stage.run = shade_quad; + qss->stage.destroy = shade_destroy; + + /* set TGSI sampler state that's constant */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + assert(softpipe->tex_cache[i]); + qss->samplers[i].get_samples = sp_get_samples; + qss->samplers[i].pipe = &softpipe->pipe; + qss->samplers[i].cache = softpipe->tex_cache[i]; + } + + tgsi_exec_machine_init( &qss->machine ); + + return &qss->stage; +} diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 3943d4ed2b..45d159143f 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -1,195 +1,195 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell - */ - -#ifndef SP_STATE_H -#define SP_STATE_H - -#include "pipe/p_state.h" -#include "tgsi/util/tgsi_scan.h" - - -#define SP_NEW_VIEWPORT 0x1 -#define SP_NEW_RASTERIZER 0x2 -#define SP_NEW_FS 0x4 -#define SP_NEW_BLEND 0x8 -#define SP_NEW_CLIP 0x10 -#define SP_NEW_SCISSOR 0x20 -#define SP_NEW_STIPPLE 0x40 -#define SP_NEW_FRAMEBUFFER 0x80 -#define SP_NEW_DEPTH_STENCIL_ALPHA 0x100 -#define SP_NEW_CONSTANTS 0x200 -#define SP_NEW_SAMPLER 0x400 -#define SP_NEW_TEXTURE 0x800 -#define SP_NEW_VERTEX 0x1000 -#define SP_NEW_VS 0x2000 -#define SP_NEW_QUERY 0x4000 - - -struct tgsi_sampler; -struct tgsi_exec_machine; - - -/** Subclass of pipe_shader_state (though it doesn't really need to be). - * - * This is starting to look an awful lot like a quad pipeline stage... - */ -struct sp_fragment_shader { - struct pipe_shader_state shader; - - struct tgsi_shader_info info; - - void (*prepare)( const struct sp_fragment_shader *shader, - struct tgsi_exec_machine *machine, - struct tgsi_sampler *samplers); - - /* Run the shader - this interface will get cleaned up in the - * future: - */ - unsigned (*run)( const struct sp_fragment_shader *shader, - struct tgsi_exec_machine *machine, - struct quad_header *quad ); - - - void (*delete)( struct sp_fragment_shader * ); -}; - -struct vertex_info; - -/** Subclass of pipe_shader_state */ -struct sp_vertex_shader { - struct pipe_shader_state shader; - struct draw_vertex_shader *draw_data; -}; - - - -void * -softpipe_create_blend_state(struct pipe_context *, - const struct pipe_blend_state *); -void softpipe_bind_blend_state(struct pipe_context *, - void *); -void softpipe_delete_blend_state(struct pipe_context *, - void *); - -void * -softpipe_create_sampler_state(struct pipe_context *, - const struct pipe_sampler_state *); -void softpipe_bind_sampler_state(struct pipe_context *, unsigned, void *); -void softpipe_delete_sampler_state(struct pipe_context *, void *); - -void * -softpipe_create_depth_stencil_state(struct pipe_context *, - const struct pipe_depth_stencil_alpha_state *); -void softpipe_bind_depth_stencil_state(struct pipe_context *, void *); -void softpipe_delete_depth_stencil_state(struct pipe_context *, void *); - -void * -softpipe_create_rasterizer_state(struct pipe_context *, - const struct pipe_rasterizer_state *); -void softpipe_bind_rasterizer_state(struct pipe_context *, void *); -void softpipe_delete_rasterizer_state(struct pipe_context *, void *); - -void softpipe_set_framebuffer_state( struct pipe_context *, - const struct pipe_framebuffer_state * ); - -void softpipe_set_blend_color( struct pipe_context *pipe, - const struct pipe_blend_color *blend_color ); - -void softpipe_set_clip_state( struct pipe_context *, - const struct pipe_clip_state * ); - -void softpipe_set_constant_buffer(struct pipe_context *, - uint shader, uint index, - const struct pipe_constant_buffer *buf); - -void *softpipe_create_fs_state(struct pipe_context *, - const struct pipe_shader_state *); -void softpipe_bind_fs_state(struct pipe_context *, void *); -void softpipe_delete_fs_state(struct pipe_context *, void *); -void *softpipe_create_vs_state(struct pipe_context *, - const struct pipe_shader_state *); -void softpipe_bind_vs_state(struct pipe_context *, void *); -void softpipe_delete_vs_state(struct pipe_context *, void *); - -void softpipe_set_polygon_stipple( struct pipe_context *, - const struct pipe_poly_stipple * ); - -void softpipe_set_scissor_state( struct pipe_context *, - const struct pipe_scissor_state * ); - -void softpipe_set_sampler_texture( struct pipe_context *, - unsigned unit, - struct pipe_texture * ); - -void softpipe_set_viewport_state( struct pipe_context *, - const struct pipe_viewport_state * ); - -void softpipe_set_vertex_element(struct pipe_context *, - unsigned index, - const struct pipe_vertex_element *); - -void softpipe_set_vertex_buffer(struct pipe_context *, - unsigned index, - const struct pipe_vertex_buffer *); - - -void softpipe_update_derived( struct softpipe_context *softpipe ); - - -boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, - unsigned start, unsigned count); - -boolean softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count); - - -void -softpipe_map_surfaces(struct softpipe_context *sp); - -void -softpipe_unmap_surfaces(struct softpipe_context *sp); - -void -softpipe_map_texture_surfaces(struct softpipe_context *sp); - -void -softpipe_unmap_texture_surfaces(struct softpipe_context *sp); - - -struct vertex_info * -softpipe_get_vertex_info(struct softpipe_context *softpipe); - -struct vertex_info * -softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe); - - -#endif +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#ifndef SP_STATE_H +#define SP_STATE_H + +#include "pipe/p_state.h" +#include "tgsi/util/tgsi_scan.h" + + +#define SP_NEW_VIEWPORT 0x1 +#define SP_NEW_RASTERIZER 0x2 +#define SP_NEW_FS 0x4 +#define SP_NEW_BLEND 0x8 +#define SP_NEW_CLIP 0x10 +#define SP_NEW_SCISSOR 0x20 +#define SP_NEW_STIPPLE 0x40 +#define SP_NEW_FRAMEBUFFER 0x80 +#define SP_NEW_DEPTH_STENCIL_ALPHA 0x100 +#define SP_NEW_CONSTANTS 0x200 +#define SP_NEW_SAMPLER 0x400 +#define SP_NEW_TEXTURE 0x800 +#define SP_NEW_VERTEX 0x1000 +#define SP_NEW_VS 0x2000 +#define SP_NEW_QUERY 0x4000 + + +struct tgsi_sampler; +struct tgsi_exec_machine; + + +/** Subclass of pipe_shader_state (though it doesn't really need to be). + * + * This is starting to look an awful lot like a quad pipeline stage... + */ +struct sp_fragment_shader { + struct pipe_shader_state shader; + + struct tgsi_shader_info info; + + void (*prepare)( const struct sp_fragment_shader *shader, + struct tgsi_exec_machine *machine, + struct tgsi_sampler *samplers); + + /* Run the shader - this interface will get cleaned up in the + * future: + */ + unsigned (*run)( const struct sp_fragment_shader *shader, + struct tgsi_exec_machine *machine, + struct quad_header *quad ); + + + void (*delete)( struct sp_fragment_shader * ); +}; + +struct vertex_info; + +/** Subclass of pipe_shader_state */ +struct sp_vertex_shader { + struct pipe_shader_state shader; + struct draw_vertex_shader *draw_data; +}; + + + +void * +softpipe_create_blend_state(struct pipe_context *, + const struct pipe_blend_state *); +void softpipe_bind_blend_state(struct pipe_context *, + void *); +void softpipe_delete_blend_state(struct pipe_context *, + void *); + +void * +softpipe_create_sampler_state(struct pipe_context *, + const struct pipe_sampler_state *); +void softpipe_bind_sampler_states(struct pipe_context *, unsigned, void **); +void softpipe_delete_sampler_state(struct pipe_context *, void *); + +void * +softpipe_create_depth_stencil_state(struct pipe_context *, + const struct pipe_depth_stencil_alpha_state *); +void softpipe_bind_depth_stencil_state(struct pipe_context *, void *); +void softpipe_delete_depth_stencil_state(struct pipe_context *, void *); + +void * +softpipe_create_rasterizer_state(struct pipe_context *, + const struct pipe_rasterizer_state *); +void softpipe_bind_rasterizer_state(struct pipe_context *, void *); +void softpipe_delete_rasterizer_state(struct pipe_context *, void *); + +void softpipe_set_framebuffer_state( struct pipe_context *, + const struct pipe_framebuffer_state * ); + +void softpipe_set_blend_color( struct pipe_context *pipe, + const struct pipe_blend_color *blend_color ); + +void softpipe_set_clip_state( struct pipe_context *, + const struct pipe_clip_state * ); + +void softpipe_set_constant_buffer(struct pipe_context *, + uint shader, uint index, + const struct pipe_constant_buffer *buf); + +void *softpipe_create_fs_state(struct pipe_context *, + const struct pipe_shader_state *); +void softpipe_bind_fs_state(struct pipe_context *, void *); +void softpipe_delete_fs_state(struct pipe_context *, void *); +void *softpipe_create_vs_state(struct pipe_context *, + const struct pipe_shader_state *); +void softpipe_bind_vs_state(struct pipe_context *, void *); +void softpipe_delete_vs_state(struct pipe_context *, void *); + +void softpipe_set_polygon_stipple( struct pipe_context *, + const struct pipe_poly_stipple * ); + +void softpipe_set_scissor_state( struct pipe_context *, + const struct pipe_scissor_state * ); + +void softpipe_set_sampler_textures( struct pipe_context *, + unsigned num, + struct pipe_texture ** ); + +void softpipe_set_viewport_state( struct pipe_context *, + const struct pipe_viewport_state * ); + +void softpipe_set_vertex_element(struct pipe_context *, + unsigned index, + const struct pipe_vertex_element *); + +void softpipe_set_vertex_buffer(struct pipe_context *, + unsigned index, + const struct pipe_vertex_buffer *); + + +void softpipe_update_derived( struct softpipe_context *softpipe ); + + +boolean softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, + unsigned start, unsigned count); + +boolean softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count); + + +void +softpipe_map_surfaces(struct softpipe_context *sp); + +void +softpipe_unmap_surfaces(struct softpipe_context *sp); + +void +softpipe_map_texture_surfaces(struct softpipe_context *sp); + +void +softpipe_unmap_texture_surfaces(struct softpipe_context *sp); + + +struct vertex_info * +softpipe_get_vertex_info(struct softpipe_context *softpipe); + +struct vertex_info * +softpipe_get_vbuf_vertex_info(struct softpipe_context *softpipe); + + +#endif diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 1d6dd17d1d..7cf85b9207 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -50,45 +50,67 @@ softpipe_create_sampler_state(struct pipe_context *pipe, return mem_dup(sampler, sizeof(*sampler)); } + void -softpipe_bind_sampler_state(struct pipe_context *pipe, - unsigned unit, void *sampler) +softpipe_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) { struct softpipe_context *softpipe = softpipe_context(pipe); + assert(num <= PIPE_MAX_SAMPLERS); + + /* Check for no-op */ + if (num == softpipe->num_samplers && + !memcmp(softpipe->sampler, sampler, num * sizeof(void *))) + return; + draw_flush(softpipe->draw); - assert(unit < PIPE_MAX_SAMPLERS); - softpipe->sampler[unit] = (struct pipe_sampler_state *)sampler; + memcpy(softpipe->sampler, sampler, num * sizeof(void *)); + memset(&softpipe->sampler[num], 0, (PIPE_MAX_SAMPLERS - num) * + sizeof(void *)); + + softpipe->num_samplers = num; softpipe->dirty |= SP_NEW_SAMPLER; } void -softpipe_delete_sampler_state(struct pipe_context *pipe, - void *sampler) +softpipe_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) { - FREE( sampler ); -} + struct softpipe_context *softpipe = softpipe_context(pipe); + uint i; + assert(num <= PIPE_MAX_SAMPLERS); -void -softpipe_set_sampler_texture(struct pipe_context *pipe, - unsigned unit, - struct pipe_texture *texture) -{ - struct softpipe_context *softpipe = softpipe_context(pipe); + /* Check for no-op */ + if (num == softpipe->num_textures && + !memcmp(softpipe->texture, texture, num * sizeof(struct pipe_texture *))) + return; draw_flush(softpipe->draw); - assert(unit < PIPE_MAX_SAMPLERS); - pipe_texture_reference(&softpipe->texture[unit], texture); + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + struct pipe_texture *tex = i < num ? texture[i] : NULL; + + pipe_texture_reference(&softpipe->texture[i], tex); + sp_tile_cache_set_texture(pipe, softpipe->tex_cache[i], tex); + } - sp_tile_cache_set_texture(pipe, softpipe->tex_cache[unit], texture); + softpipe->num_textures = num; softpipe->dirty |= SP_NEW_TEXTURE; } +void +softpipe_delete_sampler_state(struct pipe_context *pipe, + void *sampler) +{ + FREE( sampler ); +} + + diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index c605ed925b..bbf2d80308 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -1,202 +1,202 @@ -/************************************************************************** - * - * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - /* - * Authors: - * Keith Whitwell - * Michel Dänzer - */ - -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_inlines.h" -#include "pipe/p_util.h" -#include "pipe/p_winsys.h" - -#include "sp_context.h" -#include "sp_state.h" -#include "sp_texture.h" -#include "sp_tile_cache.h" - - -/* Simple, maximally packed layout. - */ - -static unsigned minify( unsigned d ) -{ - return MAX2(1, d>>1); -} - - -static void -softpipe_texture_layout(struct softpipe_texture * spt) -{ - struct pipe_texture *pt = &spt->base; - unsigned level; - unsigned width = pt->width[0]; - unsigned height = pt->height[0]; - unsigned depth = pt->depth[0]; - - spt->buffer_size = 0; - - for (level = 0; level <= pt->last_level; level++) { - pt->width[level] = width; - pt->height[level] = height; - pt->depth[level] = depth; - - spt->level_offset[level] = spt->buffer_size; - - spt->buffer_size += ((pt->compressed) ? MAX2(1, height/4) : height) * - ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * - width * pt->cpp; - - width = minify(width); - height = minify(height); - depth = minify(depth); - } -} - - -static struct pipe_texture * -softpipe_texture_create_screen(struct pipe_screen *screen, - const struct pipe_texture *templat) -{ - struct pipe_winsys *ws = screen->winsys; - struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture); - if (!spt) - return NULL; - - spt->base = *templat; - spt->base.refcount = 1; - spt->base.screen = screen; - - softpipe_texture_layout(spt); - - spt->buffer = ws->buffer_create(ws, 32, - PIPE_BUFFER_USAGE_PIXEL, - spt->buffer_size); - if (!spt->buffer) { - FREE(spt); - return NULL; - } - - assert(spt->base.refcount == 1); - - return &spt->base; -} - - -static void -softpipe_texture_release_screen(struct pipe_screen *screen, - struct pipe_texture **pt) -{ - if (!*pt) - return; - - /* - DBG("%s %p refcount will be %d\n", - __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); - */ - if (--(*pt)->refcount <= 0) { - struct softpipe_texture *spt = softpipe_texture(*pt); - - /* - DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); - */ - - pipe_buffer_reference(screen->winsys, &spt->buffer, NULL); - - FREE(spt); - } - *pt = NULL; -} - - -static struct pipe_surface * -softpipe_get_tex_surface_screen(struct pipe_screen *screen, - struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) -{ - struct pipe_winsys *ws = screen->winsys; - struct softpipe_texture *spt = softpipe_texture(pt); - struct pipe_surface *ps; - - assert(level <= pt->last_level); - - ps = ws->surface_alloc(ws); - if (ps) { - assert(ps->refcount); - assert(ps->winsys); - pipe_buffer_reference(ws, &ps->buffer, spt->buffer); - ps->format = pt->format; - ps->cpp = pt->cpp; - ps->width = pt->width[level]; - ps->height = pt->height[level]; - ps->pitch = ps->width; - ps->offset = spt->level_offset[level]; - - if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { - ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * - (pt->compressed ? ps->height/4 : ps->height) * - ps->width * ps->cpp; - } - else { - assert(face == 0); - assert(zslice == 0); - } - } - return ps; -} - - -static void -softpipe_texture_update(struct pipe_context *pipe, - struct pipe_texture *texture) -{ - struct softpipe_context *softpipe = softpipe_context(pipe); - uint unit; - for (unit = 0; unit < PIPE_MAX_SAMPLERS; unit++) { - if (softpipe->texture[unit] == texture) { - sp_flush_tile_cache(softpipe, softpipe->tex_cache[unit]); - } - } -} - - -void -softpipe_init_texture_funcs( struct softpipe_context *softpipe ) -{ - softpipe->pipe.texture_update = softpipe_texture_update; -} - - -void -softpipe_init_screen_texture_funcs(struct pipe_screen *screen) -{ - screen->texture_create = softpipe_texture_create_screen; - screen->texture_release = softpipe_texture_release_screen; - screen->get_tex_surface = softpipe_get_tex_surface_screen; -} +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + /* + * Authors: + * Keith Whitwell + * Michel Dänzer + */ + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" + +#include "sp_context.h" +#include "sp_state.h" +#include "sp_texture.h" +#include "sp_tile_cache.h" + + +/* Simple, maximally packed layout. + */ + +static unsigned minify( unsigned d ) +{ + return MAX2(1, d>>1); +} + + +static void +softpipe_texture_layout(struct softpipe_texture * spt) +{ + struct pipe_texture *pt = &spt->base; + unsigned level; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; + unsigned depth = pt->depth[0]; + + spt->buffer_size = 0; + + for (level = 0; level <= pt->last_level; level++) { + pt->width[level] = width; + pt->height[level] = height; + pt->depth[level] = depth; + + spt->level_offset[level] = spt->buffer_size; + + spt->buffer_size += ((pt->compressed) ? MAX2(1, height/4) : height) * + ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * + width * pt->cpp; + + width = minify(width); + height = minify(height); + depth = minify(depth); + } +} + + +static struct pipe_texture * +softpipe_texture_create_screen(struct pipe_screen *screen, + const struct pipe_texture *templat) +{ + struct pipe_winsys *ws = screen->winsys; + struct softpipe_texture *spt = CALLOC_STRUCT(softpipe_texture); + if (!spt) + return NULL; + + spt->base = *templat; + spt->base.refcount = 1; + spt->base.screen = screen; + + softpipe_texture_layout(spt); + + spt->buffer = ws->buffer_create(ws, 32, + PIPE_BUFFER_USAGE_PIXEL, + spt->buffer_size); + if (!spt->buffer) { + FREE(spt); + return NULL; + } + + assert(spt->base.refcount == 1); + + return &spt->base; +} + + +static void +softpipe_texture_release_screen(struct pipe_screen *screen, + struct pipe_texture **pt) +{ + if (!*pt) + return; + + /* + DBG("%s %p refcount will be %d\n", + __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); + */ + if (--(*pt)->refcount <= 0) { + struct softpipe_texture *spt = softpipe_texture(*pt); + + /* + DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); + */ + + pipe_buffer_reference(screen->winsys, &spt->buffer, NULL); + + FREE(spt); + } + *pt = NULL; +} + + +static struct pipe_surface * +softpipe_get_tex_surface_screen(struct pipe_screen *screen, + struct pipe_texture *pt, + unsigned face, unsigned level, unsigned zslice) +{ + struct pipe_winsys *ws = screen->winsys; + struct softpipe_texture *spt = softpipe_texture(pt); + struct pipe_surface *ps; + + assert(level <= pt->last_level); + + ps = ws->surface_alloc(ws); + if (ps) { + assert(ps->refcount); + assert(ps->winsys); + pipe_buffer_reference(ws, &ps->buffer, spt->buffer); + ps->format = pt->format; + ps->cpp = pt->cpp; + ps->width = pt->width[level]; + ps->height = pt->height[level]; + ps->pitch = ps->width; + ps->offset = spt->level_offset[level]; + + if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { + ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * + (pt->compressed ? ps->height/4 : ps->height) * + ps->width * ps->cpp; + } + else { + assert(face == 0); + assert(zslice == 0); + } + } + return ps; +} + + +static void +softpipe_texture_update(struct pipe_context *pipe, + struct pipe_texture *texture) +{ + struct softpipe_context *softpipe = softpipe_context(pipe); + uint unit; + for (unit = 0; unit < softpipe->num_textures; unit++) { + if (softpipe->texture[unit] == texture) { + sp_flush_tile_cache(softpipe, softpipe->tex_cache[unit]); + } + } +} + + +void +softpipe_init_texture_funcs( struct softpipe_context *softpipe ) +{ + softpipe->pipe.texture_update = softpipe_texture_update; +} + + +void +softpipe_init_screen_texture_funcs(struct pipe_screen *screen) +{ + screen->texture_create = softpipe_texture_create_screen; + screen->texture_release = softpipe_texture_release_screen; + screen->get_tex_surface = softpipe_get_tex_surface_screen; +} diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 1501b52f3e..b64948f0f3 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -100,7 +100,7 @@ struct pipe_context { void * (*create_sampler_state)(struct pipe_context *, const struct pipe_sampler_state *); - void (*bind_sampler_state)(struct pipe_context *, unsigned unit, void *); + void (*bind_sampler_states)(struct pipe_context *, unsigned num, void **); void (*delete_sampler_state)(struct pipe_context *, void *); void * (*create_rasterizer_state)(struct pipe_context *, @@ -148,9 +148,9 @@ struct pipe_context { /* Currently a sampler is constrained to sample from a single texture: */ - void (*set_sampler_texture)( struct pipe_context *, - unsigned sampler, - struct pipe_texture * ); + void (*set_sampler_textures)( struct pipe_context *, + unsigned num, + struct pipe_texture ** ); void (*set_viewport_state)( struct pipe_context *, const struct pipe_viewport_state * ); diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c index 92263cb688..1000f98ffc 100644 --- a/src/mesa/state_tracker/st_atom_sampler.c +++ b/src/mesa/state_tracker/st_atom_sampler.c @@ -120,10 +120,11 @@ update_samplers(struct st_context *st) const struct st_fragment_program *fs = st->fp; GLuint su; + st->state.num_samplers = 0; + /* loop over sampler units (aka tex image units) */ for (su = 0; su < st->ctx->Const.MaxTextureImageUnits; su++) { struct pipe_sampler_state sampler; - const struct cso_sampler *cso; memset(&sampler, 0, sizeof(sampler)); @@ -168,17 +169,16 @@ update_samplers(struct st_context *st) = st_compare_func_to_pipe(texobj->CompareFunc); } + st->state.num_samplers = su + 1; + /* XXX more sampler state here */ } - cso = st_cached_sampler_state(st, &sampler); - - if (cso != st->state.sampler[su]) { - /* state has changed */ - st->state.sampler[su] = cso; - st->pipe->bind_sampler_state(st->pipe, su, cso->data); - } + st->state.sampler[su] = st_cached_sampler_state(st, &sampler)->data; } + + st->pipe->bind_sampler_states(st->pipe, st->state.num_samplers, + st->state.sampler); } diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c index 697d2cdfb4..e53a897637 100644 --- a/src/mesa/state_tracker/st_atom_texture.c +++ b/src/mesa/state_tracker/st_atom_texture.c @@ -37,6 +37,7 @@ #include "st_texture.h" #include "st_cb_texture.h" #include "pipe/p_context.h" +#include "pipe/p_inlines.h" /** @@ -51,6 +52,8 @@ update_textures(struct st_context *st) struct gl_fragment_program *fprog = st->ctx->FragmentProgram._Current; GLuint unit; + st->state.num_textures = 0; + for (unit = 0; unit < st->ctx->Const.MaxTextureCoordUnits; unit++) { const GLuint su = fprog->Base.SamplerUnits[unit]; struct gl_texture_object *texObj = st->ctx->Texture.Unit[su]._Current; @@ -62,6 +65,8 @@ update_textures(struct st_context *st) retval = st_finalize_texture(st->ctx, st->pipe, texObj, &flush); /* XXX retval indicates whether there's a texture border */ + + st->state.num_textures = unit + 1; } /* XXX: need to ensure that textures are unbound/removed from @@ -70,18 +75,16 @@ update_textures(struct st_context *st) */ pt = st_get_stobj_texture(stObj); - - if (st->state.sampler_texture[unit] != pt) { - st->state.sampler_texture[unit] = pt; - st->pipe->set_sampler_texture(st->pipe, unit, pt); - } + pipe_texture_reference(&st->state.sampler_texture[unit], pt); if (stObj && stObj->dirtyData) { st->pipe->texture_update(st->pipe, pt); stObj->dirtyData = GL_FALSE; } - } + + st->pipe->set_sampler_textures(st->pipe, st->state.num_textures, + st->state.sampler_texture); } diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 65c9fda9cb..dee4c4132a 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -641,7 +641,6 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, const GLfloat *color, GLboolean invertTex) { - const GLuint unit = 0; struct pipe_context *pipe = ctx->st->pipe; GLfloat x0, y0, x1, y1; GLuint maxSize; @@ -684,7 +683,7 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; sampler.normalized_coords = 1; cso = st_cached_sampler_state(ctx->st, &sampler); - pipe->bind_sampler_state(pipe, unit, cso->data); + pipe->bind_sampler_states(pipe, 1, (void**)&cso->data); } /* viewport state: viewport matching window dims */ @@ -705,7 +704,7 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, /* texture state: */ { - pipe->set_sampler_texture(pipe, unit, pt); + pipe->set_sampler_textures(pipe, 1, &pt); } /* Compute window coords (y=0=bottom) with pixel zoom. @@ -727,8 +726,10 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, pipe->bind_rasterizer_state(pipe, ctx->st->state.rasterizer->data); pipe->bind_fs_state(pipe, ctx->st->state.fs->data); pipe->bind_vs_state(pipe, ctx->st->state.vs->cso->data); - pipe->set_sampler_texture(pipe, unit, ctx->st->state.sampler_texture[unit]); - pipe->bind_sampler_state(pipe, unit, ctx->st->state.sampler[unit]->data); + pipe->set_sampler_textures(pipe, ctx->st->state.num_textures, + ctx->st->state.sampler_texture); + pipe->bind_sampler_states(pipe, ctx->st->state.num_samplers, + ctx->st->state.sampler); pipe->set_viewport_state(pipe, &ctx->st->state.viewport); } diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 1fbf9721e7..897a5109b7 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -76,7 +76,7 @@ struct st_context struct { const struct cso_alpha_test *alpha_test; const struct cso_blend *blend; - const struct cso_sampler *sampler[PIPE_MAX_SAMPLERS]; + void *sampler[PIPE_MAX_SAMPLERS]; const struct cso_depth_stencil_alpha *depth_stencil; const struct cso_rasterizer *rasterizer; const struct cso_fragment_shader *fs; @@ -90,6 +90,9 @@ struct st_context struct pipe_poly_stipple poly_stipple; struct pipe_scissor_state scissor; struct pipe_viewport_state viewport; + + GLuint num_samplers; + GLuint num_textures; } state; struct { diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 841d77abbc..3723e26d45 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -284,7 +284,7 @@ st_render_mipmap(struct st_context *st, */ sampler.min_lod = sampler.max_lod = srcLevel; sampler_cso = pipe->create_sampler_state(pipe, &sampler); - pipe->bind_sampler_state(pipe, 0, sampler_cso); + pipe->bind_sampler_states(pipe, 1, &sampler_cso); simple_viewport(pipe, pt->width[dstLevel], pt->height[dstLevel]); @@ -293,7 +293,7 @@ st_render_mipmap(struct st_context *st, * the right mipmap level. */ /*pt->first_level = srcLevel;*/ - pipe->set_sampler_texture(pipe, 0, pt); + pipe->set_sampler_textures(pipe, 1, &pt); draw_quad(st->ctx); @@ -310,9 +310,10 @@ st_render_mipmap(struct st_context *st, pipe->bind_fs_state(pipe, st->state.fs->data); if (st->state.vs) pipe->bind_vs_state(pipe, st->state.vs->cso->data); - if (st->state.sampler[0]) - pipe->bind_sampler_state(pipe, 0, st->state.sampler[0]->data); - pipe->set_sampler_texture(pipe, 0, st->state.sampler_texture[0]); + pipe->bind_sampler_states(pipe, st->state.num_samplers, + st->state.sampler); + pipe->set_sampler_textures(pipe, st->state.num_textures, + st->state.sampler_texture); pipe->set_viewport_state(pipe, &st->state.viewport); return TRUE; -- cgit v1.2.3 From 8143adafddabb6ac3a21c18927ae41425f26bfff Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Thu, 6 Mar 2008 19:57:41 +0100 Subject: gallium: Surround externs with extern "C". --- src/gallium/auxiliary/util/p_tile.h | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_tile.h b/src/gallium/auxiliary/util/p_tile.h index 318b6d11a6..fdc80a13b3 100644 --- a/src/gallium/auxiliary/util/p_tile.h +++ b/src/gallium/auxiliary/util/p_tile.h @@ -52,44 +52,50 @@ pipe_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_surface *ps) return FALSE; } +#ifdef __cplusplus +extern "C" { +#endif -extern void +void pipe_get_tile_raw(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, void *p, int dst_stride); -extern void +void pipe_put_tile_raw(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, const void *p, int src_stride); -extern void +void pipe_get_tile_rgba(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, float *p); -extern void +void pipe_put_tile_rgba(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, const float *p); -extern void +void pipe_get_tile_z(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, uint *z); -extern void +void pipe_put_tile_z(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, const uint *z); +#ifdef __cplusplus +} +#endif #endif -- cgit v1.2.3 From fc96aec9b7aceb4a0e7471e797abe8a00fc40cf2 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 8 Mar 2008 16:29:12 +0000 Subject: gallium: Document debug_printf usage. --- src/gallium/auxiliary/util/Makefile | 3 ++- src/gallium/include/pipe/p_debug.h | 14 ++++++++++++++ 2 files changed, 16 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 906a46d6b4..2a3a9380b3 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -7,7 +7,8 @@ C_SOURCES = \ p_debug.c \ p_tile.c \ p_util.c \ - u_mm.c + u_mm.c \ + u_snprintf.c include ../../Makefile.template diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index 2a11627b36..a14a1fc5f6 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -58,8 +58,22 @@ extern "C" { #endif +/** + * Print debug messages. + * + * A debug message will be printed regardless of the DEBUG/NDEBUG macros. + * + * The actual channel used to output debug message is platform specific. To + * avoid misformating or truncation, follow these rules of thumb: + * - output whole lines + * - avoid outputing large strings (512 bytes is the current maximum length + * that is guaranteed to be printed in all platforms) + */ void debug_printf(const char *format, ...); +/** + * @sa debug_printf + */ void debug_vprintf(const char *format, va_list ap); void debug_assert_fail(const char *expr, const char *file, unsigned line); -- cgit v1.2.3 From 5d802d8c8460cecf306b130eb29ef05069173e30 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 9 Mar 2008 15:09:55 +0100 Subject: cso: add a higher-level interface which does all pipe interactions to set a given state --- src/gallium/auxiliary/cso_cache/Makefile | 1 + src/gallium/auxiliary/cso_cache/SConscript | 1 + src/gallium/auxiliary/cso_cache/cso_context.c | 340 ++++++++++++++++++++++++++ src/gallium/auxiliary/cso_cache/cso_context.h | 85 +++++++ 4 files changed, 427 insertions(+) create mode 100644 src/gallium/auxiliary/cso_cache/cso_context.c create mode 100644 src/gallium/auxiliary/cso_cache/cso_context.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/Makefile b/src/gallium/auxiliary/cso_cache/Makefile index 3e49266163..6bd6602088 100644 --- a/src/gallium/auxiliary/cso_cache/Makefile +++ b/src/gallium/auxiliary/cso_cache/Makefile @@ -4,6 +4,7 @@ include $(TOP)/configs/current LIBNAME = cso_cache C_SOURCES = \ + cso_context.c \ cso_cache.c \ cso_hash.c diff --git a/src/gallium/auxiliary/cso_cache/SConscript b/src/gallium/auxiliary/cso_cache/SConscript index 9751881613..651e68a191 100644 --- a/src/gallium/auxiliary/cso_cache/SConscript +++ b/src/gallium/auxiliary/cso_cache/SConscript @@ -3,6 +3,7 @@ Import('*') cso_cache = env.ConvenienceLibrary( target = 'cso_cache', source = [ + 'cso_context.c', 'cso_cache.c', 'cso_hash.c', ]) diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c new file mode 100644 index 0000000000..77237464c5 --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -0,0 +1,340 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* Wrap the cso cache & hash mechanisms in a simplified + * pipe-driver-specific interface. + * + * Authors: + * Zack Rusin + * Keith Whitwell + */ + +#include "pipe/p_state.h" +#include "pipe/p_util.h" + +#include "cso_cache/cso_context.h" +#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_hash.h" + +struct cso_context { + struct pipe_context *pipe; + struct cso_cache *cache; + + struct { + void *samplers[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + } hw; + + void *samplers[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + + void *blend; + void *depth_stencil; + void *rasterizer; + void *fragment_shader; + void *vertex_shader; +}; + + +struct cso_context *cso_create_context( struct pipe_context *pipe ) +{ + struct cso_context *ctx = CALLOC_STRUCT(cso_context); + if (ctx == NULL) + goto out; + + ctx->cache = cso_cache_create(); + if (ctx->cache == NULL) + goto out; + + ctx->pipe = pipe; + + return ctx; + +out: + cso_destroy_context( ctx ); + return NULL; +} + + +void cso_destroy_context( struct cso_context *ctx ) +{ + if (ctx == NULL) + return; + +/* + if (ctx->pipe) + ctx->pipe->flush( ctx->pipe, PIPE_FLUSH_UNBIND_ALL ); +*/ + + if (ctx->cache) + cso_cache_delete( ctx->cache ); + + FREE( ctx ); +} + + +/* Those function will either find the state of the given template + * in the cache or they will create a new state from the given + * template, insert it in the cache and return it. + */ + +/* + * If the driver returns 0 from the create method then they will assign + * the data member of the cso to be the template itself. + */ + +void cso_set_blend(struct cso_context *ctx, + const struct pipe_blend_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_blend_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_BLEND, + (void*)templ); + void *handle; + + if (cso_hash_iter_is_null(iter)) { + struct cso_blend *cso = malloc(sizeof(struct cso_blend)); + + cso->state = *templ; + cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_blend_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_BLEND, cso); + handle = cso->data; + } + else { + handle = ((struct cso_blend *)cso_hash_iter_data(iter))->data; + } + + if (ctx->blend != handle) { + ctx->blend = handle; + ctx->pipe->bind_blend_state(ctx->pipe, handle); + } +} + +void cso_single_sampler(struct cso_context *ctx, + unsigned idx, + const struct pipe_sampler_state *templ) +{ + void *handle = NULL; + + if (templ != NULL) { + unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_sampler_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_SAMPLER, + (void*)templ); + + if (cso_hash_iter_is_null(iter)) { + struct cso_sampler *cso = malloc(sizeof(struct cso_sampler)); + + cso->state = *templ; + cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso); + handle = cso->data; + } + else { + handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data; + } + } + + ctx->samplers[idx] = handle; +} + +void cso_single_sampler_done( struct cso_context *ctx ) +{ + unsigned i; + + for (i = 0; i < 8; i++) + if (ctx->samplers[i] == NULL) + break; + + ctx->nr_samplers = i; + + if (ctx->hw.nr_samplers != ctx->nr_samplers || + memcmp(ctx->hw.samplers, + ctx->samplers, + ctx->nr_samplers * sizeof(void *)) != 0) + { + memcpy(ctx->hw.samplers, ctx->samplers, ctx->nr_samplers * sizeof(void *)); + ctx->hw.nr_samplers = ctx->nr_samplers; + + ctx->pipe->bind_sampler_states(ctx->pipe, ctx->nr_samplers, ctx->samplers); + } +} + +void cso_set_samplers( struct cso_context *ctx, + unsigned nr, + const struct pipe_sampler_state **templates ) +{ + unsigned i; + + /* TODO: fastpath + */ + + for (i = 0; i < nr; i++) + cso_single_sampler( ctx, i, templates[i] ); + + for ( ; i < ctx->nr_samplers; i++) + cso_single_sampler( ctx, i, NULL ); + + cso_single_sampler_done( ctx ); +} + +void cso_set_depth_stencil_alpha(struct cso_context *ctx, + const struct pipe_depth_stencil_alpha_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, + sizeof(struct pipe_depth_stencil_alpha_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, + CSO_DEPTH_STENCIL_ALPHA, + (void*)templ); + void *handle; + + if (cso_hash_iter_is_null(iter)) { + struct cso_depth_stencil_alpha *cso = malloc(sizeof(struct cso_depth_stencil_alpha)); + + cso->state = *templ; + cso->data = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_depth_stencil_alpha_state; + cso->context = ctx->pipe; + + cso_insert_state(ctx->cache, hash_key, CSO_DEPTH_STENCIL_ALPHA, cso); + handle = cso->data; + } + else { + handle = ((struct cso_depth_stencil_alpha *)cso_hash_iter_data(iter))->data; + } + + if (ctx->depth_stencil != handle) { + ctx->depth_stencil = handle; + ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, handle); + } +} + + + +void cso_set_rasterizer(struct cso_context *ctx, + const struct pipe_rasterizer_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, + sizeof(struct pipe_rasterizer_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_RASTERIZER, + (void*)templ); + void *handle = NULL; + + if (cso_hash_iter_is_null(iter)) { + struct cso_rasterizer *cso = malloc(sizeof(struct cso_rasterizer)); + + cso->state = *templ; + cso->data = ctx->pipe->create_rasterizer_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_rasterizer_state; + cso->context = ctx->pipe; + + cso_insert_state(ctx->cache, hash_key, CSO_RASTERIZER, cso); + handle = cso->data; + } + else { + handle = ((struct cso_rasterizer *)cso_hash_iter_data(iter))->data; + } + + if (ctx->rasterizer != handle) { + ctx->rasterizer = handle; + ctx->pipe->bind_rasterizer_state(ctx->pipe, handle); + } +} + + + + + +void cso_set_fragment_shader(struct cso_context *ctx, + const struct pipe_shader_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, + sizeof(struct pipe_shader_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_FRAGMENT_SHADER, + (void*)templ); + void *handle = NULL; + + if (cso_hash_iter_is_null(iter)) { + struct cso_fragment_shader *cso = malloc(sizeof(struct cso_fragment_shader)); + + cso->state = *templ; + cso->data = ctx->pipe->create_fs_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_fs_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_FRAGMENT_SHADER, cso); + handle = cso->data; + } + else { + handle = ((struct cso_fragment_shader *)cso_hash_iter_data(iter))->data; + } + + if (ctx->fragment_shader != handle) { + ctx->fragment_shader = handle; + ctx->pipe->bind_fs_state(ctx->pipe, handle); + } +} + +void cso_set_vertex_shader(struct cso_context *ctx, + const struct pipe_shader_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, + sizeof(struct pipe_shader_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_VERTEX_SHADER, + (void*)templ); + void *handle = NULL; + + if (cso_hash_iter_is_null(iter)) { + struct cso_vertex_shader *cso = malloc(sizeof(struct cso_vertex_shader)); + + cso->state = *templ; + cso->data = ctx->pipe->create_vs_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_vs_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_VERTEX_SHADER, cso); + handle = cso->data; + } + else { + handle = ((struct cso_vertex_shader *)cso_hash_iter_data(iter))->data; + } + + if (ctx->vertex_shader != handle) { + ctx->vertex_shader = handle; + ctx->pipe->bind_fs_state(ctx->pipe, handle); + } +} diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h new file mode 100644 index 0000000000..1f2a630804 --- /dev/null +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -0,0 +1,85 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef CSO_CONTEXT_H +#define CSO_CONTEXT_H + +#include "pipe/p_context.h" +#include "pipe/p_state.h" + + +#ifdef __cplusplus +extern "C" { +#endif + +struct cso_context; + +struct cso_context *cso_create_context( struct pipe_context *pipe ); + +void cso_set_blend( struct cso_context *cso, + const struct pipe_blend_state *blend ); + +void cso_set_depth_stencil_alpha( struct cso_context *cso, + const struct pipe_depth_stencil_alpha_state *dsa ); + +void cso_set_rasterizer( struct cso_context *cso, + const struct pipe_rasterizer_state *rasterizer ); + +void cso_set_samplers( struct cso_context *cso, + unsigned count, + const struct pipe_sampler_state **states ); + +/* Alternate interface to support state trackers that like to modify + * samplers one at a time: + */ +void cso_single_sampler( struct cso_context *cso, + unsigned nr, + const struct pipe_sampler_state *states ); + +void cso_single_sampler_done( struct cso_context *cso ); + + +/* These aren't really sensible -- most of the time the api provides + * object semantics for shaders anyway, and the cases where it doesn't + * (eg mesa's internall-generated texenv programs), it will be up to + * the state tracker to implement their own specialized caching. + */ +void cso_set_fragment_shader( struct cso_context *cso, + const struct pipe_shader_state *shader ); + +void cso_set_vertex_shader( struct cso_context *cso, + const struct pipe_shader_state *shader ); + +void cso_destroy_context( struct cso_context *cso ); + + +#ifdef __cplusplus +} +#endif + +#endif -- cgit v1.2.3 From ac87bc18359825890a53d4dbfda5c6eecd916afd Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 9 Mar 2008 17:05:46 +0100 Subject: cso: Use MALLOC --- src/gallium/auxiliary/cso_cache/cso_context.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 77237464c5..051e7d96d3 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -117,7 +117,7 @@ void cso_set_blend(struct cso_context *ctx, void *handle; if (cso_hash_iter_is_null(iter)) { - struct cso_blend *cso = malloc(sizeof(struct cso_blend)); + struct cso_blend *cso = MALLOC(sizeof(struct cso_blend)); cso->state = *templ; cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state); @@ -150,7 +150,7 @@ void cso_single_sampler(struct cso_context *ctx, (void*)templ); if (cso_hash_iter_is_null(iter)) { - struct cso_sampler *cso = malloc(sizeof(struct cso_sampler)); + struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); cso->state = *templ; cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); @@ -220,7 +220,7 @@ void cso_set_depth_stencil_alpha(struct cso_context *ctx, void *handle; if (cso_hash_iter_is_null(iter)) { - struct cso_depth_stencil_alpha *cso = malloc(sizeof(struct cso_depth_stencil_alpha)); + struct cso_depth_stencil_alpha *cso = MALLOC(sizeof(struct cso_depth_stencil_alpha)); cso->state = *templ; cso->data = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &cso->state); @@ -253,7 +253,7 @@ void cso_set_rasterizer(struct cso_context *ctx, void *handle = NULL; if (cso_hash_iter_is_null(iter)) { - struct cso_rasterizer *cso = malloc(sizeof(struct cso_rasterizer)); + struct cso_rasterizer *cso = MALLOC(sizeof(struct cso_rasterizer)); cso->state = *templ; cso->data = ctx->pipe->create_rasterizer_state(ctx->pipe, &cso->state); @@ -288,7 +288,7 @@ void cso_set_fragment_shader(struct cso_context *ctx, void *handle = NULL; if (cso_hash_iter_is_null(iter)) { - struct cso_fragment_shader *cso = malloc(sizeof(struct cso_fragment_shader)); + struct cso_fragment_shader *cso = MALLOC(sizeof(struct cso_fragment_shader)); cso->state = *templ; cso->data = ctx->pipe->create_fs_state(ctx->pipe, &cso->state); @@ -319,7 +319,7 @@ void cso_set_vertex_shader(struct cso_context *ctx, void *handle = NULL; if (cso_hash_iter_is_null(iter)) { - struct cso_vertex_shader *cso = malloc(sizeof(struct cso_vertex_shader)); + struct cso_vertex_shader *cso = MALLOC(sizeof(struct cso_vertex_shader)); cso->state = *templ; cso->data = ctx->pipe->create_vs_state(ctx->pipe, &cso->state); -- cgit v1.2.3 From d8d6569e288fe3324473fb19ade798502dfbba8e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 9 Mar 2008 17:06:20 +0100 Subject: cso: fix line endings --- src/gallium/auxiliary/cso_cache/cso_context.c | 680 +++++++++++++------------- 1 file changed, 340 insertions(+), 340 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 051e7d96d3..596e5a9ad6 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -1,340 +1,340 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* Wrap the cso cache & hash mechanisms in a simplified - * pipe-driver-specific interface. - * - * Authors: - * Zack Rusin - * Keith Whitwell - */ - -#include "pipe/p_state.h" -#include "pipe/p_util.h" - -#include "cso_cache/cso_context.h" -#include "cso_cache/cso_cache.h" -#include "cso_cache/cso_hash.h" - -struct cso_context { - struct pipe_context *pipe; - struct cso_cache *cache; - - struct { - void *samplers[PIPE_MAX_SAMPLERS]; - unsigned nr_samplers; - } hw; - - void *samplers[PIPE_MAX_SAMPLERS]; - unsigned nr_samplers; - - void *blend; - void *depth_stencil; - void *rasterizer; - void *fragment_shader; - void *vertex_shader; -}; - - -struct cso_context *cso_create_context( struct pipe_context *pipe ) -{ - struct cso_context *ctx = CALLOC_STRUCT(cso_context); - if (ctx == NULL) - goto out; - - ctx->cache = cso_cache_create(); - if (ctx->cache == NULL) - goto out; - - ctx->pipe = pipe; - - return ctx; - -out: - cso_destroy_context( ctx ); - return NULL; -} - - -void cso_destroy_context( struct cso_context *ctx ) -{ - if (ctx == NULL) - return; - -/* - if (ctx->pipe) - ctx->pipe->flush( ctx->pipe, PIPE_FLUSH_UNBIND_ALL ); -*/ - - if (ctx->cache) - cso_cache_delete( ctx->cache ); - - FREE( ctx ); -} - - -/* Those function will either find the state of the given template - * in the cache or they will create a new state from the given - * template, insert it in the cache and return it. - */ - -/* - * If the driver returns 0 from the create method then they will assign - * the data member of the cso to be the template itself. - */ - -void cso_set_blend(struct cso_context *ctx, - const struct pipe_blend_state *templ) -{ - unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_blend_state)); - struct cso_hash_iter iter = cso_find_state_template(ctx->cache, - hash_key, CSO_BLEND, - (void*)templ); - void *handle; - - if (cso_hash_iter_is_null(iter)) { - struct cso_blend *cso = MALLOC(sizeof(struct cso_blend)); - - cso->state = *templ; - cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state); - cso->delete_state = (cso_state_callback)ctx->pipe->delete_blend_state; - cso->context = ctx->pipe; - - iter = cso_insert_state(ctx->cache, hash_key, CSO_BLEND, cso); - handle = cso->data; - } - else { - handle = ((struct cso_blend *)cso_hash_iter_data(iter))->data; - } - - if (ctx->blend != handle) { - ctx->blend = handle; - ctx->pipe->bind_blend_state(ctx->pipe, handle); - } -} - -void cso_single_sampler(struct cso_context *ctx, - unsigned idx, - const struct pipe_sampler_state *templ) -{ - void *handle = NULL; - - if (templ != NULL) { - unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_sampler_state)); - struct cso_hash_iter iter = cso_find_state_template(ctx->cache, - hash_key, CSO_SAMPLER, - (void*)templ); - - if (cso_hash_iter_is_null(iter)) { - struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); - - cso->state = *templ; - cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); - cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state; - cso->context = ctx->pipe; - - iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso); - handle = cso->data; - } - else { - handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data; - } - } - - ctx->samplers[idx] = handle; -} - -void cso_single_sampler_done( struct cso_context *ctx ) -{ - unsigned i; - - for (i = 0; i < 8; i++) - if (ctx->samplers[i] == NULL) - break; - - ctx->nr_samplers = i; - - if (ctx->hw.nr_samplers != ctx->nr_samplers || - memcmp(ctx->hw.samplers, - ctx->samplers, - ctx->nr_samplers * sizeof(void *)) != 0) - { - memcpy(ctx->hw.samplers, ctx->samplers, ctx->nr_samplers * sizeof(void *)); - ctx->hw.nr_samplers = ctx->nr_samplers; - - ctx->pipe->bind_sampler_states(ctx->pipe, ctx->nr_samplers, ctx->samplers); - } -} - -void cso_set_samplers( struct cso_context *ctx, - unsigned nr, - const struct pipe_sampler_state **templates ) -{ - unsigned i; - - /* TODO: fastpath - */ - - for (i = 0; i < nr; i++) - cso_single_sampler( ctx, i, templates[i] ); - - for ( ; i < ctx->nr_samplers; i++) - cso_single_sampler( ctx, i, NULL ); - - cso_single_sampler_done( ctx ); -} - -void cso_set_depth_stencil_alpha(struct cso_context *ctx, - const struct pipe_depth_stencil_alpha_state *templ) -{ - unsigned hash_key = cso_construct_key((void*)templ, - sizeof(struct pipe_depth_stencil_alpha_state)); - struct cso_hash_iter iter = cso_find_state_template(ctx->cache, - hash_key, - CSO_DEPTH_STENCIL_ALPHA, - (void*)templ); - void *handle; - - if (cso_hash_iter_is_null(iter)) { - struct cso_depth_stencil_alpha *cso = MALLOC(sizeof(struct cso_depth_stencil_alpha)); - - cso->state = *templ; - cso->data = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &cso->state); - cso->delete_state = (cso_state_callback)ctx->pipe->delete_depth_stencil_alpha_state; - cso->context = ctx->pipe; - - cso_insert_state(ctx->cache, hash_key, CSO_DEPTH_STENCIL_ALPHA, cso); - handle = cso->data; - } - else { - handle = ((struct cso_depth_stencil_alpha *)cso_hash_iter_data(iter))->data; - } - - if (ctx->depth_stencil != handle) { - ctx->depth_stencil = handle; - ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, handle); - } -} - - - -void cso_set_rasterizer(struct cso_context *ctx, - const struct pipe_rasterizer_state *templ) -{ - unsigned hash_key = cso_construct_key((void*)templ, - sizeof(struct pipe_rasterizer_state)); - struct cso_hash_iter iter = cso_find_state_template(ctx->cache, - hash_key, CSO_RASTERIZER, - (void*)templ); - void *handle = NULL; - - if (cso_hash_iter_is_null(iter)) { - struct cso_rasterizer *cso = MALLOC(sizeof(struct cso_rasterizer)); - - cso->state = *templ; - cso->data = ctx->pipe->create_rasterizer_state(ctx->pipe, &cso->state); - cso->delete_state = (cso_state_callback)ctx->pipe->delete_rasterizer_state; - cso->context = ctx->pipe; - - cso_insert_state(ctx->cache, hash_key, CSO_RASTERIZER, cso); - handle = cso->data; - } - else { - handle = ((struct cso_rasterizer *)cso_hash_iter_data(iter))->data; - } - - if (ctx->rasterizer != handle) { - ctx->rasterizer = handle; - ctx->pipe->bind_rasterizer_state(ctx->pipe, handle); - } -} - - - - - -void cso_set_fragment_shader(struct cso_context *ctx, - const struct pipe_shader_state *templ) -{ - unsigned hash_key = cso_construct_key((void*)templ, - sizeof(struct pipe_shader_state)); - struct cso_hash_iter iter = cso_find_state_template(ctx->cache, - hash_key, CSO_FRAGMENT_SHADER, - (void*)templ); - void *handle = NULL; - - if (cso_hash_iter_is_null(iter)) { - struct cso_fragment_shader *cso = MALLOC(sizeof(struct cso_fragment_shader)); - - cso->state = *templ; - cso->data = ctx->pipe->create_fs_state(ctx->pipe, &cso->state); - cso->delete_state = (cso_state_callback)ctx->pipe->delete_fs_state; - cso->context = ctx->pipe; - - iter = cso_insert_state(ctx->cache, hash_key, CSO_FRAGMENT_SHADER, cso); - handle = cso->data; - } - else { - handle = ((struct cso_fragment_shader *)cso_hash_iter_data(iter))->data; - } - - if (ctx->fragment_shader != handle) { - ctx->fragment_shader = handle; - ctx->pipe->bind_fs_state(ctx->pipe, handle); - } -} - -void cso_set_vertex_shader(struct cso_context *ctx, - const struct pipe_shader_state *templ) -{ - unsigned hash_key = cso_construct_key((void*)templ, - sizeof(struct pipe_shader_state)); - struct cso_hash_iter iter = cso_find_state_template(ctx->cache, - hash_key, CSO_VERTEX_SHADER, - (void*)templ); - void *handle = NULL; - - if (cso_hash_iter_is_null(iter)) { - struct cso_vertex_shader *cso = MALLOC(sizeof(struct cso_vertex_shader)); - - cso->state = *templ; - cso->data = ctx->pipe->create_vs_state(ctx->pipe, &cso->state); - cso->delete_state = (cso_state_callback)ctx->pipe->delete_vs_state; - cso->context = ctx->pipe; - - iter = cso_insert_state(ctx->cache, hash_key, CSO_VERTEX_SHADER, cso); - handle = cso->data; - } - else { - handle = ((struct cso_vertex_shader *)cso_hash_iter_data(iter))->data; - } - - if (ctx->vertex_shader != handle) { - ctx->vertex_shader = handle; - ctx->pipe->bind_fs_state(ctx->pipe, handle); - } -} +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* Wrap the cso cache & hash mechanisms in a simplified + * pipe-driver-specific interface. + * + * Authors: + * Zack Rusin + * Keith Whitwell + */ + +#include "pipe/p_state.h" +#include "pipe/p_util.h" + +#include "cso_cache/cso_context.h" +#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_hash.h" + +struct cso_context { + struct pipe_context *pipe; + struct cso_cache *cache; + + struct { + void *samplers[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + } hw; + + void *samplers[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers; + + void *blend; + void *depth_stencil; + void *rasterizer; + void *fragment_shader; + void *vertex_shader; +}; + + +struct cso_context *cso_create_context( struct pipe_context *pipe ) +{ + struct cso_context *ctx = CALLOC_STRUCT(cso_context); + if (ctx == NULL) + goto out; + + ctx->cache = cso_cache_create(); + if (ctx->cache == NULL) + goto out; + + ctx->pipe = pipe; + + return ctx; + +out: + cso_destroy_context( ctx ); + return NULL; +} + + +void cso_destroy_context( struct cso_context *ctx ) +{ + if (ctx == NULL) + return; + +/* + if (ctx->pipe) + ctx->pipe->flush( ctx->pipe, PIPE_FLUSH_UNBIND_ALL ); +*/ + + if (ctx->cache) + cso_cache_delete( ctx->cache ); + + FREE( ctx ); +} + + +/* Those function will either find the state of the given template + * in the cache or they will create a new state from the given + * template, insert it in the cache and return it. + */ + +/* + * If the driver returns 0 from the create method then they will assign + * the data member of the cso to be the template itself. + */ + +void cso_set_blend(struct cso_context *ctx, + const struct pipe_blend_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_blend_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_BLEND, + (void*)templ); + void *handle; + + if (cso_hash_iter_is_null(iter)) { + struct cso_blend *cso = MALLOC(sizeof(struct cso_blend)); + + cso->state = *templ; + cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_blend_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_BLEND, cso); + handle = cso->data; + } + else { + handle = ((struct cso_blend *)cso_hash_iter_data(iter))->data; + } + + if (ctx->blend != handle) { + ctx->blend = handle; + ctx->pipe->bind_blend_state(ctx->pipe, handle); + } +} + +void cso_single_sampler(struct cso_context *ctx, + unsigned idx, + const struct pipe_sampler_state *templ) +{ + void *handle = NULL; + + if (templ != NULL) { + unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_sampler_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_SAMPLER, + (void*)templ); + + if (cso_hash_iter_is_null(iter)) { + struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); + + cso->state = *templ; + cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso); + handle = cso->data; + } + else { + handle = ((struct cso_sampler *)cso_hash_iter_data(iter))->data; + } + } + + ctx->samplers[idx] = handle; +} + +void cso_single_sampler_done( struct cso_context *ctx ) +{ + unsigned i; + + for (i = 0; i < 8; i++) + if (ctx->samplers[i] == NULL) + break; + + ctx->nr_samplers = i; + + if (ctx->hw.nr_samplers != ctx->nr_samplers || + memcmp(ctx->hw.samplers, + ctx->samplers, + ctx->nr_samplers * sizeof(void *)) != 0) + { + memcpy(ctx->hw.samplers, ctx->samplers, ctx->nr_samplers * sizeof(void *)); + ctx->hw.nr_samplers = ctx->nr_samplers; + + ctx->pipe->bind_sampler_states(ctx->pipe, ctx->nr_samplers, ctx->samplers); + } +} + +void cso_set_samplers( struct cso_context *ctx, + unsigned nr, + const struct pipe_sampler_state **templates ) +{ + unsigned i; + + /* TODO: fastpath + */ + + for (i = 0; i < nr; i++) + cso_single_sampler( ctx, i, templates[i] ); + + for ( ; i < ctx->nr_samplers; i++) + cso_single_sampler( ctx, i, NULL ); + + cso_single_sampler_done( ctx ); +} + +void cso_set_depth_stencil_alpha(struct cso_context *ctx, + const struct pipe_depth_stencil_alpha_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, + sizeof(struct pipe_depth_stencil_alpha_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, + CSO_DEPTH_STENCIL_ALPHA, + (void*)templ); + void *handle; + + if (cso_hash_iter_is_null(iter)) { + struct cso_depth_stencil_alpha *cso = MALLOC(sizeof(struct cso_depth_stencil_alpha)); + + cso->state = *templ; + cso->data = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_depth_stencil_alpha_state; + cso->context = ctx->pipe; + + cso_insert_state(ctx->cache, hash_key, CSO_DEPTH_STENCIL_ALPHA, cso); + handle = cso->data; + } + else { + handle = ((struct cso_depth_stencil_alpha *)cso_hash_iter_data(iter))->data; + } + + if (ctx->depth_stencil != handle) { + ctx->depth_stencil = handle; + ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, handle); + } +} + + + +void cso_set_rasterizer(struct cso_context *ctx, + const struct pipe_rasterizer_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, + sizeof(struct pipe_rasterizer_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_RASTERIZER, + (void*)templ); + void *handle = NULL; + + if (cso_hash_iter_is_null(iter)) { + struct cso_rasterizer *cso = MALLOC(sizeof(struct cso_rasterizer)); + + cso->state = *templ; + cso->data = ctx->pipe->create_rasterizer_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_rasterizer_state; + cso->context = ctx->pipe; + + cso_insert_state(ctx->cache, hash_key, CSO_RASTERIZER, cso); + handle = cso->data; + } + else { + handle = ((struct cso_rasterizer *)cso_hash_iter_data(iter))->data; + } + + if (ctx->rasterizer != handle) { + ctx->rasterizer = handle; + ctx->pipe->bind_rasterizer_state(ctx->pipe, handle); + } +} + + + + + +void cso_set_fragment_shader(struct cso_context *ctx, + const struct pipe_shader_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, + sizeof(struct pipe_shader_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_FRAGMENT_SHADER, + (void*)templ); + void *handle = NULL; + + if (cso_hash_iter_is_null(iter)) { + struct cso_fragment_shader *cso = MALLOC(sizeof(struct cso_fragment_shader)); + + cso->state = *templ; + cso->data = ctx->pipe->create_fs_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_fs_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_FRAGMENT_SHADER, cso); + handle = cso->data; + } + else { + handle = ((struct cso_fragment_shader *)cso_hash_iter_data(iter))->data; + } + + if (ctx->fragment_shader != handle) { + ctx->fragment_shader = handle; + ctx->pipe->bind_fs_state(ctx->pipe, handle); + } +} + +void cso_set_vertex_shader(struct cso_context *ctx, + const struct pipe_shader_state *templ) +{ + unsigned hash_key = cso_construct_key((void*)templ, + sizeof(struct pipe_shader_state)); + struct cso_hash_iter iter = cso_find_state_template(ctx->cache, + hash_key, CSO_VERTEX_SHADER, + (void*)templ); + void *handle = NULL; + + if (cso_hash_iter_is_null(iter)) { + struct cso_vertex_shader *cso = MALLOC(sizeof(struct cso_vertex_shader)); + + cso->state = *templ; + cso->data = ctx->pipe->create_vs_state(ctx->pipe, &cso->state); + cso->delete_state = (cso_state_callback)ctx->pipe->delete_vs_state; + cso->context = ctx->pipe; + + iter = cso_insert_state(ctx->cache, hash_key, CSO_VERTEX_SHADER, cso); + handle = cso->data; + } + else { + handle = ((struct cso_vertex_shader *)cso_hash_iter_data(iter))->data; + } + + if (ctx->vertex_shader != handle) { + ctx->vertex_shader = handle; + ctx->pipe->bind_fs_state(ctx->pipe, handle); + } +} -- cgit v1.2.3 From aff4cf19a753baf0428d2bf53614900e5afea8a3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 9 Mar 2008 20:17:02 +0000 Subject: draw: cope with binding NULL vertex shader (on context delete, for instance) --- src/gallium/auxiliary/draw/draw_vertex_shader.c | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c index 1e95355555..133418baca 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c @@ -110,13 +110,20 @@ draw_bind_vertex_shader(struct draw_context *draw, struct draw_vertex_shader *dvs) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + + if (dvs) + { + draw->vertex_shader = dvs; + draw->num_vs_outputs = dvs->info.num_outputs; - draw->vertex_shader = dvs; - draw->num_vs_outputs = dvs->info.num_outputs; + tgsi_exec_machine_init(&draw->machine); - tgsi_exec_machine_init(&draw->machine); - - dvs->prepare( dvs, draw ); + dvs->prepare( dvs, draw ); + } + else { + draw->vertex_shader = NULL; + draw->num_vs_outputs = 0; + } } -- cgit v1.2.3 From b041dbe9019ff8cb16ff15d0baaa803c7dc654db Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 9 Mar 2008 20:21:45 +0000 Subject: gallium: avoid deleting currently-bound CSO's on cache destruction --- src/gallium/auxiliary/cso_cache/cso_context.c | 34 +++++++++++++++++++-------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 596e5a9ad6..fbb26ca511 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -72,6 +72,9 @@ struct cso_context *cso_create_context( struct pipe_context *pipe ) ctx->pipe = pipe; + /* Enable for testing: */ + if (0) cso_set_maximum_cache_size( ctx->cache, 4 ); + return ctx; out: @@ -79,20 +82,31 @@ out: return NULL; } +static void cso_release_all( struct cso_context *ctx ) +{ + if (ctx->pipe) { + ctx->pipe->bind_blend_state( ctx->pipe, NULL ); + ctx->pipe->bind_rasterizer_state( ctx->pipe, NULL ); + ctx->pipe->bind_sampler_states( ctx->pipe, 0, NULL ); + ctx->pipe->bind_depth_stencil_alpha_state( ctx->pipe, NULL ); + ctx->pipe->bind_fs_state( ctx->pipe, NULL ); + ctx->pipe->bind_vs_state( ctx->pipe, NULL ); + } + + if (ctx->cache) { + cso_cache_delete( ctx->cache ); + ctx->cache = NULL; + } +} + void cso_destroy_context( struct cso_context *ctx ) { - if (ctx == NULL) - return; - -/* - if (ctx->pipe) - ctx->pipe->flush( ctx->pipe, PIPE_FLUSH_UNBIND_ALL ); -*/ + debug_printf("%s\n", __FUNCTION__); + + if (ctx) + cso_release_all( ctx ); - if (ctx->cache) - cso_cache_delete( ctx->cache ); - FREE( ctx ); } -- cgit v1.2.3 From b721bc8792f6add71dede11924d7060bbce72f0e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 10 Mar 2008 13:04:13 +0000 Subject: gallium: WinCE portability fixes. --- src/gallium/auxiliary/draw/draw_unfilled.c | 2 +- src/gallium/auxiliary/util/p_debug.c | 8 ++++++++ src/gallium/drivers/softpipe/sp_quad_alpha_test.c | 2 +- src/gallium/drivers/softpipe/sp_quad_blend.c | 6 +++--- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 2 +- src/gallium/include/pipe/p_util.h | 10 ++++++++++ 6 files changed, 24 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_unfilled.c b/src/gallium/auxiliary/draw/draw_unfilled.c index 4d718d514c..b07860cd9e 100644 --- a/src/gallium/auxiliary/draw/draw_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_unfilled.c @@ -129,7 +129,7 @@ static void unfilled_tri( struct draw_stage *stage, points( stage, header ); break; default: - abort(); + assert(0); } } diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 7b3c030956..6255d716a6 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -57,11 +57,15 @@ int rpl_vsnprintf(char *, size_t, const char *, va_list); void debug_vprintf(const char *format, va_list ap) { #ifdef WIN32 +#ifndef WINCE /* EngDebugPrint does not handle float point arguments, so we need to use * our own vsnprintf implementation */ char buf[512 + 1]; rpl_vsnprintf(buf, sizeof(buf), format, ap); rpl_EngDebugPrint("%s", buf); +#else + /* TODO: Implement debug print for WINCE */ +#endif #else vfprintf(stderr, format, ap); #endif @@ -80,7 +84,11 @@ void debug_printf(const char *format, ...) static INLINE void debug_abort(void) { #ifdef WIN32 +#ifndef WINCE EngDebugBreak(); +#else + _asm {int 3}; +#endif #else abort(); #endif diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c index 4ffeac35e1..318916fe30 100644 --- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c @@ -72,7 +72,7 @@ alpha_test_quad(struct quad_stage *qs, struct quad_header *quad) passMask = MASK_ALL; break; default: - abort(); + assert(0); } quad->mask &= passMask; diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 17f3ecd0b8..0b79cfa1ed 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -392,7 +392,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) assert(0); /* to do */ break; default: - abort(); + assert(0); } /* @@ -464,7 +464,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) } break; default: - abort(); + assert(0); } @@ -716,7 +716,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) VEC4_MAX(quadColor[3], source[3], dest[3]); /* A */ break; default: - abort(); + assert(0); } /* pass blended quad to next stage */ diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index a9a0754f27..a1859f9883 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -185,7 +185,7 @@ sp_depth_test_quad(struct quad_stage *qs, struct quad_header *quad) zmask = MASK_ALL; break; default: - abort(); + assert(0); } quad->mask &= zmask; diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index 3b32ba1d24..ef36ce75f7 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -54,7 +54,12 @@ EngFreeMem( static INLINE void * MALLOC( unsigned size ) { +#ifdef WINCE + /* TODO: Need to abstract this */ + return malloc( size ); +#else return EngAllocMem( 0, size, 'D3AG' ); +#endif } static INLINE void * @@ -71,7 +76,12 @@ static INLINE void FREE( void *ptr ) { if( ptr ) { +#ifdef WINCE + /* TODO: Need to abstract this */ + free( ptr ); +#else EngFreeMem( ptr ); +#endif } } -- cgit v1.2.3 From 01bd21eef8f572944c09771f44e3006e2991280e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 10 Mar 2008 16:45:19 +0000 Subject: gallium: Import Dennis Smit cpu detection code. It still needs a slight code massasing to integrate with the rest of gallium (namely mapping the OS_* ARCH_* defines), but I'm commiting anyway so that it is available to be used when somebody needs it. --- src/gallium/auxiliary/util/u_cpu_detect.c | 506 ++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_cpu_detect.h | 78 +++++ 2 files changed, 584 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_cpu_detect.c create mode 100644 src/gallium/auxiliary/util/u_cpu_detect.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c new file mode 100644 index 0000000000..d9f2f8fc28 --- /dev/null +++ b/src/gallium/auxiliary/util/u_cpu_detect.c @@ -0,0 +1,506 @@ +/************************************************************************** + * + * Copyright 2008 Dennis Smit + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* + * Based on the work of Eric Anholt + */ + +/* FIXME: clean this entire file up */ + +#include "u_cpu_detect.h" + +#ifdef __linux__ +#define OS_LINUX +#endif +#ifdef WIN32 +#define OS_WIN32 +#endif + +#if defined(ARCH_POWERPC) +#if defined(OS_DARWIN) +#include +#else +#include +#include +#endif +#endif + +#if defined(OS_NETBSD) || defined(OS_OPENBSD) +#include +#include +#include +#endif + +#if defined(OS_FREEBSD) +#include +#include +#endif + +#if defined(OS_LINUX) +#include +#endif + +#if defined(OS_WIN32) +#include +#endif + +#include +#include +#include +#include + + +static struct cpu_detect_caps __cpu_detect_caps; +static int __cpu_detect_initialized = 0; + +static int has_cpuid(void); +static int cpuid(unsigned int ax, unsigned int *p); + +/* The sigill handlers */ +#if defined(ARCH_X86) /* x86 (linux katmai handler check thing) */ +#if defined(OS_LINUX) && defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC) +static void sigill_handler_sse(int signal, struct sigcontext sc) +{ + /* Both the "xorps %%xmm0,%%xmm0" and "divps %xmm0,%%xmm1" + * instructions are 3 bytes long. We must increment the instruction + * pointer manually to avoid repeated execution of the offending + * instruction. + * + * If the SIGILL is caused by a divide-by-zero when unmasked + * exceptions aren't supported, the SIMD FPU status and control + * word will be restored at the end of the test, so we don't need + * to worry about doing it here. Besides, we may not be able to... + */ + sc.eip += 3; + + __cpu_detect_caps.hasSSE=0; +} + +static void sigfpe_handler_sse(int signal, struct sigcontext sc) +{ + if (sc.fpstate->magic != 0xffff) { + /* Our signal context has the extended FPU state, so reset the + * divide-by-zero exception mask and clear the divide-by-zero + * exception bit. + */ + sc.fpstate->mxcsr |= 0x00000200; + sc.fpstate->mxcsr &= 0xfffffffb; + } else { + /* If we ever get here, we're completely hosed. + */ + } +} +#endif +#endif /* OS_LINUX && _POSIX_SOURCE && X86_FXSR_MAGIC */ + +#if defined(OS_WIN32) +LONG CALLBACK win32_sig_handler_sse(EXCEPTION_POINTERS* ep) +{ + if(ep->ExceptionRecord->ExceptionCode==EXCEPTION_ILLEGAL_INSTRUCTION){ + ep->ContextRecord->Eip +=3; + __cpu_detect_caps.hasSSE=0; + return EXCEPTION_CONTINUE_EXECUTION; + } + return EXCEPTION_CONTINUE_SEARCH; +} +#endif /* OS_WIN32 */ + + +#if defined(ARCH_POWERPC) && !defined(OS_DARWIN) +static sigjmp_buf __lv_powerpc_jmpbuf; +static volatile sig_atomic_t __lv_powerpc_canjump = 0; + +static void sigill_handler (int sig); + +static void sigill_handler (int sig) +{ + if (!__lv_powerpc_canjump) { + signal (sig, SIG_DFL); + raise (sig); + } + + __lv_powerpc_canjump = 0; + siglongjmp(__lv_powerpc_jmpbuf, 1); +} + +static void check_os_altivec_support(void) +{ +#if defined(OS_DARWIN) + int sels[2] = {CTL_HW, HW_VECTORUNIT}; + int has_vu = 0; + int len = sizeof (has_vu); + int err; + + err = sysctl(sels, 2, &has_vu, &len, NULL, 0); + + if (err == 0) { + if (has_vu != 0) { + __cpu_detect_caps.hasAltiVec = 1; + } + } +#else /* !OS_DARWIN */ + /* no Darwin, do it the brute-force way */ + /* this is borrowed from the libmpeg2 library */ + signal(SIGILL, sigill_handler); + if (sigsetjmp(__lv_powerpc_jmpbuf, 1)) { + signal(SIGILL, SIG_DFL); + } else { + __lv_powerpc_canjump = 1; + + __asm __volatile + ("mtspr 256, %0\n\t" + "vand %%v0, %%v0, %%v0" + : + : "r" (-1)); + + signal(SIGILL, SIG_DFL); + __cpu_detect_caps.hasAltiVec = 1; + } +#endif +} +#endif + +/* If we're running on a processor that can do SSE, let's see if we + * are allowed to or not. This will catch 2.4.0 or later kernels that + * haven't been configured for a Pentium III but are running on one, + * and RedHat patched 2.2 kernels that have broken exception handling + * support for user space apps that do SSE. + */ +static void check_os_katmai_support(void) +{ +#if defined(ARCH_X86) +#if defined(OS_FREEBSD) + int has_sse=0, ret; + int len = sizeof (has_sse); + + ret = sysctlbyname("hw.instruction_sse", &has_sse, &len, NULL, 0); + if (ret || !has_sse) + __cpu_detect_caps.hasSSE=0; + +#elif defined(OS_NETBSD) || defined(OS_OPENBSD) + int has_sse, has_sse2, ret, mib[2]; + int varlen; + + mib[0] = CTL_MACHDEP; + mib[1] = CPU_SSE; + varlen = sizeof (has_sse); + + ret = sysctl(mib, 2, &has_sse, &varlen, NULL, 0); + if (ret < 0 || !has_sse) { + __cpu_detect_caps.hasSSE = 0; + } else { + __cpu_detect_caps.hasSSE = 1; + } + + mib[1] = CPU_SSE2; + varlen = sizeof (has_sse2); + ret = sysctl(mib, 2, &has_sse2, &varlen, NULL, 0); + if (ret < 0 || !has_sse2) { + __cpu_detect_caps.hasSSE2 = 0; + } else { + __cpu_detect_caps.hasSSE2 = 1; + } + __cpu_detect_caps.hasSSE = 0; /* FIXME ?!?!? */ + +#elif defined(OS_WIN32) + LPTOP_LEVEL_EXCEPTION_FILTER exc_fil; + if (__cpu_detect_caps.hasSSE) { + exc_fil = SetUnhandledExceptionFilter(win32_sig_handler_sse); + __asm __volatile ("xorps %xmm0, %xmm0"); + SetUnhandledExceptionFilter(exc_fil); + } +#elif defined(OS_LINUX) + struct sigaction saved_sigill; + struct sigaction saved_sigfpe; + + /* Save the original signal handlers. + */ + sigaction(SIGILL, NULL, &saved_sigill); + sigaction(SIGFPE, NULL, &saved_sigfpe); + + signal(SIGILL, (void (*)(int))sigill_handler_sse); + signal(SIGFPE, (void (*)(int))sigfpe_handler_sse); + + /* Emulate test for OSFXSR in CR4. The OS will set this bit if it + * supports the extended FPU save and restore required for SSE. If + * we execute an SSE instruction on a PIII and get a SIGILL, the OS + * doesn't support Streaming SIMD Exceptions, even if the processor + * does. + */ + if (__cpu_detect_caps.hasSSE) { + __asm __volatile ("xorps %xmm1, %xmm0"); + } + + /* Emulate test for OSXMMEXCPT in CR4. The OS will set this bit if + * it supports unmasked SIMD FPU exceptions. If we unmask the + * exceptions, do a SIMD divide-by-zero and get a SIGILL, the OS + * doesn't support unmasked SIMD FPU exceptions. If we get a SIGFPE + * as expected, we're okay but we need to clean up after it. + * + * Are we being too stringent in our requirement that the OS support + * unmasked exceptions? Certain RedHat 2.2 kernels enable SSE by + * setting CR4.OSFXSR but don't support unmasked exceptions. Win98 + * doesn't even support them. We at least know the user-space SSE + * support is good in kernels that do support unmasked exceptions, + * and therefore to be safe I'm going to leave this test in here. + */ + if (__cpu_detect_caps.hasSSE) { + // test_os_katmai_exception_support(); + } + + /* Restore the original signal handlers. + */ + sigaction(SIGILL, &saved_sigill, NULL); + sigaction(SIGFPE, &saved_sigfpe, NULL); + +#else + /* We can't use POSIX signal handling to test the availability of + * SSE, so we disable it by default. + */ + __cpu_detect_caps.hasSSE = 0; +#endif /* __linux__ */ +#endif +} + + +static int has_cpuid(void) +{ +#if defined(ARCH_X86) + int a, c; + + __asm __volatile + ("pushf\n" + "popl %0\n" + "movl %0, %1\n" + "xorl $0x200000, %0\n" + "push %0\n" + "popf\n" + "pushf\n" + "popl %0\n" + : "=a" (a), "=c" (c) + : + : "cc"); + + return a != c; +#else + return 0; +#endif +} + +static int cpuid(unsigned int ax, unsigned int *p) +{ +#if defined(ARCH_X86) + unsigned int flags; + + __asm __volatile + ("movl %%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl %%ebx, %%esi" + : "=a" (p[0]), "=S" (p[1]), + "=c" (p[2]), "=d" (p[3]) + : "0" (ax)); + + return 0; +#else + return -1; +#endif +} + +void cpu_detect_initialize() +{ + unsigned int regs[4]; + unsigned int regs2[4]; + + int mib[2], ncpu; + int len; + + memset(&__cpu_detect_caps, 0, sizeof (struct cpu_detect_caps)); + + /* Check for arch type */ +#if defined(ARCH_MIPS) + __cpu_detect_caps.type = CPU_DETECT_TYPE_MIPS; +#elif defined(ARCH_ALPHA) + __cpu_detect_caps.type = CPU_DETECT_TYPE_ALPHA; +#elif defined(ARCH_SPARC) + __cpu_detect_caps.type = CPU_DETECT_TYPE_SPARC; +#elif defined(ARCH_X86) + __cpu_detect_caps.type = CPU_DETECT_TYPE_X86; +#elif defined(ARCH_POWERPC) + __cpu_detect_caps.type = CPU_DETECT_TYPE_POWERPC; +#else + __cpu_detect_caps.type = CPU_DETECT_TYPE_OTHER; +#endif + + /* Count the number of CPUs in system */ +#if !defined(OS_WIN32) && !defined(OS_UNKNOWN) && defined(_SC_NPROCESSORS_ONLN) + __cpu_detect_caps.nrcpu = sysconf(_SC_NPROCESSORS_ONLN); + if (__cpu_detect_caps.nrcpu == -1) + __cpu_detect_caps.nrcpu = 1; + +#elif defined(OS_NETBSD) || defined(OS_FREEBSD) || defined(OS_OPENBSD) + + mib[0] = CTL_HW; + mib[1] = HW_NCPU; + + len = sizeof (ncpu); + sysctl(mib, 2, &ncpu, &len, NULL, 0); + __cpu_detect_caps.nrcpu = ncpu; + +#else + __cpu_detect_caps.nrcpu = 1; +#endif + +#if defined(ARCH_X86) + /* No cpuid, old 486 or lower */ + if (has_cpuid() == 0) + return; + + __cpu_detect_caps.cacheline = 32; + + /* Get max cpuid level */ + cpuid(0x00000000, regs); + + if (regs[0] >= 0x00000001) { + unsigned int cacheline; + + cpuid (0x00000001, regs2); + + __cpu_detect_caps.x86cpuType = (regs2[0] >> 8) & 0xf; + if (__cpu_detect_caps.x86cpuType == 0xf) + __cpu_detect_caps.x86cpuType = 8 + ((regs2[0] >> 20) & 255); /* use extended family (P4, IA64) */ + + /* general feature flags */ + __cpu_detect_caps.hasTSC = (regs2[3] & (1 << 8 )) >> 8; /* 0x0000010 */ + __cpu_detect_caps.hasMMX = (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */ + __cpu_detect_caps.hasSSE = (regs2[3] & (1 << 25 )) >> 25; /* 0x2000000 */ + __cpu_detect_caps.hasSSE2 = (regs2[3] & (1 << 26 )) >> 26; /* 0x4000000 */ + __cpu_detect_caps.hasSSE3 = (regs2[2] & (1)); /* 0x0000001 */ + __cpu_detect_caps.hasSSSE3 = (regs2[2] & (1 << 9 )) >> 9; /* 0x0000020 */ + __cpu_detect_caps.hasMMX2 = __cpu_detect_caps.hasSSE; /* SSE cpus supports mmxext too */ + + cacheline = ((regs2[1] >> 8) & 0xFF) * 8; + if (cacheline > 0) + __cpu_detect_caps.cacheline = cacheline; + } + + cpuid(0x80000000, regs); + + if (regs[0] >= 0x80000001) { + + cpuid(0x80000001, regs2); + + __cpu_detect_caps.hasMMX |= (regs2[3] & (1 << 23 )) >> 23; /* 0x0800000 */ + __cpu_detect_caps.hasMMX2 |= (regs2[3] & (1 << 22 )) >> 22; /* 0x400000 */ + __cpu_detect_caps.has3DNow = (regs2[3] & (1 << 31 )) >> 31; /* 0x80000000 */ + __cpu_detect_caps.has3DNowExt = (regs2[3] & (1 << 30 )) >> 30; + } + + if (regs[0] >= 0x80000006) { + cpuid(0x80000006, regs2); + __cpu_detect_caps.cacheline = regs2[2] & 0xFF; + } + + +#if defined(OS_LINUX) || defined(OS_FREEBSD) || defined(OS_NETBSD) || defined(OS_CYGWIN) || defined(OS_OPENBSD) + if (__cpu_detect_caps.hasSSE) + check_os_katmai_support(); + + if (!__cpu_detect_caps.hasSSE) { + __cpu_detect_caps.hasSSE2 = 0; + __cpu_detect_caps.hasSSE3 = 0; + __cpu_detect_caps.hasSSSE3 = 0; + } +#else + __cpu_detect_caps.hasSSE = 0; + __cpu_detect_caps.hasSSE2 = 0; + __cpu_detect_caps.hasSSE3 = 0; + __cpu_detect_caps.hasSSSE3 = 0; +#endif +#endif /* ARCH_X86 */ + +#if defined(ARCH_POWERPC) + check_os_altivec_support(); +#endif /* ARCH_POWERPC */ + + __cpu_detect_initialized = 1; +} + +struct cpu_detect_caps *cpu_detect_get_caps() +{ + return &__cpu_detect_caps; +} + +/* The getters and setters for feature flags */ +int cpu_detect_get_tsc() +{ + return __cpu_detect_caps.hasTSC; +} + +int cpu_detect_get_mmx() +{ + return __cpu_detect_caps.hasMMX; +} + +int cpu_detect_get_mmx2() +{ + return __cpu_detect_caps.hasMMX2; +} + +int cpu_detect_get_sse() +{ + return __cpu_detect_caps.hasSSE; +} + +int cpu_detect_get_sse2() +{ + return __cpu_detect_caps.hasSSE2; +} + +int cpu_detect_get_sse3() +{ + return __cpu_detect_caps.hasSSE3; +} + +int cpu_detect_get_ssse3() +{ + return __cpu_detect_caps.hasSSSE3; +} + +int cpu_detect_get_3dnow() +{ + return __cpu_detect_caps.has3DNow; +} + +int cpu_detect_get_3dnow2() +{ + return __cpu_detect_caps.has3DNowExt; +} + +int cpu_detect_get_altivec() +{ + return __cpu_detect_caps.hasAltiVec; +} + diff --git a/src/gallium/auxiliary/util/u_cpu_detect.h b/src/gallium/auxiliary/util/u_cpu_detect.h new file mode 100644 index 0000000000..1612d49286 --- /dev/null +++ b/src/gallium/auxiliary/util/u_cpu_detect.h @@ -0,0 +1,78 @@ +/************************************************************************** + * + * Copyright 2008 Dennis Smit + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + ***************************************************************************/ + +/* + * Based on the work of Eric Anholt + */ + +#ifndef _CPU_DETECT_H +#define _CPU_DETECT_H + +typedef enum { + CPU_DETECT_TYPE_MIPS, + CPU_DETECT_TYPE_ALPHA, + CPU_DETECT_TYPE_SPARC, + CPU_DETECT_TYPE_X86, + CPU_DETECT_TYPE_POWERPC, + CPU_DETECT_TYPE_OTHER +} cpu_detect_type; + +struct cpu_detect_caps { + cpu_detect_type type; + int nrcpu; + + /* Feature flags */ + int x86cpuType; + int cacheline; + + int hasTSC; + int hasMMX; + int hasMMX2; + int hasSSE; + int hasSSE2; + int hasSSE3; + int hasSSSE3; + int has3DNow; + int has3DNowExt; + int hasAltiVec; +}; + +/* prototypes */ +void cpu_detect_initialize(void); +struct cpu_detect_caps *cpu_detect_get_caps(void); + +int cpu_detect_get_tsc(void); +int cpu_detect_get_mmx(void); +int cpu_detect_get_mmx2(void); +int cpu_detect_get_sse(void); +int cpu_detect_get_sse2(void); +int cpu_detect_get_sse3(void); +int cpu_detect_get_ssse3(void); +int cpu_detect_get_3dnow(void); +int cpu_detect_get_3dnow2(void); +int cpu_detect_get_altivec(void); + +#endif /* _CPU_DETECT_H */ -- cgit v1.2.3 From 31022681543932ae4fce2bae5bc9d024c8178f1a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 10 Mar 2008 21:18:00 +0000 Subject: gallium: Use hardcoded breakpoints on x86 targets. --- src/gallium/auxiliary/util/p_debug.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 6255d716a6..93bfaea393 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -81,14 +81,17 @@ void debug_printf(const char *format, ...) } -static INLINE void debug_abort(void) +/* TODO: implement a debug_abort that calls EngBugCheckEx on WIN32 */ + + +static INLINE void debug_break(void) { -#ifdef WIN32 -#ifndef WINCE - EngDebugBreak(); -#else +#if (defined(__i386__) || defined(__386__)) && defined(__GNUC__) + __asm("int3"); +#elif (defined(__i386__) || defined(__386__)) && defined(__MSC__) _asm {int 3}; -#endif +#elif defined(WIN32) && !defined(WINCE) + EngDebugBreak(); #else abort(); #endif @@ -98,5 +101,5 @@ static INLINE void debug_abort(void) void debug_assert_fail(const char *expr, const char *file, unsigned line) { debug_printf("%s:%i: Assertion `%s' failed.\n", file, line, expr); - debug_abort(); + debug_break(); } -- cgit v1.2.3 From d5692cb349fb74e8f9d3a18f5bbd788b09b93581 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 10 Mar 2008 17:21:43 -0600 Subject: gallium: new surface/context tracker (sct) module Will be used for tracking the surfaces and textures which are bound/used by contexts. --- configs/default | 2 +- src/gallium/auxiliary/sct/Makefile | 12 + src/gallium/auxiliary/sct/SConscript | 9 + src/gallium/auxiliary/sct/sct.c | 453 +++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/sct/sct.h | 123 ++++++++++ src/gallium/auxiliary/sct/usage.c | 61 +++++ 6 files changed, 659 insertions(+), 1 deletion(-) create mode 100644 src/gallium/auxiliary/sct/Makefile create mode 100644 src/gallium/auxiliary/sct/SConscript create mode 100644 src/gallium/auxiliary/sct/sct.c create mode 100644 src/gallium/auxiliary/sct/sct.h create mode 100644 src/gallium/auxiliary/sct/usage.c (limited to 'src/gallium/auxiliary') diff --git a/configs/default b/configs/default index 48ddd29282..f0146c50ce 100644 --- a/configs/default +++ b/configs/default @@ -68,7 +68,7 @@ PROGRAM_DIRS = demos redbook samples glsl xdemos # Gallium directories and -GALLIUM_AUXILIARY_DIRS = draw cso_cache pipebuffer tgsi rtasm util +GALLIUM_AUXILIARY_DIRS = draw cso_cache pipebuffer tgsi rtasm util sct GALLIUM_AUXILIARIES = $(foreach DIR,$(GALLIUM_AUXILIARY_DIRS),$(TOP)/src/gallium/auxiliary/$(DIR)/lib$(DIR).a) GALLIUM_DRIVER_DIRS = softpipe i915simple i965simple failover GALLIUM_DRIVERS = $(foreach DIR,$(GALLIUM_DRIVER_DIRS),$(TOP)/src/gallium/drivers/$(DIR)/lib$(DIR).a) diff --git a/src/gallium/auxiliary/sct/Makefile b/src/gallium/auxiliary/sct/Makefile new file mode 100644 index 0000000000..516d1756cf --- /dev/null +++ b/src/gallium/auxiliary/sct/Makefile @@ -0,0 +1,12 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = sct + +C_SOURCES = \ + sct.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/sct/SConscript b/src/gallium/auxiliary/sct/SConscript new file mode 100644 index 0000000000..76927d973f --- /dev/null +++ b/src/gallium/auxiliary/sct/SConscript @@ -0,0 +1,9 @@ +Import('*') + +sct = env.ConvenienceLibrary( + target = 'sct', + source = [ + 'sct.c' + ]) + +auxiliaries.insert(0, sct) diff --git a/src/gallium/auxiliary/sct/sct.c b/src/gallium/auxiliary/sct/sct.c new file mode 100644 index 0000000000..97ee5882a1 --- /dev/null +++ b/src/gallium/auxiliary/sct/sct.c @@ -0,0 +1,453 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_util.h" +#include "pipe/p_state.h" +#include "pipe/p_inlines.h" +#include "sct.h" + + +struct texture_list +{ + struct pipe_texture *texture; + struct texture_list *next; +}; + + + +#define MAX_SURFACES ((PIPE_MAX_COLOR_BUFS) + 1) + +struct sct_context +{ + const struct pipe_context *context; + + /** surfaces the context is drawing into */ + struct pipe_surface *surfaces[MAX_SURFACES]; + + /** currently bound textures */ + struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; + + /** previously bound textures, used but not flushed */ + struct texture_list *textures_used; + + boolean needs_flush; + + struct sct_context *next; +}; + + + +struct sct_surface +{ + const struct pipe_surface *surface; + + /** list of contexts drawing to this surface */ + struct sct_context_list *contexts; + + struct sct_surface *next; +}; + + + +/** + * Find the surface_info for the given pipe_surface + */ +static struct sct_surface * +find_surface_info(struct surface_context_tracker *sct, + const struct pipe_surface *surface) +{ + struct sct_surface *si; + for (si = sct->surfaces; si; si = si->next) + if (si->surface == surface) + return si; + return NULL; +} + + +/** + * As above, but create new surface_info if surface is new. + */ +static struct sct_surface * +find_create_surface_info(struct surface_context_tracker *sct, + const struct pipe_surface *surface) +{ + struct sct_surface *si = find_surface_info(sct, surface); + if (si) + return si; + + /* alloc new */ + si = CALLOC_STRUCT(sct_surface); + if (si) { + si->surface = surface; + + /* insert at head */ + si->next = sct->surfaces; + sct->surfaces = si; + } + + return si; +} + + +/** + * Find a context_info for the given context. + */ +static struct sct_context * +find_context_info(struct surface_context_tracker *sct, + const struct pipe_context *context) +{ + struct sct_context *ci; + for (ci = sct->contexts; ci; ci = ci->next) + if (ci->context == context) + return ci; + return NULL; +} + + +/** + * As above, but create new context_info if context is new. + */ +static struct sct_context * +find_create_context_info(struct surface_context_tracker *sct, + const struct pipe_context *context) +{ + struct sct_context *ci = find_context_info(sct, context); + if (ci) + return ci; + + /* alloc new */ + ci = CALLOC_STRUCT(sct_context); + if (ci) { + ci->context = context; + + /* insert at head */ + ci->next = sct->contexts; + sct->contexts = ci; + } + + return ci; +} + + +/** + * Is the context already bound to the surface? + */ +static boolean +find_surface_context(const struct sct_surface *si, + const struct pipe_context *context) +{ + const struct sct_context_list *cl; + for (cl = si->contexts; cl; cl = cl->next) { + if (cl->context == context) { + return TRUE; + } + } + return FALSE; +} + + +/** + * Add a context to the list of contexts associated with a surface. + */ +static void +add_context_to_surface(struct sct_surface *si, + const struct pipe_context *context) +{ + struct sct_context_list *cl = CALLOC_STRUCT(sct_context_list); + if (cl) { + cl->context = context; + /* insert at head of list of contexts */ + cl->next = si->contexts; + si->contexts = cl; + } +} + + +/** + * Remove a context from the list of contexts associated with a surface. + */ +static void +remove_context_from_surface(struct sct_surface *si, + const struct pipe_context *context) +{ + struct sct_context_list *prev = NULL, *curr, *next; + + for (curr = si->contexts; curr; curr = next) { + if (curr->context == context) { + /* remove */ + if (prev) + prev->next = curr->next; + else + si->contexts = curr->next; + next = curr->next; + FREE(curr); + } + else { + prev = curr; + } + } +} + + +/** + * Unbind context from surface. + */ +static void +unbind_context_surface(struct surface_context_tracker *sct, + struct pipe_context *context, + struct pipe_surface *surface) +{ + struct sct_surface *si = find_surface_info(sct, surface); + if (si) { + remove_context_from_surface(si, context); + } +} + + +/** + * Bind context to a set of surfaces (color + Z). + * Like MakeCurrent(). + */ +void +sct_bind_surfaces(struct surface_context_tracker *sct, + struct pipe_context *context, + uint num_surf, + struct pipe_surface **surfaces) +{ + struct sct_context *ci = find_create_context_info(sct, context); + uint i; + + if (!ci) { + return; /* out of memory */ + } + + /* unbind currently bound surfaces */ + for (i = 0; i < MAX_SURFACES; i++) { + if (ci->surfaces[i]) { + unbind_context_surface(sct, context, ci->surfaces[i]); + } + } + + /* bind new surfaces */ + for (i = 0; i < num_surf; i++) { + struct sct_surface *si = find_create_surface_info(sct, surfaces[i]); + if (!find_surface_context(si, context)) { + add_context_to_surface(si, context); + } + } +} + + +/** + * Return list of contexts bound to a surface. + */ +const struct sct_context_list * +sct_get_surface_contexts(struct surface_context_tracker *sct, + const struct pipe_surface *surface) +{ + const struct sct_surface *si = find_surface_info(sct, surface); + return si->contexts; +} + + + +static boolean +find_texture(const struct sct_context *ci, + const struct pipe_texture *texture) +{ + const struct texture_list *tl; + + for (tl = ci->textures_used; tl; tl = tl->next) { + if (tl->texture == texture) { + return TRUE; + } + } + return FALSE; +} + + +/** + * Add the given texture to the context's list of used textures. + */ +static void +add_texture_used(struct sct_context *ci, + struct pipe_texture *texture) +{ + if (!find_texture(ci, texture)) { + /* add to list */ + struct texture_list *tl = CALLOC_STRUCT(texture_list); + if (tl) { + pipe_texture_reference(&tl->texture, texture); + /* insert at head */ + tl->next = ci->textures_used; + ci->textures_used = tl; + } + } +} + + +/** + * Bind a texture to a rendering context. + */ +void +sct_bind_texture(struct surface_context_tracker *sct, + struct pipe_context *context, + uint unit, + struct pipe_texture *tex) +{ + struct sct_context *ci = find_context_info(sct, context); + + if (ci->textures[unit] != tex) { + /* put texture on the 'used' list */ + add_texture_used(ci, tex); + /* bind new */ + pipe_texture_reference(&ci->textures[unit], tex); + } +} + + +/** + * Check if the given texture has been used by the rendering context + * since the last call to sct_flush_textures(). + */ +boolean +sct_is_texture_used(struct surface_context_tracker *sct, + const struct pipe_context *context, + const struct pipe_texture *texture) +{ + const struct sct_context *ci = find_context_info(sct, context); + return find_texture(ci, texture); +} + + +/** + * To be called when the image contents of a texture are changed, such + * as for gl[Copy]TexSubImage(). + * XXX this may not be needed + */ +void +sct_update_texture(struct pipe_texture *tex) +{ + +} + + +/** + * When a scene is flushed/rendered we can release the list of + * used textures. + */ +void +sct_flush_textures(struct surface_context_tracker *sct, + struct pipe_context *context) +{ + struct sct_context *ci = find_context_info(sct, context); + struct texture_list *tl, *next; + uint i; + + for (tl = ci->textures_used; tl; tl = next) { + next = tl->next; + pipe_texture_release(&tl->texture); + FREE(tl); + } + ci->textures_used = NULL; + + /* put the currently bound textures on the 'used' list */ + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + add_texture_used(ci, ci->textures[i]); + } +} + + + +void +sct_destroy_context(struct surface_context_tracker *sct, + struct pipe_context *context) +{ + /* XXX should we require an unbinding first? */ + { + struct sct_surface *si; + for (si = sct->surfaces; si; si = si->next) { + remove_context_from_surface(si, context); + } + } + + /* remove context from context_info list */ + { + struct sct_context *ci, *next, *prev = NULL; + for (ci = sct->contexts; ci; ci = next) { + next = ci->next; + if (ci->context == context) { + if (prev) + prev->next = ci->next; + else + sct->contexts = ci->next; + FREE(ci); + } + else { + prev = ci; + } + } + } + +} + + +void +sct_destroy_surface(struct surface_context_tracker *sct, + struct pipe_surface *surface) +{ + if (1) { + /* debug/sanity: no context should be bound to surface */ + struct sct_context *ci; + uint i; + for (ci = sct->contexts; ci; ci = ci->next) { + for (i = 0; i < MAX_SURFACES; i++) { + assert(ci->surfaces[i] != surface); + } + } + } + + /* remove surface from sct_surface list */ + { + struct sct_surface *si, *next, *prev = NULL; + for (si = sct->surfaces; si; si = next) { + next = si->next; + if (si->surface == surface) { + /* unlink */ + if (prev) + prev->next = si->next; + else + sct->surfaces = si->next; + FREE(si); + } + else { + prev = si; + } + } + } +} diff --git a/src/gallium/auxiliary/sct/sct.h b/src/gallium/auxiliary/sct/sct.h new file mode 100644 index 0000000000..cf7c4d3bdf --- /dev/null +++ b/src/gallium/auxiliary/sct/sct.h @@ -0,0 +1,123 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Surface/Context Tracking + * + * For some drivers, we need to monitor the binding between contexts and + * surfaces/textures. + * This code may evolve quite a bit... + */ + + +#ifndef SCT_H +#define SCT_H + + +#ifdef __cplusplus +extern "C" { +#endif + + +struct pipe_context; +struct pipe_surface; + +struct sct_context; +struct sct_surface; + + +/** + * Per-device info, basically + */ +struct surface_context_tracker +{ + struct sct_context *contexts; + struct sct_surface *surfaces; +}; + + + +/** + * Simple linked list of contexts + */ +struct sct_context_list +{ + const struct pipe_context *context; + struct sct_context_list *next; +}; + + + +extern void +sct_bind_surfaces(struct surface_context_tracker *sct, + struct pipe_context *context, + uint num_surf, + struct pipe_surface **surfaces); + + +extern void +sct_bind_texture(struct surface_context_tracker *sct, + struct pipe_context *context, + uint unit, + struct pipe_texture *texture); + + +extern void +sct_update_texture(struct pipe_texture *tex); + + +extern boolean +sct_is_texture_used(struct surface_context_tracker *sct, + const struct pipe_context *context, + const struct pipe_texture *texture); + +extern void +sct_flush_textures(struct surface_context_tracker *sct, + struct pipe_context *context); + + +extern const struct sct_context_list * +sct_get_surface_contexts(struct surface_context_tracker *sct, + const struct pipe_surface *surf); + + +extern void +sct_destroy_context(struct surface_context_tracker *sct, + struct pipe_context *context); + + +extern void +sct_destroy_surface(struct surface_context_tracker *sct, + struct pipe_surface *surface); + + + +#ifdef __cplusplus +} +#endif + +#endif /* SCT_H */ diff --git a/src/gallium/auxiliary/sct/usage.c b/src/gallium/auxiliary/sct/usage.c new file mode 100644 index 0000000000..6227f19962 --- /dev/null +++ b/src/gallium/auxiliary/sct/usage.c @@ -0,0 +1,61 @@ +/* surface / context tracking */ + + +/* + +context A: + render to texture T + +context B: + texture from T + +----------------------- + +flush surface: + which contexts are bound to the surface? + +----------------------- + +glTexSubImage(): + which contexts need to be flushed? + + */ + + +/* + +in MakeCurrent(): + + call sct_bind_surfaces(context, list of surfaces) to update the + dependencies between context and surfaces + + +in SurfaceFlush(), or whatever it is in D3D: + + call sct_get_surface_contexts(surface) to get a list of contexts + which are currently bound to the surface. + + + +in BindTexture(): + + call sct_bind_texture(context, texture) to indicate that the texture + is used in the scene. + + +in glTexSubImage() or RenderToTexture(): + + call sct_is_texture_used(context, texture) to determine if the texture + has been used in the scene, but the scene's not flushed. If TRUE is + returned it means the scene has to be rendered/flushed before the contents + of the texture can be changed. + + +in psb_scene_flush/terminate(): + + call sct_flush_textures(context) to tell the SCT that the textures which + were used in the scene can be released. + + + +*/ -- cgit v1.2.3 From 6d5ee6d9a4a705cce80117f90ee334986f5e5e26 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 10 Mar 2008 19:37:32 +0000 Subject: gallium: enable bug workaround in draw_vertex_cache_invalidate --- src/gallium/auxiliary/draw/draw_vertex_cache.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vertex_cache.c b/src/gallium/auxiliary/draw/draw_vertex_cache.c index 53f8bbec44..161b247d4e 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_cache.c +++ b/src/gallium/auxiliary/draw/draw_vertex_cache.c @@ -41,7 +41,11 @@ void draw_vertex_cache_invalidate( struct draw_context *draw ) assert(draw->vs.queue_nr == 0); assert(draw->vcache.referenced == 0); -// memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx)); + /* There's an error somewhere in the vcache code that requires this + * memset. The bug is exposed in q3demo demo001, but probably + * elsewhere as well. Will track it down later. + */ + memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx)); } -- cgit v1.2.3 From 3f5b9f4ba49df57e7bbab04eab55a17a99bb5046 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 10 Mar 2008 19:41:12 +0000 Subject: gallium: use the same bypass_clipping logic on all vs paths --- src/gallium/auxiliary/draw/draw_vs_llvm.c | 48 +++++++++++++++++++------------ src/gallium/auxiliary/draw/draw_vs_sse.c | 38 +++++++++++++++--------- 2 files changed, 53 insertions(+), 33 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 0fd557d667..53c260be53 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -135,25 +135,35 @@ vs_llvm_run( struct draw_vertex_shader *base, unsigned slot; float x, y, z, w; - x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); - vOut[j]->edgeflag = 1; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; + if (!draw->rasterizer->bypass_clipping) { + x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + + vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); + vOut[j]->edgeflag = 1; + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + vOut[j]->data[0][0] = x * scale[0] + trans[0]; + vOut[j]->data[0][1] = y * scale[1] + trans[1]; + vOut[j]->data[0][2] = z * scale[2] + trans[2]; + vOut[j]->data[0][3] = w; + } + else { + vOut[j]->clipmask = 0; + vOut[j]->edgeflag = 1; + vOut[j]->data[0][0] = x; + vOut[j]->data[0][1] = y; + vOut[j]->data[0][2] = z; + vOut[j]->data[0][3] = w; + } /* Remaining attributes are packed into sequential post-transform * vertex attrib slots. diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 0b8bc2bf14..e5c1a40cca 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -158,20 +158,30 @@ vs_sse_run( struct draw_vertex_shader *base, z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); - vOut[j]->edgeflag = 1; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; + if (!draw->rasterizer->bypass_clipping) { + vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); + vOut[j]->edgeflag = 1; + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + + /* Viewport mapping */ + vOut[j]->data[0][0] = x * scale[0] + trans[0]; + vOut[j]->data[0][1] = y * scale[1] + trans[1]; + vOut[j]->data[0][2] = z * scale[2] + trans[2]; + vOut[j]->data[0][3] = w; + } + else { + vOut[j]->clipmask = 0; + vOut[j]->edgeflag = 1; + vOut[j]->data[0][0] = x; + vOut[j]->data[0][1] = y; + vOut[j]->data[0][2] = z; + vOut[j]->data[0][3] = w; + } /* Remaining attributes are packed into sequential post-transform * vertex attrib slots. -- cgit v1.2.3 From 7375369fb32e203023fbacf948169aad3f4c3a1d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 10 Mar 2008 19:41:51 +0000 Subject: gallium: fix compiler warning --- src/gallium/auxiliary/draw/draw_pstipple.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index 894b136f2c..b3e52dc1c7 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -482,11 +482,10 @@ static void pstip_first_tri(struct draw_stage *stage, struct prim_header *header) { struct pstip_stage *pstip = pstip_stage(stage); - struct draw_context *draw = stage->draw; struct pipe_context *pipe = pstip->pipe; uint num = MAX2(pstip->num_textures, pstip->num_samplers); - assert(draw->rasterizer->poly_stipple_enable); + assert(stage->draw->rasterizer->poly_stipple_enable); /* * Bind our fragprog, sampler and texture -- cgit v1.2.3 From 297b3be25a7f097fb9b1a79e332acddc12dcc3fe Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 10 Mar 2008 19:49:15 +0000 Subject: draw: placeholder/prototype code for a passthrough draw path --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/SConscript | 1 + src/gallium/auxiliary/draw/draw_context.c | 12 ++++++++++++ src/gallium/auxiliary/draw/draw_context.h | 5 +++++ src/gallium/auxiliary/draw/draw_private.h | 27 +++++++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_vbuf.h | 6 ++++++ 6 files changed, 52 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 2daa1636f3..ce6667d8ec 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -4,6 +4,7 @@ include $(TOP)/configs/current LIBNAME = draw C_SOURCES = \ + draw_passthrough.c \ draw_aaline.c \ draw_aapoint.c \ draw_clip.c \ diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index c18dcb2927..5cb7664c85 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -16,6 +16,7 @@ draw = env.ConvenienceLibrary( 'draw_offset.c', 'draw_prim.c', 'draw_pstipple.c', + 'draw_passthrough.c', 'draw_stipple.c', 'draw_twoside.c', 'draw_unfilled.c', diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 428b6209e0..bb64b50a17 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -34,6 +34,7 @@ #include "pipe/p_util.h" #include "draw_context.h" #include "draw_private.h" +#include "draw_vbuf.h" @@ -114,6 +115,10 @@ void draw_destroy( struct draw_context *draw ) draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); tgsi_exec_machine_free_data(&draw->machine); align_free( draw->vs.queue[0].vertex ); /* Frees all the vertices. */ + + if (draw->render) + draw->render->destroy( draw->render ); + FREE( draw ); } @@ -349,3 +354,10 @@ void draw_reset_vertex_ids(struct draw_context *draw) draw_vertex_cache_reset_vertex_ids(draw); } + + +void draw_set_render( struct draw_context *draw, + struct vbuf_render *render ) +{ + draw->render = render; +} diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index ab87b4127c..df63e91a22 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -168,4 +168,9 @@ unsigned draw_trim_prim( unsigned mode, unsigned count ); + +struct vbuf_render; +void draw_set_render( struct draw_context *draw, + struct vbuf_render *render ); + #endif /* DRAW_CONTEXT_H */ diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index c732d723a7..25fa8c09c2 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -162,8 +162,14 @@ typedef void (*full_fetch_func)( struct draw_context *draw, const unsigned *elts, unsigned count ); +typedef void (*pt_fetch_func)( struct draw_context *draw, + float *out, + unsigned start, + unsigned count ); +struct vbuf_render; + /** * Private context for the drawing module. */ @@ -191,6 +197,17 @@ struct draw_context struct draw_stage *rasterize; } pipeline; + + struct vbuf_render *render; + + /* Support prototype passthrough path: + */ + struct { + unsigned prim; + unsigned hw_vertex_size; + } pt; + + /* pipe state that we need: */ const struct pipe_rasterizer_state *rasterizer; struct pipe_viewport_state viewport; @@ -244,6 +261,7 @@ struct draw_context fetch_func fetch[PIPE_ATTRIB_MAX]; unsigned nr_attrs; full_fetch_func fetch_func; + pt_fetch_func pt_fetch; } vertex_fetch; /* Post-tnl vertex cache: @@ -331,6 +349,15 @@ struct tgsi_exec_machine; extern void draw_update_vertex_fetch( struct draw_context *draw ); +/* Prototype/hack + */ +boolean +draw_passthrough_arrays(struct draw_context *draw, + unsigned prim, + unsigned start, + unsigned count); + + #define DRAW_FLUSH_SHADER_QUEUE 0x1 /* sized not to overflow, never raised */ #define DRAW_FLUSH_PRIM_QUEUE 0x2 #define DRAW_FLUSH_VERTEX_CACHE 0x4 diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index cfd2b9820c..5e7de905c1 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -85,6 +85,12 @@ struct vbuf_render { const ushort *indices, uint nr_indices ); + /* Draw Arrays path too. + */ + void (*draw_arrays)( struct vbuf_render *, + unsigned start, + uint nr ); + /** * Called when vbuf is done with this set of vertices: */ -- cgit v1.2.3 From b1525662b330ca8b4cdd930775f3642bfec3b58f Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 10 Mar 2008 16:28:54 -0700 Subject: Move SPE register allocator to rtasm code Move the register allocator to a common location. There is more code on the way that will make use of this interface. --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 47 +++++++++++ src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 16 ++++ src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | 101 +++++++---------------- 3 files changed, 92 insertions(+), 72 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 95a2d6fcbb..a996218ce7 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -306,6 +306,11 @@ void spe_init_func(struct spe_function *p, unsigned code_size) { p->store = align_malloc(code_size, 16); p->csr = p->store; + + /* Conservatively treat R0 - R2 and R80 - R127 as non-volatile. + */ + p->regs[0] = ~7; + p->regs[1] = (1U << (80 - 64)) - 1; } @@ -317,6 +322,48 @@ void spe_release_func(struct spe_function *p) } +int spe_allocate_available_register(struct spe_function *p) +{ + unsigned i; + for (i = 0; i < 128; i++) { + const uint64_t mask = (1ULL << (i % 128)); + const unsigned idx = i / 128; + + if ((p->regs[idx] & mask) != 0) { + p->regs[idx] &= ~mask; + return i; + } + } + + return -1; +} + + +int spe_allocate_register(struct spe_function *p, int reg) +{ + const unsigned idx = reg / 128; + const unsigned bit = reg % 128; + + assert((p->regs[idx] & (1ULL << bit)) != 0); + + p->regs[idx] &= ~(1ULL << bit); + return reg; +} + + +void spe_release_register(struct spe_function *p, int reg) +{ + const unsigned idx = reg / 128; + const unsigned bit = reg % 128; + + assert((p->regs[idx] & (1ULL << bit)) == 0); + + p->regs[idx] |= (1ULL << bit); +} + + + + void spe_bi(struct spe_function *p, unsigned rA, int d, int e) { emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4)); diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 10ce44b3a0..5a1eb1ed8d 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -39,11 +39,27 @@ struct spe_function { uint32_t *store; uint32_t *csr; const char *fn; + + /** + * Mask of used / unused registers + * + * Each set bit corresponds to an available register. Each cleared bit + * corresponds to an allocated register. + * + * \sa + * spe_allocate_register, spe_allocate_available_register, + * spe_release_register + */ + uint64_t regs[2]; }; extern void spe_init_func(struct spe_function *p, unsigned code_size); extern void spe_release_func(struct spe_function *p); +extern int spe_allocate_available_register(struct spe_function *p); +extern int spe_allocate_register(struct spe_function *p, int reg); +extern void spe_release_register(struct spe_function *p, int reg); + #endif /* RTASM_PPC_SPE_H */ #ifndef EMIT_ diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index 9cf74bab47..4828a8023b 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -33,46 +33,11 @@ #include "cell_context.h" #include "rtasm/rtasm_ppc_spe.h" -typedef uint64_t register_mask; - -int allocate_available_register(register_mask *m) -{ - unsigned i; - for (i = 0; i < 64; i++) { - const uint64_t mask = (1ULL << i); - - if ((m[0] & mask) != 0) { - m[0] &= ~mask; - return i; - } - } - - return -1; -} - - -int allocate_register(register_mask *m, unsigned reg) -{ - assert((m[0] & (1ULL << reg)) != 0); - - m[0] &= ~(1ULL << reg); - return reg; -} - - -void release_register(register_mask *m, unsigned reg) -{ - assert((m[0] & (1ULL << reg)) == 0); - - m[0] |= (1ULL << reg); -} - /** * Emit a 4x4 matrix transpose operation * * \param p Function that the transpose operation is to be appended to - * \param m Live register mask * \param row0 Register containing row 0 of the source matrix * \param row1 Register containing row 1 of the source matrix * \param row2 Register containing row 2 of the source matrix @@ -91,15 +56,15 @@ void release_register(register_mask *m, unsigned reg) * This function requires that four temporary are available on entry. */ static void -emit_matrix_transpose(struct spe_function *p, register_mask *m, +emit_matrix_transpose(struct spe_function *p, unsigned row0, unsigned row1, unsigned row2, unsigned row3, unsigned dest_ptr, unsigned shuf_ptr, unsigned count) { - int shuf_hi = allocate_available_register(m); - int shuf_lo = allocate_available_register(m); - int t1 = allocate_available_register(m); - int t2 = allocate_available_register(m); + int shuf_hi = spe_allocate_available_register(p); + int shuf_lo = spe_allocate_available_register(p); + int t1 = spe_allocate_available_register(p); + int t2 = spe_allocate_available_register(p); int t3; int t4; int col0; @@ -169,19 +134,19 @@ emit_matrix_transpose(struct spe_function *p, register_mask *m, /* Release all of the temporary registers used. */ - release_register(m, col0); - release_register(m, col1); - release_register(m, col2); - release_register(m, col3); - release_register(m, shuf_hi); - release_register(m, shuf_lo); - release_register(m, t2); - release_register(m, t4); + spe_release_register(p, col0); + spe_release_register(p, col1); + spe_release_register(p, col2); + spe_release_register(p, col3); + spe_release_register(p, shuf_hi); + spe_release_register(p, shuf_lo); + spe_release_register(p, t2); + spe_release_register(p, t4); } static void -emit_fetch(struct spe_function *p, register_mask *m, +emit_fetch(struct spe_function *p, unsigned in_ptr, unsigned *offset, unsigned out_ptr, unsigned shuf_ptr, enum pipe_format format) @@ -191,11 +156,11 @@ emit_fetch(struct spe_function *p, register_mask *m, const unsigned type = pf_type(format); const unsigned bytes = pf_size_x(format); - int v0 = allocate_available_register(m); - int v1 = allocate_available_register(m); - int v2 = allocate_available_register(m); - int v3 = allocate_available_register(m); - int tmp = allocate_available_register(m); + int v0 = spe_allocate_available_register(p); + int v1 = spe_allocate_available_register(p); + int v2 = spe_allocate_available_register(p); + int v3 = spe_allocate_available_register(p); + int tmp = spe_allocate_available_register(p); int float_zero = -1; int float_one = -1; float scale_signed = 0.0; @@ -260,19 +225,19 @@ emit_fetch(struct spe_function *p, register_mask *m, if (count < 4) { - float_one = allocate_available_register(m); + float_one = spe_allocate_available_register(p); spe_il(p, float_one, 1); spe_cuflt(p, float_one, float_one, 0); if (count < 3) { - float_zero = allocate_available_register(m); + float_zero = spe_allocate_available_register(p); spe_il(p, float_zero, 0); } } - release_register(m, tmp); + spe_release_register(p, tmp); - emit_matrix_transpose(p, m, v0, v1, v2, v3, out_ptr, shuf_ptr, count); + emit_matrix_transpose(p, v0, v1, v2, v3, out_ptr, shuf_ptr, count); switch (count) { case 1: @@ -284,11 +249,11 @@ emit_fetch(struct spe_function *p, register_mask *m, } if (float_zero != -1) { - release_register(m, float_zero); + spe_release_register(p, float_zero); } if (float_one != -1) { - release_register(m, float_one); + spe_release_register(p, float_one); } } @@ -297,7 +262,6 @@ void cell_update_vertex_fetch(struct draw_context *draw) { struct cell_context *const cell = (struct cell_context *) draw->driver_private; - register_mask m = ~0; struct spe_function *p = &cell->attrib_fetch; unsigned function_index[PIPE_ATTRIB_MAX]; unsigned unique_attr_formats; @@ -338,18 +302,11 @@ void cell_update_vertex_fetch(struct draw_context *draw) spe_init_func(p, 136 * unique_attr_formats); - /* Registers 0, 1, and 2 are reserved by the ABI. - */ - allocate_register(&m, 0); - allocate_register(&m, 1); - allocate_register(&m, 2); - - /* Allocate registers for the function's input parameters. */ - out_ptr = allocate_register(&m, 3); - in_ptr = allocate_register(&m, 4); - shuf_ptr = allocate_register(&m, 5); + out_ptr = spe_allocate_register(p, 3); + in_ptr = spe_allocate_register(p, 4); + shuf_ptr = spe_allocate_register(p, 5); /* Generate code for the individual attribute fetch functions. @@ -362,7 +319,7 @@ void cell_update_vertex_fetch(struct draw_context *draw) - (void *) p->store); offset = 0; - emit_fetch(p, & m, in_ptr, &offset, out_ptr, shuf_ptr, + emit_fetch(p, in_ptr, &offset, out_ptr, shuf_ptr, draw->vertex_element[i].src_format); spe_bi(p, 0, 0, 0); -- cgit v1.2.3 From d9d2ca7a07469a7d5cdc183f2daa6cf9e30938fe Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 10 Mar 2008 22:10:07 -0400 Subject: fix compilation --- src/gallium/auxiliary/draw/Makefile | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index ce6667d8ec..2daa1636f3 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -4,7 +4,6 @@ include $(TOP)/configs/current LIBNAME = draw C_SOURCES = \ - draw_passthrough.c \ draw_aaline.c \ draw_aapoint.c \ draw_clip.c \ -- cgit v1.2.3 From be9a2457388d99a2185f258aeb5ef5183ccfbbb2 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 10 Mar 2008 22:10:18 -0400 Subject: fix double deletion plus, if the current hash is bigger than max size make sure we delete enough from it --- src/gallium/auxiliary/cso_cache/cso_cache.c | 5 ++++- src/gallium/auxiliary/cso_cache/cso_hash.c | 7 ------- 2 files changed, 4 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index b427b509f8..a2764b4265 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -207,8 +207,11 @@ static INLINE void sanitize_hash(struct cso_hash *hash, enum cso_cache_type type { /* if we're approach the maximum size, remove fourth of the entries * otherwise every subsequent call will go through the same */ - int max_entries = (max_size > cso_hash_size(hash)) ? max_size : cso_hash_size(hash); + int hash_size = cso_hash_size(hash); + int max_entries = (max_size > hash_size) ? max_size : hash_size; int to_remove = (max_size < max_entries) * max_entries/4; + if (hash_size > max_size) + to_remove += hash_size - max_size; while (to_remove) { /*remove elements until we're good */ /*fixme: currently we pick the nodes to remove at random*/ diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c index b3b4d667d2..5cad5d3be7 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -101,13 +101,6 @@ static void *cso_data_allocate_node(struct cso_hash_data *hash) static void cso_data_free_node(struct cso_node *node) { - /* XXX still a leak here. - * Need to cast value ptr to original cso type, then free the - * driver-specific data hanging off of it. For example: - struct cso_sampler *csamp = (struct cso_sampler *) node->value; - FREE(csamp->data); - */ - FREE(node->value); FREE(node); } -- cgit v1.2.3 From ff3c7a3243e4f3fc60e6cfcfc6a2711e9ea5cf65 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 11 Mar 2008 08:42:49 +0000 Subject: gallium: missing file --- src/gallium/auxiliary/draw/draw_passthrough.c | 222 ++++++++++++++++++++++++++ 1 file changed, 222 insertions(+) create mode 100644 src/gallium/auxiliary/draw/draw_passthrough.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_passthrough.c b/src/gallium/auxiliary/draw/draw_passthrough.c new file mode 100644 index 0000000000..fc2dde38ba --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_passthrough.c @@ -0,0 +1,222 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + + +/* This code is a prototype of what a passhthrough vertex shader might + * look like. + * + * Probably the best approach for us is to do: + * - vertex fetch + * - vertex shader + * - cliptest / viewport transform + * + * in one step, then examine the clipOrMask & choose between two paths: + * + * Either: + * - build primitive headers + * - clip and the primitive path + * - build clipped vertex buffers, + * - vertex-emit to vbuf buffers + * + * Or, if no clipping: + * - vertex-emit directly to vbuf buffers + * + * But when bypass clipping is enabled, we just take the latter + * choice. If (some new) passthrough-vertex-shader flag is also set, + * the pipeline degenerates to: + * + * - vertex fetch + * - vertex emit to vbuf buffers + * + * Which is what is prototyped here. + */ +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" + + + +/* Example of a fetch/emit passthrough shader which could be + * generated when bypass_clipping is enabled on a passthrough vertex + * shader. + */ +static void fetch_xyz_rgb_st( struct draw_context *draw, + float *out, + unsigned start, + unsigned count ) +{ + const unsigned *pitch = draw->vertex_fetch.pitch; + const ubyte **src = draw->vertex_fetch.src_ptr; + int i; + + const ubyte *xyzw = src[0] + start * pitch[0]; + const ubyte *rgba = src[1] + start * pitch[1]; + const ubyte *st = src[2] + start * pitch[2]; + + /* loop over vertex attributes (vertex shader inputs) + */ + for (i = 0; i < count; i++) { + { + const float *in = (const float *)xyzw; xyzw += pitch[0]; + /* decode input, encode output. Assume both are float[4] */ + out[0] = in[0]; + out[1] = in[1]; + out[2] = in[2]; + out[3] = in[3]; + } + + { + const float *in = (const float *)rgba; rgba += pitch[1]; + /* decode input, encode output. Assume both are float[4] */ + out[4] = in[0]; + out[5] = in[1]; + out[6] = in[2]; + out[7] = in[3]; + } + + { + const float *in = (const float *)st; st += pitch[2]; + /* decode input, encode output. Assume both are float[2] */ + out[8] = in[0]; + out[9] = in[1]; + } + + out += 10; + } +} + + +static boolean update_shader( struct draw_context *draw ) +{ + const struct vertex_info *vinfo = draw->render->get_vertex_info(draw->render); + + unsigned nr_attrs = vinfo->num_attribs; + unsigned i; + + for (i = 0; i < nr_attrs; i++) { + unsigned buf = draw->vertex_element[i].vertex_buffer_index; + + draw->vertex_fetch.src_ptr[i] = (const ubyte *) draw->user.vbuffer[buf] + + draw->vertex_buffer[buf].buffer_offset + + draw->vertex_element[i].src_offset; + + draw->vertex_fetch.pitch[i] = draw->vertex_buffer[buf].pitch; + draw->vertex_fetch.fetch[i] = NULL; + } + + draw->vertex_fetch.nr_attrs = nr_attrs; + draw->vertex_fetch.fetch_func = NULL; + draw->vertex_fetch.pt_fetch = NULL; + + draw->pt.hw_vertex_size = vinfo->size * 4; + + /* Just trying to figure out how this would work: + */ + if (nr_attrs == 3 && + 0 /* some other tests */) + { + draw->vertex_fetch.pt_fetch = fetch_xyz_rgb_st; + assert(vinfo->size == 10); + return TRUE; + } + + return FALSE; +} + + + +static boolean set_prim( struct draw_context *draw, + unsigned prim ) +{ + assert(!draw->user.elts); + + draw->pt.prim = prim; + + switch (prim) { + case PIPE_PRIM_LINE_LOOP: + case PIPE_PRIM_QUADS: + case PIPE_PRIM_QUAD_STRIP: + return FALSE; + default: + draw->render->set_primitive( draw->render, prim ); + return TRUE; + } +} + + + +boolean +draw_passthrough_arrays(struct draw_context *draw, + unsigned prim, + unsigned start, + unsigned count) +{ + float *hw_verts; + + if (!set_prim(draw, prim)) + return FALSE; + + if (!update_shader( draw )) + return FALSE; + + hw_verts = draw->render->allocate_vertices( draw->render, + draw->pt.hw_vertex_size, + count ); + + if (!hw_verts) + return FALSE; + + /* Single routine to fetch vertices, run shader and emit HW verts. + * Clipping and viewport transformation are done on hardware. + */ + draw->vertex_fetch.pt_fetch( draw, + hw_verts, + start, count ); + + /* Draw arrays path to avoid re-emitting index list again and + * again. + */ + draw->render->draw_arrays( draw->render, + start, + count ); + + + draw->render->release_vertices( draw->render, + hw_verts, + draw->pt.hw_vertex_size, + count ); + + return TRUE; +} + -- cgit v1.2.3 From 5038c20795cb2e49d72c1f43a8b705056592356c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 11 Mar 2008 14:23:08 +0000 Subject: draw: don't free our copy of the render stage -- just borrowing it from vbuf stage --- src/gallium/auxiliary/draw/draw_context.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index bb64b50a17..fed2b6e759 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -116,8 +116,11 @@ void draw_destroy( struct draw_context *draw ) tgsi_exec_machine_free_data(&draw->machine); align_free( draw->vs.queue[0].vertex ); /* Frees all the vertices. */ + /* Not so fast -- we're just borrowing this at the moment. + * if (draw->render) draw->render->destroy( draw->render ); + */ FREE( draw ); } -- cgit v1.2.3 From 30fab81de8ea7bf81181db7bd605f376d4e4fca2 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 11 Mar 2008 14:31:17 -0600 Subject: gallium: fix fs/vs typo in cso_set_vertex_shader() --- src/gallium/auxiliary/cso_cache/cso_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index fbb26ca511..f7f4aebb16 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -349,6 +349,6 @@ void cso_set_vertex_shader(struct cso_context *ctx, if (ctx->vertex_shader != handle) { ctx->vertex_shader = handle; - ctx->pipe->bind_fs_state(ctx->pipe, handle); + ctx->pipe->bind_vs_state(ctx->pipe, handle); } } -- cgit v1.2.3 From 34a0ac7f2b93d6d0f3fc85106e7dacb38c4229b6 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 11 Mar 2008 15:01:52 -0600 Subject: gallium: fix some cso_state_callback cast warnings --- src/gallium/auxiliary/cso_cache/cso_cache.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h index 44ee128a4a..e5edbbb556 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.h +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -84,47 +84,49 @@ extern "C" { #endif +typedef void (*cso_state_callback)(void *ctx, void *obj); + struct cso_cache; struct cso_blend { struct pipe_blend_state state; void *data; - void (*delete_state)(struct pipe_context *, void *); + cso_state_callback delete_state; struct pipe_context *context; }; struct cso_depth_stencil_alpha { struct pipe_depth_stencil_alpha_state state; void *data; - void (*delete_state)(struct pipe_context *, void *); + cso_state_callback delete_state; struct pipe_context *context; }; struct cso_rasterizer { struct pipe_rasterizer_state state; void *data; - void (*delete_state)(struct pipe_context *, void *); + cso_state_callback delete_state; struct pipe_context *context; }; struct cso_fragment_shader { struct pipe_shader_state state; void *data; - void (*delete_state)(struct pipe_context *, void *); + cso_state_callback delete_state; struct pipe_context *context; }; struct cso_vertex_shader { struct pipe_shader_state state; void *data; - void (*delete_state)(struct pipe_context *, void *); + cso_state_callback delete_state; struct pipe_context *context; }; struct cso_sampler { struct pipe_sampler_state state; void *data; - void (*delete_state)(struct pipe_context *, void *); + cso_state_callback delete_state; struct pipe_context *context; }; @@ -138,8 +140,6 @@ enum cso_cache_type { CSO_VERTEX_SHADER }; -typedef void (*cso_state_callback)(void *, void *); - unsigned cso_construct_key(void *item, int item_size); struct cso_cache *cso_cache_create(void); -- cgit v1.2.3 From 45c59895113f997e5f2b7e346f95e46099fa3566 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 11 Mar 2008 12:03:11 +0000 Subject: gallium: Conditional debugging output. Generalize the conditional debugging output code found trhought the gallium drivers. --- src/gallium/auxiliary/util/p_debug.c | 34 +++++++++++++++++++ src/gallium/include/pipe/p_debug.h | 64 ++++++++++++++++++++++++++++++++++++ 2 files changed, 98 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 93bfaea393..04e55dd91d 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -103,3 +103,37 @@ void debug_assert_fail(const char *expr, const char *file, unsigned line) debug_printf("%s:%i: Assertion `%s' failed.\n", file, line, expr); debug_break(); } + + +#define DEBUG_MASK_TABLE_SIZE 256 + + +/** + * Mask hash table. + * + * For now we just take the lower bits of the key, and do no attempt to solve + * collisions. Use a proper hash table when we have dozens of drivers. + */ +static uint32_t debug_mask_table[DEBUG_MASK_TABLE_SIZE]; + + +void debug_mask_set(uint32_t uuid, uint32_t mask) +{ + unsigned hash = uuid & (DEBUG_MASK_TABLE_SIZE - 1); + debug_mask_table[hash] = mask; +} + + +uint32_t debug_mask_get(uint32_t uuid) +{ + unsigned hash = uuid & (DEBUG_MASK_TABLE_SIZE - 1); + return debug_mask_table[hash]; +} + + +void debug_mask_vprintf(uint32_t uuid, uint32_t what, const char *format, va_list ap) +{ + uint32_t mask = debug_mask_get(uuid); + if(mask & what) + debug_vprintf(format, ap); +} diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index a14a1fc5f6..f45363f355 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -41,6 +41,8 @@ #include +#include "p_compiler.h" + #ifdef __cplusplus extern "C" { @@ -93,6 +95,68 @@ void debug_assert_fail(const char *expr, const char *file, unsigned line); #define assert(expr) debug_assert(expr) +/** + * Set a channel's debug mask. + * + * uuid is just a random 32 bit integer that uniquely identifies the debugging + * channel. + * + * @note Due to current implementation issues, make sure the lower 8 bits of + * UUID are unique. + */ +void debug_mask_set(uint32_t uuid, uint32_t mask); + + +uint32_t debug_mask_get(uint32_t uuid); + + +/** + * Conditional debug output. + * + * This is just a generalization of the debug filtering mechanism used + * throughout Gallium. + * + * You use this function as: + * + * @code + * #define MYDRIVER_UUID 0x12345678 // random 32 bit identifier + * + * static inline mydriver_debug(uint32_t what, const char *format, ...) + * { + * #ifdef DEBUG + * va_list ap; + * va_start(ap, format); + * debug_mask_vprintf(MYDRIVER_UUID, what, format, ap); + * va_end(ap); + * #endif + * } + * + * ... + * + * debug_mask_set(MYDRIVER_UUID, + * MYDRIVER_DEBUG_THIS | + * MYDRIVER_DEBUG_THAT | + * ... ); + * + * ... + * + * mydriver_debug(MYDRIVER_DEBUG_THIS, + * "this and this happened\n"); + * + * mydriver_debug(MYDRIVER_DEBUG_THAT, + * "that = %f\n", that); + * ... + * @endcode + * + * You can also define several variants of mydriver_debug, with hardcoded what. + * Note that although macros with variable number of arguments would accomplish + * more in less code, they are not portable. + */ +void debug_mask_vprintf(uint32_t uuid, + uint32_t what, + const char *format, + va_list ap); + #ifdef __cplusplus } #endif -- cgit v1.2.3 From 21ff00306131cd5598f95285badaaabc98021e11 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 11 Mar 2008 23:51:27 +0000 Subject: gallium: Silence MSVC warnings. --- src/gallium/auxiliary/util/u_snprintf.c | 10 +++------- src/gallium/drivers/i915simple/i915_state.c | 7 +++++-- src/gallium/drivers/softpipe/sp_state_sampler.c | 8 +++++--- 3 files changed, 13 insertions(+), 12 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_snprintf.c b/src/gallium/auxiliary/util/u_snprintf.c index 5e19a2ddb2..61c20b48f7 100644 --- a/src/gallium/auxiliary/util/u_snprintf.c +++ b/src/gallium/auxiliary/util/u_snprintf.c @@ -241,7 +241,6 @@ static void *mymemcpy(void *, void *, size_t); #endif /* !HAVE_VASPRINTF */ #if !HAVE_VSNPRINTF -#include /* For ERANGE and errno. */ #include /* For *_MAX. */ #if HAVE_INTTYPES_H #include /* For intmax_t (if not defined in ). */ @@ -445,8 +444,6 @@ static UINTMAX_T cast(LDOUBLE); static UINTMAX_T myround(LDOUBLE); static LDOUBLE mypow10(int); -extern int errno; - int rpl_vsnprintf(char *str, size_t size, const char *format, va_list args) { @@ -747,7 +744,7 @@ rpl_vsnprintf(char *str, size_t size, const char *format, va_list args) goto out; break; case 'c': - cvalue = va_arg(args, int); + cvalue = (unsigned char)va_arg(args, int); OUTCHAR(str, len, size, cvalue); break; case 's': @@ -844,7 +841,6 @@ out: str[size - 1] = '\0'; if (overflow || len >= INT_MAX) { - errno = overflow ? EOVERFLOW : ERANGE; return -1; } return (int)len; @@ -1106,7 +1102,7 @@ again: * Factor of ten with the number of digits needed for the fractional * part. For example, if the precision is 3, the mask will be 1000. */ - mask = mypow10(precision); + mask = (UINTMAX_T)mypow10(precision); /* * We "cheat" by converting the fractional part to integer by * multiplying by a factor of ten. @@ -1358,7 +1354,7 @@ cast(LDOUBLE value) if (value >= UINTMAX_MAX) return UINTMAX_MAX; - result = value; + result = (UINTMAX_T)value; /* * At least on NetBSD/sparc64 3.0.2 and 4.99.30, casting long double to * an integer type converts e.g. 1.9 to 2 instead of 1 (which violates diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 24143243d3..d9ab483bfc 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -273,6 +273,7 @@ static void i915_bind_sampler_states(struct pipe_context *pipe, unsigned num, void **sampler) { struct i915_context *i915 = i915_context(pipe); + unsigned i; assert(num <= PIPE_MAX_SAMPLERS); @@ -281,8 +282,10 @@ static void i915_bind_sampler_states(struct pipe_context *pipe, !memcmp(i915->sampler, sampler, num * sizeof(void *))) return; - memcpy(i915->sampler, sampler, num * sizeof(void *)); - memset(&i915->sampler[num], 0, (PIPE_MAX_SAMPLERS - num) * sizeof(void *)); + for (i = 0; i < num; ++i) + i915->sampler[i] = sampler[i]; + for (i = num; i < PIPE_MAX_SAMPLERS; ++i) + i915->sampler[i] = NULL; i915->num_samplers = num; diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 7cf85b9207..033288a0aa 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -56,6 +56,7 @@ softpipe_bind_sampler_states(struct pipe_context *pipe, unsigned num, void **sampler) { struct softpipe_context *softpipe = softpipe_context(pipe); + unsigned i; assert(num <= PIPE_MAX_SAMPLERS); @@ -66,9 +67,10 @@ softpipe_bind_sampler_states(struct pipe_context *pipe, draw_flush(softpipe->draw); - memcpy(softpipe->sampler, sampler, num * sizeof(void *)); - memset(&softpipe->sampler[num], 0, (PIPE_MAX_SAMPLERS - num) * - sizeof(void *)); + for (i = 0; i < num; ++i) + softpipe->sampler[i] = sampler[i]; + for (i = num; i < PIPE_MAX_SAMPLERS; ++i) + softpipe->sampler[i] = NULL; softpipe->num_samplers = num; -- cgit v1.2.3 From 339e7ec6805e6de8794514c0a935081b5d36d38f Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 11 Mar 2008 18:54:31 -0600 Subject: gallium: rework CSO-related code in state tracker Use the code in cso_context.c rather than st_cache.c. Basically, binding of state objects now goes through the CSO module. But Vertex/fragment shaders go through pipe->bind_fs/vs_state() since they're not cached by the CSO module at this time. Also, update softpipe driver to handle NULL state objects in various places. This happens during context destruction. May need to update other drivers... --- src/gallium/auxiliary/draw/draw_aaline.c | 3 +- src/gallium/auxiliary/draw/draw_aapoint.c | 3 +- src/gallium/auxiliary/draw/draw_pstipple.c | 3 +- src/gallium/drivers/softpipe/sp_state_fs.c | 3 +- src/mesa/sources | 1 - src/mesa/state_tracker/st_atom_blend.c | 52 ++++++-------- src/mesa/state_tracker/st_atom_depth.c | 60 +++++++--------- src/mesa/state_tracker/st_atom_rasterizer.c | 108 ++++++++++++++-------------- src/mesa/state_tracker/st_atom_sampler.c | 43 ++++++----- src/mesa/state_tracker/st_atom_shader.c | 28 ++------ src/mesa/state_tracker/st_cb_accum.c | 1 - src/mesa/state_tracker/st_cb_clear.c | 47 ++++++------ src/mesa/state_tracker/st_cb_drawpixels.c | 64 +++++++++-------- src/mesa/state_tracker/st_cb_program.c | 5 +- src/mesa/state_tracker/st_context.c | 9 ++- src/mesa/state_tracker/st_context.h | 19 ++--- src/mesa/state_tracker/st_debug.c | 6 +- src/mesa/state_tracker/st_draw.c | 13 ++-- src/mesa/state_tracker/st_gen_mipmap.c | 34 ++++----- src/mesa/state_tracker/st_mesa_to_tgsi.c | 5 +- src/mesa/state_tracker/st_mesa_to_tgsi.h | 2 +- src/mesa/state_tracker/st_program.c | 59 +++++++++------ src/mesa/state_tracker/st_program.h | 22 ++---- 23 files changed, 292 insertions(+), 298 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c index 3ec73b0800..6b1e640ae9 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -733,7 +733,8 @@ aaline_bind_fs_state(struct pipe_context *pipe, void *fs) /* save current */ aaline->fs = aafs; /* pass-through */ - aaline->driver_bind_fs_state(aaline->pipe, aafs->driver_fs); + aaline->driver_bind_fs_state(aaline->pipe, + (aafs ? aafs->driver_fs : NULL)); } diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c index 70f696475f..99e9e9fe34 100644 --- a/src/gallium/auxiliary/draw/draw_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_aapoint.c @@ -800,7 +800,8 @@ aapoint_bind_fs_state(struct pipe_context *pipe, void *fs) /* save current */ aapoint->fs = aafs; /* pass-through */ - aapoint->driver_bind_fs_state(aapoint->pipe, aafs->driver_fs); + aapoint->driver_bind_fs_state(aapoint->pipe, + (aafs ? aafs->driver_fs : NULL)); } diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index b3e52dc1c7..ed50d0805a 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -595,7 +595,8 @@ pstip_bind_fs_state(struct pipe_context *pipe, void *fs) /* save current */ pstip->fs = aafs; /* pass-through */ - pstip->driver_bind_fs_state(pstip->pipe, aafs->driver_fs); + pstip->driver_bind_fs_state(pstip->pipe, + (aafs ? aafs->driver_fs : NULL)); } diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index eb641ed321..4eefd1d61f 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -120,7 +120,8 @@ softpipe_bind_vs_state(struct pipe_context *pipe, void *vs) softpipe->vs = (const struct sp_vertex_shader *)vs; - draw_bind_vertex_shader(softpipe->draw, softpipe->vs->draw_data); + draw_bind_vertex_shader(softpipe->draw, + (softpipe->vs ? softpipe->vs->draw_data : NULL)); softpipe->dirty |= SP_NEW_VS; } diff --git a/src/mesa/sources b/src/mesa/sources index f0bf7b31fb..e3d5f22849 100644 --- a/src/mesa/sources +++ b/src/mesa/sources @@ -184,7 +184,6 @@ STATETRACKER_SOURCES = \ state_tracker/st_cb_readpixels.c \ state_tracker/st_cb_strings.c \ state_tracker/st_cb_texture.c \ - state_tracker/st_cache.c \ state_tracker/st_context.c \ state_tracker/st_debug.c \ state_tracker/st_draw.c \ diff --git a/src/mesa/state_tracker/st_atom_blend.c b/src/mesa/state_tracker/st_atom_blend.c index 2a9d209153..6c13fc8141 100644 --- a/src/mesa/state_tracker/st_atom_blend.c +++ b/src/mesa/state_tracker/st_atom_blend.c @@ -33,11 +33,11 @@ #include "st_context.h" -#include "st_cache.h" #include "st_atom.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "cso_cache/cso_context.h" /** @@ -155,44 +155,43 @@ translate_logicop(GLenum logicop) static void update_blend( struct st_context *st ) { - struct pipe_blend_state blend; - const struct cso_blend *cso; + struct pipe_blend_state *blend = &st->state.blend; - memset(&blend, 0, sizeof(blend)); + memset(blend, 0, sizeof(*blend)); if (st->ctx->Color.ColorLogicOpEnabled || (st->ctx->Color.BlendEnabled && st->ctx->Color.BlendEquationRGB == GL_LOGIC_OP)) { /* logicop enabled */ - blend.logicop_enable = 1; - blend.logicop_func = translate_logicop(st->ctx->Color.LogicOp); + blend->logicop_enable = 1; + blend->logicop_func = translate_logicop(st->ctx->Color.LogicOp); } else if (st->ctx->Color.BlendEnabled) { /* blending enabled */ - blend.blend_enable = 1; + blend->blend_enable = 1; - blend.rgb_func = translate_blend(st->ctx->Color.BlendEquationRGB); + blend->rgb_func = translate_blend(st->ctx->Color.BlendEquationRGB); if (st->ctx->Color.BlendEquationRGB == GL_MIN || st->ctx->Color.BlendEquationRGB == GL_MAX) { /* Min/max are special */ - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ONE; + blend->rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend->rgb_dst_factor = PIPE_BLENDFACTOR_ONE; } else { - blend.rgb_src_factor = translate_blend(st->ctx->Color.BlendSrcRGB); - blend.rgb_dst_factor = translate_blend(st->ctx->Color.BlendDstRGB); + blend->rgb_src_factor = translate_blend(st->ctx->Color.BlendSrcRGB); + blend->rgb_dst_factor = translate_blend(st->ctx->Color.BlendDstRGB); } - blend.alpha_func = translate_blend(st->ctx->Color.BlendEquationA); + blend->alpha_func = translate_blend(st->ctx->Color.BlendEquationA); if (st->ctx->Color.BlendEquationA == GL_MIN || st->ctx->Color.BlendEquationA == GL_MAX) { /* Min/max are special */ - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ONE; + blend->alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend->alpha_dst_factor = PIPE_BLENDFACTOR_ONE; } else { - blend.alpha_src_factor = translate_blend(st->ctx->Color.BlendSrcA); - blend.alpha_dst_factor = translate_blend(st->ctx->Color.BlendDstA); + blend->alpha_src_factor = translate_blend(st->ctx->Color.BlendSrcA); + blend->alpha_dst_factor = translate_blend(st->ctx->Color.BlendDstA); } } else { @@ -201,25 +200,18 @@ update_blend( struct st_context *st ) /* Colormask - maybe reverse these bits? */ if (st->ctx->Color.ColorMask[0]) - blend.colormask |= PIPE_MASK_R; + blend->colormask |= PIPE_MASK_R; if (st->ctx->Color.ColorMask[1]) - blend.colormask |= PIPE_MASK_G; + blend->colormask |= PIPE_MASK_G; if (st->ctx->Color.ColorMask[2]) - blend.colormask |= PIPE_MASK_B; + blend->colormask |= PIPE_MASK_B; if (st->ctx->Color.ColorMask[3]) - blend.colormask |= PIPE_MASK_A; + blend->colormask |= PIPE_MASK_A; if (st->ctx->Color.DitherFlag) - blend.dither = 1; + blend->dither = 1; - cso = st_cached_blend_state(st, &blend); - - if (st->state.blend != cso) { - /* state has changed */ - st->state.blend = cso; - /* bind new state */ - st->pipe->bind_blend_state(st->pipe, cso->data); - } + cso_set_blend(st->cso_context, blend); if (memcmp(st->ctx->Color.BlendColor, &st->state.blend_color, 4 * sizeof(GLfloat)) != 0) { /* state has changed */ diff --git a/src/mesa/state_tracker/st_atom_depth.c b/src/mesa/state_tracker/st_atom_depth.c index 7aecdbfbcc..827ad3b548 100644 --- a/src/mesa/state_tracker/st_atom_depth.c +++ b/src/mesa/state_tracker/st_atom_depth.c @@ -34,10 +34,10 @@ #include "st_context.h" -#include "st_cache.h" #include "st_atom.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "cso_cache/cso_context.h" /** @@ -93,53 +93,47 @@ gl_stencil_op_to_pipe(GLenum func) static void update_depth_stencil_alpha(struct st_context *st) { - struct pipe_depth_stencil_alpha_state depth_stencil; - const struct cso_depth_stencil_alpha *cso; + struct pipe_depth_stencil_alpha_state *dsa = &st->state.depth_stencil; - memset(&depth_stencil, 0, sizeof(depth_stencil)); + memset(dsa, 0, sizeof(*dsa)); - depth_stencil.depth.enabled = st->ctx->Depth.Test; - depth_stencil.depth.writemask = st->ctx->Depth.Mask; - depth_stencil.depth.func = st_compare_func_to_pipe(st->ctx->Depth.Func); + dsa->depth.enabled = st->ctx->Depth.Test; + dsa->depth.writemask = st->ctx->Depth.Mask; + dsa->depth.func = st_compare_func_to_pipe(st->ctx->Depth.Func); if (st->ctx->Query.CurrentOcclusionObject && st->ctx->Query.CurrentOcclusionObject->Active) - depth_stencil.depth.occlusion_count = 1; + dsa->depth.occlusion_count = 1; if (st->ctx->Stencil.Enabled) { - depth_stencil.stencil[0].enabled = 1; - depth_stencil.stencil[0].func = st_compare_func_to_pipe(st->ctx->Stencil.Function[0]); - depth_stencil.stencil[0].fail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.FailFunc[0]); - depth_stencil.stencil[0].zfail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZFailFunc[0]); - depth_stencil.stencil[0].zpass_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZPassFunc[0]); - depth_stencil.stencil[0].ref_value = st->ctx->Stencil.Ref[0] & 0xff; - depth_stencil.stencil[0].value_mask = st->ctx->Stencil.ValueMask[0] & 0xff; - depth_stencil.stencil[0].write_mask = st->ctx->Stencil.WriteMask[0] & 0xff; + dsa->stencil[0].enabled = 1; + dsa->stencil[0].func = st_compare_func_to_pipe(st->ctx->Stencil.Function[0]); + dsa->stencil[0].fail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.FailFunc[0]); + dsa->stencil[0].zfail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZFailFunc[0]); + dsa->stencil[0].zpass_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZPassFunc[0]); + dsa->stencil[0].ref_value = st->ctx->Stencil.Ref[0] & 0xff; + dsa->stencil[0].value_mask = st->ctx->Stencil.ValueMask[0] & 0xff; + dsa->stencil[0].write_mask = st->ctx->Stencil.WriteMask[0] & 0xff; if (st->ctx->Stencil.TestTwoSide) { - depth_stencil.stencil[1].enabled = 1; - depth_stencil.stencil[1].func = st_compare_func_to_pipe(st->ctx->Stencil.Function[1]); - depth_stencil.stencil[1].fail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.FailFunc[1]); - depth_stencil.stencil[1].zfail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZFailFunc[1]); - depth_stencil.stencil[1].zpass_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZPassFunc[1]); - depth_stencil.stencil[1].ref_value = st->ctx->Stencil.Ref[1] & 0xff; - depth_stencil.stencil[1].value_mask = st->ctx->Stencil.ValueMask[1] & 0xff; - depth_stencil.stencil[1].write_mask = st->ctx->Stencil.WriteMask[1] & 0xff; + dsa->stencil[1].enabled = 1; + dsa->stencil[1].func = st_compare_func_to_pipe(st->ctx->Stencil.Function[1]); + dsa->stencil[1].fail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.FailFunc[1]); + dsa->stencil[1].zfail_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZFailFunc[1]); + dsa->stencil[1].zpass_op = gl_stencil_op_to_pipe(st->ctx->Stencil.ZPassFunc[1]); + dsa->stencil[1].ref_value = st->ctx->Stencil.Ref[1] & 0xff; + dsa->stencil[1].value_mask = st->ctx->Stencil.ValueMask[1] & 0xff; + dsa->stencil[1].write_mask = st->ctx->Stencil.WriteMask[1] & 0xff; } } if (st->ctx->Color.AlphaEnabled) { - depth_stencil.alpha.enabled = 1; - depth_stencil.alpha.func = st_compare_func_to_pipe(st->ctx->Color.AlphaFunc); - depth_stencil.alpha.ref = st->ctx->Color.AlphaRef; + dsa->alpha.enabled = 1; + dsa->alpha.func = st_compare_func_to_pipe(st->ctx->Color.AlphaFunc); + dsa->alpha.ref = st->ctx->Color.AlphaRef; } - cso = st_cached_depth_stencil_alpha_state(st, &depth_stencil); - if (st->state.depth_stencil != cso) { - /* state has changed */ - st->state.depth_stencil = cso; - st->pipe->bind_depth_stencil_alpha_state(st->pipe, cso->data); /* bind new state */ - } + cso_set_depth_stencil_alpha(st->cso_context, dsa); } diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c index 229839d8b2..77cef9236b 100644 --- a/src/mesa/state_tracker/st_atom_rasterizer.c +++ b/src/mesa/state_tracker/st_atom_rasterizer.c @@ -32,10 +32,11 @@ #include "main/macros.h" #include "st_context.h" -#include "st_cache.h" +#include "st_atom.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "st_atom.h" +#include "cso_cache/cso_context.h" + static GLuint translate_fill( GLenum mode ) { @@ -72,22 +73,21 @@ static GLboolean get_offset_flag( GLuint fill_mode, static void update_raster_state( struct st_context *st ) { GLcontext *ctx = st->ctx; - struct pipe_rasterizer_state raster; - const struct cso_rasterizer *cso; + struct pipe_rasterizer_state *raster = &st->state.rasterizer; const struct gl_vertex_program *vertProg = ctx->VertexProgram._Current; uint i; - memset(&raster, 0, sizeof(raster)); + memset(raster, 0, sizeof(*raster)); - raster.origin_lower_left = 1; /* Always true for OpenGL */ + raster->origin_lower_left = 1; /* Always true for OpenGL */ /* _NEW_POLYGON, _NEW_BUFFERS */ { if (ctx->Polygon.FrontFace == GL_CCW) - raster.front_winding = PIPE_WINDING_CCW; + raster->front_winding = PIPE_WINDING_CCW; else - raster.front_winding = PIPE_WINDING_CW; + raster->front_winding = PIPE_WINDING_CW; /* XXX * I think the intention here is that user-created framebuffer objects @@ -96,13 +96,13 @@ static void update_raster_state( struct st_context *st ) * But this is an implementation/driver-specific artifact - remove... */ if (ctx->DrawBuffer && ctx->DrawBuffer->Name != 0) - raster.front_winding ^= PIPE_WINDING_BOTH; + raster->front_winding ^= PIPE_WINDING_BOTH; } /* _NEW_LIGHT */ if (ctx->Light.ShadeModel == GL_FLAT) - raster.flatshade = 1; + raster->flatshade = 1; /* _NEW_LIGHT | _NEW_PROGRAM * @@ -113,28 +113,28 @@ static void update_raster_state( struct st_context *st ) if (ctx->VertexProgram._Current) { if (ctx->VertexProgram._Enabled) { /* user-defined program */ - raster.light_twoside = ctx->VertexProgram.TwoSideEnabled; + raster->light_twoside = ctx->VertexProgram.TwoSideEnabled; } else { /* TNL-generated program */ - raster.light_twoside = ctx->Light.Enabled && ctx->Light.Model.TwoSide; + raster->light_twoside = ctx->Light.Enabled && ctx->Light.Model.TwoSide; } } else if (ctx->Light.Enabled && ctx->Light.Model.TwoSide) { - raster.light_twoside = 1; + raster->light_twoside = 1; } /* _NEW_POLYGON */ if (ctx->Polygon.CullFlag) { if (ctx->Polygon.CullFaceMode == GL_FRONT_AND_BACK) { - raster.cull_mode = PIPE_WINDING_BOTH; + raster->cull_mode = PIPE_WINDING_BOTH; } else if (ctx->Polygon.CullFaceMode == GL_FRONT) { - raster.cull_mode = raster.front_winding; + raster->cull_mode = raster->front_winding; } else { - raster.cull_mode = raster.front_winding ^ PIPE_WINDING_BOTH; + raster->cull_mode = raster->front_winding ^ PIPE_WINDING_BOTH; } } @@ -144,23 +144,23 @@ static void update_raster_state( struct st_context *st ) GLuint fill_front = translate_fill( ctx->Polygon.FrontMode ); GLuint fill_back = translate_fill( ctx->Polygon.BackMode ); - if (raster.front_winding == PIPE_WINDING_CW) { - raster.fill_cw = fill_front; - raster.fill_ccw = fill_back; + if (raster->front_winding == PIPE_WINDING_CW) { + raster->fill_cw = fill_front; + raster->fill_ccw = fill_back; } else { - raster.fill_cw = fill_back; - raster.fill_ccw = fill_front; + raster->fill_cw = fill_back; + raster->fill_ccw = fill_front; } /* Simplify when culling is active: */ - if (raster.cull_mode & PIPE_WINDING_CW) { - raster.fill_cw = raster.fill_ccw; + if (raster->cull_mode & PIPE_WINDING_CW) { + raster->fill_cw = raster->fill_ccw; } - if (raster.cull_mode & PIPE_WINDING_CCW) { - raster.fill_ccw = raster.fill_cw; + if (raster->cull_mode & PIPE_WINDING_CCW) { + raster->fill_ccw = raster->fill_cw; } } @@ -168,95 +168,91 @@ static void update_raster_state( struct st_context *st ) */ if (ctx->Polygon.OffsetUnits != 0.0 || ctx->Polygon.OffsetFactor != 0.0) { - raster.offset_cw = get_offset_flag( raster.fill_cw, &ctx->Polygon ); - raster.offset_ccw = get_offset_flag( raster.fill_ccw, &ctx->Polygon ); - raster.offset_units = ctx->Polygon.OffsetUnits; - raster.offset_scale = ctx->Polygon.OffsetFactor; + raster->offset_cw = get_offset_flag( raster->fill_cw, &ctx->Polygon ); + raster->offset_ccw = get_offset_flag( raster->fill_ccw, &ctx->Polygon ); + raster->offset_units = ctx->Polygon.OffsetUnits; + raster->offset_scale = ctx->Polygon.OffsetFactor; } if (ctx->Polygon.SmoothFlag) - raster.poly_smooth = 1; + raster->poly_smooth = 1; if (ctx->Polygon.StippleFlag) - raster.poly_stipple_enable = 1; + raster->poly_stipple_enable = 1; /* _NEW_BUFFERS, _NEW_POLYGON */ - if (raster.fill_cw != PIPE_POLYGON_MODE_FILL || - raster.fill_ccw != PIPE_POLYGON_MODE_FILL) + if (raster->fill_cw != PIPE_POLYGON_MODE_FILL || + raster->fill_ccw != PIPE_POLYGON_MODE_FILL) { GLfloat mrd = (ctx->DrawBuffer ? ctx->DrawBuffer->_MRD : 1.0); - raster.offset_units = ctx->Polygon.OffsetFactor * mrd; - raster.offset_scale = (ctx->Polygon.OffsetUnits * mrd * + raster->offset_units = ctx->Polygon.OffsetFactor * mrd; + raster->offset_scale = (ctx->Polygon.OffsetUnits * mrd * st->polygon_offset_scale); } /* _NEW_POINT */ - raster.point_size = ctx->Point.Size; - raster.point_smooth = ctx->Point.SmoothFlag; - raster.point_sprite = ctx->Point.PointSprite; + raster->point_size = ctx->Point.Size; + raster->point_smooth = ctx->Point.SmoothFlag; + raster->point_sprite = ctx->Point.PointSprite; for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { if (ctx->Point.CoordReplace[i]) { if (ctx->Point.SpriteOrigin == GL_UPPER_LEFT) - raster.sprite_coord_mode[i] = PIPE_SPRITE_COORD_UPPER_LEFT; + raster->sprite_coord_mode[i] = PIPE_SPRITE_COORD_UPPER_LEFT; else - raster.sprite_coord_mode[i] = PIPE_SPRITE_COORD_LOWER_LEFT; + raster->sprite_coord_mode[i] = PIPE_SPRITE_COORD_LOWER_LEFT; } else { - raster.sprite_coord_mode[i] = PIPE_SPRITE_COORD_NONE; + raster->sprite_coord_mode[i] = PIPE_SPRITE_COORD_NONE; } } if (vertProg) { if (vertProg->Base.Id == 0) { if (vertProg->Base.OutputsWritten & (1 << VERT_RESULT_PSIZ)) { /* generated program which emits point size */ - raster.point_size_per_vertex = TRUE; + raster->point_size_per_vertex = TRUE; } } else if (ctx->VertexProgram.PointSizeEnabled) { /* user-defined program and GL_VERTEX_PROGRAM_POINT_SIZE set */ - raster.point_size_per_vertex = ctx->VertexProgram.PointSizeEnabled; + raster->point_size_per_vertex = ctx->VertexProgram.PointSizeEnabled; } } /* _NEW_LINE */ - raster.line_smooth = ctx->Line.SmoothFlag; + raster->line_smooth = ctx->Line.SmoothFlag; if (ctx->Line.SmoothFlag) { - raster.line_width = CLAMP(ctx->Line.Width, + raster->line_width = CLAMP(ctx->Line.Width, ctx->Const.MinLineWidthAA, ctx->Const.MaxLineWidthAA); } else { - raster.line_width = CLAMP(ctx->Line.Width, + raster->line_width = CLAMP(ctx->Line.Width, ctx->Const.MinLineWidth, ctx->Const.MaxLineWidth); } - raster.line_stipple_enable = ctx->Line.StippleFlag; - raster.line_stipple_pattern = ctx->Line.StipplePattern; + raster->line_stipple_enable = ctx->Line.StippleFlag; + raster->line_stipple_pattern = ctx->Line.StipplePattern; /* GL stipple factor is in [1,256], remap to [0, 255] here */ - raster.line_stipple_factor = ctx->Line.StippleFactor - 1; + raster->line_stipple_factor = ctx->Line.StippleFactor - 1; /* _NEW_MULTISAMPLE */ if (ctx->Multisample.Enabled) - raster.multisample = 1; + raster->multisample = 1; /* _NEW_SCISSOR */ if (ctx->Scissor.Enabled) - raster.scissor = 1; + raster->scissor = 1; - cso = st_cached_rasterizer_state(st, &raster); - if (st->state.rasterizer != cso) { - st->state.rasterizer = cso; - st->pipe->bind_rasterizer_state(st->pipe, cso->data); - } + cso_set_rasterizer(st->cso_context, raster); } const struct st_tracked_state st_update_rasterizer = { diff --git a/src/mesa/state_tracker/st_atom_sampler.c b/src/mesa/state_tracker/st_atom_sampler.c index 1000f98ffc..1babba9b4f 100644 --- a/src/mesa/state_tracker/st_atom_sampler.c +++ b/src/mesa/state_tracker/st_atom_sampler.c @@ -33,11 +33,11 @@ #include "st_context.h" -#include "st_cache.h" #include "st_atom.h" #include "st_program.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" +#include "cso_cache/cso_context.h" /** @@ -124,9 +124,9 @@ update_samplers(struct st_context *st) /* loop over sampler units (aka tex image units) */ for (su = 0; su < st->ctx->Const.MaxTextureImageUnits; su++) { - struct pipe_sampler_state sampler; + struct pipe_sampler_state *sampler = st->state.samplers + su; - memset(&sampler, 0, sizeof(sampler)); + memset(sampler, 0, sizeof(*sampler)); if (fs->Base.Base.SamplersUsed & (1 << su)) { GLuint texUnit = fs->Base.Base.SamplerUnits[su]; @@ -135,37 +135,37 @@ update_samplers(struct st_context *st) assert(texobj); - sampler.wrap_s = gl_wrap_to_sp(texobj->WrapS); - sampler.wrap_t = gl_wrap_to_sp(texobj->WrapT); - sampler.wrap_r = gl_wrap_to_sp(texobj->WrapR); + sampler->wrap_s = gl_wrap_to_sp(texobj->WrapS); + sampler->wrap_t = gl_wrap_to_sp(texobj->WrapT); + sampler->wrap_r = gl_wrap_to_sp(texobj->WrapR); - sampler.min_img_filter = gl_filter_to_img_filter(texobj->MinFilter); - sampler.min_mip_filter = gl_filter_to_mip_filter(texobj->MinFilter); - sampler.mag_img_filter = gl_filter_to_img_filter(texobj->MagFilter); + sampler->min_img_filter = gl_filter_to_img_filter(texobj->MinFilter); + sampler->min_mip_filter = gl_filter_to_mip_filter(texobj->MinFilter); + sampler->mag_img_filter = gl_filter_to_img_filter(texobj->MagFilter); if (texobj->Target != GL_TEXTURE_RECTANGLE_ARB) - sampler.normalized_coords = 1; + sampler->normalized_coords = 1; - sampler.lod_bias = st->ctx->Texture.Unit[su].LodBias; + sampler->lod_bias = st->ctx->Texture.Unit[su].LodBias; #if 1 - sampler.min_lod = (texobj->MinLod) < 0.0 ? 0.0 : texobj->MinLod; - sampler.max_lod = texobj->MaxLod; + sampler->min_lod = (texobj->MinLod) < 0.0 ? 0.0 : texobj->MinLod; + sampler->max_lod = texobj->MaxLod; #else /* min/max lod should really be as follows (untested). * Also, calculate_first_last_level() needs to be overhauled * since today's hardware had real support for LOD clamping. */ - sampler.min_lod = MAX2(texobj->BaseLevel, texobj->MinLod); - sampler.max_lod = MIN2(texobj->MaxLevel, texobj->MaxLod); + sampler->min_lod = MAX2(texobj->BaseLevel, texobj->MinLod); + sampler->max_lod = MIN2(texobj->MaxLevel, texobj->MaxLod); #endif - sampler.max_anisotropy = texobj->MaxAnisotropy; + sampler->max_anisotropy = texobj->MaxAnisotropy; /* only care about ARB_shadow, not SGI shadow */ if (texobj->CompareMode == GL_COMPARE_R_TO_TEXTURE) { - sampler.compare = 1; - sampler.compare_mode = PIPE_TEX_COMPARE_R_TO_TEXTURE; - sampler.compare_func + sampler->compare = 1; + sampler->compare_mode = PIPE_TEX_COMPARE_R_TO_TEXTURE; + sampler->compare_func = st_compare_func_to_pipe(texobj->CompareFunc); } @@ -174,11 +174,10 @@ update_samplers(struct st_context *st) /* XXX more sampler state here */ } - st->state.sampler[su] = st_cached_sampler_state(st, &sampler)->data; + cso_single_sampler(st->cso_context, su, sampler); } - st->pipe->bind_sampler_states(st->pipe, st->state.num_samplers, - st->state.sampler); + cso_single_sampler_done(st->cso_context); } diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index 10c131d554..0726688493 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -43,10 +43,9 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_context.h" #include "st_context.h" -#include "st_cache.h" #include "st_atom.h" #include "st_program.h" #include "st_atom_shader.h" @@ -70,9 +69,6 @@ struct translated_vertex_program /** Maps VERT_RESULT_x to slot */ GLuint output_to_slot[VERT_RESULT_MAX]; - /** The program in TGSI format */ - struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; - /** Pointer to the translated vertex program */ struct st_vertex_program *vp; @@ -158,7 +154,7 @@ find_translated_vp(struct st_context *st, /* * Translate fragment program if needed. */ - if (!stfp->cso) { + if (!stfp->state.tokens) { GLuint inAttr, numIn = 0; for (inAttr = 0; inAttr < FRAG_ATTRIB_MAX; inAttr++) { @@ -175,11 +171,7 @@ find_translated_vp(struct st_context *st, assert(stfp->Base.Base.NumInstructions > 1); - (void) st_translate_fragment_program(st, stfp, - stfp->input_to_slot, - stfp->tokens, - ST_MAX_SHADER_TOKENS); - assert(stfp->cso); + st_translate_fragment_program(st, stfp, stfp->input_to_slot); } @@ -261,12 +253,8 @@ find_translated_vp(struct st_context *st, assert(stvp->Base.Base.NumInstructions > 1); - st_translate_vertex_program(st, stvp, - xvp->output_to_slot, - xvp->tokens, - ST_MAX_SHADER_TOKENS); + st_translate_vertex_program(st, stvp, xvp->output_to_slot); - assert(stvp->cso); xvp->vp = stvp; /* translated VP is up to date now */ @@ -296,12 +284,10 @@ update_linkage( struct st_context *st ) xvp = find_translated_vp(st, stvp, stfp); st->vp = stvp; - st->state.vs = xvp->vp; - st->pipe->bind_vs_state(st->pipe, st->state.vs->cso->data); - st->fp = stfp; - st->state.fs = stfp->cso; - st->pipe->bind_fs_state(st->pipe, st->state.fs->data); + + st->pipe->bind_vs_state(st->pipe, stvp->driver_shader); + st->pipe->bind_fs_state(st->pipe, stfp->driver_shader); st->vertex_result_to_slot = xvp->output_to_slot; } diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index 663c4f205d..f1fddc4e02 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -35,7 +35,6 @@ #include "main/macros.h" #include "st_context.h" -#include "st_cache.h" #include "st_cb_accum.h" #include "st_cb_fbo.h" #include "st_draw.h" diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index e712fd84cd..eae40f2a4f 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -35,7 +35,6 @@ #include "main/macros.h" #include "shader/prog_instruction.h" #include "st_atom.h" -#include "st_cache.h" #include "st_context.h" #include "st_cb_accum.h" #include "st_cb_clear.h" @@ -50,6 +49,8 @@ #include "pipe/p_defines.h" #include "pipe/p_winsys.h" +#include "cso_cache/cso_context.h" + @@ -157,8 +158,7 @@ make_frag_shader(struct st_context *st) p->OutputsWritten = (1 << FRAG_RESULT_COLR); stfp = (struct st_fragment_program *) p; - st_translate_fragment_program(st, stfp, NULL, - stfp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_fragment_program(st, stfp, NULL); return stfp; } @@ -206,9 +206,10 @@ make_vertex_shader(struct st_context *st) (1 << VERT_RESULT_HPOS)); stvp = (struct st_vertex_program *) p; - st_translate_vertex_program(st, stvp, NULL, - stvp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_vertex_program(st, stvp, NULL); +#if 0 assert(stvp->cso); +#endif return stvp; } @@ -284,7 +285,6 @@ clear_with_quad(GLcontext *ctx, /* blend state: RGBA masking */ { struct pipe_blend_state blend; - const struct cso_blend *cso; memset(&blend, 0, sizeof(blend)); if (color) { if (ctx->Color.ColorMask[0]) @@ -298,14 +298,12 @@ clear_with_quad(GLcontext *ctx, if (st->ctx->Color.DitherFlag) blend.dither = 1; } - cso = st_cached_blend_state(st, &blend); - pipe->bind_blend_state(pipe, cso->data); + cso_set_blend(st->cso_context, &blend); } /* depth_stencil state: always pass/set to ref value */ { struct pipe_depth_stencil_alpha_state depth_stencil; - const struct cso_depth_stencil_alpha *cso; memset(&depth_stencil, 0, sizeof(depth_stencil)); if (depth) { depth_stencil.depth.enabled = 1; @@ -323,14 +321,13 @@ clear_with_quad(GLcontext *ctx, depth_stencil.stencil[0].value_mask = 0xff; depth_stencil.stencil[0].write_mask = ctx->Stencil.WriteMask[0] & 0xff; } - cso = st_cached_depth_stencil_alpha_state(st, &depth_stencil); - pipe->bind_depth_stencil_alpha_state(pipe, cso->data); + + cso_set_depth_stencil_alpha(st->cso_context, &depth_stencil); } /* rasterizer state: nothing */ { struct pipe_rasterizer_state raster; - const struct cso_rasterizer *cso; memset(&raster, 0, sizeof(raster)); #if 0 /* don't do per-pixel scissor; we'll just draw a PIPE_PRIM_QUAD @@ -339,8 +336,7 @@ clear_with_quad(GLcontext *ctx, if (ctx->Scissor.Enabled) raster.scissor = 1; #endif - cso = st_cached_rasterizer_state(st, &raster); - pipe->bind_rasterizer_state(pipe, cso->data); + cso_set_rasterizer(st->cso_context, &raster); } /* fragment shader state: color pass-through program */ @@ -349,7 +345,7 @@ clear_with_quad(GLcontext *ctx, if (!stfp) { stfp = make_frag_shader(st); } - pipe->bind_fs_state(pipe, stfp->cso->data); + pipe->bind_fs_state(pipe, stfp->driver_shader); } /* vertex shader state: color/position pass-through */ @@ -358,7 +354,7 @@ clear_with_quad(GLcontext *ctx, if (!stvp) { stvp = make_vertex_shader(st); } - pipe->bind_vs_state(pipe, stvp->cso->data); + pipe->bind_vs_state(pipe, stvp->driver_shader); } /* viewport state: viewport matching window dims */ @@ -380,16 +376,19 @@ clear_with_quad(GLcontext *ctx, /* draw quad matching scissor rect (XXX verify coord round-off) */ draw_quad(ctx, x0, y0, x1, y1, ctx->Depth.Clear, ctx->Color.ClearColor); +#if 0 + /* Can't depend on old state objects still existing -- may have + * been deleted to make room in the hash, etc. (Should get + * fixed...) + */ + st_invalidate_state(ctx, _NEW_COLOR | _NEW_DEPTH | _NEW_STENCIL); +#else /* Restore pipe state */ - pipe->bind_blend_state(pipe, st->state.blend->data); - pipe->bind_depth_stencil_alpha_state(pipe, st->state.depth_stencil->data); - pipe->bind_fs_state(pipe, st->state.fs->data); - pipe->bind_vs_state(pipe, st->state.vs->cso->data); - pipe->bind_rasterizer_state(pipe, st->state.rasterizer->data); + cso_set_rasterizer(st->cso_context, &st->state.rasterizer); + pipe->bind_fs_state(pipe, st->fp->driver_shader); + pipe->bind_vs_state(pipe, st->vp->driver_shader); +#endif pipe->set_viewport_state(pipe, &st->state.viewport); - /* OR: - st_invalidate_state(ctx, _NEW_COLOR | _NEW_DEPTH | _NEW_STENCIL); - */ } diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index dee4c4132a..18ec9645c4 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -41,7 +41,6 @@ #include "st_context.h" #include "st_atom.h" #include "st_atom_constbuf.h" -#include "st_cache.h" #include "st_draw.h" #include "st_program.h" #include "st_cb_drawpixels.h" @@ -58,6 +57,7 @@ #include "pipe/p_winsys.h" #include "util/p_tile.h" #include "shader/prog_instruction.h" +#include "cso_cache/cso_context.h" /** @@ -159,8 +159,7 @@ make_bitmap_fragment_program(GLcontext *ctx) stfp = (struct st_fragment_program *) p; stfp->Base.UsesKill = GL_TRUE; - st_translate_fragment_program(ctx->st, stfp, NULL, - stfp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_fragment_program(ctx->st, stfp, NULL); return stfp; } @@ -204,8 +203,7 @@ combined_bitmap_fragment_program(GLcontext *ctx) #endif /* translate to TGSI tokens */ - st_translate_fragment_program(st, stfp, NULL, - stfp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_fragment_program(st, stfp, NULL); /* save new program, update serial numbers */ st->bitmap.user_prog_sn = st->fp->serialNo; @@ -266,8 +264,7 @@ combined_drawpix_fragment_program(GLcontext *ctx) #endif /* translate to TGSI tokens */ - st_translate_fragment_program(st, stfp, NULL, - stfp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_fragment_program(st, stfp, NULL); /* save new program, update serial numbers */ st->pixel_xfer.xfer_prog_sn = st->pixel_xfer.program->serialNo; @@ -343,8 +340,7 @@ make_fragment_shader_z(struct st_context *st) p->OutputsWritten = (1 << FRAG_RESULT_COLR) | (1 << FRAG_RESULT_DEPR); stfp = (struct st_fragment_program *) p; - st_translate_fragment_program(st, stfp, NULL, - stfp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_fragment_program(st, stfp, NULL); return stfp; } @@ -421,8 +417,7 @@ st_make_passthrough_vertex_shader(struct st_context *st, GLboolean passColor) } stvp = (struct st_vertex_program *) p; - st_translate_vertex_program(st, stvp, NULL, - stvp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_vertex_program(st, stvp, NULL); progs[passColor] = stvp; @@ -641,7 +636,9 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, const GLfloat *color, GLboolean invertTex) { + struct st_context *st = ctx->st; struct pipe_context *pipe = ctx->st->pipe; + struct cso_context *cso = ctx->st->cso_context; GLfloat x0, y0, x1, y1; GLuint maxSize; @@ -656,24 +653,23 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, /* setup state: just scissor */ { struct pipe_rasterizer_state setup; - const struct cso_rasterizer *cso; memset(&setup, 0, sizeof(setup)); if (ctx->Scissor.Enabled) setup.scissor = 1; - cso = st_cached_rasterizer_state(ctx->st, &setup); - pipe->bind_rasterizer_state(pipe, cso->data); + + cso_set_rasterizer(cso, &setup); } /* fragment shader state: TEX lookup program */ - pipe->bind_fs_state(pipe, stfp->cso->data); + pipe->bind_fs_state(pipe, stfp->driver_shader); /* vertex shader state: position + texcoord pass-through */ - pipe->bind_vs_state(pipe, stvp->cso->data); + pipe->bind_vs_state(pipe, stvp->driver_shader); + /* texture sampling state: */ { struct pipe_sampler_state sampler; - const struct cso_sampler *cso; memset(&sampler, 0, sizeof(sampler)); sampler.wrap_s = PIPE_TEX_WRAP_CLAMP; sampler.wrap_t = PIPE_TEX_WRAP_CLAMP; @@ -682,8 +678,9 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; sampler.normalized_coords = 1; - cso = st_cached_sampler_state(ctx->st, &sampler); - pipe->bind_sampler_states(pipe, 1, (void**)&cso->data); + + cso_single_sampler(cso, 0, &sampler); + cso_single_sampler_done(cso); } /* viewport state: viewport matching window dims */ @@ -723,14 +720,25 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, draw_quad(ctx, x0, y0, z, x1, y1, invertTex); /* restore GL state */ - pipe->bind_rasterizer_state(pipe, ctx->st->state.rasterizer->data); - pipe->bind_fs_state(pipe, ctx->st->state.fs->data); - pipe->bind_vs_state(pipe, ctx->st->state.vs->cso->data); pipe->set_sampler_textures(pipe, ctx->st->state.num_textures, ctx->st->state.sampler_texture); - pipe->bind_sampler_states(pipe, ctx->st->state.num_samplers, - ctx->st->state.sampler); + pipe->set_viewport_state(pipe, &ctx->st->state.viewport); + +#if 0 + /* Can't depend on old state objects still existing -- may have + * been deleted to make room in the hash, etc. (Should get + * fixed...) + */ + st_invalidate_state(ctx, _NEW_COLOR | _NEW_TEXTURE); +#else + /* restore state */ + pipe->bind_fs_state(pipe, st->fp->driver_shader); + pipe->bind_vs_state(pipe, st->vp->driver_shader); + cso_set_rasterizer(cso, &st->state.rasterizer); + cso_set_samplers(cso, PIPE_MAX_SAMPLERS, + (const struct pipe_sampler_state **) st->state.sampler_list); +#endif } @@ -798,10 +806,10 @@ compatible_formats(GLenum format, GLenum type, enum pipe_format pipeFormat) static GLboolean any_fragment_ops(const struct st_context *st) { - if (st->state.depth_stencil->state.alpha.enabled || - st->state.blend->state.blend_enable || - st->state.blend->state.logicop_enable || - st->state.depth_stencil->state.depth.enabled) + if (st->state.depth_stencil.alpha.enabled || + st->state.depth_stencil.depth.enabled || + st->state.blend.blend_enable || + st->state.blend.logicop_enable) /* XXX more checks */ return GL_TRUE; else diff --git a/src/mesa/state_tracker/st_cb_program.c b/src/mesa/state_tracker/st_cb_program.c index 61d4f4c41c..4dc76f19b1 100644 --- a/src/mesa/state_tracker/st_cb_program.c +++ b/src/mesa/state_tracker/st_cb_program.c @@ -168,11 +168,13 @@ static void st_program_string_notify( GLcontext *ctx, stfp->serialNo++; +#if 0 if (stfp->cso) { /* free the TGSI code */ // cso_delete(stfp->vs); stfp->cso = NULL; } +#endif stfp->param_state = stfp->Base.Base.Parameters->StateFlags; @@ -184,13 +186,14 @@ static void st_program_string_notify( GLcontext *ctx, stvp->serialNo++; +#if 0 if (stvp->cso) { /* free the CSO data */ st->pipe->delete_vs_state(st->pipe, stvp->cso->data); FREE((void *) stvp->cso); stvp->cso = NULL; } - +#endif if (stvp->draw_shader) { draw_delete_vertex_shader(st->draw, stvp->draw_shader); stvp->draw_shader = NULL; diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 09e389f9dc..5458ab420e 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -56,6 +56,7 @@ #include "pipe/p_inlines.h" #include "draw/draw_context.h" #include "cso_cache/cso_cache.h" +#include "cso_cache/cso_context.h" /** @@ -78,6 +79,7 @@ void st_invalidate_state(GLcontext * ctx, GLuint new_state) static struct st_context * st_create_context_priv( GLcontext *ctx, struct pipe_context *pipe ) { + uint i; struct st_context *st = CALLOC_STRUCT( st_context ); ctx->st = st; @@ -93,12 +95,15 @@ st_create_context_priv( GLcontext *ctx, struct pipe_context *pipe ) st->dirty.mesa = ~0; st->dirty.st = ~0; - st->cache = cso_cache_create(); + st->cso_context = cso_create_context(pipe); st_init_atoms( st ); st_init_draw( st ); st_init_generate_mipmap(st); + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + st->state.sampler_list[i] = &st->state.samplers[i]; + /* we want all vertex data to be placed in buffer objects */ vbo_use_buffer_objects(ctx); @@ -149,7 +154,7 @@ static void st_destroy_context_priv( struct st_context *st ) _vbo_DestroyContext(st->ctx); - cso_cache_delete( st->cache ); + cso_destroy_context(st->cso_context); _mesa_delete_program_cache(st->ctx, st->pixel_xfer.cache); diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 897a5109b7..e81aebba3d 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -74,14 +74,11 @@ struct st_context * Other state is just parameter values. */ struct { - const struct cso_alpha_test *alpha_test; - const struct cso_blend *blend; - void *sampler[PIPE_MAX_SAMPLERS]; - const struct cso_depth_stencil_alpha *depth_stencil; - const struct cso_rasterizer *rasterizer; - const struct cso_fragment_shader *fs; - struct st_vertex_program *vs; - + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state depth_stencil; + struct pipe_rasterizer_state rasterizer; + struct pipe_sampler_state samplers[PIPE_MAX_SAMPLERS]; + struct pipe_sampler_state *sampler_list[PIPE_MAX_SAMPLERS]; struct pipe_blend_color blend_color; struct pipe_clip_state clip; struct pipe_constant_buffer constants[2]; @@ -151,6 +148,10 @@ struct st_context /** For gen/render mipmap feature */ struct { + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state depthstencil; + struct pipe_rasterizer_state rasterizer; + void *blend_cso; void *depthstencil_cso; void *rasterizer_cso; @@ -158,7 +159,7 @@ struct st_context struct st_vertex_program *stvp; } gen_mipmap; - struct cso_cache *cache; + struct cso_context *cso_context; }; diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c index 5888bcb98a..9c13010da8 100644 --- a/src/mesa/state_tracker/st_debug.c +++ b/src/mesa/state_tracker/st_debug.c @@ -53,15 +53,15 @@ st_print_current(void) int i; printf("Vertex Transform Inputs:\n"); - for (i = 0; i < st->state.vs->cso->state.num_inputs; i++) { + for (i = 0; i < st->vp->state.num_inputs; i++) { printf(" Slot %d: VERT_ATTRIB_%d\n", i, st->vp->index_to_input[i]); } - tgsi_dump( st->state.vs->cso->state.tokens, 0 ); + tgsi_dump( st->vp->state.tokens, 0 ); if (st->vp->Base.Base.Parameters) _mesa_print_parameter_list(st->vp->Base.Base.Parameters); - tgsi_dump( st->state.fs->state.tokens, 0 ); + tgsi_dump( st->fp->state.tokens, 0 ); if (st->fp->Base.Base.Parameters) _mesa_print_parameter_list(st->fp->Base.Base.Parameters); } diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 1c0fa8c6aa..98504e46c1 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -36,7 +36,6 @@ #include "vbo/vbo.h" #include "st_atom.h" -#include "st_cache.h" #include "st_context.h" #include "st_cb_bufferobjects.h" #include "st_draw.h" @@ -219,7 +218,7 @@ st_draw_vbo(GLcontext *ctx, /* must get these after state validation! */ vp = ctx->st->vp; - vs = &ctx->st->state.vs->cso->state; + vs = &ctx->st->vp->state; /* loop over TGSI shader inputs to determine vertex buffer * and attribute info @@ -481,10 +480,10 @@ st_feedback_draw_vbo(GLcontext *ctx, /* must get these after state validation! */ vp = ctx->st->vp; - vs = &ctx->st->state.vs->cso->state; + vs = &st->vp->state; - if (!st->state.vs->draw_shader) { - st->state.vs->draw_shader = draw_create_vertex_shader(draw, vs); + if (!st->vp->draw_shader) { + st->vp->draw_shader = draw_create_vertex_shader(draw, vs); } /* @@ -496,8 +495,8 @@ st_feedback_draw_vbo(GLcontext *ctx, assert(draw); draw_set_viewport_state(draw, &st->state.viewport); draw_set_clip_state(draw, &st->state.clip); - draw_set_rasterizer_state(draw, &st->state.rasterizer->state); - draw_bind_vertex_shader(draw, st->state.vs->draw_shader); + draw_set_rasterizer_state(draw, &st->state.rasterizer); + draw_bind_vertex_shader(draw, st->vp->draw_shader); set_feedback_vertex_format(ctx); /* loop over TGSI shader inputs to determine vertex buffer diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 3723e26d45..9c4e1032ef 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -38,6 +38,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "cso_cache/cso_cache.h" +#include "cso_cache/cso_context.h" #include "st_context.h" #include "st_draw.h" @@ -89,8 +90,7 @@ make_tex_fragment_program(GLcontext *ctx) stfp = (struct st_fragment_program *) p; - st_translate_fragment_program(ctx->st, stfp, NULL, - stfp->tokens, ST_MAX_SHADER_TOKENS); + st_translate_fragment_program(ctx->st, stfp, NULL); return stfp; } @@ -118,6 +118,7 @@ st_init_generate_mipmap(struct st_context *st) blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; blend.colormask = PIPE_MASK_RGBA; + st->gen_mipmap.blend = blend; st->gen_mipmap.blend_cso = pipe->create_blend_state(pipe, &blend); memset(&depthstencil, 0, sizeof(depthstencil)); @@ -257,14 +258,14 @@ st_render_mipmap(struct st_context *st, sampler.normalized_coords = 1; - /* bind CSOs */ - pipe->bind_blend_state(pipe, st->gen_mipmap.blend_cso); - pipe->bind_depth_stencil_alpha_state(pipe, st->gen_mipmap.depthstencil_cso); - pipe->bind_rasterizer_state(pipe, st->gen_mipmap.rasterizer_cso); + /* bind state */ + cso_set_blend(st->cso_context, &st->gen_mipmap.blend); + cso_set_depth_stencil_alpha(st->cso_context, &st->gen_mipmap.depthstencil); + cso_set_rasterizer(st->cso_context, &st->gen_mipmap.rasterizer); /* bind shaders */ - pipe->bind_fs_state(pipe, st->gen_mipmap.stfp->cso->data); - pipe->bind_vs_state(pipe, st->gen_mipmap.stvp->cso->data); + pipe->bind_fs_state(pipe, st->gen_mipmap.stfp->driver_shader); + pipe->bind_vs_state(pipe, st->gen_mipmap.stvp->driver_shader); /* * XXX for small mipmap levels, it may be faster to use the software @@ -304,17 +305,18 @@ st_render_mipmap(struct st_context *st, /*pt->first_level = first_level_save;*/ /* restore pipe state */ - if (st->state.rasterizer) - pipe->bind_rasterizer_state(pipe, st->state.rasterizer->data); - if (st->state.fs) - pipe->bind_fs_state(pipe, st->state.fs->data); - if (st->state.vs) - pipe->bind_vs_state(pipe, st->state.vs->cso->data); - pipe->bind_sampler_states(pipe, st->state.num_samplers, - st->state.sampler); +#if 0 + cso_set_rasterizer(st->cso_context, &st->state.rasterizer); + cso_set_samplers(st->cso_context, st->state.samplers_list); + pipe->bind_fs_state(pipe, st->fp->shader_program); + pipe->bind_vs_state(pipe, st->vp->shader_program); pipe->set_sampler_textures(pipe, st->state.num_textures, st->state.sampler_texture); pipe->set_viewport_state(pipe, &st->state.viewport); +#else + /* XXX is this sufficient? */ + st_invalidate_state(st->ctx, _NEW_COLOR | _NEW_TEXTURE); +#endif return TRUE; } diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 97206752af..9446b012ad 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -683,8 +683,9 @@ find_temporaries(const struct gl_program *program, * \param tokens array to store translated tokens in * \param maxTokens size of the tokens array * + * \return number of tokens placed in 'tokens' buffer, or zero if error */ -GLboolean +GLuint tgsi_translate_mesa_program( uint procType, const struct gl_program *program, @@ -918,6 +919,6 @@ tgsi_translate_mesa_program( maxTokens - ti ); } - return GL_TRUE; + return ti; } diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.h b/src/mesa/state_tracker/st_mesa_to_tgsi.h index 3ababf1339..63fc855b53 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.h +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.h @@ -39,7 +39,7 @@ extern "C" { struct tgsi_token; struct gl_program; -GLboolean +GLuint tgsi_translate_mesa_program( uint procType, const struct gl_program *program, diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index aa252c845a..0f8784e132 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -42,15 +42,29 @@ #include "tgsi/util/tgsi_dump.h" #include "st_context.h" -#include "st_cache.h" #include "st_atom.h" #include "st_program.h" #include "st_mesa_to_tgsi.h" +#include "cso_cache/cso_context.h" #define TGSI_DEBUG 0 +/** XXX we should use the version of this from p_util.h but including + * that header causes symbol collisions. + */ +static INLINE void * +mem_dup(const void *src, uint size) +{ + void *dup = MALLOC(size); + if (dup) + memcpy(dup, src, size); + return dup; +} + + + /** * Translate a Mesa vertex shader into a TGSI shader. * \param outputMapping to map vertex program output registers to TGSI @@ -61,15 +75,15 @@ void st_translate_vertex_program(struct st_context *st, struct st_vertex_program *stvp, - const GLuint outputMapping[], - struct tgsi_token *tokensOut, - GLuint maxTokens) + const GLuint outputMapping[]) { + struct pipe_context *pipe = st->pipe; + struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; GLuint defaultOutputMapping[VERT_RESULT_MAX]; struct pipe_shader_state vs; - const struct cso_vertex_shader *cso; GLuint attr, i; GLuint num_generic = 0; + GLuint num_tokens; memset(&vs, 0, sizeof(vs)); @@ -240,7 +254,7 @@ st_translate_vertex_program(struct st_context *st, /* XXX: fix static allocation of tokens: */ - tgsi_translate_mesa_program( TGSI_PROCESSOR_VERTEX, + num_tokens = tgsi_translate_mesa_program( TGSI_PROCESSOR_VERTEX, &stvp->Base.Base, /* inputs */ vs.num_inputs, @@ -252,20 +266,21 @@ st_translate_vertex_program(struct st_context *st, vs.num_outputs, outputMapping, vs.output_semantic_name, - vs.output_semantic_index, + vs.output_semantic_index, /* tokenized result */ - tokensOut, maxTokens); + tokens, ST_MAX_SHADER_TOKENS); - vs.tokens = tokensOut; + vs.tokens = (struct tgsi_token *) + mem_dup(tokens, num_tokens * sizeof(tokens[0])); - cso = st_cached_vs_state(st, &vs); - stvp->cso = cso; + stvp->state = vs; /* struct copy */ + stvp->driver_shader = pipe->create_vs_state(pipe, &vs); if (0) _mesa_print_program(&stvp->Base.Base); if (TGSI_DEBUG) - tgsi_dump( tokensOut, 0 ); + tgsi_dump( vs.tokens, 0 ); } @@ -280,10 +295,10 @@ st_translate_vertex_program(struct st_context *st, const struct cso_fragment_shader * st_translate_fragment_program(struct st_context *st, struct st_fragment_program *stfp, - const GLuint inputMapping[], - struct tgsi_token *tokensOut, - GLuint maxTokens) + const GLuint inputMapping[]) { + struct pipe_context *pipe = st->pipe; + struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; GLuint outputMapping[FRAG_RESULT_MAX]; GLuint defaultInputMapping[FRAG_ATTRIB_MAX]; struct pipe_shader_state fs; @@ -293,6 +308,7 @@ st_translate_fragment_program(struct st_context *st, const GLbitfield inputsRead = stfp->Base.Base.InputsRead; GLuint vslot = 0; GLuint num_generic = 0; + GLuint num_tokens; memset(&fs, 0, sizeof(fs)); @@ -401,7 +417,7 @@ st_translate_fragment_program(struct st_context *st, /* XXX: fix static allocation of tokens: */ - tgsi_translate_mesa_program( TGSI_PROCESSOR_FRAGMENT, + num_tokens = tgsi_translate_mesa_program( TGSI_PROCESSOR_FRAGMENT, &stfp->Base.Base, /* inputs */ fs.num_inputs, @@ -415,18 +431,19 @@ st_translate_fragment_program(struct st_context *st, fs.output_semantic_name, fs.output_semantic_index, /* tokenized result */ - tokensOut, maxTokens); + tokens, ST_MAX_SHADER_TOKENS); - fs.tokens = tokensOut; + fs.tokens = (struct tgsi_token *) + mem_dup(tokens, num_tokens * sizeof(tokens[0])); - cso = st_cached_fs_state(st, &fs); - stfp->cso = cso; + stfp->state = fs; /* struct copy */ + stfp->driver_shader = pipe->create_fs_state(pipe, &fs); if (0) _mesa_print_program(&stfp->Base.Base); if (TGSI_DEBUG) - tgsi_dump( tokensOut, 0/*TGSI_DUMP_VERBOSE*/ ); + tgsi_dump( fs.tokens, 0/*TGSI_DUMP_VERBOSE*/ ); return cso; } diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index 31558af6ce..78786dcbb6 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -60,11 +60,8 @@ struct st_fragment_program /** map FP input back to VP output */ GLuint input_map[PIPE_MAX_SHADER_INPUTS]; - /** The program in TGSI format */ - struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; - - /** Pointer to the corresponding cached shader */ - const struct cso_fragment_shader *cso; + struct pipe_shader_state state; + struct pipe_shader_state *driver_shader; GLuint param_state; @@ -88,11 +85,8 @@ struct st_vertex_program /** maps a TGSI input index back to a Mesa VERT_ATTRIB_x */ GLuint index_to_input[PIPE_MAX_SHADER_INPUTS]; - /** The program in TGSI format */ - struct tgsi_token tokens[ST_MAX_SHADER_TOKENS]; - - /** Pointer to the corresponding cached shader */ - const struct cso_vertex_shader *cso; + struct pipe_shader_state state; + struct pipe_shader_state *driver_shader; /** For using our private draw module (glRasterPos) */ struct draw_vertex_shader *draw_shader; @@ -122,16 +116,12 @@ st_vertex_program( struct gl_vertex_program *vp ) extern const struct cso_fragment_shader * st_translate_fragment_program(struct st_context *st, struct st_fragment_program *fp, - const GLuint inputMapping[], - struct tgsi_token *tokens, - GLuint maxTokens); + const GLuint inputMapping[]); extern void st_translate_vertex_program(struct st_context *st, struct st_vertex_program *vp, - const GLuint vert_output_to_slot[], - struct tgsi_token *tokens, - GLuint maxTokens); + const GLuint vert_output_to_slot[]); #endif -- cgit v1.2.3 From feb02084a88ca6e23c34fa06e963765c890f0b65 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 12 Mar 2008 11:37:02 +0100 Subject: tgsi: Dump source register divide component. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 59be14a748..a393c65da6 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -1047,6 +1047,11 @@ dump_instruction_short( CHR( ')' ); } + if (src->SrcRegisterExtSwz.ExtDivide != TGSI_EXTSWIZZLE_ONE) { + CHR( '/' ); + ENM( src->SrcRegisterExtSwz.ExtDivide, TGSI_EXTSWIZZLES_SHORT ); + } + first_reg = FALSE; } -- cgit v1.2.3 From 8fd633b5cfa36e0cf0acef096315c9250015aba7 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 12 Mar 2008 10:43:53 +0000 Subject: gallium: reduce signed/unsigned warnings --- src/gallium/auxiliary/draw/draw_passthrough.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_passthrough.c b/src/gallium/auxiliary/draw/draw_passthrough.c index fc2dde38ba..a51fa0ab23 100644 --- a/src/gallium/auxiliary/draw/draw_passthrough.c +++ b/src/gallium/auxiliary/draw/draw_passthrough.c @@ -78,7 +78,7 @@ static void fetch_xyz_rgb_st( struct draw_context *draw, { const unsigned *pitch = draw->vertex_fetch.pitch; const ubyte **src = draw->vertex_fetch.src_ptr; - int i; + unsigned i; const ubyte *xyzw = src[0] + start * pitch[0]; const ubyte *rgba = src[1] + start * pitch[1]; -- cgit v1.2.3 From a2ea51ed828d8b503492a7b42ac937d2642ac4f1 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 12 Mar 2008 15:01:18 +0000 Subject: gallium: Change assert behavior on runtime (Mark Mueller). --- src/gallium/auxiliary/util/p_debug.c | 38 ++++++++++++++++++++++++++++++++++-- 1 file changed, 36 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 04e55dd91d..09cabdae25 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -36,8 +36,9 @@ #include #endif -#include "pipe/p_debug.h" #include "pipe/p_compiler.h" +#include "pipe/p_util.h" +#include "pipe/p_debug.h" #ifdef WIN32 @@ -97,11 +98,44 @@ static INLINE void debug_break(void) #endif } +#if defined(WIN32) +ULONG_PTR debug_config_file = 0; +void *mapped_config_file = 0; + +enum { + eAssertAbortEn = 0x1, +}; + +/* Check for aborts enabled. */ +static unsigned abort_en() +{ + if (!mapped_config_file) + { + /* Open an 8 byte file for configuration data. */ + mapped_config_file = EngMapFile(L"\\??\\c:\\gaDebug.cfg", 8, &debug_config_file); + } + /* An value of "0" (ascii) in the configuration file will clear the first 8 bits in the test byte. */ + /* An value of "1" (ascii) in the configuration file will set the first bit in the test byte. */ + /* An value of "2" (ascii) in the configuration file will set the second bit in the test byte. */ + return ((((char *)mapped_config_file)[0]) - 0x30) & eAssertAbortEn; +} +#else /* WIN32 */ +static unsigned abort_en() +{ + return !GETENV("GALLIUM_ABORT_ON_ASSERT"); +} +#endif void debug_assert_fail(const char *expr, const char *file, unsigned line) { debug_printf("%s:%i: Assertion `%s' failed.\n", file, line, expr); - debug_break(); + if (abort_en()) + { + debug_break(); + } else + { + debug_printf("continuing...\n"); + } } -- cgit v1.2.3 From 8901a46a742b0cfecde0b981fc65160bf3e8d019 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 12 Mar 2008 15:02:11 +0000 Subject: gallium: Generic handle table. --- src/gallium/auxiliary/util/Makefile | 1 + src/gallium/auxiliary/util/SConscript | 1 + src/gallium/auxiliary/util/u_handle_table.c | 207 ++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_handle_table.h | 96 +++++++++++++ 4 files changed, 305 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_handle_table.c create mode 100644 src/gallium/auxiliary/util/u_handle_table.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 2a3a9380b3..2abbe9500e 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -7,6 +7,7 @@ C_SOURCES = \ p_debug.c \ p_tile.c \ p_util.c \ + u_handle_table.c \ u_mm.c \ u_snprintf.c diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 642088b962..2030214aa7 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -6,6 +6,7 @@ util = env.ConvenienceLibrary( 'p_debug.c', 'p_tile.c', 'p_util.c', + 'u_handle_table.c', 'u_mm.c', 'u_snprintf.c', ]) diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c new file mode 100644 index 0000000000..8a298f7c41 --- /dev/null +++ b/src/gallium/auxiliary/util/u_handle_table.c @@ -0,0 +1,207 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Generic handle table implementation. + * + * @author José Fonseca + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_util.h" + +#include "u_handle_table.h" + + +#define HANDLE_TABLE_INITIAL_SIZE 16 + + +struct handle_table +{ + /** Object array. Empty handles have a null object */ + void **objects; + + /** Number of objects the handle can currently hold */ + unsigned size; + /** Number of consecutive objects allocated at the start of the table */ + unsigned filled; + + /** Optional object destructor */ + void (*destroy)(void *object); +}; + + +struct handle_table * +handle_table_create(void) +{ + struct handle_table *ht; + + ht = MALLOC_STRUCT(handle_table); + if(!ht) + return NULL; + + ht->objects = (void **)CALLOC(HANDLE_TABLE_INITIAL_SIZE, sizeof(void *)); + if(!ht->objects) { + FREE(ht); + return NULL; + } + + ht->size = HANDLE_TABLE_INITIAL_SIZE; + ht->filled = 0; + + ht->destroy = NULL; + + return ht; +} + + +void +handle_table_set_destroy(struct handle_table *ht, + void (*destroy)(void *object)) +{ + assert(ht); + ht->destroy = destroy; +} + + +unsigned +handle_table_add(struct handle_table *ht, + void *object) +{ + unsigned index; + unsigned handle; + + assert(ht); + assert(object); + if(!object) + return 0; + + /* linear search for an empty handle */ + while(ht->filled < ht->size) { + if(!ht->objects[ht->filled]) + break; + ++ht->filled; + } + + /* grow the table */ + if(ht->filled == ht->size) { + unsigned new_size; + void **new_objects; + + new_size = ht->size*2; + assert(new_size); + + new_objects = (void **)REALLOC((void *)ht->objects, + ht->size*sizeof(void *), + new_size*sizeof(void *)); + if(!new_objects) + return 0; + + memset(new_objects + ht->size, 0, (new_size - ht->size)*sizeof(void *)); + + ht->size = new_size; + ht->objects = new_objects; + } + + index = ht->filled; + + handle = index + 1; + + /* check integer overflow */ + if(!handle) + return 0; + + assert(!ht->objects[index]); + ht->objects[index] = object; + ++ht->filled; + + return handle; +} + + +void * +handle_table_get(struct handle_table *ht, + unsigned handle) +{ + void *object; + + assert(ht); + assert(handle > 0); + assert(handle <= ht->size); + if(!handle || handle > ht->size) + return NULL; + + object = ht->objects[handle - 1]; + assert(object); + + return object; +} + + +void +handle_table_remove(struct handle_table *ht, + unsigned handle) +{ + void *object; + unsigned index; + + assert(ht); + assert(handle > 0); + assert(handle <= ht->size); + if(!handle || handle > ht->size) + return; + + index = handle - 1; + object = ht->objects[index]; + assert(object); + + if(object && ht->destroy) + ht->destroy(object); + + ht->objects[index] = NULL; + if(index < ht->filled) + ht->filled = index; +} + + +void +handle_table_destroy(struct handle_table *ht) +{ + unsigned index; + assert(ht); + + if(ht->destroy) + for(index = 0; index < ht->size; ++index) + if(ht->objects[index]) + ht->destroy(ht->objects[index]); + + FREE(ht->objects); + FREE(ht); +} + diff --git a/src/gallium/auxiliary/util/u_handle_table.h b/src/gallium/auxiliary/util/u_handle_table.h new file mode 100644 index 0000000000..51fc273865 --- /dev/null +++ b/src/gallium/auxiliary/util/u_handle_table.h @@ -0,0 +1,96 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Generic handle table. + * + * @author José Fonseca + */ + +#ifndef U_HANDLE_TABLE_H_ +#define U_HANDLE_TABLE_H_ + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Abstract data type to map integer handles to objects. + */ +struct handle_table; + + +struct handle_table * +handle_table_create(void); + + +/** + * Set an optional destructor callback. + * + * If set, it will be called during handle_table_remove and + * handle_table_destroy calls. + */ +void +handle_table_set_destroy(struct handle_table *ht, + void (*destroy)(void *object)); + + +/** + * Add a new object. + * + * Returns a zero handle on failure (out of memory). + */ +unsigned +handle_table_add(struct handle_table *ht, + void *object); + +/** + * Fetch an existing object. + * + * Returns NULL for an invalid handle. + */ +void * +handle_table_get(struct handle_table *ht, + unsigned handle); + + +void +handle_table_remove(struct handle_table *ht, + unsigned handle); + + +void +handle_table_destroy(struct handle_table *ht); + + +#ifdef __cplusplus +} +#endif + +#endif /* U_HANDLE_TABLE_H_ */ -- cgit v1.2.3 From e5b1a53c9f9ad247272415e0e21e83cfe00728a9 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 12 Mar 2008 15:29:39 +0100 Subject: tgsi: Dump TXP opcode. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index a393c65da6..1913d482f1 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -459,7 +459,8 @@ static const char *TGSI_OPCODES[] = "OPCODE_IFC", "OPCODE_BREAKC", "OPCODE_KIL", - "OPCODE_END" + "OPCODE_END", + "OPCODE_TXP" }; static const char *TGSI_OPCODES_SHORT[] = @@ -597,7 +598,8 @@ static const char *TGSI_OPCODES_SHORT[] = "IFC", "BREAKC", "KIL", - "END" + "END", + "TXP" }; static const char *TGSI_SATS[] = -- cgit v1.2.3 From ba75e82b6ebaf88dd2e4a8f764b2d296d715bf8a Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 12 Mar 2008 16:41:25 +0100 Subject: tgsi: Remove ExtDivide field from existence. Implement OPCODE_TXP. --- src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 46 +++++++--------------- src/gallium/auxiliary/tgsi/util/tgsi_build.c | 5 --- src/gallium/auxiliary/tgsi/util/tgsi_build.h | 1 - src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 9 ----- .../drivers/i915simple/i915_fpc_translate.c | 12 +++--- src/gallium/include/pipe/p_shader_tokens.h | 13 +----- 6 files changed, 21 insertions(+), 65 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index ac52441400..f2ed9e0353 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -1220,7 +1220,8 @@ fetch_texel( struct tgsi_sampler *sampler, static void exec_tex(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst, - boolean biasLod) + boolean biasLod, + boolean projected) { const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; union tgsi_exec_channel r[8]; @@ -1234,17 +1235,9 @@ exec_tex(struct tgsi_exec_machine *mach, FETCH(&r[0], 0, CHAN_X); - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: + if (projected) { FETCH(&r[1], 0, CHAN_W); micro_div( &r[0], &r[0], &r[1] ); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); } if (biasLod) { @@ -1266,19 +1259,11 @@ exec_tex(struct tgsi_exec_machine *mach, FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: + if (projected) { FETCH(&r[3], 0, CHAN_W); micro_div( &r[0], &r[0], &r[3] ); micro_div( &r[1], &r[1], &r[3] ); micro_div( &r[2], &r[2], &r[3] ); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); } if (biasLod) { @@ -1300,19 +1285,11 @@ exec_tex(struct tgsi_exec_machine *mach, FETCH(&r[1], 0, CHAN_Y); FETCH(&r[2], 0, CHAN_Z); - switch (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide) { - case TGSI_EXTSWIZZLE_W: + if (projected) { FETCH(&r[3], 0, CHAN_W); micro_div( &r[0], &r[0], &r[3] ); micro_div( &r[1], &r[1], &r[3] ); micro_div( &r[2], &r[2], &r[3] ); - break; - - case TGSI_EXTSWIZZLE_ONE: - break; - - default: - assert (0); } if (biasLod) { @@ -2007,14 +1984,14 @@ exec_instruction( /* simple texture lookup */ /* src[0] = texcoord */ /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE); + exec_tex(mach, inst, FALSE, FALSE); break; case TGSI_OPCODE_TXB: /* Texture lookup with lod bias */ /* src[0] = texcoord (src[0].w = LOD bias) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE); + exec_tex(mach, inst, TRUE, FALSE); break; case TGSI_OPCODE_TXD: @@ -2030,7 +2007,14 @@ exec_instruction( /* Texture lookup with explit LOD */ /* src[0] = texcoord (src[0].w = LOD) */ /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE); + exec_tex(mach, inst, TRUE, FALSE); + break; + + case TGSI_OPCODE_TXP: + /* Texture lookup with projection */ + /* src[0] = texcoord (src[0].w = projection) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, FALSE, TRUE); break; case TGSI_OPCODE_UP2H: diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.c b/src/gallium/auxiliary/tgsi/util/tgsi_build.c index a00ff1c2a5..9c883ab704 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.c @@ -719,7 +719,6 @@ tgsi_build_full_instruction( reg->SrcRegisterExtSwz.NegateY, reg->SrcRegisterExtSwz.NegateZ, reg->SrcRegisterExtSwz.NegateW, - reg->SrcRegisterExtSwz.ExtDivide, prev_token, instruction, header ); @@ -1057,7 +1056,6 @@ tgsi_default_src_register_ext_swz( void ) src_register_ext_swz.NegateY = 0; src_register_ext_swz.NegateZ = 0; src_register_ext_swz.NegateW = 0; - src_register_ext_swz.ExtDivide = TGSI_EXTSWIZZLE_ONE; src_register_ext_swz.Padding = 0; src_register_ext_swz.Extended = 0; @@ -1084,7 +1082,6 @@ tgsi_build_src_register_ext_swz( unsigned negate_y, unsigned negate_z, unsigned negate_w, - unsigned ext_divide, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct tgsi_header *header ) @@ -1099,7 +1096,6 @@ tgsi_build_src_register_ext_swz( assert( negate_y <= 1 ); assert( negate_z <= 1 ); assert( negate_w <= 1 ); - assert( ext_divide <= TGSI_EXTSWIZZLE_ONE ); src_register_ext_swz = tgsi_default_src_register_ext_swz(); src_register_ext_swz.ExtSwizzleX = ext_swizzle_x; @@ -1110,7 +1106,6 @@ tgsi_build_src_register_ext_swz( src_register_ext_swz.NegateY = negate_y; src_register_ext_swz.NegateZ = negate_z; src_register_ext_swz.NegateW = negate_w; - src_register_ext_swz.ExtDivide = ext_divide; prev_token->Extended = 1; instruction_grow( instruction, header ); diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.h b/src/gallium/auxiliary/tgsi/util/tgsi_build.h index 607860e7fc..80bffc4ae7 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.h @@ -229,7 +229,6 @@ tgsi_build_src_register_ext_swz( unsigned negate_y, unsigned negate_z, unsigned negate_w, - unsigned ext_divide, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct tgsi_header *header ); diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 1913d482f1..ceb407b884 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -1049,11 +1049,6 @@ dump_instruction_short( CHR( ')' ); } - if (src->SrcRegisterExtSwz.ExtDivide != TGSI_EXTSWIZZLE_ONE) { - CHR( '/' ); - ENM( src->SrcRegisterExtSwz.ExtDivide, TGSI_EXTSWIZZLES_SHORT ); - } - first_reg = FALSE; } @@ -1368,10 +1363,6 @@ dump_instruction_verbose( TXT( "\nNegateW : " ); UID( src->SrcRegisterExtSwz.NegateW ); } - if( deflt || fs->SrcRegisterExtSwz.ExtDivide != src->SrcRegisterExtSwz.ExtDivide ) { - TXT( "\nExtDivide : " ); - ENM( src->SrcRegisterExtSwz.ExtDivide, TGSI_EXTSWIZZLES ); - } if( ignored ) { TXT( "\nPadding : " ); UIX( src->SrcRegisterExtSwz.Padding ); diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index c2d9a93c6c..7b4fca5db1 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -872,19 +872,17 @@ i915_translate_instruction(struct i915_fp_compile *p, break; case TGSI_OPCODE_TEX: - if (inst->FullSrcRegisters[0].SrcRegisterExtSwz.ExtDivide - == TGSI_EXTSWIZZLE_W) { - emit_tex(p, inst, T0_TEXLDP); - } - else { - emit_tex(p, inst, T0_TEXLD); - } + emit_tex(p, inst, T0_TEXLD); break; case TGSI_OPCODE_TXB: emit_tex(p, inst, T0_TEXLDB); break; + case TGSI_OPCODE_TXP: + emit_tex(p, inst, T0_TEXLDP); + break; + case TGSI_OPCODE_XPD: /* Cross product: * result.x = src0.y * src1.z - src0.z * src1.y; diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index f0bb43bc5b..210169f54f 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -655,9 +655,6 @@ struct tgsi_src_register_ext * * NegateX, NegateY, NegateZ and NegateW negate individual components of the * source register. - * - * ExtDivide specifies which component is used to divide all components of the - * source register. */ struct tgsi_src_register_ext_swz @@ -671,15 +668,7 @@ struct tgsi_src_register_ext_swz unsigned NegateY : 1; /* BOOL */ unsigned NegateZ : 1; /* BOOL */ unsigned NegateW : 1; /* BOOL */ - - /* - * XXX: Do not use. This field has been depricated. - * XXX: If using in conjunction with OPCODE_TEX, please use OPCODE_TXP - * XXX: and, if needed, perform a swizzle on the texture coordinate. - */ - unsigned ExtDivide : 4; /* TGSI_EXTSWIZZLE_ */ - - unsigned Padding : 3; + unsigned Padding : 7; unsigned Extended : 1; /* BOOL */ }; -- cgit v1.2.3 From 12ab5f97013e398b9f6485b97d6691c3c170447a Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 12 Mar 2008 13:20:29 -0600 Subject: gallium: change draw_vertex_shader->state from pointer to struct We were sometimes keeping a pointer to a stack-allocated object. Now make a copy of the pipe_shader_state object. This should fix some seemingly random memory errors/crashes. --- src/gallium/auxiliary/draw/draw_private.h | 2 +- src/gallium/auxiliary/draw/draw_vs_exec.c | 4 ++-- src/gallium/auxiliary/draw/draw_vs_sse.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 25fa8c09c2..4147472d45 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -133,7 +133,7 @@ struct draw_vertex_shader { /* This member will disappear shortly: */ - const struct pipe_shader_state *state; + struct pipe_shader_state state; struct tgsi_shader_info info; diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 514303e0ea..55bec14116 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -71,7 +71,7 @@ vs_exec_prepare( struct draw_vertex_shader *shader, { /* specify the vertex program to interpret/execute */ tgsi_exec_machine_bind_shader(&draw->machine, - shader->state->tokens, + shader->state.tokens, PIPE_MAX_SAMPLERS, NULL /*samplers*/ ); @@ -187,7 +187,7 @@ draw_create_vs_exec(struct draw_context *draw, if (vs == NULL) return NULL; - vs->state = state; + vs->state = *state; vs->prepare = vs_exec_prepare; vs->run = vs_exec_run; vs->delete = vs_exec_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index e5c1a40cca..5ee2adb344 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -221,14 +221,14 @@ draw_create_vs_sse(struct draw_context *draw, if (vs == NULL) return NULL; - vs->base.state = templ; + vs->base.state = *templ; vs->base.prepare = vs_sse_prepare; vs->base.run = vs_sse_run; vs->base.delete = vs_sse_delete; x86_init_func( &vs->sse2_program ); - if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state->tokens, + if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens, &vs->sse2_program )) goto fail; -- cgit v1.2.3 From 329c5431348117e5b99adf14936d2f57f2ef5f1f Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 12 Mar 2008 19:29:30 -0600 Subject: gallium: fix polygon stipple Was broken by commit 4528287e040415c2071012d02f20979ff995c754 (bind all samplers/texures at once). --- src/gallium/auxiliary/draw/draw_pstipple.c | 34 +++++++++++++++++++----------- 1 file changed, 22 insertions(+), 12 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index ed50d0805a..8b3e84a9a0 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -67,6 +67,7 @@ struct pstip_stage struct draw_stage stage; void *sampler_cso; + uint sampler_unit; struct pipe_texture *texture; uint num_samplers; uint num_textures; @@ -76,8 +77,8 @@ struct pstip_stage */ struct pstip_fragment_shader *fs; struct { - void *sampler[PIPE_MAX_SAMPLERS]; - struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + void *samplers[PIPE_MAX_SAMPLERS]; + struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; const struct pipe_poly_stipple *stipple; } state; @@ -328,6 +329,8 @@ generate_pstip_fs(struct pstip_stage *pstip) tgsi_dump(pstip_fs.tokens, 0); #endif + pstip->sampler_unit = transform.maxSampler + 1; + #if 1 /* XXX remove */ if (transform.wincoordInput < 0) { pstip_fs.input_semantic_name[pstip_fs.num_inputs] = TGSI_SEMANTIC_POSITION; @@ -483,17 +486,24 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) { struct pstip_stage *pstip = pstip_stage(stage); struct pipe_context *pipe = pstip->pipe; - uint num = MAX2(pstip->num_textures, pstip->num_samplers); + uint num_samplers; + + /* how many samplers? */ + /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ + num_samplers = MAX2(pstip->num_textures, pstip->num_samplers); + num_samplers = MAX2(num_samplers, pstip->sampler_unit + 1); assert(stage->draw->rasterizer->poly_stipple_enable); - /* - * Bind our fragprog, sampler and texture - */ + /* bind our fragprog */ bind_pstip_fragment_shader(pstip); - pstip->driver_bind_sampler_states(pipe, num + 1, pstip->state.sampler); - pstip->driver_set_sampler_textures(pipe, num + 1, pstip->state.texture); + /* plug in our sampler, texture */ + pstip->state.samplers[pstip->sampler_unit] = pstip->sampler_cso; + pstip->state.textures[pstip->sampler_unit] = pstip->texture; + + pstip->driver_bind_sampler_states(pipe, num_samplers, pstip->state.samplers); + pstip->driver_set_sampler_textures(pipe, num_samplers, pstip->state.textures); /* now really draw first line */ stage->tri = passthrough_tri; @@ -516,9 +526,9 @@ pstip_flush(struct draw_stage *stage, unsigned flags) /* XXX restore original texture, sampler state */ pstip->driver_bind_sampler_states(pipe, pstip->num_samplers, - pstip->state.sampler); + pstip->state.samplers); pstip->driver_set_sampler_textures(pipe, pstip->num_textures, - pstip->state.texture); + pstip->state.textures); } @@ -617,7 +627,7 @@ pstip_bind_sampler_states(struct pipe_context *pipe, { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); /* save current */ - memcpy(pstip->state.sampler, sampler, num * sizeof(void *)); + memcpy(pstip->state.samplers, sampler, num * sizeof(void *)); pstip->num_samplers = num; /* pass-through */ pstip->driver_bind_sampler_states(pstip->pipe, num, sampler); @@ -630,7 +640,7 @@ pstip_set_sampler_textures(struct pipe_context *pipe, { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); /* save current */ - memcpy(pstip->state.texture, texture, num * sizeof(struct pipe_texture *)); + memcpy(pstip->state.textures, texture, num * sizeof(struct pipe_texture *)); pstip->num_textures = num; /* pass-through */ pstip->driver_set_sampler_textures(pstip->pipe, num, texture); -- cgit v1.2.3 From a1d56728655a3fc87360b45ac8b348bcfdf6ac15 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 12 Mar 2008 21:42:33 -0400 Subject: document hash collision resolutions --- src/gallium/auxiliary/cso_cache/cso_hash.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h index d5bca9d591..84b45a5963 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.h +++ b/src/gallium/auxiliary/cso_cache/cso_hash.h @@ -25,6 +25,16 @@ * **************************************************************************/ +/** + This file provides a hash implementation that is capable of dealing + with collisions. It stores colliding entries in linked list. All + functions operating on the hash return an iterator. The iterator + itself points to the collision list. If there wasn't any collision + the list will have just one entry, otherwise client code should + iterate over the entries to find the exact entry among ones that + had the same key (e.g. memcmp could be used on the data to check + that) +*/ /* * Authors: * Zack Rusin -- cgit v1.2.3 From 2366bb1baf2e9ae5b6ecf19f66ae9e0a4b0d2f36 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 12 Mar 2008 22:06:51 -0400 Subject: Add some basic documentation for gallivm code --- src/gallium/auxiliary/gallivm/gallivm.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/gallivm.h b/src/gallium/auxiliary/gallivm/gallivm.h index 57912a952f..b4d6555d2f 100644 --- a/src/gallium/auxiliary/gallivm/gallivm.h +++ b/src/gallium/auxiliary/gallivm/gallivm.h @@ -33,6 +33,16 @@ #ifndef GALLIVM_H #define GALLIVM_H +/* + LLVM representation consists of two stages - layout independent + intermediate representation gallivm_ir and driver specific + gallivm_prog. TGSI is first being translated into gallivm_ir + after that driver can set number of options on gallivm_ir and + have it compiled into gallivm_prog. gallivm_prog can be either + executed (assuming there's LLVM JIT backend for the current + target) or machine code generation can be done (assuming there's + a LLVM code generator for thecurrent target) + */ #if defined __cplusplus extern "C" { #endif -- cgit v1.2.3 From cac037d36d451e8cafbb4a759d0edf9fa8b1ca81 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 12 Mar 2008 22:51:57 -0400 Subject: add code handling dependencies between generated code --- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 96 ++++++++++++++++++++--- src/gallium/auxiliary/gallivm/instructionssoa.h | 6 ++ src/gallium/auxiliary/gallivm/soabuiltins.c | 18 +++++ src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 1 + 4 files changed, 111 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index 89d513afd0..6f83b56a72 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -143,8 +143,16 @@ std::vector InstructionsSoa::extractVector(llvm::Value *vector) void InstructionsSoa::createFunctionMap() { - m_functionsMap[TGSI_OPCODE_DP3] = "dp3"; - m_functionsMap[TGSI_OPCODE_DP4] = "dp4"; + m_functionsMap[TGSI_OPCODE_DP3] = "dp3"; + m_functionsMap[TGSI_OPCODE_DP4] = "dp4"; + m_functionsMap[TGSI_OPCODE_POWER] = "pow"; +} + +void InstructionsSoa::createDependencies() +{ + std::vector powDeps(1); + powDeps[0] = "powf"; + m_builtinDependencies["pow"] = powDeps; } llvm::Function * InstructionsSoa::function(int op) @@ -154,15 +162,14 @@ llvm::Function * InstructionsSoa::function(int op) std::string name = m_functionsMap[op]; + std::vector deps = m_builtinDependencies[name]; + for (unsigned int i = 0; i < deps.size(); ++i) { + injectFunction(m_builtins->getFunction(deps[i])); + } + llvm::Function *originalFunc = m_builtins->getFunction(name); - llvm::Function *func = CloneFunction(originalFunc); - currentModule()->getFunctionList().push_back(func); - std::cout << "Func parent is "<getParent() - <<", cur is "<dump(); - //func->setParent(currentModule()); - m_functions[op] = func; - return func; + injectFunction(originalFunc, op); + return m_functions[op]; } llvm::Module * InstructionsSoa::currentModule() const @@ -177,6 +184,7 @@ llvm::Module * InstructionsSoa::currentModule() const void InstructionsSoa::createBuiltins() { m_builtins = createSoaBuiltins(); + createDependencies(); } std::vector InstructionsSoa::dp3(const std::vector in1, @@ -304,3 +312,71 @@ std::vector InstructionsSoa::callBuiltin(llvm::Function *func, const std return allocaToResult(allocaPtr); } + +std::vector InstructionsSoa::pow(const std::vector in1, + const std::vector in2) +{ + llvm::Function *func = function(TGSI_OPCODE_POWER); + return callBuiltin(func, in1, in2); +} + +void checkFunction(Function *func) +{ + for (Function::const_iterator BI = func->begin(), BE = func->end(); + BI != BE; ++BI) { + const BasicBlock &BB = *BI; + for (BasicBlock::const_iterator II = BB.begin(), IE = BB.end(); + II != IE; ++II) { + const Instruction &I = *II; + std::cout<< "Instr = "<setOperand(op, V); + } + } + } +} + +void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op) +{ + assert(originalFunc); + std::cout << "injecting function originalFunc " <getName() <getFunction(originalFunc->getName()); + if (func) { + m_functions[op] = func; + return; + } + } + llvm::Function *func = 0; + if (originalFunc->isDeclaration()) { + std::cout << "function decleration" <getFunctionType(), GlobalValue::ExternalLinkage, + originalFunc->getName(), currentModule()); + func->setCallingConv(CallingConv::C); + const ParamAttrsList *pal = 0; + func->setParamAttrs(pal); + currentModule()->dump(); + } else { + DenseMap val; + val[m_builtins->getFunction("powf")] = currentModule()->getFunction("powf"); + std::cout <<" replacing "<getFunction("powf") + <<", with " <getFunction("powf")<getFunctionList().push_back(func); + std::cout << "Func parent is "<getParent() + <<", cur is "< #include #include @@ -59,6 +60,8 @@ public: const std::vector in3); std::vector mul(const std::vector in1, const std::vector in2); + std::vector pow(const std::vector in1, + const std::vector in2); void end(); std::vector extractVector(llvm::Value *vector); @@ -68,6 +71,7 @@ private: llvm::Value *z, llvm::Value *w); void createFunctionMap(); void createBuiltins(); + void createDependencies(); llvm::Function *function(int); llvm::Module *currentModule() const; llvm::Value *allocaTemp(); @@ -81,6 +85,7 @@ private: const std::vector in1, const std::vector in2, const std::vector in3); + void injectFunction(llvm::Function *originalFunc, int op = TGSI_OPCODE_LAST); private: llvm::LLVMFoldingBuilder m_builder; StorageSoa *m_storage; @@ -88,6 +93,7 @@ private: std::map m_functionsMap; std::map m_functions; llvm::Module *m_builtins; + std::map > m_builtinDependencies; private: mutable int m_idx; diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c index 24c14e1b69..4d658be520 100644 --- a/src/gallium/auxiliary/gallivm/soabuiltins.c +++ b/src/gallium/auxiliary/gallivm/soabuiltins.c @@ -60,6 +60,24 @@ void dp4(float4 *res, res[3] = dot; } +extern float powf(float num, float p); + +void pow(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + float4 p; + p.x = powf(tmp0x.x, tmp1x.x); + p.y = powf(tmp0x.y, tmp1x.y); + p.z = powf(tmp0x.z, tmp1x.z); + p.w = powf(tmp0x.w, tmp1x.w); + + res[0] = p; + res[1] = p; + res[2] = p; + res[3] = p; +} + #if 0 void yo(float4 *out, float4 *in) { diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 3f65865a5a..d2b747ffd1 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -806,6 +806,7 @@ translate_instructionir(llvm::Module *module, } break; case TGSI_OPCODE_POWER: { + out = instr->pow(inputs[0], inputs[1]); } break; case TGSI_OPCODE_CROSSPRODUCT: { -- cgit v1.2.3 From 5ba2f0a507b8d413eea2eb7da09c304ab5a8d3f1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 13 Mar 2008 09:57:01 +0000 Subject: tgsi: bump MAX_SRC_REGS to 4, for TXD --- src/gallium/auxiliary/tgsi/util/tgsi_parse.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h index 5ccb5bfcf6..40083728d6 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h @@ -56,7 +56,7 @@ struct tgsi_full_immediate }; #define TGSI_FULL_MAX_DST_REGISTERS 2 -#define TGSI_FULL_MAX_SRC_REGISTERS 3 +#define TGSI_FULL_MAX_SRC_REGISTERS 4 /* TXD has 4 */ struct tgsi_full_instruction { -- cgit v1.2.3 From ddb4e5cbaced3e96117a97fe362ab890794f5ab7 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 13 Mar 2008 10:01:38 +0000 Subject: tgsi: replace erroneous use of FETCH with emit_tempf --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 0da3b0bdb7..4e80597b3f 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -1821,7 +1821,11 @@ emit_instruction( STORE( func, *inst, 5, 0, CHAN_Z ); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { - FETCH( func, *inst, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C ); + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); STORE( func, *inst, 0, 0, CHAN_W ); } break; @@ -2021,11 +2025,19 @@ emit_instruction( STORE( func, *inst, 0, 0, CHAN_Y ); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { - FETCH( func, *inst, 0, TGSI_EXEC_TEMP_00000000_I, TGSI_EXEC_TEMP_00000000_C ); + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ); STORE( func, *inst, 0, 0, CHAN_Z ); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { - FETCH( func, *inst, 0, TGSI_EXEC_TEMP_ONE_I, TGSI_EXEC_TEMP_ONE_C ); + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); STORE( func, *inst, 0, 0, CHAN_W ); } break; -- cgit v1.2.3 From 192d1cbbdf9f8e2527ef38761195a87517c2d244 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 12 Mar 2008 22:39:13 +0000 Subject: gallium: Add a new handle_table_set that accepts an arbitrary handle. --- src/gallium/auxiliary/util/u_handle_table.c | 85 ++++++++++++++++++++++------- src/gallium/auxiliary/util/u_handle_table.h | 11 ++++ 2 files changed, 75 insertions(+), 21 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c index 8a298f7c41..ab427ee371 100644 --- a/src/gallium/auxiliary/util/u_handle_table.c +++ b/src/gallium/auxiliary/util/u_handle_table.c @@ -26,6 +26,7 @@ **************************************************************************/ /** + * @file * Generic handle table implementation. * * @author José Fonseca @@ -90,6 +91,39 @@ handle_table_set_destroy(struct handle_table *ht, } +/** + * Resize the table if necessary + */ +static INLINE int +handle_table_resize(struct handle_table *ht, + unsigned minimum_size) +{ + unsigned new_size; + void **new_objects; + + if(ht->size > minimum_size) + return ht->size; + + new_size = ht->size; + while(!(new_size > minimum_size)) + new_size *= 2; + assert(new_size); + + new_objects = (void **)REALLOC((void *)ht->objects, + ht->size*sizeof(void *), + new_size*sizeof(void *)); + if(!new_objects) + return 0; + + memset(new_objects + ht->size, 0, (new_size - ht->size)*sizeof(void *)); + + ht->size = new_size; + ht->objects = new_objects; + + return ht->size; +} + + unsigned handle_table_add(struct handle_table *ht, void *object) @@ -109,34 +143,17 @@ handle_table_add(struct handle_table *ht, ++ht->filled; } - /* grow the table */ - if(ht->filled == ht->size) { - unsigned new_size; - void **new_objects; - - new_size = ht->size*2; - assert(new_size); - - new_objects = (void **)REALLOC((void *)ht->objects, - ht->size*sizeof(void *), - new_size*sizeof(void *)); - if(!new_objects) - return 0; - - memset(new_objects + ht->size, 0, (new_size - ht->size)*sizeof(void *)); - - ht->size = new_size; - ht->objects = new_objects; - } - index = ht->filled; - handle = index + 1; /* check integer overflow */ if(!handle) return 0; + /* grow the table if necessary */ + if(!handle_table_resize(ht, index)) + return 0; + assert(!ht->objects[index]); ht->objects[index] = object; ++ht->filled; @@ -145,6 +162,32 @@ handle_table_add(struct handle_table *ht, } +unsigned +handle_table_set(struct handle_table *ht, + unsigned handle, + void *object) +{ + unsigned index; + + assert(ht); + assert(handle > 0); + assert(handle <= ht->size); + if(!handle || handle > ht->size) + return 0; + + index = handle - 1; + + /* grow the table if necessary */ + if(!handle_table_resize(ht, index)) + return 0; + + assert(!ht->objects[index]); + ht->objects[index] = object; + + return handle; +} + + void * handle_table_get(struct handle_table *ht, unsigned handle) diff --git a/src/gallium/auxiliary/util/u_handle_table.h b/src/gallium/auxiliary/util/u_handle_table.h index 51fc273865..a2f1f604ad 100644 --- a/src/gallium/auxiliary/util/u_handle_table.h +++ b/src/gallium/auxiliary/util/u_handle_table.h @@ -26,6 +26,7 @@ **************************************************************************/ /** + * @file * Generic handle table. * * @author José Fonseca @@ -42,6 +43,8 @@ extern "C" { /** * Abstract data type to map integer handles to objects. + * + * Also referred as "pointer array". */ struct handle_table; @@ -70,6 +73,14 @@ unsigned handle_table_add(struct handle_table *ht, void *object); +/** + * Returns zero on failure (out of memory). + */ +unsigned +handle_table_set(struct handle_table *ht, + unsigned handle, + void *object); + /** * Fetch an existing object. * -- cgit v1.2.3 From e584eb888fac90783c5f62333a39e6735be3e488 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 13 Mar 2008 09:58:29 +0000 Subject: gallium: Add a bit of documentation to cso_hash. --- src/gallium/auxiliary/cso_cache/cso_hash.h | 49 +++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 15 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h index 84b45a5963..a3a65b68c8 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.h +++ b/src/gallium/auxiliary/cso_cache/cso_hash.h @@ -26,19 +26,20 @@ **************************************************************************/ /** - This file provides a hash implementation that is capable of dealing - with collisions. It stores colliding entries in linked list. All - functions operating on the hash return an iterator. The iterator - itself points to the collision list. If there wasn't any collision - the list will have just one entry, otherwise client code should - iterate over the entries to find the exact entry among ones that - had the same key (e.g. memcmp could be used on the data to check - that) -*/ - /* - * Authors: - * Zack Rusin - */ + * @file + * Hash table implementation. + * + * This file provides a hash implementation that is capable of dealing + * with collisions. It stores colliding entries in linked list. All + * functions operating on the hash return an iterator. The iterator + * itself points to the collision list. If there wasn't any collision + * the list will have just one entry, otherwise client code should + * iterate over the entries to find the exact entry among ones that + * had the same key (e.g. memcmp could be used on the data to check + * that) + * + * @author Zack Rusin + */ #ifndef CSO_HASH_H #define CSO_HASH_H @@ -48,24 +49,38 @@ extern "C" { #endif + struct cso_hash; struct cso_node; + struct cso_hash_iter { struct cso_hash *hash; struct cso_node *node; }; + struct cso_hash *cso_hash_create(void); void cso_hash_delete(struct cso_hash *hash); + int cso_hash_size(struct cso_hash *hash); + +/** + * Create a list of objects and just add entry with the same key to the list. + */ struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, unsigned key, void *data); + void *cso_hash_take(struct cso_hash *hash, unsigned key); + struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash); + +/** + * Return an iterator pointing to the first entry in the collision list. + */ struct cso_hash_iter cso_hash_find(struct cso_hash *hash, unsigned key); @@ -73,12 +88,16 @@ int cso_hash_iter_is_null(struct cso_hash_iter iter); unsigned cso_hash_iter_key(struct cso_hash_iter iter); void *cso_hash_iter_data(struct cso_hash_iter iter); + struct cso_hash_iter cso_hash_iter_next(struct cso_hash_iter iter); struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter); -/* KW: a convenience routine: - */ +/** + * Convenience routine to iterate over the collision list while doing a memory + * comparison to see which entry in the list is a direct copy of our template + * and returns that entry. + */ void *cso_hash_find_data_from_template( struct cso_hash *hash, unsigned hash_key, void *templ, -- cgit v1.2.3 From 42f28684168ff88942f51fb01377702570d9d4d7 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 13 Mar 2008 10:19:23 +0000 Subject: gallium: General purpose hash table, which is actually just a convenient frontend to cso_hash. --- src/gallium/auxiliary/util/Makefile | 1 + src/gallium/auxiliary/util/SConscript | 1 + src/gallium/auxiliary/util/u_hash_table.c | 199 ++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_hash_table.h | 86 +++++++++++++ 4 files changed, 287 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_hash_table.c create mode 100644 src/gallium/auxiliary/util/u_hash_table.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 2abbe9500e..2016c6fb1f 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -8,6 +8,7 @@ C_SOURCES = \ p_tile.c \ p_util.c \ u_handle_table.c \ + u_hash_table.c \ u_mm.c \ u_snprintf.c diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 2030214aa7..154a3eca8c 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -7,6 +7,7 @@ util = env.ConvenienceLibrary( 'p_tile.c', 'p_util.c', 'u_handle_table.c', + 'u_hash_table.c', 'u_mm.c', 'u_snprintf.c', ]) diff --git a/src/gallium/auxiliary/util/u_hash_table.c b/src/gallium/auxiliary/util/u_hash_table.c new file mode 100644 index 0000000000..ac2cb1b540 --- /dev/null +++ b/src/gallium/auxiliary/util/u_hash_table.c @@ -0,0 +1,199 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * General purpose hash table implementation. + * + * Just uses the cso_hash for now, but it might be better switch to a linear + * probing hash table implementation at some point -- as it is said they have + * better lookup and cache performance and it appears to be possible to write + * a lock-free implementation of such hash tables . + * + * @author José Fonseca + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_util.h" + +#include "cso_cache/cso_hash.h" +#include "u_hash_table.h" + + +struct hash_table +{ + struct cso_hash *cso; + + /** Hash function */ + unsigned (*hash)(void *key); + + /** Compare two keys */ + int (*compare)(void *key1, void *key2); + + /* TODO: key, value destructors? */ +}; + + +struct hash_table_item +{ + void *key; + void *value; +}; + + +struct hash_table * +hash_table_create(unsigned (*hash)(void *key), + int (*compare)(void *key1, void *key2)) +{ + struct hash_table *ht; + + ht = MALLOC_STRUCT(hash_table); + if(!ht) + return NULL; + + ht->cso = cso_hash_create(); + if(!ht->cso) { + FREE(ht); + return NULL; + } + + ht->hash = hash; + ht->compare = compare; + + return ht; +} + + +static struct hash_table_item * +hash_table_find_item(struct hash_table *ht, + void *key, + unsigned key_hash) +{ + struct cso_hash_iter iter; + struct hash_table_item *item; + + iter = cso_hash_find(ht->cso, key_hash); + while (!cso_hash_iter_is_null(iter)) { + item = (struct hash_table_item *)cso_hash_iter_data(iter); + if (!ht->compare(item->key, key)) + return item; + iter = cso_hash_iter_next(iter); + } + + return NULL; +} + + +enum pipe_error +hash_table_set(struct hash_table *ht, + void *key, + void *value) +{ + unsigned key_hash; + struct hash_table_item *item; + + assert(ht); + + key_hash = ht->hash(key); + + item = hash_table_find_item(ht, key, key_hash); + if(item) { + /* TODO: key/value destruction? */ + item->value = value; + return PIPE_OK; + } + + item = MALLOC_STRUCT(hash_table_item); + if(!item) + return PIPE_ERROR_OUT_OF_MEMORY; + + item->key = key; + item->value = value; + + cso_hash_insert(ht->cso, key_hash, item); + /* FIXME: there is no OOM propagation in cso_hash */ + if(0) { + FREE(item); + return PIPE_ERROR_OUT_OF_MEMORY; + } + + return PIPE_OK; +} + + +void * +hash_table_get(struct hash_table *ht, + void *key) +{ + unsigned key_hash; + struct hash_table_item *item; + + assert(ht); + + key_hash = ht->hash(key); + + item = hash_table_find_item(ht, key, key_hash); + if(!item) + return NULL; + + return item->value; +} + + +void +hash_table_remove(struct hash_table *ht, + void *key) +{ + unsigned key_hash; + struct hash_table_item *item; + + assert(ht); + + key_hash = ht->hash(key); + + item = hash_table_find_item(ht, key, key_hash); + if(!item) + return; + + /* FIXME: cso_hash_take takes the first element of the collision list + * indiscriminately, so we can not take the item down. */ + item->value = NULL; +} + + +void +hash_table_destroy(struct hash_table *ht) +{ + assert(ht); + + cso_hash_delete(ht->cso); + + FREE(ht); +} + diff --git a/src/gallium/auxiliary/util/u_hash_table.h b/src/gallium/auxiliary/util/u_hash_table.h new file mode 100644 index 0000000000..d941f2c6b1 --- /dev/null +++ b/src/gallium/auxiliary/util/u_hash_table.h @@ -0,0 +1,86 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * General purpose hash table. + * + * @author José Fonseca + */ + +#ifndef U_HASH_TABLE_H_ +#define U_HASH_TABLE_H_ + + +#include "pipe/p_error.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Generic purpose hash table. + */ +struct hash_table; + + +/** + * Create an hash table. + * + * @param hash hash function + * @param compare should return 0 for two equal keys. + */ +struct hash_table * +hash_table_create(unsigned (*hash)(void *key), + int (*compare)(void *key1, void *key2)); + + +enum pipe_error +hash_table_set(struct hash_table *ht, + void *key, + void *value); + +void * +hash_table_get(struct hash_table *ht, + void *key); + + +void +hash_table_remove(struct hash_table *ht, + void *key); + + +void +hash_table_destroy(struct hash_table *ht); + + +#ifdef __cplusplus +} +#endif + +#endif /* U_HASH_TABLE_H_ */ -- cgit v1.2.3 From bcb454e7a6e2f7efae114321c65bf98e91d5892f Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Thu, 13 Mar 2008 18:12:36 +0100 Subject: tgsi: Drop pre-ps_2_0 opcodes. --- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 68 ------------- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 46 ++------- src/gallium/include/pipe/p_shader_tokens.h | 147 ++++++++++++--------------- 3 files changed, 71 insertions(+), 190 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index d2b747ffd1..ab9e7a06fb 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -588,40 +588,6 @@ translate_instruction(llvm::Module *module, break; case TGSI_OPCODE_NOP: break; - case TGSI_OPCODE_TEXBEM: - break; - case TGSI_OPCODE_TEXBEML: - break; - case TGSI_OPCODE_TEXREG2AR: - break; - case TGSI_OPCODE_TEXM3X2PAD: - break; - case TGSI_OPCODE_TEXM3X2TEX: - break; - case TGSI_OPCODE_TEXM3X3PAD: - break; - case TGSI_OPCODE_TEXM3X3TEX: - break; - case TGSI_OPCODE_TEXM3X3SPEC: - break; - case TGSI_OPCODE_TEXM3X3VSPEC: - break; - case TGSI_OPCODE_TEXREG2GB: - break; - case TGSI_OPCODE_TEXREG2RGB: - break; - case TGSI_OPCODE_TEXDP3TEX: - break; - case TGSI_OPCODE_TEXDP3: - break; - case TGSI_OPCODE_TEXM3X3: - break; - case TGSI_OPCODE_TEXM3X2DEPTH: - break; - case TGSI_OPCODE_TEXDEPTH: - break; - case TGSI_OPCODE_BEM: - break; case TGSI_OPCODE_M4X3: break; case TGSI_OPCODE_M3X4: @@ -981,40 +947,6 @@ translate_instructionir(llvm::Module *module, break; case TGSI_OPCODE_NOP: break; - case TGSI_OPCODE_TEXBEM: - break; - case TGSI_OPCODE_TEXBEML: - break; - case TGSI_OPCODE_TEXREG2AR: - break; - case TGSI_OPCODE_TEXM3X2PAD: - break; - case TGSI_OPCODE_TEXM3X2TEX: - break; - case TGSI_OPCODE_TEXM3X3PAD: - break; - case TGSI_OPCODE_TEXM3X3TEX: - break; - case TGSI_OPCODE_TEXM3X3SPEC: - break; - case TGSI_OPCODE_TEXM3X3VSPEC: - break; - case TGSI_OPCODE_TEXREG2GB: - break; - case TGSI_OPCODE_TEXREG2RGB: - break; - case TGSI_OPCODE_TEXDP3TEX: - break; - case TGSI_OPCODE_TEXDP3: - break; - case TGSI_OPCODE_TEXM3X3: - break; - case TGSI_OPCODE_TEXM3X2DEPTH: - break; - case TGSI_OPCODE_TEXDEPTH: - break; - case TGSI_OPCODE_BEM: - break; case TGSI_OPCODE_M4X3: break; case TGSI_OPCODE_M3X4: diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index ceb407b884..aa78278700 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -324,7 +324,7 @@ static const char *TGSI_IMMS_SHORT[] = "FLT32" }; -static const char *TGSI_OPCODES[] = +static const char *TGSI_OPCODES[TGSI_OPCODE_LAST] = { "OPCODE_ARL", "OPCODE_MOV", @@ -380,6 +380,7 @@ static const char *TGSI_OPCODES[] = "OPCODE_STR", "OPCODE_TEX", "OPCODE_TXD", + "OPCODE_TXP", "OPCODE_UP2H", "OPCODE_UP2US", "OPCODE_UP4B", @@ -433,23 +434,6 @@ static const char *TGSI_OPCODES[] = "OPCODE_NOISE3", "OPCODE_NOISE4", "OPCODE_NOP", - "OPCODE_TEXBEM", - "OPCODE_TEXBEML", - "OPCODE_TEXREG2AR", - "OPCODE_TEXM3X2PAD", - "OPCODE_TEXM3X2TEX", - "OPCODE_TEXM3X3PAD", - "OPCODE_TEXM3X3TEX", - "OPCODE_TEXM3X3SPEC", - "OPCODE_TEXM3X3VSPEC", - "OPCODE_TEXREG2GB", - "OPCODE_TEXREG2RGB", - "OPCODE_TEXDP3TEX", - "OPCODE_TEXDP3", - "OPCODE_TEXM3X3", - "OPCODE_TEXM3X2DEPTH", - "OPCODE_TEXDEPTH", - "OPCODE_BEM", "OPCODE_M4X3", "OPCODE_M3X4", "OPCODE_M3X3", @@ -459,11 +443,10 @@ static const char *TGSI_OPCODES[] = "OPCODE_IFC", "OPCODE_BREAKC", "OPCODE_KIL", - "OPCODE_END", - "OPCODE_TXP" + "OPCODE_END" }; -static const char *TGSI_OPCODES_SHORT[] = +static const char *TGSI_OPCODES_SHORT[TGSI_OPCODE_LAST] = { "ARL", "MOV", @@ -519,6 +502,7 @@ static const char *TGSI_OPCODES_SHORT[] = "STR", "TEX", "TXD", + "TXP", "UP2H", "UP2US", "UP4B", @@ -572,23 +556,6 @@ static const char *TGSI_OPCODES_SHORT[] = "NOISE3", "NOISE4", "NOP", - "TEXBEM", - "TEXBEML", - "TEXREG2AR", - "TEXM3X2PAD", - "TEXM3X2TEX", - "TEXM3X3PAD", - "TEXM3X3TEX", - "TEXM3X3SPEC", - "TEXM3X3VSPEC", - "TEXREG2GB", - "TEXREG2RGB", - "TEXDP3TEX", - "TEXDP3", - "TEXM3X3", - "TEXM3X2DEPTH", - "TEXDEPTH", - "BEM", "M4X3", "M3X4", "M3X3", @@ -598,8 +565,7 @@ static const char *TGSI_OPCODES_SHORT[] = "IFC", "BREAKC", "KIL", - "END", - "TXP" + "END" }; static const char *TGSI_SATS[] = diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index e3947f663b..f05e1a34b3 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -227,22 +227,22 @@ struct tgsi_immediate_float32 #define TGSI_OPCODE_STR 51 #define TGSI_OPCODE_TEX 52 #define TGSI_OPCODE_TXD 53 -#define TGSI_OPCODE_TXP 134 -#define TGSI_OPCODE_UP2H 54 -#define TGSI_OPCODE_UP2US 55 -#define TGSI_OPCODE_UP4B 56 -#define TGSI_OPCODE_UP4UB 57 -#define TGSI_OPCODE_X2D 58 +#define TGSI_OPCODE_TXP 54 +#define TGSI_OPCODE_UP2H 55 +#define TGSI_OPCODE_UP2US 56 +#define TGSI_OPCODE_UP4B 57 +#define TGSI_OPCODE_UP4UB 58 +#define TGSI_OPCODE_X2D 59 /* * GL_NV_vertex_program2 */ -#define TGSI_OPCODE_ARA 59 -#define TGSI_OPCODE_ARR 60 -#define TGSI_OPCODE_BRA 61 -#define TGSI_OPCODE_CAL 62 -#define TGSI_OPCODE_RET 63 -#define TGSI_OPCODE_SSG 64 +#define TGSI_OPCODE_ARA 60 +#define TGSI_OPCODE_ARR 61 +#define TGSI_OPCODE_BRA 62 +#define TGSI_OPCODE_CAL 63 +#define TGSI_OPCODE_RET 64 +#define TGSI_OPCODE_SSG 65 /* * GL_ARB_vertex_program @@ -253,9 +253,9 @@ struct tgsi_immediate_float32 /* * GL_ARB_fragment_program */ -#define TGSI_OPCODE_CMP 65 -#define TGSI_OPCODE_SCS 66 -#define TGSI_OPCODE_TXB 67 +#define TGSI_OPCODE_CMP 66 +#define TGSI_OPCODE_SCS 67 +#define TGSI_OPCODE_TXB 68 /* * GL_NV_fragment_program_option @@ -265,19 +265,19 @@ struct tgsi_immediate_float32 /* * GL_NV_fragment_program2 */ -#define TGSI_OPCODE_NRM 68 -#define TGSI_OPCODE_DIV 69 -#define TGSI_OPCODE_DP2 70 +#define TGSI_OPCODE_NRM 69 +#define TGSI_OPCODE_DIV 70 +#define TGSI_OPCODE_DP2 71 #define TGSI_OPCODE_DP2A TGSI_OPCODE_DOT2ADD -#define TGSI_OPCODE_TXL 71 -#define TGSI_OPCODE_BRK 72 -#define TGSI_OPCODE_IF 73 -#define TGSI_OPCODE_LOOP 74 -#define TGSI_OPCODE_REP 75 -#define TGSI_OPCODE_ELSE 76 -#define TGSI_OPCODE_ENDIF 77 -#define TGSI_OPCODE_ENDLOOP 78 -#define TGSI_OPCODE_ENDREP 79 +#define TGSI_OPCODE_TXL 72 +#define TGSI_OPCODE_BRK 73 +#define TGSI_OPCODE_IF 74 +#define TGSI_OPCODE_LOOP 75 +#define TGSI_OPCODE_REP 76 +#define TGSI_OPCODE_ELSE 77 +#define TGSI_OPCODE_ENDIF 78 +#define TGSI_OPCODE_ENDLOOP 79 +#define TGSI_OPCODE_ENDREP 80 /* * GL_NV_vertex_program2_option @@ -286,26 +286,26 @@ struct tgsi_immediate_float32 /* * GL_NV_vertex_program3 */ -#define TGSI_OPCODE_PUSHA 80 -#define TGSI_OPCODE_POPA 81 +#define TGSI_OPCODE_PUSHA 81 +#define TGSI_OPCODE_POPA 82 /* * GL_NV_gpu_program4 */ -#define TGSI_OPCODE_CEIL 82 -#define TGSI_OPCODE_I2F 83 -#define TGSI_OPCODE_NOT 84 -#define TGSI_OPCODE_TRUNC 85 -#define TGSI_OPCODE_SHL 86 -#define TGSI_OPCODE_SHR 87 -#define TGSI_OPCODE_AND 88 -#define TGSI_OPCODE_OR 89 -#define TGSI_OPCODE_MOD 90 -#define TGSI_OPCODE_XOR 91 -#define TGSI_OPCODE_SAD 92 -#define TGSI_OPCODE_TXF 93 -#define TGSI_OPCODE_TXQ 94 -#define TGSI_OPCODE_CONT 95 +#define TGSI_OPCODE_CEIL 83 +#define TGSI_OPCODE_I2F 84 +#define TGSI_OPCODE_NOT 85 +#define TGSI_OPCODE_TRUNC 86 +#define TGSI_OPCODE_SHL 87 +#define TGSI_OPCODE_SHR 88 +#define TGSI_OPCODE_AND 89 +#define TGSI_OPCODE_OR 90 +#define TGSI_OPCODE_MOD 91 +#define TGSI_OPCODE_XOR 92 +#define TGSI_OPCODE_SAD 93 +#define TGSI_OPCODE_TXF 94 +#define TGSI_OPCODE_TXQ 95 +#define TGSI_OPCODE_CONT 96 /* * GL_NV_vertex_program4 @@ -321,70 +321,53 @@ struct tgsi_immediate_float32 * GL_NV_geometry_program4 */ /* Same as GL_NV_gpu_program4 */ -#define TGSI_OPCODE_EMIT 96 -#define TGSI_OPCODE_ENDPRIM 97 +#define TGSI_OPCODE_EMIT 97 +#define TGSI_OPCODE_ENDPRIM 98 /* * GLSL */ -#define TGSI_OPCODE_BGNLOOP2 98 -#define TGSI_OPCODE_BGNSUB 99 -#define TGSI_OPCODE_ENDLOOP2 100 -#define TGSI_OPCODE_ENDSUB 101 +#define TGSI_OPCODE_BGNLOOP2 99 +#define TGSI_OPCODE_BGNSUB 100 +#define TGSI_OPCODE_ENDLOOP2 101 +#define TGSI_OPCODE_ENDSUB 102 #define TGSI_OPCODE_INT TGSI_OPCODE_TRUNC -#define TGSI_OPCODE_NOISE1 102 -#define TGSI_OPCODE_NOISE2 103 -#define TGSI_OPCODE_NOISE3 104 -#define TGSI_OPCODE_NOISE4 105 -#define TGSI_OPCODE_NOP 106 +#define TGSI_OPCODE_NOISE1 103 +#define TGSI_OPCODE_NOISE2 104 +#define TGSI_OPCODE_NOISE3 105 +#define TGSI_OPCODE_NOISE4 106 +#define TGSI_OPCODE_NOP 107 /* * ps_1_1 */ #define TGSI_OPCODE_TEXKILL TGSI_OPCODE_KILP -#define TGSI_OPCODE_TEXBEM 107 -#define TGSI_OPCODE_TEXBEML 108 -#define TGSI_OPCODE_TEXREG2AR 109 -#define TGSI_OPCODE_TEXM3X2PAD 110 -#define TGSI_OPCODE_TEXM3X2TEX 111 -#define TGSI_OPCODE_TEXM3X3PAD 112 -#define TGSI_OPCODE_TEXM3X3TEX 113 -#define TGSI_OPCODE_TEXM3X3SPEC 114 -#define TGSI_OPCODE_TEXM3X3VSPEC 115 /* * ps_1_2 */ -#define TGSI_OPCODE_TEXREG2GB 116 -#define TGSI_OPCODE_TEXREG2RGB 117 -#define TGSI_OPCODE_TEXDP3TEX 118 -#define TGSI_OPCODE_TEXDP3 119 -#define TGSI_OPCODE_TEXM3X3 120 /* CMP - use TGSI_OPCODE_CND0 */ /* * ps_1_3 */ -#define TGSI_OPCODE_TEXM3X2DEPTH 121 /* CMP - use TGSI_OPCODE_CND0 */ /* * ps_1_4 */ #define TGSI_OPCODE_TEXLD TGSI_OPCODE_TEX -#define TGSI_OPCODE_TEXDEPTH 122 -#define TGSI_OPCODE_BEM 123 /* * ps_2_0 */ #define TGSI_OPCODE_M4X4 TGSI_OPCODE_MULTIPLYMATRIX -#define TGSI_OPCODE_M4X3 124 -#define TGSI_OPCODE_M3X4 125 -#define TGSI_OPCODE_M3X3 126 -#define TGSI_OPCODE_M3X2 127 +#define TGSI_OPCODE_M4X3 108 +#define TGSI_OPCODE_M3X4 109 +#define TGSI_OPCODE_M3X3 110 +#define TGSI_OPCODE_M3X2 111 #define TGSI_OPCODE_CRS TGSI_OPCODE_XPD -#define TGSI_OPCODE_NRM4 128 +#define TGSI_OPCODE_NRM4 112 #define TGSI_OPCODE_SINCOS TGSI_OPCODE_SCS #define TGSI_OPCODE_TEXLDB TGSI_OPCODE_TXB #define TGSI_OPCODE_DP2ADD TGSI_OPCODE_DP2A @@ -393,10 +376,10 @@ struct tgsi_immediate_float32 * ps_2_x */ #define TGSI_OPCODE_CALL TGSI_OPCODE_CAL -#define TGSI_OPCODE_CALLNZ 129 -#define TGSI_OPCODE_IFC 130 +#define TGSI_OPCODE_CALLNZ 113 +#define TGSI_OPCODE_IFC 114 #define TGSI_OPCODE_BREAK TGSI_OPCODE_BRK -#define TGSI_OPCODE_BREAKC 131 +#define TGSI_OPCODE_BREAKC 115 #define TGSI_OPCODE_DSX TGSI_OPCODE_DDX #define TGSI_OPCODE_DSY TGSI_OPCODE_DDY #define TGSI_OPCODE_TEXLDD TGSI_OPCODE_TXD @@ -417,10 +400,10 @@ struct tgsi_immediate_float32 * vs_2_x */ -#define TGSI_OPCODE_KIL 132 /* unpredicated kill */ -#define TGSI_OPCODE_END 133 /* aka HALT */ +#define TGSI_OPCODE_KIL 116 /* unpredicated kill */ +#define TGSI_OPCODE_END 117 /* aka HALT */ -#define TGSI_OPCODE_LAST 135 +#define TGSI_OPCODE_LAST 118 #define TGSI_SAT_NONE 0 /* do not saturate */ #define TGSI_SAT_ZERO_ONE 1 /* clamp to [0,1] */ -- cgit v1.2.3 From b0d5519b449feda7b048bc59d4fede54e43f5ae1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 13 Mar 2008 15:23:04 +0000 Subject: gallium: make the windows config function more readable with 80-ish columns --- src/gallium/auxiliary/util/p_debug.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 09cabdae25..bb84e8096d 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -109,20 +109,30 @@ enum { /* Check for aborts enabled. */ static unsigned abort_en() { - if (!mapped_config_file) - { - /* Open an 8 byte file for configuration data. */ - mapped_config_file = EngMapFile(L"\\??\\c:\\gaDebug.cfg", 8, &debug_config_file); - } - /* An value of "0" (ascii) in the configuration file will clear the first 8 bits in the test byte. */ - /* An value of "1" (ascii) in the configuration file will set the first bit in the test byte. */ - /* An value of "2" (ascii) in the configuration file will set the second bit in the test byte. */ - return ((((char *)mapped_config_file)[0]) - 0x30) & eAssertAbortEn; + if (!mapped_config_file) + { + /* Open an 8 byte file for configuration data. */ + mapped_config_file = EngMapFile(L"\\??\\c:\\gaDebug.cfg", 8, &debug_config_file); + } + + /* A value of "0" (ascii) in the configuration file will clear the + * first 8 bits in the test byte. + * + * A value of "1" (ascii) in the configuration file will set the + * first bit in the test byte. + * + * A value of "2" (ascii) in the configuration file will set the + * second bit in the test byte. + * + * Currently the only interesting values are 0 and 1, which clear + * and set abort-on-assert behaviour respectively. + */ + return ((((char *)mapped_config_file)[0]) - 0x30) & eAssertAbortEn; } #else /* WIN32 */ static unsigned abort_en() { - return !GETENV("GALLIUM_ABORT_ON_ASSERT"); + return !GETENV("GALLIUM_ABORT_ON_ASSERT"); } #endif -- cgit v1.2.3 From fa9e7e9a8debb68611909ac2ffab527c6c39a3e5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 13 Mar 2008 18:08:18 +0000 Subject: gallium: remove semantic info from pipe_shader_state Brian's patch to clean up the shader interfaces. --- src/gallium/auxiliary/draw/draw_aaline.c | 2 +- src/gallium/auxiliary/draw/draw_aapoint.c | 2 +- src/gallium/auxiliary/draw/draw_pstipple.c | 2 +- src/gallium/include/pipe/p_state.h | 2 + src/mesa/state_tracker/st_debug.c | 2 + src/mesa/state_tracker/st_draw.c | 6 +- src/mesa/state_tracker/st_program.c | 167 ++++++++++++++++------------- src/mesa/state_tracker/st_program.h | 2 + 8 files changed, 104 insertions(+), 81 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c index 6b1e640ae9..bed709bad0 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -340,7 +340,7 @@ generate_aaline_fs(struct aaline_stage *aaline) tgsi_dump(aaline_fs.tokens, 0); #endif -#if 1 /* XXX remove */ +#if 0 /* XXX remove */ aaline_fs.input_semantic_name[aaline_fs.num_inputs] = TGSI_SEMANTIC_GENERIC; aaline_fs.input_semantic_index[aaline_fs.num_inputs] = transform.maxGeneric + 1; aaline_fs.num_inputs++; diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c index 99e9e9fe34..4bc2b5f3e2 100644 --- a/src/gallium/auxiliary/draw/draw_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_aapoint.c @@ -514,7 +514,7 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) tgsi_dump(aapoint_fs.tokens, 0); #endif -#if 1 /* XXX remove */ +#if 0 /* XXX remove */ aapoint_fs.input_semantic_name[aapoint_fs.num_inputs] = TGSI_SEMANTIC_GENERIC; aapoint_fs.input_semantic_index[aapoint_fs.num_inputs] = transform.maxGeneric + 1; aapoint_fs.num_inputs++; diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index 8b3e84a9a0..d7475b457c 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -331,7 +331,7 @@ generate_pstip_fs(struct pstip_stage *pstip) pstip->sampler_unit = transform.maxSampler + 1; -#if 1 /* XXX remove */ +#if 0 /* XXX remove */ if (transform.wincoordInput < 0) { pstip_fs.input_semantic_name[pstip_fs.num_inputs] = TGSI_SEMANTIC_POSITION; pstip_fs.input_semantic_index[pstip_fs.num_inputs] = (ubyte)transform.maxInput; diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index e338a27383..bb5fab5d5f 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -162,6 +162,7 @@ struct pipe_constant_buffer struct pipe_shader_state { const struct tgsi_token *tokens; +#if 0 /* XXX these are going away */ ubyte num_inputs; ubyte num_outputs; @@ -169,6 +170,7 @@ struct pipe_shader_state ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; +#endif }; diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c index 9c13010da8..8b9b91aa34 100644 --- a/src/mesa/state_tracker/st_debug.c +++ b/src/mesa/state_tracker/st_debug.c @@ -52,10 +52,12 @@ st_print_current(void) struct st_context *st = ctx->st; int i; +#if 0 printf("Vertex Transform Inputs:\n"); for (i = 0; i < st->vp->state.num_inputs; i++) { printf(" Slot %d: VERT_ATTRIB_%d\n", i, st->vp->index_to_input[i]); } +#endif tgsi_dump( st->vp->state.tokens, 0 ); if (st->vp->Base.Base.Parameters) diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 98504e46c1..5ebd0bbcce 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -223,7 +223,7 @@ st_draw_vbo(GLcontext *ctx, /* loop over TGSI shader inputs to determine vertex buffer * and attribute info */ - for (attr = 0; attr < vs->num_inputs; attr++) { + for (attr = 0; attr < /*vs*/vp->num_inputs; attr++) { const GLuint mesaAttr = vp->index_to_input[attr]; struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj; struct pipe_vertex_element velement; @@ -333,7 +333,7 @@ st_draw_vbo(GLcontext *ctx, } /* unreference buffers (frees wrapped user-space buffer objects) */ - for (attr = 0; attr < vs->num_inputs; attr++) { + for (attr = 0; attr < /*vs*/vp->num_inputs; attr++) { pipe_buffer_reference(winsys, &vbuffer[attr].buffer, NULL); assert(!vbuffer[attr].buffer); pipe->set_vertex_buffer(pipe, attr, &vbuffer[attr]); @@ -502,7 +502,7 @@ st_feedback_draw_vbo(GLcontext *ctx, /* loop over TGSI shader inputs to determine vertex buffer * and attribute info */ - for (attr = 0; attr < vs->num_inputs; attr++) { + for (attr = 0; attr < /*vs*/vp->num_inputs; attr++) { const GLuint mesaAttr = vp->index_to_input[attr]; struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj; struct pipe_vertex_element velement; diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 0f8784e132..d9d11ee0e8 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -85,6 +85,14 @@ st_translate_vertex_program(struct st_context *st, GLuint num_generic = 0; GLuint num_tokens; + ubyte vs_input_semantic_name[PIPE_MAX_SHADER_INPUTS]; + ubyte vs_input_semantic_index[PIPE_MAX_SHADER_INPUTS]; + uint vs_num_inputs = 0; + + ubyte vs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; + ubyte vs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; + uint vs_num_outputs = 0; + memset(&vs, 0, sizeof(vs)); /* @@ -93,36 +101,36 @@ st_translate_vertex_program(struct st_context *st, */ for (attr = 0; attr < VERT_ATTRIB_MAX; attr++) { if (stvp->Base.Base.InputsRead & (1 << attr)) { - const GLuint slot = vs.num_inputs; + const GLuint slot = vs_num_inputs; - vs.num_inputs++; + vs_num_inputs++; stvp->input_to_index[attr] = slot; stvp->index_to_input[slot] = attr; switch (attr) { case VERT_ATTRIB_POS: - vs.input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; - vs.input_semantic_index[slot] = 0; + vs_input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; + vs_input_semantic_index[slot] = 0; break; case VERT_ATTRIB_WEIGHT: /* fall-through */ case VERT_ATTRIB_NORMAL: /* just label as a generic */ - vs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - vs.input_semantic_index[slot] = 0; + vs_input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + vs_input_semantic_index[slot] = 0; break; case VERT_ATTRIB_COLOR0: - vs.input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - vs.input_semantic_index[slot] = 0; + vs_input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + vs_input_semantic_index[slot] = 0; break; case VERT_ATTRIB_COLOR1: - vs.input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - vs.input_semantic_index[slot] = 1; + vs_input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + vs_input_semantic_index[slot] = 1; break; case VERT_ATTRIB_FOG: - vs.input_semantic_name[slot] = TGSI_SEMANTIC_FOG; - vs.input_semantic_index[slot] = 0; + vs_input_semantic_name[slot] = TGSI_SEMANTIC_FOG; + vs_input_semantic_index[slot] = 0; break; case VERT_ATTRIB_TEX0: case VERT_ATTRIB_TEX1: @@ -132,8 +140,8 @@ st_translate_vertex_program(struct st_context *st, case VERT_ATTRIB_TEX5: case VERT_ATTRIB_TEX6: case VERT_ATTRIB_TEX7: - vs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - vs.input_semantic_index[slot] = num_generic++; + vs_input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + vs_input_semantic_index[slot] = num_generic++; break; case VERT_ATTRIB_GENERIC0: case VERT_ATTRIB_GENERIC1: @@ -144,8 +152,8 @@ st_translate_vertex_program(struct st_context *st, case VERT_ATTRIB_GENERIC6: case VERT_ATTRIB_GENERIC7: assert(attr < VERT_ATTRIB_MAX); - vs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - vs.input_semantic_index[slot] = num_generic++; + vs_input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + vs_input_semantic_index[slot] = num_generic++; break; default: assert(0); @@ -155,8 +163,8 @@ st_translate_vertex_program(struct st_context *st, /* initialize output semantics to defaults */ for (i = 0; i < PIPE_MAX_SHADER_OUTPUTS; i++) { - vs.output_semantic_name[i] = TGSI_SEMANTIC_GENERIC; - vs.output_semantic_index[i] = 0; + vs_output_semantic_name[i] = TGSI_SEMANTIC_GENERIC; + vs_output_semantic_index[i] = 0; } num_generic = 0; @@ -173,8 +181,8 @@ st_translate_vertex_program(struct st_context *st, assert(slot != ~0); } else { - slot = vs.num_outputs; - vs.num_outputs++; + slot = vs_num_outputs; + vs_num_outputs++; defaultOutputMapping[attr] = slot; } @@ -185,32 +193,32 @@ st_translate_vertex_program(struct st_context *st, switch (attr) { case VERT_RESULT_HPOS: assert(slot == 0); - vs.output_semantic_name[slot] = TGSI_SEMANTIC_POSITION; - vs.output_semantic_index[slot] = 0; + vs_output_semantic_name[slot] = TGSI_SEMANTIC_POSITION; + vs_output_semantic_index[slot] = 0; break; case VERT_RESULT_COL0: - vs.output_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - vs.output_semantic_index[slot] = 0; + vs_output_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + vs_output_semantic_index[slot] = 0; break; case VERT_RESULT_COL1: - vs.output_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - vs.output_semantic_index[slot] = 1; + vs_output_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + vs_output_semantic_index[slot] = 1; break; case VERT_RESULT_BFC0: - vs.output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR; - vs.output_semantic_index[slot] = 0; + vs_output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR; + vs_output_semantic_index[slot] = 0; break; case VERT_RESULT_BFC1: - vs.output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR; - vs.output_semantic_index[slot] = 1; + vs_output_semantic_name[slot] = TGSI_SEMANTIC_BCOLOR; + vs_output_semantic_index[slot] = 1; break; case VERT_RESULT_FOGC: - vs.output_semantic_name[slot] = TGSI_SEMANTIC_FOG; - vs.output_semantic_index[slot] = 0; + vs_output_semantic_name[slot] = TGSI_SEMANTIC_FOG; + vs_output_semantic_index[slot] = 0; break; case VERT_RESULT_PSIZ: - vs.output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE; - vs.output_semantic_index[slot] = 0; + vs_output_semantic_name[slot] = TGSI_SEMANTIC_PSIZE; + vs_output_semantic_index[slot] = 0; break; case VERT_RESULT_EDGE: assert(0); @@ -223,30 +231,30 @@ st_translate_vertex_program(struct st_context *st, case VERT_RESULT_TEX5: case VERT_RESULT_TEX6: case VERT_RESULT_TEX7: - vs.output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - vs.output_semantic_index[slot] = num_generic++; + vs_output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + vs_output_semantic_index[slot] = num_generic++; break; case VERT_RESULT_VAR0: /* fall-through */ default: assert(attr - VERT_RESULT_VAR0 < MAX_VARYING); - vs.output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - vs.output_semantic_index[slot] = num_generic++; + vs_output_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + vs_output_semantic_index[slot] = num_generic++; } } } - assert(vs.output_semantic_name[0] == TGSI_SEMANTIC_POSITION); + assert(vs_output_semantic_name[0] == TGSI_SEMANTIC_POSITION); if (outputMapping) { - /* find max output slot referenced to compute vs.num_outputs */ + /* find max output slot referenced to compute vs_num_outputs */ GLuint maxSlot = 0; for (attr = 0; attr < VERT_RESULT_MAX; attr++) { if (outputMapping[attr] != ~0 && outputMapping[attr] > maxSlot) maxSlot = outputMapping[attr]; } - vs.num_outputs = maxSlot + 1; + vs_num_outputs = maxSlot + 1; } else { outputMapping = defaultOutputMapping; @@ -257,22 +265,23 @@ st_translate_vertex_program(struct st_context *st, num_tokens = tgsi_translate_mesa_program( TGSI_PROCESSOR_VERTEX, &stvp->Base.Base, /* inputs */ - vs.num_inputs, + vs_num_inputs, stvp->input_to_index, - vs.input_semantic_name, - vs.input_semantic_index, + vs_input_semantic_name, + vs_input_semantic_index, NULL, /* outputs */ - vs.num_outputs, + vs_num_outputs, outputMapping, - vs.output_semantic_name, - vs.output_semantic_index, + vs_output_semantic_name, + vs_output_semantic_index, /* tokenized result */ tokens, ST_MAX_SHADER_TOKENS); vs.tokens = (struct tgsi_token *) mem_dup(tokens, num_tokens * sizeof(tokens[0])); + stvp->num_inputs = vs_num_inputs; stvp->state = vs; /* struct copy */ stvp->driver_shader = pipe->create_vs_state(pipe, &vs); @@ -310,6 +319,14 @@ st_translate_fragment_program(struct st_context *st, GLuint num_generic = 0; GLuint num_tokens; + ubyte fs_input_semantic_name[PIPE_MAX_SHADER_INPUTS]; + ubyte fs_input_semantic_index[PIPE_MAX_SHADER_INPUTS]; + uint fs_num_inputs = 0; + + ubyte fs_output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; + ubyte fs_output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; + uint fs_num_outputs = 0; + memset(&fs, 0, sizeof(fs)); /* which vertex output goes to the first fragment input: */ @@ -323,33 +340,33 @@ st_translate_fragment_program(struct st_context *st, */ for (attr = 0; attr < FRAG_ATTRIB_MAX; attr++) { if (inputsRead & (1 << attr)) { - const GLuint slot = fs.num_inputs; + const GLuint slot = fs_num_inputs; defaultInputMapping[attr] = slot; stfp->input_map[slot] = vslot++; - fs.num_inputs++; + fs_num_inputs++; switch (attr) { case FRAG_ATTRIB_WPOS: - fs.input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; - fs.input_semantic_index[slot] = 0; + fs_input_semantic_name[slot] = TGSI_SEMANTIC_POSITION; + fs_input_semantic_index[slot] = 0; interpMode[slot] = TGSI_INTERPOLATE_LINEAR; break; case FRAG_ATTRIB_COL0: - fs.input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - fs.input_semantic_index[slot] = 0; + fs_input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + fs_input_semantic_index[slot] = 0; interpMode[slot] = TGSI_INTERPOLATE_LINEAR; break; case FRAG_ATTRIB_COL1: - fs.input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; - fs.input_semantic_index[slot] = 1; + fs_input_semantic_name[slot] = TGSI_SEMANTIC_COLOR; + fs_input_semantic_index[slot] = 1; interpMode[slot] = TGSI_INTERPOLATE_LINEAR; break; case FRAG_ATTRIB_FOGC: - fs.input_semantic_name[slot] = TGSI_SEMANTIC_FOG; - fs.input_semantic_index[slot] = 0; + fs_input_semantic_name[slot] = TGSI_SEMANTIC_FOG; + fs_input_semantic_index[slot] = 0; interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; break; case FRAG_ATTRIB_TEX0: @@ -360,15 +377,15 @@ st_translate_fragment_program(struct st_context *st, case FRAG_ATTRIB_TEX5: case FRAG_ATTRIB_TEX6: case FRAG_ATTRIB_TEX7: - fs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - fs.input_semantic_index[slot] = num_generic++; + fs_input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + fs_input_semantic_index[slot] = num_generic++; interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; break; case FRAG_ATTRIB_VAR0: /* fall-through */ default: - fs.input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; - fs.input_semantic_index[slot] = num_generic++; + fs_input_semantic_name[slot] = TGSI_SEMANTIC_GENERIC; + fs_input_semantic_index[slot] = num_generic++; interpMode[slot] = TGSI_INTERPOLATE_PERSPECTIVE; } } @@ -383,10 +400,10 @@ st_translate_fragment_program(struct st_context *st, /* if z is written, emit that first */ if (outputsWritten & (1 << FRAG_RESULT_DEPR)) { - fs.output_semantic_name[fs.num_outputs] = TGSI_SEMANTIC_POSITION; - fs.output_semantic_index[fs.num_outputs] = 0; - outputMapping[FRAG_RESULT_DEPR] = fs.num_outputs; - fs.num_outputs++; + fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_POSITION; + fs_output_semantic_index[fs_num_outputs] = 0; + outputMapping[FRAG_RESULT_DEPR] = fs_num_outputs; + fs_num_outputs++; outputsWritten &= ~(1 << FRAG_RESULT_DEPR); } @@ -399,15 +416,15 @@ st_translate_fragment_program(struct st_context *st, assert(0); break; case FRAG_RESULT_COLR: - fs.output_semantic_name[fs.num_outputs] = TGSI_SEMANTIC_COLOR; - fs.output_semantic_index[fs.num_outputs] = numColors; - outputMapping[attr] = fs.num_outputs; + fs_output_semantic_name[fs_num_outputs] = TGSI_SEMANTIC_COLOR; + fs_output_semantic_index[fs_num_outputs] = numColors; + outputMapping[attr] = fs_num_outputs; numColors++; break; default: assert(0); } - fs.num_outputs++; + fs_num_outputs++; } } } @@ -420,16 +437,16 @@ st_translate_fragment_program(struct st_context *st, num_tokens = tgsi_translate_mesa_program( TGSI_PROCESSOR_FRAGMENT, &stfp->Base.Base, /* inputs */ - fs.num_inputs, + fs_num_inputs, inputMapping, - fs.input_semantic_name, - fs.input_semantic_index, + fs_input_semantic_name, + fs_input_semantic_index, interpMode, /* outputs */ - fs.num_outputs, + fs_num_outputs, outputMapping, - fs.output_semantic_name, - fs.output_semantic_index, + fs_output_semantic_name, + fs_output_semantic_index, /* tokenized result */ tokens, ST_MAX_SHADER_TOKENS); diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index 78786dcbb6..9ef2a07eaa 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -85,6 +85,8 @@ struct st_vertex_program /** maps a TGSI input index back to a Mesa VERT_ATTRIB_x */ GLuint index_to_input[PIPE_MAX_SHADER_INPUTS]; + GLuint num_inputs; + struct pipe_shader_state state; struct pipe_shader_state *driver_shader; -- cgit v1.2.3 From cb294542bcaca7d9847067ce502a68fd8e92f42e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 12 Mar 2008 16:28:33 -0600 Subject: gallium: remove a debug printf --- src/gallium/auxiliary/cso_cache/cso_context.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index f7f4aebb16..fd86bfaca9 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -102,8 +102,6 @@ static void cso_release_all( struct cso_context *ctx ) void cso_destroy_context( struct cso_context *ctx ) { - debug_printf("%s\n", __FUNCTION__); - if (ctx) cso_release_all( ctx ); -- cgit v1.2.3 From 7ffbaebce1da7fec36a38f424f266806a3a0fc6a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 13 Mar 2008 13:22:24 -0600 Subject: gallium: fix bug in stip_first_tri() Need to compute num_samplers after binding/creating the fragment shader. --- src/gallium/auxiliary/draw/draw_pstipple.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index d7475b457c..08f06bf222 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -488,16 +488,16 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) struct pipe_context *pipe = pstip->pipe; uint num_samplers; - /* how many samplers? */ - /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ - num_samplers = MAX2(pstip->num_textures, pstip->num_samplers); - num_samplers = MAX2(num_samplers, pstip->sampler_unit + 1); - assert(stage->draw->rasterizer->poly_stipple_enable); /* bind our fragprog */ bind_pstip_fragment_shader(pstip); + /* how many samplers? */ + /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ + num_samplers = MAX2(pstip->num_textures, pstip->num_samplers); + num_samplers = MAX2(num_samplers, pstip->sampler_unit + 1); + /* plug in our sampler, texture */ pstip->state.samplers[pstip->sampler_unit] = pstip->sampler_cso; pstip->state.textures[pstip->sampler_unit] = pstip->texture; -- cgit v1.2.3 From b12a28db96d3bc7f01b6cdc9ee909f95a8c9ccc2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 13 Mar 2008 14:05:30 -0600 Subject: gallium: remove dead code related to shader semantic input/output info --- src/gallium/auxiliary/draw/draw_aaline.c | 6 ------ src/gallium/auxiliary/draw/draw_aapoint.c | 6 ------ src/gallium/auxiliary/draw/draw_pstipple.c | 10 +--------- src/gallium/include/pipe/p_state.h | 9 --------- 4 files changed, 1 insertion(+), 30 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c index bed709bad0..f2b983374e 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -340,12 +340,6 @@ generate_aaline_fs(struct aaline_stage *aaline) tgsi_dump(aaline_fs.tokens, 0); #endif -#if 0 /* XXX remove */ - aaline_fs.input_semantic_name[aaline_fs.num_inputs] = TGSI_SEMANTIC_GENERIC; - aaline_fs.input_semantic_index[aaline_fs.num_inputs] = transform.maxGeneric + 1; - aaline_fs.num_inputs++; -#endif - aaline->fs->aaline_fs = aaline->driver_create_fs_state(aaline->pipe, &aaline_fs); diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c index 4bc2b5f3e2..67a7a8ebab 100644 --- a/src/gallium/auxiliary/draw/draw_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_aapoint.c @@ -514,12 +514,6 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) tgsi_dump(aapoint_fs.tokens, 0); #endif -#if 0 /* XXX remove */ - aapoint_fs.input_semantic_name[aapoint_fs.num_inputs] = TGSI_SEMANTIC_GENERIC; - aapoint_fs.input_semantic_index[aapoint_fs.num_inputs] = transform.maxGeneric + 1; - aapoint_fs.num_inputs++; -#endif - aapoint->fs->aapoint_fs = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs); diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index 08f06bf222..09d542002f 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -324,21 +324,13 @@ generate_pstip_fs(struct pstip_stage *pstip) (struct tgsi_token *) pstip_fs.tokens, MAX, &transform.base); -#if 1 /* DEBUG */ +#if 0 /* DEBUG */ tgsi_dump(orig_fs->tokens, 0); tgsi_dump(pstip_fs.tokens, 0); #endif pstip->sampler_unit = transform.maxSampler + 1; -#if 0 /* XXX remove */ - if (transform.wincoordInput < 0) { - pstip_fs.input_semantic_name[pstip_fs.num_inputs] = TGSI_SEMANTIC_POSITION; - pstip_fs.input_semantic_index[pstip_fs.num_inputs] = (ubyte)transform.maxInput; - pstip_fs.num_inputs++; - } -#endif - pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs); } diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index bb5fab5d5f..02c354322d 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -162,15 +162,6 @@ struct pipe_constant_buffer struct pipe_shader_state { const struct tgsi_token *tokens; -#if 0 - /* XXX these are going away */ - ubyte num_inputs; - ubyte num_outputs; - ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */ - ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; - ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ - ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; -#endif }; -- cgit v1.2.3 From 8b8c9acdb747499149e633179a8ad10b0e4206b1 Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 13 Mar 2008 14:33:57 -0600 Subject: gallium: added draw_enable_line_stipple() function Allows drivers that implement line stipple to turn off this drawing stage. --- src/gallium/auxiliary/draw/draw_context.c | 12 ++++++++++++ src/gallium/auxiliary/draw/draw_context.h | 3 +++ src/gallium/auxiliary/draw/draw_private.h | 5 +++-- 3 files changed, 18 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index fed2b6e759..3d09d95851 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -85,6 +85,7 @@ struct draw_context *draw_create( void ) /* these defaults are oriented toward the needs of softpipe */ draw->wide_point_threshold = 1000000.0; /* infinity */ draw->wide_line_threshold = 1.0; + draw->line_stipple = TRUE; draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ @@ -254,6 +255,17 @@ draw_wide_line_threshold(struct draw_context *draw, float threshold) } +/** + * Tells the draw module whether or not to implement line stipple. + */ +void +draw_enable_line_stipple(struct draw_context *draw, boolean enable) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->line_stipple = enable; +} + + /** * Ask the draw module for the location/slot of the given vertex attribute in * a post-transformed vertex. diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index df63e91a22..e8e2b56bae 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -94,6 +94,9 @@ void draw_wide_point_threshold(struct draw_context *draw, float threshold); void draw_wide_line_threshold(struct draw_context *draw, float threshold); +void draw_enable_line_stipple(struct draw_context *draw, boolean enable); + + boolean draw_use_sse(struct draw_context *draw); void diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 4147472d45..379b6c3206 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -238,6 +238,7 @@ struct draw_context float wide_point_threshold; /**< convert pnts to tris if larger than this */ float wide_line_threshold; /**< convert lines to tris if wider than this */ + boolean line_stipple; /**< do line stipple? */ boolean use_sse; /* If a prim stage introduces new vertex attributes, they'll be stored here @@ -344,10 +345,10 @@ extern void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw ); extern void draw_vertex_shader_queue_flush( struct draw_context *draw ); -struct tgsi_exec_machine; - extern void draw_update_vertex_fetch( struct draw_context *draw ); +extern boolean draw_need_pipeline(const struct draw_context *draw); + /* Prototype/hack */ -- cgit v1.2.3 From 3faf6230ff4b63833c072ac7afeb43c25d3cba22 Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 13 Mar 2008 14:34:35 -0600 Subject: gallium: added draw_need_pipeline() predicate function To test if we need any pipeline stage, or whether we can go into passthrough mode. --- src/gallium/auxiliary/draw/draw_validate.c | 55 +++++++++++++++++++++++++++++- 1 file changed, 54 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index b43295b586..602fa3429d 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -33,6 +33,59 @@ #include "draw_private.h" +/** + * Check if we need any special pipeline stages, or whether prims/verts + * can go through untouched. + */ +boolean +draw_need_pipeline(const struct draw_context *draw) +{ + /* clipping */ + if (!draw->rasterizer->bypass_clipping) + return TRUE; + + /* vertex shader */ + if (!draw->rasterizer->bypass_vs) + return TRUE; + + /* line stipple */ + if (draw->rasterizer->line_stipple_enable && draw->line_stipple) + return TRUE; + + /* wide lines */ + if (draw->rasterizer->line_width > draw->wide_line_threshold) + return TRUE; + + /* large points */ + if (draw->rasterizer->point_size > draw->wide_point_threshold) + return TRUE; + + /* AA lines */ + if (draw->rasterizer->line_smooth && draw->pipeline.aaline) + return TRUE; + + /* AA points */ + if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) + return TRUE; + + /* polygon stipple */ + if (draw->rasterizer->poly_stipple_enable && draw->pipeline.pstipple) + return TRUE; + + /* polygon offset */ + if (draw->rasterizer->offset_cw || draw->rasterizer->offset_ccw) + return TRUE; + + /* two-side lighting */ + if (draw->rasterizer->light_twoside) + return TRUE; + + /* polygon cull */ + if (draw->rasterizer->cull_mode) + return TRUE; + + return FALSE; +} @@ -92,7 +145,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) next = draw->pipeline.wide_point; } - if (draw->rasterizer->line_stipple_enable) { + if (draw->rasterizer->line_stipple_enable && draw->line_stipple) { draw->pipeline.stipple->next = next; next = draw->pipeline.stipple; precalc_flat = 1; /* only needed for lines really */ -- cgit v1.2.3 From a889928d85ac8ba7e1a7fe15393858a9422cf750 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 13 Mar 2008 16:41:12 -0400 Subject: add a way of removing an exact iterator from the hash --- src/gallium/auxiliary/cso_cache/cso_hash.c | 25 ++++++++++++++++++++++--- src/gallium/auxiliary/cso_cache/cso_hash.h | 16 ++++++++++++++-- 2 files changed, 36 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c index 5cad5d3be7..ddce3822f7 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -99,7 +99,7 @@ static void *cso_data_allocate_node(struct cso_hash_data *hash) return MALLOC(hash->nodeSize); } -static void cso_data_free_node(struct cso_node *node) +static void cso_free_node(struct cso_node *node) { FREE(node); } @@ -248,7 +248,7 @@ void cso_hash_delete(struct cso_hash *hash) struct cso_node *cur = *bucket++; while (cur != e_for_x) { struct cso_node *next = cur->next; - cso_data_free_node(cur); + cso_free_node(cur); cur = next; } } @@ -367,7 +367,7 @@ void * cso_hash_take(struct cso_hash *hash, if (*node != hash->data.e) { void *t = (*node)->value; struct cso_node *next = (*node)->next; - cso_data_free_node(*node); + cso_free_node(*node); *node = next; --hash->data.d->size; cso_data_has_shrunk(hash->data.d); @@ -393,3 +393,22 @@ int cso_hash_size(struct cso_hash *hash) { return hash->data.d->size; } + +struct cso_hash_iter cso_hash_erase(struct cso_hash *hash, struct cso_hash_iter iter) +{ + struct cso_hash_iter ret = iter; + struct cso_node *node = iter.node; + struct cso_node **node_ptr; + + if (node == hash->data.e) + return iter; + + ret = cso_hash_iter_next(ret); + node_ptr = (struct cso_node**)(&hash->data.d->buckets[node->key % hash->data.d->numBuckets]); + while (*node_ptr != node) + node_ptr = &(*node_ptr)->next; + *node_ptr = node->next; + cso_free_node(node); + --hash->data.d->size; + return ret; +} diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h index a3a65b68c8..73c4742006 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.h +++ b/src/gallium/auxiliary/cso_cache/cso_hash.h @@ -68,14 +68,26 @@ int cso_hash_size(struct cso_hash *hash); /** - * Create a list of objects and just add entry with the same key to the list. + * Adds a data with the given key to the hash. If entry with the given + * key is already in the hash, this current entry is instered before it + * in the collision list. + * Function returns iterator pointing to the inserted item in the hash. */ struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, unsigned key, void *data); +/** + * Removes the item pointed to by the current iterator from the hash. + * Note that the data itself is not erased and if it was a malloc'ed pointer + * it will have to be freed after calling this function by the callee. + * Function returns iterator pointing to the item after the removed one in + * the hash. + */ +struct cso_hash_iter cso_hash_erase(struct cso_hash *hash, struct cso_hash_iter iter); void *cso_hash_take(struct cso_hash *hash, unsigned key); + struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash); /** @@ -97,7 +109,7 @@ struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter); * Convenience routine to iterate over the collision list while doing a memory * comparison to see which entry in the list is a direct copy of our template * and returns that entry. - */ + */ void *cso_hash_find_data_from_template( struct cso_hash *hash, unsigned hash_key, void *templ, -- cgit v1.2.3 From b9518a4e39f739a31dd3f62d67563944f8c266a9 Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 13 Mar 2008 16:55:48 -0600 Subject: gallium: added new EMIT_HEADER token Used to emit the struct vertex_header info for softpipe. Before we were using the EMIT_ALL token but that's insufficient for the draw pass-through mode. EMIT_ALL might get removed soon... --- src/gallium/auxiliary/draw/draw_vertex.c | 3 +++ src/gallium/auxiliary/draw/draw_vertex.h | 1 + 2 files changed, 4 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c index daf1ef4b80..970adc95e7 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.c +++ b/src/gallium/auxiliary/draw/draw_vertex.c @@ -52,6 +52,9 @@ draw_compute_vertex_size(struct vertex_info *vinfo) switch (vinfo->emit[i]) { case EMIT_OMIT: break; + case EMIT_HEADER: + vinfo->size += sizeof(struct vertex_header) / 4; + break; case EMIT_4UB: /* fall-through */ case EMIT_1F_PSIZE: diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index 267c74203b..abd2017ed3 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -48,6 +48,7 @@ enum attrib_emit { EMIT_OMIT, /**< don't emit the attribute */ EMIT_ALL, /**< emit whole post-xform vertex, w/ header */ + EMIT_HEADER, /**< emit vertex_header struct (XXX temp?) */ EMIT_1F, EMIT_1F_PSIZE, /**< insert constant point size */ EMIT_2F, -- cgit v1.2.3 From 13334c8dd2744402d43f8ea0a9d2c0e5e76ac28e Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 13 Mar 2008 16:57:37 -0600 Subject: gallium: added EMIT_HEADER case in emit_vertex() --- src/gallium/auxiliary/draw/draw_vbuf.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vbuf.c b/src/gallium/auxiliary/draw/draw_vbuf.c index 71ac73912b..f83b441e93 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_vbuf.c @@ -225,6 +225,11 @@ emit_vertex( struct vbuf_stage *vbuf, vbuf->vertex_ptr += vinfo->size; count += vinfo->size; break; + case EMIT_HEADER: + memcpy(vbuf->vertex_ptr, vertex, sizeof(*vertex)); + *vbuf->vertex_ptr += sizeof(*vertex) / 4; + count += sizeof(*vertex) / 4; + break; case EMIT_1F: *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); count++; -- cgit v1.2.3 From cf106789abd4a84e8f07dc6ca12d2261e9bf92cd Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 13 Mar 2008 17:04:04 -0600 Subject: gallium: added EMIT_HEADER case --- src/gallium/auxiliary/draw/draw_vf.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vf.c b/src/gallium/auxiliary/draw/draw_vf.c index 901ff20a7e..4b5b2a86f6 100644 --- a/src/gallium/auxiliary/draw/draw_vf.c +++ b/src/gallium/auxiliary/draw/draw_vf.c @@ -263,6 +263,10 @@ void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf, } break; } + case EMIT_HEADER: + /* XXX emit new DRAW_EMIT_HEADER attribute??? */ + count += sizeof(struct vertex_header) / 4; + break; case EMIT_1F: attrs[nr_attrs].attrib = j; attrs[nr_attrs].format = DRAW_EMIT_1F; -- cgit v1.2.3 From ce49c4c24bf72ea642015d566ff687d512574fd5 Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 13 Mar 2008 17:04:56 -0600 Subject: gallium: fix bug in draw_num_vs_outputs() --- src/gallium/auxiliary/draw/draw_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 3d09d95851..e8473a4c57 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -311,7 +311,7 @@ uint draw_num_vs_outputs(struct draw_context *draw) { uint count = draw->vertex_shader->info.num_outputs; - if (draw->extra_vp_outputs.slot >= 0) + if (draw->extra_vp_outputs.slot > 0) count++; return count; } -- cgit v1.2.3 From d088d640fca415261a208d3cbede94a6522ebb6b Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 13 Mar 2008 17:10:28 -0600 Subject: gallium: plug in vertex passthrough code Based on a patch from Zack. Basically, implement a new draw_arrays function that copies the incoming user-vertices to the hardware vertex buffer, doing format/type conversion as needed. The vertex fetch/store code is totally temporary for now. --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/SConscript | 1 + src/gallium/auxiliary/draw/draw_passthrough.c | 106 ++++++++++++++++++++++++-- src/gallium/auxiliary/draw/draw_prim.c | 47 +++++++++++- 4 files changed, 147 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 2daa1636f3..21e9f737b7 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -15,6 +15,7 @@ C_SOURCES = \ draw_debug.c \ draw_flatshade.c \ draw_offset.c \ + draw_passthrough.c \ draw_prim.c \ draw_pstipple.c \ draw_stipple.c \ diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 5cb7664c85..64b444dbd5 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -15,6 +15,7 @@ draw = env.ConvenienceLibrary( 'draw_flatshade.c', 'draw_offset.c', 'draw_prim.c', + 'draw_passthrough.c', 'draw_pstipple.c', 'draw_passthrough.c', 'draw_stipple.c', diff --git a/src/gallium/auxiliary/draw/draw_passthrough.c b/src/gallium/auxiliary/draw/draw_passthrough.c index a51fa0ab23..d16f056191 100644 --- a/src/gallium/auxiliary/draw/draw_passthrough.c +++ b/src/gallium/auxiliary/draw/draw_passthrough.c @@ -66,6 +66,95 @@ #include "draw/draw_vertex.h" +/** + * General-purpose fetch from user's vertex arrays, emit to driver's + * vertex buffer. + * + * XXX this is totally temporary. + */ +static void +fetch_store_general( struct draw_context *draw, + float *out, + unsigned start, + unsigned count ) +{ + const struct vertex_info *vinfo = draw->render->get_vertex_info(draw->render); + const unsigned nr_attrs = vinfo->num_attribs; + uint i, j; + + const unsigned *pitch = draw->vertex_fetch.pitch; + const ubyte **src = draw->vertex_fetch.src_ptr; + + for (i = start; i < count; i++) { + for (j = 0; j < nr_attrs; j++) { + const uint jj = vinfo->src_index[j]; + const enum pipe_format srcFormat = draw->vertex_element[jj].src_format; + const ubyte *from = src[jj] + i * pitch[jj]; + float attrib[4]; + + switch (srcFormat) { + case PIPE_FORMAT_R32G32B32A32_FLOAT: + { + float *f = (float *) from; + attrib[0] = f[0]; + attrib[1] = f[1]; + attrib[2] = f[2]; + attrib[3] = f[3]; + } + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + { + float *f = (float *) from; + attrib[0] = f[0]; + attrib[1] = f[1]; + attrib[2] = f[2]; + attrib[3] = 1.0; + } + break; + case PIPE_FORMAT_R32G32_FLOAT: + { + float *f = (float *) from; + attrib[0] = f[0]; + attrib[1] = f[1]; + attrib[2] = 0.0; + attrib[3] = 1.0; + } + break; + case PIPE_FORMAT_R32_FLOAT: + { + float *f = (float *) from; + attrib[0] = f[0]; + attrib[1] = 0.0; + attrib[2] = 0.0; + attrib[3] = 1.0; + } + break; + default: + abort(); + } + + /* XXX this will probably only work for softpipe */ + switch (vinfo->emit[j]) { + case EMIT_HEADER: + memset(out, 0, sizeof(struct vertex_header)); + out += sizeof(struct vertex_header) / 4; + break; + case EMIT_4F: + out[0] = attrib[0]; + out[1] = attrib[1]; + out[2] = attrib[2]; + out[3] = attrib[3]; + out += 4; + break; + default: + abort(); + } + + } + } +} + + /* Example of a fetch/emit passthrough shader which could be * generated when bypass_clipping is enabled on a passthrough vertex @@ -116,7 +205,6 @@ static void fetch_xyz_rgb_st( struct draw_context *draw, } } - static boolean update_shader( struct draw_context *draw ) { const struct vertex_info *vinfo = draw->render->get_vertex_info(draw->render); @@ -143,11 +231,15 @@ static boolean update_shader( struct draw_context *draw ) /* Just trying to figure out how this would work: */ - if (nr_attrs == 3 && - 0 /* some other tests */) + if (draw->rasterizer->bypass_vs || + (nr_attrs == 3 && 0 /* some other tests */)) { +#if 0 draw->vertex_fetch.pt_fetch = fetch_xyz_rgb_st; - assert(vinfo->size == 10); +#else + draw->vertex_fetch.pt_fetch = fetch_store_general; +#endif + /*assert(vinfo->size == 10);*/ return TRUE; } @@ -175,7 +267,6 @@ static boolean set_prim( struct draw_context *draw, } - boolean draw_passthrough_arrays(struct draw_context *draw, unsigned prim, @@ -184,10 +275,13 @@ draw_passthrough_arrays(struct draw_context *draw, { float *hw_verts; + if (draw_need_pipeline(draw)) + return FALSE; + if (!set_prim(draw, prim)) return FALSE; - if (!update_shader( draw )) + if (!update_shader(draw)) return FALSE; hw_verts = draw->render->allocate_vertices( draw->render, diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c index 7d6cd43410..ff71ba9b73 100644 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -31,6 +31,7 @@ */ #include "pipe/p_debug.h" +#include "pipe/p_util.h" #include "draw_private.h" #include "draw_context.h" @@ -118,7 +119,42 @@ static void draw_prim_queue_flush( struct draw_context *draw ) draw_vertex_cache_unreference( draw ); } +static INLINE void fetch_and_store(struct draw_context *draw) +{ + unsigned i; + + /* run vertex shader on vertex cache entries, four per invokation */ +#if 0 + { + const struct vertex_info *vinfo = draw->render->get_vertex_info(draw->render); + memcpy(draw->vs.queue[0].vertex, draw->vs.queue[i + j].elt, + count * vinfo->size); + } +#elif 0 + draw_update_vertex_fetch(draw); + for (i = 0; i < draw->vs.queue_nr; i += 4) { + struct vertex_header *dests[4]; + unsigned elts[4]; + struct tgsi_exec_machine *machine = &draw->machine; + int j, n = MIN2(4, draw->vs.queue_nr - i); + + for (j = 0; j < n; j++) { + elts[j] = draw->vs.queue[i + j].elt; + dests[j] = draw->vs.queue[i + j].vertex; + } + for ( ; j < 4; j++) { + elts[j] = elts[0]; + dests[j] = draw->vs.queue[i + j].vertex; + } + //fetch directly into dests + draw->vertex_fetch.fetch_func(draw, machine, dests, count); + } +#endif + + draw->vs.post_nr = draw->vs.queue_nr; + draw->vs.queue_nr = 0; +} void draw_do_flush( struct draw_context *draw, unsigned flags ) { @@ -134,7 +170,10 @@ void draw_do_flush( struct draw_context *draw, unsigned flags ) if (flags >= DRAW_FLUSH_SHADER_QUEUE) { if (draw->vs.queue_nr) - (*draw->shader_queue_flush)(draw); + if (draw->rasterizer->bypass_vs) + fetch_and_store(draw); + else + (*draw->shader_queue_flush)(draw); if (flags >= DRAW_FLUSH_PRIM_QUEUE) { if (draw->pq.queue_nr) @@ -485,7 +524,11 @@ draw_arrays(struct draw_context *draw, unsigned prim, } /* drawing done here: */ - draw_prim(draw, prim, start, count); + if (!draw->rasterizer->bypass_vs || + !draw_passthrough_arrays(draw, prim, start, count)) { + /* we have to run the whole pipeline */ + draw_prim(draw, prim, start, count); + } } -- cgit v1.2.3 From 34be3969505d378c5a6734a134f03d094a865c56 Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 13 Mar 2008 17:39:30 -0600 Subject: gallium: fix EMIT_HEADER case in draw_vf_set_vertex_info() --- src/gallium/auxiliary/draw/draw_vf.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vf.c b/src/gallium/auxiliary/draw/draw_vf.c index 4b5b2a86f6..f4e29a6293 100644 --- a/src/gallium/auxiliary/draw/draw_vf.c +++ b/src/gallium/auxiliary/draw/draw_vf.c @@ -265,7 +265,11 @@ void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf, } case EMIT_HEADER: /* XXX emit new DRAW_EMIT_HEADER attribute??? */ - count += sizeof(struct vertex_header) / 4; + attrs[nr_attrs].attrib = 0; + attrs[nr_attrs].format = DRAW_EMIT_PAD; + attrs[nr_attrs].offset = offsetof(struct vertex_header, data); + count += offsetof(struct vertex_header, data)/4; + nr_attrs++; break; case EMIT_1F: attrs[nr_attrs].attrib = j; -- cgit v1.2.3 From d34bc880a4d17420ec20d422dcb461783457c473 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 14 Mar 2008 08:42:08 +0000 Subject: tgsi: add debug_printf version of tgsi_dump --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 22 ++++++++++++++++++++++ src/gallium/auxiliary/tgsi/util/tgsi_dump.h | 4 ++++ 2 files changed, 26 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index aa78278700..7d292778ad 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -1563,3 +1563,25 @@ tgsi_dump_str( *str = dump.text; } + + +void tgsi_debug_dump( struct tgsi_token *tokens ) +{ + char *str, *p; + + tgsi_dump_str( &str, tokens, 0 ); + + p = str; + while (p != NULL) + { + char *end = strchr( p, '\n' ); + if (end != NULL) + { + *end++ = '\0'; + } + debug_printf( "%s\n", p ); + p = end; + } + + FREE( str ); +} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h index b983b38226..51d79a0362 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h @@ -20,6 +20,10 @@ tgsi_dump_str( const struct tgsi_token *tokens, unsigned flags ); +/* Dump to debug_printf() + */ +void tgsi_debug_dump( struct tgsi_token *tokens ); + #if defined __cplusplus } #endif -- cgit v1.2.3 From e4cdce43cebe6a2b38f7ea5145474ca2b12c57bb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 14 Mar 2008 08:42:45 +0000 Subject: gallium: fix some compiler warnings --- src/gallium/auxiliary/draw/draw_prim.c | 4 ++-- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 2 +- src/gallium/drivers/softpipe/sp_state_derived.c | 4 +++- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c index ff71ba9b73..888fa536ea 100644 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -121,8 +121,6 @@ static void draw_prim_queue_flush( struct draw_context *draw ) static INLINE void fetch_and_store(struct draw_context *draw) { - unsigned i; - /* run vertex shader on vertex cache entries, four per invokation */ #if 0 { @@ -131,6 +129,8 @@ static INLINE void fetch_and_store(struct draw_context *draw) count * vinfo->size); } #elif 0 + unsigned i; + draw_update_vertex_fetch(draw); for (i = 0; i < draw->vs.queue_nr; i += 4) { struct vertex_header *dests[4]; diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index 184aac16f4..db0913cb2b 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -196,7 +196,7 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) struct prim_header prim; const void *vertex_buffer = cvbr->vertex_buffer; const unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); - unsigned i, j; + unsigned i; prim.det = 0; prim.reset_line_stipple = 0; diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 10483675ea..82cb31ece7 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -71,8 +71,8 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) * simply emit the whole post-xform vertex as-is: */ struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; - vinfo_vbuf->num_attribs = 0; #if 0 + vinfo_vbuf->num_attribs = 0; /* special-case to allow memcpy of whole vertex */ draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); /* size in dwords or floats */ @@ -82,6 +82,8 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) /* for pass-through mode, we need a more explicit list of attribs */ const uint num = draw_num_vs_outputs(softpipe->draw); uint i; + + vinfo_vbuf->num_attribs = 0; draw_emit_vertex_attr(vinfo_vbuf, EMIT_HEADER, INTERP_NONE, 0); for (i = 0; i < num; i++) { draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); -- cgit v1.2.3 From 3088eb59497ec8621e003ce3bc87025f257c0a92 Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 14 Mar 2008 10:23:39 -0600 Subject: gallium: added some debug code (disabled) --- src/gallium/auxiliary/draw/draw_vs_exec.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 55bec14116..364693e0b4 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -166,6 +166,19 @@ vs_exec_run( struct draw_vertex_shader *shader, vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; } + +#if 0 /*DEBUG*/ + printf("Post xform vert:\n"); + for (slot = 0; slot < draw->num_vs_outputs; slot++) { + printf("%d: %f %f %f %f\n", slot, + vOut[j]->data[slot][0], + vOut[j]->data[slot][1], + vOut[j]->data[slot][2], + vOut[j]->data[slot][3]); + } +#endif + + } /* loop over vertices */ } -- cgit v1.2.3 From 9de9e1fe8c3f87fe672aed074348f07107fa3cec Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 14 Mar 2008 11:24:28 -0600 Subject: gallium: print warning rather than assert(0) for LOG/EXP opcodes Glean vertProg1 runs all the way through, rather than aborting. --- src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 34 +++++++++++++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index f2ed9e0353..ad871d2bdf 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -1516,11 +1516,41 @@ exec_instruction( break; case TGSI_OPCODE_EXP: - assert (0); + printf("TGSI: EXP opcode not implemented\n"); + /* from ARB_v_p: + tmp = ScalarLoad(op0); + result.x = 2^floor(tmp); + result.y = tmp - floor(tmp); + result.z = RoughApprox2ToX(tmp); + result.w = 1.0; + */ +#if 0 + /* something like this: */ + FETCH( &r[0], 0, CHAN_X ); + micro_exp2( &r[0], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } +#endif break; case TGSI_OPCODE_LOG: - assert (0); + printf("TGSI: LOG opcode not implemented\n"); + /* from ARB_v_p: + tmp = fabs(ScalarLoad(op0)); + result.x = floor(log2(tmp)); + result.y = tmp / 2^(floor(log2(tmp))); + result.z = RoughApproxLog2(tmp); + result.w = 1.0; + */ +#if 0 + /* something like this: */ + FETCH( &r[0], 0, CHAN_X ); + micro_lg2( &r[0], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } +#endif break; case TGSI_OPCODE_MUL: -- cgit v1.2.3 From 2b8f31a6daf6a52086a3454a5dfd1f8bac046804 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 14 Mar 2008 21:40:02 +0100 Subject: scons: Remove second occurence of draw_passthrough.c. --- src/gallium/auxiliary/draw/SConscript | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 64b444dbd5..d7fb86d992 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -14,10 +14,9 @@ draw = env.ConvenienceLibrary( 'draw_debug.c', 'draw_flatshade.c', 'draw_offset.c', - 'draw_prim.c', 'draw_passthrough.c', + 'draw_prim.c', 'draw_pstipple.c', - 'draw_passthrough.c', 'draw_stipple.c', 'draw_twoside.c', 'draw_unfilled.c', -- cgit v1.2.3 From f23207ca57095b620febaf723815cc3eef3e87bd Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 14 Mar 2008 21:44:06 +0100 Subject: tgsi: Use debug_printf(). --- src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index ad871d2bdf..78e7dec569 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -1516,7 +1516,7 @@ exec_instruction( break; case TGSI_OPCODE_EXP: - printf("TGSI: EXP opcode not implemented\n"); + debug_printf("TGSI: EXP opcode not implemented\n"); /* from ARB_v_p: tmp = ScalarLoad(op0); result.x = 2^floor(tmp); @@ -1535,7 +1535,7 @@ exec_instruction( break; case TGSI_OPCODE_LOG: - printf("TGSI: LOG opcode not implemented\n"); + debug_printf("TGSI: LOG opcode not implemented\n"); /* from ARB_v_p: tmp = fabs(ScalarLoad(op0)); result.x = floor(log2(tmp)); -- cgit v1.2.3 From 5a09ad8248ce452136ed96a3d46532b03c877618 Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 14 Mar 2008 16:13:35 -0600 Subject: gallium: add explicit control for point sprites (convert points to textured quads) New draw_enable_point_sprites() function. Fixes spriteblast.c demo --- src/gallium/auxiliary/draw/draw_context.c | 12 ++++++++++++ src/gallium/auxiliary/draw/draw_context.h | 2 ++ src/gallium/auxiliary/draw/draw_private.h | 1 + src/gallium/auxiliary/draw/draw_validate.c | 8 +++++++- src/gallium/auxiliary/draw/draw_wide_point.c | 3 ++- 5 files changed, 24 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index e8473a4c57..4cca965ac1 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -86,6 +86,7 @@ struct draw_context *draw_create( void ) draw->wide_point_threshold = 1000000.0; /* infinity */ draw->wide_line_threshold = 1.0; draw->line_stipple = TRUE; + draw->point_sprite = TRUE; draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ @@ -266,6 +267,17 @@ draw_enable_line_stipple(struct draw_context *draw, boolean enable) } +/** + * Tells draw module whether to convert points to quads for sprite mode. + */ +void +draw_enable_point_sprites(struct draw_context *draw, boolean enable) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->point_sprite = enable; +} + + /** * Ask the draw module for the location/slot of the given vertex attribute in * a post-transformed vertex. diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index e8e2b56bae..dae687e590 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -96,6 +96,8 @@ void draw_wide_line_threshold(struct draw_context *draw, float threshold); void draw_enable_line_stipple(struct draw_context *draw, boolean enable); +void draw_enable_point_sprites(struct draw_context *draw, boolean enable); + boolean draw_use_sse(struct draw_context *draw); diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 379b6c3206..1c65c3d1b2 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -239,6 +239,7 @@ struct draw_context float wide_point_threshold; /**< convert pnts to tris if larger than this */ float wide_line_threshold; /**< convert lines to tris if wider than this */ boolean line_stipple; /**< do line stipple? */ + boolean point_sprite; /**< convert points to quads for sprites? */ boolean use_sse; /* If a prim stage introduces new vertex attributes, they'll be stored here diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index 602fa3429d..33e5559508 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -76,6 +76,10 @@ draw_need_pipeline(const struct draw_context *draw) if (draw->rasterizer->offset_cw || draw->rasterizer->offset_ccw) return TRUE; + /* point sprites */ + if (draw->rasterizer->point_sprite && draw->point_sprite) + return TRUE; + /* two-side lighting */ if (draw->rasterizer->light_twoside) return TRUE; @@ -110,7 +114,9 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) && !draw->rasterizer->line_smooth); /* drawing large points? */ - if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) + if (draw->rasterizer->point_sprite && draw->point_sprite) + wide_points = TRUE; + else if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) wide_points = FALSE; else if (draw->rasterizer->point_size > draw->wide_point_threshold) wide_points = TRUE; diff --git a/src/gallium/auxiliary/draw/draw_wide_point.c b/src/gallium/auxiliary/draw/draw_wide_point.c index 65bd50f2b8..c53f7e6cb3 100644 --- a/src/gallium/auxiliary/draw/draw_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_wide_point.c @@ -184,7 +184,8 @@ static void widepoint_first_point( struct draw_stage *stage, wide->half_point_size = 0.5f * draw->rasterizer->point_size; /* XXX we won't know the real size if it's computed by the vertex shader! */ - if (draw->rasterizer->point_size > draw->wide_point_threshold) { + if ((draw->rasterizer->point_size > draw->wide_point_threshold) || + (draw->rasterizer->point_sprite && draw->point_sprite)) { stage->point = widepoint_point; } else { -- cgit v1.2.3 From cb98f71d42e4c714dfb0c3e29d28d8418a1ee86b Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 15 Mar 2008 00:55:28 +0000 Subject: gallium: Ensure we don't add null objects to the table, as they mark empty handles. --- src/gallium/auxiliary/util/u_handle_table.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c index ab427ee371..d25872972a 100644 --- a/src/gallium/auxiliary/util/u_handle_table.c +++ b/src/gallium/auxiliary/util/u_handle_table.c @@ -175,6 +175,10 @@ handle_table_set(struct handle_table *ht, if(!handle || handle > ht->size) return 0; + assert(object); + if(!object) + return 0; + index = handle - 1; /* grow the table if necessary */ -- cgit v1.2.3 From 9425a702bef7d3f601e9ddc2b801f00a3d52dbb8 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 17 Mar 2008 10:10:45 +0000 Subject: gallium: improvements, or extensions at least, to the passthrough path Passthrough is actually more tricky than you'd think... --- src/gallium/auxiliary/draw/draw_passthrough.c | 349 +++++++++++++++++------- src/gallium/auxiliary/draw/draw_vbuf.h | 6 +- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 12 +- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 13 +- 4 files changed, 275 insertions(+), 105 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_passthrough.c b/src/gallium/auxiliary/draw/draw_passthrough.c index d16f056191..fdec6a591b 100644 --- a/src/gallium/auxiliary/draw/draw_passthrough.c +++ b/src/gallium/auxiliary/draw/draw_passthrough.c @@ -85,14 +85,52 @@ fetch_store_general( struct draw_context *draw, const unsigned *pitch = draw->vertex_fetch.pitch; const ubyte **src = draw->vertex_fetch.src_ptr; - for (i = start; i < count; i++) { + for (i = start; i < start + count; i++) { for (j = 0; j < nr_attrs; j++) { + /* vinfo->src_index is the output of the vertex shader + * matching this hw-vertex component. + * + * In passthrough, we require a 1:1 mapping between vertex + * shader outputs and inputs, which in turn correspond to + * vertex elements in the state. So, this is the vertex + * element we're interested in... + */ const uint jj = vinfo->src_index[j]; const enum pipe_format srcFormat = draw->vertex_element[jj].src_format; const ubyte *from = src[jj] + i * pitch[jj]; float attrib[4]; + /* Except... When we're not. Two cases EMIT_HEADER & + * EMIT_1F_PSIZE don't consume an input. Should have some + * method for indicating this, or change the logic here + * somewhat so it doesn't matter. + * + * Just hack this up now, do something better about it later. + */ + if (vinfo->emit[j] == EMIT_HEADER) { + memset(out, 0, sizeof(struct vertex_header)); + out += sizeof(struct vertex_header) / 4; + continue; + } + else if (vinfo->emit[j] == EMIT_1F_PSIZE) { + out[0] = 1.0; /* xxx */ + out += 1; + continue; + } + + + /* The normal fetch/emit code: + */ switch (srcFormat) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + { + ubyte *ub = (ubyte *) from; + attrib[0] = UBYTE_TO_FLOAT(ub[0]); + attrib[1] = UBYTE_TO_FLOAT(ub[1]); + attrib[2] = UBYTE_TO_FLOAT(ub[2]); + attrib[3] = UBYTE_TO_FLOAT(ub[3]); + } + break; case PIPE_FORMAT_R32G32B32A32_FLOAT: { float *f = (float *) from; @@ -130,14 +168,21 @@ fetch_store_general( struct draw_context *draw, } break; default: - abort(); + assert(0); } - /* XXX this will probably only work for softpipe */ + debug_printf("attrib %d: %f %f %f %f\n", j, + attrib[0], attrib[1], attrib[2], attrib[3]); + switch (vinfo->emit[j]) { - case EMIT_HEADER: - memset(out, 0, sizeof(struct vertex_header)); - out += sizeof(struct vertex_header) / 4; + case EMIT_1F: + out[0] = attrib[0]; + out += 1; + break; + case EMIT_2F: + out[0] = attrib[0]; + out[1] = attrib[1]; + out += 2; break; case EMIT_4F: out[0] = attrib[0]; @@ -147,64 +192,15 @@ fetch_store_general( struct draw_context *draw, out += 4; break; default: - abort(); + assert(0); } - } + debug_printf("\n"); } } -/* Example of a fetch/emit passthrough shader which could be - * generated when bypass_clipping is enabled on a passthrough vertex - * shader. - */ -static void fetch_xyz_rgb_st( struct draw_context *draw, - float *out, - unsigned start, - unsigned count ) -{ - const unsigned *pitch = draw->vertex_fetch.pitch; - const ubyte **src = draw->vertex_fetch.src_ptr; - unsigned i; - - const ubyte *xyzw = src[0] + start * pitch[0]; - const ubyte *rgba = src[1] + start * pitch[1]; - const ubyte *st = src[2] + start * pitch[2]; - - /* loop over vertex attributes (vertex shader inputs) - */ - for (i = 0; i < count; i++) { - { - const float *in = (const float *)xyzw; xyzw += pitch[0]; - /* decode input, encode output. Assume both are float[4] */ - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; - out[3] = in[3]; - } - - { - const float *in = (const float *)rgba; rgba += pitch[1]; - /* decode input, encode output. Assume both are float[4] */ - out[4] = in[0]; - out[5] = in[1]; - out[6] = in[2]; - out[7] = in[3]; - } - - { - const float *in = (const float *)st; st += pitch[2]; - /* decode input, encode output. Assume both are float[2] */ - out[8] = in[0]; - out[9] = in[1]; - } - - out += 10; - } -} - static boolean update_shader( struct draw_context *draw ) { const struct vertex_info *vinfo = draw->render->get_vertex_info(draw->render); @@ -229,70 +225,166 @@ static boolean update_shader( struct draw_context *draw ) draw->pt.hw_vertex_size = vinfo->size * 4; - /* Just trying to figure out how this would work: - */ - if (draw->rasterizer->bypass_vs || - (nr_attrs == 3 && 0 /* some other tests */)) - { -#if 0 - draw->vertex_fetch.pt_fetch = fetch_xyz_rgb_st; -#else - draw->vertex_fetch.pt_fetch = fetch_store_general; -#endif - /*assert(vinfo->size == 10);*/ + draw->vertex_fetch.pt_fetch = fetch_store_general; + return TRUE; +} + + + + +static boolean split_prim_inplace(unsigned prim, unsigned *first, unsigned *incr) +{ + switch (prim) { + case PIPE_PRIM_POINTS: + *first = 1; + *incr = 1; + return TRUE; + case PIPE_PRIM_LINES: + *first = 2; + *incr = 2; return TRUE; + case PIPE_PRIM_LINE_STRIP: + *first = 2; + *incr = 1; + return TRUE; + case PIPE_PRIM_TRIANGLES: + *first = 3; + *incr = 3; + return TRUE; + case PIPE_PRIM_TRIANGLE_STRIP: + *first = 3; + *incr = 1; + return TRUE; + case PIPE_PRIM_QUADS: + *first = 4; + *incr = 4; + return TRUE; + case PIPE_PRIM_QUAD_STRIP: + *first = 4; + *incr = 2; + return TRUE; + default: + *first = 0; + *incr = 1; /* set to one so that count % incr works */ + return FALSE; } - - return FALSE; } static boolean set_prim( struct draw_context *draw, - unsigned prim ) + unsigned prim, + unsigned count ) { assert(!draw->user.elts); - draw->pt.prim = prim; - switch (prim) { case PIPE_PRIM_LINE_LOOP: + if (count > 1024) + return FALSE; + return draw->render->set_primitive( draw->render, PIPE_PRIM_LINE_STRIP ); + + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + if (count > 1024) + return FALSE; + return draw->render->set_primitive( draw->render, prim ); + case PIPE_PRIM_QUADS: case PIPE_PRIM_QUAD_STRIP: - return FALSE; + return draw->render->set_primitive( draw->render, PIPE_PRIM_TRIANGLES ); + default: - draw->render->set_primitive( draw->render, prim ); - return TRUE; + return draw->render->set_primitive( draw->render, prim ); + break; } + + return TRUE; } -boolean -draw_passthrough_arrays(struct draw_context *draw, - unsigned prim, - unsigned start, - unsigned count) + +#define INDEX(i) (start + (i)) +static void pt_draw_arrays( struct draw_context *draw, + unsigned start, + unsigned length ) { - float *hw_verts; + ushort *tmp = NULL; + unsigned i, j; - if (draw_need_pipeline(draw)) - return FALSE; + switch (draw->pt.prim) { + case PIPE_PRIM_LINE_LOOP: + tmp = MALLOC( sizeof(ushort) * (length + 1) ); - if (!set_prim(draw, prim)) - return FALSE; + for (i = 0; i < length; i++) + tmp[i] = INDEX(i); + tmp[length] = 0; - if (!update_shader(draw)) - return FALSE; + draw->render->draw( draw->render, + tmp, + length+1 ); + break; - hw_verts = draw->render->allocate_vertices( draw->render, - draw->pt.hw_vertex_size, - count ); + + case PIPE_PRIM_QUAD_STRIP: + tmp = MALLOC( sizeof(ushort) * (length / 2 * 6) ); + + for (j = i = 0; i + 3 < length; i += 2, j += 6) { + tmp[j+0] = INDEX(i+0); + tmp[j+1] = INDEX(i+1); + tmp[j+2] = INDEX(i+3); + + tmp[j+3] = INDEX(i+2); + tmp[j+4] = INDEX(i+0); + tmp[j+5] = INDEX(i+3); + } + + if (j) + draw->render->draw( draw->render, tmp, j ); + break; + + case PIPE_PRIM_QUADS: + tmp = MALLOC( sizeof(int) * (length / 4 * 6) ); + + for (j = i = 0; i + 3 < length; i += 4, j += 6) { + tmp[j+0] = INDEX(i+0); + tmp[j+1] = INDEX(i+1); + tmp[j+2] = INDEX(i+3); + + tmp[j+3] = INDEX(i+1); + tmp[j+4] = INDEX(i+2); + tmp[j+5] = INDEX(i+3); + } + + if (j) + draw->render->draw( draw->render, tmp, j ); + break; + + default: + draw->render->draw_arrays( draw->render, + start, + length ); + break; + } + + if (tmp) + FREE(tmp); +} + + + +static boolean do_draw( struct draw_context *draw, + unsigned start, unsigned count ) +{ + float *hw_verts = + draw->render->allocate_vertices( draw->render, + (ushort)draw->pt.hw_vertex_size, + (ushort)count ); if (!hw_verts) return FALSE; - /* Single routine to fetch vertices, run shader and emit HW verts. - * Clipping and viewport transformation are done on hardware. + /* Single routine to fetch vertices and emit HW verts. */ draw->vertex_fetch.pt_fetch( draw, hw_verts, @@ -301,9 +393,9 @@ draw_passthrough_arrays(struct draw_context *draw, /* Draw arrays path to avoid re-emitting index list again and * again. */ - draw->render->draw_arrays( draw->render, - start, - count ); + pt_draw_arrays( draw, + 0, + count ); draw->render->release_vertices( draw->render, @@ -314,3 +406,68 @@ draw_passthrough_arrays(struct draw_context *draw, return TRUE; } + +boolean +draw_passthrough_arrays(struct draw_context *draw, + unsigned prim, + unsigned start, + unsigned count) +{ + unsigned i = 0; + unsigned first, incr; + + //debug_printf("%s prim %d start %d count %d\n", __FUNCTION__, prim, start, count); + + split_prim_inplace(prim, &first, &incr); + + count -= (count - first) % incr; + + debug_printf("%s %d %d %d\n", __FUNCTION__, prim, start, count); + + if (draw_need_pipeline(draw)) + return FALSE; + + debug_printf("%s AAA\n", __FUNCTION__); + + if (!set_prim(draw, prim, count)) + return FALSE; + + /* XXX: need a single value that reflects the most recent call to + * driver->set_primitive: + */ + draw->pt.prim = prim; + + debug_printf("%s BBB\n", __FUNCTION__); + + if (!update_shader(draw)) + return FALSE; + + debug_printf("%s CCC\n", __FUNCTION__); + + /* Chop this up into bite-sized pieces that a driver should be able + * to devour -- problem is we don't have a quick way to query the + * driver on the maximum size for this chunk in the current state. + */ + while (i + first <= count) { + int nr = MIN2( count - i, 1024 ); + + /* snap to prim boundary + */ + nr -= (nr - first) % incr; + + if (!do_draw( draw, start + i, nr )) { + assert(0); + return FALSE; + } + + /* increment allowing for repeated vertices + */ + i += nr - (first - incr); + } + + + debug_printf("%s DONE\n", __FUNCTION__); + return TRUE; +} + + diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index 5e7de905c1..e90f37872a 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -74,9 +74,11 @@ struct vbuf_render { /** * Notify the renderer of the current primitive when it changes. - * Prim is restricted to TRIANGLES, LINES and POINTS. + * Must succeed for TRIANGLES, LINES and POINTS. Other prims at + * the discretion of the driver, for the benefit of the passthrough + * path. */ - void (*set_primitive)( struct vbuf_render *, unsigned prim ); + boolean (*set_primitive)( struct vbuf_render *, unsigned prim ); /** * DrawElements, note indices are ushort: diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index 9d5f609220..eb64f51943 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -116,7 +116,7 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render, } -static void +static boolean i915_vbuf_render_set_primitive( struct vbuf_render *render, unsigned prim ) { @@ -125,15 +125,17 @@ i915_vbuf_render_set_primitive( struct vbuf_render *render, switch(prim) { case PIPE_PRIM_POINTS: i915_render->hwprim = PRIM3D_POINTLIST; - break; + return TRUE; case PIPE_PRIM_LINES: i915_render->hwprim = PRIM3D_LINELIST; - break; + return TRUE; case PIPE_PRIM_TRIANGLES: i915_render->hwprim = PRIM3D_TRILIST; - break; + return TRUE; default: - assert(0); + /* Actually, can handle a lot more just fine... Fixme. + */ + return FALSE; } } diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index e7d0cb2b87..d940718ed2 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -101,11 +101,20 @@ sp_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, } -static void +static boolean sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); - cvbr->prim = prim; + if (prim == PIPE_PRIM_TRIANGLES || + prim == PIPE_PRIM_LINES || + prim == PIPE_PRIM_POINTS) { + cvbr->prim = prim; + return TRUE; + } + else { + return FALSE; + } + } -- cgit v1.2.3 From e3a747f1fefc147e8dca5729bcc8d68f419c595a Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 17 Mar 2008 15:37:42 -0600 Subject: gallium: new util_draw_texquad() function. --- src/gallium/auxiliary/util/u_draw_quad.c | 112 +++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_draw_quad.h | 37 ++++++++++ 2 files changed, 149 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_draw_quad.c create mode 100644 src/gallium/auxiliary/util/u_draw_quad.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c new file mode 100644 index 0000000000..79a69de633 --- /dev/null +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -0,0 +1,112 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_winsys.h" +#include "util/u_draw_quad.h" + + +/** + * Draw screen-aligned textured quad. + */ +void +util_draw_texquad(struct pipe_context *pipe, + float x0, float y0, float x1, float y1, float z) +{ + struct pipe_buffer *vbuf; + struct pipe_vertex_buffer vbuffer; + struct pipe_vertex_element velement; + uint numAttribs = 2, vertexBytes, i, j; + float *v; + + vertexBytes = 4 * (4 * numAttribs * sizeof(float)); + + /* XXX create one-time */ + vbuf = pipe->winsys->buffer_create(pipe->winsys, 32, + PIPE_BUFFER_USAGE_VERTEX, vertexBytes); + assert(vbuf); + + v = (float *) pipe->winsys->buffer_map(pipe->winsys, vbuf, + PIPE_BUFFER_USAGE_CPU_WRITE); + + /* + * Load vertex buffer + */ + for (i = j = 0; i < 4; i++) { + v[j + 2] = z; /* z */ + v[j + 3] = 1.0; /* w */ + v[j + 6] = 0.0; /* r */ + v[j + 7] = 1.0; /* q */ + j += 8; + } + + v[0] = x0; + v[1] = y0; + v[4] = 0.0; /*s*/ + v[5] = 0.0; /*t*/ + + v[8] = x1; + v[9] = y0; + v[12] = 1.0; + v[13] = 0.0; + + v[16] = x1; + v[17] = y1; + v[20] = 1.0; + v[21] = 1.0; + + v[24] = x0; + v[25] = y1; + v[28] = 0.0; + v[29] = 1.0; + + pipe->winsys->buffer_unmap(pipe->winsys, vbuf); + + /* tell pipe about the vertex buffer */ + vbuffer.buffer = vbuf; + vbuffer.pitch = numAttribs * 4 * sizeof(float); /* vertex size */ + vbuffer.buffer_offset = 0; + pipe->set_vertex_buffer(pipe, 0, &vbuffer); + + /* tell pipe about the vertex attributes */ + for (i = 0; i < numAttribs; i++) { + velement.src_offset = i * 4 * sizeof(float); + velement.vertex_buffer_index = 0; + velement.src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + velement.nr_components = 4; + pipe->set_vertex_element(pipe, i, &velement); + } + + /* draw */ + pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_FAN, 0, 4); + + /* XXX: do one-time */ + pipe_buffer_reference(pipe->winsys, &vbuf, NULL); +} diff --git a/src/gallium/auxiliary/util/u_draw_quad.h b/src/gallium/auxiliary/util/u_draw_quad.h new file mode 100644 index 0000000000..a97f55d2ef --- /dev/null +++ b/src/gallium/auxiliary/util/u_draw_quad.h @@ -0,0 +1,37 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_DRAWQUAD_H +#define U_DRAWQUAD_H + + +extern void +util_draw_texquad(struct pipe_context *pipe, + float x0, float y0, float x1, float y1, float z); + + +#endif -- cgit v1.2.3 From f1b42595d02828bcc617bc88b4ce6cf38a69ddbf Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 17 Mar 2008 15:38:14 -0600 Subject: gallium: new mipmap generation code Based on code from Mesa's state tracker. Still need to implement fallbacks for those texture formats which can't generally be rendered to. --- src/gallium/auxiliary/util/u_gen_mipmap.c | 495 ++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_gen_mipmap.h | 63 ++++ 2 files changed, 558 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_gen_mipmap.c create mode 100644 src/gallium/auxiliary/util/u_gen_mipmap.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c new file mode 100644 index 0000000000..f0b822929e --- /dev/null +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -0,0 +1,495 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Mipmap generation utility + * + * @author Brian Paul + */ + + +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" +#include "pipe/p_shader_tokens.h" + +#include "util/u_draw_quad.h" +#include "util/u_gen_mipmap.h" + +#include "tgsi/util/tgsi_build.h" +#include "tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_parse.h" + + +/** + * Make simple fragment shader: + * TEX OUT[0], IN[0], SAMP[0], 2D; + * END; + */ +static void +make_fragment_shader(struct gen_mipmap_state *ctx) +{ + struct pipe_context *pipe = ctx->pipe; + uint maxTokens = 100; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + const uint procType = TGSI_PROCESSOR_FRAGMENT; + uint ti = 0; + struct pipe_shader_state shader; + + tokens = (struct tgsi_token *) malloc(maxTokens * sizeof(tokens[0])); + + /* shader header + */ + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + + processor = (struct tgsi_processor *) &tokens[2]; + *processor = tgsi_build_processor( procType, header ); + + ti = 3; + + /* declare TEX[0] input */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 0; + /* XXX this could be linear... */ + decl.Declaration.Interpolate = 1; + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + /* declare color[0] output */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + /* declare sampler */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + /* TEX instruction */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + + /* END instruction */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + +#if 0 /*debug*/ + tgsi_dump(tokens, 0); +#endif + + shader.tokens = tokens; + ctx->fs = pipe->create_fs_state(pipe, &shader); +} + + +/** + * Make simple fragment shader: + * MOV OUT[0], IN[0]; + * MOV OUT[1], IN[1]; + * END; + * + * XXX eliminate this when vertex passthrough-mode is more solid. + */ +static void +make_vertex_shader(struct gen_mipmap_state *ctx) +{ + struct pipe_context *pipe = ctx->pipe; + uint maxTokens = 100; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + const uint procType = TGSI_PROCESSOR_VERTEX; + uint ti = 0; + struct pipe_shader_state shader; + + tokens = (struct tgsi_token *) malloc(maxTokens * sizeof(tokens[0])); + + /* shader header + */ + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + + processor = (struct tgsi_processor *) &tokens[2]; + *processor = tgsi_build_processor( procType, header ); + + ti = 3; + + /* declare POS input */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + /* + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION; + decl.Semantic.SemanticIndex = 0; + */ + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + /* declare TEX[0] input */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + /* + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 0; + */ + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = 1; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + /* declare POS output */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + /* declare TEX[0] output */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = 1; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + /* MOVE out[0], in[0]; # POS */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + + /* MOVE out[1], in[1]; # TEX */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 1; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 1; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + + /* END instruction */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + +#if 0 /*debug*/ + tgsi_dump(tokens, 0); +#endif + + shader.tokens = tokens; + ctx->vs = pipe->create_vs_state(pipe, &shader); +} + + +/** + * Create a mipmap generation context. + * The idea is to create one of these and re-use it each time we need to + * generate a mipmap. + */ +struct gen_mipmap_state * +util_create_gen_mipmap(struct pipe_context *pipe) +{ + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state depthstencil; + struct pipe_rasterizer_state rasterizer; + struct gen_mipmap_state *ctx; + + ctx = CALLOC_STRUCT(gen_mipmap_state); + if (!ctx) + return NULL; + + ctx->pipe = pipe; + + /* we don't use blending, but need to set valid values */ + memset(&blend, 0, sizeof(blend)); + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.colormask = PIPE_MASK_RGBA; + ctx->blend = pipe->create_blend_state(pipe, &blend); + + /* depth/stencil/alpha */ + memset(&depthstencil, 0, sizeof(depthstencil)); + ctx->depthstencil = pipe->create_depth_stencil_alpha_state(pipe, &depthstencil); + + /* rasterizer */ + memset(&rasterizer, 0, sizeof(rasterizer)); + rasterizer.front_winding = PIPE_WINDING_CW; + rasterizer.cull_mode = PIPE_WINDING_NONE; + rasterizer.bypass_clipping = 1; /* bypasses viewport too */ + //rasterizer.bypass_vs = 1; + ctx->rasterizer = pipe->create_rasterizer_state(pipe, &rasterizer); + +#if 0 + /* viewport */ + ctx->viewport.scale[0] = 1.0; + ctx->viewport.scale[1] = 1.0; + ctx->viewport.scale[2] = 1.0; + ctx->viewport.scale[3] = 1.0; + ctx->viewport.translate[0] = 0.0; + ctx->viewport.translate[1] = 0.0; + ctx->viewport.translate[2] = 0.0; + ctx->viewport.translate[3] = 0.0; +#endif + + make_vertex_shader(ctx); + make_fragment_shader(ctx); + + return ctx; +} + + +/** + * Destroy a mipmap generation context + */ +void +util_destroy_gen_mipmap(struct gen_mipmap_state *ctx) +{ + struct pipe_context *pipe = ctx->pipe; + + pipe->delete_blend_state(pipe, ctx->blend); + pipe->delete_depth_stencil_alpha_state(pipe, ctx->depthstencil); + pipe->delete_rasterizer_state(pipe, ctx->rasterizer); + pipe->delete_vs_state(pipe, ctx->vs); + pipe->delete_fs_state(pipe, ctx->fs); + + FREE(ctx); +} + + +#if 0 +static void +simple_viewport(struct pipe_context *pipe, uint width, uint height) +{ + struct pipe_viewport_state vp; + + vp.scale[0] = 0.5 * width; + vp.scale[1] = -0.5 * height; + vp.scale[2] = 1.0; + vp.scale[3] = 1.0; + vp.translate[0] = 0.5 * width; + vp.translate[1] = 0.5 * height; + vp.translate[2] = 0.0; + vp.translate[3] = 0.0; + + pipe->set_viewport_state(pipe, &vp); +} +#endif + + +/** + * Generate mipmap images. It's assumed all needed texture memory is + * already allocated. + * + * \param pt the texture to generate mipmap levels for + * \param face which cube face to generate mipmaps for (0 for non-cube maps) + * \param baseLevel the first mipmap level to use as a src + * \param lastLevel the last mipmap level to generate + */ +void +util_gen_mipmap(struct gen_mipmap_state *ctx, + struct pipe_texture *pt, + uint face, uint baseLevel, uint lastLevel) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_framebuffer_state fb; + struct pipe_sampler_state sampler; + void *sampler_cso; + uint dstLevel; + uint zslice = 0; + + /* check if we can render in the texture's format */ + if (!screen->is_format_supported(screen, pt->format, PIPE_SURFACE)) { +#if 0 + fallback_gen_mipmap(ctx, pt, face, baseLevel, lastLevel); +#endif + return; + } + + /* init framebuffer state */ + memset(&fb, 0, sizeof(fb)); + fb.num_cbufs = 1; + + /* sampler state */ + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.normalized_coords = 1; + + /* bind our state */ + pipe->bind_blend_state(pipe, ctx->blend); + pipe->bind_depth_stencil_alpha_state(pipe, ctx->depthstencil); + pipe->bind_rasterizer_state(pipe, ctx->rasterizer); + pipe->bind_vs_state(pipe, ctx->vs); + pipe->bind_fs_state(pipe, ctx->fs); +#if 0 + pipe->set_viewport_state(pipe, &ctx->viewport); +#endif + + /* + * XXX for small mipmap levels, it may be faster to use the software + * fallback path... + */ + for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { + const uint srcLevel = dstLevel - 1; + + /* + * Setup framebuffer / dest surface + */ + fb.cbufs[0] = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); + pipe->set_framebuffer_state(pipe, &fb); + + /* + * Setup sampler state + * Note: we should only have to set the min/max LOD clamps to ensure + * we grab texels from the right mipmap level. But some hardware + * has trouble with min clamping so we also set the lod_bias to + * try to work around that. + */ + sampler.min_lod = sampler.max_lod = srcLevel; + sampler.lod_bias = srcLevel; + sampler_cso = pipe->create_sampler_state(pipe, &sampler); + pipe->bind_sampler_states(pipe, 1, &sampler_cso); + +#if 0 + simple_viewport(pipe, pt->width[dstLevel], pt->height[dstLevel]); +#endif + + pipe->set_sampler_textures(pipe, 1, &pt); + + /* quad coords in window coords (bypassing clipping, viewport mapping) */ + util_draw_texquad(pipe, + 0.0F, 0.0F, /* x0, y0 */ + (float) pt->width[dstLevel], /* x1 */ + (float) pt->height[dstLevel], /* y1 */ + 0.0F); /* z */ + + + pipe->flush(pipe, PIPE_FLUSH_WAIT); + + /*pipe->texture_update(pipe, pt); not really needed */ + + pipe->delete_sampler_state(pipe, sampler_cso); + } + + /* Note: caller must restore pipe/gallium state at this time */ +} diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.h b/src/gallium/auxiliary/util/u_gen_mipmap.h new file mode 100644 index 0000000000..6ee909f0a6 --- /dev/null +++ b/src/gallium/auxiliary/util/u_gen_mipmap.h @@ -0,0 +1,63 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_GENMIPMAP_H +#define U_GENMIPMAP_H + +#include "pipe/p_state.h" + + +struct gen_mipmap_state +{ + struct pipe_context *pipe; + + void *blend; + void *depthstencil; + void *rasterizer; + /*struct pipe_viewport_state viewport;*/ + struct pipe_sampler_state *vs; + struct pipe_sampler_state *fs; +}; + + + +extern struct gen_mipmap_state * +util_create_gen_mipmap(struct pipe_context *pipe); + + +extern void +util_destroy_gen_mipmap(struct gen_mipmap_state *ctx); + + + +extern void +util_gen_mipmap(struct gen_mipmap_state *ctx, + struct pipe_texture *pt, + uint face, uint baseLevel, uint lastLevel); + + +#endif -- cgit v1.2.3 From 11d723cda68d775fa20e43db16ac2ce7e00430b1 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 17 Mar 2008 15:39:19 -0600 Subject: gallium: added new u_draw_quad.c and u_gen_mipmap.c files. --- src/gallium/auxiliary/util/Makefile | 2 ++ src/gallium/auxiliary/util/SConscript | 2 ++ 2 files changed, 4 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 2016c6fb1f..d11fa6e803 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -7,6 +7,8 @@ C_SOURCES = \ p_debug.c \ p_tile.c \ p_util.c \ + u_draw_quad.c \ + u_gen_mipmap.c \ u_handle_table.c \ u_hash_table.c \ u_mm.c \ diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 154a3eca8c..6d4ba8643d 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -6,6 +6,8 @@ util = env.ConvenienceLibrary( 'p_debug.c', 'p_tile.c', 'p_util.c', + 'u_draw_quad.c', + 'u_gen_mipmap.c', 'u_handle_table.c', 'u_hash_table.c', 'u_mm.c', -- cgit v1.2.3 From 0c715de39fa8337a2753dacd77ed280000416c1a Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 17 Mar 2008 15:37:09 -0700 Subject: cell: Fix simple register allocator THere are 64-bits in a uint64_t, not 128. Duh. --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index a996218ce7..842d713f84 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -326,8 +326,8 @@ int spe_allocate_available_register(struct spe_function *p) { unsigned i; for (i = 0; i < 128; i++) { - const uint64_t mask = (1ULL << (i % 128)); - const unsigned idx = i / 128; + const uint64_t mask = (1ULL << (i % 64)); + const unsigned idx = i / 64; if ((p->regs[idx] & mask) != 0) { p->regs[idx] &= ~mask; @@ -341,8 +341,8 @@ int spe_allocate_available_register(struct spe_function *p) int spe_allocate_register(struct spe_function *p, int reg) { - const unsigned idx = reg / 128; - const unsigned bit = reg % 128; + const unsigned idx = reg / 64; + const unsigned bit = reg % 64; assert((p->regs[idx] & (1ULL << bit)) != 0); @@ -353,8 +353,8 @@ int spe_allocate_register(struct spe_function *p, int reg) void spe_release_register(struct spe_function *p, int reg) { - const unsigned idx = reg / 128; - const unsigned bit = reg % 128; + const unsigned idx = reg / 64; + const unsigned bit = reg % 64; assert((p->regs[idx] & (1ULL << bit)) == 0); -- cgit v1.2.3 From 9f106a8683ec89b873f0237fbb6930a63b89dfa0 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 17 Mar 2008 16:07:54 -0700 Subject: cell: Don't free NULL code pointers --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 842d713f84..24be65bff9 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -316,7 +316,9 @@ void spe_init_func(struct spe_function *p, unsigned code_size) void spe_release_func(struct spe_function *p) { - align_free(p->store); + if (p->store != NULL) { + align_free(p->store); + } p->store = NULL; p->csr = NULL; } -- cgit v1.2.3 From de779c8d319b6269705cd7e6f2009243a771a2b9 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 17 Mar 2008 17:45:00 -0600 Subject: gallium: initial gen mipmap s/w fallback code --- src/gallium/auxiliary/util/u_gen_mipmap.c | 609 +++++++++++++++++++++++++++++- 1 file changed, 607 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index f0b822929e..8ca06281ab 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -34,6 +34,7 @@ #include "pipe/p_context.h" +#include "pipe/p_debug.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_util.h" @@ -48,6 +49,612 @@ #include "tgsi/util/tgsi_parse.h" + +enum dtype +{ + UBYTE, + UBYTE_3_3_2, + USHORT, + USHORT_4_4_4_4, + USHORT_5_6_5, + USHORT_1_5_5_5_REV, + UINT, + FLOAT, + HALF_FLOAT +}; + + +typedef ushort half_float; + + +#if 0 +extern half_float +float_to_half(float f); + +extern float +half_to_float(half_float h); +#endif + + +/** + * Average together two rows of a source image to produce a single new + * row in the dest image. It's legal for the two source rows to point + * to the same data. The source width must be equal to either the + * dest width or two times the dest width. + * \param datatype GL_UNSIGNED_BYTE, GL_UNSIGNED_SHORT, GL_FLOAT, etc. + * \param comps number of components per pixel (1..4) + */ +static void +do_row(enum dtype datatype, uint comps, int srcWidth, + const void *srcRowA, const void *srcRowB, + int dstWidth, void *dstRow) +{ + const uint k0 = (srcWidth == dstWidth) ? 0 : 1; + const uint colStride = (srcWidth == dstWidth) ? 1 : 2; + + assert(comps >= 1); + assert(comps <= 4); + + /* This assertion is no longer valid with non-power-of-2 textures + assert(srcWidth == dstWidth || srcWidth == 2 * dstWidth); + */ + + if (datatype == UBYTE && comps == 4) { + uint i, j, k; + const ubyte(*rowA)[4] = (const ubyte(*)[4]) srcRowA; + const ubyte(*rowB)[4] = (const ubyte(*)[4]) srcRowB; + ubyte(*dst)[4] = (ubyte(*)[4]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4; + dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4; + dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4; + dst[i][3] = (rowA[j][3] + rowA[k][3] + rowB[j][3] + rowB[k][3]) / 4; + } + } + else if (datatype == UBYTE && comps == 3) { + uint i, j, k; + const ubyte(*rowA)[3] = (const ubyte(*)[3]) srcRowA; + const ubyte(*rowB)[3] = (const ubyte(*)[3]) srcRowB; + ubyte(*dst)[3] = (ubyte(*)[3]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4; + dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4; + dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4; + } + } + else if (datatype == UBYTE && comps == 2) { + uint i, j, k; + const ubyte(*rowA)[2] = (const ubyte(*)[2]) srcRowA; + const ubyte(*rowB)[2] = (const ubyte(*)[2]) srcRowB; + ubyte(*dst)[2] = (ubyte(*)[2]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) >> 2; + dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) >> 2; + } + } + else if (datatype == UBYTE && comps == 1) { + uint i, j, k; + const ubyte *rowA = (const ubyte *) srcRowA; + const ubyte *rowB = (const ubyte *) srcRowB; + ubyte *dst = (ubyte *) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) >> 2; + } + } + + else if (datatype == USHORT && comps == 4) { + uint i, j, k; + const ushort(*rowA)[4] = (const ushort(*)[4]) srcRowA; + const ushort(*rowB)[4] = (const ushort(*)[4]) srcRowB; + ushort(*dst)[4] = (ushort(*)[4]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4; + dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4; + dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4; + dst[i][3] = (rowA[j][3] + rowA[k][3] + rowB[j][3] + rowB[k][3]) / 4; + } + } + else if (datatype == USHORT && comps == 3) { + uint i, j, k; + const ushort(*rowA)[3] = (const ushort(*)[3]) srcRowA; + const ushort(*rowB)[3] = (const ushort(*)[3]) srcRowB; + ushort(*dst)[3] = (ushort(*)[3]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4; + dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4; + dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4; + } + } + else if (datatype == USHORT && comps == 2) { + uint i, j, k; + const ushort(*rowA)[2] = (const ushort(*)[2]) srcRowA; + const ushort(*rowB)[2] = (const ushort(*)[2]) srcRowB; + ushort(*dst)[2] = (ushort(*)[2]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4; + dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4; + } + } + else if (datatype == USHORT && comps == 1) { + uint i, j, k; + const ushort *rowA = (const ushort *) srcRowA; + const ushort *rowB = (const ushort *) srcRowB; + ushort *dst = (ushort *) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) / 4; + } + } + + else if (datatype == FLOAT && comps == 4) { + uint i, j, k; + const float(*rowA)[4] = (const float(*)[4]) srcRowA; + const float(*rowB)[4] = (const float(*)[4]) srcRowB; + float(*dst)[4] = (float(*)[4]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + + rowB[j][0] + rowB[k][0]) * 0.25F; + dst[i][1] = (rowA[j][1] + rowA[k][1] + + rowB[j][1] + rowB[k][1]) * 0.25F; + dst[i][2] = (rowA[j][2] + rowA[k][2] + + rowB[j][2] + rowB[k][2]) * 0.25F; + dst[i][3] = (rowA[j][3] + rowA[k][3] + + rowB[j][3] + rowB[k][3]) * 0.25F; + } + } + else if (datatype == FLOAT && comps == 3) { + uint i, j, k; + const float(*rowA)[3] = (const float(*)[3]) srcRowA; + const float(*rowB)[3] = (const float(*)[3]) srcRowB; + float(*dst)[3] = (float(*)[3]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + + rowB[j][0] + rowB[k][0]) * 0.25F; + dst[i][1] = (rowA[j][1] + rowA[k][1] + + rowB[j][1] + rowB[k][1]) * 0.25F; + dst[i][2] = (rowA[j][2] + rowA[k][2] + + rowB[j][2] + rowB[k][2]) * 0.25F; + } + } + else if (datatype == FLOAT && comps == 2) { + uint i, j, k; + const float(*rowA)[2] = (const float(*)[2]) srcRowA; + const float(*rowB)[2] = (const float(*)[2]) srcRowB; + float(*dst)[2] = (float(*)[2]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i][0] = (rowA[j][0] + rowA[k][0] + + rowB[j][0] + rowB[k][0]) * 0.25F; + dst[i][1] = (rowA[j][1] + rowA[k][1] + + rowB[j][1] + rowB[k][1]) * 0.25F; + } + } + else if (datatype == FLOAT && comps == 1) { + uint i, j, k; + const float *rowA = (const float *) srcRowA; + const float *rowB = (const float *) srcRowB; + float *dst = (float *) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) * 0.25F; + } + } + +#if 0 + else if (datatype == HALF_FLOAT && comps == 4) { + uint i, j, k, comp; + const half_float(*rowA)[4] = (const half_float(*)[4]) srcRowA; + const half_float(*rowB)[4] = (const half_float(*)[4]) srcRowB; + half_float(*dst)[4] = (half_float(*)[4]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + for (comp = 0; comp < 4; comp++) { + float aj, ak, bj, bk; + aj = half_to_float(rowA[j][comp]); + ak = half_to_float(rowA[k][comp]); + bj = half_to_float(rowB[j][comp]); + bk = half_to_float(rowB[k][comp]); + dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F); + } + } + } + else if (datatype == HALF_FLOAT && comps == 3) { + uint i, j, k, comp; + const half_float(*rowA)[3] = (const half_float(*)[3]) srcRowA; + const half_float(*rowB)[3] = (const half_float(*)[3]) srcRowB; + half_float(*dst)[3] = (half_float(*)[3]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + for (comp = 0; comp < 3; comp++) { + float aj, ak, bj, bk; + aj = half_to_float(rowA[j][comp]); + ak = half_to_float(rowA[k][comp]); + bj = half_to_float(rowB[j][comp]); + bk = half_to_float(rowB[k][comp]); + dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F); + } + } + } + else if (datatype == HALF_FLOAT && comps == 2) { + uint i, j, k, comp; + const half_float(*rowA)[2] = (const half_float(*)[2]) srcRowA; + const half_float(*rowB)[2] = (const half_float(*)[2]) srcRowB; + half_float(*dst)[2] = (half_float(*)[2]) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + for (comp = 0; comp < 2; comp++) { + float aj, ak, bj, bk; + aj = half_to_float(rowA[j][comp]); + ak = half_to_float(rowA[k][comp]); + bj = half_to_float(rowB[j][comp]); + bk = half_to_float(rowB[k][comp]); + dst[i][comp] = float_to_half((aj + ak + bj + bk) * 0.25F); + } + } + } + else if (datatype == HALF_FLOAT && comps == 1) { + uint i, j, k; + const half_float *rowA = (const half_float *) srcRowA; + const half_float *rowB = (const half_float *) srcRowB; + half_float *dst = (half_float *) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + float aj, ak, bj, bk; + aj = half_to_float(rowA[j]); + ak = half_to_float(rowA[k]); + bj = half_to_float(rowB[j]); + bk = half_to_float(rowB[k]); + dst[i] = float_to_half((aj + ak + bj + bk) * 0.25F); + } + } +#endif + + else if (datatype == UINT && comps == 1) { + uint i, j, k; + const uint *rowA = (const uint *) srcRowA; + const uint *rowB = (const uint *) srcRowB; + float *dst = (float *) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + dst[i] = rowA[j] / 4 + rowA[k] / 4 + rowB[j] / 4 + rowB[k] / 4; + } + } + + else if (datatype == USHORT_5_6_5 && comps == 3) { + uint i, j, k; + const ushort *rowA = (const ushort *) srcRowA; + const ushort *rowB = (const ushort *) srcRowB; + ushort *dst = (ushort *) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const int rowAr0 = rowA[j] & 0x1f; + const int rowAr1 = rowA[k] & 0x1f; + const int rowBr0 = rowB[j] & 0x1f; + const int rowBr1 = rowB[k] & 0x1f; + const int rowAg0 = (rowA[j] >> 5) & 0x3f; + const int rowAg1 = (rowA[k] >> 5) & 0x3f; + const int rowBg0 = (rowB[j] >> 5) & 0x3f; + const int rowBg1 = (rowB[k] >> 5) & 0x3f; + const int rowAb0 = (rowA[j] >> 11) & 0x1f; + const int rowAb1 = (rowA[k] >> 11) & 0x1f; + const int rowBb0 = (rowB[j] >> 11) & 0x1f; + const int rowBb1 = (rowB[k] >> 11) & 0x1f; + const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2; + const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2; + const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2; + dst[i] = (blue << 11) | (green << 5) | red; + } + } + else if (datatype == USHORT_4_4_4_4 && comps == 4) { + uint i, j, k; + const ushort *rowA = (const ushort *) srcRowA; + const ushort *rowB = (const ushort *) srcRowB; + ushort *dst = (ushort *) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const int rowAr0 = rowA[j] & 0xf; + const int rowAr1 = rowA[k] & 0xf; + const int rowBr0 = rowB[j] & 0xf; + const int rowBr1 = rowB[k] & 0xf; + const int rowAg0 = (rowA[j] >> 4) & 0xf; + const int rowAg1 = (rowA[k] >> 4) & 0xf; + const int rowBg0 = (rowB[j] >> 4) & 0xf; + const int rowBg1 = (rowB[k] >> 4) & 0xf; + const int rowAb0 = (rowA[j] >> 8) & 0xf; + const int rowAb1 = (rowA[k] >> 8) & 0xf; + const int rowBb0 = (rowB[j] >> 8) & 0xf; + const int rowBb1 = (rowB[k] >> 8) & 0xf; + const int rowAa0 = (rowA[j] >> 12) & 0xf; + const int rowAa1 = (rowA[k] >> 12) & 0xf; + const int rowBa0 = (rowB[j] >> 12) & 0xf; + const int rowBa1 = (rowB[k] >> 12) & 0xf; + const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2; + const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2; + const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2; + const int alpha = (rowAa0 + rowAa1 + rowBa0 + rowBa1) >> 2; + dst[i] = (alpha << 12) | (blue << 8) | (green << 4) | red; + } + } + else if (datatype == USHORT_1_5_5_5_REV && comps == 4) { + uint i, j, k; + const ushort *rowA = (const ushort *) srcRowA; + const ushort *rowB = (const ushort *) srcRowB; + ushort *dst = (ushort *) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const int rowAr0 = rowA[j] & 0x1f; + const int rowAr1 = rowA[k] & 0x1f; + const int rowBr0 = rowB[j] & 0x1f; + const int rowBr1 = rowB[k] & 0xf; + const int rowAg0 = (rowA[j] >> 5) & 0x1f; + const int rowAg1 = (rowA[k] >> 5) & 0x1f; + const int rowBg0 = (rowB[j] >> 5) & 0x1f; + const int rowBg1 = (rowB[k] >> 5) & 0x1f; + const int rowAb0 = (rowA[j] >> 10) & 0x1f; + const int rowAb1 = (rowA[k] >> 10) & 0x1f; + const int rowBb0 = (rowB[j] >> 10) & 0x1f; + const int rowBb1 = (rowB[k] >> 10) & 0x1f; + const int rowAa0 = (rowA[j] >> 15) & 0x1; + const int rowAa1 = (rowA[k] >> 15) & 0x1; + const int rowBa0 = (rowB[j] >> 15) & 0x1; + const int rowBa1 = (rowB[k] >> 15) & 0x1; + const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2; + const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2; + const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2; + const int alpha = (rowAa0 + rowAa1 + rowBa0 + rowBa1) >> 2; + dst[i] = (alpha << 15) | (blue << 10) | (green << 5) | red; + } + } + else if (datatype == UBYTE_3_3_2 && comps == 3) { + uint i, j, k; + const ubyte *rowA = (const ubyte *) srcRowA; + const ubyte *rowB = (const ubyte *) srcRowB; + ubyte *dst = (ubyte *) dstRow; + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const int rowAr0 = rowA[j] & 0x3; + const int rowAr1 = rowA[k] & 0x3; + const int rowBr0 = rowB[j] & 0x3; + const int rowBr1 = rowB[k] & 0x3; + const int rowAg0 = (rowA[j] >> 2) & 0x7; + const int rowAg1 = (rowA[k] >> 2) & 0x7; + const int rowBg0 = (rowB[j] >> 2) & 0x7; + const int rowBg1 = (rowB[k] >> 2) & 0x7; + const int rowAb0 = (rowA[j] >> 5) & 0x7; + const int rowAb1 = (rowA[k] >> 5) & 0x7; + const int rowBb0 = (rowB[j] >> 5) & 0x7; + const int rowBb1 = (rowB[k] >> 5) & 0x7; + const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2; + const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2; + const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2; + dst[i] = (blue << 5) | (green << 2) | red; + } + } + else { + debug_printf("bad format in do_row()"); + } +} + + +static void +format_to_type_comps(enum pipe_format pformat, + enum dtype *datatype, uint *comps) +{ + switch (pformat) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8A8_UNORM: + *datatype = UBYTE; + *comps = 4; + return; + case PIPE_FORMAT_A1R5G5B5_UNORM: + *datatype = USHORT_1_5_5_5_REV; + *comps = 4; + return; + case PIPE_FORMAT_A4R4G4B4_UNORM: + *datatype = USHORT_4_4_4_4; + *comps = 4; + return; + case PIPE_FORMAT_R5G6B5_UNORM: + *datatype = USHORT_5_6_5; + *comps = 3; + return; + case PIPE_FORMAT_U_L8: + case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_U_I8: + *datatype = UBYTE; + *comps = 1; + return; + case PIPE_FORMAT_U_A8_L8: + *datatype = UBYTE; + *comps = 2; + return; + default: + assert(0); + } +} + + +static void +reduce_1d(enum pipe_format pformat, + int srcWidth, const ubyte *srcPtr, + int dstWidth, ubyte *dstPtr) +{ + enum dtype datatype; + uint comps; + + format_to_type_comps(pformat, &datatype, &comps); + + /* we just duplicate the input row, kind of hack, saves code */ + do_row(datatype, comps, + srcWidth, srcPtr, srcPtr, + dstWidth, dstPtr); +} + + +/** + * Strides are in bytes. If zero, it'll be computed as width * bpp. + */ +static void +reduce_2d(enum pipe_format pformat, + int srcWidth, int srcHeight, + int srcRowStride, const ubyte *srcPtr, + int dstWidth, int dstHeight, + int dstRowStride, ubyte *dstPtr) +{ + enum dtype datatype; + uint comps; + const int bpt = pf_get_size(pformat); + const ubyte *srcA, *srcB; + ubyte *dst; + int row; + + format_to_type_comps(pformat, &datatype, &comps); + + if (!srcRowStride) + srcRowStride = bpt * srcWidth; + + if (!dstRowStride) + dstRowStride = bpt * dstWidth; + + /* Compute src and dst pointers */ + srcA = srcPtr; + if (srcHeight > 1) + srcB = srcA + srcRowStride; + else + srcB = srcA; + dst = dstPtr; + + for (row = 0; row < dstHeight; row++) { + do_row(datatype, comps, + srcWidth, srcA, srcB, + dstWidth, dst); + srcA += 2 * srcRowStride; + srcB += 2 * srcRowStride; + dst += dstRowStride; + } +} + + +static void +make_1d_mipmap(struct gen_mipmap_state *ctx, + struct pipe_texture *pt, + uint face, uint baseLevel, uint lastLevel) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_winsys *winsys = pipe->winsys; + const uint zslice = 0; + uint dstLevel; + + for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { + const uint srcLevel = dstLevel - 1; + struct pipe_surface *srcSurf, *dstSurf; + void *srcMap, *dstMap; + + srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice); + dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); + + srcMap = ((ubyte *) winsys->buffer_map(winsys, srcSurf->buffer, + PIPE_BUFFER_USAGE_CPU_READ) + + srcSurf->offset); + dstMap = ((ubyte *) winsys->buffer_map(winsys, dstSurf->buffer, + PIPE_BUFFER_USAGE_CPU_WRITE) + + dstSurf->offset); + + reduce_1d(pt->format, + srcSurf->width, srcMap, + dstSurf->width, dstMap); + + winsys->buffer_unmap(winsys, srcSurf->buffer); + winsys->buffer_unmap(winsys, dstSurf->buffer); + + pipe_surface_reference(&srcSurf, NULL); + pipe_surface_reference(&dstSurf, NULL); + } +} + + +static void +make_2d_mipmap(struct gen_mipmap_state *ctx, + struct pipe_texture *pt, + uint face, uint baseLevel, uint lastLevel) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_winsys *winsys = pipe->winsys; + const uint zslice = 0; + uint dstLevel; + const int bpt = pf_get_size(pt->format); + + for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { + const uint srcLevel = dstLevel - 1; + struct pipe_surface *srcSurf, *dstSurf; + ubyte *srcMap, *dstMap; + + srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice); + dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); + + srcMap = ((ubyte *) winsys->buffer_map(winsys, srcSurf->buffer, + PIPE_BUFFER_USAGE_CPU_READ) + + srcSurf->offset); + dstMap = ((ubyte *) winsys->buffer_map(winsys, dstSurf->buffer, + PIPE_BUFFER_USAGE_CPU_WRITE) + + dstSurf->offset); + + reduce_2d(pt->format, + srcSurf->width, srcSurf->height, + srcSurf->pitch * bpt, srcMap, + dstSurf->width, dstSurf->height, + dstSurf->pitch * bpt, dstMap); + + winsys->buffer_unmap(winsys, srcSurf->buffer); + winsys->buffer_unmap(winsys, dstSurf->buffer); + + pipe_surface_reference(&srcSurf, NULL); + pipe_surface_reference(&dstSurf, NULL); + } +} + + +static void +make_3d_mipmap(struct gen_mipmap_state *ctx, + struct pipe_texture *pt, + uint face, uint baseLevel, uint lastLevel) +{ +} + + +static void +fallback_gen_mipmap(struct gen_mipmap_state *ctx, + struct pipe_texture *pt, + uint face, uint baseLevel, uint lastLevel) +{ + switch (pt->target) { + case PIPE_TEXTURE_1D: + make_1d_mipmap(ctx, pt, face, baseLevel, lastLevel); + break; + case PIPE_TEXTURE_2D: + case PIPE_TEXTURE_CUBE: + make_2d_mipmap(ctx, pt, face, baseLevel, lastLevel); + break; + case PIPE_TEXTURE_3D: + make_3d_mipmap(ctx, pt, face, baseLevel, lastLevel); + break; + default: + assert(0); + } +} + + /** * Make simple fragment shader: * TEX OUT[0], IN[0], SAMP[0], 2D; @@ -415,9 +1022,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, /* check if we can render in the texture's format */ if (!screen->is_format_supported(screen, pt->format, PIPE_SURFACE)) { -#if 0 fallback_gen_mipmap(ctx, pt, face, baseLevel, lastLevel); -#endif return; } -- cgit v1.2.3 From b2b905b04c09dd5e701a43b0fecb73921b8f2866 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 18 Mar 2008 15:59:55 +0100 Subject: gallium: Silencium warnings on Windows. --- src/gallium/auxiliary/util/u_gen_mipmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 8ca06281ab..d6d8c766ce 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -325,7 +325,7 @@ do_row(enum dtype datatype, uint comps, int srcWidth, float *dst = (float *) dstRow; for (i = j = 0, k = k0; i < (uint) dstWidth; i++, j += colStride, k += colStride) { - dst[i] = rowA[j] / 4 + rowA[k] / 4 + rowB[j] / 4 + rowB[k] / 4; + dst[i] = (float) rowA[j] / 4 + (float) rowA[k] / 4 + (float) rowB[j] / 4 + (float) rowB[k] / 4; } } @@ -1070,8 +1070,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, * has trouble with min clamping so we also set the lod_bias to * try to work around that. */ - sampler.min_lod = sampler.max_lod = srcLevel; - sampler.lod_bias = srcLevel; + sampler.min_lod = sampler.max_lod = (float) srcLevel; + sampler.lod_bias = (float) srcLevel; sampler_cso = pipe->create_sampler_state(pipe, &sampler); pipe->bind_sampler_states(pipe, 1, &sampler_cso); -- cgit v1.2.3 From 544c27bcefd8a10318800c3cc2019514ee01a15b Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 18 Mar 2008 09:22:41 -0600 Subject: gallium: utility for packing color, z values according to pipe_format --- src/gallium/auxiliary/util/u_pack_color.h | 125 ++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_pack_color.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h new file mode 100644 index 0000000000..93a18c1c7e --- /dev/null +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -0,0 +1,125 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Functions to produce packed colors/Z from floats. + */ + + +#include "pipe/p_compiler.h" + + +/** + * Note rgba outside [0,1] will be clamped for int pixel formats. + */ +static INLINE void +util_pack_color(const float rgba[4], enum pipe_format format, void *dest) +{ + ubyte r, g, b, a; + + if (pf_size_x(format) <= 8) { + /* format uses 8-bit components or less */ + UNCLAMPED_FLOAT_TO_UBYTE(r, rgba[0]); + UNCLAMPED_FLOAT_TO_UBYTE(g, rgba[1]); + UNCLAMPED_FLOAT_TO_UBYTE(b, rgba[2]); + UNCLAMPED_FLOAT_TO_UBYTE(a, rgba[3]); + } + + switch (format) { + case PIPE_FORMAT_R8G8B8A8_UNORM: + { + uint *d = (uint *) dest; + *d = (r << 24) | (g << 16) | (b << 8) | a; + } + return; + case PIPE_FORMAT_A8R8G8B8_UNORM: + { + uint *d = (uint *) dest; + *d = (a << 24) | (r << 16) | (g << 8) | b; + } + return; + case PIPE_FORMAT_B8G8R8A8_UNORM: + { + uint *d = (uint *) dest; + *d = (b << 24) | (g << 16) | (r << 8) | a; + } + return; + case PIPE_FORMAT_R5G6B5_UNORM: + { + ushort *d = (ushort *) dest; + *d = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); + } + return; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + { + float *d = (float *) dest; + d[0] = rgba[0]; + d[1] = rgba[1]; + d[2] = rgba[2]; + d[3] = rgba[3]; + } + return; + case PIPE_FORMAT_R32G32B32_FLOAT: + { + float *d = (float *) dest; + d[0] = rgba[0]; + d[1] = rgba[1]; + d[2] = rgba[2]; + } + return; + /* XXX lots more cases to add */ + default: + debug_printf("gallium: unhandled format in util_pack_color()"); + } +} + + +/** + * Note: it's assumed that z is in [0,1] + */ +static INLINE uint +util_pack_z(enum pipe_format format, double z) +{ + switch (format) { + case PIPE_FORMAT_Z16_UNORM: + return (uint) (z * 0xffff); + case PIPE_FORMAT_Z32_UNORM: + /* special-case to avoid overflow */ + if (z == 1.0) + return 0xffffffff; + else + return (uint) (z * 0xffffffff); + case PIPE_FORMAT_S8Z24_UNORM: + return (uint) (z * 0xffffff); + case PIPE_FORMAT_Z24S8_UNORM: + return ((uint) (z * 0xffffff)) << 8; + default: + debug_printf("gallium: unhandled fomrat in util_pack_z()"); + return 0; + } +} -- cgit v1.2.3 From e8f8b12bb940794cef8eff52ae8c908ad0604161 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 18 Mar 2008 09:28:04 -0600 Subject: gallium: fix a mix-up in the uint[1] do_row() case --- src/gallium/auxiliary/util/u_gen_mipmap.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index d6d8c766ce..d47c677074 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -322,10 +322,10 @@ do_row(enum dtype datatype, uint comps, int srcWidth, uint i, j, k; const uint *rowA = (const uint *) srcRowA; const uint *rowB = (const uint *) srcRowB; - float *dst = (float *) dstRow; + uint *dst = (uint *) dstRow; for (i = j = 0, k = k0; i < (uint) dstWidth; i++, j += colStride, k += colStride) { - dst[i] = (float) rowA[j] / 4 + (float) rowA[k] / 4 + (float) rowB[j] / 4 + (float) rowB[k] / 4; + dst[i] = rowA[j] / 4 + rowA[k] / 4 + rowB[j] / 4 + rowB[k] / 4; } } -- cgit v1.2.3 From 56ac9eb1f6e3826e4e8f7ab0f1fdbeed06c41c9f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 18 Mar 2008 11:49:29 +0000 Subject: gallium: Don't be pedantic about removing non exiting items from the table. --- src/gallium/auxiliary/util/u_handle_table.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c index d25872972a..fae4ac4039 100644 --- a/src/gallium/auxiliary/util/u_handle_table.c +++ b/src/gallium/auxiliary/util/u_handle_table.c @@ -226,9 +226,13 @@ handle_table_remove(struct handle_table *ht, index = handle - 1; object = ht->objects[index]; - assert(object); + if(!object) { + /* XXX: this warning may be noisy for legitimate use -- remove later */ + debug_warning("removing empty handle"); + return; + } - if(object && ht->destroy) + if(ht->destroy) ht->destroy(object); ht->objects[index] = NULL; -- cgit v1.2.3 From d26139d6a19aaf8b4dbbaa1ee937fed2283923e4 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 18 Mar 2008 12:01:14 +0000 Subject: d3d: Add function to walk through all items in the hash table. --- src/gallium/auxiliary/util/u_hash_table.c | 23 ++++++++++++++++++++++- src/gallium/auxiliary/util/u_hash_table.h | 5 +++++ 2 files changed, 27 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_hash_table.c b/src/gallium/auxiliary/util/u_hash_table.c index ac2cb1b540..f3f16a8d94 100644 --- a/src/gallium/auxiliary/util/u_hash_table.c +++ b/src/gallium/auxiliary/util/u_hash_table.c @@ -187,6 +187,28 @@ hash_table_remove(struct hash_table *ht, } +enum pipe_error +hash_table_foreach(struct hash_table *ht, + enum pipe_error (*callback)(void *key, void *value, void *data), + void *data) +{ + struct cso_hash_iter iter; + struct hash_table_item *item; + enum pipe_error result; + + iter = cso_hash_first_node(ht->cso); + while (!cso_hash_iter_is_null(iter)) { + item = (struct hash_table_item *)cso_hash_iter_data(iter); + result = callback(item->key, item->value, data); + if(result != PIPE_OK) + return result; + iter = cso_hash_iter_next(iter); + } + + return PIPE_OK; +} + + void hash_table_destroy(struct hash_table *ht) { @@ -196,4 +218,3 @@ hash_table_destroy(struct hash_table *ht) FREE(ht); } - diff --git a/src/gallium/auxiliary/util/u_hash_table.h b/src/gallium/auxiliary/util/u_hash_table.h index d941f2c6b1..1583bd7548 100644 --- a/src/gallium/auxiliary/util/u_hash_table.h +++ b/src/gallium/auxiliary/util/u_hash_table.h @@ -75,6 +75,11 @@ hash_table_remove(struct hash_table *ht, void *key); +enum pipe_error +hash_table_foreach(struct hash_table *ht, + enum pipe_error (*callback)(void *key, void *value, void *data), + void *data); + void hash_table_destroy(struct hash_table *ht); -- cgit v1.2.3 From 527e30c53baadb396e5503e5188f0a9f1b2d2501 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 18 Mar 2008 12:46:24 +0000 Subject: d3d: Allow to iterate over the handle table. --- src/gallium/auxiliary/util/u_handle_table.c | 22 ++++++++++++++++++++++ src/gallium/auxiliary/util/u_handle_table.h | 9 +++++++++ 2 files changed, 31 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c index fae4ac4039..0bfb9e1b4a 100644 --- a/src/gallium/auxiliary/util/u_handle_table.c +++ b/src/gallium/auxiliary/util/u_handle_table.c @@ -241,6 +241,28 @@ handle_table_remove(struct handle_table *ht, } +unsigned +handle_table_get_next_handle(struct handle_table *ht, + unsigned handle) +{ + unsigned index; + + for(index = handle; index < ht->size; ++index) { + if(!ht->objects[index]) + return index + 1; + } + + return 0; +} + + +unsigned +handle_table_get_first_handle(struct handle_table *ht) +{ + return handle_table_get_next_handle(ht, 0); +} + + void handle_table_destroy(struct handle_table *ht) { diff --git a/src/gallium/auxiliary/util/u_handle_table.h b/src/gallium/auxiliary/util/u_handle_table.h index a2f1f604ad..d080135c9f 100644 --- a/src/gallium/auxiliary/util/u_handle_table.h +++ b/src/gallium/auxiliary/util/u_handle_table.h @@ -100,6 +100,15 @@ void handle_table_destroy(struct handle_table *ht); +unsigned +handle_table_get_first_handle(struct handle_table *ht); + + +unsigned +handle_table_get_next_handle(struct handle_table *ht, + unsigned handle); + + #ifdef __cplusplus } #endif -- cgit v1.2.3 From 31970c4633c5000916b0a36022ff761038f5cf5a Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 18 Mar 2008 16:56:55 -0600 Subject: gallium: utilities for creating simple vertex/fragment shaders --- src/gallium/auxiliary/util/u_simple_shaders.c | 263 ++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_simple_shaders.h | 52 +++++ 2 files changed, 315 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_simple_shaders.c create mode 100644 src/gallium/auxiliary/util/u_simple_shaders.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c new file mode 100644 index 0000000000..88e2ab05bd --- /dev/null +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -0,0 +1,263 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Simple vertex/fragment shader generators. + * + * @author Brian Paul + */ + + +#include "pipe/p_context.h" +#include "pipe/p_debug.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" +#include "pipe/p_shader_tokens.h" + +#include "util/u_simple_shaders.h" + +#include "tgsi/util/tgsi_build.h" +#include "tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_parse.h" + + + +/** + * Make simple vertex pass-through shader. + */ +void * +util_make_vertex_passthrough_shader(struct pipe_context *pipe, + uint num_attribs, + const uint *semantic_names, + const uint *semantic_indexes) +{ + uint maxTokens = 100; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + const uint procType = TGSI_PROCESSOR_VERTEX; + uint ti, i; + struct pipe_shader_state shader; + + tokens = (struct tgsi_token *) malloc(maxTokens * sizeof(tokens[0])); + + /* shader header + */ + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + + processor = (struct tgsi_processor *) &tokens[2]; + *processor = tgsi_build_processor( procType, header ); + + ti = 3; + + /* declare inputs */ + for (i = 0; i < num_attribs; i++) { + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + /* + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION; + decl.Semantic.SemanticIndex = 0; + */ + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + } + + /* declare outputs */ + for (i = 0; i < num_attribs; i++) { + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = semantic_names[i]; + decl.Semantic.SemanticIndex = semantic_indexes[i]; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + } + + /* emit MOV instructions */ + for (i = 0; i < num_attribs; i++) { + /* MOVE out[i], in[i]; */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = i; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = i; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + } + + /* END instruction */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + +#if 0 /*debug*/ + tgsi_dump(tokens, 0); +#endif + + shader.tokens = tokens; + return pipe->create_vs_state(pipe, &shader); +} + + + + +/** + * Make simple fragment texture shader: + * TEX OUT[0], IN[0], SAMP[0], 2D; + * END; + */ +void * +util_make_fragment_tex_shader(struct pipe_context *pipe) +{ + uint maxTokens = 100; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + const uint procType = TGSI_PROCESSOR_FRAGMENT; + uint ti; + struct pipe_shader_state shader; + + tokens = (struct tgsi_token *) malloc(maxTokens * sizeof(tokens[0])); + + /* shader header + */ + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + + processor = (struct tgsi_processor *) &tokens[2]; + *processor = tgsi_build_processor( procType, header ); + + ti = 3; + + /* declare TEX[0] input */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = 0; + /* XXX this could be linear... */ + decl.Declaration.Interpolate = 1; + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + /* declare color[0] output */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + /* declare sampler */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + /* TEX instruction */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_TEX; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 2; + inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + inst.FullSrcRegisters[1].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + + /* END instruction */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + +#if 0 /*debug*/ + tgsi_dump(tokens, 0); +#endif + + shader.tokens = tokens; + return pipe->create_fs_state(pipe, &shader); +} + diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h new file mode 100644 index 0000000000..3ef4f28801 --- /dev/null +++ b/src/gallium/auxiliary/util/u_simple_shaders.h @@ -0,0 +1,52 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef U_SIMPLE_SHADERS_H +#define U_SIMPLE_SHADERS_H + + +#include "pipe/p_compiler.h" + + +struct pipe_context; + + +extern void * +util_make_vertex_passthrough_shader(struct pipe_context *pipe, + uint num_attribs, + const uint *semantic_names, + const uint *semantic_indexes); + + +extern void * +util_make_fragment_tex_shader(struct pipe_context *pipe); + + +#endif + + -- cgit v1.2.3 From 4bd2b74441092154f0d0048822c6e36cfcc183af Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 18 Mar 2008 16:57:23 -0600 Subject: gallium: use new simple shader utility routines --- src/gallium/auxiliary/util/u_gen_mipmap.c | 260 ++---------------------------- 1 file changed, 12 insertions(+), 248 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index d47c677074..e32ce768c4 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -43,6 +43,7 @@ #include "util/u_draw_quad.h" #include "util/u_gen_mipmap.h" +#include "util/u_simple_shaders.h" #include "tgsi/util/tgsi_build.h" #include "tgsi/util/tgsi_dump.h" @@ -655,252 +656,6 @@ fallback_gen_mipmap(struct gen_mipmap_state *ctx, } -/** - * Make simple fragment shader: - * TEX OUT[0], IN[0], SAMP[0], 2D; - * END; - */ -static void -make_fragment_shader(struct gen_mipmap_state *ctx) -{ - struct pipe_context *pipe = ctx->pipe; - uint maxTokens = 100; - struct tgsi_token *tokens; - struct tgsi_header *header; - struct tgsi_processor *processor; - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - const uint procType = TGSI_PROCESSOR_FRAGMENT; - uint ti = 0; - struct pipe_shader_state shader; - - tokens = (struct tgsi_token *) malloc(maxTokens * sizeof(tokens[0])); - - /* shader header - */ - *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); - - header = (struct tgsi_header *) &tokens[1]; - *header = tgsi_build_header(); - - processor = (struct tgsi_processor *) &tokens[2]; - *processor = tgsi_build_processor( procType, header ); - - ti = 3; - - /* declare TEX[0] input */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - /* XXX this could be linear... */ - decl.Declaration.Interpolate = 1; - decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = 0; - ti += tgsi_build_full_declaration(&decl, - &tokens[ti], - header, - maxTokens - ti); - - /* declare color[0] output */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = 0; - ti += tgsi_build_full_declaration(&decl, - &tokens[ti], - header, - maxTokens - ti); - - /* declare sampler */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = 0; - ti += tgsi_build_full_declaration(&decl, - &tokens[ti], - header, - maxTokens - ti); - - /* TEX instruction */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_TEX; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 2; - inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - inst.FullSrcRegisters[1].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction(&inst, - &tokens[ti], - header, - maxTokens - ti ); - - /* END instruction */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction(&inst, - &tokens[ti], - header, - maxTokens - ti ); - -#if 0 /*debug*/ - tgsi_dump(tokens, 0); -#endif - - shader.tokens = tokens; - ctx->fs = pipe->create_fs_state(pipe, &shader); -} - - -/** - * Make simple fragment shader: - * MOV OUT[0], IN[0]; - * MOV OUT[1], IN[1]; - * END; - * - * XXX eliminate this when vertex passthrough-mode is more solid. - */ -static void -make_vertex_shader(struct gen_mipmap_state *ctx) -{ - struct pipe_context *pipe = ctx->pipe; - uint maxTokens = 100; - struct tgsi_token *tokens; - struct tgsi_header *header; - struct tgsi_processor *processor; - struct tgsi_full_declaration decl; - struct tgsi_full_instruction inst; - const uint procType = TGSI_PROCESSOR_VERTEX; - uint ti = 0; - struct pipe_shader_state shader; - - tokens = (struct tgsi_token *) malloc(maxTokens * sizeof(tokens[0])); - - /* shader header - */ - *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); - - header = (struct tgsi_header *) &tokens[1]; - *header = tgsi_build_header(); - - processor = (struct tgsi_processor *) &tokens[2]; - *processor = tgsi_build_processor( procType, header ); - - ti = 3; - - /* declare POS input */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - /* - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION; - decl.Semantic.SemanticIndex = 0; - */ - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = 0; - ti += tgsi_build_full_declaration(&decl, - &tokens[ti], - header, - maxTokens - ti); - /* declare TEX[0] input */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - /* - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - */ - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = 1; - ti += tgsi_build_full_declaration(&decl, - &tokens[ti], - header, - maxTokens - ti); - - /* declare POS output */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = 0; - ti += tgsi_build_full_declaration(&decl, - &tokens[ti], - header, - maxTokens - ti); - - /* declare TEX[0] output */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_OUTPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = 1; - ti += tgsi_build_full_declaration(&decl, - &tokens[ti], - header, - maxTokens - ti); - - /* MOVE out[0], in[0]; # POS */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 0; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 0; - ti += tgsi_build_full_instruction(&inst, - &tokens[ti], - header, - maxTokens - ti ); - - /* MOVE out[1], in[1]; # TEX */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_MOV; - inst.Instruction.NumDstRegs = 1; - inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - inst.FullDstRegisters[0].DstRegister.Index = 1; - inst.Instruction.NumSrcRegs = 1; - inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - inst.FullSrcRegisters[0].SrcRegister.Index = 1; - ti += tgsi_build_full_instruction(&inst, - &tokens[ti], - header, - maxTokens - ti ); - - /* END instruction */ - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = TGSI_OPCODE_END; - inst.Instruction.NumDstRegs = 0; - inst.Instruction.NumSrcRegs = 0; - ti += tgsi_build_full_instruction(&inst, - &tokens[ti], - header, - maxTokens - ti ); - -#if 0 /*debug*/ - tgsi_dump(tokens, 0); -#endif - - shader.tokens = tokens; - ctx->vs = pipe->create_vs_state(pipe, &shader); -} - - /** * Create a mipmap generation context. * The idea is to create one of these and re-use it each time we need to @@ -953,8 +708,17 @@ util_create_gen_mipmap(struct pipe_context *pipe) ctx->viewport.translate[3] = 0.0; #endif - make_vertex_shader(ctx); - make_fragment_shader(ctx); + /* vertex shader */ + { + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC }; + const uint semantic_indexes[] = { 0, 0 }; + ctx->vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names, + semantic_indexes); + } + + /* fragment shader */ + ctx->fs = util_make_fragment_tex_shader(pipe); return ctx; } -- cgit v1.2.3 From e5a20499d84ba8d270c3144f2c1c7615eeb077c7 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 18 Mar 2008 16:58:15 -0600 Subject: gallium: make the gen_mipmap_state struct private --- src/gallium/auxiliary/util/u_gen_mipmap.c | 13 +++++++++++++ src/gallium/auxiliary/util/u_gen_mipmap.h | 13 +------------ 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index e32ce768c4..e18f8ab72a 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -50,6 +50,19 @@ #include "tgsi/util/tgsi_parse.h" +struct gen_mipmap_state +{ + struct pipe_context *pipe; + + void *blend; + void *depthstencil; + void *rasterizer; + /*struct pipe_viewport_state viewport;*/ + struct pipe_sampler_state *vs; + struct pipe_sampler_state *fs; +}; + + enum dtype { diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.h b/src/gallium/auxiliary/util/u_gen_mipmap.h index 6ee909f0a6..80496140a2 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.h +++ b/src/gallium/auxiliary/util/u_gen_mipmap.h @@ -31,18 +31,7 @@ #include "pipe/p_state.h" -struct gen_mipmap_state -{ - struct pipe_context *pipe; - - void *blend; - void *depthstencil; - void *rasterizer; - /*struct pipe_viewport_state viewport;*/ - struct pipe_sampler_state *vs; - struct pipe_sampler_state *fs; -}; - +struct gen_mipmap_state; extern struct gen_mipmap_state * -- cgit v1.2.3 From 8de9503d5d290c475a1940a71e58086ab3735f83 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 18 Mar 2008 16:59:15 -0600 Subject: added u_simple_shaders.c --- src/gallium/auxiliary/util/Makefile | 1 + src/gallium/auxiliary/util/SConscript | 1 + 2 files changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index d11fa6e803..7af9b35d4b 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -12,6 +12,7 @@ C_SOURCES = \ u_handle_table.c \ u_hash_table.c \ u_mm.c \ + u_simple_shaders.c \ u_snprintf.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 6d4ba8643d..0eb1b8d9da 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -11,6 +11,7 @@ util = env.ConvenienceLibrary( 'u_handle_table.c', 'u_hash_table.c', 'u_mm.c', + 'u_simple_shaders.c', 'u_snprintf.c', ]) -- cgit v1.2.3 From 18f9fa9e71abdd999e49ef78729bfe3d92304312 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 18 Mar 2008 16:59:46 -0600 Subject: gallium: protect against multi-#include --- src/gallium/auxiliary/util/u_pack_color.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 93a18c1c7e..cd13823985 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -31,7 +31,12 @@ */ +#ifndef U_PACK_COLOR_H +#define U_PACK_COLOR_H + + #include "pipe/p_compiler.h" +#include "pipe/p_format.h" /** @@ -123,3 +128,6 @@ util_pack_z(enum pipe_format format, double z) return 0; } } + + +#endif /* U_PACK_COLOR_H */ -- cgit v1.2.3 From 0abe462128a8a0725e006751e553f89fee2d7fa7 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 18 Mar 2008 17:04:59 -0600 Subject: gallium: new pixel blit code Copy rectangular region from one surface to another w/ scaling. Disables most fragment operations. --- src/gallium/auxiliary/util/Makefile | 1 + src/gallium/auxiliary/util/SConscript | 1 + src/gallium/auxiliary/util/u_blit.c | 276 ++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_blit.h | 60 ++++++++ 4 files changed, 338 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_blit.c create mode 100644 src/gallium/auxiliary/util/u_blit.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 7af9b35d4b..9b6c2708b6 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -7,6 +7,7 @@ C_SOURCES = \ p_debug.c \ p_tile.c \ p_util.c \ + u_blit.c \ u_draw_quad.c \ u_gen_mipmap.c \ u_handle_table.c \ diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 0eb1b8d9da..b44f2d5e39 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -6,6 +6,7 @@ util = env.ConvenienceLibrary( 'p_debug.c', 'p_tile.c', 'p_util.c', + 'u_blit.c', 'u_draw_quad.c', 'u_gen_mipmap.c', 'u_handle_table.c', diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c new file mode 100644 index 0000000000..4b4ab8185f --- /dev/null +++ b/src/gallium/auxiliary/util/u_blit.c @@ -0,0 +1,276 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Copy/blit pixel rect between surfaces + * + * @author Brian Paul + */ + + +#include "pipe/p_context.h" +#include "pipe/p_debug.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "pipe/p_util.h" +#include "pipe/p_winsys.h" +#include "pipe/p_shader_tokens.h" + +#include "util/u_draw_quad.h" +#include "util/u_blit.h" +#include "util/u_simple_shaders.h" + + +struct blit_state +{ + struct pipe_context *pipe; + + void *blend; + void *depthstencil; + void *rasterizer; + void *samplers[2]; /* one for linear, one for nearest sampling */ + + /*struct pipe_viewport_state viewport;*/ + struct pipe_sampler_state *vs; + struct pipe_sampler_state *fs; +}; + + +/** + * Create state object for blit. + * Intended to be created once and re-used for many blit() calls. + */ +struct blit_state * +util_create_blit(struct pipe_context *pipe) +{ + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state depthstencil; + struct pipe_rasterizer_state rasterizer; + struct blit_state *ctx; + struct pipe_sampler_state sampler; + + ctx = CALLOC_STRUCT(blit_state); + if (!ctx) + return NULL; + + ctx->pipe = pipe; + + /* we don't use blending, but need to set valid values */ + memset(&blend, 0, sizeof(blend)); + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.colormask = PIPE_MASK_RGBA; + ctx->blend = pipe->create_blend_state(pipe, &blend); + + /* depth/stencil/alpha */ + memset(&depthstencil, 0, sizeof(depthstencil)); + ctx->depthstencil = pipe->create_depth_stencil_alpha_state(pipe, &depthstencil); + + /* rasterizer */ + memset(&rasterizer, 0, sizeof(rasterizer)); + rasterizer.front_winding = PIPE_WINDING_CW; + rasterizer.cull_mode = PIPE_WINDING_NONE; + rasterizer.bypass_clipping = 1; /* bypasses viewport too */ + /*rasterizer.bypass_vs = 1;*/ + ctx->rasterizer = pipe->create_rasterizer_state(pipe, &rasterizer); + + /* samplers */ + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST; + sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST; + sampler.normalized_coords = 1; + ctx->samplers[0] = pipe->create_sampler_state(pipe, &sampler); + + sampler.min_img_filter = PIPE_TEX_MIPFILTER_LINEAR; + sampler.mag_img_filter = PIPE_TEX_MIPFILTER_LINEAR; + ctx->samplers[1] = pipe->create_sampler_state(pipe, &sampler); + + +#if 0 + /* viewport */ + ctx->viewport.scale[0] = 1.0; + ctx->viewport.scale[1] = 1.0; + ctx->viewport.scale[2] = 1.0; + ctx->viewport.scale[3] = 1.0; + ctx->viewport.translate[0] = 0.0; + ctx->viewport.translate[1] = 0.0; + ctx->viewport.translate[2] = 0.0; + ctx->viewport.translate[3] = 0.0; +#endif + + /* vertex shader */ + { + const uint semantic_names[] = { TGSI_SEMANTIC_POSITION, + TGSI_SEMANTIC_GENERIC }; + const uint semantic_indexes[] = { 0, 0 }; + ctx->vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names, + semantic_indexes); + } + + /* fragment shader */ + ctx->fs = util_make_fragment_tex_shader(pipe); + + return ctx; +} + + +/** + * Destroy a blit context + */ +void +util_destroy_blit(struct blit_state *ctx) +{ + struct pipe_context *pipe = ctx->pipe; + + pipe->delete_blend_state(pipe, ctx->blend); + pipe->delete_depth_stencil_alpha_state(pipe, ctx->depthstencil); + pipe->delete_rasterizer_state(pipe, ctx->rasterizer); + pipe->delete_sampler_state(pipe, ctx->samplers[0]); + pipe->delete_sampler_state(pipe, ctx->samplers[1]); + + pipe->delete_vs_state(pipe, ctx->vs); + pipe->delete_fs_state(pipe, ctx->fs); + + FREE(ctx); +} + + +/** + * Copy pixel block from src surface to dst surface. + * Overlapping regions are acceptable. + * XXX need some control over blitting Z and/or stencil. + */ +void +util_blit_pixels(struct blit_state *ctx, + struct pipe_surface *src, + int srcX0, int srcY0, + int srcX1, int srcY1, + struct pipe_surface *dst, + int dstX0, int dstY0, + int dstX1, int dstY1, + float z, uint filter) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_texture texTemp, *tex; + struct pipe_surface *texSurf; + struct pipe_framebuffer_state fb; + const int srcW = abs(srcX1 - srcX0); + const int srcH = abs(srcY1 - srcY0); + const int srcLeft = MIN2(srcX0, srcX1); + const int srcTop = MIN2(srcY0, srcY1); + + assert(filter == PIPE_TEX_MIPFILTER_NEAREST || + filter == PIPE_TEX_MIPFILTER_LINEAR); + + if (srcLeft != srcX0) { + /* left-right flip */ + int tmp = dstX0; + dstX0 = dstX1; + dstX1 = tmp; + } + + if (srcTop != srcY0) { + /* up-down flip */ + int tmp = dstY0; + dstY0 = dstY1; + dstY1 = tmp; + } + + /* + * XXX for now we're always creating a temporary texture. + * Strictly speaking that's not always needed. + */ + + /* create temp texture */ + memset(&texTemp, 0, sizeof(texTemp)); + texTemp.target = PIPE_TEXTURE_2D; + texTemp.format = src->format; + texTemp.last_level = 0; + texTemp.width[0] = srcW; + texTemp.height[0] = srcH; + texTemp.depth[0] = 1; + texTemp.compressed = 0; + texTemp.cpp = pf_get_bits(src->format) / 8; + + tex = screen->texture_create(screen, &texTemp); + if (!tex) + return; + + texSurf = screen->get_tex_surface(screen, tex, 0, 0, 0); + + /* load temp texture */ + pipe->surface_copy(pipe, FALSE, + texSurf, 0, 0, /* dest */ + src, srcLeft, srcTop, /* src */ + srcW, srcH); /* size */ + + /* drawing dest */ + memset(&fb, 0, sizeof(fb)); + fb.num_cbufs = 1; + fb.cbufs[0] = dst; + pipe->set_framebuffer_state(pipe, &fb); + + /* sampler */ + if (filter == PIPE_TEX_MIPFILTER_NEAREST) + pipe->bind_sampler_states(pipe, 1, &ctx->samplers[0]); + else + pipe->bind_sampler_states(pipe, 1, &ctx->samplers[1]); + + /* texture */ + pipe->set_sampler_textures(pipe, 1, &tex); + + /* shaders */ + pipe->bind_fs_state(pipe, ctx->fs); + pipe->bind_vs_state(pipe, ctx->vs); + + /* misc state */ + pipe->bind_blend_state(pipe, ctx->blend); + pipe->bind_depth_stencil_alpha_state(pipe, ctx->depthstencil); + pipe->bind_rasterizer_state(pipe, ctx->rasterizer); + + /* draw quad */ + util_draw_texquad(pipe, dstX0, dstY0, dstX1, dstY1, z); + + /* unbind */ + pipe->set_sampler_textures(pipe, 0, NULL); + pipe->bind_sampler_states(pipe, 0, NULL); + + /* free stuff */ + pipe_surface_reference(&texSurf, NULL); + screen->texture_release(screen, &tex); + + /* Note: caller must restore pipe/gallium state at this time */ +} + diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h new file mode 100644 index 0000000000..0a8e05f88b --- /dev/null +++ b/src/gallium/auxiliary/util/u_blit.h @@ -0,0 +1,60 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef U_BLIT_H +#define U_BLIT_H + + +struct pipe_context; +struct pipe_surface; + + +struct copy_pixels_state; + + +extern struct blit_state * +util_create_blit(struct pipe_context *pipe); + + +extern void +util_destroy_blit(struct blit_state *ctx); + + + +extern void +util_blit_pixels(struct blit_state *ctx, + struct pipe_surface *src, + int srcX0, int srcY0, + int srcX1, int srcY1, + struct pipe_surface *dst, + int dstX0, int dstY0, + int dstX1, int dstY1, + float z, uint filter); + + +#endif -- cgit v1.2.3 From e5b19a0f833b5a3d5ffcf50d25a620d00bd8914b Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 18 Mar 2008 17:14:05 -0600 Subject: gallium: added cso_unset_*() functions If we go behind the CSO context's back and set pipe state directly we need to invalidate the CSO's 'current' pointers. This will be revisited... --- src/gallium/auxiliary/cso_cache/cso_context.c | 36 ++++++++++++++++++++++++++- src/gallium/auxiliary/cso_cache/cso_context.h | 13 ++++++++++ 2 files changed, 48 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index fd86bfaca9..294ac82281 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -149,6 +149,12 @@ void cso_set_blend(struct cso_context *ctx, } } +void cso_unset_blend(struct cso_context *ctx) +{ + ctx->blend = NULL; +} + + void cso_single_sampler(struct cso_context *ctx, unsigned idx, const struct pipe_sampler_state *templ) @@ -220,6 +226,15 @@ void cso_set_samplers( struct cso_context *ctx, cso_single_sampler_done( ctx ); } +void cso_unset_samplers( struct cso_context *ctx ) +{ + uint i; + for (i = 0; i < ctx->nr_samplers; i++) + ctx->samplers[i] = NULL; +} + + + void cso_set_depth_stencil_alpha(struct cso_context *ctx, const struct pipe_depth_stencil_alpha_state *templ) { @@ -252,6 +267,11 @@ void cso_set_depth_stencil_alpha(struct cso_context *ctx, } } +void cso_unset_depth_stencil_alpha(struct cso_context *ctx) +{ + ctx->depth_stencil = NULL; +} + void cso_set_rasterizer(struct cso_context *ctx, @@ -285,7 +305,10 @@ void cso_set_rasterizer(struct cso_context *ctx, } } - +void cso_unset_rasterizer(struct cso_context *ctx) +{ + ctx->rasterizer = NULL; +} @@ -320,6 +343,12 @@ void cso_set_fragment_shader(struct cso_context *ctx, } } +void cso_unset_fragment_shader(struct cso_context *ctx) +{ + ctx->fragment_shader = NULL; +} + + void cso_set_vertex_shader(struct cso_context *ctx, const struct pipe_shader_state *templ) { @@ -350,3 +379,8 @@ void cso_set_vertex_shader(struct cso_context *ctx, ctx->pipe->bind_vs_state(ctx->pipe, handle); } } + +void cso_unset_vertex_shader(struct cso_context *ctx) +{ + ctx->vertex_shader = NULL; +} diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index 1f2a630804..6aa619abf5 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -44,16 +44,25 @@ struct cso_context *cso_create_context( struct pipe_context *pipe ); void cso_set_blend( struct cso_context *cso, const struct pipe_blend_state *blend ); +void cso_unset_blend(struct cso_context *cso); + void cso_set_depth_stencil_alpha( struct cso_context *cso, const struct pipe_depth_stencil_alpha_state *dsa ); +void cso_unset_depth_stencil_alpha( struct cso_context *cso ); + void cso_set_rasterizer( struct cso_context *cso, const struct pipe_rasterizer_state *rasterizer ); +void cso_unset_rasterizer( struct cso_context *cso ); + void cso_set_samplers( struct cso_context *cso, unsigned count, const struct pipe_sampler_state **states ); +void cso_unset_samplers( struct cso_context *cso ); + + /* Alternate interface to support state trackers that like to modify * samplers one at a time: */ @@ -72,9 +81,13 @@ void cso_single_sampler_done( struct cso_context *cso ); void cso_set_fragment_shader( struct cso_context *cso, const struct pipe_shader_state *shader ); +void cso_unset_fragment_shader( struct cso_context *cso ); + void cso_set_vertex_shader( struct cso_context *cso, const struct pipe_shader_state *shader ); +void cso_unset_vertex_shader( struct cso_context *cso ); + void cso_destroy_context( struct cso_context *cso ); -- cgit v1.2.3 From d946b55543e483a15d8efbe4022d895c8444f6ac Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 18 Mar 2008 17:15:25 -0600 Subject: gallium: s/copy_pixels_state/blit_state --- src/gallium/auxiliary/util/u_blit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h index 0a8e05f88b..a349be99ad 100644 --- a/src/gallium/auxiliary/util/u_blit.h +++ b/src/gallium/auxiliary/util/u_blit.h @@ -34,7 +34,7 @@ struct pipe_context; struct pipe_surface; -struct copy_pixels_state; +struct blit_state; extern struct blit_state * -- cgit v1.2.3 From 9f50a6a24c922926904c215249c3a0426dd433aa Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 18 Mar 2008 17:19:57 -0600 Subject: gallium: added braces to silence warning --- src/gallium/auxiliary/draw/draw_prim.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c index 888fa536ea..cb0277fb6c 100644 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -169,11 +169,12 @@ void draw_do_flush( struct draw_context *draw, unsigned flags ) flushing = TRUE; if (flags >= DRAW_FLUSH_SHADER_QUEUE) { - if (draw->vs.queue_nr) + if (draw->vs.queue_nr) { if (draw->rasterizer->bypass_vs) fetch_and_store(draw); else (*draw->shader_queue_flush)(draw); + } if (flags >= DRAW_FLUSH_PRIM_QUEUE) { if (draw->pq.queue_nr) -- cgit v1.2.3 From e08501b45763cf177f03fb34b737050d23ba4bc0 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 19 Mar 2008 16:41:07 +0000 Subject: gallium: Add generic enum and flags dumping utility functions. --- src/gallium/auxiliary/util/p_debug.c | 59 ++++++++++++++++++++++++++++++++++++ src/gallium/include/pipe/p_debug.h | 49 ++++++++++++++++++++++++++++++ 2 files changed, 108 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index bb84e8096d..bd3a0221ea 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -52,6 +52,9 @@ rpl_EngDebugPrint(const char *format, ...) } int rpl_vsnprintf(char *, size_t, const char *, va_list); +int rpl_snprintf(char *str, size_t size, const char *format, ...); +#define vsnprintf rpl_vsnprintf +#define snprintf rpl_snprintf #endif @@ -181,3 +184,59 @@ void debug_mask_vprintf(uint32_t uuid, uint32_t what, const char *format, va_lis if(mask & what) debug_vprintf(format, ap); } + + +const char * +debug_dump_enum(const struct debug_named_value *names, + unsigned long value) +{ + static char rest[256]; + + while(names->name) { + if(names->value == value) + return names->name; + ++names; + } + + snprintf(rest, sizeof(rest), "0x%08x", value); + return rest; +} + + +const char * +debug_dump_flags(const struct debug_named_value *names, + unsigned long value) +{ + static char output[4096]; + static char rest[256]; + int first = 1; + + output[0] = '\0'; + + while(names->name) { + if((names->value & value) == names->value) { + if (!first) + strncat(output, "|", sizeof(output)); + else + first = 0; + strncat(output, names->name, sizeof(output)); + value &= ~names->value; + } + ++names; + } + + if (value) { + if (!first) + strncat(output, "|", sizeof(output)); + else + first = 0; + + snprintf(rest, sizeof(rest), "0x%08x", value); + strncat(output, rest, sizeof(output)); + } + + if(first) + return "0"; + + return output; +} diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index f3dfa06216..e924c1ef60 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -168,6 +168,55 @@ void debug_mask_vprintf(uint32_t uuid, #endif +/** + * Used by debug_dump_enum and debug_dump_flags to describe symbols. + */ +struct debug_named_value +{ + const char *name; + unsigned long value; +}; + + +/** + * Some C pre-processor magic to simplify creating named values. + * + * Example: + * @code + * static const debug_named_value my_names[] = { + * DEBUG_NAMED_VALUE(MY_ENUM_VALUE_X), + * DEBUG_NAMED_VALUE(MY_ENUM_VALUE_Y), + * DEBUG_NAMED_VALUE(MY_ENUM_VALUE_Z), + * DEBUG_NAMED_VALUE_END + * }; + * + * ... + * debug_printf("%s = %s\n", + * name, + * debug_dump_enum(my_names, my_value)); + * ... + * @endcode + */ +#define DEBUG_NAMED_VALUE(__symbol) {#__symbol, (unsigned long)__symbol} +#define DEBUG_NAMED_VALUE_END {NULL, 0} + + +/** + * Convert a enum value to a string. + */ +const char * +debug_dump_enum(const struct debug_named_value *names, + unsigned long value); + + +/** + * Convert binary flags value to a string. + */ +const char * +debug_dump_flags(const struct debug_named_value *names, + unsigned long value); + + #ifdef __cplusplus } #endif -- cgit v1.2.3 From 7d95efde0a0e13e13c59444703bc47eb13926385 Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 19 Mar 2008 11:12:48 -0600 Subject: gallium: implement CSO save/restore functions for use by meta operations (blit, gen-mipmaps, quad-clear, etc) Also, additional cso_set_*() functions for viewport, framebuffer, blend color, etc. state. --- src/gallium/auxiliary/cso_cache/cso_context.c | 202 +++++++++++++++++++++++--- src/gallium/auxiliary/cso_cache/cso_context.h | 53 +++++-- src/gallium/auxiliary/util/u_blit.c | 125 ++++++++-------- src/gallium/auxiliary/util/u_blit.h | 5 +- src/gallium/auxiliary/util/u_gen_mipmap.c | 116 ++++++++------- src/gallium/auxiliary/util/u_gen_mipmap.h | 6 +- src/mesa/state_tracker/st_atom_blend.c | 12 +- src/mesa/state_tracker/st_atom_framebuffer.c | 9 +- src/mesa/state_tracker/st_atom_viewport.c | 26 ++-- src/mesa/state_tracker/st_cb_blit.c | 17 +-- src/mesa/state_tracker/st_cb_clear.c | 35 ++--- src/mesa/state_tracker/st_cb_drawpixels.c | 25 ++-- src/mesa/state_tracker/st_context.h | 1 - src/mesa/state_tracker/st_gen_mipmap.c | 20 +-- 14 files changed, 408 insertions(+), 244 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 294ac82281..53d05ae6ce 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -52,11 +52,27 @@ struct cso_context { void *samplers[PIPE_MAX_SAMPLERS]; unsigned nr_samplers; - void *blend; - void *depth_stencil; - void *rasterizer; - void *fragment_shader; - void *vertex_shader; + void *samplers_saved[PIPE_MAX_SAMPLERS]; + unsigned nr_samplers_saved; + + struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; + uint nr_textures; + + struct pipe_texture *textures_saved[PIPE_MAX_SAMPLERS]; + uint nr_textures_saved; + + /** Current and saved state. + * The saved state is used as a 1-deep stack. + */ + void *blend, *blend_saved; + void *depth_stencil, *depth_stencil_saved; + void *rasterizer, *rasterizer_saved; + void *fragment_shader, *fragment_shader_saved; + void *vertex_shader, *vertex_shader_saved; + + struct pipe_framebuffer_state fb, fb_saved; + struct pipe_viewport_state vp, vp_saved; + struct pipe_blend_color blend_color; }; @@ -149,12 +165,23 @@ void cso_set_blend(struct cso_context *ctx, } } -void cso_unset_blend(struct cso_context *ctx) +void cso_save_blend(struct cso_context *ctx) +{ + assert(!ctx->blend_saved); + ctx->blend_saved = ctx->blend; +} + +void cso_restore_blend(struct cso_context *ctx) { - ctx->blend = NULL; + if (ctx->blend != ctx->blend_saved) { + ctx->blend = ctx->blend_saved; + ctx->pipe->bind_blend_state(ctx->pipe, ctx->blend_saved); + } + ctx->blend_saved = NULL; } + void cso_single_sampler(struct cso_context *ctx, unsigned idx, const struct pipe_sampler_state *templ) @@ -226,13 +253,48 @@ void cso_set_samplers( struct cso_context *ctx, cso_single_sampler_done( ctx ); } -void cso_unset_samplers( struct cso_context *ctx ) +void cso_save_samplers(struct cso_context *ctx) +{ + ctx->nr_samplers_saved = ctx->nr_samplers; + memcpy(ctx->samplers_saved, ctx->samplers, sizeof(ctx->samplers)); +} + +void cso_restore_samplers(struct cso_context *ctx) +{ + cso_set_samplers(ctx, ctx->nr_samplers_saved, + (const struct pipe_sampler_state **) ctx->samplers_saved); +} + + +void cso_set_sampler_textures( struct cso_context *ctx, + uint count, + struct pipe_texture **textures ) { uint i; - for (i = 0; i < ctx->nr_samplers; i++) - ctx->samplers[i] = NULL; + + ctx->nr_textures = count; + + for (i = 0; i < count; i++) + ctx->textures[i] = textures[i]; + for ( ; i < PIPE_MAX_SAMPLERS; i++) + ctx->textures[i] = NULL; + + ctx->pipe->set_sampler_textures(ctx->pipe, count, textures); } +void cso_save_sampler_textures( struct cso_context *ctx ) +{ + ctx->nr_textures_saved = ctx->nr_textures; + memcpy(ctx->textures_saved, ctx->textures, sizeof(ctx->textures)); +} + +void cso_restore_sampler_textures( struct cso_context *ctx ) +{ + cso_set_sampler_textures(ctx, ctx->nr_textures_saved, ctx->textures_saved); + ctx->nr_textures_saved = 0; +} + + void cso_set_depth_stencil_alpha(struct cso_context *ctx, @@ -267,15 +329,25 @@ void cso_set_depth_stencil_alpha(struct cso_context *ctx, } } -void cso_unset_depth_stencil_alpha(struct cso_context *ctx) +void cso_save_depth_stencil_alpha(struct cso_context *ctx) { - ctx->depth_stencil = NULL; + assert(!ctx->depth_stencil_saved); + ctx->depth_stencil_saved = ctx->depth_stencil; +} + +void cso_restore_depth_stencil_alpha(struct cso_context *ctx) +{ + if (ctx->depth_stencil != ctx->depth_stencil_saved) { + ctx->depth_stencil = ctx->depth_stencil_saved; + ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, ctx->depth_stencil_saved); + } + ctx->depth_stencil_saved = NULL; } void cso_set_rasterizer(struct cso_context *ctx, - const struct pipe_rasterizer_state *templ) + const struct pipe_rasterizer_state *templ) { unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_rasterizer_state)); @@ -305,11 +377,20 @@ void cso_set_rasterizer(struct cso_context *ctx, } } -void cso_unset_rasterizer(struct cso_context *ctx) +void cso_save_rasterizer(struct cso_context *ctx) { - ctx->rasterizer = NULL; + assert(!ctx->rasterizer_saved); + ctx->rasterizer_saved = ctx->rasterizer; } +void cso_restore_rasterizer(struct cso_context *ctx) +{ + if (ctx->rasterizer != ctx->rasterizer_saved) { + ctx->rasterizer = ctx->rasterizer_saved; + ctx->pipe->bind_rasterizer_state(ctx->pipe, ctx->rasterizer_saved); + } + ctx->rasterizer_saved = NULL; +} void cso_set_fragment_shader(struct cso_context *ctx, @@ -343,11 +424,23 @@ void cso_set_fragment_shader(struct cso_context *ctx, } } -void cso_unset_fragment_shader(struct cso_context *ctx) +void cso_save_fragment_shader(struct cso_context *ctx) { - ctx->fragment_shader = NULL; + assert(!ctx->fragment_shader_saved); + ctx->fragment_shader_saved = ctx->fragment_shader; } +void cso_restore_fragment_shader(struct cso_context *ctx) +{ + assert(ctx->fragment_shader_saved); + if (ctx->fragment_shader_saved != ctx->fragment_shader) { + ctx->pipe->bind_fs_state(ctx->pipe, ctx->fragment_shader_saved); + ctx->fragment_shader = ctx->fragment_shader_saved; + } + ctx->fragment_shader_saved = NULL; +} + + void cso_set_vertex_shader(struct cso_context *ctx, const struct pipe_shader_state *templ) @@ -380,7 +473,78 @@ void cso_set_vertex_shader(struct cso_context *ctx, } } -void cso_unset_vertex_shader(struct cso_context *ctx) +void cso_save_vertex_shader(struct cso_context *ctx) { - ctx->vertex_shader = NULL; + assert(!ctx->vertex_shader_saved); + ctx->vertex_shader_saved = ctx->vertex_shader; +} + +void cso_restore_vertex_shader(struct cso_context *ctx) +{ + assert(ctx->vertex_shader_saved); + if (ctx->vertex_shader_saved != ctx->vertex_shader) { + ctx->pipe->bind_fs_state(ctx->pipe, ctx->vertex_shader_saved); + ctx->vertex_shader = ctx->vertex_shader_saved; + } + ctx->vertex_shader_saved = NULL; +} + + + +void cso_set_framebuffer(struct cso_context *ctx, + const struct pipe_framebuffer_state *fb) +{ + if (memcmp(&ctx->fb, fb, sizeof(*fb))) { + ctx->fb = *fb; + ctx->pipe->set_framebuffer_state(ctx->pipe, fb); + } +} + +void cso_save_framebuffer(struct cso_context *ctx) +{ + ctx->fb_saved = ctx->fb; +} + +void cso_restore_framebuffer(struct cso_context *ctx) +{ + if (memcmp(&ctx->fb, &ctx->fb_saved, sizeof(ctx->fb))) { + ctx->fb = ctx->fb_saved; + ctx->pipe->set_framebuffer_state(ctx->pipe, &ctx->fb); + } +} + + +void cso_set_viewport(struct cso_context *ctx, + const struct pipe_viewport_state *vp) +{ + if (memcmp(&ctx->vp, vp, sizeof(*vp))) { + ctx->vp = *vp; + ctx->pipe->set_viewport_state(ctx->pipe, vp); + } +} + +void cso_save_viewport(struct cso_context *ctx) +{ + ctx->vp_saved = ctx->vp; +} + + +void cso_restore_viewport(struct cso_context *ctx) +{ + if (memcmp(&ctx->vp, &ctx->vp_saved, sizeof(ctx->vp))) { + ctx->vp = ctx->vp_saved; + ctx->pipe->set_viewport_state(ctx->pipe, &ctx->vp); + } +} + + + + +void cso_set_blend_color(struct cso_context *ctx, + const struct pipe_blend_color *bc) +{ + if (memcmp(&ctx->blend_color, bc, sizeof(ctx->blend_color))) { + ctx->blend_color = *bc; + ctx->pipe->set_blend_color(ctx->pipe, bc); + } } diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index 6aa619abf5..665e8d9911 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -41,27 +41,36 @@ struct cso_context; struct cso_context *cso_create_context( struct pipe_context *pipe ); +void cso_destroy_context( struct cso_context *cso ); + + + void cso_set_blend( struct cso_context *cso, const struct pipe_blend_state *blend ); +void cso_save_blend(struct cso_context *cso); +void cso_restore_blend(struct cso_context *cso); + -void cso_unset_blend(struct cso_context *cso); void cso_set_depth_stencil_alpha( struct cso_context *cso, const struct pipe_depth_stencil_alpha_state *dsa ); +void cso_save_depth_stencil_alpha(struct cso_context *cso); +void cso_restore_depth_stencil_alpha(struct cso_context *cso); + -void cso_unset_depth_stencil_alpha( struct cso_context *cso ); void cso_set_rasterizer( struct cso_context *cso, const struct pipe_rasterizer_state *rasterizer ); +void cso_save_rasterizer(struct cso_context *cso); +void cso_restore_rasterizer(struct cso_context *cso); + -void cso_unset_rasterizer( struct cso_context *cso ); void cso_set_samplers( struct cso_context *cso, unsigned count, const struct pipe_sampler_state **states ); - -void cso_unset_samplers( struct cso_context *cso ); - +void cso_save_samplers(struct cso_context *cso); +void cso_restore_samplers(struct cso_context *cso); /* Alternate interface to support state trackers that like to modify * samplers one at a time: @@ -73,6 +82,15 @@ void cso_single_sampler( struct cso_context *cso, void cso_single_sampler_done( struct cso_context *cso ); + +void cso_set_sampler_textures( struct cso_context *cso, + uint count, + struct pipe_texture **textures ); +void cso_save_sampler_textures( struct cso_context *cso ); +void cso_restore_sampler_textures( struct cso_context *cso ); + + + /* These aren't really sensible -- most of the time the api provides * object semantics for shaders anyway, and the cases where it doesn't * (eg mesa's internall-generated texenv programs), it will be up to @@ -80,15 +98,32 @@ void cso_single_sampler_done( struct cso_context *cso ); */ void cso_set_fragment_shader( struct cso_context *cso, const struct pipe_shader_state *shader ); +void cso_save_fragment_shader(struct cso_context *cso); +void cso_restore_fragment_shader(struct cso_context *cso); + -void cso_unset_fragment_shader( struct cso_context *cso ); void cso_set_vertex_shader( struct cso_context *cso, const struct pipe_shader_state *shader ); +void cso_save_vertex_shader(struct cso_context *cso); +void cso_restore_vertex_shader(struct cso_context *cso); -void cso_unset_vertex_shader( struct cso_context *cso ); -void cso_destroy_context( struct cso_context *cso ); + +void cso_set_framebuffer(struct cso_context *cso, + const struct pipe_framebuffer_state *fb); +void cso_save_framebuffer(struct cso_context *cso); +void cso_restore_framebuffer(struct cso_context *cso); + + +void cso_set_viewport(struct cso_context *cso, + const struct pipe_viewport_state *vp); +void cso_save_viewport(struct cso_context *cso); +void cso_restore_viewport(struct cso_context *cso); + + +void cso_set_blend_color(struct cso_context *cso, + const struct pipe_blend_color *bc); #ifdef __cplusplus diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 4b4ab8185f..123304fe68 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -45,15 +45,18 @@ #include "util/u_blit.h" #include "util/u_simple_shaders.h" +#include "cso_cache/cso_context.h" + struct blit_state { struct pipe_context *pipe; + struct cso_context *cso; - void *blend; - void *depthstencil; - void *rasterizer; - void *samplers[2]; /* one for linear, one for nearest sampling */ + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state depthstencil; + struct pipe_rasterizer_state rasterizer; + struct pipe_sampler_state sampler; /*struct pipe_viewport_state viewport;*/ struct pipe_sampler_state *vs; @@ -66,56 +69,44 @@ struct blit_state * Intended to be created once and re-used for many blit() calls. */ struct blit_state * -util_create_blit(struct pipe_context *pipe) +util_create_blit(struct pipe_context *pipe, struct cso_context *cso) { - struct pipe_blend_state blend; - struct pipe_depth_stencil_alpha_state depthstencil; - struct pipe_rasterizer_state rasterizer; struct blit_state *ctx; - struct pipe_sampler_state sampler; ctx = CALLOC_STRUCT(blit_state); if (!ctx) return NULL; ctx->pipe = pipe; + ctx->cso = cso; - /* we don't use blending, but need to set valid values */ - memset(&blend, 0, sizeof(blend)); - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.colormask = PIPE_MASK_RGBA; - ctx->blend = pipe->create_blend_state(pipe, &blend); + /* disabled blending/masking */ + memset(&ctx->blend, 0, sizeof(ctx->blend)); + ctx->blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + ctx->blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + ctx->blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + ctx->blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + ctx->blend.colormask = PIPE_MASK_RGBA; - /* depth/stencil/alpha */ - memset(&depthstencil, 0, sizeof(depthstencil)); - ctx->depthstencil = pipe->create_depth_stencil_alpha_state(pipe, &depthstencil); + /* no-op depth/stencil/alpha */ + memset(&ctx->depthstencil, 0, sizeof(ctx->depthstencil)); /* rasterizer */ - memset(&rasterizer, 0, sizeof(rasterizer)); - rasterizer.front_winding = PIPE_WINDING_CW; - rasterizer.cull_mode = PIPE_WINDING_NONE; - rasterizer.bypass_clipping = 1; /* bypasses viewport too */ - /*rasterizer.bypass_vs = 1;*/ - ctx->rasterizer = pipe->create_rasterizer_state(pipe, &rasterizer); + memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer)); + ctx->rasterizer.front_winding = PIPE_WINDING_CW; + ctx->rasterizer.cull_mode = PIPE_WINDING_NONE; + ctx->rasterizer.bypass_clipping = 1; /* bypasses viewport too */ + /*ctx->rasterizer.bypass_vs = 1;*/ /* samplers */ - memset(&sampler, 0, sizeof(sampler)); - sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler.min_img_filter = PIPE_TEX_MIPFILTER_NEAREST; - sampler.mag_img_filter = PIPE_TEX_MIPFILTER_NEAREST; - sampler.normalized_coords = 1; - ctx->samplers[0] = pipe->create_sampler_state(pipe, &sampler); - - sampler.min_img_filter = PIPE_TEX_MIPFILTER_LINEAR; - sampler.mag_img_filter = PIPE_TEX_MIPFILTER_LINEAR; - ctx->samplers[1] = pipe->create_sampler_state(pipe, &sampler); - + memset(&ctx->sampler, 0, sizeof(ctx->sampler)); + ctx->sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + ctx->sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + ctx->sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + ctx->sampler.min_img_filter = 0; /* set later */ + ctx->sampler.mag_img_filter = 0; /* set later */ + ctx->sampler.normalized_coords = 1; #if 0 /* viewport */ @@ -153,12 +144,6 @@ util_destroy_blit(struct blit_state *ctx) { struct pipe_context *pipe = ctx->pipe; - pipe->delete_blend_state(pipe, ctx->blend); - pipe->delete_depth_stencil_alpha_state(pipe, ctx->depthstencil); - pipe->delete_rasterizer_state(pipe, ctx->rasterizer); - pipe->delete_sampler_state(pipe, ctx->samplers[0]); - pipe->delete_sampler_state(pipe, ctx->samplers[1]); - pipe->delete_vs_state(pipe, ctx->vs); pipe->delete_fs_state(pipe, ctx->fs); @@ -236,17 +221,24 @@ util_blit_pixels(struct blit_state *ctx, src, srcLeft, srcTop, /* src */ srcW, srcH); /* size */ - /* drawing dest */ - memset(&fb, 0, sizeof(fb)); - fb.num_cbufs = 1; - fb.cbufs[0] = dst; - pipe->set_framebuffer_state(pipe, &fb); + /* save state (restored below) */ + cso_save_blend(ctx->cso); + cso_save_depth_stencil_alpha(ctx->cso); + cso_save_rasterizer(ctx->cso); + cso_save_samplers(ctx->cso); + cso_save_sampler_textures(ctx->cso); + cso_save_framebuffer(ctx->cso); + + /* set misc state we care about */ + cso_set_blend(ctx->cso, &ctx->blend); + cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); + cso_set_rasterizer(ctx->cso, &ctx->rasterizer); /* sampler */ - if (filter == PIPE_TEX_MIPFILTER_NEAREST) - pipe->bind_sampler_states(pipe, 1, &ctx->samplers[0]); - else - pipe->bind_sampler_states(pipe, 1, &ctx->samplers[1]); + ctx->sampler.min_img_filter = filter; + ctx->sampler.mag_img_filter = filter; + cso_single_sampler(ctx->cso, 0, &ctx->sampler); + cso_single_sampler_done(ctx->cso); /* texture */ pipe->set_sampler_textures(pipe, 1, &tex); @@ -255,22 +247,25 @@ util_blit_pixels(struct blit_state *ctx, pipe->bind_fs_state(pipe, ctx->fs); pipe->bind_vs_state(pipe, ctx->vs); - /* misc state */ - pipe->bind_blend_state(pipe, ctx->blend); - pipe->bind_depth_stencil_alpha_state(pipe, ctx->depthstencil); - pipe->bind_rasterizer_state(pipe, ctx->rasterizer); + /* drawing dest */ + memset(&fb, 0, sizeof(fb)); + fb.num_cbufs = 1; + fb.cbufs[0] = dst; + cso_set_framebuffer(ctx->cso, &fb); /* draw quad */ util_draw_texquad(pipe, dstX0, dstY0, dstX1, dstY1, z); - /* unbind */ - pipe->set_sampler_textures(pipe, 0, NULL); - pipe->bind_sampler_states(pipe, 0, NULL); + /* restore state we changed */ + cso_restore_blend(ctx->cso); + cso_restore_depth_stencil_alpha(ctx->cso); + cso_restore_rasterizer(ctx->cso); + cso_restore_samplers(ctx->cso); + cso_restore_sampler_textures(ctx->cso); + cso_restore_framebuffer(ctx->cso); - /* free stuff */ + /* free the texture */ pipe_surface_reference(&texSurf, NULL); screen->texture_release(screen, &tex); - - /* Note: caller must restore pipe/gallium state at this time */ } diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h index a349be99ad..61f1d9bb32 100644 --- a/src/gallium/auxiliary/util/u_blit.h +++ b/src/gallium/auxiliary/util/u_blit.h @@ -30,15 +30,16 @@ #define U_BLIT_H + struct pipe_context; struct pipe_surface; +struct cso_context; struct blit_state; - extern struct blit_state * -util_create_blit(struct pipe_context *pipe); +util_create_blit(struct pipe_context *pipe, struct cso_context *cso); extern void diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index e18f8ab72a..27141c4d13 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -49,14 +49,18 @@ #include "tgsi/util/tgsi_dump.h" #include "tgsi/util/tgsi_parse.h" +#include "cso_cache/cso_context.h" + struct gen_mipmap_state { struct pipe_context *pipe; + struct cso_context *cso; - void *blend; - void *depthstencil; - void *rasterizer; + struct pipe_blend_state blend; + struct pipe_depth_stencil_alpha_state depthstencil; + struct pipe_rasterizer_state rasterizer; + struct pipe_sampler_state sampler; /*struct pipe_viewport_state viewport;*/ struct pipe_sampler_state *vs; struct pipe_sampler_state *fs; @@ -675,11 +679,9 @@ fallback_gen_mipmap(struct gen_mipmap_state *ctx, * generate a mipmap. */ struct gen_mipmap_state * -util_create_gen_mipmap(struct pipe_context *pipe) +util_create_gen_mipmap(struct pipe_context *pipe, + struct cso_context *cso) { - struct pipe_blend_state blend; - struct pipe_depth_stencil_alpha_state depthstencil; - struct pipe_rasterizer_state rasterizer; struct gen_mipmap_state *ctx; ctx = CALLOC_STRUCT(gen_mipmap_state); @@ -687,27 +689,36 @@ util_create_gen_mipmap(struct pipe_context *pipe) return NULL; ctx->pipe = pipe; + ctx->cso = cso; - /* we don't use blending, but need to set valid values */ - memset(&blend, 0, sizeof(blend)); - blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; - blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; - blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; - blend.colormask = PIPE_MASK_RGBA; - ctx->blend = pipe->create_blend_state(pipe, &blend); + /* disabled blending/masking */ + memset(&ctx->blend, 0, sizeof(ctx->blend)); + ctx->blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + ctx->blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + ctx->blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + ctx->blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; + ctx->blend.colormask = PIPE_MASK_RGBA; - /* depth/stencil/alpha */ - memset(&depthstencil, 0, sizeof(depthstencil)); - ctx->depthstencil = pipe->create_depth_stencil_alpha_state(pipe, &depthstencil); + /* no-op depth/stencil/alpha */ + memset(&ctx->depthstencil, 0, sizeof(ctx->depthstencil)); /* rasterizer */ - memset(&rasterizer, 0, sizeof(rasterizer)); - rasterizer.front_winding = PIPE_WINDING_CW; - rasterizer.cull_mode = PIPE_WINDING_NONE; - rasterizer.bypass_clipping = 1; /* bypasses viewport too */ - //rasterizer.bypass_vs = 1; - ctx->rasterizer = pipe->create_rasterizer_state(pipe, &rasterizer); + memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer)); + ctx->rasterizer.front_winding = PIPE_WINDING_CW; + ctx->rasterizer.cull_mode = PIPE_WINDING_NONE; + ctx->rasterizer.bypass_clipping = 1; /* bypasses viewport too */ + /*ctx->rasterizer.bypass_vs = 1;*/ + + /* sampler state */ + memset(&ctx->sampler, 0, sizeof(ctx->sampler)); + ctx->sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + ctx->sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + ctx->sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; + ctx->sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + ctx->sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + ctx->sampler.normalized_coords = 1; + #if 0 /* viewport */ @@ -745,9 +756,6 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx) { struct pipe_context *pipe = ctx->pipe; - pipe->delete_blend_state(pipe, ctx->blend); - pipe->delete_depth_stencil_alpha_state(pipe, ctx->depthstencil); - pipe->delete_rasterizer_state(pipe, ctx->rasterizer); pipe->delete_vs_state(pipe, ctx->vs); pipe->delete_fs_state(pipe, ctx->fs); @@ -792,8 +800,6 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, struct pipe_context *pipe = ctx->pipe; struct pipe_screen *screen = pipe->screen; struct pipe_framebuffer_state fb; - struct pipe_sampler_state sampler; - void *sampler_cso; uint dstLevel; uint zslice = 0; @@ -803,30 +809,29 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, return; } - /* init framebuffer state */ - memset(&fb, 0, sizeof(fb)); - fb.num_cbufs = 1; - - /* sampler state */ - memset(&sampler, 0, sizeof(sampler)); - sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; - sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; - sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; - sampler.normalized_coords = 1; + /* save state (restored below) */ + cso_save_blend(ctx->cso); + cso_save_depth_stencil_alpha(ctx->cso); + cso_save_rasterizer(ctx->cso); + cso_save_samplers(ctx->cso); + cso_save_sampler_textures(ctx->cso); + cso_save_framebuffer(ctx->cso); /* bind our state */ - pipe->bind_blend_state(pipe, ctx->blend); - pipe->bind_depth_stencil_alpha_state(pipe, ctx->depthstencil); - pipe->bind_rasterizer_state(pipe, ctx->rasterizer); + cso_set_blend(ctx->cso, &ctx->blend); + cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); + cso_set_rasterizer(ctx->cso, &ctx->rasterizer); + pipe->bind_vs_state(pipe, ctx->vs); pipe->bind_fs_state(pipe, ctx->fs); #if 0 pipe->set_viewport_state(pipe, &ctx->viewport); #endif + /* init framebuffer state */ + memset(&fb, 0, sizeof(fb)); + fb.num_cbufs = 1; + /* * XXX for small mipmap levels, it may be faster to use the software * fallback path... @@ -838,7 +843,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, * Setup framebuffer / dest surface */ fb.cbufs[0] = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); - pipe->set_framebuffer_state(pipe, &fb); + cso_set_framebuffer(ctx->cso, &fb); /* * Setup sampler state @@ -847,11 +852,10 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, * has trouble with min clamping so we also set the lod_bias to * try to work around that. */ - sampler.min_lod = sampler.max_lod = (float) srcLevel; - sampler.lod_bias = (float) srcLevel; - sampler_cso = pipe->create_sampler_state(pipe, &sampler); - pipe->bind_sampler_states(pipe, 1, &sampler_cso); - + ctx->sampler.min_lod = ctx->sampler.max_lod = (float) srcLevel; + ctx->sampler.lod_bias = (float) srcLevel; + cso_single_sampler(ctx->cso, 0, &ctx->sampler); + cso_single_sampler_done(ctx->cso); #if 0 simple_viewport(pipe, pt->width[dstLevel], pt->height[dstLevel]); #endif @@ -869,9 +873,13 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, pipe->flush(pipe, PIPE_FLUSH_WAIT); /*pipe->texture_update(pipe, pt); not really needed */ - - pipe->delete_sampler_state(pipe, sampler_cso); } - /* Note: caller must restore pipe/gallium state at this time */ + /* restore state we changed */ + cso_restore_blend(ctx->cso); + cso_restore_depth_stencil_alpha(ctx->cso); + cso_restore_rasterizer(ctx->cso); + cso_restore_samplers(ctx->cso); + cso_restore_sampler_textures(ctx->cso); + cso_restore_framebuffer(ctx->cso); } diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.h b/src/gallium/auxiliary/util/u_gen_mipmap.h index 80496140a2..eeabf3bf07 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.h +++ b/src/gallium/auxiliary/util/u_gen_mipmap.h @@ -31,11 +31,15 @@ #include "pipe/p_state.h" +struct pipe_context; +struct pipe_texture; +struct cso_context; + struct gen_mipmap_state; extern struct gen_mipmap_state * -util_create_gen_mipmap(struct pipe_context *pipe); +util_create_gen_mipmap(struct pipe_context *pipe, struct cso_context *cso); extern void diff --git a/src/mesa/state_tracker/st_atom_blend.c b/src/mesa/state_tracker/st_atom_blend.c index 6c13fc8141..2a0e92245c 100644 --- a/src/mesa/state_tracker/st_atom_blend.c +++ b/src/mesa/state_tracker/st_atom_blend.c @@ -39,6 +39,7 @@ #include "pipe/p_defines.h" #include "cso_cache/cso_context.h" +#include "main/macros.h" /** * Convert GLenum blend tokens to pipe tokens. @@ -213,13 +214,10 @@ update_blend( struct st_context *st ) cso_set_blend(st->cso_context, blend); - if (memcmp(st->ctx->Color.BlendColor, &st->state.blend_color, 4 * sizeof(GLfloat)) != 0) { - /* state has changed */ - st->state.blend_color.color[0] = st->ctx->Color.BlendColor[0]; - st->state.blend_color.color[1] = st->ctx->Color.BlendColor[1]; - st->state.blend_color.color[2] = st->ctx->Color.BlendColor[2]; - st->state.blend_color.color[3] = st->ctx->Color.BlendColor[3]; - st->pipe->set_blend_color(st->pipe, (struct pipe_blend_color *) st->ctx->Color.BlendColor); + { + struct pipe_blend_color bc; + COPY_4FV(bc.color, st->ctx->Color.BlendColor); + cso_set_blend_color(st->cso_context, &bc); } } diff --git a/src/mesa/state_tracker/st_atom_framebuffer.c b/src/mesa/state_tracker/st_atom_framebuffer.c index 3e58d49f1f..c8fa0cbdfb 100644 --- a/src/mesa/state_tracker/st_atom_framebuffer.c +++ b/src/mesa/state_tracker/st_atom_framebuffer.c @@ -35,6 +35,7 @@ #include "st_atom.h" #include "st_cb_fbo.h" #include "pipe/p_context.h" +#include "cso_cache/cso_context.h" /** @@ -76,13 +77,7 @@ update_framebuffer_state( struct st_context *st ) } } - /* XXX: The memcmp is insufficient for eliminating redundant state changes, - * but we should probably do more work here to that end. - */ - if (1 /*memcmp(&framebuffer, &st->state.framebuffer, sizeof(framebuffer)) != 0*/) { - st->state.framebuffer = framebuffer; - st->pipe->set_framebuffer_state( st->pipe, &framebuffer ); - } + cso_set_framebuffer(st->cso_context, &framebuffer); } diff --git a/src/mesa/state_tracker/st_atom_viewport.c b/src/mesa/state_tracker/st_atom_viewport.c index 147aa3c51a..eb3f62cfbe 100644 --- a/src/mesa/state_tracker/st_atom_viewport.c +++ b/src/mesa/state_tracker/st_atom_viewport.c @@ -29,9 +29,9 @@ #include "context.h" #include "colormac.h" #include "st_context.h" -#include "pipe/p_context.h" #include "st_atom.h" - +#include "pipe/p_context.h" +#include "cso_cache/cso_context.h" /** * Update the viewport transformation matrix. Depends on: @@ -65,22 +65,18 @@ update_viewport( struct st_context *st ) GLfloat half_width = ctx->Viewport.Width / 2.0; GLfloat half_height = ctx->Viewport.Height / 2.0; GLfloat half_depth = (ctx->Viewport.Far - ctx->Viewport.Near) / 2.0; - struct pipe_viewport_state vp; - vp.scale[0] = half_width; - vp.scale[1] = half_height * yScale; - vp.scale[2] = half_depth; - vp.scale[3] = 1.0; + st->state.viewport.scale[0] = half_width; + st->state.viewport.scale[1] = half_height * yScale; + st->state.viewport.scale[2] = half_depth; + st->state.viewport.scale[3] = 1.0; - vp.translate[0] = half_width + x; - vp.translate[1] = (half_height + y) * yScale + yBias; - vp.translate[2] = half_depth + z; - vp.translate[3] = 0.0; + st->state.viewport.translate[0] = half_width + x; + st->state.viewport.translate[1] = (half_height + y) * yScale + yBias; + st->state.viewport.translate[2] = half_depth + z; + st->state.viewport.translate[3] = 0.0; - if (memcmp(&vp, &st->state.viewport, sizeof(vp)) != 0) { - st->state.viewport = vp; - st->pipe->set_viewport_state(st->pipe, &vp); - } + cso_set_viewport(st->cso_context, &st->state.viewport); } } diff --git a/src/mesa/state_tracker/st_cb_blit.c b/src/mesa/state_tracker/st_cb_blit.c index dfa79c975c..64314a5078 100644 --- a/src/mesa/state_tracker/st_cb_blit.c +++ b/src/mesa/state_tracker/st_cb_blit.c @@ -52,7 +52,7 @@ void st_init_blit(struct st_context *st) { - st->blit = util_create_blit(st->pipe); + st->blit = util_create_blit(st->pipe, st->cso_context); } @@ -98,22 +98,9 @@ st_BlitFramebuffer(GLcontext *ctx, } -#if 0 - /* XXX is this sufficient? */ - st_invalidate_state(ctx, _NEW_COLOR | _NEW_TEXTURE); -#else - /* need to "unset" cso state because we went behind the back of the cso - * tracker. Without unset, the _set_ calls would be no-ops. - */ - cso_unset_blend(st->cso_context); - cso_unset_depth_stencil_alpha(st->cso_context); - cso_unset_rasterizer(st->cso_context); - cso_set_blend(st->cso_context, &st->state.blend); - cso_set_depth_stencil_alpha(st->cso_context, &st->state.depth_stencil); - cso_set_rasterizer(st->cso_context, &st->state.rasterizer); + /* shaders don't go through CSO yet */ pipe->bind_fs_state(pipe, st->fp->driver_shader); pipe->bind_vs_state(pipe, st->vp->driver_shader); -#endif } diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index 693cddedf7..c23938dc68 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -247,10 +247,19 @@ clear_with_quad(GLcontext *ctx, x1, y1); */ + cso_save_blend(st->cso_context); + cso_save_depth_stencil_alpha(st->cso_context); + cso_save_rasterizer(st->cso_context); + cso_save_viewport(st->cso_context); + /* blend state: RGBA masking */ { struct pipe_blend_state blend; memset(&blend, 0, sizeof(blend)); + blend.rgb_src_factor = PIPE_BLENDFACTOR_ONE; + blend.alpha_src_factor = PIPE_BLENDFACTOR_ONE; + blend.rgb_dst_factor = PIPE_BLENDFACTOR_ZERO; + blend.alpha_dst_factor = PIPE_BLENDFACTOR_ZERO; if (color) { if (ctx->Color.ColorMask[0]) blend.colormask |= PIPE_MASK_R; @@ -294,13 +303,6 @@ clear_with_quad(GLcontext *ctx, { struct pipe_rasterizer_state raster; memset(&raster, 0, sizeof(raster)); -#if 0 - /* don't do per-pixel scissor; we'll just draw a PIPE_PRIM_QUAD - * that matches the scissor bounds. - */ - if (ctx->Scissor.Enabled) - raster.scissor = 1; -#endif #if TEST_DRAW_PASSTHROUGH raster.bypass_clipping = 1; raster.bypass_vs = 1; @@ -342,28 +344,21 @@ clear_with_quad(GLcontext *ctx, vp.translate[1] = 0.5 * height; vp.translate[2] = 0.0; vp.translate[3] = 0.0; - pipe->set_viewport_state(pipe, &vp); + cso_set_viewport(st->cso_context, &vp); } #endif /* draw quad matching scissor rect (XXX verify coord round-off) */ draw_quad(ctx, x0, y0, x1, y1, ctx->Depth.Clear, ctx->Color.ClearColor); -#if 0 - /* Can't depend on old state objects still existing -- may have - * been deleted to make room in the hash, etc. (Should get - * fixed...) - */ - st_invalidate_state(ctx, _NEW_COLOR | _NEW_DEPTH | _NEW_STENCIL); -#else /* Restore pipe state */ - cso_set_blend(st->cso_context, &st->state.blend); - cso_set_depth_stencil_alpha(st->cso_context, &st->state.depth_stencil); - cso_set_rasterizer(st->cso_context, &st->state.rasterizer); + cso_restore_blend(st->cso_context); + cso_restore_depth_stencil_alpha(st->cso_context); + cso_restore_rasterizer(st->cso_context); + cso_restore_viewport(st->cso_context); + /* these don't go through cso yet */ pipe->bind_fs_state(pipe, st->fp->driver_shader); pipe->bind_vs_state(pipe, st->vp->driver_shader); -#endif - pipe->set_viewport_state(pipe, &st->state.viewport); } diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 18ec9645c4..33d34445ee 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -650,6 +650,9 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, assert(width <= maxSize); assert(height <= maxSize); + cso_save_rasterizer(cso); + cso_save_viewport(cso); + /* setup state: just scissor */ { struct pipe_rasterizer_state setup; @@ -696,7 +699,7 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, vp.translate[1] = 0.5 * height; vp.translate[2] = 0.0; vp.translate[3] = 0.0; - pipe->set_viewport_state(pipe, &vp); + cso_set_viewport(cso, &vp); } /* texture state: */ @@ -719,26 +722,18 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, else draw_quad(ctx, x0, y0, z, x1, y1, invertTex); - /* restore GL state */ - pipe->set_sampler_textures(pipe, ctx->st->state.num_textures, - ctx->st->state.sampler_texture); - - pipe->set_viewport_state(pipe, &ctx->st->state.viewport); - -#if 0 - /* Can't depend on old state objects still existing -- may have - * been deleted to make room in the hash, etc. (Should get - * fixed...) - */ - st_invalidate_state(ctx, _NEW_COLOR | _NEW_TEXTURE); -#else /* restore state */ + cso_restore_rasterizer(cso); + cso_restore_viewport(cso); + /* shaders don't go through cso yet */ pipe->bind_fs_state(pipe, st->fp->driver_shader); pipe->bind_vs_state(pipe, st->vp->driver_shader); + cso_set_rasterizer(cso, &st->state.rasterizer); cso_set_samplers(cso, PIPE_MAX_SAMPLERS, (const struct pipe_sampler_state **) st->state.sampler_list); -#endif + pipe->set_sampler_textures(pipe, ctx->st->state.num_textures, + ctx->st->state.sampler_texture); } diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 63150dbeaf..ca8307c4ba 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -82,7 +82,6 @@ struct st_context struct pipe_rasterizer_state rasterizer; struct pipe_sampler_state samplers[PIPE_MAX_SAMPLERS]; struct pipe_sampler_state *sampler_list[PIPE_MAX_SAMPLERS]; - struct pipe_blend_color blend_color; struct pipe_clip_state clip; struct pipe_constant_buffer constants[2]; struct pipe_framebuffer_state framebuffer; diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 6c3afca1ba..61e1d9621c 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -59,7 +59,7 @@ void st_init_generate_mipmap(struct st_context *st) { - st->gen_mipmap = util_create_gen_mipmap(st->pipe); + st->gen_mipmap = util_create_gen_mipmap(st->pipe, st->cso_context); } @@ -94,19 +94,11 @@ st_render_mipmap(struct st_context *st, util_gen_mipmap(st->gen_mipmap, pt, face, baseLevel, lastLevel); - /* restore pipe state */ -#if 0 - cso_set_rasterizer(st->cso_context, &st->state.rasterizer); - cso_set_samplers(st->cso_context, st->state.samplers_list); - pipe->bind_fs_state(pipe, st->fp->shader_program); - pipe->bind_vs_state(pipe, st->vp->shader_program); - pipe->set_sampler_textures(pipe, st->state.num_textures, - st->state.sampler_texture); - pipe->set_viewport_state(pipe, &st->state.viewport); -#else - /* XXX is this sufficient? */ - st_invalidate_state(st->ctx, _NEW_COLOR | _NEW_TEXTURE); -#endif + /* shaders don't go through CSO yet */ + if (st->fp) + pipe->bind_fs_state(pipe, st->fp->driver_shader); + if (st->vp) + pipe->bind_vs_state(pipe, st->vp->driver_shader); return TRUE; } -- cgit v1.2.3 From b4f03d0c98674c83d06edfa767a7898eca5d4ef8 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 19 Mar 2008 17:35:42 +0000 Subject: gallium: explict float casts --- src/gallium/auxiliary/util/u_blit.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 123304fe68..d05dae3af8 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -254,7 +254,11 @@ util_blit_pixels(struct blit_state *ctx, cso_set_framebuffer(ctx->cso, &fb); /* draw quad */ - util_draw_texquad(pipe, dstX0, dstY0, dstX1, dstY1, z); + util_draw_texquad(pipe, + (float)dstX0, + (float)dstY0, + (float)dstX1, + (float)dstY1, z); /* restore state we changed */ cso_restore_blend(ctx->cso); -- cgit v1.2.3 From df5ba799fa929d4c739be9d11d3f1000afc265b2 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 19 Mar 2008 20:46:08 +0000 Subject: gallium: Fix broken logic. --- src/gallium/auxiliary/util/u_handle_table.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c index 0bfb9e1b4a..8b0f7fca4b 100644 --- a/src/gallium/auxiliary/util/u_handle_table.c +++ b/src/gallium/auxiliary/util/u_handle_table.c @@ -248,7 +248,7 @@ handle_table_get_next_handle(struct handle_table *ht, unsigned index; for(index = handle; index < ht->size; ++index) { - if(!ht->objects[index]) + if(ht->objects[index]) return index + 1; } -- cgit v1.2.3 From b45669283fe4b9af9f2e78ac3c0c84207cf63775 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 19 Mar 2008 16:41:54 -0600 Subject: gallium: fix bug in cso_single_sampler_done() in computation of nr_samplers Need to find highest used sampler so search from end toward beginning. --- src/gallium/auxiliary/cso_cache/cso_context.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 53d05ae6ce..01fe216447 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -217,9 +217,11 @@ void cso_single_sampler_done( struct cso_context *ctx ) { unsigned i; - for (i = 0; i < 8; i++) - if (ctx->samplers[i] == NULL) + /* find highest non-null sampler */ + for (i = PIPE_MAX_SAMPLERS; i > 0; i--) { + if (ctx->samplers[i - 1] != NULL) break; + } ctx->nr_samplers = i; -- cgit v1.2.3 From 4984487bc3338fc351a0631eaa4515e4adbb86a9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 19 Mar 2008 17:08:16 -0600 Subject: gallium: add face, dirtyLevels params to pipe->texture_update() This provides better information about which images in texture object have changed. Also, call texture_update() from more places previously missed. --- src/gallium/auxiliary/draw/draw_aaline.c | 1 + src/gallium/auxiliary/draw/draw_pstipple.c | 2 +- src/gallium/auxiliary/util/u_gen_mipmap.c | 3 ++- src/gallium/drivers/cell/ppu/cell_texture.c | 3 ++- src/gallium/drivers/i915simple/i915_texture.c | 3 ++- src/gallium/drivers/i965simple/brw_tex_layout.c | 3 ++- src/gallium/drivers/softpipe/sp_texture.c | 3 ++- src/gallium/include/pipe/p_context.h | 6 ++---- src/mesa/state_tracker/st_atom_texture.c | 5 ----- src/mesa/state_tracker/st_cb_drawpixels.c | 3 +++ src/mesa/state_tracker/st_cb_texture.c | 13 +++++++------ src/mesa/state_tracker/st_texture.h | 2 -- 12 files changed, 24 insertions(+), 23 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c index f2b983374e..b4fa6bd967 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -410,6 +410,7 @@ aaline_create_texture(struct aaline_stage *aaline) /* unmap */ pipe_surface_unmap(surface); pipe_surface_reference(&surface, NULL); + pipe->texture_update(pipe, aaline->texture, 0, (1 << level)); } } diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index 09d542002f..9d154a6838 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -374,7 +374,7 @@ pstip_update_texture(struct pstip_stage *pstip) /* unmap */ pipe_surface_unmap(surface); pipe_surface_reference(&surface, NULL); - pipe->texture_update(pipe, pstip->texture); + pipe->texture_update(pipe, pstip->texture, 0, 0x1); } diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 27141c4d13..028b180a77 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -872,7 +872,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, pipe->flush(pipe, PIPE_FLUSH_WAIT); - /*pipe->texture_update(pipe, pt); not really needed */ + /* need to signal that the texture has changed _after_ rendering to it */ + pipe->texture_update(pipe, pt, face, (1 << dstLevel)); } /* restore state we changed */ diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index e235421107..9c694e136d 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -134,7 +134,8 @@ cell_texture_release_screen(struct pipe_screen *screen, static void -cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) +cell_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, + uint face, uint levelsMask) { /* XXX TO DO: re-tile the texture data ... */ diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index ef5adff550..c39e747705 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -542,7 +542,8 @@ i915_texture_release_screen(struct pipe_screen *screen, static void -i915_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) +i915_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, + uint face, uint levelsMask) { /* no-op? */ } diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index b24ac87c37..b580f98204 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -358,7 +358,8 @@ brw_texture_release_screen(struct pipe_screen *screen, static void -brw_texture_update(struct pipe_context *pipe, struct pipe_texture *texture) +brw_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, + uint face, uint levelsMask) { /* no-op? */ } diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 64bf353aa4..a98b3b1a4a 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -174,7 +174,8 @@ softpipe_get_tex_surface_screen(struct pipe_screen *screen, static void softpipe_texture_update(struct pipe_context *pipe, - struct pipe_texture *texture) + struct pipe_texture *texture, + uint face, uint levelsMask) { struct softpipe_context *softpipe = softpipe_context(pipe); uint unit; diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index b64948f0f3..a3824601be 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -192,12 +192,10 @@ struct pipe_context { /** * Called when texture data is changed. - * Note: we could pass some hints about which mip levels or cube faces - * have changed... - * XXX this may go away - could pass a 'write' flag to get_tex_surface() */ void (*texture_update)(struct pipe_context *pipe, - struct pipe_texture *texture); + struct pipe_texture *texture, + uint face, uint dirtyLevelsMask); diff --git a/src/mesa/state_tracker/st_atom_texture.c b/src/mesa/state_tracker/st_atom_texture.c index e53a897637..2a711e513d 100644 --- a/src/mesa/state_tracker/st_atom_texture.c +++ b/src/mesa/state_tracker/st_atom_texture.c @@ -76,11 +76,6 @@ update_textures(struct st_context *st) pt = st_get_stobj_texture(stObj); pipe_texture_reference(&st->state.sampler_texture[unit], pt); - - if (stObj && stObj->dirtyData) { - st->pipe->texture_update(st->pipe, pt); - stObj->dirtyData = GL_FALSE; - } } st->pipe->set_sampler_textures(st->pipe, st->state.num_textures, diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 33d34445ee..99d5e3e848 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -513,6 +513,8 @@ make_texture(struct st_context *st, /* unmap */ pipe_surface_unmap(surface); pipe_surface_reference(&surface, NULL); + pipe->texture_update(pipe, pt, 0, 0x1); + assert(success); /* restore */ @@ -1100,6 +1102,7 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height, /* Release surface */ pipe_surface_unmap(surface); pipe_surface_reference(&surface, NULL); + pipe->texture_update(pipe, pt, 0, 0x1); pt->format = format; diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index d4731c7737..306b27c423 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -476,6 +476,7 @@ st_TexImage(GLcontext * ctx, struct gl_texture_image *texImage, GLsizei imageSize, int compressed) { + struct pipe_context *pipe = ctx->st->pipe; struct st_texture_object *stObj = st_texture_object(texObj); struct st_texture_image *stImage = st_texture_image(texImage); GLint postConvWidth, postConvHeight; @@ -690,8 +691,8 @@ st_TexImage(GLcontext * ctx, texImage->Data = NULL; } - /* flag data as dirty */ - stObj->dirtyData = GL_TRUE; + if (stObj->pt) + pipe->texture_update(pipe, stObj->pt, stImage->face, (1 << level)); if (level == texObj->BaseLevel && texObj->GenerateMipmap) { ctx->Driver.GenerateMipmap(ctx, target, texObj); @@ -866,6 +867,7 @@ st_TexSubimage(GLcontext * ctx, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { + struct pipe_context *pipe = ctx->st->pipe; struct st_texture_object *stObj = st_texture_object(texObj); struct st_texture_image *stImage = st_texture_image(texImage); GLuint dstRowStride; @@ -924,8 +926,7 @@ st_TexSubimage(GLcontext * ctx, texImage->Data = NULL; } - /* flag data as dirty */ - stObj->dirtyData = GL_TRUE; + pipe->texture_update(pipe, stObj->pt, stImage->face, (1 << level)); } @@ -1179,8 +1180,7 @@ do_copy_texsubimage(GLcontext *ctx, pipe_surface_reference(&dest_surface, NULL); - /* flag data as dirty */ - stObj->dirtyData = GL_TRUE; + pipe->texture_update(pipe, stObj->pt, stImage->face, (1 << level)); if (level == texObj->BaseLevel && texObj->GenerateMipmap) { ctx->Driver.GenerateMipmap(ctx, target, texObj); @@ -1481,6 +1481,7 @@ st_finalize_texture(GLcontext *ctx, if (stImage && stObj->pt != stImage->pt) { copy_image_data_to_texture(ctx->st, stObj, level, stImage); *needFlush = GL_TRUE; + pipe->texture_update(pipe, stObj->pt, face, (1 << level)); } } } diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h index 78f5f451ed..7abccb3a69 100644 --- a/src/mesa/state_tracker/st_texture.h +++ b/src/mesa/state_tracker/st_texture.h @@ -71,8 +71,6 @@ struct st_texture_object GLboolean imageOverride; GLint depthOverride; GLuint pitchOverride; - - GLboolean dirtyData; }; -- cgit v1.2.3 From a88202d3b02a24a3bfff95c5e375ead44dae4c5e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 20 Mar 2008 13:10:32 +0000 Subject: gallium: add debug facility to dump random blobs as hex --- src/gallium/auxiliary/util/p_debug.c | 18 ++++++++++++++++++ src/gallium/include/pipe/p_debug.h | 8 ++++++++ 2 files changed, 26 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index bd3a0221ea..c51e9e6a69 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -85,6 +85,22 @@ void debug_printf(const char *format, ...) } +void debug_print_blob( const char *name, + const void *blob, + unsigned size ) +{ + const unsigned *ublob = (const unsigned *)blob; + unsigned i; + + debug_printf("%s (%d dwords%s)\n", name, size/4, + size%4 ? "... plus a few bytes" : ""); + + for (i = 0; i < size/4; i++) { + debug_printf("%d:\t%08x\n", i, ublob[i]); + } +} + + /* TODO: implement a debug_abort that calls EngBugCheckEx on WIN32 */ @@ -240,3 +256,5 @@ debug_dump_flags(const struct debug_named_value *names, return output; } + + diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index e924c1ef60..14f8056924 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -73,6 +73,14 @@ extern "C" { */ void debug_printf(const char *format, ...); + +/* Dump a blob in hex to the same place that debug_printf sends its + * messages: + */ +void debug_print_blob( const char *name, + const void *blob, + unsigned size ); + /** * @sa debug_printf */ -- cgit v1.2.3 From 6a9a3afcf923ec5c67069cdb1656f52675cd8ede Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 20 Mar 2008 09:11:27 -0600 Subject: gallium: added util_make_fragment_passthrough_shader() --- src/gallium/auxiliary/util/u_simple_shaders.c | 95 +++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_simple_shaders.h | 12 ++++ 2 files changed, 107 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index 88e2ab05bd..fca6f11119 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -261,3 +261,98 @@ util_make_fragment_tex_shader(struct pipe_context *pipe) return pipe->create_fs_state(pipe, &shader); } + + + + +/** + * Make simple fragment color pass-through shader. + */ +void * +util_make_fragment_passthrough_shader(struct pipe_context *pipe) +{ + uint maxTokens = 40; + struct tgsi_token *tokens; + struct tgsi_header *header; + struct tgsi_processor *processor; + struct tgsi_full_declaration decl; + struct tgsi_full_instruction inst; + const uint procType = TGSI_PROCESSOR_FRAGMENT; + uint ti, i; + struct pipe_shader_state shader; + + tokens = (struct tgsi_token *) malloc(maxTokens * sizeof(tokens[0])); + + /* shader header + */ + *(struct tgsi_version *) &tokens[0] = tgsi_build_version(); + + header = (struct tgsi_header *) &tokens[1]; + *header = tgsi_build_header(); + + processor = (struct tgsi_processor *) &tokens[2]; + *processor = tgsi_build_processor( procType, header ); + + ti = 3; + + /* declare input */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + /* declare output */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_OUTPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; + decl.Semantic.SemanticIndex = 0; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = 0; + ti += tgsi_build_full_declaration(&decl, + &tokens[ti], + header, + maxTokens - ti); + + + /* MOVE out[0], in[0]; */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_MOV; + inst.Instruction.NumDstRegs = 1; + inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + inst.FullDstRegisters[0].DstRegister.Index = 0; + inst.Instruction.NumSrcRegs = 1; + inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + inst.FullSrcRegisters[0].SrcRegister.Index = 0; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + + /* END instruction */ + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = TGSI_OPCODE_END; + inst.Instruction.NumDstRegs = 0; + inst.Instruction.NumSrcRegs = 0; + ti += tgsi_build_full_instruction(&inst, + &tokens[ti], + header, + maxTokens - ti ); + + assert(ti < maxTokens); + +#if 0 /*debug*/ + tgsi_dump(tokens, 0); +#endif + + shader.tokens = tokens; + return pipe->create_fs_state(pipe, &shader); +} + diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h index 3ef4f28801..ca219a092c 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.h +++ b/src/gallium/auxiliary/util/u_simple_shaders.h @@ -36,6 +36,11 @@ struct pipe_context; +#ifdef __cplusplus +extern "C" { +#endif + + extern void * util_make_vertex_passthrough_shader(struct pipe_context *pipe, uint num_attribs, @@ -47,6 +52,13 @@ extern void * util_make_fragment_tex_shader(struct pipe_context *pipe); +extern void * +util_make_fragment_passthrough_shader(struct pipe_context *pipe); + + +#ifdef __cplusplus +} #endif +#endif -- cgit v1.2.3 From 8dd90ee19d97c4b032c2b057d96b3e674be3e1fd Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 20 Mar 2008 09:15:24 -0600 Subject: gallium: temporarily disable the memcmp() in cso_set_framebuffer() The memcmp() fails to detect buffer size changes... --- src/gallium/auxiliary/cso_cache/cso_context.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 01fe216447..4a1a6cb79c 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -496,7 +496,8 @@ void cso_restore_vertex_shader(struct cso_context *ctx) void cso_set_framebuffer(struct cso_context *ctx, const struct pipe_framebuffer_state *fb) { - if (memcmp(&ctx->fb, fb, sizeof(*fb))) { + /* XXX this memcmp() fails to detect buffer size changes */ + if (1/*memcmp(&ctx->fb, fb, sizeof(*fb))*/) { ctx->fb = *fb; ctx->pipe->set_framebuffer_state(ctx->pipe, fb); } -- cgit v1.2.3 From 8c71406c74b80fa2d2b1a488938c3b9dfc156343 Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 20 Mar 2008 10:44:27 -0600 Subject: gallium: added util_draw_vertex_buffer() --- src/gallium/auxiliary/util/u_draw_quad.c | 59 +++++++++++++++++++++----------- src/gallium/auxiliary/util/u_draw_quad.h | 16 +++++++++ 2 files changed, 55 insertions(+), 20 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index 79a69de633..37e8533609 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -33,16 +33,52 @@ #include "util/u_draw_quad.h" +/** + * Draw a simple vertex buffer / primitive. + * Limited to float[4] vertex attribs, tightly packed. + */ +void +util_draw_vertex_buffer(struct pipe_context *pipe, + struct pipe_buffer *vbuf, + uint prim_type, + uint num_verts, + uint num_attribs) +{ + struct pipe_vertex_buffer vbuffer; + struct pipe_vertex_element velement; + uint i; + + /* tell pipe about the vertex buffer */ + vbuffer.buffer = vbuf; + vbuffer.pitch = num_attribs * 4 * sizeof(float); /* vertex size */ + vbuffer.buffer_offset = 0; + pipe->set_vertex_buffer(pipe, 0, &vbuffer); + + /* tell pipe about the vertex attributes */ + for (i = 0; i < num_attribs; i++) { + velement.src_offset = i * 4 * sizeof(float); + velement.vertex_buffer_index = 0; + velement.src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + velement.nr_components = 4; + pipe->set_vertex_element(pipe, i, &velement); + } + + /* draw */ + pipe->draw_arrays(pipe, prim_type, 0, num_verts); +} + + + /** * Draw screen-aligned textured quad. + * Note: this function allocs/destroys a vertex buffer and isn't especially + * efficient. */ void util_draw_texquad(struct pipe_context *pipe, float x0, float y0, float x1, float y1, float z) { struct pipe_buffer *vbuf; - struct pipe_vertex_buffer vbuffer; - struct pipe_vertex_element velement; uint numAttribs = 2, vertexBytes, i, j; float *v; @@ -89,24 +125,7 @@ util_draw_texquad(struct pipe_context *pipe, pipe->winsys->buffer_unmap(pipe->winsys, vbuf); - /* tell pipe about the vertex buffer */ - vbuffer.buffer = vbuf; - vbuffer.pitch = numAttribs * 4 * sizeof(float); /* vertex size */ - vbuffer.buffer_offset = 0; - pipe->set_vertex_buffer(pipe, 0, &vbuffer); - - /* tell pipe about the vertex attributes */ - for (i = 0; i < numAttribs; i++) { - velement.src_offset = i * 4 * sizeof(float); - velement.vertex_buffer_index = 0; - velement.src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - velement.nr_components = 4; - pipe->set_vertex_element(pipe, i, &velement); - } - - /* draw */ - pipe->draw_arrays(pipe, PIPE_PRIM_TRIANGLE_FAN, 0, 4); + util_draw_vertex_buffer(pipe, vbuf, PIPE_PRIM_TRIANGLE_FAN, 4, 2); - /* XXX: do one-time */ pipe_buffer_reference(pipe->winsys, &vbuf, NULL); } diff --git a/src/gallium/auxiliary/util/u_draw_quad.h b/src/gallium/auxiliary/util/u_draw_quad.h index a97f55d2ef..5b6539a99c 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.h +++ b/src/gallium/auxiliary/util/u_draw_quad.h @@ -29,9 +29,25 @@ #define U_DRAWQUAD_H +#ifdef __cplusplus +extern "C" { +#endif + + +extern void +util_draw_vertex_buffer(struct pipe_context *pipe, + struct pipe_buffer *vbuf, + uint num_attribs, uint num_verts, uint prim_type); + + extern void util_draw_texquad(struct pipe_context *pipe, float x0, float y0, float x1, float y1, float z); +#ifdef __cplusplus +} +#endif + + #endif -- cgit v1.2.3 From 309d6e52c5c1cdeca1434cbe29e869b9176e5fa5 Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 20 Mar 2008 10:44:51 -0600 Subject: gallium: create vertex buffer once and re-use. --- src/gallium/auxiliary/util/u_gen_mipmap.c | 71 ++++++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 028b180a77..5824843069 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -64,6 +64,9 @@ struct gen_mipmap_state /*struct pipe_viewport_state viewport;*/ struct pipe_sampler_state *vs; struct pipe_sampler_state *fs; + + struct pipe_buffer *vbuf; /**< quad vertices */ + float vertices[4][2][4]; /**< vertex/texcoords for quad */ }; @@ -683,6 +686,7 @@ util_create_gen_mipmap(struct pipe_context *pipe, struct cso_context *cso) { struct gen_mipmap_state *ctx; + uint i; ctx = CALLOC_STRUCT(gen_mipmap_state); if (!ctx) @@ -744,10 +748,62 @@ util_create_gen_mipmap(struct pipe_context *pipe, /* fragment shader */ ctx->fs = util_make_fragment_tex_shader(pipe); + ctx->vbuf = pipe->winsys->buffer_create(pipe->winsys, + 32, + PIPE_BUFFER_USAGE_VERTEX, + 4 * 2 * 4 * sizeof(float)); + if (!ctx->vbuf) { + FREE(ctx); + return NULL; + } + + /* vertex data that doesn't change */ + for (i = 0; i < 4; i++) { + ctx->vertices[i][0][2] = 0.0f; /* z */ + ctx->vertices[i][0][3] = 1.0f; /* w */ + ctx->vertices[i][1][2] = 0.0f; /* r */ + ctx->vertices[i][1][3] = 1.0f; /* q */ + } + return ctx; } +static void +set_vertex_data(struct gen_mipmap_state *ctx, float width, float height) +{ + void *buf; + + ctx->vertices[0][0][0] = 0.0f; /*x*/ + ctx->vertices[0][0][1] = 0.0f; /*y*/ + ctx->vertices[0][1][0] = 0.0f; /*s*/ + ctx->vertices[0][1][1] = 0.0f; /*t*/ + + ctx->vertices[1][0][0] = width; /*x*/ + ctx->vertices[1][0][1] = 0.0f; /*y*/ + ctx->vertices[1][1][0] = 1.0f; /*s*/ + ctx->vertices[1][1][1] = 0.0f; /*t*/ + + ctx->vertices[2][0][0] = width; + ctx->vertices[2][0][1] = height; + ctx->vertices[2][1][0] = 1.0f; + ctx->vertices[2][1][1] = 1.0f; + + ctx->vertices[3][0][0] = 0.0f; + ctx->vertices[3][0][1] = height; + ctx->vertices[3][1][0] = 0.0f; + ctx->vertices[3][1][1] = 1.0f; + + buf = ctx->pipe->winsys->buffer_map(ctx->pipe->winsys, ctx->vbuf, + PIPE_BUFFER_USAGE_CPU_WRITE); + + memcpy(buf, ctx->vertices, sizeof(ctx->vertices)); + + ctx->pipe->winsys->buffer_unmap(ctx->pipe->winsys, ctx->vbuf); +} + + + /** * Destroy a mipmap generation context */ @@ -759,6 +815,8 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx) pipe->delete_vs_state(pipe, ctx->vs); pipe->delete_fs_state(pipe, ctx->fs); + pipe->winsys->buffer_destroy(pipe->winsys, ctx->vbuf); + FREE(ctx); } @@ -863,12 +921,13 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, pipe->set_sampler_textures(pipe, 1, &pt); /* quad coords in window coords (bypassing clipping, viewport mapping) */ - util_draw_texquad(pipe, - 0.0F, 0.0F, /* x0, y0 */ - (float) pt->width[dstLevel], /* x1 */ - (float) pt->height[dstLevel], /* y1 */ - 0.0F); /* z */ - + set_vertex_data(ctx, + (float) pt->width[dstLevel], + (float) pt->height[dstLevel]); + util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 2); /* attribs/vert */ pipe->flush(pipe, PIPE_FLUSH_WAIT); -- cgit v1.2.3 From f259ea0347754e0e8c93fd16796fc1db72b03372 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 20 Mar 2008 19:25:44 +0000 Subject: gallium: remove unused local var --- src/gallium/auxiliary/util/u_simple_shaders.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index fca6f11119..442467ecad 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -278,7 +278,7 @@ util_make_fragment_passthrough_shader(struct pipe_context *pipe) struct tgsi_full_declaration decl; struct tgsi_full_instruction inst; const uint procType = TGSI_PROCESSOR_FRAGMENT; - uint ti, i; + uint ti; struct pipe_shader_state shader; tokens = (struct tgsi_token *) malloc(maxTokens * sizeof(tokens[0])); -- cgit v1.2.3 From 482f4995253a0c295dc02e34e58a138ac8822c54 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Thu, 20 Mar 2008 20:25:40 +0100 Subject: gallium: Fix build on Windows. --- src/gallium/auxiliary/util/u_simple_shaders.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index 442467ecad..a2c918b7fe 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -68,7 +68,7 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe, uint ti, i; struct pipe_shader_state shader; - tokens = (struct tgsi_token *) malloc(maxTokens * sizeof(tokens[0])); + tokens = (struct tgsi_token *) MALLOC(maxTokens * sizeof(tokens[0])); /* shader header */ @@ -173,7 +173,7 @@ util_make_fragment_tex_shader(struct pipe_context *pipe) uint ti; struct pipe_shader_state shader; - tokens = (struct tgsi_token *) malloc(maxTokens * sizeof(tokens[0])); + tokens = (struct tgsi_token *) MALLOC(maxTokens * sizeof(tokens[0])); /* shader header */ -- cgit v1.2.3 From 400b12b4ceda32cc35b60d0484dfd333f1749b8e Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Thu, 20 Mar 2008 20:34:03 +0100 Subject: gallium: Fix build on Windows. --- src/gallium/auxiliary/util/u_simple_shaders.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index a2c918b7fe..8cb74e81d2 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -281,7 +281,7 @@ util_make_fragment_passthrough_shader(struct pipe_context *pipe) uint ti; struct pipe_shader_state shader; - tokens = (struct tgsi_token *) malloc(maxTokens * sizeof(tokens[0])); + tokens = (struct tgsi_token *) MALLOC(maxTokens * sizeof(tokens[0])); /* shader header */ -- cgit v1.2.3 From 9fa88fb3c57f6868dc169b692528cf23ceced76c Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 20 Mar 2008 20:34:46 +0000 Subject: gallium: Give some chance for the table to actually grow. --- src/gallium/auxiliary/util/u_handle_table.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c index 8b0f7fca4b..5a731a6b96 100644 --- a/src/gallium/auxiliary/util/u_handle_table.c +++ b/src/gallium/auxiliary/util/u_handle_table.c @@ -171,8 +171,7 @@ handle_table_set(struct handle_table *ht, assert(ht); assert(handle > 0); - assert(handle <= ht->size); - if(!handle || handle > ht->size) + if(!handle) return 0; assert(object); -- cgit v1.2.3 From 18be9a588acb46dad52321c91eab9927bc332756 Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 20 Mar 2008 14:26:51 -0600 Subject: gallium: use sizeof(vertex buffer) --- src/gallium/auxiliary/util/u_gen_mipmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 5824843069..b84752e0b4 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -751,7 +751,7 @@ util_create_gen_mipmap(struct pipe_context *pipe, ctx->vbuf = pipe->winsys->buffer_create(pipe->winsys, 32, PIPE_BUFFER_USAGE_VERTEX, - 4 * 2 * 4 * sizeof(float)); + sizeof(ctx->vertices)); if (!ctx->vbuf) { FREE(ctx); return NULL; -- cgit v1.2.3 From b028d32af4f5e7beeb4e659a4726e0d35d105ccc Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 20 Mar 2008 14:27:14 -0600 Subject: gallium: create one vertex buffer and re-use --- src/gallium/auxiliary/util/u_blit.c | 80 ++++++++++++++++++++++++++++++++++--- 1 file changed, 75 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index d05dae3af8..bb316554a4 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -61,6 +61,9 @@ struct blit_state /*struct pipe_viewport_state viewport;*/ struct pipe_sampler_state *vs; struct pipe_sampler_state *fs; + + struct pipe_buffer *vbuf; /**< quad vertices */ + float vertices[4][2][4]; /**< vertex/texcoords for quad */ }; @@ -72,6 +75,7 @@ struct blit_state * util_create_blit(struct pipe_context *pipe, struct cso_context *cso) { struct blit_state *ctx; + uint i; ctx = CALLOC_STRUCT(blit_state); if (!ctx) @@ -132,6 +136,24 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) /* fragment shader */ ctx->fs = util_make_fragment_tex_shader(pipe); + ctx->vbuf = pipe->winsys->buffer_create(pipe->winsys, + 32, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(ctx->vertices)); + if (!ctx->vbuf) { + FREE(ctx); + ctx->pipe->delete_fs_state(ctx->pipe, ctx->fs); + ctx->pipe->delete_vs_state(ctx->pipe, ctx->vs); + return NULL; + } + + /* init vertex data that doesn't change */ + for (i = 0; i < 4; i++) { + ctx->vertices[i][0][3] = 1.0f; /* w */ + ctx->vertices[i][1][2] = 0.0f; /* r */ + ctx->vertices[i][1][3] = 1.0f; /* q */ + } + return ctx; } @@ -147,10 +169,55 @@ util_destroy_blit(struct blit_state *ctx) pipe->delete_vs_state(pipe, ctx->vs); pipe->delete_fs_state(pipe, ctx->fs); + pipe->winsys->buffer_destroy(pipe->winsys, ctx->vbuf); + FREE(ctx); } +/** + * Setup vertex data for the textured quad we'll draw. + * Note: y=0=top + */ +static void +setup_vertex_data(struct blit_state *ctx, + float x0, float y0, float x1, float y1, float z) +{ + void *buf; + + ctx->vertices[0][0][0] = x0; + ctx->vertices[0][0][1] = y0; + ctx->vertices[0][0][2] = z; + ctx->vertices[0][1][0] = 0.0f; /*s*/ + ctx->vertices[0][1][1] = 0.0f; /*t*/ + + ctx->vertices[1][0][0] = x1; + ctx->vertices[1][0][1] = y0; + ctx->vertices[1][0][2] = z; + ctx->vertices[1][1][0] = 1.0f; /*s*/ + ctx->vertices[1][1][1] = 0.0f; /*t*/ + + ctx->vertices[2][0][0] = x1; + ctx->vertices[2][0][1] = y1; + ctx->vertices[2][0][2] = z; + ctx->vertices[2][1][0] = 1.0f; + ctx->vertices[2][1][1] = 1.0f; + + ctx->vertices[3][0][0] = x0; + ctx->vertices[3][0][1] = y1; + ctx->vertices[3][0][2] = z; + ctx->vertices[3][1][0] = 0.0f; + ctx->vertices[3][1][1] = 1.0f; + + buf = ctx->pipe->winsys->buffer_map(ctx->pipe->winsys, ctx->vbuf, + PIPE_BUFFER_USAGE_CPU_WRITE); + + memcpy(buf, ctx->vertices, sizeof(ctx->vertices)); + + ctx->pipe->winsys->buffer_unmap(ctx->pipe->winsys, ctx->vbuf); +} + + /** * Copy pixel block from src surface to dst surface. * Overlapping regions are acceptable. @@ -254,11 +321,14 @@ util_blit_pixels(struct blit_state *ctx, cso_set_framebuffer(ctx->cso, &fb); /* draw quad */ - util_draw_texquad(pipe, - (float)dstX0, - (float)dstY0, - (float)dstX1, - (float)dstY1, z); + setup_vertex_data(ctx, + (float) dstX0, (float) dstY0, + (float) dstX1, (float) dstY1, z); + + util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 2); /* attribs/vert */ /* restore state we changed */ cso_restore_blend(ctx->cso); -- cgit v1.2.3 From 00cf178d93e5bc36a88a9f8ff444f60c493be14d Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 20 Mar 2008 15:21:55 -0600 Subject: gallium: enable vp input semantic info --- src/gallium/auxiliary/util/u_simple_shaders.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index 8cb74e81d2..d230ddfbeb 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -84,16 +84,15 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe, /* declare inputs */ for (i = 0; i < num_attribs; i++) { - decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_INPUT; - /* + decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION; - decl.Semantic.SemanticIndex = 0; - */ + decl.Semantic.SemanticName = semantic_names[i]; + decl.Semantic.SemanticIndex = semantic_indexes[i]; + decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = 0; + decl.u.DeclarationRange.Last = i; ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, @@ -102,19 +101,17 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe, /* declare outputs */ for (i = 0; i < num_attribs; i++) { - decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_OUTPUT; decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = semantic_names[i]; decl.Semantic.SemanticIndex = semantic_indexes[i]; decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = 0; + decl.u.DeclarationRange.Last = i; ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, maxTokens - ti); - } /* emit MOV instructions */ -- cgit v1.2.3 From 0565e6888a332956661f6bc8b5778b058168e5f9 Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 20 Mar 2008 15:22:20 -0600 Subject: gallium: set fb.width/height --- src/gallium/auxiliary/util/u_blit.c | 2 ++ src/gallium/auxiliary/util/u_gen_mipmap.c | 2 ++ 2 files changed, 4 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index bb316554a4..28a404fd01 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -316,6 +316,8 @@ util_blit_pixels(struct blit_state *ctx, /* drawing dest */ memset(&fb, 0, sizeof(fb)); + fb.width = dst->width; + fb.height = dst->height; fb.num_cbufs = 1; fb.cbufs[0] = dst; cso_set_framebuffer(ctx->cso, &fb); diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index b84752e0b4..cf02f00b1b 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -901,6 +901,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, * Setup framebuffer / dest surface */ fb.cbufs[0] = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); + fb.width = pt->width[dstLevel]; + fb.height = pt->height[dstLevel]; cso_set_framebuffer(ctx->cso, &fb); /* -- cgit v1.2.3 From 4e977fb35befa60b2f74a21c0c9818854e6a7c85 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 22 Mar 2008 10:05:55 +0000 Subject: gallium: Remove pedantic asserts. Move these to a higher level instead. --- src/gallium/auxiliary/util/u_handle_table.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c index 5a731a6b96..2176a00959 100644 --- a/src/gallium/auxiliary/util/u_handle_table.c +++ b/src/gallium/auxiliary/util/u_handle_table.c @@ -170,7 +170,7 @@ handle_table_set(struct handle_table *ht, unsigned index; assert(ht); - assert(handle > 0); + assert(handle); if(!handle) return 0; @@ -184,7 +184,9 @@ handle_table_set(struct handle_table *ht, if(!handle_table_resize(ht, index)) return 0; - assert(!ht->objects[index]); + if(ht->objects[index] && ht->destroy) + ht->destroy(ht->objects[index]); + ht->objects[index] = object; return handle; @@ -198,13 +200,11 @@ handle_table_get(struct handle_table *ht, void *object; assert(ht); - assert(handle > 0); - assert(handle <= ht->size); + assert(handle); if(!handle || handle > ht->size) return NULL; object = ht->objects[handle - 1]; - assert(object); return object; } @@ -218,18 +218,14 @@ handle_table_remove(struct handle_table *ht, unsigned index; assert(ht); - assert(handle > 0); - assert(handle <= ht->size); + assert(handle); if(!handle || handle > ht->size) return; index = handle - 1; object = ht->objects[index]; - if(!object) { - /* XXX: this warning may be noisy for legitimate use -- remove later */ - debug_warning("removing empty handle"); + if(!object) return; - } if(ht->destroy) ht->destroy(object); -- cgit v1.2.3 From 28b3b078959db8a1e60adfb66f35ceb04d4f414d Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 22 Mar 2008 09:12:27 -0600 Subject: gallium: fix mem leak (fee pstipple stage) --- src/gallium/auxiliary/draw/draw_context.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 4cca965ac1..41da93cdf8 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -113,6 +113,8 @@ void draw_destroy( struct draw_context *draw ) draw->pipeline.aaline->destroy( draw->pipeline.aaline ); if (draw->pipeline.aapoint) draw->pipeline.aapoint->destroy( draw->pipeline.aapoint ); + if (draw->pipeline.pstipple) + draw->pipeline.pstipple->destroy( draw->pipeline.pstipple ); if (draw->pipeline.rasterize) draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); tgsi_exec_machine_free_data(&draw->machine); -- cgit v1.2.3 From dae7993afc56be1e71e600af60e01e51eab17eda Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 22 Mar 2008 09:42:59 -0600 Subject: gallium: free samplers, textures in destroy() --- src/gallium/auxiliary/draw/draw_aaline.c | 7 +++++++ src/gallium/auxiliary/draw/draw_pstipple.c | 6 ++++++ 2 files changed, 13 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c index b4fa6bd967..6742f7f4b9 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -665,7 +665,14 @@ aaline_reset_stipple_counter(struct draw_stage *stage) static void aaline_destroy(struct draw_stage *stage) { + struct aaline_stage *aaline = aaline_stage(stage); + + aaline->pipe->delete_sampler_state(aaline->pipe, aaline->sampler_cso); + + pipe_texture_release(&aaline->texture); + draw_free_temp_verts( stage ); + FREE( stage ); } diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index 9d154a6838..bd8d3a76ae 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -534,6 +534,12 @@ pstip_reset_stipple_counter(struct draw_stage *stage) static void pstip_destroy(struct draw_stage *stage) { + struct pstip_stage *pstip = pstip_stage(stage); + + pstip->pipe->delete_sampler_state(pstip->pipe, pstip->sampler_cso); + + pipe_texture_release(&pstip->texture); + draw_free_temp_verts( stage ); FREE( stage ); } -- cgit v1.2.3 From f40357e25c0520ef1d64ffab03501da4c8b93529 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 23 Mar 2008 16:44:59 +0000 Subject: gallium: beginnings of draw module vertex rework Trying to put a structure in place that we can actually optimize. Initially just implementing a passthrough mode, this will fairly soon replace all the vertex_cache/prim_queue/shader_queue stuff that's so hard to understand... Split the vertex processing into a couple of distinct stages: - Frontend - Prepares two lists of elements (fetch and draw) to be processed by the next stage. This stage doesn't fetch or draw vertices, but makes the decision which to draw. Multiple implementations of this will implement different strategies, currently just a vcache implementation. - MiddleEnd - Takes the list of fetch elements, fetches them, runs the vertex shader, cliptest, viewport transform on them to produce a linear array of vertex_header vertices. - Passes that list of vertices, plus the draw_elements (which index into that list) onto the backend - Backend - Either the existing primitive/clipping pipeline, or the vbuf_render hardware backend provided by the driver. Currently, the middle-end is the old passthrough code, and it build hardware vertices, not vertex_header vertices as above. It may be that passthrough is a special case in this respect. --- src/gallium/auxiliary/draw/Makefile | 4 + src/gallium/auxiliary/draw/SConscript | 6 +- src/gallium/auxiliary/draw/draw_context.c | 65 ++++- src/gallium/auxiliary/draw/draw_passthrough.c | 4 +- src/gallium/auxiliary/draw/draw_prim.c | 2 +- src/gallium/auxiliary/draw/draw_private.h | 33 ++- src/gallium/auxiliary/draw/draw_pt.c | 206 +++++++++++++++ src/gallium/auxiliary/draw/draw_pt.h | 116 ++++++++ src/gallium/auxiliary/draw/draw_pt_elts.c | 88 ++++++ src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 326 +++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_pt_vcache.c | 338 ++++++++++++++++++++++++ 11 files changed, 1169 insertions(+), 19 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pt.c create mode 100644 src/gallium/auxiliary/draw/draw_pt.h create mode 100644 src/gallium/auxiliary/draw/draw_pt_elts.c create mode 100644 src/gallium/auxiliary/draw/draw_pt_fetch_emit.c create mode 100644 src/gallium/auxiliary/draw/draw_pt_vcache.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 21e9f737b7..0c7ce5da5b 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -16,6 +16,10 @@ C_SOURCES = \ draw_flatshade.c \ draw_offset.c \ draw_passthrough.c \ + draw_pt.c \ + draw_pt_vcache.c \ + draw_pt_fetch_emit.c \ + draw_pt_elts.c \ draw_prim.c \ draw_pstipple.c \ draw_stipple.c \ diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index d7fb86d992..9b3e7247c5 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -14,7 +14,11 @@ draw = env.ConvenienceLibrary( 'draw_debug.c', 'draw_flatshade.c', 'draw_offset.c', - 'draw_passthrough.c', + 'draw_passthrough.c', # going away soon + 'draw_pt.c', + 'draw_pt_vcache.c', + 'draw_pt_fetch_emit.c', + 'draw_pt_elts.c', 'draw_prim.c', 'draw_pstipple.c', 'draw_stipple.c', diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 41da93cdf8..903cc26766 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -37,10 +37,11 @@ #include "draw_vbuf.h" - struct draw_context *draw_create( void ) { struct draw_context *draw = CALLOC_STRUCT( draw_context ); + if (draw == NULL) + goto fail; #if defined(__i386__) || defined(__386__) draw->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL; @@ -61,6 +62,19 @@ struct draw_context *draw_create( void ) draw->pipeline.validate = draw_validate_stage( draw ); draw->pipeline.first = draw->pipeline.validate; + if (!draw->pipeline.wide_line || + !draw->pipeline.wide_point || + !draw->pipeline.stipple || + !draw->pipeline.unfilled || + !draw->pipeline.twoside || + !draw->pipeline.offset || + !draw->pipeline.clip || + !draw->pipeline.flatshade || + !draw->pipeline.cull || + !draw->pipeline.validate) + goto fail; + + ASSIGN_4V( draw->plane[0], -1, 0, 0, 1 ); ASSIGN_4V( draw->plane[1], 1, 0, 0, 1 ); ASSIGN_4V( draw->plane[2], 0, -1, 0, 1 ); @@ -75,6 +89,8 @@ struct draw_context *draw_create( void ) uint i; const unsigned size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f; char *tmp = align_malloc(Elements(draw->vs.queue) * size, 16); + if (!tmp) + goto fail; for (i = 0; i < Elements(draw->vs.queue); i++) draw->vs.queue[i].vertex = (struct vertex_header *)(tmp + i * size); @@ -93,22 +109,42 @@ struct draw_context *draw_create( void ) draw_vertex_cache_invalidate( draw ); draw_set_mapped_element_buffer( draw, 0, NULL ); + if (!draw_pt_init( draw )) + goto fail; + return draw; + +fail: + draw_destroy( draw ); + return NULL; } void draw_destroy( struct draw_context *draw ) { - draw->pipeline.wide_line->destroy( draw->pipeline.wide_line ); - draw->pipeline.wide_point->destroy( draw->pipeline.wide_point ); - draw->pipeline.stipple->destroy( draw->pipeline.stipple ); - draw->pipeline.unfilled->destroy( draw->pipeline.unfilled ); - draw->pipeline.twoside->destroy( draw->pipeline.twoside ); - draw->pipeline.offset->destroy( draw->pipeline.offset ); - draw->pipeline.clip->destroy( draw->pipeline.clip ); - draw->pipeline.flatshade->destroy( draw->pipeline.flatshade ); - draw->pipeline.cull->destroy( draw->pipeline.cull ); - draw->pipeline.validate->destroy( draw->pipeline.validate ); + if (!draw) + return; + + if (draw->pipeline.wide_line) + draw->pipeline.wide_line->destroy( draw->pipeline.wide_line ); + if (draw->pipeline.wide_point) + draw->pipeline.wide_point->destroy( draw->pipeline.wide_point ); + if (draw->pipeline.stipple) + draw->pipeline.stipple->destroy( draw->pipeline.stipple ); + if (draw->pipeline.unfilled) + draw->pipeline.unfilled->destroy( draw->pipeline.unfilled ); + if (draw->pipeline.twoside) + draw->pipeline.twoside->destroy( draw->pipeline.twoside ); + if (draw->pipeline.offset) + draw->pipeline.offset->destroy( draw->pipeline.offset ); + if (draw->pipeline.clip) + draw->pipeline.clip->destroy( draw->pipeline.clip ); + if (draw->pipeline.flatshade) + draw->pipeline.flatshade->destroy( draw->pipeline.flatshade ); + if (draw->pipeline.cull) + draw->pipeline.cull->destroy( draw->pipeline.cull ); + if (draw->pipeline.validate) + draw->pipeline.validate->destroy( draw->pipeline.validate ); if (draw->pipeline.aaline) draw->pipeline.aaline->destroy( draw->pipeline.aaline ); if (draw->pipeline.aapoint) @@ -117,8 +153,11 @@ void draw_destroy( struct draw_context *draw ) draw->pipeline.pstipple->destroy( draw->pipeline.pstipple ); if (draw->pipeline.rasterize) draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); + tgsi_exec_machine_free_data(&draw->machine); - align_free( draw->vs.queue[0].vertex ); /* Frees all the vertices. */ + + if (draw->vs.queue[0].vertex) + align_free( draw->vs.queue[0].vertex ); /* Frees all the vertices. */ /* Not so fast -- we're just borrowing this at the moment. * @@ -126,6 +165,8 @@ void draw_destroy( struct draw_context *draw ) draw->render->destroy( draw->render ); */ + draw_pt_destroy( draw ); + FREE( draw ); } diff --git a/src/gallium/auxiliary/draw/draw_passthrough.c b/src/gallium/auxiliary/draw/draw_passthrough.c index fdec6a591b..dd00894c5b 100644 --- a/src/gallium/auxiliary/draw/draw_passthrough.c +++ b/src/gallium/auxiliary/draw/draw_passthrough.c @@ -125,9 +125,9 @@ fetch_store_general( struct draw_context *draw, case PIPE_FORMAT_B8G8R8A8_UNORM: { ubyte *ub = (ubyte *) from; - attrib[0] = UBYTE_TO_FLOAT(ub[0]); + attrib[2] = UBYTE_TO_FLOAT(ub[0]); attrib[1] = UBYTE_TO_FLOAT(ub[1]); - attrib[2] = UBYTE_TO_FLOAT(ub[2]); + attrib[0] = UBYTE_TO_FLOAT(ub[2]); attrib[3] = UBYTE_TO_FLOAT(ub[3]); } break; diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c index cb0277fb6c..4fe0ddc02a 100644 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -526,7 +526,7 @@ draw_arrays(struct draw_context *draw, unsigned prim, /* drawing done here: */ if (!draw->rasterizer->bypass_vs || - !draw_passthrough_arrays(draw, prim, start, count)) { + !draw_pt_arrays(draw, prim, start, count)) { /* we have to run the whole pipeline */ draw_prim(draw, prim, start, count); } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 1c65c3d1b2..0c5afcacfa 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -53,6 +53,9 @@ struct pipe_context; struct gallivm_prog; struct gallivm_cpu_engine; +struct draw_pt_middle_end; +struct draw_pt_front_end; + /** * Basic vertex info. * Carry some useful information around with the vertices in the prim pipe. @@ -203,8 +206,21 @@ struct draw_context /* Support prototype passthrough path: */ struct { - unsigned prim; - unsigned hw_vertex_size; + unsigned prim; /* XXX: to be removed */ + unsigned hw_vertex_size; /* XXX: to be removed */ + + struct { + struct draw_pt_middle_end *fetch_emit; + struct draw_pt_middle_end *fetch_shade_emit; + struct draw_pt_middle_end *fetch_shade_cliptest_pipeline_or_emit; + } middle; + + struct { + struct draw_pt_front_end *noop; + struct draw_pt_front_end *split_arrays; + struct draw_pt_front_end *vcache; + } front; + } pt; @@ -351,7 +367,18 @@ extern void draw_update_vertex_fetch( struct draw_context *draw ); extern boolean draw_need_pipeline(const struct draw_context *draw); -/* Prototype/hack +/* Passthrough mode (second attempt): + */ +boolean draw_pt_init( struct draw_context *draw ); +void draw_pt_destroy( struct draw_context *draw ); +boolean draw_pt_arrays( struct draw_context *draw, + unsigned prim, + unsigned start, + unsigned count ); + + + +/* Prototype/hack (DEPRECATED) */ boolean draw_passthrough_arrays(struct draw_context *draw, diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c new file mode 100644 index 0000000000..219b563422 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -0,0 +1,206 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_pt.h" + + +static boolean too_many_verts( struct draw_context *draw, + unsigned verts ) +{ + return verts < 1024; +} + +static boolean too_many_elts( struct draw_context *draw, + unsigned elts ) +{ + return elts < (16 * 1024); +} + + +boolean +draw_pt_arrays(struct draw_context *draw, + unsigned prim, + unsigned start, + unsigned count) +{ + const boolean pipeline = draw_need_pipeline(draw); + const boolean cliptest = !draw->rasterizer->bypass_clipping; + const boolean shading = !draw->rasterizer->bypass_vs; + struct draw_pt_front_end *frontend = NULL; + struct draw_pt_middle_end *middle = NULL; + + + /* Overall we do: + * - frontend -- prepare fetch_elts, draw_elts - eg vcache + * - middle -- fetch, shade, cliptest, viewport + * - pipeline -- the prim pipeline: clipping, wide lines, etc + * - backend -- the vbuf_render provided by the driver. + */ + + +#if 0 + if (!cliptest && !pipeline && !shading) { + /* This is the 'passthrough' path: + */ + /* Fetch user verts, emit hw verts: + */ + middle = draw->pt.middle.fetch_emit; + } + else if (!cliptest && !pipeline) { + /* Fetch user verts, run vertex shader, emit hw verts: + */ + middle = draw->pt.middle.fetch_shade_emit; + } + else if (!pipeline) { + /* Even though !pipeline, we have to run it to get clipping. We + * do know that the pipeline is just the clipping operation, but + * that probably doesn't help much. + * + * This is going to be the most important path for a lot of + * swtnl cards. + */ + /* Fetch user verts, + * run vertex shader, + * cliptest and viewport trasform + * if no clipped vertices, + * emit hw verts + * else + * run pipline + */ + middle = draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit; + } + else if (!cliptest) { + /* Fetch user verts, run vertex shader, run pipeline: + */ + middle = draw->pt.middle.fetch_shade_pipeline; + } + else { + /* This is what we're currently always doing: + */ + /* Fetch user verts, run vertex shader, cliptest, run pipeline: + */ + middle = draw->pt.middle.fetch_shade_cliptest_pipeline; + } +#else + if (cliptest /*|| pipeline*/ || shading) + return FALSE; + + middle = draw->pt.middle.fetch_emit; +#endif + + + /* If !pipeline, need to make sure we respect the driver's limited + * capabilites to receive blocks of vertex data and elements. + */ +#if 0 + if (!pipeline) { + unsigned vertex_mode = passthrough; + unsigned nr_verts = count_vertices( draw, start, count ); + unsigned hw_prim = prim; + + if (is_elts(draw)) { + frontend = draw->pt.front.vcache; + hw_prim = reduced_prim(prim); + } + + if (too_many_verts(nr_verts)) { + /* if (is_verts(draw) && can_split(prim)) { + draw = draw_arrays_split; + } + else */ { + frontend = draw->pt.front.vcache; + hw_prim = reduced_prim(prim); + } + } + + if (too_many_elts(count)) { + + /* if (is_elts(draw) && can_split(prim)) { + draw = draw_elts_split; + } + else */ { + frontend = draw->pt.front.vcache; + hw_prim = reduced_prim(prim); + } + } + + if (!good_prim(hw_prim)) { + draw = draw->pt.front.vcache; + } + } +#else + frontend = draw->pt.front.vcache; +#endif + + frontend->prepare( frontend, middle ); + + frontend->run( frontend, + prim, + draw_pt_elt_func( draw ), + draw_pt_elt_ptr( draw, start ), + count ); + + frontend->finish( frontend ); + + return TRUE; +} + + +boolean draw_pt_init( struct draw_context *draw ) +{ + draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw ); + if (!draw->pt.middle.fetch_emit) + return FALSE; + + draw->pt.front.vcache = draw_pt_vcache(); + if (!draw->pt.front.vcache) + return FALSE; + + return TRUE; +} + + +void draw_pt_destroy( struct draw_context *draw ) +{ + if (draw->pt.middle.fetch_emit) { + draw->pt.middle.fetch_emit->destroy( draw->pt.middle.fetch_emit ); + draw->pt.middle.fetch_emit = NULL; + } + + if (draw->pt.front.vcache) { + draw->pt.front.vcache->destroy( draw->pt.front.vcache ); + draw->pt.front.vcache = NULL; + } +} diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h new file mode 100644 index 0000000000..1b81d196f6 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -0,0 +1,116 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#ifndef DRAW_PT_H +#define DRAW_PT_H + +#include "pipe/p_compiler.h" + +typedef ushort (*pt_elt_func)( const void *elts, ushort idx ); + + +/* The "front end" - prepare sets of fetch, draw elements for the + * middle end. + * + * Currenly one version of this: + * - vcache - catchall implementation, decomposes to TRI/LINE/POINT prims + * Later: + * - varray, varray_split + * - velement, velement_split + * + * Currenly only using the vcache version. + */ +struct draw_pt_front_end { + void (*prepare)( struct draw_pt_front_end *, + struct draw_pt_middle_end * ); + + void (*run)( struct draw_pt_front_end *, + unsigned prim, + pt_elt_func elt_func, + const void *elt_ptr, + unsigned count ); + + void (*finish)( struct draw_pt_front_end * ); + void (*destroy)( struct draw_pt_front_end * ); +}; + + +/* The "middle end" - prepares actual hardware vertices for the + * hardware backend. + * + * Currently two versions of this: + * - fetch, vertex shade, cliptest, prim-pipeline + * - fetch, emit (ie passthrough) + * Later: + * - fetch, vertex shade, cliptest, maybe-pipeline, maybe-emit + * - fetch, vertex shade, emit + * + * Currenly only using the passthrough version. + */ +struct draw_pt_middle_end { + void (*prepare)( struct draw_pt_middle_end * ); + + void (*run)( struct draw_pt_middle_end *, + unsigned prim, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ); + + void (*finish)( struct draw_pt_middle_end * ); + void (*destroy)( struct draw_pt_middle_end * ); +}; + + +/* The "back end" - supplied by the driver, defined in draw_vbuf.h. + * + * Not sure whether to wrap the prim pipeline up as an alternate + * backend. Would be a win for everything except pure passthrough + * mode... + */ +struct vbuf_render; + + +/* Helper functions. + */ +pt_elt_func draw_pt_elt_func( struct draw_context *draw ); +const void *draw_pt_elt_ptr( struct draw_context *draw, + unsigned start ); + +/* Implementations: + */ +struct draw_pt_front_end *draw_pt_vcache( void ); +struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); + + + +#endif diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c new file mode 100644 index 0000000000..585b83fa90 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_elts.c @@ -0,0 +1,88 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "draw/draw_pt.h" +#include "draw/draw_private.h" + +/* Neat get_elt func that also works for varrays drawing by encoding + * the start value into a pointer. + */ + +static ushort elt_uint( const void *elts, ushort idx ) +{ + return *(((const uint *)elts) + idx); +} + +static ushort elt_ushort( const void *elts, ushort idx ) +{ + return *(((const ushort *)elts) + idx); +} + +static ushort elt_ubyte( const void *elts, ushort idx ) +{ + return *(((const ubyte *)elts) + idx); +} + +static ushort elt_vert( const void *elts, ushort idx ) +{ + return (const ubyte *)elts - (const ubyte *)NULL + idx; +} + +pt_elt_func draw_pt_elt_func( struct draw_context *draw ) +{ + switch (draw->user.eltSize) { + case 0: return elt_vert; + case 1: return elt_ubyte; + case 2: return elt_ushort; + case 4: return elt_uint; + default: return NULL; + } +} + +const void *draw_pt_elt_ptr( struct draw_context *draw, + unsigned start ) +{ + const char *elts = draw->user.elts; + + switch (draw->user.eltSize) { + case 0: + return (const void *)(((const ubyte *)NULL) + start); + case 1: + return (const void *)(((const ubyte *)elts) + start); + case 2: + return (const void *)(((const ushort *)elts) + start); + case 4: + return (const void *)(((const uint *)elts) + start); + default: + return NULL; + } +} diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c new file mode 100644 index 0000000000..74a946545a --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -0,0 +1,326 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" + +/* The simplest 'middle end' in the new vertex code. + * + * The responsibilities of a middle end are to: + * - perform vertex fetch using + * - draw vertex element/buffer state + * - a list of fetch indices we received as an input + * - run the vertex shader + * - cliptest, + * - clip coord calculation + * - viewport transformation + * - if necessary, run the primitive pipeline, passing it: + * - a linear array of vertex_header vertices constructed here + * - a set of draw indices we received as an input + * - otherwise, drive the hw backend, + * - allocate space for hardware format vertices + * - translate the vertex-shader output vertices to hw format + * - calling the backend draw functions. + * + * For convenience, we provide a helper function to drive the hardware + * backend given similar inputs to those required to run the pipeline. + * + * In the case of passthrough mode, many of these actions are disabled + * or noops, so we end up doing: + * + * - perform vertex fetch + * - drive the hw backend + * + * IE, basically just vertex fetch to post-vs-format vertices, + * followed by a call to the backend helper function. + */ + + +struct fetch_emit_middle_end { + struct draw_pt_middle_end base; + struct draw_context *draw; + + struct { + const ubyte *ptr; + unsigned pitch; + enum pipe_format src_format; + unsigned dst_format; + } fetch[PIPE_ATTRIB_MAX]; + + unsigned nr_fetch; + unsigned hw_vertex_size; +}; + + +/** + * General-purpose fetch from user's vertex arrays, emit to driver's + * vertex buffer. + * + * XXX this is totally temporary. + */ +static void +fetch_store_general( struct fetch_emit_middle_end *feme, + void *out_ptr, + const unsigned *fetch_elts, + unsigned count ) +{ + float *out = (float *)out_ptr; + struct vbuf_render *render = feme->draw->render; + uint i, j; + + for (i = 0; i < count; i++) { + unsigned elt = fetch_elts[i]; + + for (j = 0; j < feme->nr_fetch; j++) { + float attrib[4]; + const ubyte *from = (feme->fetch[j].ptr + + feme->fetch[j].pitch * elt); + + /* The normal fetch/emit code: + */ + switch (feme->fetch[j].src_format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + { + ubyte *ub = (ubyte *) from; + attrib[2] = UBYTE_TO_FLOAT(ub[0]); + attrib[1] = UBYTE_TO_FLOAT(ub[1]); + attrib[0] = UBYTE_TO_FLOAT(ub[2]); + attrib[3] = UBYTE_TO_FLOAT(ub[3]); + } + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + { + float *f = (float *) from; + attrib[0] = f[0]; + attrib[1] = f[1]; + attrib[2] = f[2]; + attrib[3] = f[3]; + } + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + { + float *f = (float *) from; + attrib[0] = f[0]; + attrib[1] = f[1]; + attrib[2] = f[2]; + attrib[3] = 1.0; + } + break; + case PIPE_FORMAT_R32G32_FLOAT: + { + float *f = (float *) from; + attrib[0] = f[0]; + attrib[1] = f[1]; + attrib[2] = 0.0; + attrib[3] = 1.0; + } + break; + case PIPE_FORMAT_R32_FLOAT: + { + float *f = (float *) from; + attrib[0] = f[0]; + attrib[1] = 0.0; + attrib[2] = 0.0; + attrib[3] = 1.0; + } + break; + default: + assert(0); + } + + if (0) debug_printf("attrib %d: %f %f %f %f\n", j, + attrib[0], attrib[1], attrib[2], attrib[3]); + + switch (feme->fetch[j].dst_format) { + case EMIT_1F: + out[0] = attrib[0]; + out += 1; + break; + case EMIT_2F: + out[0] = attrib[0]; + out[1] = attrib[1]; + out += 2; + break; + case EMIT_4F: + out[0] = attrib[0]; + out[1] = attrib[1]; + out[2] = attrib[2]; + out[3] = attrib[3]; + out += 4; + break; + default: + assert(0); + } + } + } +} + + + +static void fetch_emit_prepare( struct draw_pt_middle_end *middle ) +{ + struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; + struct draw_context *draw = feme->draw; + const struct vertex_info *vinfo = draw->render->get_vertex_info(draw->render); + unsigned nr_attrs = vinfo->num_attribs; + unsigned i; + + for (i = 0; i < nr_attrs; i++) { + enum pipe_format src_format; + unsigned dst_format; + const void *src_ptr; + unsigned pitch; + + if (vinfo->emit[i] == EMIT_HEADER) { + static const unsigned zero = 0; + src_ptr = &zero; + src_format = PIPE_FORMAT_R32_FLOAT; + pitch = 0; + dst_format = EMIT_1F; + } + else if (vinfo->emit[i] == EMIT_1F_PSIZE) { + src_ptr = &feme->draw->rasterizer->point_size; + src_format = PIPE_FORMAT_R32_FLOAT; + pitch = 0; + dst_format = EMIT_1F; + } + else { + unsigned src_element = vinfo->src_index[i]; + unsigned src_buffer = draw->vertex_element[src_element].vertex_buffer_index; + + src_ptr = ((const ubyte *)draw->user.vbuffer[src_buffer] + + draw->vertex_buffer[src_buffer].buffer_offset + + draw->vertex_element[src_element].src_offset); + + src_format = draw->vertex_element[src_element].src_format; + pitch = draw->vertex_buffer[src_buffer].pitch; + dst_format = vinfo->emit[i]; + } + + feme->fetch[i].src_format = src_format; + feme->fetch[i].ptr = src_ptr; + feme->fetch[i].pitch = pitch; + feme->fetch[i].dst_format = dst_format; + } + + + feme->nr_fetch = nr_attrs; + feme->hw_vertex_size = vinfo->size * 4; +} + + + + + +static void fetch_emit_run( struct draw_pt_middle_end *middle, + unsigned prim, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; + struct draw_context *draw = feme->draw; + void *hw_verts; + boolean ok; + + ok = draw->render->set_primitive( draw->render, + prim ); + if (!ok) { + assert(0); + return; + } + + + hw_verts = draw->render->allocate_vertices( draw->render, + (ushort)feme->hw_vertex_size, + (ushort)fetch_count ); + if (!hw_verts) { + assert(0); + return; + } + + + /* Single routine to fetch vertices and emit HW verts. + */ + fetch_store_general( feme, + hw_verts, + fetch_elts, + fetch_count ); + + /* XXX: Draw arrays path to avoid re-emitting index list again and + * again. + */ + draw->render->draw( draw->render, + draw_elts, + draw_count ); + + /* Done -- that was easy, wasn't it: + */ + draw->render->release_vertices( draw->render, + hw_verts, + feme->hw_vertex_size, + fetch_count ); + +} + + + +static void fetch_emit_finish( struct draw_pt_middle_end *middle ) +{ + /* nothing to do */ +} + +static void fetch_emit_destroy( struct draw_pt_middle_end *middle ) +{ + FREE(middle); +} + + +struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ) +{ + struct fetch_emit_middle_end *fetch_emit = CALLOC_STRUCT( fetch_emit_middle_end ); + + fetch_emit->base.prepare = fetch_emit_prepare; + fetch_emit->base.run = fetch_emit_run; + fetch_emit->base.finish = fetch_emit_finish; + fetch_emit->base.destroy = fetch_emit_destroy; + + fetch_emit->draw = draw; + + return &fetch_emit->base; +} + diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c new file mode 100644 index 0000000000..07e9f0ae5f --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -0,0 +1,338 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +//#include "draw/draw_vbuf.h" +//#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" + + +#define CACHE_MAX 32 +#define FETCH_MAX 128 +#define DRAW_MAX (16*1024) + +struct vcache_frontend { + struct draw_pt_front_end base; + + ushort in[CACHE_MAX]; + ushort out[CACHE_MAX]; + + ushort draw_elts[DRAW_MAX]; + unsigned fetch_elts[FETCH_MAX]; + + unsigned draw_count; + unsigned fetch_count; + + pt_elt_func elt_func; + const void *elt_ptr; + + struct draw_pt_middle_end *middle; + unsigned output_prim; +}; + +static void vcache_flush( struct vcache_frontend *vcache ) +{ +#if 0 + /* Should always be true if output_prim == input_prim, otherwise + * not so much... + */ + unsigned i; + for (i = 0; i < vcache->draw_count; i++) { + assert( vcache->fetch_elts[vcache->draw_elts[i]] == + vcache->elt_func(vcache->elt_ptr, i) ); + } +#endif + + if (vcache->draw_count) + vcache->middle->run( vcache->middle, + vcache->output_prim, + vcache->fetch_elts, + vcache->fetch_count, + vcache->draw_elts, + vcache->draw_count ); + + memset(vcache->in, ~0, sizeof(vcache->in)); + vcache->fetch_count = 0; + vcache->draw_count = 0; +} + +static void vcache_check_flush( struct vcache_frontend *vcache ) +{ + if ( vcache->draw_count + 6 >= DRAW_MAX || + vcache->fetch_count + 4 >= FETCH_MAX ) + { + vcache_flush( vcache ); + } +} + + +static void vcache_elt( struct vcache_frontend *vcache, + unsigned felt ) +{ + // ushort felt = elt(draw, i); + + ushort idx = felt % CACHE_MAX; + + if (vcache->in[idx] != felt) { + assert(vcache->fetch_count < FETCH_MAX); + + vcache->in[idx] = felt; + vcache->out[idx] = vcache->fetch_count; + vcache->fetch_elts[vcache->fetch_count++] = felt; + } + + vcache->draw_elts[vcache->draw_count++] = vcache->out[idx]; +} + +static void vcache_triangle( struct vcache_frontend *vcache, + unsigned i0, + unsigned i1, + unsigned i2 ) +{ + /* TODO: encode edgeflags in draw_elts */ + vcache_elt(vcache, i0); + vcache_elt(vcache, i1); + vcache_elt(vcache, i2); + vcache_check_flush(vcache); +} + +static void vcache_line( struct vcache_frontend *vcache, + boolean reset, + unsigned i0, + unsigned i1 ) +{ + /* TODO: encode reset-line-stipple in draw_elts */ + (void) reset; + vcache_elt(vcache, i0); + vcache_elt(vcache, i1); + vcache_check_flush(vcache); +} + + +static void vcache_point( struct vcache_frontend *vcache, + unsigned i0 ) +{ + vcache_elt(vcache, i0); + vcache_check_flush(vcache); +} + +static void vcache_quad( struct vcache_frontend *vcache, + unsigned i0, + unsigned i1, + unsigned i2, + unsigned i3 ) +{ + vcache_triangle( vcache, i0, i1, i3 ); + vcache_triangle( vcache, i1, i2, i3 ); +} + + +static void vcache_prepare( struct draw_pt_front_end *frontend, + struct draw_pt_middle_end *middle ) +{ + struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; + vcache->middle = middle; + middle->prepare( middle ); +} + +static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES +}; + + +static void vcache_run( struct draw_pt_front_end *frontend, + unsigned prim, + pt_elt_func get_elt, + const void *elts, + unsigned count ) +{ + struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; + unsigned i; + + /* These are for validation only: + */ + vcache->elt_func = get_elt; + vcache->elt_ptr = elts; + vcache->output_prim = reduced_prim[prim]; + + switch (prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i ++) { + vcache_point( vcache, + get_elt(elts, i) ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 0; i+1 < count; i += 2) { + vcache_line( vcache, + TRUE, + get_elt(elts, i + 0), + get_elt(elts, i + 1)); + } + break; + + case PIPE_PRIM_LINE_LOOP: + if (count >= 2) { + for (i = 1; i < count; i++) { + vcache_line( vcache, + i == 1, /* XXX: only if vb not split */ + get_elt(elts, i - 1), + get_elt(elts, i) ); + } + + vcache_line( vcache, + 0, + get_elt(elts, count - 1), + get_elt(elts, 0) ); + } + break; + + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < count; i++) { + vcache_line( vcache, + i == 1, + get_elt(elts, i - 1), + get_elt(elts, i) ); + } + break; + + case PIPE_PRIM_TRIANGLES: + for (i = 0; i+2 < count; i += 3) { + vcache_triangle( vcache, + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2) ); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + for (i = 0; i+2 < count; i++) { + if (i & 1) { + vcache_triangle( vcache, + get_elt(elts, i + 1), + get_elt(elts, i + 0), + get_elt(elts, i + 2) ); + } + else { + vcache_triangle( vcache, + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2) ); + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + for (i = 0; i+2 < count; i++) { + vcache_triangle( vcache, + get_elt(elts, 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2) ); + } + break; + + + case PIPE_PRIM_QUADS: + for (i = 0; i+3 < count; i += 4) { + vcache_quad( vcache, + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, i + 3)); + } + break; + + case PIPE_PRIM_QUAD_STRIP: + for (i = 0; i+3 < count; i += 2) { + vcache_quad( vcache, + get_elt(elts, i + 2), + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 3)); + } + break; + + case PIPE_PRIM_POLYGON: + for (i = 0; i+2 < count; i++) { + vcache_triangle( vcache, + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, 0)); + } + break; + + default: + assert(0); + break; + } + + vcache_flush( vcache ); +} + +static void vcache_finish( struct draw_pt_front_end *frontend ) +{ + struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; + vcache->middle->finish( vcache->middle ); + vcache->middle = NULL; +} + +static void vcache_destroy( struct draw_pt_front_end *frontend ) +{ + FREE(frontend); +} + + +struct draw_pt_front_end *draw_pt_vcache( void ) +{ + struct vcache_frontend *vcache = CALLOC_STRUCT( vcache_frontend ); + + vcache->base.prepare = vcache_prepare; + vcache->base.run = vcache_run; + vcache->base.finish = vcache_finish; + vcache->base.destroy = vcache_destroy; + + memset(vcache->in, ~0, sizeof(vcache->in)); + + return &vcache->base; +} -- cgit v1.2.3 From 301b187ca9a811b608894d20bab934af0a10b8ab Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 23 Mar 2008 18:21:00 +0000 Subject: draw: fix some unsigned vs ushort confusion Middle-end elements are ushort, but prior to that have to treat all elements as unsigned to avoid wrapping and/or overruns. --- src/gallium/auxiliary/draw/draw_pt.h | 2 +- src/gallium/auxiliary/draw/draw_pt_elts.c | 8 ++++---- src/gallium/auxiliary/draw/draw_pt_vcache.c | 9 ++++----- 3 files changed, 9 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 1b81d196f6..f56fa8d5cc 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -35,7 +35,7 @@ #include "pipe/p_compiler.h" -typedef ushort (*pt_elt_func)( const void *elts, ushort idx ); +typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx ); /* The "front end" - prepare sets of fetch, draw elements for the diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c index 585b83fa90..f337f71ea0 100644 --- a/src/gallium/auxiliary/draw/draw_pt_elts.c +++ b/src/gallium/auxiliary/draw/draw_pt_elts.c @@ -37,22 +37,22 @@ * the start value into a pointer. */ -static ushort elt_uint( const void *elts, ushort idx ) +static unsigned elt_uint( const void *elts, unsigned idx ) { return *(((const uint *)elts) + idx); } -static ushort elt_ushort( const void *elts, ushort idx ) +static unsigned elt_ushort( const void *elts, unsigned idx ) { return *(((const ushort *)elts) + idx); } -static ushort elt_ubyte( const void *elts, ushort idx ) +static unsigned elt_ubyte( const void *elts, unsigned idx ) { return *(((const ubyte *)elts) + idx); } -static ushort elt_vert( const void *elts, ushort idx ) +static unsigned elt_vert( const void *elts, unsigned idx ) { return (const ubyte *)elts - (const ubyte *)NULL + idx; } diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 07e9f0ae5f..98c22eb4d4 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -45,7 +45,7 @@ struct vcache_frontend { struct draw_pt_front_end base; - ushort in[CACHE_MAX]; + unsigned in[CACHE_MAX]; ushort out[CACHE_MAX]; ushort draw_elts[DRAW_MAX]; @@ -74,13 +74,14 @@ static void vcache_flush( struct vcache_frontend *vcache ) } #endif - if (vcache->draw_count) + if (vcache->draw_count) { vcache->middle->run( vcache->middle, vcache->output_prim, vcache->fetch_elts, vcache->fetch_count, vcache->draw_elts, vcache->draw_count ); + } memset(vcache->in, ~0, sizeof(vcache->in)); vcache->fetch_count = 0; @@ -100,9 +101,7 @@ static void vcache_check_flush( struct vcache_frontend *vcache ) static void vcache_elt( struct vcache_frontend *vcache, unsigned felt ) { - // ushort felt = elt(draw, i); - - ushort idx = felt % CACHE_MAX; + unsigned idx = felt % CACHE_MAX; if (vcache->in[idx] != felt) { assert(vcache->fetch_count < FETCH_MAX); -- cgit v1.2.3 From 732422f6708199d6655185b1a5daec86efe2f1b7 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 23 Mar 2008 18:38:10 +0000 Subject: gallium: Memory debugging utilities. There are no known tools for windows kernel memory debugging, so this is a simple set of malloc etc wrappers. Enabled by default on win32 debug builds --- src/gallium/auxiliary/util/SConscript | 1 + src/gallium/auxiliary/util/p_debug_mem.c | 172 +++++++++++++++++++++++++++++++ src/gallium/include/pipe/p_debug.h | 23 +++++ src/gallium/include/pipe/p_util.h | 21 +++- 4 files changed, 215 insertions(+), 2 deletions(-) create mode 100644 src/gallium/auxiliary/util/p_debug_mem.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index b44f2d5e39..9b3929eb2d 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -4,6 +4,7 @@ util = env.ConvenienceLibrary( target = 'util', source = [ 'p_debug.c', + 'p_debug_mem.c', 'p_tile.c', 'p_util.c', 'u_blit.c', diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/p_debug_mem.c new file mode 100644 index 0000000000..56599dafa6 --- /dev/null +++ b/src/gallium/auxiliary/util/p_debug_mem.c @@ -0,0 +1,172 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Memory debugging. + * + * @author José Fonseca + */ + +#ifdef WIN32 +#include +#include +#else +#include +#include +#endif + +#include "pipe/p_debug.h" +#include "util/u_double_list.h" + + +#define DEBUG_MEMORY_MAGIC 0x6e34090aU + + +#if defined(WIN32) && !defined(WINCE) +#define real_malloc(_size) EngAllocMem(0, _size, 'D3AG') +#define real_free(_ptr) EngFreeMem(_ptr) +#else +#define real_malloc(_size) malloc(_size) +#define real_free(_ptr) free(_ptr) +#endif + + +struct debug_memory_header +{ + struct list_head head; + + unsigned long no; + const char *file; + unsigned line; + const char *function; + size_t size; + unsigned magic; +}; + +static struct list_head list = { &list, &list }; + +static unsigned long start_no = 0; +static unsigned long end_no = 0; + + +void * +debug_malloc(const char *file, unsigned line, const char *function, + size_t size) +{ + struct debug_memory_header *hdr; + + hdr = real_malloc(sizeof(*hdr) + size); + if(!hdr) + return NULL; + + hdr->no = end_no++; + hdr->file = file; + hdr->line = line; + hdr->function = function; + hdr->size = size; + hdr->magic = DEBUG_MEMORY_MAGIC; + + LIST_ADDTAIL(&hdr->head, &list); + + return (void *)((char *)hdr + sizeof(*hdr)); +} + +void +debug_free(const char *file, unsigned line, const char *function, + void *ptr) +{ + struct debug_memory_header *hdr; + + if(!ptr) + return; + + hdr = (struct debug_memory_header *)((char *)ptr - sizeof(*hdr)); + if(hdr->magic != DEBUG_MEMORY_MAGIC) { + debug_printf("%s:%u:%s: freeing bad or corrupted memory %p\n", + file, line, function, + ptr); + debug_assert(0); + return; + } + + LIST_DEL(&hdr->head); + hdr->magic = 0; + + real_free(hdr); +} + +void * +debug_calloc(const char *file, unsigned line, const char *function, + size_t count, size_t size ) +{ + void *ptr = debug_malloc( file, line, function, count * size ); + if( ptr ) + memset( ptr, 0, count * size ); + return ptr; +} + +void * +debug_realloc(const char *file, unsigned line, const char *function, + void *old_ptr, size_t old_size, size_t new_size ) +{ + void *new_ptr = NULL; + + if (new_size != 0) { + new_ptr = debug_malloc( file, line, function, new_size ); + + if( new_ptr && old_ptr ) + memcpy( new_ptr, old_ptr, old_size ); + } + + debug_free( file, line, function, old_ptr ); + return new_ptr; +} + +void +debug_memory_reset(void) +{ + start_no = end_no; +} + +void +debug_memory_report(void) +{ + struct list_head *entry; + + entry = list.prev; + for (; entry != &list; entry = entry->prev) { + struct debug_memory_header *hdr; + void *ptr; + hdr = LIST_ENTRY(struct debug_memory_header, entry, head); + ptr = (void *)((char *)hdr + sizeof(*hdr)); + if(hdr->no >= start_no) + debug_printf("%s:%u:%s: %u byte st %p not freed\n", + hdr->file, hdr->line, hdr->function, + hdr->size, ptr); + } +} diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index 14f8056924..c549513b6f 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -225,6 +225,29 @@ debug_dump_flags(const struct debug_named_value *names, unsigned long value); +void * +debug_malloc(const char *file, unsigned line, const char *function, + size_t size); + +void +debug_free(const char *file, unsigned line, const char *function, + void *ptr); + +void * +debug_calloc(const char *file, unsigned line, const char *function, + size_t count, size_t size ); + +void * +debug_realloc(const char *file, unsigned line, const char *function, + void *old_ptr, size_t old_size, size_t new_size ); + +void +debug_memory_reset(void); + +void +debug_memory_report(void); + + #ifdef __cplusplus } #endif diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index c2e0f8c6a5..d3b2fa2950 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -39,6 +39,22 @@ extern "C" { #endif +#if defined(WIN32) && defined(DEBUG) /* memory debugging */ + +#include "p_debug.h" + +#define MALLOC( _size ) \ + debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size ) +#define CALLOC( _count, _size ) \ + debug_calloc(__FILE__, __LINE__, __FUNCTION__, _count, _size ) +#define FREE( _ptr ) \ + debug_free( __FILE__, __LINE__, __FUNCTION__, _ptr ) +#define REALLOC( _ptr, _old_size, _size ) \ + debug_realloc( __FILE__, __LINE__, __FUNCTION__, _ptr, _old_size, _size ) +#define GETENV( X ) NULL + +#else + #ifdef WIN32 void * __stdcall @@ -104,7 +120,7 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) #define GETENV( X ) NULL -#else /* WIN32 */ +#else /* !WIN32 */ #define MALLOC( SIZE ) malloc( SIZE ) @@ -116,7 +132,8 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) #define GETENV( X ) getenv( X ) -#endif /* WIN32 */ +#endif /* !WIN32 */ +#endif /* !DEBUG */ #define MALLOC_STRUCT(T) (struct T *) MALLOC(sizeof(struct T)) -- cgit v1.2.3 From d09b92d7e48ecee898ed158353021b7b3b6bae85 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 23 Mar 2008 18:52:37 +0000 Subject: gallium: Fix typo. --- src/gallium/auxiliary/util/p_debug_mem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/p_debug_mem.c index 56599dafa6..c160afe5b7 100644 --- a/src/gallium/auxiliary/util/p_debug_mem.c +++ b/src/gallium/auxiliary/util/p_debug_mem.c @@ -165,7 +165,7 @@ debug_memory_report(void) hdr = LIST_ENTRY(struct debug_memory_header, entry, head); ptr = (void *)((char *)hdr + sizeof(*hdr)); if(hdr->no >= start_no) - debug_printf("%s:%u:%s: %u byte st %p not freed\n", + debug_printf("%s:%u:%s: %u bytes at %p not freed\n", hdr->file, hdr->line, hdr->function, hdr->size, ptr); } -- cgit v1.2.3 From 48ef11d308c395837c1685df6ab701a69507e8b9 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 23 Mar 2008 18:57:35 +0000 Subject: gallium: Remove the debug_mask_* stuff. Overcomplex and not much different from using a global variable... --- src/gallium/auxiliary/util/p_debug.c | 34 ------------------- src/gallium/include/pipe/p_debug.h | 64 ------------------------------------ 2 files changed, 98 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index c51e9e6a69..19ad3f4663 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -168,40 +168,6 @@ void debug_assert_fail(const char *expr, const char *file, unsigned line) } -#define DEBUG_MASK_TABLE_SIZE 256 - - -/** - * Mask hash table. - * - * For now we just take the lower bits of the key, and do no attempt to solve - * collisions. Use a proper hash table when we have dozens of drivers. - */ -static uint32_t debug_mask_table[DEBUG_MASK_TABLE_SIZE]; - - -void debug_mask_set(uint32_t uuid, uint32_t mask) -{ - unsigned hash = uuid & (DEBUG_MASK_TABLE_SIZE - 1); - debug_mask_table[hash] = mask; -} - - -uint32_t debug_mask_get(uint32_t uuid) -{ - unsigned hash = uuid & (DEBUG_MASK_TABLE_SIZE - 1); - return debug_mask_table[hash]; -} - - -void debug_mask_vprintf(uint32_t uuid, uint32_t what, const char *format, va_list ap) -{ - uint32_t mask = debug_mask_get(uuid); - if(mask & what) - debug_vprintf(format, ap); -} - - const char * debug_dump_enum(const struct debug_named_value *names, unsigned long value) diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index c549513b6f..15fc2006d5 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -103,70 +103,6 @@ void debug_assert_fail(const char *expr, const char *file, unsigned line); #define assert(expr) debug_assert(expr) -/** - * Set a channel's debug mask. - * - * uuid is just a random 32 bit integer that uniquely identifies the debugging - * channel. - * - * @note Due to current implementation issues, make sure the lower 8 bits of - * UUID are unique. - */ -void debug_mask_set(uint32_t uuid, uint32_t mask); - - -uint32_t debug_mask_get(uint32_t uuid); - - -/** - * Conditional debug output. - * - * This is just a generalization of the debug filtering mechanism used - * throughout Gallium. - * - * You use this function as: - * - * @code - * #define MYDRIVER_UUID 0x12345678 // random 32 bit identifier - * - * static void inline - * mydriver_debug(uint32_t what, const char *format, ...) - * { - * #ifdef DEBUG - * va_list ap; - * va_start(ap, format); - * debug_mask_vprintf(MYDRIVER_UUID, what, format, ap); - * va_end(ap); - * #endif - * } - * - * ... - * - * debug_mask_set(MYDRIVER_UUID, - * MYDRIVER_DEBUG_THIS | - * MYDRIVER_DEBUG_THAT | - * ... ); - * - * ... - * - * mydriver_debug(MYDRIVER_DEBUG_THIS, - * "this and this happened\n"); - * - * mydriver_debug(MYDRIVER_DEBUG_THAT, - * "that = %f\n", that); - * ... - * @endcode - * - * You can also define several variants of mydriver_debug, with hardcoded what. - * Note that although macros with variable number of arguments would accomplish - * more in less code, they are not portable. - */ -void debug_mask_vprintf(uint32_t uuid, - uint32_t what, - const char *format, - va_list ap); - - #ifdef DEBUG #define debug_warning(__msg) \ debug_printf("%s:%i:warning: %s\n", __FILE__, __LINE__, (__msg)) -- cgit v1.2.3 From 38dc0f809d5f72b382676be6f72ea675a3929455 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 23 Mar 2008 19:38:11 +0000 Subject: gallium: Fix memory leak. pipe cso's were being destroyed, but the hash elements themselves not. proper fix is IMHO add a destructor callback to cso_hash. --- src/gallium/auxiliary/cso_cache/cso_cache.c | 41 ++++++++++++++++------------- 1 file changed, 22 insertions(+), 19 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index a2764b4265..18acab0967 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -29,6 +29,7 @@ */ #include "pipe/p_util.h" +#include "pipe/p_debug.h" #include "cso_cache.h" #include "cso_hash.h" @@ -131,75 +132,77 @@ static int _cso_size_for_type(enum cso_cache_type type) static void delete_blend_state(void *state, void *data) { struct cso_blend *cso = (struct cso_blend *)state; - if (cso->delete_state && cso->data != &cso->state) + if (cso->delete_state) cso->delete_state(cso->context, cso->data); + FREE(state); } static void delete_depth_stencil_state(void *state, void *data) { struct cso_depth_stencil_alpha *cso = (struct cso_depth_stencil_alpha *)state; - if (cso->delete_state && cso->data != &cso->state) + if (cso->delete_state) cso->delete_state(cso->context, cso->data); + FREE(state); } static void delete_sampler_state(void *state, void *data) { struct cso_sampler *cso = (struct cso_sampler *)state; - if (cso->delete_state && cso->data != &cso->state) + if (cso->delete_state) cso->delete_state(cso->context, cso->data); + FREE(state); } static void delete_rasterizer_state(void *state, void *data) { struct cso_rasterizer *cso = (struct cso_rasterizer *)state; - if (cso->delete_state && cso->data != &cso->state) + if (cso->delete_state) cso->delete_state(cso->context, cso->data); + FREE(state); } static void delete_fs_state(void *state, void *data) { struct cso_fragment_shader *cso = (struct cso_fragment_shader *)state; - if (cso->delete_state && cso->data != &cso->state) + if (cso->delete_state) cso->delete_state(cso->context, cso->data); + FREE(state); } static void delete_vs_state(void *state, void *data) { struct cso_vertex_shader *cso = (struct cso_vertex_shader *)state; - if (cso->delete_state && cso->data != &cso->state) + if (cso->delete_state) cso->delete_state(cso->context, cso->data); + FREE(state); } static INLINE void delete_cso(void *state, enum cso_cache_type type) { switch (type) { - case CSO_BLEND: { + case CSO_BLEND: delete_blend_state(state, 0); - } break; - case CSO_SAMPLER: { + case CSO_SAMPLER: delete_sampler_state(state, 0); - } break; - case CSO_DEPTH_STENCIL_ALPHA: { + case CSO_DEPTH_STENCIL_ALPHA: delete_depth_stencil_state(state, 0); - } break; - case CSO_RASTERIZER: { + case CSO_RASTERIZER: delete_rasterizer_state(state, 0); - } break; - case CSO_FRAGMENT_SHADER: { + case CSO_FRAGMENT_SHADER: delete_fs_state(state, 0); - } break; - case CSO_VERTEX_SHADER: { + case CSO_VERTEX_SHADER: delete_vs_state(state, 0); - } break; + default: + assert(0); + FREE(state); } - FREE(state); } static INLINE void sanitize_hash(struct cso_hash *hash, enum cso_cache_type type, -- cgit v1.2.3 From 6edaef531806f4ac6c92c4a2934da5a37262e2e3 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 23 Mar 2008 20:39:35 +0100 Subject: gallium: Add util_gen_mipmap_filter(). We need a way to specify the type of minification filter used to downsample mipmap levels. The old util_gen_mipmap() retains its behaviour and uses LINEAR filter. --- src/gallium/auxiliary/util/u_gen_mipmap.c | 27 +++++++++++++++++++++++---- src/gallium/auxiliary/util/u_gen_mipmap.h | 5 +++++ 2 files changed, 28 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index cf02f00b1b..9203a78191 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -719,7 +719,6 @@ util_create_gen_mipmap(struct pipe_context *pipe, ctx->sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; ctx->sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; - ctx->sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; ctx->sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; ctx->sampler.normalized_coords = 1; @@ -849,11 +848,12 @@ simple_viewport(struct pipe_context *pipe, uint width, uint height) * \param face which cube face to generate mipmaps for (0 for non-cube maps) * \param baseLevel the first mipmap level to use as a src * \param lastLevel the last mipmap level to generate + * \param filter the minification filter used to generate mipmap levels with */ void -util_gen_mipmap(struct gen_mipmap_state *ctx, - struct pipe_texture *pt, - uint face, uint baseLevel, uint lastLevel) +util_gen_mipmap_filter(struct gen_mipmap_state *ctx, + struct pipe_texture *pt, + uint face, uint baseLevel, uint lastLevel, uint filter) { struct pipe_context *pipe = ctx->pipe; struct pipe_screen *screen = pipe->screen; @@ -914,6 +914,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, */ ctx->sampler.min_lod = ctx->sampler.max_lod = (float) srcLevel; ctx->sampler.lod_bias = (float) srcLevel; + ctx->sampler.min_img_filter = filter; cso_single_sampler(ctx->cso, 0, &ctx->sampler); cso_single_sampler_done(ctx->cso); #if 0 @@ -945,3 +946,21 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, cso_restore_sampler_textures(ctx->cso); cso_restore_framebuffer(ctx->cso); } + + +/** + * Generate mipmap images with a linear minification filter. + * See util_gen_mipmap_filter for more info. + * + * \param pt the texture to generate mipmap levels for + * \param face which cube face to generate mipmaps for (0 for non-cube maps) + * \param baseLevel the first mipmap level to use as a src + * \param lastLevel the last mipmap level to generate + */ +void +util_gen_mipmap(struct gen_mipmap_state *ctx, + struct pipe_texture *pt, + uint face, uint baseLevel, uint lastLevel) +{ + util_gen_mipmap_filter( ctx, pt, face, baseLevel, lastLevel, PIPE_TEX_FILTER_LINEAR ); +} diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.h b/src/gallium/auxiliary/util/u_gen_mipmap.h index eeabf3bf07..64abdeae98 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.h +++ b/src/gallium/auxiliary/util/u_gen_mipmap.h @@ -52,5 +52,10 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, struct pipe_texture *pt, uint face, uint baseLevel, uint lastLevel); +extern void +util_gen_mipmap_filter(struct gen_mipmap_state *ctx, + struct pipe_texture *pt, + uint face, uint baseLevel, uint lastLevel, uint filter); + #endif -- cgit v1.2.3 From 899fcde366646a3ab3e42ae819620e790161ac75 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 23 Mar 2008 20:36:00 +0000 Subject: draw: restructure fetch/emit as a pair of function calls --- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 261 ++++++++++++++---------- 1 file changed, 152 insertions(+), 109 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 74a946545a..f43483f883 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -76,8 +76,8 @@ struct fetch_emit_middle_end { struct { const ubyte *ptr; unsigned pitch; - enum pipe_format src_format; - unsigned dst_format; + void (*fetch)( const void *from, float *attrib); + void (*emit)( const float *attrib, float **out ); } fetch[PIPE_ATTRIB_MAX]; unsigned nr_fetch; @@ -85,6 +85,98 @@ struct fetch_emit_middle_end { }; +static void fetch_B8G8R8A8_UNORM( const void *from, + float *attrib ) +{ + ubyte *ub = (ubyte *) from; + attrib[2] = UBYTE_TO_FLOAT(ub[0]); + attrib[1] = UBYTE_TO_FLOAT(ub[1]); + attrib[0] = UBYTE_TO_FLOAT(ub[2]); + attrib[3] = UBYTE_TO_FLOAT(ub[3]); +} + +static void fetch_R32G32B32A32_FLOAT( const void *from, + float *attrib ) +{ + float *f = (float *) from; + attrib[0] = f[0]; + attrib[1] = f[1]; + attrib[2] = f[2]; + attrib[3] = f[3]; +} + +static void fetch_R32G32B32_FLOAT( const void *from, + float *attrib ) +{ + float *f = (float *) from; + attrib[0] = f[0]; + attrib[1] = f[1]; + attrib[2] = f[2]; + attrib[3] = 1.0; +} + +static void fetch_R32G32_FLOAT( const void *from, + float *attrib ) +{ + float *f = (float *) from; + attrib[0] = f[0]; + attrib[1] = f[1]; + attrib[2] = 0.0; + attrib[3] = 1.0; +} + +static void fetch_R32_FLOAT( const void *from, + float *attrib ) +{ + float *f = (float *) from; + attrib[0] = f[0]; + attrib[1] = 0.0; + attrib[2] = 0.0; + attrib[3] = 1.0; +} + +static void fetch_HEADER( const void *from, + float *attrib ) +{ + attrib[0] = 0; +} + + +static void emit_1F( const float *attrib, + float **out ) +{ + (*out)[0] = attrib[0]; + (*out) += 1; +} + +static void emit_2F( const float *attrib, + float **out ) +{ + (*out)[0] = attrib[0]; + (*out)[1] = attrib[1]; + (*out) += 2; +} + +static void emit_3F( const float *attrib, + float **out ) +{ + (*out)[0] = attrib[0]; + (*out)[1] = attrib[1]; + (*out)[2] = attrib[2]; + (*out) += 3; +} + +static void emit_4F( const float *attrib, + float **out ) +{ + (*out)[0] = attrib[0]; + (*out)[1] = attrib[1]; + (*out)[2] = attrib[2]; + (*out)[3] = attrib[3]; + (*out) += 4; +} + + /** * General-purpose fetch from user's vertex arrays, emit to driver's * vertex buffer. @@ -109,81 +201,8 @@ fetch_store_general( struct fetch_emit_middle_end *feme, const ubyte *from = (feme->fetch[j].ptr + feme->fetch[j].pitch * elt); - /* The normal fetch/emit code: - */ - switch (feme->fetch[j].src_format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - { - ubyte *ub = (ubyte *) from; - attrib[2] = UBYTE_TO_FLOAT(ub[0]); - attrib[1] = UBYTE_TO_FLOAT(ub[1]); - attrib[0] = UBYTE_TO_FLOAT(ub[2]); - attrib[3] = UBYTE_TO_FLOAT(ub[3]); - } - break; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - { - float *f = (float *) from; - attrib[0] = f[0]; - attrib[1] = f[1]; - attrib[2] = f[2]; - attrib[3] = f[3]; - } - break; - case PIPE_FORMAT_R32G32B32_FLOAT: - { - float *f = (float *) from; - attrib[0] = f[0]; - attrib[1] = f[1]; - attrib[2] = f[2]; - attrib[3] = 1.0; - } - break; - case PIPE_FORMAT_R32G32_FLOAT: - { - float *f = (float *) from; - attrib[0] = f[0]; - attrib[1] = f[1]; - attrib[2] = 0.0; - attrib[3] = 1.0; - } - break; - case PIPE_FORMAT_R32_FLOAT: - { - float *f = (float *) from; - attrib[0] = f[0]; - attrib[1] = 0.0; - attrib[2] = 0.0; - attrib[3] = 1.0; - } - break; - default: - assert(0); - } - - if (0) debug_printf("attrib %d: %f %f %f %f\n", j, - attrib[0], attrib[1], attrib[2], attrib[3]); - - switch (feme->fetch[j].dst_format) { - case EMIT_1F: - out[0] = attrib[0]; - out += 1; - break; - case EMIT_2F: - out[0] = attrib[0]; - out[1] = attrib[1]; - out += 2; - break; - case EMIT_4F: - out[0] = attrib[0]; - out[1] = attrib[1]; - out[2] = attrib[2]; - out[3] = attrib[3]; - out += 4; - break; - default: - assert(0); - } + feme->fetch[j].fetch( from, attrib ); + feme->fetch[j].emit( attrib, &out ); } } } @@ -199,44 +218,68 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle ) unsigned i; for (i = 0; i < nr_attrs; i++) { - enum pipe_format src_format; - unsigned dst_format; - const void *src_ptr; - unsigned pitch; + unsigned src_element = vinfo->src_index[i]; + unsigned src_buffer = draw->vertex_element[src_element].vertex_buffer_index; + + feme->fetch[i].ptr = ((const ubyte *)draw->user.vbuffer[src_buffer] + + draw->vertex_buffer[src_buffer].buffer_offset + + draw->vertex_element[src_element].src_offset); - if (vinfo->emit[i] == EMIT_HEADER) { - static const unsigned zero = 0; - src_ptr = &zero; - src_format = PIPE_FORMAT_R32_FLOAT; - pitch = 0; - dst_format = EMIT_1F; - } - else if (vinfo->emit[i] == EMIT_1F_PSIZE) { - src_ptr = &feme->draw->rasterizer->point_size; - src_format = PIPE_FORMAT_R32_FLOAT; - pitch = 0; - dst_format = EMIT_1F; + feme->fetch[i].pitch = draw->vertex_buffer[src_buffer].pitch; + + switch (draw->vertex_element[src_element].src_format) { + case PIPE_FORMAT_B8G8R8A8_UNORM: + feme->fetch[i].fetch = fetch_B8G8R8A8_UNORM; + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + feme->fetch[i].fetch = fetch_R32G32B32A32_FLOAT; + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + feme->fetch[i].fetch = fetch_R32G32B32_FLOAT; + break; + case PIPE_FORMAT_R32G32_FLOAT: + feme->fetch[i].fetch = fetch_R32G32_FLOAT; + break; + case PIPE_FORMAT_R32_FLOAT: + feme->fetch[i].fetch = fetch_R32_FLOAT; + break; + default: + assert(0); + feme->fetch[i].fetch = NULL; + break; } - else { - unsigned src_element = vinfo->src_index[i]; - unsigned src_buffer = draw->vertex_element[src_element].vertex_buffer_index; - src_ptr = ((const ubyte *)draw->user.vbuffer[src_buffer] + - draw->vertex_buffer[src_buffer].buffer_offset + - draw->vertex_element[src_element].src_offset); - - src_format = draw->vertex_element[src_element].src_format; - pitch = draw->vertex_buffer[src_buffer].pitch; - dst_format = vinfo->emit[i]; + switch (vinfo->emit[i]) { + case EMIT_4F: + feme->fetch[i].emit = emit_4F; + break; + case EMIT_3F: + feme->fetch[i].emit = emit_3F; + break; + case EMIT_2F: + feme->fetch[i].emit = emit_2F; + break; + case EMIT_1F: + feme->fetch[i].emit = emit_1F; + break; + case EMIT_HEADER: + feme->fetch[i].ptr = NULL; + feme->fetch[i].pitch = 0; + feme->fetch[i].fetch = fetch_HEADER; + feme->fetch[i].emit = emit_1F; + break; + case EMIT_1F_PSIZE: + feme->fetch[i].ptr = (const ubyte *)&feme->draw->rasterizer->point_size; + feme->fetch[i].pitch = 0; + feme->fetch[i].fetch = fetch_R32_FLOAT; + feme->fetch[i].emit = emit_1F; + default: + assert(0); + feme->fetch[i].emit = NULL; + break; } - - feme->fetch[i].src_format = src_format; - feme->fetch[i].ptr = src_ptr; - feme->fetch[i].pitch = pitch; - feme->fetch[i].dst_format = dst_format; } - feme->nr_fetch = nr_attrs; feme->hw_vertex_size = vinfo->size * 4; } -- cgit v1.2.3 From 743e990831d74ff764f4677836b7bd9b044d39af Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 23 Mar 2008 21:31:14 +0000 Subject: draw: rename emit functions to match pipe_format names --- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 39 +++++++++++-------------- 1 file changed, 17 insertions(+), 22 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index f43483f883..64ef83d800 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -135,30 +135,24 @@ static void fetch_R32_FLOAT( const void *from, attrib[3] = 1.0; } -static void fetch_HEADER( const void *from, - float *attrib ) -{ - attrib[0] = 0; -} - -static void emit_1F( const float *attrib, - float **out ) +static void emit_R32_FLOAT( const float *attrib, + float **out ) { (*out)[0] = attrib[0]; (*out) += 1; } -static void emit_2F( const float *attrib, - float **out ) +static void emit_R32G32_FLOAT( const float *attrib, + float **out ) { (*out)[0] = attrib[0]; (*out)[1] = attrib[1]; (*out) += 2; } -static void emit_3F( const float *attrib, - float **out ) +static void emit_R32G32B32_FLOAT( const float *attrib, + float **out ) { (*out)[0] = attrib[0]; (*out)[1] = attrib[1]; @@ -166,8 +160,8 @@ static void emit_3F( const float *attrib, (*out) += 3; } -static void emit_4F( const float *attrib, - float **out ) +static void emit_R32G32B32A32_FLOAT( const float *attrib, + float **out ) { (*out)[0] = attrib[0]; (*out)[1] = attrib[1]; @@ -211,6 +205,7 @@ fetch_store_general( struct fetch_emit_middle_end *feme, static void fetch_emit_prepare( struct draw_pt_middle_end *middle ) { + static const float zero = 0; struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; struct draw_context *draw = feme->draw; const struct vertex_info *vinfo = draw->render->get_vertex_info(draw->render); @@ -251,28 +246,28 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle ) switch (vinfo->emit[i]) { case EMIT_4F: - feme->fetch[i].emit = emit_4F; + feme->fetch[i].emit = emit_R32G32B32A32_FLOAT; break; case EMIT_3F: - feme->fetch[i].emit = emit_3F; + feme->fetch[i].emit = emit_R32G32B32_FLOAT; break; case EMIT_2F: - feme->fetch[i].emit = emit_2F; + feme->fetch[i].emit = emit_R32G32_FLOAT; break; case EMIT_1F: - feme->fetch[i].emit = emit_1F; + feme->fetch[i].emit = emit_R32_FLOAT; break; case EMIT_HEADER: - feme->fetch[i].ptr = NULL; + feme->fetch[i].ptr = (const ubyte *)&zero; feme->fetch[i].pitch = 0; - feme->fetch[i].fetch = fetch_HEADER; - feme->fetch[i].emit = emit_1F; + feme->fetch[i].fetch = fetch_R32_FLOAT; + feme->fetch[i].emit = emit_R32_FLOAT; break; case EMIT_1F_PSIZE: feme->fetch[i].ptr = (const ubyte *)&feme->draw->rasterizer->point_size; feme->fetch[i].pitch = 0; feme->fetch[i].fetch = fetch_R32_FLOAT; - feme->fetch[i].emit = emit_1F; + feme->fetch[i].emit = emit_R32_FLOAT; default: assert(0); feme->fetch[i].emit = NULL; -- cgit v1.2.3 From e6ea786c003762143fdf4c63a6fc6a08f27e444e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 23 Mar 2008 21:31:23 +0000 Subject: draw: fix crlf --- src/gallium/auxiliary/draw/draw_pt.h | 232 +++++++++++++++--------------- src/gallium/auxiliary/draw/draw_pt_elts.c | 176 +++++++++++------------ 2 files changed, 204 insertions(+), 204 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index f56fa8d5cc..dfc29b2e7d 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -1,116 +1,116 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell - */ - -#ifndef DRAW_PT_H -#define DRAW_PT_H - -#include "pipe/p_compiler.h" - -typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx ); - - -/* The "front end" - prepare sets of fetch, draw elements for the - * middle end. - * - * Currenly one version of this: - * - vcache - catchall implementation, decomposes to TRI/LINE/POINT prims - * Later: - * - varray, varray_split - * - velement, velement_split - * - * Currenly only using the vcache version. - */ -struct draw_pt_front_end { - void (*prepare)( struct draw_pt_front_end *, - struct draw_pt_middle_end * ); - - void (*run)( struct draw_pt_front_end *, - unsigned prim, - pt_elt_func elt_func, - const void *elt_ptr, - unsigned count ); - - void (*finish)( struct draw_pt_front_end * ); - void (*destroy)( struct draw_pt_front_end * ); -}; - - -/* The "middle end" - prepares actual hardware vertices for the - * hardware backend. - * - * Currently two versions of this: - * - fetch, vertex shade, cliptest, prim-pipeline - * - fetch, emit (ie passthrough) - * Later: - * - fetch, vertex shade, cliptest, maybe-pipeline, maybe-emit - * - fetch, vertex shade, emit - * - * Currenly only using the passthrough version. - */ -struct draw_pt_middle_end { - void (*prepare)( struct draw_pt_middle_end * ); - - void (*run)( struct draw_pt_middle_end *, - unsigned prim, - const unsigned *fetch_elts, - unsigned fetch_count, - const ushort *draw_elts, - unsigned draw_count ); - - void (*finish)( struct draw_pt_middle_end * ); - void (*destroy)( struct draw_pt_middle_end * ); -}; - - -/* The "back end" - supplied by the driver, defined in draw_vbuf.h. - * - * Not sure whether to wrap the prim pipeline up as an alternate - * backend. Would be a win for everything except pure passthrough - * mode... - */ -struct vbuf_render; - - -/* Helper functions. - */ -pt_elt_func draw_pt_elt_func( struct draw_context *draw ); -const void *draw_pt_elt_ptr( struct draw_context *draw, - unsigned start ); - -/* Implementations: - */ -struct draw_pt_front_end *draw_pt_vcache( void ); -struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); - - - -#endif +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#ifndef DRAW_PT_H +#define DRAW_PT_H + +#include "pipe/p_compiler.h" + +typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx ); + + +/* The "front end" - prepare sets of fetch, draw elements for the + * middle end. + * + * Currenly one version of this: + * - vcache - catchall implementation, decomposes to TRI/LINE/POINT prims + * Later: + * - varray, varray_split + * - velement, velement_split + * + * Currenly only using the vcache version. + */ +struct draw_pt_front_end { + void (*prepare)( struct draw_pt_front_end *, + struct draw_pt_middle_end * ); + + void (*run)( struct draw_pt_front_end *, + unsigned prim, + pt_elt_func elt_func, + const void *elt_ptr, + unsigned count ); + + void (*finish)( struct draw_pt_front_end * ); + void (*destroy)( struct draw_pt_front_end * ); +}; + + +/* The "middle end" - prepares actual hardware vertices for the + * hardware backend. + * + * Currently two versions of this: + * - fetch, vertex shade, cliptest, prim-pipeline + * - fetch, emit (ie passthrough) + * Later: + * - fetch, vertex shade, cliptest, maybe-pipeline, maybe-emit + * - fetch, vertex shade, emit + * + * Currenly only using the passthrough version. + */ +struct draw_pt_middle_end { + void (*prepare)( struct draw_pt_middle_end * ); + + void (*run)( struct draw_pt_middle_end *, + unsigned prim, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ); + + void (*finish)( struct draw_pt_middle_end * ); + void (*destroy)( struct draw_pt_middle_end * ); +}; + + +/* The "back end" - supplied by the driver, defined in draw_vbuf.h. + * + * Not sure whether to wrap the prim pipeline up as an alternate + * backend. Would be a win for everything except pure passthrough + * mode... + */ +struct vbuf_render; + + +/* Helper functions. + */ +pt_elt_func draw_pt_elt_func( struct draw_context *draw ); +const void *draw_pt_elt_ptr( struct draw_context *draw, + unsigned start ); + +/* Implementations: + */ +struct draw_pt_front_end *draw_pt_vcache( void ); +struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); + + + +#endif diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c index f337f71ea0..d49770e7b2 100644 --- a/src/gallium/auxiliary/draw/draw_pt_elts.c +++ b/src/gallium/auxiliary/draw/draw_pt_elts.c @@ -1,88 +1,88 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell - */ - -#include "draw/draw_pt.h" -#include "draw/draw_private.h" - -/* Neat get_elt func that also works for varrays drawing by encoding - * the start value into a pointer. - */ - -static unsigned elt_uint( const void *elts, unsigned idx ) -{ - return *(((const uint *)elts) + idx); -} - -static unsigned elt_ushort( const void *elts, unsigned idx ) -{ - return *(((const ushort *)elts) + idx); -} - -static unsigned elt_ubyte( const void *elts, unsigned idx ) -{ - return *(((const ubyte *)elts) + idx); -} - -static unsigned elt_vert( const void *elts, unsigned idx ) -{ - return (const ubyte *)elts - (const ubyte *)NULL + idx; -} - -pt_elt_func draw_pt_elt_func( struct draw_context *draw ) -{ - switch (draw->user.eltSize) { - case 0: return elt_vert; - case 1: return elt_ubyte; - case 2: return elt_ushort; - case 4: return elt_uint; - default: return NULL; - } -} - -const void *draw_pt_elt_ptr( struct draw_context *draw, - unsigned start ) -{ - const char *elts = draw->user.elts; - - switch (draw->user.eltSize) { - case 0: - return (const void *)(((const ubyte *)NULL) + start); - case 1: - return (const void *)(((const ubyte *)elts) + start); - case 2: - return (const void *)(((const ushort *)elts) + start); - case 4: - return (const void *)(((const uint *)elts) + start); - default: - return NULL; - } -} +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "draw/draw_pt.h" +#include "draw/draw_private.h" + +/* Neat get_elt func that also works for varrays drawing by encoding + * the start value into a pointer. + */ + +static unsigned elt_uint( const void *elts, unsigned idx ) +{ + return *(((const uint *)elts) + idx); +} + +static unsigned elt_ushort( const void *elts, unsigned idx ) +{ + return *(((const ushort *)elts) + idx); +} + +static unsigned elt_ubyte( const void *elts, unsigned idx ) +{ + return *(((const ubyte *)elts) + idx); +} + +static unsigned elt_vert( const void *elts, unsigned idx ) +{ + return (const ubyte *)elts - (const ubyte *)NULL + idx; +} + +pt_elt_func draw_pt_elt_func( struct draw_context *draw ) +{ + switch (draw->user.eltSize) { + case 0: return elt_vert; + case 1: return elt_ubyte; + case 2: return elt_ushort; + case 4: return elt_uint; + default: return NULL; + } +} + +const void *draw_pt_elt_ptr( struct draw_context *draw, + unsigned start ) +{ + const char *elts = draw->user.elts; + + switch (draw->user.eltSize) { + case 0: + return (const void *)(((const ubyte *)NULL) + start); + case 1: + return (const void *)(((const ubyte *)elts) + start); + case 2: + return (const void *)(((const ushort *)elts) + start); + case 4: + return (const void *)(((const uint *)elts) + start); + default: + return NULL; + } +} -- cgit v1.2.3 From 3e9b1bc1009ca1a2923f844d00bf478dc9d24644 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 23 Mar 2008 21:54:44 +0000 Subject: draw: tweak the definition of draw_need_pipeline, fix minor bug --- src/gallium/auxiliary/draw/draw_validate.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index 33e5559508..70b477ba5c 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -34,20 +34,14 @@ /** - * Check if we need any special pipeline stages, or whether prims/verts - * can go through untouched. + * Check if we need any special pipeline stages, or whether + * prims/verts can go through untouched. Don't test for bypass + * clipping or vs modes, this function is just about the primitive + * pipeline stages. */ boolean draw_need_pipeline(const struct draw_context *draw) { - /* clipping */ - if (!draw->rasterizer->bypass_clipping) - return TRUE; - - /* vertex shader */ - if (!draw->rasterizer->bypass_vs) - return TRUE; - /* line stipple */ if (draw->rasterizer->line_stipple_enable && draw->line_stipple) return TRUE; @@ -72,6 +66,11 @@ draw_need_pipeline(const struct draw_context *draw) if (draw->rasterizer->poly_stipple_enable && draw->pipeline.pstipple) return TRUE; + /* unfilled polygons */ + if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || + draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) + return TRUE; + /* polygon offset */ if (draw->rasterizer->offset_cw || draw->rasterizer->offset_ccw) return TRUE; @@ -84,9 +83,14 @@ draw_need_pipeline(const struct draw_context *draw) if (draw->rasterizer->light_twoside) return TRUE; - /* polygon cull */ + /* polygon cull - this is difficult - hardware can cull just fine + * most of the time (though sometimes CULL_NEITHER is unsupported. + * + * Generally this isn't a reason to require the pipeline, though. + * if (draw->rasterizer->cull_mode) return TRUE; + */ return FALSE; } -- cgit v1.2.3 From 3b217c7fa77d397540e4b6299a35f586ad53a1a1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 23 Mar 2008 21:55:19 +0000 Subject: draw: check need_pipeline() in passthrough --- src/gallium/auxiliary/draw/draw_pt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 219b563422..d7169f78f4 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -114,7 +114,7 @@ draw_pt_arrays(struct draw_context *draw, middle = draw->pt.middle.fetch_shade_cliptest_pipeline; } #else - if (cliptest /*|| pipeline*/ || shading) + if (cliptest || pipeline || shading) return FALSE; middle = draw->pt.middle.fetch_emit; -- cgit v1.2.3 From a6d17bf671d6bfbb187a62ba14b9ad08fb5dafe1 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Mon, 24 Mar 2008 09:56:12 +0000 Subject: draw: Fix #include order to fix Linux build. --- src/gallium/auxiliary/draw/draw_pt_elts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c index d49770e7b2..02fb6d9f41 100644 --- a/src/gallium/auxiliary/draw/draw_pt_elts.c +++ b/src/gallium/auxiliary/draw/draw_pt_elts.c @@ -30,8 +30,8 @@ * Keith Whitwell */ -#include "draw/draw_pt.h" #include "draw/draw_private.h" +#include "draw/draw_pt.h" /* Neat get_elt func that also works for varrays drawing by encoding * the start value into a pointer. -- cgit v1.2.3 From f4588c175280cd2b1f5341cd02bed7e9e7d1fd05 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 24 Mar 2008 10:52:20 +0000 Subject: Revert "draw: Fix #include order to fix Linux build." This reverts commit a6d17bf671d6bfbb187a62ba14b9ad08fb5dafe1. --- src/gallium/auxiliary/draw/draw_pt_elts.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c index 02fb6d9f41..d49770e7b2 100644 --- a/src/gallium/auxiliary/draw/draw_pt_elts.c +++ b/src/gallium/auxiliary/draw/draw_pt_elts.c @@ -30,8 +30,8 @@ * Keith Whitwell */ -#include "draw/draw_private.h" #include "draw/draw_pt.h" +#include "draw/draw_private.h" /* Neat get_elt func that also works for varrays drawing by encoding * the start value into a pointer. -- cgit v1.2.3 From e0a9ce10f4c30b1aec08c46b6a33885782d40dd0 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 24 Mar 2008 10:53:26 +0000 Subject: draw: pre-declare referenced structs --- src/gallium/auxiliary/draw/draw_pt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index dfc29b2e7d..439fa4c881 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -37,6 +37,8 @@ typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx ); +struct draw_pt_middle_end; +struct draw_context; /* The "front end" - prepare sets of fetch, draw elements for the * middle end. -- cgit v1.2.3 From c8c373514a18d31181cfaeefc23cd06eab3fddd0 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 24 Mar 2008 08:09:34 -0600 Subject: gallium: tweak coords in u_gen_mipmap code --- src/gallium/auxiliary/util/u_gen_mipmap.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 9203a78191..13e4531d4a 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -773,23 +773,23 @@ set_vertex_data(struct gen_mipmap_state *ctx, float width, float height) { void *buf; - ctx->vertices[0][0][0] = 0.0f; /*x*/ - ctx->vertices[0][0][1] = 0.0f; /*y*/ + ctx->vertices[0][0][0] = -0.5f; /*x*/ + ctx->vertices[0][0][1] = -0.5f; /*y*/ ctx->vertices[0][1][0] = 0.0f; /*s*/ ctx->vertices[0][1][1] = 0.0f; /*t*/ - ctx->vertices[1][0][0] = width; /*x*/ - ctx->vertices[1][0][1] = 0.0f; /*y*/ + ctx->vertices[1][0][0] = width - 0.5f; /*x*/ + ctx->vertices[1][0][1] = -0.5f; /*y*/ ctx->vertices[1][1][0] = 1.0f; /*s*/ ctx->vertices[1][1][1] = 0.0f; /*t*/ - ctx->vertices[2][0][0] = width; - ctx->vertices[2][0][1] = height; + ctx->vertices[2][0][0] = width - 0.5f; + ctx->vertices[2][0][1] = height - 0.5f; ctx->vertices[2][1][0] = 1.0f; ctx->vertices[2][1][1] = 1.0f; - ctx->vertices[3][0][0] = 0.0f; - ctx->vertices[3][0][1] = height; + ctx->vertices[3][0][0] = -0.5f; + ctx->vertices[3][0][1] = height - 0.5f; ctx->vertices[3][1][0] = 0.0f; ctx->vertices[3][1][1] = 1.0f; -- cgit v1.2.3 From 110b63d00fa0a555a00f5b1560452323517eafe1 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 24 Mar 2008 08:53:16 -0600 Subject: gallium: pass the filter mode to util_gen_mipmap(). Remove util_gen_mipmap_filter() when no longer used. --- src/gallium/auxiliary/util/u_gen_mipmap.c | 25 ++++++++++--------------- src/gallium/auxiliary/util/u_gen_mipmap.h | 2 +- src/mesa/state_tracker/st_gen_mipmap.c | 3 ++- 3 files changed, 13 insertions(+), 17 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 13e4531d4a..26df5f29f8 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -719,7 +719,6 @@ util_create_gen_mipmap(struct pipe_context *pipe, ctx->sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; ctx->sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; - ctx->sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; ctx->sampler.normalized_coords = 1; @@ -849,11 +848,12 @@ simple_viewport(struct pipe_context *pipe, uint width, uint height) * \param baseLevel the first mipmap level to use as a src * \param lastLevel the last mipmap level to generate * \param filter the minification filter used to generate mipmap levels with + * \param filter one of PIPE_TEX_FILTER_LINEAR, PIPE_TEX_FILTER_NEAREST */ void -util_gen_mipmap_filter(struct gen_mipmap_state *ctx, - struct pipe_texture *pt, - uint face, uint baseLevel, uint lastLevel, uint filter) +util_gen_mipmap(struct gen_mipmap_state *ctx, + struct pipe_texture *pt, + uint face, uint baseLevel, uint lastLevel, uint filter) { struct pipe_context *pipe = ctx->pipe; struct pipe_screen *screen = pipe->screen; @@ -914,6 +914,7 @@ util_gen_mipmap_filter(struct gen_mipmap_state *ctx, */ ctx->sampler.min_lod = ctx->sampler.max_lod = (float) srcLevel; ctx->sampler.lod_bias = (float) srcLevel; + ctx->sampler.mag_img_filter = filter; ctx->sampler.min_img_filter = filter; cso_single_sampler(ctx->cso, 0, &ctx->sampler); cso_single_sampler_done(ctx->cso); @@ -949,18 +950,12 @@ util_gen_mipmap_filter(struct gen_mipmap_state *ctx, /** - * Generate mipmap images with a linear minification filter. - * See util_gen_mipmap_filter for more info. - * - * \param pt the texture to generate mipmap levels for - * \param face which cube face to generate mipmaps for (0 for non-cube maps) - * \param baseLevel the first mipmap level to use as a src - * \param lastLevel the last mipmap level to generate + * XXX remove this */ void -util_gen_mipmap(struct gen_mipmap_state *ctx, - struct pipe_texture *pt, - uint face, uint baseLevel, uint lastLevel) +util_gen_mipmap_filter(struct gen_mipmap_state *ctx, + struct pipe_texture *pt, + uint face, uint baseLevel, uint lastLevel, uint filter) { - util_gen_mipmap_filter( ctx, pt, face, baseLevel, lastLevel, PIPE_TEX_FILTER_LINEAR ); + util_gen_mipmap_filter( ctx, pt, face, baseLevel, lastLevel, filter ); } diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.h b/src/gallium/auxiliary/util/u_gen_mipmap.h index 64abdeae98..a5df8481bf 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.h +++ b/src/gallium/auxiliary/util/u_gen_mipmap.h @@ -50,7 +50,7 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx); extern void util_gen_mipmap(struct gen_mipmap_state *ctx, struct pipe_texture *pt, - uint face, uint baseLevel, uint lastLevel); + uint face, uint baseLevel, uint lastLevel, uint filter); extern void util_gen_mipmap_filter(struct gen_mipmap_state *ctx, diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 61e1d9621c..a931911227 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -92,7 +92,8 @@ st_render_mipmap(struct st_context *st, return FALSE; } - util_gen_mipmap(st->gen_mipmap, pt, face, baseLevel, lastLevel); + util_gen_mipmap(st->gen_mipmap, pt, face, baseLevel, lastLevel, + PIPE_TEX_FILTER_LINEAR); /* shaders don't go through CSO yet */ if (st->fp) -- cgit v1.2.3 From bf8de6d4dc1f956b7e70be285ff0d983b1a8eb5b Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 24 Mar 2008 16:50:39 +0100 Subject: gallium: Remove util_gen_mipmap_filter(). --- src/gallium/auxiliary/util/u_gen_mipmap.c | 12 ------------ src/gallium/auxiliary/util/u_gen_mipmap.h | 6 ------ 2 files changed, 18 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 26df5f29f8..427b217755 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -947,15 +947,3 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, cso_restore_sampler_textures(ctx->cso); cso_restore_framebuffer(ctx->cso); } - - -/** - * XXX remove this - */ -void -util_gen_mipmap_filter(struct gen_mipmap_state *ctx, - struct pipe_texture *pt, - uint face, uint baseLevel, uint lastLevel, uint filter) -{ - util_gen_mipmap_filter( ctx, pt, face, baseLevel, lastLevel, filter ); -} diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.h b/src/gallium/auxiliary/util/u_gen_mipmap.h index a5df8481bf..bd9af54fb7 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.h +++ b/src/gallium/auxiliary/util/u_gen_mipmap.h @@ -52,10 +52,4 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, struct pipe_texture *pt, uint face, uint baseLevel, uint lastLevel, uint filter); -extern void -util_gen_mipmap_filter(struct gen_mipmap_state *ctx, - struct pipe_texture *pt, - uint face, uint baseLevel, uint lastLevel, uint filter); - - #endif -- cgit v1.2.3 From d83e75c75958673e8019475f5ba5c2cff9b8415f Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 24 Mar 2008 12:51:50 -0600 Subject: gallium: move filter assignment out of loop --- src/gallium/auxiliary/util/u_gen_mipmap.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 427b217755..e129c062be 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -890,6 +890,10 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, memset(&fb, 0, sizeof(fb)); fb.num_cbufs = 1; + /* set min/mag to same filter for faster sw speed */ + ctx->sampler.mag_img_filter = filter; + ctx->sampler.min_img_filter = filter; + /* * XXX for small mipmap levels, it may be faster to use the software * fallback path... @@ -914,8 +918,6 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, */ ctx->sampler.min_lod = ctx->sampler.max_lod = (float) srcLevel; ctx->sampler.lod_bias = (float) srcLevel; - ctx->sampler.mag_img_filter = filter; - ctx->sampler.min_img_filter = filter; cso_single_sampler(ctx->cso, 0, &ctx->sampler); cso_single_sampler_done(ctx->cso); #if 0 -- cgit v1.2.3 From dd51365acdd515577ee76850ceda01347ceb27c0 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 24 Mar 2008 20:18:59 +0000 Subject: gallium: cleanup p_debug Now debug_printf is disabled on release builds. Use debug_error or _debug_printf to output messages on release versions. --- src/gallium/auxiliary/util/p_debug.c | 31 ++++------ src/gallium/include/pipe/p_debug.h | 117 +++++++++++++++++++++++++++++++---- 2 files changed, 118 insertions(+), 30 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 19ad3f4663..bd58e0e3a8 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -58,7 +58,7 @@ int rpl_snprintf(char *str, size_t size, const char *format, ...); #endif -void debug_vprintf(const char *format, va_list ap) +void _debug_vprintf(const char *format, va_list ap) { #ifdef WIN32 #ifndef WINCE @@ -76,15 +76,7 @@ void debug_vprintf(const char *format, va_list ap) } -void debug_printf(const char *format, ...) -{ - va_list ap; - va_start(ap, format); - debug_vprintf(format, ap); - va_end(ap); -} - - +#ifdef DEBUG void debug_print_blob( const char *name, const void *blob, unsigned size ) @@ -99,12 +91,10 @@ void debug_print_blob( const char *name, debug_printf("%d:\t%08x\n", i, ublob[i]); } } +#endif -/* TODO: implement a debug_abort that calls EngBugCheckEx on WIN32 */ - - -static INLINE void debug_break(void) +void _debug_break(void) { #if (defined(__i386__) || defined(__386__)) && defined(__GNUC__) __asm("int3"); @@ -155,15 +145,18 @@ static unsigned abort_en() } #endif -void debug_assert_fail(const char *expr, const char *file, unsigned line) +void _debug_assert_fail(const char *expr, + const char *file, + unsigned line, + const char *function) { - debug_printf("%s:%i: Assertion `%s' failed.\n", file, line, expr); + _debug_printf("%s:%u:%s: Assertion `%s' failed.\n", file, line, function, expr); if (abort_en()) { debug_break(); } else { - debug_printf("continuing...\n"); + _debug_printf("continuing...\n"); } } @@ -180,7 +173,7 @@ debug_dump_enum(const struct debug_named_value *names, ++names; } - snprintf(rest, sizeof(rest), "0x%08x", value); + snprintf(rest, sizeof(rest), "0x%08lx", value); return rest; } @@ -213,7 +206,7 @@ debug_dump_flags(const struct debug_named_value *names, else first = 0; - snprintf(rest, sizeof(rest), "0x%08x", value); + snprintf(rest, sizeof(rest), "0x%08lx", value); strncat(output, rest, sizeof(output)); } diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index 15fc2006d5..494cc3bb71 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -60,58 +60,153 @@ extern "C" { #endif +void _debug_vprintf(const char *format, va_list ap); + + +static void INLINE +_debug_printf(const char *format, ...) +{ + va_list ap; + va_start(ap, format); + _debug_vprintf(format, ap); + va_end(ap); +} + + /** * Print debug messages. * - * A debug message will be printed regardless of the DEBUG/NDEBUG macros. - * * The actual channel used to output debug message is platform specific. To * avoid misformating or truncation, follow these rules of thumb: * - output whole lines * - avoid outputing large strings (512 bytes is the current maximum length * that is guaranteed to be printed in all platforms) */ -void debug_printf(const char *format, ...); +static void INLINE +debug_printf(const char *format, ...) +{ +#ifdef DEBUG + va_list ap; + va_start(ap, format); + _debug_vprintf(format, ap); + va_end(ap); +#endif +} + +#ifdef DEBUG +#define debug_vprintf(_format, _ap) _debug_vprintf(_format, _ap) +#else +#define debug_vprintf(_format, _ap) ((void)0) +#endif -/* Dump a blob in hex to the same place that debug_printf sends its - * messages: + +/** + * Dump a blob in hex to the same place that debug_printf sends its + * messages. */ +#ifdef DEBUG void debug_print_blob( const char *name, const void *blob, unsigned size ); +#else +#define debug_print_blob(_name, _blob, _size) ((void)0) +#endif + + +void _debug_break(void); + /** - * @sa debug_printf + * Hard-coded breakpoint. */ -void debug_vprintf(const char *format, va_list ap); +#ifdef DEBUG +#if (defined(__i386__) || defined(__386__)) && defined(__GNUC__) +#define debug_break() __asm("int3") +#elif (defined(__i386__) || defined(__386__)) && defined(__MSC__) +#define debug_break() _asm {int 3} +#else +#define debug_break() _debug_break() +#endif +#else /* !DEBUG */ +#define debug_break() ((void)0) +#endif /* !DEBUG */ + -void debug_assert_fail(const char *expr, const char *file, unsigned line); +void _debug_assert_fail(const char *expr, + const char *file, + unsigned line, + const char *function); -/** Assert macro */ +/** + * Assert macro + * + * Do not expect that the assert call terminates -- errors must be handled + * regardless of assert behavior. + */ #ifdef DEBUG -#define debug_assert(expr) ((expr) ? (void)0 : debug_assert_fail(#expr, __FILE__, __LINE__)) +#define debug_assert(expr) ((expr) ? (void)0 : _debug_assert_fail(#expr, __FILE__, __LINE__, __FUNCTION__)) #else #define debug_assert(expr) ((void)0) #endif +/** Override standard assert macro */ #ifdef assert #undef assert #endif #define assert(expr) debug_assert(expr) +/** + * Output the current function name. + */ +#ifdef DEBUG +#define debug_checkpoint() \ + _debug_printf("%s\n", __FUNCTION__) +#else +#define debug_checkpoint() \ + ((void)0) +#endif + + +/** + * Output the full source code position. + */ +#ifdef DEBUG +#define debug_checkpoint_full() \ + debug_printf("%s:%u:%s", __FILE__, __LINE__, __FUNCTION__) +#else +#define debug_checkpoint_full() \ + ((void)0) +#endif + + +/** + * Output a warning message. Muted on release version. + */ #ifdef DEBUG #define debug_warning(__msg) \ - debug_printf("%s:%i:warning: %s\n", __FILE__, __LINE__, (__msg)) + _debug_printf("%s:%u:%s: warning: %s\n", __FILE__, __LINE__, __FUNCTION__, (__msg)) #else #define debug_warning(__msg) \ ((void)0) #endif +/** + * Output an error message. Not muted on release version. + */ +#ifdef DEBUG +#define debug_error(__msg) \ + _debug_printf("%s:%u:%s: error: %s\n", __FILE__, __LINE__, __FUNCTION__, (__msg)) +#else +#define debug_error(__msg) \ + _debug_printf("error: %s\n", __msg)) +#endif + + /** * Used by debug_dump_enum and debug_dump_flags to describe symbols. */ -- cgit v1.2.3 From e8c6ea4f608296ed976f1597ffd46ac0b42fc84a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 24 Mar 2008 22:30:33 +0000 Subject: gallium: Simple cross platform get-opt system. Uses getenv on Linux, and a memory mapped text file on Windows. It supports boolean options, flags, and plain strings. --- src/gallium/auxiliary/util/p_debug.c | 144 +++++++++++++++++++++++++++++++++++ src/gallium/include/pipe/p_debug.h | 27 +++++++ 2 files changed, 171 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index bd58e0e3a8..351de95c95 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -107,6 +107,150 @@ void _debug_break(void) #endif } + +#ifdef WIN32 +static const char * +find(const char *start, const char *end, char c) +{ + const char *p; + for(p = start; !end || p != end; ++p) { + if(*p == c) + return p; + if(*p < 32) + break; + } + return NULL; +} + +static int +compare(const char *start, const char *end, const char *s) +{ + const char *p, *q; + for(p = start, q = s; p != end && *q != '\0'; ++p, ++q) { + if(*p != *q) + return 0; + } + return p == end && *q == '\0'; +} + +static void +copy(char *dst, const char *start, const char *end, size_t n) +{ + const char *p; + char *q; + for(p = start, q = dst, n = n - 1; p != end && n; ++p, ++q, --n) + *q = *p; + *q = '\0'; +} +#endif + + +const char * +debug_get_option(const char *name, const char *dfault) +{ + const char *result; +#ifdef WIN32 + ULONG_PTR iFile = 0; + const void *pMap = NULL; + const char *sol, *eol, *sep; + static char output[1024]; + + pMap = EngMapFile(L"\\??\\c:\\gallium.cfg", 0, &iFile); + if(!pMap) + result = dfault; + else { + sol = (const char *)pMap; + while(1) { + /* TODO: handle LF line endings */ + eol = find(sol, NULL, '\r'); + if(!eol || eol == sol) + break; + sep = find(sol, eol, '='); + if(!sep) + break; + if(compare(sol, sep, name)) { + copy(output, sep + 1, eol, sizeof(output)); + result = output; + break; + } + sol = eol + 2; + } + EngUnmapFile(iFile); + } +#else + + result = getenv(name); + if(!result) + result = dfault; +#endif + + if(result) + debug_printf("%s: %s = %s\n", __FUNCTION__, name, result); + else + debug_printf("%s: %s = (null)\n", __FUNCTION__, name); + + return result; +} + +boolean +debug_get_bool_option(const char *name, boolean dfault) +{ + const char *str = debug_get_option(name, NULL); + boolean result; + + if(str == NULL) + result = dfault; + else if(!strcmp(str, "no")) + result = FALSE; + else if(!strcmp(str, "0")) + result = FALSE; + else if(!strcmp(str, "f")) + result = FALSE; + else if(!strcmp(str, "false")) + result = FALSE; + else + result = TRUE; + + debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? "TRUE" : "FALSE"); + + return result; +} + + +long +debug_get_num_option(const char *name, long dfault) +{ + /* FIXME */ + return dfault; +} + + +unsigned long +debug_get_flags_option(const char *name, + const struct debug_named_value *flags, + unsigned long dfault) +{ + unsigned long result; + const char *str; + + str = debug_get_option(name, NULL); + if(!str) + result = dfault; + else { + result = 0; + while( flags->name ) { + if (!strcmp(str, "all") || strstr(str, flags->name )) + result |= flags->value; + ++flags; + } + } + + debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result); + + return result; +} + + #if defined(WIN32) ULONG_PTR debug_config_file = 0; void *mapped_config_file = 0; diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index 494cc3bb71..95bdf2d3b9 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -256,6 +256,33 @@ debug_dump_flags(const struct debug_named_value *names, unsigned long value); +/** + * Get option. + * + * It is an alias for getenv on Linux. + * + * On Windows it reads C:\gallium.cfg, which is a text file with CR+LF line + * endings with one option per line as + * + * NAME=value + * + * This file must be terminated with an extra empty line. + */ +const char * +debug_get_option(const char *name, const char *dfault); + +boolean +debug_get_bool_option(const char *name, boolean dfault); + +long +debug_get_unsigned_option(const char *name, long dfault); + +unsigned long +debug_get_flags_option(const char *name, + const struct debug_named_value *flags, + unsigned long dfault); + + void * debug_malloc(const char *file, unsigned line, const char *function, size_t size); -- cgit v1.2.3 From 648e26aa95b519f1f4abc429b5a23abaf4a5195b Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 24 Mar 2008 16:24:32 -0600 Subject: gallium: added tgsi_num_tokens() function to return number of tokens in token array. Maybe move to a different file someday. --- src/gallium/auxiliary/tgsi/util/tgsi_parse.c | 13 +++++++++++++ src/gallium/auxiliary/tgsi/util/tgsi_parse.h | 4 ++++ 2 files changed, 17 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c index bf6b89ce56..c3526cb71f 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c @@ -317,3 +317,16 @@ tgsi_parse_token( } } + +unsigned +tgsi_num_tokens(const struct tgsi_token *tokens) +{ + struct tgsi_parse_context ctx; + if (tgsi_parse_init(&ctx, tokens) == TGSI_PARSE_OK) { + unsigned len = (ctx.FullHeader.Header.HeaderSize + + ctx.FullHeader.Header.BodySize + + 1); + return len; + } + return 0; +} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h index 40083728d6..a98e88e343 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h @@ -113,6 +113,10 @@ void tgsi_parse_token( struct tgsi_parse_context *ctx ); +unsigned +tgsi_num_tokens(const struct tgsi_token *tokens); + + #if defined __cplusplus } #endif -- cgit v1.2.3 From 7f430293772f201a59bcf62edd1ed4f942f8be29 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 24 Mar 2008 16:26:45 -0600 Subject: gallium: use pipe_texture_reference() in a few places (fixes refcounting bugs) --- src/gallium/auxiliary/draw/draw_aaline.c | 9 +++++++-- src/gallium/auxiliary/draw/draw_pstipple.c | 12 ++++++++++-- 2 files changed, 17 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c index 6742f7f4b9..cc1873abad 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -621,7 +621,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) bind_aaline_fragment_shader(aaline); aaline->state.sampler[num] = aaline->sampler_cso; - aaline->state.texture[num] = aaline->texture; + pipe_texture_reference(&aaline->state.texture[num], aaline->texture); aaline->driver_bind_sampler_states(pipe, num + 1, aaline->state.sampler); aaline->driver_set_sampler_textures(pipe, num + 1, aaline->state.texture); @@ -769,9 +769,14 @@ aaline_set_sampler_textures(struct pipe_context *pipe, unsigned num, struct pipe_texture **texture) { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + uint i; + /* save current */ - memcpy(aaline->state.texture, texture, num * sizeof(struct pipe_texture *)); + for (i = 0; i < num; i++) { + pipe_texture_reference(&aaline->state.texture[i], texture[i]); + } aaline->num_textures = num; + /* pass-through */ aaline->driver_set_sampler_textures(aaline->pipe, num, texture); } diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index bd8d3a76ae..f00b23959b 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -398,6 +398,7 @@ pstip_create_texture(struct pstip_stage *pstip) texTemp.cpp = 1; pstip->texture = screen->texture_create(screen, &texTemp); + assert(pstip->texture->refcount == 1); //pstip_update_texture(pstip); } @@ -492,7 +493,8 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) /* plug in our sampler, texture */ pstip->state.samplers[pstip->sampler_unit] = pstip->sampler_cso; - pstip->state.textures[pstip->sampler_unit] = pstip->texture; + pipe_texture_reference(&pstip->state.textures[pstip->sampler_unit], + pstip->texture); pstip->driver_bind_sampler_states(pipe, num_samplers, pstip->state.samplers); pstip->driver_set_sampler_textures(pipe, num_samplers, pstip->state.textures); @@ -624,6 +626,7 @@ pstip_bind_sampler_states(struct pipe_context *pipe, unsigned num, void **sampler) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + /* save current */ memcpy(pstip->state.samplers, sampler, num * sizeof(void *)); pstip->num_samplers = num; @@ -637,9 +640,14 @@ pstip_set_sampler_textures(struct pipe_context *pipe, unsigned num, struct pipe_texture **texture) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + uint i; + /* save current */ - memcpy(pstip->state.textures, texture, num * sizeof(struct pipe_texture *)); + for (i = 0; i < num; i++) { + pipe_texture_reference(&pstip->state.textures[i], texture[i]); + } pstip->num_textures = num; + /* pass-through */ pstip->driver_set_sampler_textures(pstip->pipe, num, texture); } -- cgit v1.2.3 From ae146e4bc86aeade59d018100e39e160f7553994 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 24 Mar 2008 16:31:15 -0600 Subject: gallium: make a copy of the vertex shader's token array. This solves problems when the state tracker frees the token array when the draw module still needs it. --- src/gallium/auxiliary/draw/draw_vs_exec.c | 7 ++++++- src/gallium/auxiliary/draw/draw_vs_llvm.c | 7 ++++++- src/gallium/auxiliary/draw/draw_vs_sse.c | 6 +++++- 3 files changed, 17 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 364693e0b4..4e2fa72707 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -38,6 +38,8 @@ #include "draw_context.h" #include "draw_vs.h" +#include "tgsi/util/tgsi_parse.h" + static INLINE unsigned compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) @@ -187,6 +189,7 @@ vs_exec_run( struct draw_vertex_shader *shader, static void vs_exec_delete( struct draw_vertex_shader *dvs ) { + FREE((void*) dvs->state.tokens); FREE( dvs ); } @@ -196,11 +199,13 @@ draw_create_vs_exec(struct draw_context *draw, const struct pipe_shader_state *state) { struct draw_vertex_shader *vs = CALLOC_STRUCT( draw_vertex_shader ); + uint nt = tgsi_num_tokens(state->tokens); if (vs == NULL) return NULL; - vs->state = *state; + /* we make a private copy of the tokens */ + vs->state.tokens = mem_dup(state->tokens, nt * sizeof(state->tokens[0])); vs->prepare = vs_exec_prepare; vs->run = vs_exec_run; vs->delete = vs_exec_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 53c260be53..bd983f2ddf 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -38,6 +38,8 @@ #include "draw_context.h" #include "draw_vs.h" +#include "tgsi/util/tgsi_parse.h" + #ifdef MESA_LLVM #include "gallivm/gallivm.h" @@ -186,6 +188,7 @@ vs_llvm_delete( struct draw_vertex_shader *base ) /* Do something to free compiled shader: */ + FREE( (void*) shader->base.state.tokens ); FREE( shader ); } @@ -197,12 +200,14 @@ draw_create_vs_llvm(struct draw_context *draw, const struct pipe_shader_state *templ) { struct draw_llvm_vertex_shader *vs; + uint nt = tgsi_num_tokens(templ->tokens); vs = CALLOC_STRUCT( draw_llvm_vertex_shader ); if (vs == NULL) return NULL; - vs->base.state = templ; + /* we make a private copy of the tokens */ + vs->base.state.tokens = mem_dup(templ->tokens, nt * sizeof(templ->tokens[0])); vs->base.prepare = vs_llvm_prepare; vs->base.run = vs_llvm_run; vs->base.delete = vs_llvm_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 5ee2adb344..a4503c143e 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -43,6 +43,7 @@ #include "rtasm/rtasm_x86sse.h" #include "tgsi/exec/tgsi_sse2.h" +#include "tgsi/util/tgsi_parse.h" typedef void (XSTDCALL *codegen_function) ( @@ -204,6 +205,7 @@ vs_sse_delete( struct draw_vertex_shader *base ) x86_release_func( &shader->sse2_program ); + FREE( (void*) shader->base.state.tokens ); FREE( shader ); } @@ -213,6 +215,7 @@ draw_create_vs_sse(struct draw_context *draw, const struct pipe_shader_state *templ) { struct draw_sse_vertex_shader *vs; + uint nt = tgsi_num_tokens(templ->tokens); if (!draw->use_sse) return NULL; @@ -221,7 +224,8 @@ draw_create_vs_sse(struct draw_context *draw, if (vs == NULL) return NULL; - vs->base.state = *templ; + /* we make a private copy of the tokens */ + vs->base.state.tokens = mem_dup(templ->tokens, nt * sizeof(templ->tokens[0])); vs->base.prepare = vs_sse_prepare; vs->base.run = vs_sse_run; vs->base.delete = vs_sse_delete; -- cgit v1.2.3 From d6af8fc51d0288f34b92b7249c565c382e83765a Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 24 Mar 2008 18:32:48 -0600 Subject: gallium: move sampler_unit field to pstip_fragment_shader since it's per-shader Also, fix another texture refcounting bug. --- src/gallium/auxiliary/draw/draw_pstipple.c | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index f00b23959b..9cec986640 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -56,6 +56,7 @@ struct pstip_fragment_shader struct pipe_shader_state state; void *driver_fs; void *pstip_fs; + uint sampler_unit; }; @@ -67,7 +68,6 @@ struct pstip_stage struct draw_stage stage; void *sampler_cso; - uint sampler_unit; struct pipe_texture *texture; uint num_samplers; uint num_textures; @@ -329,7 +329,7 @@ generate_pstip_fs(struct pstip_stage *pstip) tgsi_dump(pstip_fs.tokens, 0); #endif - pstip->sampler_unit = transform.maxSampler + 1; + pstip->fs->sampler_unit = transform.maxSampler + 1; pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs); } @@ -489,13 +489,15 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) /* how many samplers? */ /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ num_samplers = MAX2(pstip->num_textures, pstip->num_samplers); - num_samplers = MAX2(num_samplers, pstip->sampler_unit + 1); + num_samplers = MAX2(num_samplers, pstip->fs->sampler_unit + 1); /* plug in our sampler, texture */ - pstip->state.samplers[pstip->sampler_unit] = pstip->sampler_cso; - pipe_texture_reference(&pstip->state.textures[pstip->sampler_unit], + pstip->state.samplers[pstip->fs->sampler_unit] = pstip->sampler_cso; + pipe_texture_reference(&pstip->state.textures[pstip->fs->sampler_unit], pstip->texture); + assert(num_samplers <= PIPE_MAX_SAMPLERS); + pstip->driver_bind_sampler_states(pipe, num_samplers, pstip->state.samplers); pstip->driver_set_sampler_textures(pipe, num_samplers, pstip->state.textures); @@ -626,9 +628,14 @@ pstip_bind_sampler_states(struct pipe_context *pipe, unsigned num, void **sampler) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + uint i; /* save current */ memcpy(pstip->state.samplers, sampler, num * sizeof(void *)); + for (i = num; i < PIPE_MAX_SAMPLERS; i++) { + pstip->state.samplers[i] = NULL; + } + pstip->num_samplers = num; /* pass-through */ pstip->driver_bind_sampler_states(pstip->pipe, num, sampler); @@ -646,6 +653,10 @@ pstip_set_sampler_textures(struct pipe_context *pipe, for (i = 0; i < num; i++) { pipe_texture_reference(&pstip->state.textures[i], texture[i]); } + for (; i < PIPE_MAX_SAMPLERS; i++) { + pipe_texture_reference(&pstip->state.textures[i], NULL); + } + pstip->num_textures = num; /* pass-through */ -- cgit v1.2.3 From 4654803e2595ea041ea83baf5e13e6c68890e9a7 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 24 Mar 2008 18:49:56 -0600 Subject: gallium: fix a few bugs, warnings in the p_debug code added missing _ to a _debug_printf() call. --- src/gallium/auxiliary/util/p_debug.c | 4 ++-- src/gallium/include/pipe/p_debug.h | 9 ++++++--- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 351de95c95..cc036dabf8 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -260,7 +260,7 @@ enum { }; /* Check for aborts enabled. */ -static unsigned abort_en() +static unsigned abort_en(void) { if (!mapped_config_file) { @@ -283,7 +283,7 @@ static unsigned abort_en() return ((((char *)mapped_config_file)[0]) - 0x30) & eAssertAbortEn; } #else /* WIN32 */ -static unsigned abort_en() +static unsigned abort_en(void) { return !GETENV("GALLIUM_ABORT_ON_ASSERT"); } diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index 95bdf2d3b9..577cdaebcd 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -63,7 +63,7 @@ extern "C" { void _debug_vprintf(const char *format, va_list ap); -static void INLINE +static INLINE void _debug_printf(const char *format, ...) { va_list ap; @@ -82,7 +82,7 @@ _debug_printf(const char *format, ...) * - avoid outputing large strings (512 bytes is the current maximum length * that is guaranteed to be printed in all platforms) */ -static void INLINE +static INLINE void debug_printf(const char *format, ...) { #ifdef DEBUG @@ -133,6 +133,9 @@ void _debug_break(void); #endif /* !DEBUG */ +long +debug_get_num_option(const char *name, long dfault); + void _debug_assert_fail(const char *expr, const char *file, unsigned line, @@ -176,7 +179,7 @@ void _debug_assert_fail(const char *expr, */ #ifdef DEBUG #define debug_checkpoint_full() \ - debug_printf("%s:%u:%s", __FILE__, __LINE__, __FUNCTION__) + _debug_printf("%s:%u:%s", __FILE__, __LINE__, __FUNCTION__) #else #define debug_checkpoint_full() \ ((void)0) -- cgit v1.2.3 From 6579440ea98e61871fe781c1c9c681645ddcc075 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 24 Mar 2008 19:36:44 -0600 Subject: gallium: be smarter about picking the sampler unit for pstipple, aaalines Also, if the app really uses all available sampler/texture units, don't just die. Just use the last sampler for the pstipple or aaline texture. --- src/gallium/auxiliary/draw/draw_aaline.c | 54 ++++++++++++++++++++++++------ src/gallium/auxiliary/draw/draw_pstipple.c | 41 +++++++++++++++++------ 2 files changed, 74 insertions(+), 21 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c index cc1873abad..a999ef1227 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -61,6 +61,7 @@ struct aaline_fragment_shader void *aaline_fs; void *aapoint_fs; /* not yet */ void *sprite_fs; /* not yet */ + uint sampler_unit; }; @@ -117,7 +118,8 @@ struct aa_transform_context { struct tgsi_transform_context base; uint tempsUsed; /**< bitmask */ int colorOutput; /**< which output is the primary color */ - int maxSampler; /**< max sampler index found */ + uint samplersUsed; /**< bitfield of samplers used */ + int freeSampler; /** an available sampler for the pstipple */ int maxInput, maxGeneric; /**< max input index found */ int colorTemp, texTemp; /**< temp registers */ boolean firstInstruction; @@ -140,8 +142,11 @@ aa_transform_decl(struct tgsi_transform_context *ctx, aactx->colorOutput = decl->u.DeclarationRange.First; } else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { - if ((int) decl->u.DeclarationRange.Last > aactx->maxSampler) - aactx->maxSampler = decl->u.DeclarationRange.Last + 1; + uint i; + for (i = decl->u.DeclarationRange.First; + i <= decl->u.DeclarationRange.Last; i++) { + aactx->samplersUsed |= 1 << i; + } } else if (decl->Declaration.File == TGSI_FILE_INPUT) { if ((int) decl->u.DeclarationRange.Last > aactx->maxInput) @@ -163,6 +168,21 @@ aa_transform_decl(struct tgsi_transform_context *ctx, } +/** + * Find the lowest zero bit in the given word, or -1 if bitfield is all ones. + */ +static int +free_bit(uint bitfield) +{ + int i; + for (i = 0; i < 32; i++) { + if ((bitfield & (1 << i)) == 0) + return i; + } + return -1; +} + + /** * TGSI instruction transform callback. * Replace writes to result.color w/ a temp reg. @@ -180,6 +200,11 @@ aa_transform_inst(struct tgsi_transform_context *ctx, struct tgsi_full_declaration decl; uint i; + /* find free sampler */ + aactx->freeSampler = free_bit(aactx->samplersUsed); + if (aactx->freeSampler >= PIPE_MAX_SAMPLERS) + aactx->freeSampler = PIPE_MAX_SAMPLERS - 1; + /* find two free temp regs */ for (i = 0; i < 32; i++) { if ((aactx->tempsUsed & (1 << i)) == 0) { @@ -212,7 +237,7 @@ aa_transform_inst(struct tgsi_transform_context *ctx, decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_SAMPLER; decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = aactx->maxSampler + 1; + decl.u.DeclarationRange.Last = aactx->freeSampler; ctx->emit_declaration(ctx, &decl); /* declare new temp regs */ @@ -246,7 +271,7 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->maxInput + 1; newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->maxSampler + 1; + newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->freeSampler; ctx->emit_instruction(ctx, &newInst); @@ -322,7 +347,6 @@ generate_aaline_fs(struct aaline_stage *aaline) memset(&transform, 0, sizeof(transform)); transform.colorOutput = -1; - transform.maxSampler = -1; transform.maxInput = -1; transform.maxGeneric = -1; transform.colorTemp = -1; @@ -340,6 +364,8 @@ generate_aaline_fs(struct aaline_stage *aaline) tgsi_dump(aaline_fs.tokens, 0); #endif + aaline->fs->sampler_unit = transform.freeSampler; + aaline->fs->aaline_fs = aaline->driver_create_fs_state(aaline->pipe, &aaline_fs); @@ -602,7 +628,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) auto struct aaline_stage *aaline = aaline_stage(stage); struct draw_context *draw = stage->draw; struct pipe_context *pipe = aaline->pipe; - uint num = MAX2(aaline->num_textures, aaline->num_samplers); + uint num_samplers; assert(draw->rasterizer->line_smooth); @@ -620,11 +646,17 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) */ bind_aaline_fragment_shader(aaline); - aaline->state.sampler[num] = aaline->sampler_cso; - pipe_texture_reference(&aaline->state.texture[num], aaline->texture); + /* how many samplers? */ + /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ + num_samplers = MAX2(aaline->num_textures, aaline->num_samplers); + num_samplers = MAX2(num_samplers, aaline->fs->sampler_unit + 1); + + aaline->state.sampler[aaline->fs->sampler_unit] = aaline->sampler_cso; + pipe_texture_reference(&aaline->state.texture[aaline->fs->sampler_unit], + aaline->texture); - aaline->driver_bind_sampler_states(pipe, num + 1, aaline->state.sampler); - aaline->driver_set_sampler_textures(pipe, num + 1, aaline->state.texture); + aaline->driver_bind_sampler_states(pipe, num_samplers, aaline->state.sampler); + aaline->driver_set_sampler_textures(pipe, num_samplers, aaline->state.texture); /* now really draw first line */ stage->line = aaline_line; diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c index 9cec986640..4dddb72906 100644 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pstipple.c @@ -112,7 +112,8 @@ struct pstip_transform_context { uint tempsUsed; /**< bitmask */ int wincoordInput; int maxInput; - int maxSampler; /**< max sampler index found */ + uint samplersUsed; /**< bitfield of samplers used */ + int freeSampler; /** an available sampler for the pstipple */ int texTemp; /**< temp registers */ int numImmed; boolean firstInstruction; @@ -130,8 +131,11 @@ pstip_transform_decl(struct tgsi_transform_context *ctx, struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; if (decl->Declaration.File == TGSI_FILE_SAMPLER) { - if ((int) decl->u.DeclarationRange.Last > pctx->maxSampler) - pctx->maxSampler = (int) decl->u.DeclarationRange.Last; + uint i; + for (i = decl->u.DeclarationRange.First; + i <= decl->u.DeclarationRange.Last; i++) { + pctx->samplersUsed |= 1 << i; + } } else if (decl->Declaration.File == TGSI_FILE_INPUT) { pctx->maxInput = MAX2(pctx->maxInput, (int) decl->u.DeclarationRange.Last); @@ -159,6 +163,21 @@ pstip_transform_immed(struct tgsi_transform_context *ctx, } +/** + * Find the lowest zero bit in the given word, or -1 if bitfield is all ones. + */ +static int +free_bit(uint bitfield) +{ + int i; + for (i = 0; i < 32; i++) { + if ((bitfield & (1 << i)) == 0) + return i; + } + return -1; +} + + /** * TGSI instruction transform callback. * Replace writes to result.color w/ a temp reg. @@ -177,7 +196,11 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, struct tgsi_full_instruction newInst; uint i; int wincoordInput; - const int sampler = pctx->maxSampler + 1; + + /* find free sampler */ + pctx->freeSampler = free_bit(pctx->samplersUsed); + if (pctx->freeSampler >= PIPE_MAX_SAMPLERS) + pctx->freeSampler = PIPE_MAX_SAMPLERS - 1; if (pctx->wincoordInput < 0) wincoordInput = pctx->maxInput + 1; @@ -214,7 +237,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_SAMPLER; decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = sampler; + decl.u.DeclarationRange.Last = pctx->freeSampler; ctx->emit_declaration(ctx, &decl); /* declare new temp regs */ @@ -274,7 +297,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - newInst.FullSrcRegisters[1].SrcRegister.Index = sampler; + newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->freeSampler; ctx->emit_instruction(ctx, &newInst); /* KILP texTemp; # if texTemp < 0, KILL fragment */ @@ -313,7 +336,6 @@ generate_pstip_fs(struct pstip_stage *pstip) memset(&transform, 0, sizeof(transform)); transform.wincoordInput = -1; transform.maxInput = -1; - transform.maxSampler = -1; transform.texTemp = -1; transform.firstInstruction = TRUE; transform.base.transform_instruction = pstip_transform_inst; @@ -329,7 +351,8 @@ generate_pstip_fs(struct pstip_stage *pstip) tgsi_dump(pstip_fs.tokens, 0); #endif - pstip->fs->sampler_unit = transform.maxSampler + 1; + pstip->fs->sampler_unit = transform.freeSampler; + assert(pstip->fs->sampler_unit < PIPE_MAX_SAMPLERS); pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs); } @@ -399,8 +422,6 @@ pstip_create_texture(struct pstip_stage *pstip) pstip->texture = screen->texture_create(screen, &texTemp); assert(pstip->texture->refcount == 1); - - //pstip_update_texture(pstip); } -- cgit v1.2.3 From 6fa0bd067176d9159a741014301dfb5fbbd9c4b5 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 25 Mar 2008 11:37:24 +0000 Subject: gallium: Fix default option on Windows. --- src/gallium/auxiliary/util/p_debug.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index cc036dabf8..5447bbb6f5 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -155,10 +155,12 @@ debug_get_option(const char *name, const char *dfault) const char *sol, *eol, *sep; static char output[1024]; + result = dfault; + /* XXX: this creates the file if it does not exists, so it must either be + * disabled on release versions, or put in a less conspicuous place. + */ pMap = EngMapFile(L"\\??\\c:\\gallium.cfg", 0, &iFile); - if(!pMap) - result = dfault; - else { + if(pMap) { sol = (const char *)pMap; while(1) { /* TODO: handle LF line endings */ @@ -184,10 +186,7 @@ debug_get_option(const char *name, const char *dfault) result = dfault; #endif - if(result) - debug_printf("%s: %s = %s\n", __FUNCTION__, name, result); - else - debug_printf("%s: %s = (null)\n", __FUNCTION__, name); + debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? result : "(null)"); return result; } -- cgit v1.2.3 From 4505acf3b28f0b88bf97838ed7898f10e9200b93 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 25 Mar 2008 15:17:53 +0000 Subject: draw: take primitive into account when deciding if the pipeline is active --- src/gallium/auxiliary/draw/draw_passthrough.c | 2 +- src/gallium/auxiliary/draw/draw_private.h | 3 +- src/gallium/auxiliary/draw/draw_pt.c | 2 +- src/gallium/auxiliary/draw/draw_validate.c | 99 ++++++++++++++++++--------- 4 files changed, 69 insertions(+), 37 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_passthrough.c b/src/gallium/auxiliary/draw/draw_passthrough.c index dd00894c5b..2198079a88 100644 --- a/src/gallium/auxiliary/draw/draw_passthrough.c +++ b/src/gallium/auxiliary/draw/draw_passthrough.c @@ -424,7 +424,7 @@ draw_passthrough_arrays(struct draw_context *draw, debug_printf("%s %d %d %d\n", __FUNCTION__, prim, start, count); - if (draw_need_pipeline(draw)) + if (draw_need_pipeline(draw, prim)) return FALSE; debug_printf("%s AAA\n", __FUNCTION__); diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 0c5afcacfa..8a879809c3 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -364,7 +364,8 @@ extern void draw_vertex_shader_queue_flush( struct draw_context *draw ); extern void draw_update_vertex_fetch( struct draw_context *draw ); -extern boolean draw_need_pipeline(const struct draw_context *draw); +extern boolean draw_need_pipeline(const struct draw_context *draw, + unsigned prim ); /* Passthrough mode (second attempt): diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index d7169f78f4..3ec31ec25f 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -55,7 +55,7 @@ draw_pt_arrays(struct draw_context *draw, unsigned start, unsigned count) { - const boolean pipeline = draw_need_pipeline(draw); + const boolean pipeline = draw_need_pipeline(draw, prim); const boolean cliptest = !draw->rasterizer->bypass_clipping; const boolean shading = !draw->rasterizer->bypass_vs; struct draw_pt_front_end *frontend = NULL; diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index 70b477ba5c..e163e078f0 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -32,6 +32,22 @@ #include "pipe/p_defines.h" #include "draw_private.h" +static boolean points( unsigned prim ) +{ + return (prim == PIPE_PRIM_POINTS); +} + +static boolean lines( unsigned prim ) +{ + return (prim == PIPE_PRIM_LINES || + prim == PIPE_PRIM_LINE_STRIP || + prim == PIPE_PRIM_LINE_LOOP); +} + +static boolean triangles( unsigned prim ) +{ + return prim >= PIPE_PRIM_TRIANGLES; +} /** * Check if we need any special pipeline stages, or whether @@ -40,48 +56,63 @@ * pipeline stages. */ boolean -draw_need_pipeline(const struct draw_context *draw) +draw_need_pipeline(const struct draw_context *draw, + unsigned int prim ) { - /* line stipple */ - if (draw->rasterizer->line_stipple_enable && draw->line_stipple) - return TRUE; - - /* wide lines */ - if (draw->rasterizer->line_width > draw->wide_line_threshold) - return TRUE; + /* Don't have to worry about triangles turning into lines/points + * and triggering the pipeline, because we have to trigger the + * pipeline *anyway* if unfilled mode is active. + */ + if (lines(prim)) + { + /* line stipple */ + if (draw->rasterizer->line_stipple_enable && draw->line_stipple) + return TRUE; - /* large points */ - if (draw->rasterizer->point_size > draw->wide_point_threshold) - return TRUE; + /* wide lines */ + if (draw->rasterizer->line_width > draw->wide_line_threshold) + return TRUE; - /* AA lines */ - if (draw->rasterizer->line_smooth && draw->pipeline.aaline) - return TRUE; - - /* AA points */ - if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) - return TRUE; + /* AA lines */ + if (draw->rasterizer->line_smooth && draw->pipeline.aaline) + return TRUE; + } - /* polygon stipple */ - if (draw->rasterizer->poly_stipple_enable && draw->pipeline.pstipple) - return TRUE; + if (points(prim)) + { + /* large points */ + if (draw->rasterizer->point_size > draw->wide_point_threshold) + return TRUE; - /* unfilled polygons */ - if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || - draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) - return TRUE; + /* AA points */ + if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) + return TRUE; - /* polygon offset */ - if (draw->rasterizer->offset_cw || draw->rasterizer->offset_ccw) - return TRUE; + /* point sprites */ + if (draw->rasterizer->point_sprite && draw->point_sprite) + return TRUE; + } - /* point sprites */ - if (draw->rasterizer->point_sprite && draw->point_sprite) - return TRUE; - /* two-side lighting */ - if (draw->rasterizer->light_twoside) - return TRUE; + if (triangles(prim)) + { + /* polygon stipple */ + if (draw->rasterizer->poly_stipple_enable && draw->pipeline.pstipple) + return TRUE; + + /* unfilled polygons */ + if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || + draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) + return TRUE; + + /* polygon offset */ + if (draw->rasterizer->offset_cw || draw->rasterizer->offset_ccw) + return TRUE; + + /* two-side lighting */ + if (draw->rasterizer->light_twoside) + return TRUE; + } /* polygon cull - this is difficult - hardware can cull just fine * most of the time (though sometimes CULL_NEITHER is unsupported. -- cgit v1.2.3 From cbec00849186db11d77fd00822145e11e69cb07f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 25 Mar 2008 18:09:25 +0000 Subject: draw: don't use fetch_and_store for bypass_vs mode, it's not quite right --- src/gallium/auxiliary/draw/draw_prim.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c index 4fe0ddc02a..75b2f79bab 100644 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -170,10 +170,7 @@ void draw_do_flush( struct draw_context *draw, unsigned flags ) if (flags >= DRAW_FLUSH_SHADER_QUEUE) { if (draw->vs.queue_nr) { - if (draw->rasterizer->bypass_vs) - fetch_and_store(draw); - else - (*draw->shader_queue_flush)(draw); + (*draw->shader_queue_flush)(draw); } if (flags >= DRAW_FLUSH_PRIM_QUEUE) { -- cgit v1.2.3 From 05a4ecdec2b5fc590eb09cc5a6b4208e0f739c5a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 25 Mar 2008 18:15:58 +0000 Subject: draw: vertex fetch can be validated too early leading to an assertion... disable --- src/gallium/auxiliary/draw/draw_vertex_fetch.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vertex_fetch.c b/src/gallium/auxiliary/draw/draw_vertex_fetch.c index b56d85396d..11f99babf6 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_fetch.c +++ b/src/gallium/auxiliary/draw/draw_vertex_fetch.c @@ -314,7 +314,11 @@ static fetch_func get_fetch_func( enum pipe_format format ) return NULL; /* not sure why this is needed */ default: - assert(0); + /* This can get hit because draw-state-validation is too eager, + and can jump in here validating stuff before the state tracker has set + up everything. + */ + /* assert(0); */ return NULL; } } -- cgit v1.2.3 From e1543fa55c7972e3634f3f7ba297c010337dfb0d Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 25 Mar 2008 23:49:11 +0100 Subject: draw: Take flatshade_first rasterizer bit into account. --- src/gallium/auxiliary/draw/draw_prim.c | 162 ++++++++++++++++++++++++--------- 1 file changed, 117 insertions(+), 45 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c index 75b2f79bab..f589d0c017 100644 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -330,6 +330,8 @@ draw_prim( struct draw_context *draw, unsigned i; boolean unfilled = (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL); + boolean flatfirst = + (draw->rasterizer->flatshade & draw->rasterizer->flatshade_first) ? TRUE : FALSE; // debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count ); @@ -342,11 +344,21 @@ draw_prim( struct draw_context *draw, break; case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) { - do_line( draw, - TRUE, - start + i + 0, - start + i + 1); + if (flatfirst) { + for (i = 0; i+1 < count; i += 2) { + do_line( draw, + TRUE, + start + i + 1, + start + i + 0); + } + } + else { + for (i = 0; i+1 < count; i += 2) { + do_line( draw, + TRUE, + start + i + 0, + start + i + 1); + } } break; @@ -367,60 +379,120 @@ draw_prim( struct draw_context *draw, break; case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < count; i++) { - do_line( draw, - i == 1, - start + i - 1, - start + i ); + if (flatfirst) { + for (i = 1; i < count; i++) { + do_line( draw, + i == 1, + start + i, + start + i - 1 ); + } + } + else { + for (i = 1; i < count; i++) { + do_line( draw, + i == 1, + start + i - 1, + start + i ); + } } break; case PIPE_PRIM_TRIANGLES: - if (unfilled) { - for (i = 0; i+2 < count; i += 3) { - do_ef_triangle( draw, - 1, - ~0, - start + i + 0, - start + i + 1, - start + i + 2 ); - } - } + if (flatfirst) { + if (unfilled) { + for (i = 0; i+2 < count; i += 3) { + do_ef_triangle( draw, + 1, + ~0, + start + i + 1, + start + i + 2, + start + i + 0 ); + } + } + else { + for (i = 0; i+2 < count; i += 3) { + do_triangle( draw, + start + i + 1, + start + i + 2, + start + i + 0 ); + } + } + } else { - for (i = 0; i+2 < count; i += 3) { - do_triangle( draw, - start + i + 0, - start + i + 1, - start + i + 2 ); - } + if (unfilled) { + for (i = 0; i+2 < count; i += 3) { + do_ef_triangle( draw, + 1, + ~0, + start + i + 0, + start + i + 1, + start + i + 2 ); + } + } + else { + for (i = 0; i+2 < count; i += 3) { + do_triangle( draw, + start + i + 0, + start + i + 1, + start + i + 2 ); + } + } } break; case PIPE_PRIM_TRIANGLE_STRIP: - for (i = 0; i+2 < count; i++) { - if (i & 1) { - do_triangle( draw, - start + i + 1, - start + i + 0, - start + i + 2 ); - } - else { - do_triangle( draw, - start + i + 0, - start + i + 1, - start + i + 2 ); - } + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + if (i & 1) { + do_triangle( draw, + start + i + 2, + start + i + 1, + start + i + 0 ); + } + else { + do_triangle( draw, + start + i + 1, + start + i + 2, + start + i + 0 ); + } + } + } + else { + for (i = 0; i+2 < count; i++) { + if (i & 1) { + do_triangle( draw, + start + i + 1, + start + i + 0, + start + i + 2 ); + } + else { + do_triangle( draw, + start + i + 0, + start + i + 1, + start + i + 2 ); + } + } } break; case PIPE_PRIM_TRIANGLE_FAN: if (count >= 3) { - for (i = 0; i+2 < count; i++) { - do_triangle( draw, - start + 0, - start + i + 1, - start + i + 2 ); - } + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + do_triangle( draw, + start + i + 2, + start + 0, + start + i + 1 ); + } + } + else { + for (i = 0; i+2 < count; i++) { + do_triangle( draw, + start + 0, + start + i + 1, + start + i + 2 ); + } + } } break; -- cgit v1.2.3 From 84d8030735844785c3c97679db2bc1892a9c8c70 Mon Sep 17 00:00:00 2001 From: Ian Romanick Date: Mon, 24 Mar 2008 12:15:59 -0700 Subject: cell: Float convert-to and convert-from instructions use different shift bias --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 4 ++-- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 10 +++++----- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 24be65bff9..7f6bf577b2 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -267,10 +267,10 @@ void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ emit_RI7(p, _op, rT, rA, imm); \ } -#define EMIT_RI8(_name, _op) \ +#define EMIT_RI8(_name, _op, bias) \ void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ { \ - emit_RI8(p, _op, rT, rA, 155 - imm); \ + emit_RI8(p, _op, rT, rA, bias - imm); \ } #define EMIT_RI10(_name, _op) \ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 5a1eb1ed8d..1cacc717b1 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -76,7 +76,7 @@ extern void spe_release_register(struct spe_function *p, int reg); #define EMIT_RI7(_name, _op) \ extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ int imm) -#define EMIT_RI8(_name, _op) \ +#define EMIT_RI8(_name, _op, bias) \ extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ int imm) #define EMIT_RI10(_name, _op) \ @@ -289,10 +289,10 @@ EMIT_RR (spe_dfnma, 0x35f); EMIT_R (spe_frest, 0x1b8); EMIT_R (spe_frsqest, 0x1b9); EMIT_RR (spe_fi, 0x3d4); -EMIT_RI8 (spe_csflt, 0x1da); -EMIT_RI8 (spe_cflts, 0x1d8); -EMIT_RI8 (spe_cuflt, 0x1db); -EMIT_RI8 (spe_cfltu, 0x1d9); +EMIT_RI8 (spe_csflt, 0x1da, 155); +EMIT_RI8 (spe_cflts, 0x1d8, 173); +EMIT_RI8 (spe_cuflt, 0x1db, 155); +EMIT_RI8 (spe_cfltu, 0x1d9, 173); EMIT_R (spe_frds, 0x3b9); EMIT_R (spe_fesd, 0x3b8); EMIT_RR (spe_dfceq, 0x3c3); -- cgit v1.2.3 From e55dccd0bfc41dbcf306f864c01758f8e28fc660 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 25 Mar 2008 19:19:14 -0600 Subject: gallium: the generic attrib we use for computing coverage is per-shader Fixes a very tricky conformance failure. --- src/gallium/auxiliary/draw/draw_aapoint.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c index 67a7a8ebab..fcebe3e7a0 100644 --- a/src/gallium/auxiliary/draw/draw_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_aapoint.c @@ -68,6 +68,7 @@ struct aapoint_fragment_shader struct pipe_shader_state state; void *driver_fs; /**< the regular shader */ void *aapoint_fs; /**< the aa point-augmented shader */ + int generic_attrib; /**< The generic input attrib/texcoord we'll use */ }; @@ -486,7 +487,6 @@ static void generate_aapoint_fs(struct aapoint_stage *aapoint) { const struct pipe_shader_state *orig_fs = &aapoint->fs->state; - struct draw_context *draw = aapoint->stage.draw; struct pipe_shader_state aapoint_fs; struct aa_transform_context transform; @@ -510,18 +510,16 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) MAX, &transform.base); #if 0 /* DEBUG */ + printf("draw_aapoint, orig shader:\n"); tgsi_dump(orig_fs->tokens, 0); + printf("draw_aapoint, new shader:\n"); tgsi_dump(aapoint_fs.tokens, 0); #endif aapoint->fs->aapoint_fs = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs); - /* advertise the extra post-transform vertex attributes which will have - * the texcoords. - */ - draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; - draw->extra_vp_outputs.semantic_index = transform.maxGeneric + 1; + aapoint->fs->generic_attrib = transform.maxGeneric + 1; } @@ -675,15 +673,19 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header) else aapoint->radius = 0.5f * draw->rasterizer->point_size; - aapoint->tex_slot = draw->num_vs_outputs; - assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */ - draw->extra_vp_outputs.slot = aapoint->tex_slot; - /* - * Bind our fragprog. + * Bind (generate) our fragprog. */ bind_aapoint_fragment_shader(aapoint); + /* update vertex attrib info */ + aapoint->tex_slot = draw->num_vs_outputs; + assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */ + + draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_vp_outputs.semantic_index = aapoint->fs->generic_attrib; + draw->extra_vp_outputs.slot = aapoint->tex_slot; + /* find psize slot in post-transform vertex */ aapoint->psize_slot = -1; if (draw->rasterizer->point_size_per_vertex) { -- cgit v1.2.3 From 4abe1eb980ed76d2b2d3383eaab520d0aa2ae6f4 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Wed, 26 Mar 2008 09:36:40 +0000 Subject: gallium: Change pipe->flush() interface to optionally return a fence. The cell driver still uses an internal CELL_FLUSH_WAIT flag, in the long run proper fencing should be implemented for it. --- src/gallium/auxiliary/util/u_gen_mipmap.c | 2 +- src/gallium/drivers/cell/ppu/cell_flush.c | 15 +++++++--- src/gallium/drivers/cell/ppu/cell_flush.h | 5 +++- src/gallium/drivers/cell/ppu/cell_vbuf.c | 2 +- src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 2 +- src/gallium/drivers/failover/fo_context.c | 4 +-- src/gallium/drivers/i915simple/i915_batch.h | 4 +-- src/gallium/drivers/i915simple/i915_blit.c | 4 +-- src/gallium/drivers/i915simple/i915_flush.c | 14 +++------- src/gallium/drivers/i915simple/i915_prim_emit.c | 2 +- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 2 +- src/gallium/drivers/i915simple/i915_state_emit.c | 2 +- src/gallium/drivers/i915simple/i915_winsys.h | 5 ++-- src/gallium/drivers/i965simple/brw_flush.c | 13 ++------- src/gallium/drivers/softpipe/sp_flush.c | 10 +++---- src/gallium/drivers/softpipe/sp_flush.h | 4 ++- src/gallium/include/pipe/p_context.h | 5 ++-- src/gallium/include/pipe/p_defines.h | 3 +- src/gallium/winsys/dri/intel/intel_context.c | 4 +-- src/gallium/winsys/dri/intel/intel_winsys_i915.c | 25 ++++++++++------- src/gallium/winsys/xlib/xm_winsys.c | 31 +++++++++++++++++++++ src/mesa/state_tracker/st_cb_accum.c | 2 +- src/mesa/state_tracker/st_cb_drawpixels.c | 4 +-- src/mesa/state_tracker/st_cb_fbo.c | 2 +- src/mesa/state_tracker/st_cb_flush.c | 34 +++++++++++++---------- src/mesa/state_tracker/st_cb_readpixels.c | 2 +- src/mesa/state_tracker/st_framebuffer.c | 3 +- src/mesa/state_tracker/st_public.h | 5 +++- 28 files changed, 127 insertions(+), 83 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index e129c062be..ed12768e7f 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -935,7 +935,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, 4, /* verts */ 2); /* attribs/vert */ - pipe->flush(pipe, PIPE_FLUSH_WAIT); + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); /* need to signal that the texture has changed _after_ rendering to it */ pipe->texture_update(pipe, pt, face, (1 << dstLevel)); diff --git a/src/gallium/drivers/cell/ppu/cell_flush.c b/src/gallium/drivers/cell/ppu/cell_flush.c index 66a5627d84..3aaf3de668 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.c +++ b/src/gallium/drivers/cell/ppu/cell_flush.c @@ -35,12 +35,19 @@ void -cell_flush(struct pipe_context *pipe, unsigned flags) +cell_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence) { struct cell_context *cell = cell_context(pipe); + if (fence) { + *fence = NULL; + /* XXX: Implement real fencing */ + flags |= CELL_FLUSH_WAIT; + } + if (flags & PIPE_FLUSH_SWAPBUFFERS) - flags |= PIPE_FLUSH_WAIT; + flags |= CELL_FLUSH_WAIT; draw_flush( cell->draw ); cell_flush_int(pipe, flags); @@ -58,7 +65,7 @@ cell_flush_int(struct pipe_context *pipe, unsigned flags) ASSERT(!flushing); flushing = TRUE; - if (flags & PIPE_FLUSH_WAIT) { + if (flags & CELL_FLUSH_WAIT) { uint64_t *cmd = (uint64_t *) cell_batch_alloc(cell, sizeof(uint64_t)); *cmd = CELL_CMD_FINISH; } @@ -72,7 +79,7 @@ cell_flush_int(struct pipe_context *pipe, unsigned flags) } #endif - if (flags & PIPE_FLUSH_WAIT) { + if (flags & CELL_FLUSH_WAIT) { /* Wait for ack */ for (i = 0; i < cell->num_spus; i++) { uint k = wait_mbox_message(cell_global.spe_contexts[i]); diff --git a/src/gallium/drivers/cell/ppu/cell_flush.h b/src/gallium/drivers/cell/ppu/cell_flush.h index 7f940ae76b..8f0645c429 100644 --- a/src/gallium/drivers/cell/ppu/cell_flush.h +++ b/src/gallium/drivers/cell/ppu/cell_flush.h @@ -29,8 +29,11 @@ #ifndef CELL_FLUSH #define CELL_FLUSH +#define CELL_FLUSH_WAIT 0x80000000 + extern void -cell_flush(struct pipe_context *pipe, unsigned flags); +cell_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence); extern void cell_flush_int(struct pipe_context *pipe, unsigned flags); diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index cc727ff4ed..3a181b585c 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -243,7 +243,7 @@ cell_vbuf_draw(struct vbuf_render *vbr, #if 0 /* helpful for debug */ - cell_flush_int(&cell->pipe, PIPE_FLUSH_WAIT); + cell_flush_int(&cell->pipe, CELL_FLUSH_WAIT); #endif } diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index f5c27852c1..b418857ccd 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -133,7 +133,7 @@ cell_vertex_shader_queue_flush(struct draw_context *draw) vs->num_elts = n; send_mbox_message(cell_global.spe_contexts[0], CELL_CMD_VS_EXECUTE); - cell_flush_int(& cell->pipe, PIPE_FLUSH_WAIT); + cell_flush_int(& cell->pipe, CELL_FLUSH_WAIT); } draw->vs.post_nr = draw->vs.queue_nr; diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index afc0d7eb1e..cb95ba516f 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -69,7 +69,7 @@ static boolean failover_draw_elements( struct pipe_context *pipe, start, count )) { - failover->hw->flush( failover->hw, ~0 ); + failover->hw->flush( failover->hw, ~0, NULL ); failover->mode = FO_SW; } } @@ -92,7 +92,7 @@ static boolean failover_draw_elements( struct pipe_context *pipe, * intervening flush. Unlikely to be much performance impact to * this: */ - failover->sw->flush( failover->sw, ~0 ); + failover->sw->flush( failover->sw, ~0, NULL ); } return TRUE; diff --git a/src/gallium/drivers/i915simple/i915_batch.h b/src/gallium/drivers/i915simple/i915_batch.h index fb88cd6db0..4ea06ce02b 100644 --- a/src/gallium/drivers/i915simple/i915_batch.h +++ b/src/gallium/drivers/i915simple/i915_batch.h @@ -44,9 +44,9 @@ #define ADVANCE_BATCH() -#define FLUSH_BATCH() do { \ +#define FLUSH_BATCH(fence) do { \ if (0) i915_dump_batchbuffer( i915 ); \ - i915->winsys->batch_flush( i915->winsys ); \ + i915->winsys->batch_flush( i915->winsys, fence ); \ i915->batch_start = NULL; \ i915->hardware_dirty = ~0; \ } while (0) diff --git a/src/gallium/drivers/i915simple/i915_blit.c b/src/gallium/drivers/i915simple/i915_blit.c index db4671ff55..24449e3fb3 100644 --- a/src/gallium/drivers/i915simple/i915_blit.c +++ b/src/gallium/drivers/i915simple/i915_blit.c @@ -70,7 +70,7 @@ i915_fill_blit(struct i915_context *i915, if (!BEGIN_BATCH(6, 1)) { - FLUSH_BATCH(); + FLUSH_BATCH(NULL); assert(BEGIN_BATCH(6, 1)); } OUT_BATCH(CMD); @@ -145,7 +145,7 @@ i915_copy_blit( struct i915_context *i915, if (!BEGIN_BATCH(8, 2)) { - FLUSH_BATCH(); + FLUSH_BATCH(NULL); assert(BEGIN_BATCH(8, 2)); } OUT_BATCH(CMD); diff --git a/src/gallium/drivers/i915simple/i915_flush.c b/src/gallium/drivers/i915simple/i915_flush.c index 96a54281f1..7d23e6b6b9 100644 --- a/src/gallium/drivers/i915simple/i915_flush.c +++ b/src/gallium/drivers/i915simple/i915_flush.c @@ -37,11 +37,9 @@ #include "i915_batch.h" -/** - * In future we may want a fence-like interface instead of finish. - */ static void i915_flush( struct pipe_context *pipe, - unsigned flags ) + unsigned flags, + struct pipe_fence_handle **fence ) { struct i915_context *i915 = i915_context(pipe); @@ -60,7 +58,7 @@ static void i915_flush( struct pipe_context *pipe, flush |= FLUSH_MAP_CACHE; if (!BEGIN_BATCH(1, 0)) { - FLUSH_BATCH(); + FLUSH_BATCH(NULL); assert(BEGIN_BATCH(1, 0)); } OUT_BATCH( flush ); @@ -69,11 +67,7 @@ static void i915_flush( struct pipe_context *pipe, /* If there are no flags, just flush pending commands to hardware: */ - FLUSH_BATCH(); - - if (flags & PIPE_FLUSH_WAIT) { - i915->winsys->batch_finish(i915->winsys); - } + FLUSH_BATCH(fence); } diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c index d8de5178f6..b6fb0a6d88 100644 --- a/src/gallium/drivers/i915simple/i915_prim_emit.c +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -140,7 +140,7 @@ emit_prim( struct draw_stage *stage, assert(vertex_size >= 12); /* never smaller than 12 bytes */ if (!BEGIN_BATCH( 1 + nr * vertex_size / 4, 0 )) { - FLUSH_BATCH(); + FLUSH_BATCH(NULL); /* Make sure state is re-emitted after a flush: */ diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index eb64f51943..7fb2adbb53 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -161,7 +161,7 @@ i915_vbuf_render_draw( struct vbuf_render *render, i915_emit_hardware_state( i915 ); if (!BEGIN_BATCH( 1 + (nr_indices + 1)/2, 1 )) { - FLUSH_BATCH(); + FLUSH_BATCH(NULL); /* Make sure state is re-emitted after a flush: */ diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c index a7498d22b7..6f947d4346 100644 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -115,7 +115,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) #endif if(!BEGIN_BATCH(dwords, relocs)) { - FLUSH_BATCH(); + FLUSH_BATCH(NULL); assert(BEGIN_BATCH(dwords, relocs)); } diff --git a/src/gallium/drivers/i915simple/i915_winsys.h b/src/gallium/drivers/i915simple/i915_winsys.h index aea3003281..5e16543f4e 100644 --- a/src/gallium/drivers/i915simple/i915_winsys.h +++ b/src/gallium/drivers/i915simple/i915_winsys.h @@ -56,6 +56,7 @@ extern "C" { */ struct pipe_buffer; +struct pipe_fence_handle; struct pipe_winsys; struct pipe_screen; @@ -103,8 +104,8 @@ struct i915_winsys { unsigned access_flags, unsigned delta ); - void (*batch_flush)( struct i915_winsys *sws ); - void (*batch_finish)( struct i915_winsys *sws ); + void (*batch_flush)( struct i915_winsys *sws, + struct pipe_fence_handle **fence ); }; #define I915_BUFFER_ACCESS_WRITE 0x1 diff --git a/src/gallium/drivers/i965simple/brw_flush.c b/src/gallium/drivers/i965simple/brw_flush.c index 5216c680cf..e6001c30d9 100644 --- a/src/gallium/drivers/i965simple/brw_flush.c +++ b/src/gallium/drivers/i965simple/brw_flush.c @@ -36,14 +36,11 @@ #include "brw_batch.h" -/** - * In future we may want a fence-like interface instead of finish. - */ static void brw_flush( struct pipe_context *pipe, - unsigned flags ) + unsigned flags, + struct pipe_fence_handle **fence ) { struct brw_context *brw = brw_context(pipe); - struct pipe_fence_handle *fence; /* Do we need to emit an MI_FLUSH command to flush the hardware * caches? @@ -65,11 +62,7 @@ static void brw_flush( struct pipe_context *pipe, /* If there are no flags, just flush pending commands to hardware: */ - FLUSH_BATCH( &fence ); - - if (flags & PIPE_FLUSH_WAIT) { -// brw->winsys->wait_fence(brw->winsys, fence); - } + FLUSH_BATCH( fence ); } diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index 2cbd0d7cab..0625b69099 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -40,13 +40,10 @@ #include "sp_winsys.h" -/* There will be actual work to do here. In future we may want a - * fence-like interface instead of finish, and perhaps flush will take - * flags to indicate what type of flush is required. - */ void softpipe_flush( struct pipe_context *pipe, - unsigned flags ) + unsigned flags, + struct pipe_fence_handle **fence ) { struct softpipe_context *softpipe = softpipe_context(pipe); uint i; @@ -72,5 +69,8 @@ softpipe_flush( struct pipe_context *pipe, * to unmap surfaces when flushing. */ softpipe_unmap_surfaces(softpipe); + + if (fence) + *fence = NULL; } diff --git a/src/gallium/drivers/softpipe/sp_flush.h b/src/gallium/drivers/softpipe/sp_flush.h index 34ec617866..68d9b5fa83 100644 --- a/src/gallium/drivers/softpipe/sp_flush.h +++ b/src/gallium/drivers/softpipe/sp_flush.h @@ -29,7 +29,9 @@ #define SP_FLUSH_H struct pipe_context; +struct pipe_fence_handle; -void softpipe_flush(struct pipe_context *pipe, unsigned flags ); +void softpipe_flush(struct pipe_context *pipe, unsigned flags, + struct pipe_fence_handle **fence); #endif diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index a3824601be..b2e49bef4c 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -37,7 +37,7 @@ extern "C" { struct pipe_screen; - +struct pipe_fence_handle; struct pipe_state_cache; /* Opaque driver handles: @@ -202,7 +202,8 @@ struct pipe_context { /* Flush rendering: */ void (*flush)( struct pipe_context *pipe, - unsigned flags ); + unsigned flags, + struct pipe_fence_handle **fence ); }; diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index bc938ba253..586951d956 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -201,8 +201,7 @@ enum pipe_texture_target { */ #define PIPE_FLUSH_RENDER_CACHE 0x1 #define PIPE_FLUSH_TEXTURE_CACHE 0x2 -#define PIPE_FLUSH_WAIT 0x4 -#define PIPE_FLUSH_SWAPBUFFERS 0x8 +#define PIPE_FLUSH_SWAPBUFFERS 0x4 /** diff --git a/src/gallium/winsys/dri/intel/intel_context.c b/src/gallium/winsys/dri/intel/intel_context.c index 79b320c6bf..8eba33c313 100644 --- a/src/gallium/winsys/dri/intel/intel_context.c +++ b/src/gallium/winsys/dri/intel/intel_context.c @@ -228,7 +228,7 @@ intelDestroyContext(__DRIcontextPrivate * driContextPriv) assert(intel); /* should never be null */ if (intel) { - st_flush(intel->st, PIPE_FLUSH_WAIT); + st_finish(intel->st); intel_batchbuffer_free(intel->batch); @@ -256,7 +256,7 @@ GLboolean intelUnbindContext(__DRIcontextPrivate * driContextPriv) { struct intel_context *intel = intel_context(driContextPriv); - st_flush(intel->st, 0x0); + st_flush(intel->st, PIPE_FLUSH_RENDER_CACHE, NULL); /* XXX make_current(NULL)? */ return GL_TRUE; } diff --git a/src/gallium/winsys/dri/intel/intel_winsys_i915.c b/src/gallium/winsys/dri/intel/intel_winsys_i915.c index 2def1afc31..4d183db7c3 100644 --- a/src/gallium/winsys/dri/intel/intel_winsys_i915.c +++ b/src/gallium/winsys/dri/intel/intel_winsys_i915.c @@ -39,12 +39,14 @@ #include "intel_winsys.h" #include "pipe/p_util.h" +#include "pipe/p_winsys.h" #include "i915simple/i915_winsys.h" #include "i915simple/i915_screen.h" struct intel_i915_winsys { struct i915_winsys winsys; /**< batch buffer funcs */ + struct pipe_winsys *pws; struct intel_context *intel; }; @@ -112,19 +114,22 @@ static void intel_i915_batch_reloc( struct i915_winsys *sws, -static void intel_i915_batch_flush( struct i915_winsys *sws ) +static void intel_i915_batch_flush( struct i915_winsys *sws, + struct pipe_fence_handle **fence ) { - struct intel_context *intel = intel_i915_winsys(sws)->intel; + struct intel_i915_winsys *iws = intel_i915_winsys(sws); + struct intel_context *intel = iws->intel; + union { + struct _DriFenceObject *dri; + struct pipe_fence_handle *pipe; + } fu; - intel_batchbuffer_flush( intel->batch ); -// if (0) intel_i915_batch_wait_idle( sws ); -} + fu.dri = intel_batchbuffer_flush( intel->batch ); + if (fu.dri) + iws->pws->fence_reference(iws->pws, fence, fu.pipe); -static void intel_i915_batch_finish( struct i915_winsys *sws ) -{ - struct intel_context *intel = intel_i915_winsys(sws)->intel; - intel_batchbuffer_finish( intel->batch ); +// if (0) intel_i915_batch_wait_idle( sws ); } @@ -145,7 +150,7 @@ intel_create_i915simple( struct intel_context *intel, iws->winsys.batch_dword = intel_i915_batch_dword; iws->winsys.batch_reloc = intel_i915_batch_reloc; iws->winsys.batch_flush = intel_i915_batch_flush; - iws->winsys.batch_finish = intel_i915_batch_finish; + iws->pws = winsys; iws->intel = intel; screen = i915_create_screen(winsys, intel->intelScreen->deviceID); diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index 7baaae295c..c930a1d196 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -570,6 +570,33 @@ xm_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) } +/* + * Fence functions - basically nothing to do, as we don't create any actual + * fence objects. + */ + +static void +xm_fence_reference(struct pipe_winsys *sws, struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ +} + + +static int +xm_fence_signalled(struct pipe_winsys *sws, struct pipe_fence_handle *fence, + unsigned flag) +{ + return 0; +} + + +static int +xm_fence_finish(struct pipe_winsys *sws, struct pipe_fence_handle *fence, + unsigned flag) +{ + return 0; +} + /** * Return pointer to a pipe_winsys object. @@ -603,6 +630,10 @@ xmesa_get_pipe_winsys_aub(struct xmesa_visual *xm_vis) ws->base.surface_alloc_storage = xm_surface_alloc_storage; ws->base.surface_release = xm_surface_release; + ws->fence_reference = xm_fence_reference; + ws->fence_signalled = xm_fence_signalled; + ws->fence_finish = xm_fence_finish; + ws->base.flush_frontbuffer = xm_flush_frontbuffer; ws->base.printf = xm_printf; ws->base.get_name = xm_get_name; diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index f1fddc4e02..a623d0bcc0 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -214,7 +214,7 @@ st_Accum(GLcontext *ctx, GLenum op, GLfloat value) const GLint height = ctx->DrawBuffer->_Ymax - ypos; /* make sure color bufs aren't cached */ - pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE); + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); switch (op) { case GL_ADD: diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 026f015fd8..43cc21d1fb 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -737,7 +737,7 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y, GLint skipPixels; ubyte *stmap; - pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE); + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); /* map the stencil buffer */ stmap = pipe_surface_map(ps); @@ -952,7 +952,7 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, enum pipe_format srcFormat, texFormat; /* make sure rendering has completed */ - pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE); + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); st_validate_state(st); diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 5384252a8e..ec7788923a 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -366,7 +366,7 @@ st_finish_render_texture(GLcontext *ctx, assert(strb); - ctx->st->pipe->flush(ctx->st->pipe, PIPE_FLUSH_RENDER_CACHE); + ctx->st->pipe->flush(ctx->st->pipe, PIPE_FLUSH_RENDER_CACHE, NULL); /* printf("FINISH RENDER TO TEXTURE surf=%p\n", strb->surface); diff --git a/src/mesa/state_tracker/st_cb_flush.c b/src/mesa/state_tracker/st_cb_flush.c index dbec993f1b..a536a059bd 100644 --- a/src/mesa/state_tracker/st_cb_flush.c +++ b/src/mesa/state_tracker/st_cb_flush.c @@ -43,19 +43,21 @@ #include "pipe/p_winsys.h" -void st_flush( struct st_context *st, uint pipeFlushFlags ) +void st_flush( struct st_context *st, uint pipeFlushFlags, + struct pipe_fence_handle **fence ) { FLUSH_VERTICES(st->ctx, 0); - st->pipe->flush( st->pipe, pipeFlushFlags ); + st->pipe->flush( st->pipe, pipeFlushFlags, fence ); } -static void st_gl_flush( struct st_context *st, uint pipeFlushFlags ) +static void st_gl_flush( struct st_context *st, uint pipeFlushFlags, + struct pipe_fence_handle **fence ) { GLframebuffer *fb = st->ctx->DrawBuffer; - FLUSH_VERTICES(st->ctx, 0); + st_flush( st, pipeFlushFlags, fence ); if (!fb) return; @@ -80,15 +82,6 @@ static void st_gl_flush( struct st_context *st, uint pipeFlushFlags ) = st_renderbuffer(fb->Attachment[BUFFER_FRONT_LEFT].Renderbuffer); struct pipe_surface *front_surf = strb->surface; - /* If we aren't rendering to the frontbuffer, this is a noop. - * This should be uncontroversial for glFlush, though people may - * feel more strongly about glFinish. - * - * Additionally, need to make sure that the frontbuffer_dirty - * flag really gets set on frontbuffer rendering. - */ - st->pipe->flush( st->pipe, pipeFlushFlags ); - /* Hook for copying "fake" frontbuffer if necessary: */ st->pipe->winsys->flush_frontbuffer( st->pipe->winsys, front_surf, @@ -103,7 +96,18 @@ static void st_gl_flush( struct st_context *st, uint pipeFlushFlags ) */ static void st_glFlush(GLcontext *ctx) { - st_gl_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE); + st_gl_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); +} + + +void st_finish( struct st_context *st ) +{ + struct pipe_fence_handle *fence; + + st_gl_flush(st, PIPE_FLUSH_RENDER_CACHE, &fence); + + st->pipe->winsys->fence_finish(st->pipe->winsys, fence, 0); + st->pipe->winsys->fence_reference(st->pipe->winsys, &fence, NULL); } @@ -112,7 +116,7 @@ static void st_glFlush(GLcontext *ctx) */ static void st_glFinish(GLcontext *ctx) { - st_gl_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_WAIT); + st_finish( ctx->st ); } diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 4cf9adcd28..e9fcdf69a1 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -160,7 +160,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, return; /* make sure rendering has completed */ - pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE); + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); if (format == GL_STENCIL_INDEX) { st_read_stencil_pixels(ctx, x, y, width, height, type, pack, dest); diff --git a/src/mesa/state_tracker/st_framebuffer.c b/src/mesa/state_tracker/st_framebuffer.c index d46a9178b1..075e9d1bd6 100644 --- a/src/mesa/state_tracker/st_framebuffer.c +++ b/src/mesa/state_tracker/st_framebuffer.c @@ -186,7 +186,8 @@ st_notify_swapbuffers(struct st_framebuffer *stfb) if (ctx && ctx->DrawBuffer == &stfb->Base) { st_flush( ctx->st, - PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_SWAPBUFFERS); + PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_SWAPBUFFERS, + NULL ); } } diff --git a/src/mesa/state_tracker/st_public.h b/src/mesa/state_tracker/st_public.h index 3c397b126a..9d88ce9764 100644 --- a/src/mesa/state_tracker/st_public.h +++ b/src/mesa/state_tracker/st_public.h @@ -45,6 +45,7 @@ struct st_context; struct st_framebuffer; struct pipe_context; +struct pipe_fence_handle; struct pipe_surface; @@ -78,7 +79,9 @@ void st_make_current(struct st_context *st, struct st_framebuffer *draw, struct st_framebuffer *read); -void st_flush( struct st_context *st, uint pipeFlushFlags ); +void st_flush( struct st_context *st, uint pipeFlushFlags, + struct pipe_fence_handle **fence ); +void st_finish( struct st_context *st ); void st_notify_swapbuffers(struct st_framebuffer *stfb); void st_notify_swapbuffers_complete(struct st_framebuffer *stfb); -- cgit v1.2.3 From 8cb85807d3bd42cb0e511970e4b409c542d2716b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 26 Mar 2008 08:21:17 -0600 Subject: gallium: as for aapoints, make the extra texcoord per-shader state --- src/gallium/auxiliary/draw/draw_aaline.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c index a999ef1227..e8d2a45102 100644 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ b/src/gallium/auxiliary/draw/draw_aaline.c @@ -62,6 +62,7 @@ struct aaline_fragment_shader void *aapoint_fs; /* not yet */ void *sprite_fs; /* not yet */ uint sampler_unit; + int generic_attrib; /**< texcoord/generic used for texture */ }; @@ -336,7 +337,7 @@ static void generate_aaline_fs(struct aaline_stage *aaline) { const struct pipe_shader_state *orig_fs = &aaline->fs->state; - struct draw_context *draw = aaline->stage.draw; + //struct draw_context *draw = aaline->stage.draw; struct pipe_shader_state aaline_fs; struct aa_transform_context transform; @@ -369,11 +370,7 @@ generate_aaline_fs(struct aaline_stage *aaline) aaline->fs->aaline_fs = aaline->driver_create_fs_state(aaline->pipe, &aaline_fs); - /* advertise the extra post-transform vertex attributes which will have - * the texcoords. - */ - draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; - draw->extra_vp_outputs.semantic_index = transform.maxGeneric + 1; + aaline->fs->generic_attrib = transform.maxGeneric + 1; } @@ -637,15 +634,20 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) else aaline->half_line_width = 0.5f * draw->rasterizer->line_width; - aaline->tex_slot = draw->num_vs_outputs; - assert(aaline->tex_slot > 0); /* output[0] is vertex pos */ - draw->extra_vp_outputs.slot = aaline->tex_slot; - /* - * Bind our fragprog, sampler and texture + * Bind (generate) our fragprog, sampler and texture */ bind_aaline_fragment_shader(aaline); + /* update vertex attrib info */ + aaline->tex_slot = draw->num_vs_outputs; + assert(aaline->tex_slot > 0); /* output[0] is vertex pos */ + + /* advertise the extra post-transformed vertex attribute */ + draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_vp_outputs.semantic_index = aaline->fs->generic_attrib; + draw->extra_vp_outputs.slot = aaline->tex_slot; + /* how many samplers? */ /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ num_samplers = MAX2(aaline->num_textures, aaline->num_samplers); -- cgit v1.2.3 From df1744c0433f3f73ebf4b06567fefa946a29c3d8 Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 27 Mar 2008 15:33:47 -0600 Subject: gallium: remove temporary static var --- src/gallium/auxiliary/draw/draw_prim.c | 11 +++++------ src/gallium/auxiliary/draw/draw_private.h | 1 + 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c index f589d0c017..ddcde01d9a 100644 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -158,15 +158,15 @@ static INLINE void fetch_and_store(struct draw_context *draw) void draw_do_flush( struct draw_context *draw, unsigned flags ) { - static boolean flushing = FALSE; - if (0) debug_printf("Flushing with %d verts, %d prims\n", draw->vs.queue_nr, draw->pq.queue_nr ); - if (!flushing) { - flushing = TRUE; + if (draw->flushing) + return; + + draw->flushing = TRUE; if (flags >= DRAW_FLUSH_SHADER_QUEUE) { if (draw->vs.queue_nr) { @@ -189,8 +189,7 @@ void draw_do_flush( struct draw_context *draw, unsigned flags ) } } - flushing = FALSE; - } + draw->flushing = FALSE; } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 8a879809c3..7007ee22c4 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -223,6 +223,7 @@ struct draw_context } pt; + boolean flushing; /* pipe state that we need: */ const struct pipe_rasterizer_state *rasterizer; -- cgit v1.2.3 From ba49fa39f3028d1ee44a999d84b29a0cfbfab0bd Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 27 Mar 2008 15:35:25 -0600 Subject: gallium: fix incorrect types for shaders --- src/gallium/auxiliary/util/u_blit.c | 5 ++--- src/gallium/auxiliary/util/u_gen_mipmap.c | 6 +++--- 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 28a404fd01..9986dc3570 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -58,9 +58,8 @@ struct blit_state struct pipe_rasterizer_state rasterizer; struct pipe_sampler_state sampler; - /*struct pipe_viewport_state viewport;*/ - struct pipe_sampler_state *vs; - struct pipe_sampler_state *fs; + void *vs; + void *fs; struct pipe_buffer *vbuf; /**< quad vertices */ float vertices[4][2][4]; /**< vertex/texcoords for quad */ diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index ed12768e7f..5549652abb 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -61,9 +61,9 @@ struct gen_mipmap_state struct pipe_depth_stencil_alpha_state depthstencil; struct pipe_rasterizer_state rasterizer; struct pipe_sampler_state sampler; - /*struct pipe_viewport_state viewport;*/ - struct pipe_sampler_state *vs; - struct pipe_sampler_state *fs; + + void *vs; + void *fs; struct pipe_buffer *vbuf; /**< quad vertices */ float vertices[4][2][4]; /**< vertex/texcoords for quad */ -- cgit v1.2.3 From dccbfd8bf0624250a435948029916073d3390191 Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 27 Mar 2008 15:42:09 -0600 Subject: gallium: return pipe_shader_state from the simple shader functions Allows us to fix a mem leak (tokens array). --- src/gallium/auxiliary/util/u_blit.c | 10 +++++++-- src/gallium/auxiliary/util/u_gen_mipmap.c | 10 +++++++-- src/gallium/auxiliary/util/u_simple_shaders.c | 31 ++++++++++++++++----------- src/gallium/auxiliary/util/u_simple_shaders.h | 10 ++++++--- 4 files changed, 42 insertions(+), 19 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 9986dc3570..eec5e600c9 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -58,6 +58,8 @@ struct blit_state struct pipe_rasterizer_state rasterizer; struct pipe_sampler_state sampler; + struct pipe_shader_state vert_shader; + struct pipe_shader_state frag_shader; void *vs; void *fs; @@ -129,11 +131,12 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) TGSI_SEMANTIC_GENERIC }; const uint semantic_indexes[] = { 0, 0 }; ctx->vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names, - semantic_indexes); + semantic_indexes, + &ctx->vert_shader); } /* fragment shader */ - ctx->fs = util_make_fragment_tex_shader(pipe); + ctx->fs = util_make_fragment_tex_shader(pipe, &ctx->frag_shader); ctx->vbuf = pipe->winsys->buffer_create(pipe->winsys, 32, @@ -168,6 +171,9 @@ util_destroy_blit(struct blit_state *ctx) pipe->delete_vs_state(pipe, ctx->vs); pipe->delete_fs_state(pipe, ctx->fs); + FREE((void*) ctx->vert_shader.tokens); + FREE((void*) ctx->frag_shader.tokens); + pipe->winsys->buffer_destroy(pipe->winsys, ctx->vbuf); FREE(ctx); diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 5549652abb..2fd214d22e 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -62,6 +62,8 @@ struct gen_mipmap_state struct pipe_rasterizer_state rasterizer; struct pipe_sampler_state sampler; + struct pipe_shader_state vert_shader; + struct pipe_shader_state frag_shader; void *vs; void *fs; @@ -740,11 +742,12 @@ util_create_gen_mipmap(struct pipe_context *pipe, TGSI_SEMANTIC_GENERIC }; const uint semantic_indexes[] = { 0, 0 }; ctx->vs = util_make_vertex_passthrough_shader(pipe, 2, semantic_names, - semantic_indexes); + semantic_indexes, + &ctx->vert_shader); } /* fragment shader */ - ctx->fs = util_make_fragment_tex_shader(pipe); + ctx->fs = util_make_fragment_tex_shader(pipe, &ctx->frag_shader); ctx->vbuf = pipe->winsys->buffer_create(pipe->winsys, 32, @@ -813,6 +816,9 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx) pipe->delete_vs_state(pipe, ctx->vs); pipe->delete_fs_state(pipe, ctx->fs); + FREE((void*) ctx->vert_shader.tokens); + FREE((void*) ctx->frag_shader.tokens); + pipe->winsys->buffer_destroy(pipe->winsys, ctx->vbuf); FREE(ctx); diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index d230ddfbeb..5f8d12191d 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -56,7 +56,9 @@ void * util_make_vertex_passthrough_shader(struct pipe_context *pipe, uint num_attribs, const uint *semantic_names, - const uint *semantic_indexes) + const uint *semantic_indexes, + struct pipe_shader_state *shader) + { uint maxTokens = 100; struct tgsi_token *tokens; @@ -66,7 +68,6 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe, struct tgsi_full_instruction inst; const uint procType = TGSI_PROCESSOR_VERTEX; uint ti, i; - struct pipe_shader_state shader; tokens = (struct tgsi_token *) MALLOC(maxTokens * sizeof(tokens[0])); @@ -145,8 +146,10 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe, tgsi_dump(tokens, 0); #endif - shader.tokens = tokens; - return pipe->create_vs_state(pipe, &shader); + shader->tokens = tokens; + /*shader->num_tokens = ti;*/ + + return pipe->create_vs_state(pipe, shader); } @@ -158,7 +161,8 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe, * END; */ void * -util_make_fragment_tex_shader(struct pipe_context *pipe) +util_make_fragment_tex_shader(struct pipe_context *pipe, + struct pipe_shader_state *shader) { uint maxTokens = 100; struct tgsi_token *tokens; @@ -168,7 +172,6 @@ util_make_fragment_tex_shader(struct pipe_context *pipe) struct tgsi_full_instruction inst; const uint procType = TGSI_PROCESSOR_FRAGMENT; uint ti; - struct pipe_shader_state shader; tokens = (struct tgsi_token *) MALLOC(maxTokens * sizeof(tokens[0])); @@ -254,8 +257,10 @@ util_make_fragment_tex_shader(struct pipe_context *pipe) tgsi_dump(tokens, 0); #endif - shader.tokens = tokens; - return pipe->create_fs_state(pipe, &shader); + shader->tokens = tokens; + /*shader->num_tokens = ti;*/ + + return pipe->create_fs_state(pipe, shader); } @@ -266,7 +271,8 @@ util_make_fragment_tex_shader(struct pipe_context *pipe) * Make simple fragment color pass-through shader. */ void * -util_make_fragment_passthrough_shader(struct pipe_context *pipe) +util_make_fragment_passthrough_shader(struct pipe_context *pipe, + struct pipe_shader_state *shader) { uint maxTokens = 40; struct tgsi_token *tokens; @@ -276,7 +282,6 @@ util_make_fragment_passthrough_shader(struct pipe_context *pipe) struct tgsi_full_instruction inst; const uint procType = TGSI_PROCESSOR_FRAGMENT; uint ti; - struct pipe_shader_state shader; tokens = (struct tgsi_token *) MALLOC(maxTokens * sizeof(tokens[0])); @@ -349,7 +354,9 @@ util_make_fragment_passthrough_shader(struct pipe_context *pipe) tgsi_dump(tokens, 0); #endif - shader.tokens = tokens; - return pipe->create_fs_state(pipe, &shader); + shader->tokens = tokens; + /*shader->num_tokens = ti;*/ + + return pipe->create_fs_state(pipe, shader); } diff --git a/src/gallium/auxiliary/util/u_simple_shaders.h b/src/gallium/auxiliary/util/u_simple_shaders.h index ca219a092c..8ca4977d71 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.h +++ b/src/gallium/auxiliary/util/u_simple_shaders.h @@ -34,6 +34,7 @@ struct pipe_context; +struct pipe_shader_state; #ifdef __cplusplus @@ -45,15 +46,18 @@ extern void * util_make_vertex_passthrough_shader(struct pipe_context *pipe, uint num_attribs, const uint *semantic_names, - const uint *semantic_indexes); + const uint *semantic_indexes, + struct pipe_shader_state *shader); extern void * -util_make_fragment_tex_shader(struct pipe_context *pipe); +util_make_fragment_tex_shader(struct pipe_context *pipe, + struct pipe_shader_state *shader); extern void * -util_make_fragment_passthrough_shader(struct pipe_context *pipe); +util_make_fragment_passthrough_shader(struct pipe_context *pipe, + struct pipe_shader_state *shader); #ifdef __cplusplus -- cgit v1.2.3 From 39038c11699bbc9baab744542e96d54e91cb452a Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 27 Mar 2008 17:41:55 -0600 Subject: gallium: replace PIPE_ATTRIB_MAX with PIPE_MAX_ATTRIBS The later follows the naming scheme of other limits. Keep the old definition until all possible usage is updated. --- src/gallium/auxiliary/draw/draw_context.c | 4 ++-- src/gallium/auxiliary/draw/draw_private.h | 12 ++++++------ src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 2 +- src/gallium/auxiliary/draw/draw_vf.c | 4 ++-- src/gallium/auxiliary/draw/draw_vf.h | 2 +- src/gallium/auxiliary/draw/draw_vs_exec.c | 4 ++-- src/gallium/auxiliary/draw/draw_vs_llvm.c | 4 ++-- src/gallium/auxiliary/draw/draw_vs_sse.c | 4 ++-- src/gallium/drivers/cell/ppu/cell_context.h | 6 +++--- src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 4 ++-- src/gallium/drivers/cell/ppu/cell_state_vertex.c | 4 ++-- src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | 2 +- src/gallium/drivers/cell/spu/spu_main.c | 4 ++-- src/gallium/drivers/cell/spu/spu_vertex_shader.c | 4 ++-- src/gallium/drivers/cell/spu/spu_vertex_shader.h | 8 ++++---- src/gallium/drivers/failover/fo_context.h | 4 ++-- src/gallium/drivers/failover/fo_state_emit.c | 4 ++-- src/gallium/drivers/i915simple/i915_context.c | 4 ++-- src/gallium/drivers/i915simple/i915_context.h | 2 +- src/gallium/drivers/i965simple/brw_clip.h | 2 +- src/gallium/drivers/i965simple/brw_context.h | 12 ++++++------ src/gallium/drivers/i965simple/brw_state.c | 2 +- src/gallium/drivers/i965simple/brw_wm.h | 4 ++-- src/gallium/drivers/softpipe/sp_context.h | 6 +++--- src/gallium/drivers/softpipe/sp_draw_arrays.c | 4 ++-- src/gallium/drivers/softpipe/sp_quad_fs.c | 4 ++-- src/gallium/drivers/softpipe/sp_state_vertex.c | 4 ++-- src/gallium/include/pipe/p_state.h | 3 ++- src/mesa/state_tracker/st_draw.c | 2 +- 29 files changed, 63 insertions(+), 62 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 903cc26766..81858e01ca 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -238,7 +238,7 @@ draw_set_vertex_buffer(struct draw_context *draw, const struct pipe_vertex_buffer *buffer) { draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); - assert(attr < PIPE_ATTRIB_MAX); + assert(attr < PIPE_MAX_ATTRIBS); draw->vertex_buffer[attr] = *buffer; } @@ -249,7 +249,7 @@ draw_set_vertex_element(struct draw_context *draw, const struct pipe_vertex_element *element) { draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); - assert(attr < PIPE_ATTRIB_MAX); + assert(attr < PIPE_MAX_ATTRIBS); draw->vertex_element[attr] = *element; } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 7007ee22c4..8eb2f515cb 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -228,8 +228,8 @@ struct draw_context /* pipe state that we need: */ const struct pipe_rasterizer_state *rasterizer; struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; - struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; struct draw_vertex_shader *vertex_shader; uint num_vs_outputs; /**< convenience, from vertex_shader */ @@ -242,7 +242,7 @@ struct draw_context unsigned eltSize; /** vertex arrays */ - const void *vbuffer[PIPE_ATTRIB_MAX]; + const void *vbuffer[PIPE_MAX_ATTRIBS]; /** constant buffer (for vertex shader) */ const void *constants; @@ -275,9 +275,9 @@ struct draw_context /* Vertex fetch internal state */ struct { - const ubyte *src_ptr[PIPE_ATTRIB_MAX]; - unsigned pitch[PIPE_ATTRIB_MAX]; - fetch_func fetch[PIPE_ATTRIB_MAX]; + const ubyte *src_ptr[PIPE_MAX_ATTRIBS]; + unsigned pitch[PIPE_MAX_ATTRIBS]; + fetch_func fetch[PIPE_MAX_ATTRIBS]; unsigned nr_attrs; full_fetch_func fetch_func; pt_fetch_func pt_fetch; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 64ef83d800..9b098bc173 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -78,7 +78,7 @@ struct fetch_emit_middle_end { unsigned pitch; void (*fetch)( const void *from, float *attrib); void (*emit)( const float *attrib, float **out ); - } fetch[PIPE_ATTRIB_MAX]; + } fetch[PIPE_MAX_ATTRIBS]; unsigned nr_fetch; unsigned hw_vertex_size; diff --git a/src/gallium/auxiliary/draw/draw_vf.c b/src/gallium/auxiliary/draw/draw_vf.c index f4e29a6293..7bb34ace7a 100644 --- a/src/gallium/auxiliary/draw/draw_vf.c +++ b/src/gallium/auxiliary/draw/draw_vf.c @@ -158,7 +158,7 @@ draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf, unsigned offset = 0; unsigned i, j; - assert(nr < PIPE_ATTRIB_MAX); + assert(nr < PIPE_MAX_ATTRIBS); for (j = 0, i = 0; i < nr; i++) { const unsigned format = map[i].format; @@ -390,7 +390,7 @@ struct draw_vertex_fetch *draw_vf_create( void ) struct draw_vertex_fetch *vf = CALLOC_STRUCT(draw_vertex_fetch); unsigned i; - for (i = 0; i < PIPE_ATTRIB_MAX; i++) + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) vf->attr[i].vf = vf; vf->identity[0] = 0.0; diff --git a/src/gallium/auxiliary/draw/draw_vf.h b/src/gallium/auxiliary/draw/draw_vf.h index 011c8f0ff1..7555d1bd58 100644 --- a/src/gallium/auxiliary/draw/draw_vf.h +++ b/src/gallium/auxiliary/draw/draw_vf.h @@ -169,7 +169,7 @@ struct draw_vf_attr struct draw_vertex_fetch { - struct draw_vf_attr attr[PIPE_ATTRIB_MAX]; + struct draw_vf_attr attr[PIPE_MAX_ATTRIBS]; unsigned attr_count; unsigned vertex_stride; diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 4e2fa72707..487d0ea7f4 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -99,8 +99,8 @@ vs_exec_run( struct draw_vertex_shader *shader, struct tgsi_exec_machine *machine = &draw->machine; unsigned int j; - ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX); - ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX); + ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); + ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index bd983f2ddf..d29cb18efe 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -107,8 +107,8 @@ vs_llvm_run( struct draw_vertex_shader *base, struct tgsi_exec_machine *machine = &draw->machine; unsigned int j; - ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX); - ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX); + ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); + ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index a4503c143e..bc910dc2d0 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -114,8 +114,8 @@ vs_sse_run( struct draw_vertex_shader *base, struct tgsi_exec_machine *machine = &draw->machine; unsigned int j; - ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_ATTRIB_MAX); - ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_ATTRIB_MAX); + ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); + ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 0442abddc1..7f656a9744 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -103,8 +103,8 @@ struct cell_context struct cell_texture *texture[PIPE_MAX_SAMPLERS]; uint num_textures; struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; - struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; ubyte *cbuf_map[PIPE_MAX_COLOR_BUFS]; ubyte *zsbuf_map; @@ -141,7 +141,7 @@ struct cell_context struct spe_function attrib_fetch; - unsigned attrib_fetch_offsets[PIPE_ATTRIB_MAX]; + unsigned attrib_fetch_offsets[PIPE_MAX_ATTRIBS]; }; diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index c839fb4d12..b896252f81 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -123,7 +123,7 @@ cell_draw_elements(struct pipe_context *pipe, /* * Map vertex buffers */ - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (sp->vertex_buffer[i].buffer) { void *buf = pipe->winsys->buffer_map(pipe->winsys, sp->vertex_buffer[i].buffer, @@ -151,7 +151,7 @@ cell_draw_elements(struct pipe_context *pipe, /* * unmap vertex/index buffers - will cause draw module to flush */ - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (sp->vertex_buffer[i].buffer) { draw_set_mapped_vertex_buffer(draw, i, NULL); pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer); diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index 563831b62d..37d25fb357 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -41,7 +41,7 @@ cell_set_vertex_element(struct pipe_context *pipe, const struct pipe_vertex_element *attrib) { struct cell_context *cell = cell_context(pipe); - assert(index < PIPE_ATTRIB_MAX); + assert(index < PIPE_MAX_ATTRIBS); cell->vertex_element[index] = *attrib; /* struct copy */ cell->dirty |= CELL_NEW_VERTEX; @@ -55,7 +55,7 @@ cell_set_vertex_buffer(struct pipe_context *pipe, const struct pipe_vertex_buffer *buffer) { struct cell_context *cell = cell_context(pipe); - assert(index < PIPE_ATTRIB_MAX); + assert(index < PIPE_MAX_ATTRIBS); cell->vertex_buffer[index] = *buffer; /* struct copy */ cell->dirty |= CELL_NEW_VERTEX; diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index 4828a8023b..49d5443cde 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -263,7 +263,7 @@ void cell_update_vertex_fetch(struct draw_context *draw) struct cell_context *const cell = (struct cell_context *) draw->driver_private; struct spe_function *p = &cell->attrib_fetch; - unsigned function_index[PIPE_ATTRIB_MAX]; + unsigned function_index[PIPE_MAX_ATTRIBS]; unsigned unique_attr_formats; int out_ptr; int in_ptr; diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index fccff01e10..d7f46f8024 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -55,7 +55,7 @@ struct spu_global spu; struct spu_vs_context draw; -static unsigned char attribute_fetch_code_buffer[136 * PIPE_ATTRIB_MAX] +static unsigned char attribute_fetch_code_buffer[136 * PIPE_MAX_ATTRIBS] ALIGN16_ATTRIB; static unsigned char depth_stencil_code_buffer[4 * 64] @@ -361,7 +361,7 @@ cmd_state_vs_array_info(const struct cell_array_info *vs_info) { const unsigned attr = vs_info->attr; - ASSERT(attr < PIPE_ATTRIB_MAX); + ASSERT(attr < PIPE_MAX_ATTRIBS); draw.vertex_fetch.src_ptr[attr] = vs_info->base; draw.vertex_fetch.pitch[attr] = vs_info->pitch; draw.vertex_fetch.size[attr] = vs_info->size; diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index 8363efeeb6..3119a78c06 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -86,8 +86,8 @@ run_vertex_program(struct spu_vs_context *draw, struct spu_exec_machine *machine = &draw->machine; unsigned int j; - ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_ATTRIB_MAX); - ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_ATTRIB_MAX); + ALIGN16_DECL(struct spu_exec_vector, inputs, PIPE_MAX_ATTRIBS); + ALIGN16_DECL(struct spu_exec_vector, outputs, PIPE_MAX_ATTRIBS); const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.h b/src/gallium/drivers/cell/spu/spu_vertex_shader.h index 54a4b8d9b9..4c74f5e74d 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.h +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.h @@ -16,10 +16,10 @@ struct spu_vs_context { struct pipe_viewport_state viewport; struct { - uint64_t src_ptr[PIPE_ATTRIB_MAX]; - unsigned pitch[PIPE_ATTRIB_MAX]; - unsigned size[PIPE_ATTRIB_MAX]; - unsigned code_offset[PIPE_ATTRIB_MAX]; + uint64_t src_ptr[PIPE_MAX_ATTRIBS]; + unsigned pitch[PIPE_MAX_ATTRIBS]; + unsigned size[PIPE_MAX_ATTRIBS]; + unsigned code_offset[PIPE_MAX_ATTRIBS]; unsigned nr_attrs; boolean dirty; diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h index 8f3ad3ee79..4afe10c4b8 100644 --- a/src/gallium/drivers/failover/fo_context.h +++ b/src/gallium/drivers/failover/fo_context.h @@ -84,8 +84,8 @@ struct failover_context { struct pipe_scissor_state scissor; struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; - struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; void *sw_sampler_state[PIPE_MAX_SAMPLERS]; void *hw_sampler_state[PIPE_MAX_SAMPLERS]; diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c index 3de931e04e..bb89f925e9 100644 --- a/src/gallium/drivers/failover/fo_state_emit.c +++ b/src/gallium/drivers/failover/fo_state_emit.c @@ -104,7 +104,7 @@ failover_state_emit( struct failover_context *failover ) } if (failover->dirty & FO_NEW_VERTEX_BUFFER) { - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (failover->dirty_vertex_buffer & (1<sw->set_vertex_buffer( failover->sw, i, &failover->vertex_buffer[i] ); @@ -113,7 +113,7 @@ failover_state_emit( struct failover_context *failover ) } if (failover->dirty & FO_NEW_VERTEX_ELEMENT) { - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (failover->dirty_vertex_element & (1<sw->set_vertex_element( failover->sw, i, &failover->vertex_element[i] ); diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 15ff2360b7..fee33d82de 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -65,7 +65,7 @@ i915_draw_elements( struct pipe_context *pipe, /* * Map vertex buffers */ - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (i915->vertex_buffer[i].buffer) { void *buf = pipe->winsys->buffer_map(pipe->winsys, @@ -96,7 +96,7 @@ i915_draw_elements( struct pipe_context *pipe, /* * unmap vertex/index buffers */ - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (i915->vertex_buffer[i].buffer) { pipe->winsys->buffer_unmap(pipe->winsys, i915->vertex_buffer[i].buffer); draw_set_mapped_vertex_buffer(draw, i, NULL); diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 746f18ba38..8e707ea574 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -232,7 +232,7 @@ struct i915_context struct pipe_scissor_state scissor; struct i915_texture *texture[PIPE_MAX_SAMPLERS]; struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; unsigned dirty; diff --git a/src/gallium/drivers/i965simple/brw_clip.h b/src/gallium/drivers/i965simple/brw_clip.h index a89d08b791..d70fc094ff 100644 --- a/src/gallium/drivers/i965simple/brw_clip.h +++ b/src/gallium/drivers/i965simple/brw_clip.h @@ -116,7 +116,7 @@ struct brw_clip_compile { unsigned last_mrf; unsigned header_position_offset; - unsigned offset[PIPE_ATTRIB_MAX]; + unsigned offset[PIPE_MAX_ATTRIBS]; }; #define ATTR_SIZE (4*4) diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h index b83a13c3b6..0c96ba1732 100644 --- a/src/gallium/drivers/i965simple/brw_context.h +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -433,17 +433,17 @@ struct brw_cached_batch_item { -/* Protect against a future where PIPE_ATTRIB_MAX > 32. Wouldn't life +/* Protect against a future where PIPE_MAX_ATTRIBS > 32. Wouldn't life * be easier if C allowed arrays of packed elements? */ -#define ATTRIB_BIT_DWORDS ((PIPE_ATTRIB_MAX+31)/32) +#define ATTRIB_BIT_DWORDS ((PIPE_MAX_ATTRIBS+31)/32) struct brw_vertex_info { - unsigned varying; /* varying:1[PIPE_ATTRIB_MAX] */ - unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_ATTRIB_MAX] */ + unsigned varying; /* varying:1[PIPE_MAX_ATTRIBS] */ + unsigned sizes[ATTRIB_BIT_DWORDS * 2]; /* sizes:2[PIPE_MAX_ATTRIBS] */ }; @@ -496,9 +496,9 @@ struct brw_context /* Arrays with buffer objects to copy non-bufferobj arrays into * for upload: */ - const struct pipe_vertex_buffer *vbo_array[PIPE_ATTRIB_MAX]; + const struct pipe_vertex_buffer *vbo_array[PIPE_MAX_ATTRIBS]; - struct brw_vertex_element_state inputs[PIPE_ATTRIB_MAX]; + struct brw_vertex_element_state inputs[PIPE_MAX_ATTRIBS]; #define BRW_NR_UPLOAD_BUFS 17 #define BRW_UPLOAD_INIT_SIZE (128*1024) diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index f5efe9fc06..0d04a8a594 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -292,7 +292,7 @@ static void brw_set_vertex_element(struct pipe_context *pipe, /* flush ? */ struct brw_context *brw = brw_context(pipe); - assert(index < PIPE_ATTRIB_MAX); + assert(index < PIPE_MAX_ATTRIBS); struct brw_vertex_element_state el; memset(&el, 0, sizeof(el)); diff --git a/src/gallium/drivers/i965simple/brw_wm.h b/src/gallium/drivers/i965simple/brw_wm.h index a1ac0f504a..b29c4393f0 100644 --- a/src/gallium/drivers/i965simple/brw_wm.h +++ b/src/gallium/drivers/i965simple/brw_wm.h @@ -76,7 +76,7 @@ struct brw_wm_prog_key { #define PROGRAM_INTERNAL_PARAM #define MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS 1024 /* 72 for GL_ARB_f_p */ -#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + PIPE_ATTRIB_MAX + 3) +#define BRW_WM_MAX_INSN (MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS*3 + PIPE_MAX_ATTRIBS + 3) #define BRW_WM_MAX_GRF 128 /* hardware limit */ #define BRW_WM_MAX_VREG (BRW_WM_MAX_INSN * 4) #define BRW_WM_MAX_REF (BRW_WM_MAX_INSN * 12) @@ -84,7 +84,7 @@ struct brw_wm_prog_key { #define BRW_WM_MAX_CONST 256 #define BRW_WM_MAX_KILLS MAX_NV_FRAGMENT_PROGRAM_INSTRUCTIONS -#define PAYLOAD_DEPTH (PIPE_ATTRIB_MAX) +#define PAYLOAD_DEPTH (PIPE_MAX_ATTRIBS) #define MAX_IFSN 32 #define MAX_LOOP_DEPTH 32 diff --git a/src/gallium/drivers/softpipe/sp_context.h b/src/gallium/drivers/softpipe/sp_context.h index 19e6cfaf02..dc9d0e6d5d 100644 --- a/src/gallium/drivers/softpipe/sp_context.h +++ b/src/gallium/drivers/softpipe/sp_context.h @@ -77,8 +77,8 @@ struct softpipe_context { struct pipe_scissor_state scissor; struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_ATTRIB_MAX]; - struct pipe_vertex_element vertex_element[PIPE_ATTRIB_MAX]; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; unsigned dirty; unsigned num_samplers; @@ -92,7 +92,7 @@ struct softpipe_context { /* * Mapped vertex buffers */ - ubyte *mapped_vbuffer[PIPE_ATTRIB_MAX]; + ubyte *mapped_vbuffer[PIPE_MAX_ATTRIBS]; /** Mapped constant buffers */ void *mapped_constants[PIPE_SHADER_TYPES]; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 5b5a0fe573..ab54050d3f 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -125,7 +125,7 @@ softpipe_draw_elements(struct pipe_context *pipe, /* * Map vertex buffers */ - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (sp->vertex_buffer[i].buffer) { void *buf = pipe->winsys->buffer_map(pipe->winsys, @@ -153,7 +153,7 @@ softpipe_draw_elements(struct pipe_context *pipe, /* * unmap vertex/index buffers - will cause draw module to flush */ - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (sp->vertex_buffer[i].buffer) { draw_set_mapped_vertex_buffer(draw, i, NULL); pipe->winsys->buffer_unmap(pipe->winsys, sp->vertex_buffer[i].buffer); diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 861285101f..c10ad80e01 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -185,8 +185,8 @@ struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ) uint i; /* allocate storage for program inputs/outputs, aligned to 16 bytes */ - qss->inputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->inputs) + 16); - qss->outputs = MALLOC(PIPE_ATTRIB_MAX * sizeof(*qss->outputs) + 16); + qss->inputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->inputs) + 16); + qss->outputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->outputs) + 16); qss->machine.Inputs = align16(qss->inputs); qss->machine.Outputs = align16(qss->outputs); diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index f01a10de3b..c054e76d9b 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -42,7 +42,7 @@ softpipe_set_vertex_element(struct pipe_context *pipe, const struct pipe_vertex_element *attrib) { struct softpipe_context *softpipe = softpipe_context(pipe); - assert(index < PIPE_ATTRIB_MAX); + assert(index < PIPE_MAX_ATTRIBS); softpipe->vertex_element[index] = *attrib; /* struct copy */ softpipe->dirty |= SP_NEW_VERTEX; @@ -56,7 +56,7 @@ softpipe_set_vertex_buffer(struct pipe_context *pipe, const struct pipe_vertex_buffer *buffer) { struct softpipe_context *softpipe = softpipe_context(pipe); - assert(index < PIPE_ATTRIB_MAX); + assert(index < PIPE_MAX_ATTRIBS); softpipe->vertex_buffer[index] = *buffer; /* struct copy */ softpipe->dirty |= SP_NEW_VERTEX; diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index a2bd8c6aaa..2490412126 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -54,7 +54,8 @@ extern "C" { #define PIPE_MAX_SAMPLERS 8 #define PIPE_MAX_CLIP_PLANES 6 #define PIPE_MAX_CONSTANT 32 -#define PIPE_ATTRIB_MAX 32 +#define PIPE_MAX_ATTRIBS 32 +#define PIPE_ATTRIB_MAX 32 /* XXX obsolete - remove */ #define PIPE_MAX_COLOR_BUFS 8 #define PIPE_MAX_TEXTURE_LEVELS 16 #define PIPE_MAX_FEEDBACK_ATTRIBS 16 diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 20af90df7d..4aca3311b7 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -606,7 +606,7 @@ st_feedback_draw_vbo(GLcontext *ctx, /* * unmap vertex/index buffers */ - for (i = 0; i < PIPE_ATTRIB_MAX; i++) { + for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (draw->vertex_buffer[i].buffer) { pipe->winsys->buffer_unmap(pipe->winsys, draw->vertex_buffer[i].buffer); -- cgit v1.2.3 From f10016b9a0639d7bc814c7b92a30d5b5b2cba5ad Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 25 Mar 2008 17:30:34 +0000 Subject: gallium: Fix some MSVC warnings. --- src/gallium/auxiliary/draw/draw_pt_vcache.c | 2 +- src/gallium/auxiliary/util/u_snprintf.c | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 98c22eb4d4..da9a3a52ae 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -107,7 +107,7 @@ static void vcache_elt( struct vcache_frontend *vcache, assert(vcache->fetch_count < FETCH_MAX); vcache->in[idx] = felt; - vcache->out[idx] = vcache->fetch_count; + vcache->out[idx] = (ushort)vcache->fetch_count; vcache->fetch_elts[vcache->fetch_count++] = felt; } diff --git a/src/gallium/auxiliary/util/u_snprintf.c b/src/gallium/auxiliary/util/u_snprintf.c index 61c20b48f7..48426abcb7 100644 --- a/src/gallium/auxiliary/util/u_snprintf.c +++ b/src/gallium/auxiliary/util/u_snprintf.c @@ -783,19 +783,19 @@ rpl_vsnprintf(char *str, size_t size, const char *format, va_list args) switch (cflags) { case PRINT_C_CHAR: charptr = va_arg(args, signed char *); - *charptr = len; + *charptr = (signed char)len; break; case PRINT_C_SHORT: shortptr = va_arg(args, short int *); - *shortptr = len; + *shortptr = (short int)len; break; case PRINT_C_LONG: longptr = va_arg(args, long int *); - *longptr = len; + *longptr = (long int)len; break; case PRINT_C_LLONG: llongptr = va_arg(args, LLONG *); - *llongptr = len; + *llongptr = (LLONG)len; break; case PRINT_C_SIZE: /* -- cgit v1.2.3 From a52c0416d1f2105960b4646e2e268aed26814689 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Sat, 29 Mar 2008 14:41:03 +0100 Subject: gallium: Set vertex state/buffers en-mass. --- src/gallium/auxiliary/draw/draw_context.c | 24 ++++--- src/gallium/auxiliary/draw/draw_context.h | 12 ++-- src/gallium/auxiliary/util/u_draw_quad.c | 16 +++-- src/gallium/drivers/cell/ppu/cell_context.c | 4 +- src/gallium/drivers/cell/ppu/cell_state.h | 12 ++-- src/gallium/drivers/cell/ppu/cell_state_vertex.c | 30 +++++---- src/gallium/drivers/failover/fo_context.h | 9 +-- src/gallium/drivers/failover/fo_state.c | 35 +++++----- src/gallium/drivers/failover/fo_state_emit.c | 22 ++----- src/gallium/drivers/i915simple/i915_state.c | 23 +++---- src/gallium/drivers/i965simple/brw_state.c | 60 +++++++++-------- src/gallium/drivers/softpipe/sp_context.c | 4 +- src/gallium/drivers/softpipe/sp_state.h | 12 ++-- src/gallium/drivers/softpipe/sp_state_vertex.c | 31 +++++---- src/gallium/include/pipe/p_context.h | 12 ++-- src/mesa/state_tracker/st_draw.c | 82 +++++++++++++----------- 16 files changed, 205 insertions(+), 183 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 81858e01ca..10bf9f54c1 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -233,24 +233,28 @@ void draw_set_viewport_state( struct draw_context *draw, void -draw_set_vertex_buffer(struct draw_context *draw, - unsigned attr, - const struct pipe_vertex_buffer *buffer) +draw_set_vertex_buffers(struct draw_context *draw, + unsigned count, + const struct pipe_vertex_buffer *buffers) { + assert(count <= PIPE_MAX_ATTRIBS); + draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); - assert(attr < PIPE_MAX_ATTRIBS); - draw->vertex_buffer[attr] = *buffer; + + memcpy(draw->vertex_buffer, buffers, count * sizeof(buffers[0])); } void -draw_set_vertex_element(struct draw_context *draw, - unsigned attr, - const struct pipe_vertex_element *element) +draw_set_vertex_elements(struct draw_context *draw, + unsigned count, + const struct pipe_vertex_element *elements) { + assert(count <= PIPE_MAX_ATTRIBS); + draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); - assert(attr < PIPE_MAX_ATTRIBS); - draw->vertex_element[attr] = *element; + + memcpy(draw->vertex_element, elements, count * sizeof(elements[0])); } diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index dae687e590..84bae3bd78 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -138,13 +138,13 @@ void draw_delete_vertex_shader(struct draw_context *draw, * Vertex data functions */ -void draw_set_vertex_buffer(struct draw_context *draw, - unsigned attr, - const struct pipe_vertex_buffer *buffer); +void draw_set_vertex_buffers(struct draw_context *draw, + unsigned count, + const struct pipe_vertex_buffer *buffers); -void draw_set_vertex_element(struct draw_context *draw, - unsigned attr, - const struct pipe_vertex_element *element); +void draw_set_vertex_elements(struct draw_context *draw, + unsigned count, + const struct pipe_vertex_element *elements); void draw_set_mapped_element_buffer( struct draw_context *draw, unsigned eltSize, void *elements ); diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index 37e8533609..e659edb088 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -45,23 +45,25 @@ util_draw_vertex_buffer(struct pipe_context *pipe, uint num_attribs) { struct pipe_vertex_buffer vbuffer; - struct pipe_vertex_element velement; + struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; uint i; + assert(num_attribs <= PIPE_MAX_ATTRIBS); + /* tell pipe about the vertex buffer */ vbuffer.buffer = vbuf; vbuffer.pitch = num_attribs * 4 * sizeof(float); /* vertex size */ vbuffer.buffer_offset = 0; - pipe->set_vertex_buffer(pipe, 0, &vbuffer); + pipe->set_vertex_buffers(pipe, 1, &vbuffer); /* tell pipe about the vertex attributes */ for (i = 0; i < num_attribs; i++) { - velement.src_offset = i * 4 * sizeof(float); - velement.vertex_buffer_index = 0; - velement.src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - velement.nr_components = 4; - pipe->set_vertex_element(pipe, i, &velement); + velements[i].src_offset = i * 4 * sizeof(float); + velements[i].vertex_buffer_index = 0; + velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + velements[i].nr_components = 4; } + pipe->set_vertex_elements(pipe, num_attribs, velements); /* draw */ pipe->draw_arrays(pipe, prim_type, 0, num_verts); diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index ccbbd1d331..12eb5aa254 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -103,8 +103,8 @@ cell_create_context(struct pipe_screen *screen, cell->pipe.destroy = cell_destroy_context; /* state setters */ - cell->pipe.set_vertex_buffer = cell_set_vertex_buffer; - cell->pipe.set_vertex_element = cell_set_vertex_element; + cell->pipe.set_vertex_buffers = cell_set_vertex_buffers; + cell->pipe.set_vertex_elements = cell_set_vertex_elements; cell->pipe.draw_arrays = cell_draw_arrays; cell->pipe.draw_elements = cell_draw_elements; diff --git a/src/gallium/drivers/cell/ppu/cell_state.h b/src/gallium/drivers/cell/ppu/cell_state.h index 31ce505e21..82580ea35a 100644 --- a/src/gallium/drivers/cell/ppu/cell_state.h +++ b/src/gallium/drivers/cell/ppu/cell_state.h @@ -48,13 +48,13 @@ #define CELL_NEW_VERTEX_INFO 0x8000 -void cell_set_vertex_element(struct pipe_context *, - unsigned index, - const struct pipe_vertex_element *); +void cell_set_vertex_elements(struct pipe_context *, + unsigned count, + const struct pipe_vertex_element *); -void cell_set_vertex_buffer(struct pipe_context *, - unsigned index, - const struct pipe_vertex_buffer *); +void cell_set_vertex_buffers(struct pipe_context *, + unsigned count, + const struct pipe_vertex_buffer *); void cell_update_derived( struct cell_context *softpipe ); diff --git a/src/gallium/drivers/cell/ppu/cell_state_vertex.c b/src/gallium/drivers/cell/ppu/cell_state_vertex.c index 37d25fb357..6c83b8dc72 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_vertex.c +++ b/src/gallium/drivers/cell/ppu/cell_state_vertex.c @@ -36,28 +36,34 @@ void -cell_set_vertex_element(struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_element *attrib) +cell_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *elements) { struct cell_context *cell = cell_context(pipe); - assert(index < PIPE_MAX_ATTRIBS); - cell->vertex_element[index] = *attrib; /* struct copy */ + + assert(count <= PIPE_MAX_ATTRIBS); + + memcpy(cell->vertex_element, elements, count * sizeof(elements[0])); + cell->dirty |= CELL_NEW_VERTEX; - draw_set_vertex_element(cell->draw, index, attrib); + draw_set_vertex_elements(cell->draw, count, elements); } void -cell_set_vertex_buffer(struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_buffer *buffer) +cell_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) { struct cell_context *cell = cell_context(pipe); - assert(index < PIPE_MAX_ATTRIBS); - cell->vertex_buffer[index] = *buffer; /* struct copy */ + + assert(count <= PIPE_MAX_ATTRIBS); + + memcpy(cell->vertex_buffer, buffers, count * sizeof(buffers[0])); + cell->dirty |= CELL_NEW_VERTEX; - draw_set_vertex_buffer(cell->draw, index, buffer); + draw_set_vertex_buffers(cell->draw, count, buffers); } diff --git a/src/gallium/drivers/failover/fo_context.h b/src/gallium/drivers/failover/fo_context.h index 4afe10c4b8..c6409fe1e1 100644 --- a/src/gallium/drivers/failover/fo_context.h +++ b/src/gallium/drivers/failover/fo_context.h @@ -84,15 +84,16 @@ struct failover_context { struct pipe_scissor_state scissor; struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_buffer vertex_buffers[PIPE_MAX_ATTRIBS]; + struct pipe_vertex_element vertex_elements[PIPE_MAX_ATTRIBS]; + + uint num_vertex_buffers; + uint num_vertex_elements; void *sw_sampler_state[PIPE_MAX_SAMPLERS]; void *hw_sampler_state[PIPE_MAX_SAMPLERS]; unsigned dirty; - unsigned dirty_vertex_buffer; - unsigned dirty_vertex_element; unsigned num_samplers; unsigned num_textures; diff --git a/src/gallium/drivers/failover/fo_state.c b/src/gallium/drivers/failover/fo_state.c index 11eec2714e..6a79706632 100644 --- a/src/gallium/drivers/failover/fo_state.c +++ b/src/gallium/drivers/failover/fo_state.c @@ -402,32 +402,35 @@ failover_set_viewport_state( struct pipe_context *pipe, static void -failover_set_vertex_buffer(struct pipe_context *pipe, - unsigned unit, - const struct pipe_vertex_buffer *vertex_buffer) +failover_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *vertex_buffers) { struct failover_context *failover = failover_context(pipe); - failover->vertex_buffer[unit] = *vertex_buffer; + memcpy(failover->vertex_buffers, vertex_buffers, + count * sizeof(vertex_buffers[0])); failover->dirty |= FO_NEW_VERTEX_BUFFER; - failover->dirty_vertex_buffer |= (1<sw->set_vertex_buffer( failover->sw, unit, vertex_buffer ); - failover->hw->set_vertex_buffer( failover->hw, unit, vertex_buffer ); + failover->num_vertex_buffers = count; + failover->sw->set_vertex_buffers( failover->sw, count, vertex_buffers ); + failover->hw->set_vertex_buffers( failover->hw, count, vertex_buffers ); } static void -failover_set_vertex_element(struct pipe_context *pipe, - unsigned unit, - const struct pipe_vertex_element *vertex_element) +failover_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *vertex_elements) { struct failover_context *failover = failover_context(pipe); - failover->vertex_element[unit] = *vertex_element; + memcpy(failover->vertex_elements, vertex_elements, + count * sizeof(vertex_elements[0])); + failover->dirty |= FO_NEW_VERTEX_ELEMENT; - failover->dirty_vertex_element |= (1<sw->set_vertex_element( failover->sw, unit, vertex_element ); - failover->hw->set_vertex_element( failover->hw, unit, vertex_element ); + failover->num_vertex_elements = count; + failover->sw->set_vertex_elements( failover->sw, count, vertex_elements ); + failover->hw->set_vertex_elements( failover->hw, count, vertex_elements ); } void @@ -474,7 +477,7 @@ failover_init_state_functions( struct failover_context *failover ) failover->pipe.set_scissor_state = failover_set_scissor_state; failover->pipe.set_sampler_textures = failover_set_sampler_textures; failover->pipe.set_viewport_state = failover_set_viewport_state; - failover->pipe.set_vertex_buffer = failover_set_vertex_buffer; - failover->pipe.set_vertex_element = failover_set_vertex_element; + failover->pipe.set_vertex_buffers = failover_set_vertex_buffers; + failover->pipe.set_vertex_elements = failover_set_vertex_elements; failover->pipe.set_constant_buffer = failover_set_constant_buffer; } diff --git a/src/gallium/drivers/failover/fo_state_emit.c b/src/gallium/drivers/failover/fo_state_emit.c index bb89f925e9..bd4fce9d20 100644 --- a/src/gallium/drivers/failover/fo_state_emit.c +++ b/src/gallium/drivers/failover/fo_state_emit.c @@ -53,8 +53,6 @@ void failover_state_emit( struct failover_context *failover ) { - unsigned i; - if (failover->dirty & FO_NEW_BLEND) failover->sw->bind_blend_state( failover->sw, failover->blend->sw_state ); @@ -104,24 +102,16 @@ failover_state_emit( struct failover_context *failover ) } if (failover->dirty & FO_NEW_VERTEX_BUFFER) { - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (failover->dirty_vertex_buffer & (1<sw->set_vertex_buffer( failover->sw, i, - &failover->vertex_buffer[i] ); - } - } + failover->sw->set_vertex_buffers( failover->sw, + failover->num_vertex_buffers, + failover->vertex_buffers ); } if (failover->dirty & FO_NEW_VERTEX_ELEMENT) { - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (failover->dirty_vertex_element & (1<sw->set_vertex_element( failover->sw, i, - &failover->vertex_element[i] ); - } - } + failover->sw->set_vertex_elements( failover->sw, + failover->num_vertex_elements, + failover->vertex_elements ); } failover->dirty = 0; - failover->dirty_vertex_element = 0; - failover->dirty_vertex_buffer = 0; } diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 1cec36e206..4404bc4590 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -687,23 +687,24 @@ static void i915_delete_rasterizer_state(struct pipe_context *pipe, FREE(raster); } -static void i915_set_vertex_buffer( struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_buffer *buffer ) +static void i915_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) { struct i915_context *i915 = i915_context(pipe); - i915->vertex_buffer[index] = *buffer; + + memcpy(i915->vertex_buffer, buffers, count * sizeof(buffers[0])); /* pass-through to draw module */ - draw_set_vertex_buffer(i915->draw, index, buffer); + draw_set_vertex_buffers(i915->draw, count, buffers); } -static void i915_set_vertex_element( struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_element *element) +static void i915_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *elements) { struct i915_context *i915 = i915_context(pipe); /* pass-through to draw module */ - draw_set_vertex_element(i915->draw, index, element); + draw_set_vertex_elements(i915->draw, count, elements); } @@ -742,6 +743,6 @@ i915_init_state_functions( struct i915_context *i915 ) i915->pipe.set_scissor_state = i915_set_scissor_state; i915->pipe.set_sampler_textures = i915_set_sampler_textures; i915->pipe.set_viewport_state = i915_set_viewport_state; - i915->pipe.set_vertex_buffer = i915_set_vertex_buffer; - i915->pipe.set_vertex_element = i915_set_vertex_element; + i915->pipe.set_vertex_buffers = i915_set_vertex_buffers; + i915->pipe.set_vertex_elements = i915_set_vertex_elements; } diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index 0d04a8a594..376f1487b2 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -277,45 +277,49 @@ static void brw_set_viewport_state( struct pipe_context *pipe, } -static void brw_set_vertex_buffer( struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_buffer *buffer ) +static void brw_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) { struct brw_context *brw = brw_context(pipe); - brw->vb.vbo_array[index] = buffer; + memcpy(brw->vb.vbo_array, buffers, count * sizeof(buffers[0])); } -static void brw_set_vertex_element(struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_element *element) +static void brw_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *elements) { /* flush ? */ struct brw_context *brw = brw_context(pipe); + uint i; - assert(index < PIPE_MAX_ATTRIBS); - struct brw_vertex_element_state el; - memset(&el, 0, sizeof(el)); + assert(count <= PIPE_MAX_ATTRIBS); - el.ve0.src_offset = element->src_offset; - el.ve0.src_format = brw_translate_surface_format(element->src_format); - el.ve0.valid = 1; - el.ve0.vertex_buffer_index = element->vertex_buffer_index; + for (i = 0; i < count; i++) { + struct brw_vertex_element_state el; + memset(&el, 0, sizeof(el)); - el.ve1.dst_offset = index * 4; + el.ve0.src_offset = elements[i].src_offset; + el.ve0.src_format = brw_translate_surface_format(elements[i].src_format); + el.ve0.valid = 1; + el.ve0.vertex_buffer_index = elements[i].vertex_buffer_index; - el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; - el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.dst_offset = i * 4; - switch (element->nr_components) { - case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; - case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; - case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; - break; - } + el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_SRC; + el.ve1.vfcomponent0 = BRW_VFCOMPONENT_STORE_SRC; - brw->vb.inputs[index] = el; + switch (elements[i].nr_components) { + case 1: el.ve1.vfcomponent1 = BRW_VFCOMPONENT_STORE_0; + case 2: el.ve1.vfcomponent2 = BRW_VFCOMPONENT_STORE_0; + case 3: el.ve1.vfcomponent3 = BRW_VFCOMPONENT_STORE_1_FLT; + break; + } + + brw->vb.inputs[i] = el; + } } @@ -457,6 +461,6 @@ brw_init_state_functions( struct brw_context *brw ) brw->pipe.set_scissor_state = brw_set_scissor_state; brw->pipe.set_sampler_textures = brw_set_sampler_textures; brw->pipe.set_viewport_state = brw_set_viewport_state; - brw->pipe.set_vertex_buffer = brw_set_vertex_buffer; - brw->pipe.set_vertex_element = brw_set_vertex_element; + brw->pipe.set_vertex_buffers = brw_set_vertex_buffers; + brw->pipe.set_vertex_elements = brw_set_vertex_elements; } diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 16fb06f176..e298ed37c3 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -174,8 +174,8 @@ softpipe_create( struct pipe_screen *screen, softpipe->pipe.set_sampler_textures = softpipe_set_sampler_textures; softpipe->pipe.set_viewport_state = softpipe_set_viewport_state; - softpipe->pipe.set_vertex_buffer = softpipe_set_vertex_buffer; - softpipe->pipe.set_vertex_element = softpipe_set_vertex_element; + softpipe->pipe.set_vertex_buffers = softpipe_set_vertex_buffers; + softpipe->pipe.set_vertex_elements = softpipe_set_vertex_elements; softpipe->pipe.draw_arrays = softpipe_draw_arrays; softpipe->pipe.draw_elements = softpipe_draw_elements; diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 0bb1095aec..6e6501f5bc 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -151,13 +151,13 @@ void softpipe_set_sampler_textures( struct pipe_context *, void softpipe_set_viewport_state( struct pipe_context *, const struct pipe_viewport_state * ); -void softpipe_set_vertex_element(struct pipe_context *, - unsigned index, - const struct pipe_vertex_element *); +void softpipe_set_vertex_elements(struct pipe_context *, + unsigned count, + const struct pipe_vertex_element *); -void softpipe_set_vertex_buffer(struct pipe_context *, - unsigned index, - const struct pipe_vertex_buffer *); +void softpipe_set_vertex_buffers(struct pipe_context *, + unsigned count, + const struct pipe_vertex_buffer *); void softpipe_update_derived( struct softpipe_context *softpipe ); diff --git a/src/gallium/drivers/softpipe/sp_state_vertex.c b/src/gallium/drivers/softpipe/sp_state_vertex.c index c054e76d9b..e0230e16a4 100644 --- a/src/gallium/drivers/softpipe/sp_state_vertex.c +++ b/src/gallium/drivers/softpipe/sp_state_vertex.c @@ -37,28 +37,35 @@ void -softpipe_set_vertex_element(struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_element *attrib) +softpipe_set_vertex_elements(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_element *attribs) { struct softpipe_context *softpipe = softpipe_context(pipe); - assert(index < PIPE_MAX_ATTRIBS); - softpipe->vertex_element[index] = *attrib; /* struct copy */ + + assert(count <= PIPE_MAX_ATTRIBS); + + memcpy(softpipe->vertex_element, attribs, + count * sizeof(struct pipe_vertex_element)); + softpipe->dirty |= SP_NEW_VERTEX; - draw_set_vertex_element(softpipe->draw, index, attrib); + draw_set_vertex_elements(softpipe->draw, count, attribs); } void -softpipe_set_vertex_buffer(struct pipe_context *pipe, - unsigned index, - const struct pipe_vertex_buffer *buffer) +softpipe_set_vertex_buffers(struct pipe_context *pipe, + unsigned count, + const struct pipe_vertex_buffer *buffers) { struct softpipe_context *softpipe = softpipe_context(pipe); - assert(index < PIPE_MAX_ATTRIBS); - softpipe->vertex_buffer[index] = *buffer; /* struct copy */ + + assert(count <= PIPE_MAX_ATTRIBS); + + memcpy(softpipe->vertex_buffer, buffers, count * sizeof(buffers[0])); + softpipe->dirty |= SP_NEW_VERTEX; - draw_set_vertex_buffer(softpipe->draw, index, buffer); + draw_set_vertex_buffers(softpipe->draw, count, buffers); } diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 2d063e3f9e..324f70185a 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -155,13 +155,13 @@ struct pipe_context { unsigned num_textures, struct pipe_texture ** ); - void (*set_vertex_buffer)( struct pipe_context *, - unsigned index, - const struct pipe_vertex_buffer * ); + void (*set_vertex_buffers)( struct pipe_context *, + unsigned num_buffers, + const struct pipe_vertex_buffer * ); - void (*set_vertex_element)( struct pipe_context *, - unsigned index, - const struct pipe_vertex_element * ); + void (*set_vertex_elements)( struct pipe_context *, + unsigned num_elements, + const struct pipe_vertex_element * ); /*@}*/ diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 4aca3311b7..f0f62246dd 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -210,6 +210,7 @@ st_draw_vbo(GLcontext *ctx, const struct pipe_shader_state *vs; struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS]; GLuint attr; + struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; /* sanity check for pointer arithmetic below */ assert(sizeof(arrays[0]->Ptr[0]) == 1); @@ -226,7 +227,6 @@ st_draw_vbo(GLcontext *ctx, for (attr = 0; attr < vp->num_inputs; attr++) { const GLuint mesaAttr = vp->index_to_input[attr]; struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj; - struct pipe_vertex_element velement; if (bufobj && bufobj->Name) { /* Attribute data is in a VBO. @@ -239,8 +239,8 @@ st_draw_vbo(GLcontext *ctx, vbuffer[attr].buffer = NULL; pipe_buffer_reference(winsys, &vbuffer[attr].buffer, stobj->buffer); vbuffer[attr].buffer_offset = (unsigned) arrays[0]->Ptr;/* in bytes */ - velement.src_offset = arrays[mesaAttr]->Ptr - arrays[0]->Ptr; - assert(velement.src_offset <= 2048); /* 11-bit field */ + velements[attr].src_offset = arrays[mesaAttr]->Ptr - arrays[0]->Ptr; + assert(velements[attr].src_offset <= 2048); /* 11-bit field */ } else { /* attribute data is in user-space memory, not a VBO */ @@ -259,24 +259,24 @@ st_draw_vbo(GLcontext *ctx, (void *) arrays[mesaAttr]->Ptr, bytes); vbuffer[attr].buffer_offset = 0; - velement.src_offset = 0; + velements[attr].src_offset = 0; } /* common-case setup */ vbuffer[attr].pitch = arrays[mesaAttr]->StrideB; /* in bytes */ vbuffer[attr].max_index = max_index; - velement.vertex_buffer_index = attr; - velement.nr_components = arrays[mesaAttr]->Size; - velement.src_format = pipe_vertex_format(arrays[mesaAttr]->Type, - arrays[mesaAttr]->Size, - arrays[mesaAttr]->Normalized); - assert(velement.src_format); - - /* tell pipe about this attribute */ - pipe->set_vertex_buffer(pipe, attr, &vbuffer[attr]); - pipe->set_vertex_element(pipe, attr, &velement); + velements[attr].vertex_buffer_index = attr; + velements[attr].nr_components = arrays[mesaAttr]->Size; + velements[attr].src_format + = pipe_vertex_format(arrays[mesaAttr]->Type, + arrays[mesaAttr]->Size, + arrays[mesaAttr]->Normalized); + assert(velements[attr].src_format); } + pipe->set_vertex_buffers(pipe, vp->num_inputs, vbuffer); + pipe->set_vertex_elements(pipe, vp->num_inputs, velements); + /* do actual drawing */ if (ib) { @@ -336,8 +336,8 @@ st_draw_vbo(GLcontext *ctx, for (attr = 0; attr < vp->num_inputs; attr++) { pipe_buffer_reference(winsys, &vbuffer[attr].buffer, NULL); assert(!vbuffer[attr].buffer); - pipe->set_vertex_buffer(pipe, attr, &vbuffer[attr]); } + pipe->set_vertex_buffers(pipe, vp->num_inputs, vbuffer); } @@ -362,7 +362,7 @@ st_draw_vertices(GLcontext *ctx, unsigned prim, struct pipe_context *pipe = ctx->st->pipe; struct pipe_buffer *vbuf; struct pipe_vertex_buffer vbuffer; - struct pipe_vertex_element velement; + struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; unsigned i; assert(numAttribs > 0); @@ -393,16 +393,16 @@ st_draw_vertices(GLcontext *ctx, unsigned prim, vbuffer.buffer = vbuf; vbuffer.pitch = numAttribs * 4 * sizeof(float); /* vertex size */ vbuffer.buffer_offset = 0; - pipe->set_vertex_buffer(pipe, 0, &vbuffer); + pipe->set_vertex_buffers(pipe, 1, &vbuffer); /* tell pipe about the vertex attributes */ for (i = 0; i < numAttribs; i++) { - velement.src_offset = i * 4 * sizeof(GLfloat); - velement.vertex_buffer_index = 0; - velement.src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - velement.nr_components = 4; - pipe->set_vertex_element(pipe, i, &velement); + velements[i].src_offset = i * 4 * sizeof(GLfloat); + velements[i].vertex_buffer_index = 0; + velements[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + velements[i].nr_components = 4; } + pipe->set_vertex_elements(pipe, numAttribs, velements); /* draw */ pipe->draw_arrays(pipe, prim, 0, numVertex); @@ -470,7 +470,8 @@ st_feedback_draw_vbo(GLcontext *ctx, const struct st_vertex_program *vp; const struct pipe_shader_state *vs; struct pipe_buffer *index_buffer_handle = 0; - struct pipe_vertex_buffer vbuffer[PIPE_MAX_SHADER_INPUTS]; + struct pipe_vertex_buffer vbuffers[PIPE_MAX_SHADER_INPUTS]; + struct pipe_vertex_element velements[PIPE_MAX_ATTRIBS]; GLuint attr, i; ubyte *mapped_constants; @@ -505,7 +506,6 @@ st_feedback_draw_vbo(GLcontext *ctx, for (attr = 0; attr < vp->num_inputs; attr++) { const GLuint mesaAttr = vp->index_to_input[attr]; struct gl_buffer_object *bufobj = arrays[mesaAttr]->BufferObj; - struct pipe_vertex_element velement; void *map; if (bufobj && bufobj->Name) { @@ -516,10 +516,10 @@ st_feedback_draw_vbo(GLcontext *ctx, struct st_buffer_object *stobj = st_buffer_object(bufobj); assert(stobj->buffer); - vbuffer[attr].buffer = NULL; - pipe_buffer_reference(winsys, &vbuffer[attr].buffer, stobj->buffer); - vbuffer[attr].buffer_offset = (unsigned) arrays[0]->Ptr;/* in bytes */ - velement.src_offset = arrays[mesaAttr]->Ptr - arrays[0]->Ptr; + vbuffers[attr].buffer = NULL; + pipe_buffer_reference(winsys, &vbuffers[attr].buffer, stobj->buffer); + vbuffers[attr].buffer_offset = (unsigned) arrays[0]->Ptr;/* in bytes */ + velements[attr].src_offset = arrays[mesaAttr]->Ptr - arrays[0]->Ptr; } else { /* attribute data is in user-space memory, not a VBO */ @@ -528,35 +528,39 @@ st_feedback_draw_vbo(GLcontext *ctx, * (max_index + 1)); /* wrap user data */ - vbuffer[attr].buffer + vbuffers[attr].buffer = winsys->user_buffer_create(winsys, (void *) arrays[mesaAttr]->Ptr, bytes); - vbuffer[attr].buffer_offset = 0; - velement.src_offset = 0; + vbuffers[attr].buffer_offset = 0; + velements[attr].src_offset = 0; } /* common-case setup */ - vbuffer[attr].pitch = arrays[mesaAttr]->StrideB; /* in bytes */ - vbuffer[attr].max_index = max_index; - velement.vertex_buffer_index = attr; - velement.nr_components = arrays[mesaAttr]->Size; - velement.src_format = pipe_vertex_format(arrays[mesaAttr]->Type, + vbuffers[attr].pitch = arrays[mesaAttr]->StrideB; /* in bytes */ + vbuffers[attr].max_index = max_index; + velements[attr].vertex_buffer_index = attr; + velements[attr].nr_components = arrays[mesaAttr]->Size; + velements[attr].src_format = pipe_vertex_format(arrays[mesaAttr]->Type, arrays[mesaAttr]->Size, arrays[mesaAttr]->Normalized); - assert(velement.src_format); + assert(velements[attr].src_format); /* tell draw about this attribute */ +#if 0 draw_set_vertex_buffer(draw, attr, &vbuffer[attr]); - draw_set_vertex_element(draw, attr, &velement); +#endif /* map the attrib buffer */ map = pipe->winsys->buffer_map(pipe->winsys, - vbuffer[attr].buffer, + vbuffers[attr].buffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(draw, attr, map); } + draw_set_vertex_buffers(draw, vp->num_inputs, vbuffers); + draw_set_vertex_elements(draw, vp->num_inputs, velements); + if (ib) { unsigned indexSize; struct gl_buffer_object *bufobj = ib->obj; -- cgit v1.2.3 From fc9888014470286d8d651c569aaadf9cd69d8282 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 30 Mar 2008 22:48:49 +0200 Subject: draw: Fix bypass_vs semantic misuse. --- src/gallium/auxiliary/draw/draw_prim.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c index ddcde01d9a..404c28d76a 100644 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -594,6 +594,7 @@ draw_arrays(struct draw_context *draw, unsigned prim, /* drawing done here: */ if (!draw->rasterizer->bypass_vs || + (draw->rasterizer->flatshade && draw->rasterizer->flatshade_first) || !draw_pt_arrays(draw, prim, start, count)) { /* we have to run the whole pipeline */ draw_prim(draw, prim, start, count); -- cgit v1.2.3 From 63950b11b6060e4e0d06e0d14548ff132a295067 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 30 Mar 2008 23:21:20 +0200 Subject: draw: Do not run full pipeline when flatshade_first for point primitives. --- src/gallium/auxiliary/draw/draw_prim.c | 1 - src/gallium/auxiliary/draw/draw_pt.c | 3 +++ src/gallium/auxiliary/draw/draw_validate.c | 8 ++++++++ 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c index 404c28d76a..ddcde01d9a 100644 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -594,7 +594,6 @@ draw_arrays(struct draw_context *draw, unsigned prim, /* drawing done here: */ if (!draw->rasterizer->bypass_vs || - (draw->rasterizer->flatshade && draw->rasterizer->flatshade_first) || !draw_pt_arrays(draw, prim, start, count)) { /* we have to run the whole pipeline */ draw_prim(draw, prim, start, count); diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 3ec31ec25f..fc9304197a 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -36,6 +36,9 @@ #include "draw/draw_pt.h" +/* XXX: Shouldn't those two functions below use the '>' operator??? + */ + static boolean too_many_verts( struct draw_context *draw, unsigned verts ) { diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index e163e078f0..ad43f06f73 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -76,6 +76,10 @@ draw_need_pipeline(const struct draw_context *draw, /* AA lines */ if (draw->rasterizer->line_smooth && draw->pipeline.aaline) return TRUE; + + /* first-vertex driven flatshading */ + if (draw->rasterizer->flatshade && draw->rasterizer->flatshade_first) + return TRUE; } if (points(prim)) @@ -112,6 +116,10 @@ draw_need_pipeline(const struct draw_context *draw, /* two-side lighting */ if (draw->rasterizer->light_twoside) return TRUE; + + /* first-vertex driven flatshading */ + if (draw->rasterizer->flatshade && draw->rasterizer->flatshade_first) + return TRUE; } /* polygon cull - this is difficult - hardware can cull just fine -- cgit v1.2.3 From 499d8aaa476fb67b7355122dc6fbc641e1b44ed0 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 31 Mar 2008 14:06:42 -0600 Subject: gallium: draw_passthrough.c is not used anymore --- src/gallium/auxiliary/draw/Makefile | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 0c7ce5da5b..a0db2e4555 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -15,7 +15,6 @@ C_SOURCES = \ draw_debug.c \ draw_flatshade.c \ draw_offset.c \ - draw_passthrough.c \ draw_pt.c \ draw_pt_vcache.c \ draw_pt_fetch_emit.c \ -- cgit v1.2.3 From 9cbd8400433fb27da03f300b36495baef464cc6b Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 31 Mar 2008 14:13:09 -0600 Subject: gallium: draw_passthrough.c is not used anymore --- src/gallium/auxiliary/draw/SConscript | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 9b3e7247c5..981225a8c2 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -14,7 +14,6 @@ draw = env.ConvenienceLibrary( 'draw_debug.c', 'draw_flatshade.c', 'draw_offset.c', - 'draw_passthrough.c', # going away soon 'draw_pt.c', 'draw_pt_vcache.c', 'draw_pt_fetch_emit.c', -- cgit v1.2.3 From 594dab4769533afaeb30a588e1731a6753a93f0d Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 31 Mar 2008 14:14:30 -0600 Subject: gallium: move the test for bypass_vs into the vs_XXX_run() functions Also: 1. Added an identity_viewport flag to skip viewport transformation when it has no effect. Might also add an explicit bypass_viewport flag someday. 2. Separate the code for computing clip codes and doing the viewport transform. Predicate them separately. Note: even if bypass_vs is set, we still look at the shader to determine the number of inputs and outputs. --- src/gallium/auxiliary/draw/draw_context.c | 8 ++++++ src/gallium/auxiliary/draw/draw_prim.c | 3 +-- src/gallium/auxiliary/draw/draw_private.h | 2 ++ src/gallium/auxiliary/draw/draw_vs_exec.c | 24 ++++++++++++------ src/gallium/auxiliary/draw/draw_vs_llvm.c | 42 ++++++++++++++++++++----------- src/gallium/auxiliary/draw/draw_vs_sse.c | 32 +++++++++++++++-------- 6 files changed, 76 insertions(+), 35 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 10bf9f54c1..d0d5f66b37 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -228,6 +228,14 @@ void draw_set_viewport_state( struct draw_context *draw, { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); draw->viewport = *viewport; /* struct copy */ + draw->identity_viewport = (viewport->scale[0] == 1.0f && + viewport->scale[1] == 1.0f && + viewport->scale[2] == 1.0f && + viewport->scale[3] == 1.0f && + viewport->translate[0] == 0.0f && + viewport->translate[1] == 0.0f && + viewport->translate[2] == 0.0f && + viewport->translate[3] == 0.0f); } diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c index ddcde01d9a..9779aa8440 100644 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -593,8 +593,7 @@ draw_arrays(struct draw_context *draw, unsigned prim, } /* drawing done here: */ - if (!draw->rasterizer->bypass_vs || - !draw_pt_arrays(draw, prim, start, count)) { + if (!draw_pt_arrays(draw, prim, start, count)) { /* we have to run the whole pipeline */ draw_prim(draw, prim, start, count); } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 8eb2f515cb..9a9b25297f 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -232,6 +232,8 @@ struct draw_context struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; struct draw_vertex_shader *vertex_shader; + boolean identity_viewport; + uint num_vs_outputs; /**< convenience, from vertex_shader */ /* user-space vertex data, buffers */ diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 487d0ea7f4..c8ed17c00a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -110,13 +110,20 @@ vs_exec_run( struct draw_vertex_shader *shader, machine->Consts = (float (*)[4]) draw->user.constants; machine->Inputs = ALIGN16_ASSIGN(inputs); - machine->Outputs = ALIGN16_ASSIGN(outputs); + if (draw->rasterizer->bypass_vs) { + /* outputs are just the inputs */ + machine->Outputs = machine->Inputs; + } + else { + machine->Outputs = ALIGN16_ASSIGN(outputs); + } draw->vertex_fetch.fetch_func( draw, machine, elts, count ); - /* run interpreter */ - tgsi_exec_machine_run( machine ); - + if (!draw->rasterizer->bypass_vs) { + /* run interpreter */ + tgsi_exec_machine_run( machine ); + } /* store machine results */ for (j = 0; j < count; j++) { @@ -136,8 +143,13 @@ vs_exec_run( struct draw_vertex_shader *shader, if (!draw->rasterizer->bypass_clipping) { vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); - vOut[j]->edgeflag = 1; + } + else { + vOut[j]->clipmask = 0; + } + vOut[j]->edgeflag = 1; + if (!draw->identity_viewport) { /* divide by w */ w = 1.0f / w; x *= w; @@ -151,8 +163,6 @@ vs_exec_run( struct draw_vertex_shader *shader, vOut[j]->data[0][3] = w; } else { - vOut[j]->clipmask = 0; - vOut[j]->edgeflag = 1; vOut[j]->data[0][0] = x; vOut[j]->data[0][1] = y; vOut[j]->data[0][2] = z; diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index d29cb18efe..8aa8a617bb 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -121,31 +121,45 @@ vs_llvm_run( struct draw_vertex_shader *base, machine->Consts = (float (*)[4]) draw->user.constants; machine->Inputs = ALIGN16_ASSIGN(inputs); - machine->Outputs = ALIGN16_ASSIGN(outputs); + if (draw->rasterizer->bypass_vs) { + /* outputs are just the inputs */ + machine->Outputs = machine->Inputs; + } + else { + machine->Outputs = ALIGN16_ASSIGN(outputs); + } + draw->vertex_fetch.fetch_func( draw, machine, elts, count ); - /* run shader */ - gallivm_cpu_vs_exec(shader->llvm_prog, - machine->Inputs, - machine->Outputs, - machine->Consts, - machine->Temps); + if (!draw->rasterizer->bypass_vs) { + /* run shader */ + gallivm_cpu_vs_exec(shader->llvm_prog, + machine->Inputs, + machine->Outputs, + machine->Consts, + machine->Temps); + } /* store machine results */ for (j = 0; j < count; j++) { unsigned slot; float x, y, z, w; - if (!draw->rasterizer->bypass_clipping) { - x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + if (!draw->rasterizer->bypass_clipping) { vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); - vOut[j]->edgeflag = 1; + } + else { + vOut[j]->clipmask = 0; + } + vOut[j]->edgeflag = 1; + if (!draw->identity_viewport) { /* divide by w */ w = 1.0f / w; x *= w; @@ -159,8 +173,6 @@ vs_llvm_run( struct draw_vertex_shader *base, vOut[j]->data[0][3] = w; } else { - vOut[j]->clipmask = 0; - vOut[j]->edgeflag = 1; vOut[j]->data[0][0] = x; vOut[j]->data[0][1] = y; vOut[j]->data[0][2] = z; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index bc910dc2d0..701137f908 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -126,7 +126,13 @@ vs_sse_run( struct draw_vertex_shader *base, /* Consts does not require 16 byte alignment. */ machine->Consts = (float (*)[4]) draw->user.constants; machine->Inputs = ALIGN16_ASSIGN(inputs); - machine->Outputs = ALIGN16_ASSIGN(outputs); + if (draw->rasterizer->bypass_vs) { + /* outputs are just the inputs */ + machine->Outputs = machine->Inputs; + } + else { + machine->Outputs = ALIGN16_ASSIGN(outputs); + } /* Fetch vertices. This may at some point be integrated into the @@ -137,13 +143,14 @@ vs_sse_run( struct draw_vertex_shader *base, draw->vertex_fetch.fetch_func( draw, machine, elts, count ); - /* run compiled shader - */ - shader->func( - machine->Inputs, - machine->Outputs, - machine->Consts, - machine->Temps ); + if (!draw->rasterizer->bypass_vs) { + /* run compiled shader + */ + shader->func(machine->Inputs, + machine->Outputs, + machine->Consts, + machine->Temps ); + } /* XXX: Computing the clipmask and emitting results should be done @@ -161,8 +168,13 @@ vs_sse_run( struct draw_vertex_shader *base, if (!draw->rasterizer->bypass_clipping) { vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); - vOut[j]->edgeflag = 1; + } + else { + vOut[j]->clipmask = 0; + } + vOut[j]->edgeflag = 1; + if (!draw->identity_viewport) { /* divide by w */ w = 1.0f / w; x *= w; @@ -176,8 +188,6 @@ vs_sse_run( struct draw_vertex_shader *base, vOut[j]->data[0][3] = w; } else { - vOut[j]->clipmask = 0; - vOut[j]->edgeflag = 1; vOut[j]->data[0][0] = x; vOut[j]->data[0][1] = y; vOut[j]->data[0][2] = z; -- cgit v1.2.3 From 7139b8ef78adb8d08c13e439fc8add31a2d79f36 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 31 Mar 2008 14:20:16 -0600 Subject: gallium: draw_passthrough.c is obsolete - removed --- src/gallium/auxiliary/draw/draw_passthrough.c | 473 -------------------------- 1 file changed, 473 deletions(-) delete mode 100644 src/gallium/auxiliary/draw/draw_passthrough.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_passthrough.c b/src/gallium/auxiliary/draw/draw_passthrough.c deleted file mode 100644 index 2198079a88..0000000000 --- a/src/gallium/auxiliary/draw/draw_passthrough.c +++ /dev/null @@ -1,473 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell - */ - - -/* This code is a prototype of what a passhthrough vertex shader might - * look like. - * - * Probably the best approach for us is to do: - * - vertex fetch - * - vertex shader - * - cliptest / viewport transform - * - * in one step, then examine the clipOrMask & choose between two paths: - * - * Either: - * - build primitive headers - * - clip and the primitive path - * - build clipped vertex buffers, - * - vertex-emit to vbuf buffers - * - * Or, if no clipping: - * - vertex-emit directly to vbuf buffers - * - * But when bypass clipping is enabled, we just take the latter - * choice. If (some new) passthrough-vertex-shader flag is also set, - * the pipeline degenerates to: - * - * - vertex fetch - * - vertex emit to vbuf buffers - * - * Which is what is prototyped here. - */ -#include "pipe/p_util.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" -#include "draw/draw_vbuf.h" -#include "draw/draw_vertex.h" - - -/** - * General-purpose fetch from user's vertex arrays, emit to driver's - * vertex buffer. - * - * XXX this is totally temporary. - */ -static void -fetch_store_general( struct draw_context *draw, - float *out, - unsigned start, - unsigned count ) -{ - const struct vertex_info *vinfo = draw->render->get_vertex_info(draw->render); - const unsigned nr_attrs = vinfo->num_attribs; - uint i, j; - - const unsigned *pitch = draw->vertex_fetch.pitch; - const ubyte **src = draw->vertex_fetch.src_ptr; - - for (i = start; i < start + count; i++) { - for (j = 0; j < nr_attrs; j++) { - /* vinfo->src_index is the output of the vertex shader - * matching this hw-vertex component. - * - * In passthrough, we require a 1:1 mapping between vertex - * shader outputs and inputs, which in turn correspond to - * vertex elements in the state. So, this is the vertex - * element we're interested in... - */ - const uint jj = vinfo->src_index[j]; - const enum pipe_format srcFormat = draw->vertex_element[jj].src_format; - const ubyte *from = src[jj] + i * pitch[jj]; - float attrib[4]; - - /* Except... When we're not. Two cases EMIT_HEADER & - * EMIT_1F_PSIZE don't consume an input. Should have some - * method for indicating this, or change the logic here - * somewhat so it doesn't matter. - * - * Just hack this up now, do something better about it later. - */ - if (vinfo->emit[j] == EMIT_HEADER) { - memset(out, 0, sizeof(struct vertex_header)); - out += sizeof(struct vertex_header) / 4; - continue; - } - else if (vinfo->emit[j] == EMIT_1F_PSIZE) { - out[0] = 1.0; /* xxx */ - out += 1; - continue; - } - - - /* The normal fetch/emit code: - */ - switch (srcFormat) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - { - ubyte *ub = (ubyte *) from; - attrib[2] = UBYTE_TO_FLOAT(ub[0]); - attrib[1] = UBYTE_TO_FLOAT(ub[1]); - attrib[0] = UBYTE_TO_FLOAT(ub[2]); - attrib[3] = UBYTE_TO_FLOAT(ub[3]); - } - break; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - { - float *f = (float *) from; - attrib[0] = f[0]; - attrib[1] = f[1]; - attrib[2] = f[2]; - attrib[3] = f[3]; - } - break; - case PIPE_FORMAT_R32G32B32_FLOAT: - { - float *f = (float *) from; - attrib[0] = f[0]; - attrib[1] = f[1]; - attrib[2] = f[2]; - attrib[3] = 1.0; - } - break; - case PIPE_FORMAT_R32G32_FLOAT: - { - float *f = (float *) from; - attrib[0] = f[0]; - attrib[1] = f[1]; - attrib[2] = 0.0; - attrib[3] = 1.0; - } - break; - case PIPE_FORMAT_R32_FLOAT: - { - float *f = (float *) from; - attrib[0] = f[0]; - attrib[1] = 0.0; - attrib[2] = 0.0; - attrib[3] = 1.0; - } - break; - default: - assert(0); - } - - debug_printf("attrib %d: %f %f %f %f\n", j, - attrib[0], attrib[1], attrib[2], attrib[3]); - - switch (vinfo->emit[j]) { - case EMIT_1F: - out[0] = attrib[0]; - out += 1; - break; - case EMIT_2F: - out[0] = attrib[0]; - out[1] = attrib[1]; - out += 2; - break; - case EMIT_4F: - out[0] = attrib[0]; - out[1] = attrib[1]; - out[2] = attrib[2]; - out[3] = attrib[3]; - out += 4; - break; - default: - assert(0); - } - } - debug_printf("\n"); - } -} - - - -static boolean update_shader( struct draw_context *draw ) -{ - const struct vertex_info *vinfo = draw->render->get_vertex_info(draw->render); - - unsigned nr_attrs = vinfo->num_attribs; - unsigned i; - - for (i = 0; i < nr_attrs; i++) { - unsigned buf = draw->vertex_element[i].vertex_buffer_index; - - draw->vertex_fetch.src_ptr[i] = (const ubyte *) draw->user.vbuffer[buf] + - draw->vertex_buffer[buf].buffer_offset + - draw->vertex_element[i].src_offset; - - draw->vertex_fetch.pitch[i] = draw->vertex_buffer[buf].pitch; - draw->vertex_fetch.fetch[i] = NULL; - } - - draw->vertex_fetch.nr_attrs = nr_attrs; - draw->vertex_fetch.fetch_func = NULL; - draw->vertex_fetch.pt_fetch = NULL; - - draw->pt.hw_vertex_size = vinfo->size * 4; - - draw->vertex_fetch.pt_fetch = fetch_store_general; - return TRUE; -} - - - - -static boolean split_prim_inplace(unsigned prim, unsigned *first, unsigned *incr) -{ - switch (prim) { - case PIPE_PRIM_POINTS: - *first = 1; - *incr = 1; - return TRUE; - case PIPE_PRIM_LINES: - *first = 2; - *incr = 2; - return TRUE; - case PIPE_PRIM_LINE_STRIP: - *first = 2; - *incr = 1; - return TRUE; - case PIPE_PRIM_TRIANGLES: - *first = 3; - *incr = 3; - return TRUE; - case PIPE_PRIM_TRIANGLE_STRIP: - *first = 3; - *incr = 1; - return TRUE; - case PIPE_PRIM_QUADS: - *first = 4; - *incr = 4; - return TRUE; - case PIPE_PRIM_QUAD_STRIP: - *first = 4; - *incr = 2; - return TRUE; - default: - *first = 0; - *incr = 1; /* set to one so that count % incr works */ - return FALSE; - } -} - - - -static boolean set_prim( struct draw_context *draw, - unsigned prim, - unsigned count ) -{ - assert(!draw->user.elts); - - switch (prim) { - case PIPE_PRIM_LINE_LOOP: - if (count > 1024) - return FALSE; - return draw->render->set_primitive( draw->render, PIPE_PRIM_LINE_STRIP ); - - case PIPE_PRIM_TRIANGLE_FAN: - case PIPE_PRIM_POLYGON: - if (count > 1024) - return FALSE; - return draw->render->set_primitive( draw->render, prim ); - - case PIPE_PRIM_QUADS: - case PIPE_PRIM_QUAD_STRIP: - return draw->render->set_primitive( draw->render, PIPE_PRIM_TRIANGLES ); - - default: - return draw->render->set_primitive( draw->render, prim ); - break; - } - - return TRUE; -} - - - -#define INDEX(i) (start + (i)) -static void pt_draw_arrays( struct draw_context *draw, - unsigned start, - unsigned length ) -{ - ushort *tmp = NULL; - unsigned i, j; - - switch (draw->pt.prim) { - case PIPE_PRIM_LINE_LOOP: - tmp = MALLOC( sizeof(ushort) * (length + 1) ); - - for (i = 0; i < length; i++) - tmp[i] = INDEX(i); - tmp[length] = 0; - - draw->render->draw( draw->render, - tmp, - length+1 ); - break; - - - case PIPE_PRIM_QUAD_STRIP: - tmp = MALLOC( sizeof(ushort) * (length / 2 * 6) ); - - for (j = i = 0; i + 3 < length; i += 2, j += 6) { - tmp[j+0] = INDEX(i+0); - tmp[j+1] = INDEX(i+1); - tmp[j+2] = INDEX(i+3); - - tmp[j+3] = INDEX(i+2); - tmp[j+4] = INDEX(i+0); - tmp[j+5] = INDEX(i+3); - } - - if (j) - draw->render->draw( draw->render, tmp, j ); - break; - - case PIPE_PRIM_QUADS: - tmp = MALLOC( sizeof(int) * (length / 4 * 6) ); - - for (j = i = 0; i + 3 < length; i += 4, j += 6) { - tmp[j+0] = INDEX(i+0); - tmp[j+1] = INDEX(i+1); - tmp[j+2] = INDEX(i+3); - - tmp[j+3] = INDEX(i+1); - tmp[j+4] = INDEX(i+2); - tmp[j+5] = INDEX(i+3); - } - - if (j) - draw->render->draw( draw->render, tmp, j ); - break; - - default: - draw->render->draw_arrays( draw->render, - start, - length ); - break; - } - - if (tmp) - FREE(tmp); -} - - - -static boolean do_draw( struct draw_context *draw, - unsigned start, unsigned count ) -{ - float *hw_verts = - draw->render->allocate_vertices( draw->render, - (ushort)draw->pt.hw_vertex_size, - (ushort)count ); - - if (!hw_verts) - return FALSE; - - /* Single routine to fetch vertices and emit HW verts. - */ - draw->vertex_fetch.pt_fetch( draw, - hw_verts, - start, count ); - - /* Draw arrays path to avoid re-emitting index list again and - * again. - */ - pt_draw_arrays( draw, - 0, - count ); - - - draw->render->release_vertices( draw->render, - hw_verts, - draw->pt.hw_vertex_size, - count ); - - return TRUE; -} - - -boolean -draw_passthrough_arrays(struct draw_context *draw, - unsigned prim, - unsigned start, - unsigned count) -{ - unsigned i = 0; - unsigned first, incr; - - //debug_printf("%s prim %d start %d count %d\n", __FUNCTION__, prim, start, count); - - split_prim_inplace(prim, &first, &incr); - - count -= (count - first) % incr; - - debug_printf("%s %d %d %d\n", __FUNCTION__, prim, start, count); - - if (draw_need_pipeline(draw, prim)) - return FALSE; - - debug_printf("%s AAA\n", __FUNCTION__); - - if (!set_prim(draw, prim, count)) - return FALSE; - - /* XXX: need a single value that reflects the most recent call to - * driver->set_primitive: - */ - draw->pt.prim = prim; - - debug_printf("%s BBB\n", __FUNCTION__); - - if (!update_shader(draw)) - return FALSE; - - debug_printf("%s CCC\n", __FUNCTION__); - - /* Chop this up into bite-sized pieces that a driver should be able - * to devour -- problem is we don't have a quick way to query the - * driver on the maximum size for this chunk in the current state. - */ - while (i + first <= count) { - int nr = MIN2( count - i, 1024 ); - - /* snap to prim boundary - */ - nr -= (nr - first) % incr; - - if (!do_draw( draw, start + i, nr )) { - assert(0); - return FALSE; - } - - /* increment allowing for repeated vertices - */ - i += nr - (first - incr); - } - - - debug_printf("%s DONE\n", __FUNCTION__); - return TRUE; -} - - -- cgit v1.2.3 From 686a6c746851b4bc2572aaa1153d570717bdd77f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 1 Apr 2008 10:41:43 +0900 Subject: gallium: Do not assume that buffers are freed in the same order they are fenced. Also free buffers as soon as possible. This short term fix corrects the fenced list behavior but it will impact on performance. The long term fix is probably replace the linked list (legacy from the bufpool code) by a binary tree. --- .../auxiliary/pipebuffer/pb_buffer_fenced.c | 39 ++++++++++++---------- 1 file changed, 21 insertions(+), 18 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 6e217eb2e0..55f32e6816 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -29,8 +29,8 @@ * \file * Implementation of fenced buffers. * - * \author José Fonseca - * \author Thomas Hellström + * \author José Fonseca + * \author Thomas Hellström */ @@ -44,7 +44,7 @@ #include "pb_buffer.h" #include "pb_buffer_fenced.h" -#ifndef __MSC__ +#ifndef WIN32 #include #endif @@ -93,8 +93,6 @@ fenced_buffer(struct pb_buffer *buf) } - - static void _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, int wait) @@ -105,15 +103,6 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, int signaled = -1; list = fenced_list->delayed.next; - - if (fenced_list->numDelayed > 3) { - unsigned i; - - for (i = 0; i < fenced_list->numDelayed; i += 3) { - list = list->next; - } - } - prev = list->prev; for (; list != &fenced_list->delayed; list = prev, prev = list->prev) { @@ -128,11 +117,17 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, } } - if (signaled != 0) + if (signaled != 0) { +#if 0 /* XXX: we are assuming that buffers are freed in the same order they * are fenced which may not always be true... */ break; +#else + signaled = -1; + continue; +#endif + } winsys->fence_reference(winsys, &fenced_buf->fence, NULL); @@ -154,8 +149,16 @@ fenced_buffer_destroy(struct pb_buffer *buf) struct fenced_buffer_list *fenced_list = fenced_buf->list; if (fenced_buf->fence) { - LIST_ADDTAIL(&fenced_buf->head, &fenced_list->delayed); - fenced_list->numDelayed++; + struct pipe_winsys *winsys = fenced_list->winsys; + if(winsys->fence_finish(winsys, fenced_buf->fence, 0) != 0) { + LIST_ADDTAIL(&fenced_buf->head, &fenced_list->delayed); + fenced_list->numDelayed++; + } + else { + winsys->fence_reference(winsys, &fenced_buf->fence, NULL); + pb_reference(&fenced_buf->buffer, NULL); + FREE(fenced_buf); + } } else { pb_reference(&fenced_buf->buffer, NULL); @@ -285,7 +288,7 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) /* Wait on outstanding fences */ while (fenced_list->numDelayed) { _glthread_UNLOCK_MUTEX(fenced_list->mutex); -#ifndef __MSC__ +#ifndef WIN32 sched_yield(); #endif _fenced_buffer_list_check_free(fenced_list, 1); -- cgit v1.2.3 From 4b1377b2403bcb34081f91991f1ffde06df17af1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 1 Apr 2008 14:14:06 +0100 Subject: draw: flush between pt/non-pt modes --- src/gallium/auxiliary/draw/draw_pt.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index fc9304197a..df306a5fa4 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -167,6 +167,10 @@ draw_pt_arrays(struct draw_context *draw, frontend = draw->pt.front.vcache; #endif + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + frontend->prepare( frontend, middle ); frontend->run( frontend, -- cgit v1.2.3 From 52f40dcc468039fc9cca45a4de20a5aa11228b67 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 1 Apr 2008 14:14:46 +0100 Subject: draw: associate rhw divide with clipping not viewport flag --- src/gallium/auxiliary/draw/draw_vs_exec.c | 12 ++++++------ src/gallium/auxiliary/draw/draw_vs_llvm.c | 12 ++++++------ src/gallium/auxiliary/draw/draw_vs_sse.c | 12 ++++++------ 3 files changed, 18 insertions(+), 18 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index c8ed17c00a..c6e503686a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -143,6 +143,12 @@ vs_exec_run( struct draw_vertex_shader *shader, if (!draw->rasterizer->bypass_clipping) { vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; } else { vOut[j]->clipmask = 0; @@ -150,12 +156,6 @@ vs_exec_run( struct draw_vertex_shader *shader, vOut[j]->edgeflag = 1; if (!draw->identity_viewport) { - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - /* Viewport mapping */ vOut[j]->data[0][0] = x * scale[0] + trans[0]; vOut[j]->data[0][1] = y * scale[1] + trans[1]; diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 8aa8a617bb..c8268317ef 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -153,6 +153,12 @@ vs_llvm_run( struct draw_vertex_shader *base, if (!draw->rasterizer->bypass_clipping) { vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; } else { vOut[j]->clipmask = 0; @@ -160,12 +166,6 @@ vs_llvm_run( struct draw_vertex_shader *base, vOut[j]->edgeflag = 1; if (!draw->identity_viewport) { - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - /* Viewport mapping */ vOut[j]->data[0][0] = x * scale[0] + trans[0]; vOut[j]->data[0][1] = y * scale[1] + trans[1]; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 701137f908..f40d65df08 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -168,6 +168,12 @@ vs_sse_run( struct draw_vertex_shader *base, if (!draw->rasterizer->bypass_clipping) { vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; } else { vOut[j]->clipmask = 0; @@ -175,12 +181,6 @@ vs_sse_run( struct draw_vertex_shader *base, vOut[j]->edgeflag = 1; if (!draw->identity_viewport) { - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - /* Viewport mapping */ vOut[j]->data[0][0] = x * scale[0] + trans[0]; vOut[j]->data[0][1] = y * scale[1] + trans[1]; -- cgit v1.2.3 From caa44763f7f7aa26ed0b0d1e5af0c410fba6bfe6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 1 Apr 2008 14:48:59 +0100 Subject: draw: respect flatshade_first in flatshade stage --- src/gallium/auxiliary/draw/draw_flatshade.c | 55 +++++++++++++++++++++++++---- 1 file changed, 49 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_flatshade.c b/src/gallium/auxiliary/draw/draw_flatshade.c index ccad71d695..af2cb05c98 100644 --- a/src/gallium/auxiliary/draw/draw_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_flatshade.c @@ -84,8 +84,25 @@ static INLINE void copy_colors2( struct draw_stage *stage, * Flatshade tri. Required for clipping and when unfilled tris are * active, otherwise handled by hardware. */ -static void flatshade_tri( struct draw_stage *stage, - struct prim_header *header ) +static void flatshade_tri_0( struct draw_stage *stage, + struct prim_header *header ) +{ + struct prim_header tmp; + + tmp.det = header->det; + tmp.edgeflags = header->edgeflags; + tmp.v[0] = header->v[0]; + tmp.v[1] = dup_vert(stage, header->v[1], 0); + tmp.v[2] = dup_vert(stage, header->v[2], 1); + + copy_colors2(stage, tmp.v[1], tmp.v[2], tmp.v[0]); + + stage->next->tri( stage->next, &tmp ); +} + + +static void flatshade_tri_2( struct draw_stage *stage, + struct prim_header *header ) { struct prim_header tmp; @@ -101,11 +118,27 @@ static void flatshade_tri( struct draw_stage *stage, } + + + /** * Flatshade line. Required for clipping. */ -static void flatshade_line( struct draw_stage *stage, - struct prim_header *header ) +static void flatshade_line_0( struct draw_stage *stage, + struct prim_header *header ) +{ + struct prim_header tmp; + + tmp.v[0] = header->v[0]; + tmp.v[1] = dup_vert(stage, header->v[1], 0); + + copy_colors(stage, tmp.v[1], tmp.v[0]); + + stage->next->line( stage->next, &tmp ); +} + +static void flatshade_line_1( struct draw_stage *stage, + struct prim_header *header ) { struct prim_header tmp; @@ -118,6 +151,8 @@ static void flatshade_line( struct draw_stage *stage, } +/* Flatshade point -- passthrough. + */ static void flatshade_point( struct draw_stage *stage, struct prim_header *header ) { @@ -140,8 +175,16 @@ static void flatshade_init_state( struct draw_stage *stage ) } } - stage->line = flatshade_line; - stage->tri = flatshade_tri; + /* Choose flatshade routine according to provoking vertex: + */ + if (stage->draw->rasterizer->flatshade_first) { + stage->line = flatshade_line_0; + stage->tri = flatshade_tri_0; + } + else { + stage->line = flatshade_line_1; + stage->tri = flatshade_tri_2; + } } static void flatshade_first_tri( struct draw_stage *stage, -- cgit v1.2.3 From edfa8201a50c47376b7aa0c05d7851e3e1353bde Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 1 Apr 2008 14:49:56 +0100 Subject: draw: more flatshade_first changes - Reduce the number of changes to the normal vertex ordering - Assume that the hardware knows how to do this in the standard case. - Add support to the passthrough vcache path. --- src/gallium/auxiliary/draw/draw_prim.c | 102 +++++++--------------- src/gallium/auxiliary/draw/draw_pt.c | 2 +- src/gallium/auxiliary/draw/draw_pt.h | 2 +- src/gallium/auxiliary/draw/draw_pt_vcache.c | 127 ++++++++++++++++++++++++---- src/gallium/auxiliary/draw/draw_validate.c | 8 -- 5 files changed, 144 insertions(+), 97 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c index 9779aa8440..4452376a70 100644 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -343,21 +343,11 @@ draw_prim( struct draw_context *draw, break; case PIPE_PRIM_LINES: - if (flatfirst) { - for (i = 0; i+1 < count; i += 2) { - do_line( draw, - TRUE, - start + i + 1, - start + i + 0); - } - } - else { - for (i = 0; i+1 < count; i += 2) { - do_line( draw, - TRUE, - start + i + 0, - start + i + 1); - } + for (i = 0; i+1 < count; i += 2) { + do_line( draw, + TRUE, + start + i + 0, + start + i + 1); } break; @@ -378,63 +368,31 @@ draw_prim( struct draw_context *draw, break; case PIPE_PRIM_LINE_STRIP: - if (flatfirst) { - for (i = 1; i < count; i++) { - do_line( draw, - i == 1, - start + i, - start + i - 1 ); - } - } - else { - for (i = 1; i < count; i++) { - do_line( draw, - i == 1, - start + i - 1, - start + i ); - } + for (i = 1; i < count; i++) { + do_line( draw, + i == 1, + start + i - 1, + start + i ); } break; case PIPE_PRIM_TRIANGLES: - if (flatfirst) { - if (unfilled) { - for (i = 0; i+2 < count; i += 3) { - do_ef_triangle( draw, - 1, - ~0, - start + i + 1, - start + i + 2, - start + i + 0 ); - } - } - else { - for (i = 0; i+2 < count; i += 3) { - do_triangle( draw, - start + i + 1, - start + i + 2, - start + i + 0 ); - } + if (unfilled) { + for (i = 0; i+2 < count; i += 3) { + do_ef_triangle( draw, + 1, + ~0, + start + i + 0, + start + i + 1, + start + i + 2 ); } - } + } else { - if (unfilled) { - for (i = 0; i+2 < count; i += 3) { - do_ef_triangle( draw, - 1, - ~0, - start + i + 0, - start + i + 1, - start + i + 2 ); - } - } - else { - for (i = 0; i+2 < count; i += 3) { - do_triangle( draw, - start + i + 0, - start + i + 1, - start + i + 2 ); - } + for (i = 0; i+2 < count; i += 3) { + do_triangle( draw, + start + i + 0, + start + i + 1, + start + i + 2 ); } } break; @@ -444,15 +402,15 @@ draw_prim( struct draw_context *draw, for (i = 0; i+2 < count; i++) { if (i & 1) { do_triangle( draw, + start + i + 0, start + i + 2, - start + i + 1, - start + i + 0 ); + start + i + 1 ); } else { do_triangle( draw, + start + i + 0, start + i + 1, - start + i + 2, - start + i + 0 ); + start + i + 2 ); } } } @@ -479,9 +437,9 @@ draw_prim( struct draw_context *draw, if (flatfirst) { for (i = 0; i+2 < count; i++) { do_triangle( draw, + start + i + 1, start + i + 2, - start + 0, - start + i + 1 ); + start + 0 ); } } else { diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index df306a5fa4..2ea96c686d 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -191,7 +191,7 @@ boolean draw_pt_init( struct draw_context *draw ) if (!draw->pt.middle.fetch_emit) return FALSE; - draw->pt.front.vcache = draw_pt_vcache(); + draw->pt.front.vcache = draw_pt_vcache( draw ); if (!draw->pt.front.vcache) return FALSE; diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 439fa4c881..f878616079 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -110,7 +110,7 @@ const void *draw_pt_elt_ptr( struct draw_context *draw, /* Implementations: */ -struct draw_pt_front_end *draw_pt_vcache( void ); +struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index da9a3a52ae..16ffedf580 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -44,6 +44,7 @@ struct vcache_frontend { struct draw_pt_front_end base; + struct draw_context *draw; unsigned in[CACHE_MAX]; ushort out[CACHE_MAX]; @@ -157,14 +158,6 @@ static void vcache_quad( struct vcache_frontend *vcache, } -static void vcache_prepare( struct draw_pt_front_end *frontend, - struct draw_pt_middle_end *middle ) -{ - struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; - vcache->middle = middle; - middle->prepare( middle ); -} - static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { PIPE_PRIM_POINTS, PIPE_PRIM_LINES, @@ -179,11 +172,11 @@ static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { }; -static void vcache_run( struct draw_pt_front_end *frontend, - unsigned prim, - pt_elt_func get_elt, - const void *elts, - unsigned count ) +static void vcache_run_pv2( struct draw_pt_front_end *frontend, + unsigned prim, + pt_elt_func get_elt, + const void *elts, + unsigned count ) { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; unsigned i; @@ -309,6 +302,109 @@ static void vcache_run( struct draw_pt_front_end *frontend, vcache_flush( vcache ); } + +static void vcache_run_pv0( struct draw_pt_front_end *frontend, + unsigned prim, + pt_elt_func get_elt, + const void *elts, + unsigned count ) +{ + struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; + unsigned i; + + /* These are for validation only: + */ + vcache->elt_func = get_elt; + vcache->elt_ptr = elts; + vcache->output_prim = reduced_prim[prim]; + + switch (prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i ++) { + vcache_point( vcache, + get_elt(elts, i) ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 0; i+1 < count; i += 2) { + vcache_line( vcache, + TRUE, + get_elt(elts, i + 0), + get_elt(elts, i + 1)); + } + break; + + case PIPE_PRIM_LINE_STRIP: + for (i = 1; i < count; i++) { + vcache_line( vcache, + i == 1, + get_elt(elts, i - 1), + get_elt(elts, i) ); + } + break; + + case PIPE_PRIM_TRIANGLES: + for (i = 0; i+2 < count; i += 3) { + vcache_triangle( vcache, + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2) ); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + for (i = 0; i+2 < count; i++) { + if (i & 1) { + vcache_triangle( vcache, + get_elt(elts, i + 0), + get_elt(elts, i + 2), + get_elt(elts, i + 1) ); + } + else { + vcache_triangle( vcache, + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2) ); + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + for (i = 0; i+2 < count; i++) { + vcache_triangle( vcache, + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, 0) ); + } + break; + + + default: + assert(0); + break; + } + + vcache_flush( vcache ); +} + +static void vcache_prepare( struct draw_pt_front_end *frontend, + struct draw_pt_middle_end *middle ) +{ + struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; + + if (vcache->draw->rasterizer->flatshade_first) + vcache->base.run = vcache_run_pv0; + else + vcache->base.run = vcache_run_pv2; + + vcache->middle = middle; + middle->prepare( middle ); +} + + + + static void vcache_finish( struct draw_pt_front_end *frontend ) { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; @@ -322,14 +418,15 @@ static void vcache_destroy( struct draw_pt_front_end *frontend ) } -struct draw_pt_front_end *draw_pt_vcache( void ) +struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ) { struct vcache_frontend *vcache = CALLOC_STRUCT( vcache_frontend ); vcache->base.prepare = vcache_prepare; - vcache->base.run = vcache_run; + vcache->base.run = NULL; vcache->base.finish = vcache_finish; vcache->base.destroy = vcache_destroy; + vcache->draw = draw; memset(vcache->in, ~0, sizeof(vcache->in)); diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c index ad43f06f73..e163e078f0 100644 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ b/src/gallium/auxiliary/draw/draw_validate.c @@ -76,10 +76,6 @@ draw_need_pipeline(const struct draw_context *draw, /* AA lines */ if (draw->rasterizer->line_smooth && draw->pipeline.aaline) return TRUE; - - /* first-vertex driven flatshading */ - if (draw->rasterizer->flatshade && draw->rasterizer->flatshade_first) - return TRUE; } if (points(prim)) @@ -116,10 +112,6 @@ draw_need_pipeline(const struct draw_context *draw, /* two-side lighting */ if (draw->rasterizer->light_twoside) return TRUE; - - /* first-vertex driven flatshading */ - if (draw->rasterizer->flatshade && draw->rasterizer->flatshade_first) - return TRUE; } /* polygon cull - this is difficult - hardware can cull just fine -- cgit v1.2.3 From 8f8b95ae58c6e51f3bf0bc6ed2fa5ac8b9e82f1e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 1 Apr 2008 15:19:30 +0100 Subject: draw: remove dead code --- src/gallium/auxiliary/draw/draw_prim.c | 37 ---------------------------------- 1 file changed, 37 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c index 4452376a70..51b6950334 100644 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -119,43 +119,6 @@ static void draw_prim_queue_flush( struct draw_context *draw ) draw_vertex_cache_unreference( draw ); } -static INLINE void fetch_and_store(struct draw_context *draw) -{ - /* run vertex shader on vertex cache entries, four per invokation */ -#if 0 - { - const struct vertex_info *vinfo = draw->render->get_vertex_info(draw->render); - memcpy(draw->vs.queue[0].vertex, draw->vs.queue[i + j].elt, - count * vinfo->size); - } -#elif 0 - unsigned i; - - draw_update_vertex_fetch(draw); - for (i = 0; i < draw->vs.queue_nr; i += 4) { - struct vertex_header *dests[4]; - unsigned elts[4]; - struct tgsi_exec_machine *machine = &draw->machine; - int j, n = MIN2(4, draw->vs.queue_nr - i); - - for (j = 0; j < n; j++) { - elts[j] = draw->vs.queue[i + j].elt; - dests[j] = draw->vs.queue[i + j].vertex; - } - - for ( ; j < 4; j++) { - elts[j] = elts[0]; - dests[j] = draw->vs.queue[i + j].vertex; - } - //fetch directly into dests - draw->vertex_fetch.fetch_func(draw, machine, dests, count); - } -#endif - - draw->vs.post_nr = draw->vs.queue_nr; - draw->vs.queue_nr = 0; -} - void draw_do_flush( struct draw_context *draw, unsigned flags ) { if (0) -- cgit v1.2.3 From bc739440c29c551fcc44e9e12d0d9c170d8d24fb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 2 Apr 2008 10:43:37 +0100 Subject: gallium: add temporary facility for rasterization-time clamping of point sizes --- src/gallium/auxiliary/draw/draw_wide_point.c | 14 +++++++++++++- src/gallium/include/pipe/p_state.h | 2 ++ src/mesa/state_tracker/st_atom_rasterizer.c | 4 ++++ 3 files changed, 19 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_wide_point.c b/src/gallium/auxiliary/draw/draw_wide_point.c index c53f7e6cb3..86281ca3d8 100644 --- a/src/gallium/auxiliary/draw/draw_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_wide_point.c @@ -38,6 +38,8 @@ struct widepoint_stage { struct draw_stage stage; float half_point_size; + float point_size_min; + float point_size_max; uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS]; uint texcoord_mode[PIPE_MAX_SHADER_OUTPUTS]; @@ -128,7 +130,15 @@ static void widepoint_point( struct draw_stage *stage, /* point size is either per-vertex or fixed size */ if (wide->psize_slot >= 0) { - half_size = 0.5f * header->v[0]->data[wide->psize_slot][0]; + half_size = header->v[0]->data[wide->psize_slot][0]; + + /* XXX: temporary -- do this in the vertex shader?? + */ + half_size = CLAMP(half_size, + wide->point_size_min, + wide->point_size_max); + + half_size *= 0.5f; } else { half_size = wide->half_point_size; @@ -182,6 +192,8 @@ static void widepoint_first_point( struct draw_stage *stage, struct draw_context *draw = stage->draw; wide->half_point_size = 0.5f * draw->rasterizer->point_size; + wide->point_size_min = draw->rasterizer->point_size_min; + wide->point_size_max = draw->rasterizer->point_size_max; /* XXX we won't know the real size if it's computed by the vertex shader! */ if ((draw->rasterizer->point_size > draw->wide_point_threshold) || diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index a73028814e..e407e3bc72 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -118,6 +118,8 @@ struct pipe_rasterizer_state float line_width; float point_size; /**< used when no per-vertex size */ + float point_size_min; /* XXX - temporary, will go away */ + float point_size_max; /* XXX - temporary, will go away */ float offset_units; float offset_scale; ubyte sprite_coord_mode[PIPE_MAX_SHADER_OUTPUTS]; /**< PIPE_SPRITE_COORD_ */ diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c index 17d77f90ae..14c26c16c0 100644 --- a/src/mesa/state_tracker/st_atom_rasterizer.c +++ b/src/mesa/state_tracker/st_atom_rasterizer.c @@ -198,6 +198,10 @@ static void update_raster_state( struct st_context *st ) /* _NEW_POINT */ raster->point_size = ctx->Point.Size; + + raster->point_size_min = 0; /* temporary, will go away */ + raster->point_size_max = 1000; /* temporary, will go away */ + raster->point_smooth = ctx->Point.SmoothFlag; raster->point_sprite = ctx->Point.PointSprite; for (i = 0; i < MAX_TEXTURE_COORD_UNITS; i++) { -- cgit v1.2.3 From 8e33194837dd206d920889851d9cf22190100c99 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 2 Apr 2008 11:38:33 +0100 Subject: gallium: add a flag to turn on gl rasterization rules Use this to set up hardware rasterization (if your hardware can do it) or otherwise turn on various tweaks in the draw module. Currently only hooked up to point biasing code. --- src/gallium/auxiliary/draw/draw_wide_point.c | 19 +++++++++++++------ src/gallium/include/pipe/p_state.h | 1 + src/mesa/state_tracker/st_atom_rasterizer.c | 2 ++ 3 files changed, 16 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_wide_point.c b/src/gallium/auxiliary/draw/draw_wide_point.c index 86281ca3d8..6fc7c9fcd7 100644 --- a/src/gallium/auxiliary/draw/draw_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_wide_point.c @@ -41,6 +41,9 @@ struct widepoint_stage { float point_size_min; float point_size_max; + float xbias; + float ybias; + uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS]; uint texcoord_mode[PIPE_MAX_SHADER_OUTPUTS]; uint num_texcoords; @@ -126,8 +129,6 @@ static void widepoint_point( struct draw_stage *stage, float *pos2 = v2->data[0]; float *pos3 = v3->data[0]; - const float xbias = 0.0, ybias = -0.125; - /* point size is either per-vertex or fixed size */ if (wide->psize_slot >= 0) { half_size = header->v[0]->data[wide->psize_slot][0]; @@ -144,10 +145,10 @@ static void widepoint_point( struct draw_stage *stage, half_size = wide->half_point_size; } - left_adj = -half_size + xbias; - right_adj = half_size + xbias; - bot_adj = half_size + ybias; - top_adj = -half_size + ybias; + left_adj = -half_size + wide->xbias; + right_adj = half_size + wide->xbias; + bot_adj = half_size + wide->ybias; + top_adj = -half_size + wide->ybias; pos0[0] += left_adj; pos0[1] += top_adj; @@ -194,6 +195,12 @@ static void widepoint_first_point( struct draw_stage *stage, wide->half_point_size = 0.5f * draw->rasterizer->point_size; wide->point_size_min = draw->rasterizer->point_size_min; wide->point_size_max = draw->rasterizer->point_size_max; + wide->xbias = 0.0; + wide->ybias = 0.0; + + if (draw->rasterizer->gl_rasterization_rules) { + wide->ybias = -0.125; + } /* XXX we won't know the real size if it's computed by the vertex shader! */ if ((draw->rasterizer->point_size > draw->wide_point_threshold) || diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index e407e3bc72..3593446e1c 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -115,6 +115,7 @@ struct pipe_rasterizer_state still needed though, to indicate inputs/outputs */ unsigned origin_lower_left:1; /**< Is (0,0) the lower-left corner? */ unsigned flatshade_first:1; /**< take color attribute from the first vertex of a primitive */ + unsigned gl_rasterization_rules:1; /**< enable tweaks for GL rasterization? */ float line_width; float point_size; /**< used when no per-vertex size */ diff --git a/src/mesa/state_tracker/st_atom_rasterizer.c b/src/mesa/state_tracker/st_atom_rasterizer.c index 14c26c16c0..bb14cf9045 100644 --- a/src/mesa/state_tracker/st_atom_rasterizer.c +++ b/src/mesa/state_tracker/st_atom_rasterizer.c @@ -261,6 +261,8 @@ static void update_raster_state( struct st_context *st ) if (ctx->Scissor.Enabled) raster->scissor = 1; + raster->gl_rasterization_rules = 1; + cso_set_rasterizer(st->cso_context, raster); } -- cgit v1.2.3 From ae3c91e98ce3355bca22738440f8ba313b3b8b23 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 2 Apr 2008 11:55:03 +0100 Subject: draw: Set the backend prim in the pt 'prepare' operation Leaving it until 'run' is bad as the primitive is pretty much state for some drivers and so needs to get set early. In some drivers this is used to determine things like vertex format, etc -- by the time we get to 'run', it's too late to change this. --- src/gallium/auxiliary/draw/draw_pt.c | 3 +-- src/gallium/auxiliary/draw/draw_pt.h | 6 ++--- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 33 ++++++++++++++----------- src/gallium/auxiliary/draw/draw_pt_vcache.c | 17 +++++++------ 4 files changed, 31 insertions(+), 28 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 2ea96c686d..c3baf5b7da 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -171,10 +171,9 @@ draw_pt_arrays(struct draw_context *draw, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - frontend->prepare( frontend, middle ); + frontend->prepare( frontend, prim, middle ); frontend->run( frontend, - prim, draw_pt_elt_func( draw ), draw_pt_elt_ptr( draw, start ), count ); diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index f878616079..428b1e0e6b 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -53,10 +53,10 @@ struct draw_context; */ struct draw_pt_front_end { void (*prepare)( struct draw_pt_front_end *, + unsigned prim, struct draw_pt_middle_end * ); void (*run)( struct draw_pt_front_end *, - unsigned prim, pt_elt_func elt_func, const void *elt_ptr, unsigned count ); @@ -79,10 +79,10 @@ struct draw_pt_front_end { * Currenly only using the passthrough version. */ struct draw_pt_middle_end { - void (*prepare)( struct draw_pt_middle_end * ); + void (*prepare)( struct draw_pt_middle_end *, + unsigned prim ); void (*run)( struct draw_pt_middle_end *, - unsigned prim, const unsigned *fetch_elts, unsigned fetch_count, const ushort *draw_elts, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 9b098bc173..9339cf1f88 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -203,16 +203,29 @@ fetch_store_general( struct fetch_emit_middle_end *feme, -static void fetch_emit_prepare( struct draw_pt_middle_end *middle ) +static void fetch_emit_prepare( struct draw_pt_middle_end *middle, + unsigned prim ) { static const float zero = 0; struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; struct draw_context *draw = feme->draw; - const struct vertex_info *vinfo = draw->render->get_vertex_info(draw->render); - unsigned nr_attrs = vinfo->num_attribs; + const struct vertex_info *vinfo; unsigned i; + boolean ok; + + + ok = draw->render->set_primitive( draw->render, + prim ); + if (!ok) { + assert(0); + return; + } + + /* Must do this after set_primitive() above: + */ + vinfo = draw->render->get_vertex_info(draw->render); - for (i = 0; i < nr_attrs; i++) { + for (i = 0; i < vinfo->num_attribs; i++) { unsigned src_element = vinfo->src_index[i]; unsigned src_buffer = draw->vertex_element[src_element].vertex_buffer_index; @@ -275,7 +288,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle ) } } - feme->nr_fetch = nr_attrs; + feme->nr_fetch = vinfo->num_attribs; feme->hw_vertex_size = vinfo->size * 4; } @@ -284,7 +297,6 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle ) static void fetch_emit_run( struct draw_pt_middle_end *middle, - unsigned prim, const unsigned *fetch_elts, unsigned fetch_count, const ushort *draw_elts, @@ -293,16 +305,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; struct draw_context *draw = feme->draw; void *hw_verts; - boolean ok; - - ok = draw->render->set_primitive( draw->render, - prim ); - if (!ok) { - assert(0); - return; - } - hw_verts = draw->render->allocate_vertices( draw->render, (ushort)feme->hw_vertex_size, (ushort)fetch_count ); diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 16ffedf580..5a068761df 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -59,6 +59,8 @@ struct vcache_frontend { const void *elt_ptr; struct draw_pt_middle_end *middle; + + unsigned input_prim; unsigned output_prim; }; @@ -77,7 +79,6 @@ static void vcache_flush( struct vcache_frontend *vcache ) if (vcache->draw_count) { vcache->middle->run( vcache->middle, - vcache->output_prim, vcache->fetch_elts, vcache->fetch_count, vcache->draw_elts, @@ -173,7 +174,6 @@ static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { static void vcache_run_pv2( struct draw_pt_front_end *frontend, - unsigned prim, pt_elt_func get_elt, const void *elts, unsigned count ) @@ -185,9 +185,8 @@ static void vcache_run_pv2( struct draw_pt_front_end *frontend, */ vcache->elt_func = get_elt; vcache->elt_ptr = elts; - vcache->output_prim = reduced_prim[prim]; - switch (prim) { + switch (vcache->input_prim) { case PIPE_PRIM_POINTS: for (i = 0; i < count; i ++) { vcache_point( vcache, @@ -304,7 +303,6 @@ static void vcache_run_pv2( struct draw_pt_front_end *frontend, static void vcache_run_pv0( struct draw_pt_front_end *frontend, - unsigned prim, pt_elt_func get_elt, const void *elts, unsigned count ) @@ -316,9 +314,8 @@ static void vcache_run_pv0( struct draw_pt_front_end *frontend, */ vcache->elt_func = get_elt; vcache->elt_ptr = elts; - vcache->output_prim = reduced_prim[prim]; - switch (prim) { + switch (vcache->input_prim) { case PIPE_PRIM_POINTS: for (i = 0; i < count; i ++) { vcache_point( vcache, @@ -389,6 +386,7 @@ static void vcache_run_pv0( struct draw_pt_front_end *frontend, } static void vcache_prepare( struct draw_pt_front_end *frontend, + unsigned prim, struct draw_pt_middle_end *middle ) { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; @@ -398,8 +396,11 @@ static void vcache_prepare( struct draw_pt_front_end *frontend, else vcache->base.run = vcache_run_pv2; + vcache->input_prim = prim; + vcache->output_prim = reduced_prim[prim]; + vcache->middle = middle; - middle->prepare( middle ); + middle->prepare( middle, vcache->output_prim ); } -- cgit v1.2.3 From add46fbc8cc04d3bce303815541a7bc5d0b33953 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 2 Apr 2008 12:05:55 +0100 Subject: draw: add missing break statement --- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 9339cf1f88..f863a46d9c 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -281,6 +281,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, feme->fetch[i].pitch = 0; feme->fetch[i].fetch = fetch_R32_FLOAT; feme->fetch[i].emit = emit_R32_FLOAT; + break; default: assert(0); feme->fetch[i].emit = NULL; -- cgit v1.2.3 From d2cb4ba0bb2388c784f145c59f3798f914dc7f39 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 3 Apr 2008 12:21:30 +0100 Subject: draw: add passthrough path to the pipeline This handles the case where bypass_vs is set, but vertices need to go through the pipeline for some reason - eg unfilled polygon mode. Demonstrates how to drive the pipeline from inside one of these things. --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/SConscript | 1 + src/gallium/auxiliary/draw/draw_private.h | 6 + src/gallium/auxiliary/draw/draw_pt.c | 23 +- src/gallium/auxiliary/draw/draw_pt.h | 1 + src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 64 +--- .../auxiliary/draw/draw_pt_fetch_pipeline.c | 391 +++++++++++++++++++++ src/gallium/auxiliary/draw/draw_vertex_fetch.c | 4 +- 8 files changed, 424 insertions(+), 67 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index a0db2e4555..b28e516396 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -18,6 +18,7 @@ C_SOURCES = \ draw_pt.c \ draw_pt_vcache.c \ draw_pt_fetch_emit.c \ + draw_pt_fetch_pipeline.c \ draw_pt_elts.c \ draw_prim.c \ draw_pstipple.c \ diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 981225a8c2..9ca4197441 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -17,6 +17,7 @@ draw = env.ConvenienceLibrary( 'draw_pt.c', 'draw_pt_vcache.c', 'draw_pt_fetch_emit.c', + 'draw_pt_fetch_pipeline.c', 'draw_pt_elts.c', 'draw_prim.c', 'draw_pstipple.c', diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 9a9b25297f..0c9f9c2e03 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -160,6 +160,11 @@ struct draw_vertex_shader { /* Internal function for vertex fetch. */ typedef void (*fetch_func)(const void *ptr, float *attrib); + +fetch_func draw_get_fetch_func( enum pipe_format format ); + + + typedef void (*full_fetch_func)( struct draw_context *draw, struct tgsi_exec_machine *machine, const unsigned *elts, @@ -211,6 +216,7 @@ struct draw_context struct { struct draw_pt_middle_end *fetch_emit; + struct draw_pt_middle_end *fetch_pipeline; struct draw_pt_middle_end *fetch_shade_emit; struct draw_pt_middle_end *fetch_shade_cliptest_pipeline_or_emit; } middle; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index c3baf5b7da..f59fb86f78 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -73,7 +73,6 @@ draw_pt_arrays(struct draw_context *draw, */ -#if 0 if (!cliptest && !pipeline && !shading) { /* This is the 'passthrough' path: */ @@ -81,6 +80,14 @@ draw_pt_arrays(struct draw_context *draw, */ middle = draw->pt.middle.fetch_emit; } + else if (!cliptest && !shading) { + /* This is the 'passthrough' path targetting the pipeline backend. + */ + /* Fetch user verts, emit pipeline verts, run pipeline: + */ + middle = draw->pt.middle.fetch_pipeline; + } +#if 0 else if (!cliptest && !pipeline) { /* Fetch user verts, run vertex shader, emit hw verts: */ @@ -117,10 +124,9 @@ draw_pt_arrays(struct draw_context *draw, middle = draw->pt.middle.fetch_shade_cliptest_pipeline; } #else - if (cliptest || pipeline || shading) + else { return FALSE; - - middle = draw->pt.middle.fetch_emit; + } #endif @@ -190,6 +196,10 @@ boolean draw_pt_init( struct draw_context *draw ) if (!draw->pt.middle.fetch_emit) return FALSE; + draw->pt.middle.fetch_pipeline = draw_pt_fetch_pipeline( draw ); + if (!draw->pt.middle.fetch_pipeline) + return FALSE; + draw->pt.front.vcache = draw_pt_vcache( draw ); if (!draw->pt.front.vcache) return FALSE; @@ -205,6 +215,11 @@ void draw_pt_destroy( struct draw_context *draw ) draw->pt.middle.fetch_emit = NULL; } + if (draw->pt.middle.fetch_pipeline) { + draw->pt.middle.fetch_pipeline->destroy( draw->pt.middle.fetch_pipeline ); + draw->pt.middle.fetch_pipeline = NULL; + } + if (draw->pt.front.vcache) { draw->pt.front.vcache->destroy( draw->pt.front.vcache ); draw->pt.front.vcache = NULL; diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 428b1e0e6b..800072c511 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -112,6 +112,7 @@ const void *draw_pt_elt_ptr( struct draw_context *draw, */ struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); +struct draw_pt_middle_end *draw_pt_fetch_pipeline( struct draw_context *draw ); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index f863a46d9c..39f0b40838 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -85,45 +85,6 @@ struct fetch_emit_middle_end { }; -static void fetch_B8G8R8A8_UNORM( const void *from, - float *attrib ) -{ - ubyte *ub = (ubyte *) from; - attrib[2] = UBYTE_TO_FLOAT(ub[0]); - attrib[1] = UBYTE_TO_FLOAT(ub[1]); - attrib[0] = UBYTE_TO_FLOAT(ub[2]); - attrib[3] = UBYTE_TO_FLOAT(ub[3]); -} - -static void fetch_R32G32B32A32_FLOAT( const void *from, - float *attrib ) -{ - float *f = (float *) from; - attrib[0] = f[0]; - attrib[1] = f[1]; - attrib[2] = f[2]; - attrib[3] = f[3]; -} - -static void fetch_R32G32B32_FLOAT( const void *from, - float *attrib ) -{ - float *f = (float *) from; - attrib[0] = f[0]; - attrib[1] = f[1]; - attrib[2] = f[2]; - attrib[3] = 1.0; -} - -static void fetch_R32G32_FLOAT( const void *from, - float *attrib ) -{ - float *f = (float *) from; - attrib[0] = f[0]; - attrib[1] = f[1]; - attrib[2] = 0.0; - attrib[3] = 1.0; -} static void fetch_R32_FLOAT( const void *from, float *attrib ) @@ -235,28 +196,9 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, feme->fetch[i].pitch = draw->vertex_buffer[src_buffer].pitch; - switch (draw->vertex_element[src_element].src_format) { - case PIPE_FORMAT_B8G8R8A8_UNORM: - feme->fetch[i].fetch = fetch_B8G8R8A8_UNORM; - break; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - feme->fetch[i].fetch = fetch_R32G32B32A32_FLOAT; - break; - case PIPE_FORMAT_R32G32B32_FLOAT: - feme->fetch[i].fetch = fetch_R32G32B32_FLOAT; - break; - case PIPE_FORMAT_R32G32_FLOAT: - feme->fetch[i].fetch = fetch_R32G32_FLOAT; - break; - case PIPE_FORMAT_R32_FLOAT: - feme->fetch[i].fetch = fetch_R32_FLOAT; - break; - default: - assert(0); - feme->fetch[i].fetch = NULL; - break; - } - + feme->fetch[i].fetch = draw_get_fetch_func(draw->vertex_element[src_element].src_format); + + switch (vinfo->emit[i]) { case EMIT_4F: feme->fetch[i].emit = emit_R32G32B32A32_FLOAT; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c new file mode 100644 index 0000000000..94e7d01be4 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c @@ -0,0 +1,391 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" + +/* The simplest 'middle end' in the new vertex code. + * + * The responsibilities of a middle end are to: + * - perform vertex fetch using + * - draw vertex element/buffer state + * - a list of fetch indices we received as an input + * - run the vertex shader + * - cliptest, + * - clip coord calculation + * - viewport transformation + * - if necessary, run the primitive pipeline, passing it: + * - a linear array of vertex_header vertices constructed here + * - a set of draw indices we received as an input + * - otherwise, drive the hw backend, + * - allocate space for hardware format vertices + * - translate the vertex-shader output vertices to hw format + * - calling the backend draw functions. + * + * For convenience, we provide a helper function to drive the hardware + * backend given similar inputs to those required to run the pipeline. + * + * In the case of passthrough mode, many of these actions are disabled + * or noops, so we end up doing: + * + * - perform vertex fetch + * - drive the hw backend + * + * IE, basically just vertex fetch to post-vs-format vertices, + * followed by a call to the backend helper function. + */ + + +struct fetch_pipeline_middle_end { + struct draw_pt_middle_end base; + struct draw_context *draw; + + struct { + const ubyte *ptr; + unsigned pitch; + void (*fetch)( const void *from, float *attrib); + void (*emit)( const float *attrib, float **out ); + } fetch[PIPE_MAX_ATTRIBS]; + + unsigned nr_fetch; + unsigned pipeline_vertex_size; + unsigned prim; +}; + + +static void fetch_NULL( const void *from, + float *attrib ) +{ +} + + + +static void emit_R32_FLOAT( const float *attrib, + float **out ) +{ + (*out)[0] = attrib[0]; + (*out) += 1; +} + +static void emit_R32G32_FLOAT( const float *attrib, + float **out ) +{ + (*out)[0] = attrib[0]; + (*out)[1] = attrib[1]; + (*out) += 2; +} + +static void emit_R32G32B32_FLOAT( const float *attrib, + float **out ) +{ + (*out)[0] = attrib[0]; + (*out)[1] = attrib[1]; + (*out)[2] = attrib[2]; + (*out) += 3; +} + +static void emit_R32G32B32A32_FLOAT( const float *attrib, + float **out ) +{ + (*out)[0] = attrib[0]; + (*out)[1] = attrib[1]; + (*out)[2] = attrib[2]; + (*out)[3] = attrib[3]; + (*out) += 4; +} + +static void emit_header( const float *attrib, + float **out ) +{ + (*out)[0] = 0; + (*out)[1] = 0; + (*out)[2] = 0; + (*out)[3] = 0; + (*out)[3] = 1; + (*out) += 5; +} + +/** + * General-purpose fetch from user's vertex arrays, emit to driver's + * vertex buffer. + * + * XXX this is totally temporary. + */ +static void +fetch_store_general( struct fetch_pipeline_middle_end *fpme, + void *out_ptr, + const unsigned *fetch_elts, + unsigned count ) +{ + float *out = (float *)out_ptr; + uint i, j; + + for (i = 0; i < count; i++) { + unsigned elt = fetch_elts[i]; + + for (j = 0; j < fpme->nr_fetch; j++) { + float attrib[4]; + const ubyte *from = (fpme->fetch[j].ptr + + fpme->fetch[j].pitch * elt); + + fpme->fetch[j].fetch( from, attrib ); + fpme->fetch[j].emit( attrib, &out ); + } + } +} + + +/* We aren't running a vertex shader, but are running the pipeline. + * That means the vertices we need to build look like: + * + * dw0: vertex header (zero?) + * dw1: clip coord 0 + * dw2: clip coord 1 + * dw3: clip coord 2 + * dw4: clip coord 4 + * dw5: screen coord 0 + * dw6: screen coord 0 + * dw7: screen coord 0 + * dw8: screen coord 0 + * dw9: other attribs... + * + */ +static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, + unsigned prim ) +{ + static const float zero = 0; + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_context *draw = fpme->draw; + unsigned i, nr = 0; + + fpme->prim = prim; + + /* Emit the vertex header and empty clipspace coord field: + */ + { + fpme->fetch[nr].ptr = NULL; + fpme->fetch[nr].pitch = 0; + fpme->fetch[nr].fetch = fetch_NULL; + fpme->fetch[nr].emit = emit_header; + nr++; + } + + + /* Need to look at vertex shader inputs (we know it is a + * passthrough shader, so these define the outputs too). If we + * were running a shader, we'd still be looking at the inputs at + * this point. + */ + for (i = 0; i < draw->vertex_shader->info.num_inputs; i++) { + unsigned buf = draw->vertex_element[i].vertex_buffer_index; + enum pipe_format format = draw->vertex_element[i].src_format; + + fpme->fetch[nr].ptr = ((const ubyte *) draw->user.vbuffer[buf] + + draw->vertex_buffer[buf].buffer_offset + + draw->vertex_element[i].src_offset); + + fpme->fetch[nr].pitch = draw->vertex_buffer[buf].pitch; + fpme->fetch[nr].fetch = draw_get_fetch_func( format ); + + /* Always do this -- somewhat redundant... + */ + fpme->fetch[nr].emit = emit_R32G32B32A32_FLOAT; + nr++; + } + + fpme->nr_fetch = nr; + fpme->pipeline_vertex_size = (5 + (nr-1) * 4) * sizeof(float); +} + + + +/** + * Add a point to the primitive queue. + * \param i0 index into user's vertex arrays + */ +static void do_point( struct draw_context *draw, + const char *v0 ) +{ + struct prim_header prim; + + prim.reset_line_stipple = 0; + prim.edgeflags = 1; + prim.pad = 0; + prim.v[0] = (struct vertex_header *)v0; + + draw->pipeline.first->point( draw->pipeline.first, &prim ); +} + + +/** + * Add a line to the primitive queue. + * \param i0 index into user's vertex arrays + * \param i1 index into user's vertex arrays + */ +static void do_line( struct draw_context *draw, + const char *v0, + const char *v1 ) +{ + struct prim_header prim; + + prim.reset_line_stipple = 1; /* fixme */ + prim.edgeflags = 1; + prim.pad = 0; + prim.v[0] = (struct vertex_header *)v0; + prim.v[1] = (struct vertex_header *)v1; + + draw->pipeline.first->line( draw->pipeline.first, &prim ); +} + +/** + * Add a triangle to the primitive queue. + */ +static void do_triangle( struct draw_context *draw, + char *v0, + char *v1, + char *v2 ) +{ + struct prim_header prim; + +// _mesa_printf("tri %d %d %d\n", i0, i1, i2); + prim.reset_line_stipple = 1; + prim.edgeflags = ~0; + prim.pad = 0; + prim.v[0] = (struct vertex_header *)v0; + prim.v[1] = (struct vertex_header *)v1; + prim.v[2] = (struct vertex_header *)v2; + + draw->pipeline.first->tri( draw->pipeline.first, &prim ); +} + + +static void run_pipeline( struct fetch_pipeline_middle_end *fpme, + char *verts, + const ushort *elts, + unsigned count ) +{ + struct draw_context *draw = fpme->draw; + unsigned stride = fpme->pipeline_vertex_size; + unsigned i; + + switch (fpme->prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i++) + do_point( draw, + verts + stride * elts[i] ); + break; + case PIPE_PRIM_LINES: + for (i = 0; i+1 < count; i += 2) + do_line( draw, + verts + stride * elts[i+0], + verts + stride * elts[i+1]); + break; + case PIPE_PRIM_TRIANGLES: + for (i = 0; i+2 < count; i += 3) + do_triangle( draw, + verts + stride * elts[i+0], + verts + stride * elts[i+1], + verts + stride * elts[i+2]); + break; + } +} + + + + +static void fetch_pipeline_run( struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_context *draw = fpme->draw; + char *pipeline_verts; + + pipeline_verts = MALLOC( fpme->pipeline_vertex_size * + fetch_count ); + if (!pipeline_verts) { + assert(0); + return; + } + + + /* Single routine to fetch vertices and emit pipeline verts. + */ + fetch_store_general( fpme, + pipeline_verts, + fetch_elts, + fetch_count ); + + + run_pipeline( fpme, + pipeline_verts, + draw_elts, + draw_count ); + + + /* Done -- that was easy, wasn't it: + */ + FREE( pipeline_verts ); +} + + + +static void fetch_pipeline_finish( struct draw_pt_middle_end *middle ) +{ + /* nothing to do */ +} + +static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle ) +{ + FREE(middle); +} + + +struct draw_pt_middle_end *draw_pt_fetch_pipeline( struct draw_context *draw ) +{ + struct fetch_pipeline_middle_end *fetch_pipeline = CALLOC_STRUCT( fetch_pipeline_middle_end ); + + fetch_pipeline->base.prepare = fetch_pipeline_prepare; + fetch_pipeline->base.run = fetch_pipeline_run; + fetch_pipeline->base.finish = fetch_pipeline_finish; + fetch_pipeline->base.destroy = fetch_pipeline_destroy; + + fetch_pipeline->draw = draw; + + return &fetch_pipeline->base; +} + diff --git a/src/gallium/auxiliary/draw/draw_vertex_fetch.c b/src/gallium/auxiliary/draw/draw_vertex_fetch.c index 11f99babf6..9041041006 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_fetch.c +++ b/src/gallium/auxiliary/draw/draw_vertex_fetch.c @@ -166,7 +166,7 @@ fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib) } -static fetch_func get_fetch_func( enum pipe_format format ) +fetch_func draw_get_fetch_func( enum pipe_format format ) { #if 0 { @@ -502,7 +502,7 @@ void draw_update_vertex_fetch( struct draw_context *draw ) draw->vertex_element[i].src_offset; draw->vertex_fetch.pitch[i] = draw->vertex_buffer[buf].pitch; - draw->vertex_fetch.fetch[i] = get_fetch_func( format ); + draw->vertex_fetch.fetch[i] = draw_get_fetch_func( format ); } draw->vertex_fetch.nr_attrs = nr_attrs; -- cgit v1.2.3 From 766f3a545ebb317d2115b9053a8fc13b49ceec12 Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 3 Apr 2008 16:36:07 -0600 Subject: gallium: implement ycbcr->rgba tile conversion --- src/gallium/auxiliary/util/p_tile.c | 71 +++++++++++++++++++++++++++++++++++++ 1 file changed, 71 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index 287d783981..c9a9c8f4f7 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -560,6 +560,69 @@ z24s8_get_tile_rgba(unsigned *src, } +/*** PIPE_FORMAT_YCBCR / PIPE_FORMAT_YCBCR_REV ***/ + +/** + * Convert YCbCr (or YCrCb) to RGBA. + */ +static void +ycbcr_get_tile_rgba(ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride, + boolean rev) +{ + const float scale = 1.0f / 255.0f; + unsigned i, j; + + /* we're assuming we're being asked for an even number of texels */ + assert((w & 1) == 0); + + for (i = 0; i < h; i++) { + float *pRow = p; + /* do two texels at a time */ + for (j = 0; j < w; j += 2, src += 2) { + const ushort t0 = src[0]; + const ushort t1 = src[1]; + const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */ + const ubyte y1 = (t1 >> 8) & 0xff; /* luminance */ + ubyte cb, cr; + if (rev) { + cb = t1 & 0xff; /* chroma U */ + cr = t0 & 0xff; /* chroma V */ + } + else { + cb = t0 & 0xff; /* chroma U */ + cr = t1 & 0xff; /* chroma V */ + } + float r, g, b; + + /* even pixel: y0,cr,cb */ + r = 1.164 * (y0-16) + 1.596 * (cr-128); + g = 1.164 * (y0-16) - 0.813 * (cr-128) - 0.391 * (cb-128); + b = 1.164 * (y0-16) + 2.018 * (cb-128); + pRow[0] = r * scale; + pRow[1] = g * scale; + pRow[2] = b * scale; + pRow[3] = 1.0f; + pRow += 4; + + /* odd pixel: use y1,cr,cb */ + r = 1.164 * (y1-16) + 1.596 * (cr-128); + g = 1.164 * (y1-16) - 0.813 * (cr-128) - 0.391 * (cb-128); + b = 1.164 * (y1-16) + 2.018 * (cb-128); + pRow[0] = r * scale; + pRow[1] = g * scale; + pRow[2] = b * scale; + pRow[3] = 1.0f; + pRow += 4; + + } + p += dst_stride; + } +} + + void pipe_get_tile_rgba(struct pipe_context *pipe, struct pipe_surface *ps, @@ -622,6 +685,14 @@ pipe_get_tile_rgba(struct pipe_context *pipe, case PIPE_FORMAT_Z24S8_UNORM: z24s8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); break; + case PIPE_FORMAT_YCBCR: + assert((x & 1) == 0); + ycbcr_get_tile_rgba((ushort *) packed, w, h, p, dst_stride, FALSE); + break; + case PIPE_FORMAT_YCBCR_REV: + assert((x & 1) == 0); + ycbcr_get_tile_rgba((ushort *) packed, w, h, p, dst_stride, TRUE); + break; default: assert(0); } -- cgit v1.2.3 From 7a7bce7b24ea4f63faa1d5bfe3f71d09b412c838 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 4 Apr 2008 11:13:10 +0100 Subject: gallium: make msvc less unhappy --- src/gallium/auxiliary/util/p_tile.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index c9a9c8f4f7..520da5cecd 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -587,6 +587,8 @@ ycbcr_get_tile_rgba(ushort *src, const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */ const ubyte y1 = (t1 >> 8) & 0xff; /* luminance */ ubyte cb, cr; + float r, g, b; + if (rev) { cb = t1 & 0xff; /* chroma U */ cr = t0 & 0xff; /* chroma V */ @@ -595,12 +597,11 @@ ycbcr_get_tile_rgba(ushort *src, cb = t0 & 0xff; /* chroma U */ cr = t1 & 0xff; /* chroma V */ } - float r, g, b; /* even pixel: y0,cr,cb */ - r = 1.164 * (y0-16) + 1.596 * (cr-128); - g = 1.164 * (y0-16) - 0.813 * (cr-128) - 0.391 * (cb-128); - b = 1.164 * (y0-16) + 2.018 * (cb-128); + r = 1.164f * (y0-16) + 1.596f * (cr-128); + g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128); + b = 1.164f * (y0-16) + 2.018f * (cb-128); pRow[0] = r * scale; pRow[1] = g * scale; pRow[2] = b * scale; @@ -608,9 +609,9 @@ ycbcr_get_tile_rgba(ushort *src, pRow += 4; /* odd pixel: use y1,cr,cb */ - r = 1.164 * (y1-16) + 1.596 * (cr-128); - g = 1.164 * (y1-16) - 0.813 * (cr-128) - 0.391 * (cb-128); - b = 1.164 * (y1-16) + 2.018 * (cb-128); + r = 1.164f * (y1-16) + 1.596f * (cr-128); + g = 1.164f * (y1-16) - 0.813f * (cr-128) - 0.391f * (cb-128); + b = 1.164f * (y1-16) + 2.018f * (cb-128); pRow[0] = r * scale; pRow[1] = g * scale; pRow[2] = b * scale; -- cgit v1.2.3 From 0b20d1b9b5e0514a68ab460d748753d29df2e70b Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 4 Apr 2008 13:18:09 +0100 Subject: draw: move code to run pipeline from pt to new file Add facility for draw_vbuf.c to reset these vertex ids on flushes. Pre-initialize vertex ids correctly. --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/SConscript | 1 + src/gallium/auxiliary/draw/draw_context.c | 3 +- src/gallium/auxiliary/draw/draw_private.h | 14 ++ .../auxiliary/draw/draw_pt_fetch_pipeline.c | 114 ++------------- src/gallium/auxiliary/draw/draw_pt_pipeline.c | 162 +++++++++++++++++++++ 6 files changed, 196 insertions(+), 99 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pt_pipeline.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index b28e516396..28262a92c6 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -19,6 +19,7 @@ C_SOURCES = \ draw_pt_vcache.c \ draw_pt_fetch_emit.c \ draw_pt_fetch_pipeline.c \ + draw_pt_pipeline.c \ draw_pt_elts.c \ draw_prim.c \ draw_pstipple.c \ diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 9ca4197441..52107912f5 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -18,6 +18,7 @@ draw = env.ConvenienceLibrary( 'draw_pt_vcache.c', 'draw_pt_fetch_emit.c', 'draw_pt_fetch_pipeline.c', + 'draw_pt_pipeline.c', 'draw_pt_elts.c', 'draw_prim.c', 'draw_pstipple.c', diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index d0d5f66b37..470c1c571b 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -434,7 +434,8 @@ void draw_reset_vertex_ids(struct draw_context *draw) stage = stage->next; } - draw_vertex_cache_reset_vertex_ids(draw); + draw_vertex_cache_reset_vertex_ids(draw); /* going away soon */ + draw_pt_reset_vertex_ids(draw); } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 0c9f9c2e03..48545af9e2 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -227,6 +227,12 @@ struct draw_context struct draw_pt_front_end *vcache; } front; + struct { + char *verts; + unsigned vertex_stride; + unsigned vertex_count; + } pipeline; + } pt; boolean flushing; @@ -386,6 +392,14 @@ boolean draw_pt_arrays( struct draw_context *draw, unsigned start, unsigned count ); +void draw_pt_reset_vertex_ids( struct draw_context *draw ); +void draw_pt_run_pipeline( struct draw_context *draw, + unsigned prim, + char *verts, + unsigned vertex_stride, + unsigned vertex_count, + const ushort *elts, + unsigned count ); /* Prototype/hack (DEPRECATED) diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c index 94e7d01be4..0ddb400f7e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c @@ -129,7 +129,13 @@ static void emit_R32G32B32A32_FLOAT( const float *attrib, static void emit_header( const float *attrib, float **out ) { - (*out)[0] = 0; + struct vertex_header *header = (struct vertex_header *) (*out); + + header->clipmask = 0; + header->edgeflag = 1; + header->pad = 0; + header->vertex_id = UNDEFINED_VERTEX_ID; + (*out)[1] = 0; (*out)[2] = 0; (*out)[3] = 0; @@ -231,99 +237,6 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, -/** - * Add a point to the primitive queue. - * \param i0 index into user's vertex arrays - */ -static void do_point( struct draw_context *draw, - const char *v0 ) -{ - struct prim_header prim; - - prim.reset_line_stipple = 0; - prim.edgeflags = 1; - prim.pad = 0; - prim.v[0] = (struct vertex_header *)v0; - - draw->pipeline.first->point( draw->pipeline.first, &prim ); -} - - -/** - * Add a line to the primitive queue. - * \param i0 index into user's vertex arrays - * \param i1 index into user's vertex arrays - */ -static void do_line( struct draw_context *draw, - const char *v0, - const char *v1 ) -{ - struct prim_header prim; - - prim.reset_line_stipple = 1; /* fixme */ - prim.edgeflags = 1; - prim.pad = 0; - prim.v[0] = (struct vertex_header *)v0; - prim.v[1] = (struct vertex_header *)v1; - - draw->pipeline.first->line( draw->pipeline.first, &prim ); -} - -/** - * Add a triangle to the primitive queue. - */ -static void do_triangle( struct draw_context *draw, - char *v0, - char *v1, - char *v2 ) -{ - struct prim_header prim; - -// _mesa_printf("tri %d %d %d\n", i0, i1, i2); - prim.reset_line_stipple = 1; - prim.edgeflags = ~0; - prim.pad = 0; - prim.v[0] = (struct vertex_header *)v0; - prim.v[1] = (struct vertex_header *)v1; - prim.v[2] = (struct vertex_header *)v2; - - draw->pipeline.first->tri( draw->pipeline.first, &prim ); -} - - -static void run_pipeline( struct fetch_pipeline_middle_end *fpme, - char *verts, - const ushort *elts, - unsigned count ) -{ - struct draw_context *draw = fpme->draw; - unsigned stride = fpme->pipeline_vertex_size; - unsigned i; - - switch (fpme->prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i++) - do_point( draw, - verts + stride * elts[i] ); - break; - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) - do_line( draw, - verts + stride * elts[i+0], - verts + stride * elts[i+1]); - break; - case PIPE_PRIM_TRIANGLES: - for (i = 0; i+2 < count; i += 3) - do_triangle( draw, - verts + stride * elts[i+0], - verts + stride * elts[i+1], - verts + stride * elts[i+2]); - break; - } -} - - - static void fetch_pipeline_run( struct draw_pt_middle_end *middle, const unsigned *fetch_elts, @@ -351,10 +264,15 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, fetch_count ); - run_pipeline( fpme, - pipeline_verts, - draw_elts, - draw_count ); + /* Run the pipeline + */ + draw_pt_run_pipeline( fpme->draw, + fpme->prim, + pipeline_verts, + fpme->pipeline_vertex_size, + fetch_count, + draw_elts, + draw_count ); /* Done -- that was easy, wasn't it: diff --git a/src/gallium/auxiliary/draw/draw_pt_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_pipeline.c new file mode 100644 index 0000000000..6e46d3925f --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_pipeline.c @@ -0,0 +1,162 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" + + +/** + * Add a point to the primitive queue. + * \param i0 index into user's vertex arrays + */ +static void do_point( struct draw_context *draw, + const char *v0 ) +{ + struct prim_header prim; + + prim.reset_line_stipple = 0; + prim.edgeflags = 1; + prim.pad = 0; + prim.v[0] = (struct vertex_header *)v0; + + draw->pipeline.first->point( draw->pipeline.first, &prim ); +} + + +/** + * Add a line to the primitive queue. + * \param i0 index into user's vertex arrays + * \param i1 index into user's vertex arrays + */ +static void do_line( struct draw_context *draw, + const char *v0, + const char *v1 ) +{ + struct prim_header prim; + + prim.reset_line_stipple = 1; /* fixme */ + prim.edgeflags = 1; + prim.pad = 0; + prim.v[0] = (struct vertex_header *)v0; + prim.v[1] = (struct vertex_header *)v1; + + draw->pipeline.first->line( draw->pipeline.first, &prim ); +} + +/** + * Add a triangle to the primitive queue. + */ +static void do_triangle( struct draw_context *draw, + char *v0, + char *v1, + char *v2 ) +{ + struct prim_header prim; + +// _mesa_printf("tri %d %d %d\n", i0, i1, i2); + prim.reset_line_stipple = 1; + prim.edgeflags = ~0; + prim.pad = 0; + prim.v[0] = (struct vertex_header *)v0; + prim.v[1] = (struct vertex_header *)v1; + prim.v[2] = (struct vertex_header *)v2; + + draw->pipeline.first->tri( draw->pipeline.first, &prim ); +} + + + +void draw_pt_reset_vertex_ids( struct draw_context *draw ) +{ + unsigned i; + char *verts = draw->pt.pipeline.verts; + unsigned stride = draw->pt.pipeline.vertex_stride; + + for (i = 0; i < draw->pt.pipeline.vertex_count; i++) { + ((struct vertex_header *)verts)->vertex_id = UNDEFINED_VERTEX_ID; + verts += stride; + } +} + + +/* Code to run the pipeline on a fairly arbitary collection of vertices. + * + * Vertex headers must be pre-initialized with the + * UNDEFINED_VERTEX_ID, this code will cause that id to become + * overwritten, so it may have to be reset if there is the intention + * to reuse the vertices. + * + * This code provides a callback to reset the vertex id's which the + * draw_vbuf.c code uses when it has to perform a flush. + */ +void draw_pt_run_pipeline( struct draw_context *draw, + unsigned prim, + char *verts, + unsigned stride, + unsigned vertex_count, + const ushort *elts, + unsigned count ) +{ + unsigned i; + + draw->pt.pipeline.verts = verts; + draw->pt.pipeline.vertex_stride = stride; + draw->pt.pipeline.vertex_count = vertex_count; + + switch (prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i++) + do_point( draw, + verts + stride * elts[i] ); + break; + case PIPE_PRIM_LINES: + for (i = 0; i+1 < count; i += 2) + do_line( draw, + verts + stride * elts[i+0], + verts + stride * elts[i+1]); + break; + case PIPE_PRIM_TRIANGLES: + for (i = 0; i+2 < count; i += 3) + do_triangle( draw, + verts + stride * elts[i+0], + verts + stride * elts[i+1], + verts + stride * elts[i+2]); + break; + } + + draw->pt.pipeline.verts = NULL; + draw->pt.pipeline.vertex_count = 0; +} + -- cgit v1.2.3 From 84501e68f6294370d6f2f6aec4e7eab57bcc0e72 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 4 Apr 2008 17:02:20 +0100 Subject: gallium: Handle client-supplied edgeflags. Also, implement support in the draw module. We were hardwiring these to one for quite a long time... Currently using a draw_set_edgeflags() function, may be better to push the argument into the draw_arrays() function. TBD. --- src/gallium/auxiliary/draw/draw_context.c | 16 +++++++ src/gallium/auxiliary/draw/draw_context.h | 3 ++ src/gallium/auxiliary/draw/draw_private.h | 13 ++---- .../auxiliary/draw/draw_pt_fetch_pipeline.c | 49 +++++++++++++++------- src/gallium/auxiliary/draw/draw_pt_pipeline.c | 8 ++-- src/gallium/auxiliary/draw/draw_vertex_cache.c | 2 +- src/gallium/include/pipe/p_context.h | 8 ++++ 7 files changed, 71 insertions(+), 28 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 470c1c571b..b3c65c90d6 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -444,3 +444,19 @@ void draw_set_render( struct draw_context *draw, { draw->render = render; } + +void draw_set_edgeflags( struct draw_context *draw, + const unsigned *edgeflag ) +{ + draw->user.edgeflag = edgeflag; +} + + +boolean draw_get_edgeflag( struct draw_context *draw, + unsigned idx ) +{ + if (draw->user.edgeflag) + return (draw->user.edgeflag[idx/32] & (1 << (idx%32))) != 0; + else + return 1; +} diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 84bae3bd78..c7ac32b452 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -155,6 +155,9 @@ void draw_set_mapped_vertex_buffer(struct draw_context *draw, void draw_set_mapped_constant_buffer(struct draw_context *draw, const void *buffer); +void draw_set_edgeflags( struct draw_context *draw, + const unsigned *edgeflag ); + /*********************************************************************** * draw_prim.c diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 48545af9e2..4d056f6dba 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -250,6 +250,8 @@ struct draw_context /* user-space vertex data, buffers */ struct { + const unsigned *edgeflag; + /** vertex element/index buffer (ex: glDrawElements) */ const void *elts; /** bytes per index (0, 1, 2 or 4) */ @@ -402,15 +404,6 @@ void draw_pt_run_pipeline( struct draw_context *draw, unsigned count ); -/* Prototype/hack (DEPRECATED) - */ -boolean -draw_passthrough_arrays(struct draw_context *draw, - unsigned prim, - unsigned start, - unsigned count); - - #define DRAW_FLUSH_SHADER_QUEUE 0x1 /* sized not to overflow, never raised */ #define DRAW_FLUSH_PRIM_QUEUE 0x2 #define DRAW_FLUSH_VERTEX_CACHE 0x4 @@ -420,6 +413,8 @@ draw_passthrough_arrays(struct draw_context *draw, void draw_do_flush( struct draw_context *draw, unsigned flags ); +boolean draw_get_edgeflag( struct draw_context *draw, + unsigned idx ); /** diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c index 0ddb400f7e..ba95420b72 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c @@ -72,6 +72,8 @@ struct fetch_pipeline_middle_end { struct draw_pt_middle_end base; struct draw_context *draw; + void (*header)( const unsigned *edgeflag, unsigned count, float **out); + struct { const ubyte *ptr; unsigned pitch; @@ -85,12 +87,6 @@ struct fetch_pipeline_middle_end { }; -static void fetch_NULL( const void *from, - float *attrib ) -{ -} - - static void emit_R32_FLOAT( const float *attrib, float **out ) @@ -126,8 +122,9 @@ static void emit_R32G32B32A32_FLOAT( const float *attrib, (*out) += 4; } -static void emit_header( const float *attrib, - float **out ) +static void header( const unsigned *edgeflag, + unsigned idx, + float **out ) { struct vertex_header *header = (struct vertex_header *) (*out); @@ -143,6 +140,26 @@ static void emit_header( const float *attrib, (*out) += 5; } + +static void header_ef( const unsigned *edgeflag, + unsigned idx, + float **out ) +{ + struct vertex_header *header = (struct vertex_header *) (*out); + + header->clipmask = 0; + header->edgeflag = (edgeflag[idx/32] & (1 << (idx%32))) != 0; + header->pad = 0; + header->vertex_id = UNDEFINED_VERTEX_ID; + + (*out)[1] = 0; + (*out)[2] = 0; + (*out)[3] = 0; + (*out)[3] = 1; + (*out) += 5; +} + + /** * General-purpose fetch from user's vertex arrays, emit to driver's * vertex buffer. @@ -155,12 +172,15 @@ fetch_store_general( struct fetch_pipeline_middle_end *fpme, const unsigned *fetch_elts, unsigned count ) { + const unsigned *edgeflag = fpme->draw->user.edgeflag; float *out = (float *)out_ptr; uint i, j; for (i = 0; i < count; i++) { unsigned elt = fetch_elts[i]; + fpme->header( edgeflag, i, &out ); + for (j = 0; j < fpme->nr_fetch; j++) { float attrib[4]; const ubyte *from = (fpme->fetch[j].ptr + @@ -200,12 +220,11 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, /* Emit the vertex header and empty clipspace coord field: */ - { - fpme->fetch[nr].ptr = NULL; - fpme->fetch[nr].pitch = 0; - fpme->fetch[nr].fetch = fetch_NULL; - fpme->fetch[nr].emit = emit_header; - nr++; + if (draw->user.edgeflag) { + fpme->header = header_ef; + } + else { + fpme->header = header; } @@ -232,7 +251,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, } fpme->nr_fetch = nr; - fpme->pipeline_vertex_size = (5 + (nr-1) * 4) * sizeof(float); + fpme->pipeline_vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); } diff --git a/src/gallium/auxiliary/draw/draw_pt_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_pipeline.c index 6e46d3925f..942df518eb 100644 --- a/src/gallium/auxiliary/draw/draw_pt_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_pipeline.c @@ -86,12 +86,14 @@ static void do_triangle( struct draw_context *draw, struct prim_header prim; // _mesa_printf("tri %d %d %d\n", i0, i1, i2); - prim.reset_line_stipple = 1; - prim.edgeflags = ~0; - prim.pad = 0; prim.v[0] = (struct vertex_header *)v0; prim.v[1] = (struct vertex_header *)v1; prim.v[2] = (struct vertex_header *)v2; + prim.reset_line_stipple = 1; + prim.edgeflags = ((prim.v[0]->edgeflag) | + (prim.v[1]->edgeflag << 1) | + (prim.v[2]->edgeflag << 2)); + prim.pad = 0; draw->pipeline.first->tri( draw->pipeline.first, &prim ); } diff --git a/src/gallium/auxiliary/draw/draw_vertex_cache.c b/src/gallium/auxiliary/draw/draw_vertex_cache.c index 161b247d4e..c0248979e2 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_cache.c +++ b/src/gallium/auxiliary/draw/draw_vertex_cache.c @@ -101,7 +101,7 @@ static struct vertex_header *get_vertex( struct draw_context *draw, draw->vs.queue[out].elt = i; draw->vs.queue[out].vertex->clipmask = 0; - draw->vs.queue[out].vertex->edgeflag = 1; /*XXX use user's edge flag! */ + draw->vs.queue[out].vertex->edgeflag = draw_get_edgeflag(draw, i); draw->vs.queue[out].vertex->pad = 0; draw->vs.queue[out].vertex->vertex_id = UNDEFINED_VERTEX_ID; diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 324f70185a..f3a9c2cd8b 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -57,6 +57,14 @@ struct pipe_context { void (*destroy)( struct pipe_context * ); + + /* Possible interface for setting edgeflags. These aren't really + * vertex elements, so don't fit there. + */ + void (*set_edgeflags)( struct pipe_context *, + const unsigned *bitfield ); + + /** * VBO drawing (return false on fallbacks (temporary??)) */ -- cgit v1.2.3 From 5ffc5cce1507fd407399911abefeea988a69394e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Apr 2008 12:24:01 -0600 Subject: gallium: new debug code, disabled --- src/gallium/auxiliary/draw/draw_clip.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_clip.c b/src/gallium/auxiliary/draw/draw_clip.c index 200152ecab..fd1c71152e 100644 --- a/src/gallium/auxiliary/draw/draw_clip.c +++ b/src/gallium/auxiliary/draw/draw_clip.c @@ -181,6 +181,21 @@ static void emit_poly( struct draw_stage *stage, (header.v[1]->edgeflag << 1) | (header.v[2]->edgeflag << 2)); + if (0) { + const struct draw_vertex_shader *vs = stage->draw->vertex_shader; + uint j, k; + printf("Clipped tri:\n"); + for (j = 0; j < 3; j++) { + for (k = 0; k < vs->info.num_outputs; k++) { + printf(" Vert %d: Attr %d: %f %f %f %f\n", j, k, + header.v[j]->data[k][0], + header.v[j]->data[k][1], + header.v[j]->data[k][2], + header.v[j]->data[k][3]); + } + } + } + stage->next->tri( stage->next, &header ); header.v[1]->edgeflag = tmp1; -- cgit v1.2.3 From c1d26d3dccafed808349c47dc12b94081f956560 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 2 Apr 2008 10:21:24 +0900 Subject: gallium: Use the custom snprintf implementation everywhere (for Win32). Because winddk's implemenation does not handle floats. --- src/gallium/auxiliary/util/p_debug.c | 11 +++-------- src/gallium/include/pipe/p_util.h | 9 +++++++++ 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 5447bbb6f5..090e3b7794 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -43,18 +43,13 @@ #ifdef WIN32 static INLINE void -rpl_EngDebugPrint(const char *format, ...) +_EngDebugPrint(const char *format, ...) { va_list ap; va_start(ap, format); EngDebugPrint("", (PCHAR)format, ap); va_end(ap); } - -int rpl_vsnprintf(char *, size_t, const char *, va_list); -int rpl_snprintf(char *str, size_t size, const char *format, ...); -#define vsnprintf rpl_vsnprintf -#define snprintf rpl_snprintf #endif @@ -65,8 +60,8 @@ void _debug_vprintf(const char *format, va_list ap) /* EngDebugPrint does not handle float point arguments, so we need to use * our own vsnprintf implementation */ char buf[512 + 1]; - rpl_vsnprintf(buf, sizeof(buf), format, ap); - rpl_EngDebugPrint("%s", buf); + vsnprintf(buf, sizeof(buf), format, ap); + _EngDebugPrint("%s", buf); #else /* TODO: Implement debug print for WINCE */ #endif diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index 1e7b8181f9..8e3aaee496 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -32,6 +32,7 @@ #include "p_debug.h" #include "p_pointer.h" #include +#include #ifdef __cplusplus @@ -137,6 +138,14 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) #define GETENV( X ) debug_get_option( X, NULL ) +#ifdef WIN32 +int rpl_vsnprintf(char *, size_t, const char *, va_list); +int rpl_snprintf(char *str, size_t size, const char *format, ...); +#define vsnprintf rpl_vsnprintf +#define snprintf rpl_snprintf +#endif + + /** * Return memory on given byte alignment */ -- cgit v1.2.3 From f1efef809caddff442ed45a59645b3f39498f521 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 5 Apr 2008 09:49:50 +0900 Subject: gallium: Fix typo. --- src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 55f32e6816..1ecc7d42ee 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -150,7 +150,7 @@ fenced_buffer_destroy(struct pb_buffer *buf) if (fenced_buf->fence) { struct pipe_winsys *winsys = fenced_list->winsys; - if(winsys->fence_finish(winsys, fenced_buf->fence, 0) != 0) { + if(winsys->fence_signalled(winsys, fenced_buf->fence, 0) != 0) { LIST_ADDTAIL(&fenced_buf->head, &fenced_list->delayed); fenced_list->numDelayed++; } -- cgit v1.2.3 From fdff063343ddfbfb1b2fa921e2efcc2fae35d0ad Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 5 Apr 2008 11:29:26 +0900 Subject: gallium: Keep fenced buffers list ordered. This allows to keep the list small without the overhead of full walks. --- .../auxiliary/pipebuffer/pb_buffer_fenced.c | 152 ++++++++++++++------- 1 file changed, 99 insertions(+), 53 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 1ecc7d42ee..24ba61a0b7 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -62,7 +62,6 @@ struct fenced_buffer_list struct pipe_winsys *winsys; size_t numDelayed; - size_t checkDelayed; struct list_head delayed; }; @@ -93,51 +92,93 @@ fenced_buffer(struct pb_buffer *buf) } +static INLINE void +_fenced_buffer_add(struct fenced_buffer *fenced_buf) +{ + struct fenced_buffer_list *fenced_list = fenced_buf->list; + + assert(fenced_buf->fence); + assert(!fenced_buf->head.prev); + assert(!fenced_buf->head.next); + LIST_ADDTAIL(&fenced_buf->head, &fenced_list->delayed); + ++fenced_list->numDelayed; +} + + +/** + * Actually destroy the buffer. + */ +static INLINE void +_fenced_buffer_destroy(struct fenced_buffer *fenced_buf) +{ + struct fenced_buffer_list *fenced_list = fenced_buf->list; + + assert(!fenced_buf->base.base.refcount); + assert(!fenced_buf->fence); + pb_reference(&fenced_buf->buffer, NULL); + FREE(fenced_buf); +} + + +static INLINE void +_fenced_buffer_remove(struct fenced_buffer *fenced_buf) +{ + struct fenced_buffer_list *fenced_list = fenced_buf->list; + struct pipe_winsys *winsys = fenced_list->winsys; + + assert(fenced_buf->fence); + + assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0); + winsys->fence_reference(winsys, &fenced_buf->fence, NULL); + + assert(fenced_buf->head.prev); + assert(fenced_buf->head.next); + LIST_DEL(&fenced_buf->head); +#ifdef DEBUG + fenced_buf->head.prev = NULL; + fenced_buf->head.next = NULL; +#endif + + assert(fenced_list->numDelayed); + --fenced_list->numDelayed; + + if(!fenced_buf->base.base.refcount) + _fenced_buffer_destroy(fenced_buf); +} + + +/** + * Free as many fenced buffers from the list head as possible. + */ static void _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, int wait) { struct pipe_winsys *winsys = fenced_list->winsys; - struct fenced_buffer *fenced_buf; - struct list_head *list, *prev; - int signaled = -1; - - list = fenced_list->delayed.next; - prev = list->prev; - for (; list != &fenced_list->delayed; list = prev, prev = list->prev) { - - fenced_buf = LIST_ENTRY(struct fenced_buffer, list, head); - - if (signaled != 0) { - if (wait) { - signaled = winsys->fence_finish(winsys, fenced_buf->fence, 0); - } - else { - signaled = winsys->fence_signalled(winsys, fenced_buf->fence, 0); - } + struct list_head *curr, *next; + struct fenced_buffer *fenced_buf; + struct pipe_fence_handle *prev_fence = NULL; + + curr = fenced_list->delayed.next; + next = curr->next; + while(curr != &fenced_list->delayed) { + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + + if(fenced_buf->fence != prev_fence) { + int signaled; + if (wait) + signaled = winsys->fence_finish(winsys, fenced_buf->fence, 0); + else + signaled = winsys->fence_signalled(winsys, fenced_buf->fence, 0); + if (signaled != 0) + break; + prev_fence = fenced_buf->fence; } - if (signaled != 0) { -#if 0 - /* XXX: we are assuming that buffers are freed in the same order they - * are fenced which may not always be true... - */ - break; -#else - signaled = -1; - continue; -#endif - } + _fenced_buffer_remove(fenced_buf); - winsys->fence_reference(winsys, &fenced_buf->fence, NULL); - - LIST_DEL(list); - fenced_list->numDelayed--; - - /* Do the delayed destroy: - */ - pb_reference(&fenced_buf->buffer, NULL); - FREE(fenced_buf); + curr = next; + next = curr->next; } } @@ -148,25 +189,29 @@ fenced_buffer_destroy(struct pb_buffer *buf) struct fenced_buffer *fenced_buf = fenced_buffer(buf); struct fenced_buffer_list *fenced_list = fenced_buf->list; + _glthread_LOCK_MUTEX(fenced_list->mutex); + assert(fenced_buf->base.base.refcount == 0); if (fenced_buf->fence) { struct pipe_winsys *winsys = fenced_list->winsys; - if(winsys->fence_signalled(winsys, fenced_buf->fence, 0) != 0) { - LIST_ADDTAIL(&fenced_buf->head, &fenced_list->delayed); - fenced_list->numDelayed++; - } + if(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0) { + struct list_head *curr, *prev; + curr = &fenced_buf->head; + prev = curr->prev; + do { + fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + _fenced_buffer_remove(fenced_buf); + curr = prev; + prev = curr->prev; + } while (curr != &fenced_list->delayed); + } else { - winsys->fence_reference(winsys, &fenced_buf->fence, NULL); - pb_reference(&fenced_buf->buffer, NULL); - FREE(fenced_buf); + /* delay destruction */ } } else { - pb_reference(&fenced_buf->buffer, NULL); - FREE(fenced_buf); + _fenced_buffer_destroy(fenced_buf); } - - if ((fenced_list->numDelayed % fenced_list->checkDelayed) == 0) - _fenced_buffer_list_check_free(fenced_list, 0); + _glthread_UNLOCK_MUTEX(fenced_list->mutex); } @@ -241,7 +286,11 @@ buffer_fence(struct pb_buffer *buf, struct pipe_winsys *winsys = fenced_list->winsys; _glthread_LOCK_MUTEX(fenced_list->mutex); + if (fenced_buf->fence) + _fenced_buffer_remove(fenced_buf); winsys->fence_reference(winsys, &fenced_buf->fence, fence); + if (fenced_buf->fence) + _fenced_buffer_add(fenced_buf); _glthread_UNLOCK_MUTEX(fenced_list->mutex); } @@ -261,9 +310,6 @@ fenced_buffer_list_create(struct pipe_winsys *winsys) fenced_list->numDelayed = 0; - /* TODO: don't hard code this */ - fenced_list->checkDelayed = 5; - _glthread_INIT_MUTEX(fenced_list->mutex); return fenced_list; -- cgit v1.2.3 From a8ca54955322b34c77a7459246e5639d3f8610cd Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sat, 5 Apr 2008 10:22:47 +0200 Subject: draw: Use debug_printf(). --- src/gallium/auxiliary/draw/draw_clip.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_clip.c b/src/gallium/auxiliary/draw/draw_clip.c index fd1c71152e..e24c5d8032 100644 --- a/src/gallium/auxiliary/draw/draw_clip.c +++ b/src/gallium/auxiliary/draw/draw_clip.c @@ -184,10 +184,10 @@ static void emit_poly( struct draw_stage *stage, if (0) { const struct draw_vertex_shader *vs = stage->draw->vertex_shader; uint j, k; - printf("Clipped tri:\n"); + debug_printf("Clipped tri:\n"); for (j = 0; j < 3; j++) { for (k = 0; k < vs->info.num_outputs; k++) { - printf(" Vert %d: Attr %d: %f %f %f %f\n", j, k, + debug_printf(" Vert %d: Attr %d: %f %f %f %f\n", j, k, header.v[j]->data[k][0], header.v[j]->data[k][1], header.v[j]->data[k][2], -- cgit v1.2.3 From 5c19e47362c2d193850e98bd43a2bc2b783b0b5c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 6 Apr 2008 14:29:30 +0100 Subject: draw: fix edgeflag handling on the pt paths Encode edgeflags (and reset_stipple info) into the top two bits of the fetch elements. This info could be moved elsewhere, but for now we can live with a 1<<30 maximum element size... Also use the primitive decomposition code from draw_prim.c verbatim, as it includes all this stuff and is known to work. --- src/gallium/auxiliary/draw/draw_pt.h | 10 + .../auxiliary/draw/draw_pt_fetch_pipeline.c | 16 +- src/gallium/auxiliary/draw/draw_pt_pipeline.c | 6 +- src/gallium/auxiliary/draw/draw_pt_vcache.c | 396 ++++++++++++--------- 4 files changed, 253 insertions(+), 175 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 800072c511..590823fd33 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -40,6 +40,16 @@ typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx ); struct draw_pt_middle_end; struct draw_context; +/* We use the top couple of bits in the vertex fetch index to convey a + * little API information. This limits the number of vertices we can + * address to only 1 billion -- if that becomes a problem, these could + * be moved out & passed separately. + */ +#define DRAW_PT_EDGEFLAG (1<<30) +#define DRAW_PT_RESET_STIPPLE (1<<31) +#define DRAW_PT_FLAG_MASK (3<<30) + + /* The "front end" - prepare sets of fetch, draw elements for the * middle end. * diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c index ba95420b72..4c2a281b29 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c @@ -72,7 +72,7 @@ struct fetch_pipeline_middle_end { struct draw_pt_middle_end base; struct draw_context *draw; - void (*header)( const unsigned *edgeflag, unsigned count, float **out); + void (*header)( unsigned idx, float **out); struct { const ubyte *ptr; @@ -122,8 +122,7 @@ static void emit_R32G32B32A32_FLOAT( const float *attrib, (*out) += 4; } -static void header( const unsigned *edgeflag, - unsigned idx, +static void header( unsigned idx, float **out ) { struct vertex_header *header = (struct vertex_header *) (*out); @@ -141,14 +140,15 @@ static void header( const unsigned *edgeflag, } -static void header_ef( const unsigned *edgeflag, - unsigned idx, +static void header_ef( unsigned idx, float **out ) { struct vertex_header *header = (struct vertex_header *) (*out); + /* XXX: need a reset_stipple flag in the vertex header too? + */ header->clipmask = 0; - header->edgeflag = (edgeflag[idx/32] & (1 << (idx%32))) != 0; + header->edgeflag = (idx & DRAW_PT_EDGEFLAG) != 0; header->pad = 0; header->vertex_id = UNDEFINED_VERTEX_ID; @@ -172,14 +172,14 @@ fetch_store_general( struct fetch_pipeline_middle_end *fpme, const unsigned *fetch_elts, unsigned count ) { - const unsigned *edgeflag = fpme->draw->user.edgeflag; float *out = (float *)out_ptr; uint i, j; for (i = 0; i < count; i++) { unsigned elt = fetch_elts[i]; - fpme->header( edgeflag, i, &out ); + fpme->header( elt, &out ); + elt &= ~DRAW_PT_FLAG_MASK; for (j = 0; j < fpme->nr_fetch; j++) { float attrib[4]; diff --git a/src/gallium/auxiliary/draw/draw_pt_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_pipeline.c index 942df518eb..e70e63d08f 100644 --- a/src/gallium/auxiliary/draw/draw_pt_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_pipeline.c @@ -85,7 +85,6 @@ static void do_triangle( struct draw_context *draw, { struct prim_header prim; -// _mesa_printf("tri %d %d %d\n", i0, i1, i2); prim.v[0] = (struct vertex_header *)v0; prim.v[1] = (struct vertex_header *)v1; prim.v[2] = (struct vertex_header *)v2; @@ -95,6 +94,11 @@ static void do_triangle( struct draw_context *draw, (prim.v[2]->edgeflag << 2)); prim.pad = 0; + if (0) debug_printf("tri ef: %d %d %d\n", + prim.v[0]->edgeflag, + prim.v[1]->edgeflag, + prim.v[2]->edgeflag); + draw->pipeline.first->tri( draw->pipeline.first, &prim ); } diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 5a068761df..107dcfc269 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -55,9 +55,6 @@ struct vcache_frontend { unsigned draw_count; unsigned fetch_count; - pt_elt_func elt_func; - const void *elt_ptr; - struct draw_pt_middle_end *middle; unsigned input_prim; @@ -66,17 +63,6 @@ struct vcache_frontend { static void vcache_flush( struct vcache_frontend *vcache ) { -#if 0 - /* Should always be true if output_prim == input_prim, otherwise - * not so much... - */ - unsigned i; - for (i = 0; i < vcache->draw_count; i++) { - assert( vcache->fetch_elts[vcache->draw_elts[i]] == - vcache->elt_func(vcache->elt_ptr, i) ); - } -#endif - if (vcache->draw_count) { vcache->middle->run( vcache->middle, vcache->fetch_elts, @@ -115,26 +101,73 @@ static void vcache_elt( struct vcache_frontend *vcache, vcache->draw_elts[vcache->draw_count++] = vcache->out[idx]; } + +static unsigned add_edgeflag( struct vcache_frontend *vcache, + unsigned idx, + unsigned mask ) +{ + if (mask && draw_get_edgeflag(vcache->draw, idx)) + return idx | DRAW_PT_EDGEFLAG; + else + return idx; +} + + +static unsigned add_reset_stipple( unsigned idx, + unsigned reset ) +{ + if (reset) + return idx | DRAW_PT_RESET_STIPPLE; + else + return idx; +} + static void vcache_triangle( struct vcache_frontend *vcache, unsigned i0, unsigned i1, unsigned i2 ) { - /* TODO: encode edgeflags in draw_elts */ + vcache_elt(vcache, i0 | DRAW_PT_EDGEFLAG | DRAW_PT_RESET_STIPPLE); + vcache_elt(vcache, i1 | DRAW_PT_EDGEFLAG); + vcache_elt(vcache, i2 | DRAW_PT_EDGEFLAG); + vcache_check_flush(vcache); +} + + +static void vcache_ef_triangle( struct vcache_frontend *vcache, + boolean reset_stipple, + unsigned ef_mask, + unsigned i0, + unsigned i1, + unsigned i2 ) +{ + i0 = add_edgeflag( vcache, i0, (ef_mask >> 0) & 1 ); + i1 = add_edgeflag( vcache, i1, (ef_mask >> 1) & 1 ); + i2 = add_edgeflag( vcache, i2, (ef_mask >> 2) & 1 ); + + i0 = add_reset_stipple( i0, reset_stipple ); + vcache_elt(vcache, i0); vcache_elt(vcache, i1); vcache_elt(vcache, i2); vcache_check_flush(vcache); + + if (0) debug_printf("emit tri ef: %d %d %d\n", + !!(i0 & DRAW_PT_EDGEFLAG), + !!(i1 & DRAW_PT_EDGEFLAG), + !!(i2 & DRAW_PT_EDGEFLAG)); + } + static void vcache_line( struct vcache_frontend *vcache, - boolean reset, + boolean reset_stipple, unsigned i0, unsigned i1 ) { - /* TODO: encode reset-line-stipple in draw_elts */ - (void) reset; + i0 = add_reset_stipple( i0, reset_stipple ); + vcache_elt(vcache, i0); vcache_elt(vcache, i1); vcache_check_flush(vcache); @@ -158,39 +191,43 @@ static void vcache_quad( struct vcache_frontend *vcache, vcache_triangle( vcache, i1, i2, i3 ); } +static void vcache_ef_quad( struct vcache_frontend *vcache, + unsigned i0, + unsigned i1, + unsigned i2, + unsigned i3 ) +{ + const unsigned omitEdge2 = ~(1 << 1); + const unsigned omitEdge3 = ~(1 << 2); + vcache_ef_triangle( vcache, 1, omitEdge2, i0, i1, i3 ); + vcache_ef_triangle( vcache, 0, omitEdge3, i1, i2, i3 ); +} + -static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { - PIPE_PRIM_POINTS, - PIPE_PRIM_LINES, - PIPE_PRIM_LINES, - PIPE_PRIM_LINES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES -}; -static void vcache_run_pv2( struct draw_pt_front_end *frontend, - pt_elt_func get_elt, - const void *elts, - unsigned count ) +static void vcache_run( struct draw_pt_front_end *frontend, + pt_elt_func get_elt, + const void *elts, + unsigned count ) { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; + struct draw_context *draw = vcache->draw; + + boolean unfilled = (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || + draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL); + + boolean flatfirst = (draw->rasterizer->flatshade && + draw->rasterizer->flatshade_first); unsigned i; - - /* These are for validation only: - */ - vcache->elt_func = get_elt; - vcache->elt_ptr = elts; + +// debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count ); switch (vcache->input_prim) { case PIPE_PRIM_POINTS: for (i = 0; i < count; i ++) { - vcache_point( vcache, - get_elt(elts, i) ); + vcache_point( vcache, + get_elt(elts, i + 0) ); } break; @@ -205,17 +242,17 @@ static void vcache_run_pv2( struct draw_pt_front_end *frontend, case PIPE_PRIM_LINE_LOOP: if (count >= 2) { - for (i = 1; i < count; i++) { - vcache_line( vcache, + for (i = 1; i < count; i++) { + vcache_line( vcache, i == 1, /* XXX: only if vb not split */ get_elt(elts, i - 1), - get_elt(elts, i) ); - } + get_elt(elts, i )); + } - vcache_line( vcache, + vcache_line( vcache, 0, get_elt(elts, count - 1), - get_elt(elts, 0) ); + get_elt(elts, 0 )); } break; @@ -224,72 +261,163 @@ static void vcache_run_pv2( struct draw_pt_front_end *frontend, vcache_line( vcache, i == 1, get_elt(elts, i - 1), - get_elt(elts, i) ); + get_elt(elts, i )); } break; case PIPE_PRIM_TRIANGLES: - for (i = 0; i+2 < count; i += 3) { - vcache_triangle( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2) ); + if (unfilled) { + for (i = 0; i+2 < count; i += 3) { + vcache_ef_triangle( vcache, + 1, + ~0, + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2 )); + } + } + else { + for (i = 0; i+2 < count; i += 3) { + vcache_triangle( vcache, + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2 )); + } } break; case PIPE_PRIM_TRIANGLE_STRIP: - for (i = 0; i+2 < count; i++) { - if (i & 1) { - vcache_triangle( vcache, - get_elt(elts, i + 1), - get_elt(elts, i + 0), - get_elt(elts, i + 2) ); + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + if (i & 1) { + vcache_triangle( vcache, + get_elt(elts, i + 0), + get_elt(elts, i + 2), + get_elt(elts, i + 1 )); + } + else { + vcache_triangle( vcache, + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2 )); + } } - else { - vcache_triangle( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2) ); + } + else { + for (i = 0; i+2 < count; i++) { + if (i & 1) { + vcache_triangle( vcache, + get_elt(elts, i + 1), + get_elt(elts, i + 0), + get_elt(elts, i + 2 )); + } + else { + vcache_triangle( vcache, + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2 )); + } } } break; case PIPE_PRIM_TRIANGLE_FAN: - for (i = 0; i+2 < count; i++) { - vcache_triangle( vcache, - get_elt(elts, 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2) ); + if (count >= 3) { + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + vcache_triangle( vcache, + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, 0 )); + } + } + else { + for (i = 0; i+2 < count; i++) { + vcache_triangle( vcache, + get_elt(elts, 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2 )); + } + } } break; case PIPE_PRIM_QUADS: - for (i = 0; i+3 < count; i += 4) { - vcache_quad( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, i + 3)); + if (unfilled) { + for (i = 0; i+3 < count; i += 4) { + vcache_ef_quad( vcache, + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, i + 3)); + } + } + else { + for (i = 0; i+3 < count; i += 4) { + vcache_quad( vcache, + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, i + 3)); + } } break; case PIPE_PRIM_QUAD_STRIP: - for (i = 0; i+3 < count; i += 2) { - vcache_quad( vcache, - get_elt(elts, i + 2), - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 3)); + if (unfilled) { + for (i = 0; i+3 < count; i += 2) { + vcache_ef_quad( vcache, + get_elt(elts, i + 2), + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 3)); + } + } + else { + for (i = 0; i+3 < count; i += 2) { + vcache_quad( vcache, + get_elt(elts, i + 2), + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 3)); + } } break; case PIPE_PRIM_POLYGON: - for (i = 0; i+2 < count; i++) { - vcache_triangle( vcache, - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, 0)); + if (unfilled) { + /* These bitflags look a little odd because we submit the + * vertices as (1,2,0) to satisfy flatshade requirements. + */ + const unsigned edge_first = (1<<2); + const unsigned edge_middle = (1<<0); + const unsigned edge_last = (1<<1); + + for (i = 0; i+2 < count; i++) { + unsigned ef_mask = edge_middle; + + if (i == 0) + ef_mask |= edge_first; + + if (i + 3 == count) + ef_mask |= edge_last; + + vcache_ef_triangle( vcache, + i == 0, + ef_mask, + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, 0)); + } + } + else { + for (i = 0; i+2 < count; i++) { + vcache_triangle( vcache, + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, 0)); + } } break; @@ -302,88 +430,21 @@ static void vcache_run_pv2( struct draw_pt_front_end *frontend, } -static void vcache_run_pv0( struct draw_pt_front_end *frontend, - pt_elt_func get_elt, - const void *elts, - unsigned count ) -{ - struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; - unsigned i; - - /* These are for validation only: - */ - vcache->elt_func = get_elt; - vcache->elt_ptr = elts; - - switch (vcache->input_prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i ++) { - vcache_point( vcache, - get_elt(elts, i) ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) { - vcache_line( vcache, - TRUE, - get_elt(elts, i + 0), - get_elt(elts, i + 1)); - } - break; - - case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < count; i++) { - vcache_line( vcache, - i == 1, - get_elt(elts, i - 1), - get_elt(elts, i) ); - } - break; - - case PIPE_PRIM_TRIANGLES: - for (i = 0; i+2 < count; i += 3) { - vcache_triangle( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2) ); - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - for (i = 0; i+2 < count; i++) { - if (i & 1) { - vcache_triangle( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 2), - get_elt(elts, i + 1) ); - } - else { - vcache_triangle( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2) ); - } - } - break; - case PIPE_PRIM_TRIANGLE_FAN: - for (i = 0; i+2 < count; i++) { - vcache_triangle( vcache, - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, 0) ); - } - break; +static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES +}; - default: - assert(0); - break; - } - - vcache_flush( vcache ); -} static void vcache_prepare( struct draw_pt_front_end *frontend, unsigned prim, @@ -391,11 +452,14 @@ static void vcache_prepare( struct draw_pt_front_end *frontend, { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; +/* if (vcache->draw->rasterizer->flatshade_first) vcache->base.run = vcache_run_pv0; else vcache->base.run = vcache_run_pv2; - +*/ + + vcache->base.run = vcache_run; vcache->input_prim = prim; vcache->output_prim = reduced_prim[prim]; -- cgit v1.2.3 From a8a5376406cabf5aa6a44f7d37f5f8abbb4adf56 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 7 Apr 2008 12:28:31 +0100 Subject: draw: strip edgeflags out of fetch-emit path --- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 39f0b40838..0806076956 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -149,7 +149,7 @@ fetch_store_general( struct fetch_emit_middle_end *feme, uint i, j; for (i = 0; i < count; i++) { - unsigned elt = fetch_elts[i]; + unsigned elt = fetch_elts[i] & ~DRAW_PT_FLAG_MASK; for (j = 0; j < feme->nr_fetch; j++) { float attrib[4]; -- cgit v1.2.3 From 4e2127b0e5cb6411123e16dd562626cd70814a9a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 8 Apr 2008 11:30:36 +0900 Subject: gallium: Allow to debug memory leaks in nested scopes. --- src/gallium/auxiliary/util/p_debug_mem.c | 16 ++++++++-------- src/gallium/include/pipe/p_debug.h | 6 +++--- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/p_debug_mem.c index c160afe5b7..97ec9c5b39 100644 --- a/src/gallium/auxiliary/util/p_debug_mem.c +++ b/src/gallium/auxiliary/util/p_debug_mem.c @@ -70,8 +70,7 @@ struct debug_memory_header static struct list_head list = { &list, &list }; -static unsigned long start_no = 0; -static unsigned long end_no = 0; +static unsigned long last_no = 0; void * @@ -84,7 +83,7 @@ debug_malloc(const char *file, unsigned line, const char *function, if(!hdr) return NULL; - hdr->no = end_no++; + hdr->no = last_no++; hdr->file = file; hdr->line = line; hdr->function = function; @@ -147,14 +146,14 @@ debug_realloc(const char *file, unsigned line, const char *function, return new_ptr; } -void -debug_memory_reset(void) +unsigned long +debug_memory_begin(void) { - start_no = end_no; + return last_no; } void -debug_memory_report(void) +debug_memory_end(unsigned long start_no) { struct list_head *entry; @@ -164,7 +163,8 @@ debug_memory_report(void) void *ptr; hdr = LIST_ENTRY(struct debug_memory_header, entry, head); ptr = (void *)((char *)hdr + sizeof(*hdr)); - if(hdr->no >= start_no) + if(start_no <= hdr->no && hdr->no < last_no || + last_no < start_no && (hdr->no < last_no || start_no <= hdr->no)) debug_printf("%s:%u:%s: %u bytes at %p not freed\n", hdr->file, hdr->line, hdr->function, hdr->size, ptr); diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index 9e52ad9a37..235c47abac 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -304,11 +304,11 @@ void * debug_realloc(const char *file, unsigned line, const char *function, void *old_ptr, size_t old_size, size_t new_size ); -void -debug_memory_reset(void); +unsigned long +debug_memory_begin(void); void -debug_memory_report(void); +debug_memory_end(unsigned long beginning); #ifdef __cplusplus -- cgit v1.2.3 From 4382b0c9cba3efa8a60252f6ddf2f0653352f7d8 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 8 Apr 2008 20:42:06 +0900 Subject: gallium: Fix overzealous assert. --- src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 24ba61a0b7..b1f7d93057 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -97,6 +97,7 @@ _fenced_buffer_add(struct fenced_buffer *fenced_buf) { struct fenced_buffer_list *fenced_list = fenced_buf->list; + assert(fenced_buf->base.base.refcount); assert(fenced_buf->fence); assert(!fenced_buf->head.prev); assert(!fenced_buf->head.next); @@ -128,7 +129,6 @@ _fenced_buffer_remove(struct fenced_buffer *fenced_buf) assert(fenced_buf->fence); - assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0); winsys->fence_reference(winsys, &fenced_buf->fence, NULL); assert(fenced_buf->head.prev); @@ -174,6 +174,9 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, break; prev_fence = fenced_buf->fence; } + else { + assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0); + } _fenced_buffer_remove(fenced_buf); @@ -199,6 +202,7 @@ fenced_buffer_destroy(struct pb_buffer *buf) prev = curr->prev; do { fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); + assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0); _fenced_buffer_remove(fenced_buf); curr = prev; prev = curr->prev; -- cgit v1.2.3 From c95dcc49629b72b95826e87e067d7a48753605fb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 8 Apr 2008 17:59:28 +0100 Subject: remove usage of vertex_header --- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 6 - src/gallium/auxiliary/draw/draw_vbuf.c | 20 - src/gallium/auxiliary/draw/draw_vertex.c | 5 - src/gallium/auxiliary/draw/draw_vertex.h | 2 - src/gallium/auxiliary/draw/draw_vf.c | 56 +- src/gallium/drivers/softpipe/Makefile | 1 + src/gallium/drivers/softpipe/SConscript | 1 + src/gallium/drivers/softpipe/sp_prim_setup.c | 1189 +-------------------- src/gallium/drivers/softpipe/sp_prim_setup.h | 6 + src/gallium/drivers/softpipe/sp_prim_vbuf.c | 223 ++-- src/gallium/drivers/softpipe/sp_setup.c | 1249 +++++++++++++++++++++++ src/gallium/drivers/softpipe/sp_setup.h | 53 + src/gallium/drivers/softpipe/sp_state_derived.c | 13 +- 13 files changed, 1480 insertions(+), 1344 deletions(-) create mode 100644 src/gallium/drivers/softpipe/sp_setup.c create mode 100644 src/gallium/drivers/softpipe/sp_setup.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 0806076956..e4e144ef19 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -212,12 +212,6 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, case EMIT_1F: feme->fetch[i].emit = emit_R32_FLOAT; break; - case EMIT_HEADER: - feme->fetch[i].ptr = (const ubyte *)&zero; - feme->fetch[i].pitch = 0; - feme->fetch[i].fetch = fetch_R32_FLOAT; - feme->fetch[i].emit = emit_R32_FLOAT; - break; case EMIT_1F_PSIZE: feme->fetch[i].ptr = (const ubyte *)&feme->draw->rasterizer->point_size; feme->fetch[i].pitch = 0; diff --git a/src/gallium/auxiliary/draw/draw_vbuf.c b/src/gallium/auxiliary/draw/draw_vbuf.c index f83b441e93..e3216ff711 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_vbuf.c @@ -126,13 +126,6 @@ dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) case EMIT_OMIT: debug_printf("EMIT_OMIT:"); break; - case EMIT_ALL: - assert(i == 0); - assert(j == 0); - debug_printf("EMIT_ALL:\t"); - for(k = 0; k < vinfo->size*4; ++k) - debug_printf("%02x ", *data++); - break; case EMIT_1F: debug_printf("EMIT_1F:\t"); debug_printf("%f ", *(float *)data); data += sizeof(float); @@ -217,19 +210,6 @@ emit_vertex( struct vbuf_stage *vbuf, case EMIT_OMIT: /* no-op */ break; - case EMIT_ALL: - /* just copy the whole vertex as-is to the vbuf */ - assert(i == 0); - assert(j == 0); - memcpy(vbuf->vertex_ptr, vertex, vinfo->size * 4); - vbuf->vertex_ptr += vinfo->size; - count += vinfo->size; - break; - case EMIT_HEADER: - memcpy(vbuf->vertex_ptr, vertex, sizeof(*vertex)); - *vbuf->vertex_ptr += sizeof(*vertex) / 4; - count += sizeof(*vertex) / 4; - break; case EMIT_1F: *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); count++; diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c index 970adc95e7..168036eee8 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.c +++ b/src/gallium/auxiliary/draw/draw_vertex.c @@ -52,9 +52,6 @@ draw_compute_vertex_size(struct vertex_info *vinfo) switch (vinfo->emit[i]) { case EMIT_OMIT: break; - case EMIT_HEADER: - vinfo->size += sizeof(struct vertex_header) / 4; - break; case EMIT_4UB: /* fall-through */ case EMIT_1F_PSIZE: @@ -71,8 +68,6 @@ draw_compute_vertex_size(struct vertex_info *vinfo) case EMIT_4F: vinfo->size += 4; break; - case EMIT_ALL: - /* fall-through */ default: assert(0); } diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index abd2017ed3..65818463ca 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -47,8 +47,6 @@ */ enum attrib_emit { EMIT_OMIT, /**< don't emit the attribute */ - EMIT_ALL, /**< emit whole post-xform vertex, w/ header */ - EMIT_HEADER, /**< emit vertex_header struct (XXX temp?) */ EMIT_1F, EMIT_1F_PSIZE, /**< insert constant point size */ EMIT_2F, diff --git a/src/gallium/auxiliary/draw/draw_vf.c b/src/gallium/auxiliary/draw/draw_vf.c index 7bb34ace7a..9d0154c50d 100644 --- a/src/gallium/auxiliary/draw/draw_vf.c +++ b/src/gallium/auxiliary/draw/draw_vf.c @@ -205,7 +205,7 @@ void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf, const struct vertex_info *vinfo, float point_size ) { - unsigned i, j, k; + unsigned i, j; struct draw_vf_attr *a = vf->attr; struct draw_vf_attr_map attrs[PIPE_MAX_SHADER_INPUTS]; unsigned count = 0; /* for debug/sanity */ @@ -217,60 +217,6 @@ void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf, case EMIT_OMIT: /* no-op */ break; - case EMIT_ALL: { - /* just copy the whole vertex as-is to the vbuf */ - unsigned s = vinfo->size; - assert(i == 0); - assert(j == 0); - /* copy the vertex header */ - /* XXX: we actually don't copy the header, just pad it */ - attrs[nr_attrs].attrib = 0; - attrs[nr_attrs].format = DRAW_EMIT_PAD; - attrs[nr_attrs].offset = offsetof(struct vertex_header, data); - s -= offsetof(struct vertex_header, data)/4; - count += offsetof(struct vertex_header, data)/4; - nr_attrs++; - /* copy the vertex data */ - for(k = 0; k < (s & ~0x3); k += 4) { - attrs[nr_attrs].attrib = k/4; - attrs[nr_attrs].format = DRAW_EMIT_4F; - attrs[nr_attrs].offset = 0; - nr_attrs++; - count += 4; - } - /* tail */ - /* XXX: actually, this shouldn't be needed */ - attrs[nr_attrs].attrib = k/4; - attrs[nr_attrs].offset = 0; - switch(s & 0x3) { - case 0: - break; - case 1: - attrs[nr_attrs].format = DRAW_EMIT_1F; - nr_attrs++; - count += 1; - break; - case 2: - attrs[nr_attrs].format = DRAW_EMIT_2F; - nr_attrs++; - count += 2; - break; - case 3: - attrs[nr_attrs].format = DRAW_EMIT_3F; - nr_attrs++; - count += 3; - break; - } - break; - } - case EMIT_HEADER: - /* XXX emit new DRAW_EMIT_HEADER attribute??? */ - attrs[nr_attrs].attrib = 0; - attrs[nr_attrs].format = DRAW_EMIT_PAD; - attrs[nr_attrs].offset = offsetof(struct vertex_header, data); - count += offsetof(struct vertex_header, data)/4; - nr_attrs++; - break; case EMIT_1F: attrs[nr_attrs].attrib = j; attrs[nr_attrs].format = DRAW_EMIT_1F; diff --git a/src/gallium/drivers/softpipe/Makefile b/src/gallium/drivers/softpipe/Makefile index 19dfd8c1a7..120bdfd9dd 100644 --- a/src/gallium/drivers/softpipe/Makefile +++ b/src/gallium/drivers/softpipe/Makefile @@ -27,6 +27,7 @@ C_SOURCES = \ sp_quad_stencil.c \ sp_quad_stipple.c \ sp_screen.c \ + sp_setup.c \ sp_state_blend.c \ sp_state_clip.c \ sp_state_derived.c \ diff --git a/src/gallium/drivers/softpipe/SConscript b/src/gallium/drivers/softpipe/SConscript index 06931fa8d8..c1f7daa8ab 100644 --- a/src/gallium/drivers/softpipe/SConscript +++ b/src/gallium/drivers/softpipe/SConscript @@ -14,6 +14,7 @@ softpipe = env.ConvenienceLibrary( 'sp_flush.c', 'sp_prim_setup.c', 'sp_prim_vbuf.c', + 'sp_setup.c', 'sp_quad_alpha_test.c', 'sp_quad_blend.c', 'sp_quad.c', diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index c7eb12b3bb..6fe463b74c 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -26,7 +26,9 @@ **************************************************************************/ /** - * \brief Primitive rasterization/rendering (points, lines, triangles) + * \brief A draw stage that drives our triangle setup routines from + * within the draw pipeline. One of two ways to drive setup, the + * other being in sp_prim_vbuf.c. * * \author Keith Whitwell * \author Brian Paul @@ -34,29 +36,12 @@ #include "sp_context.h" -#include "sp_headers.h" -#include "sp_quad.h" +#include "sp_setup.h" #include "sp_state.h" #include "sp_prim_setup.h" #include "draw/draw_private.h" #include "draw/draw_vertex.h" #include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" - -#define DEBUG_VERTS 0 -#define DEBUG_FRAGS 0 - -/** - * Triangle edge info - */ -struct edge { - float dx; /**< X(v1) - X(v0), used only during setup */ - float dy; /**< Y(v1) - Y(v0), used only during setup */ - float dxdy; /**< dx/dy */ - float sx, sy; /**< first sample point coord */ - int lines; /**< number of lines on this edge */ -}; - /** * Triangle setup info (derived from draw_stage). @@ -65,39 +50,7 @@ struct edge { struct setup_stage { struct draw_stage stage; /**< This must be first (base class) */ - struct softpipe_context *softpipe; - - /* Vertices are just an array of floats making up each attribute in - * turn. Currently fixed at 4 floats, but should change in time. - * Codegen will help cope with this. - */ - const struct vertex_header *vmax; - const struct vertex_header *vmid; - const struct vertex_header *vmin; - const struct vertex_header *vprovoke; - - struct edge ebot; - struct edge etop; - struct edge emaj; - - float oneoverarea; - - struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; - struct tgsi_interp_coef posCoef; /* For Z, W */ - struct quad_header quad; - - struct { - int left[2]; /**< [0] = row0, [1] = row1 */ - int right[2]; - int y; - unsigned y_flags; - unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ - } span; - -#if DEBUG_FRAGS - uint numFragsEmitted; /**< per primitive */ - uint numFragsWritten; /**< per primitive */ -#endif + struct setup_context *setup; }; @@ -111,1112 +64,50 @@ static INLINE struct setup_stage *setup_stage( struct draw_stage *stage ) } -/** - * Clip setup->quad against the scissor/surface bounds. - */ -static INLINE void -quad_clip(struct setup_stage *setup) -{ - const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; - const int minx = (int) cliprect->minx; - const int maxx = (int) cliprect->maxx; - const int miny = (int) cliprect->miny; - const int maxy = (int) cliprect->maxy; - - if (setup->quad.x0 >= maxx || - setup->quad.y0 >= maxy || - setup->quad.x0 + 1 < minx || - setup->quad.y0 + 1 < miny) { - /* totally clipped */ - setup->quad.mask = 0x0; - return; - } - if (setup->quad.x0 < minx) - setup->quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - if (setup->quad.y0 < miny) - setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - if (setup->quad.x0 == maxx - 1) - setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - if (setup->quad.y0 == maxy - 1) - setup->quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); -} - - -/** - * Emit a quad (pass to next stage) with clipping. - */ -static INLINE void -clip_emit_quad(struct setup_stage *setup) -{ - quad_clip(setup); - if (setup->quad.mask) { - struct softpipe_context *sp = setup->softpipe; - sp->quad.first->run(sp->quad.first, &setup->quad); - } -} - - -/** - * Emit a quad (pass to next stage). No clipping is done. - */ -static INLINE void -emit_quad( struct setup_stage *setup, int x, int y, unsigned mask ) -{ - struct softpipe_context *sp = setup->softpipe; - setup->quad.x0 = x; - setup->quad.y0 = y; - setup->quad.mask = mask; -#if DEBUG_FRAGS - if (mask & 1) setup->numFragsEmitted++; - if (mask & 2) setup->numFragsEmitted++; - if (mask & 4) setup->numFragsEmitted++; - if (mask & 8) setup->numFragsEmitted++; -#endif - sp->quad.first->run(sp->quad.first, &setup->quad); -#if DEBUG_FRAGS - mask = setup->quad.mask; - if (mask & 1) setup->numFragsWritten++; - if (mask & 2) setup->numFragsWritten++; - if (mask & 4) setup->numFragsWritten++; - if (mask & 8) setup->numFragsWritten++; -#endif -} - - -/** - * Given an X or Y coordinate, return the block/quad coordinate that it - * belongs to. - */ -static INLINE int block( int x ) -{ - return x & ~1; -} - - -/** - * Compute mask which indicates which pixels in the 2x2 quad are actually inside - * the triangle's bounds. - * - * this is pretty nasty... may need to rework flush_spans again to - * fix it, if possible. - */ -static unsigned calculate_mask( struct setup_stage *setup, int x ) -{ - unsigned mask = 0x0; - - if (x >= setup->span.left[0] && x < setup->span.right[0]) - mask |= MASK_TOP_LEFT; - - if (x >= setup->span.left[1] && x < setup->span.right[1]) - mask |= MASK_BOTTOM_LEFT; - - if (x+1 >= setup->span.left[0] && x+1 < setup->span.right[0]) - mask |= MASK_TOP_RIGHT; - - if (x+1 >= setup->span.left[1] && x+1 < setup->span.right[1]) - mask |= MASK_BOTTOM_RIGHT; - - return mask; -} - - -/** - * Render a horizontal span of quads - */ -static void flush_spans( struct setup_stage *setup ) -{ - int minleft, maxright; - int x; - - switch (setup->span.y_flags) { - case 0x3: - /* both odd and even lines written (both quad rows) */ - minleft = MIN2(setup->span.left[0], setup->span.left[1]); - maxright = MAX2(setup->span.right[0], setup->span.right[1]); - break; - - case 0x1: - /* only even line written (quad top row) */ - minleft = setup->span.left[0]; - maxright = setup->span.right[0]; - break; - - case 0x2: - /* only odd line written (quad bottom row) */ - minleft = setup->span.left[1]; - maxright = setup->span.right[1]; - break; - - default: - return; - } - - /* XXX this loop could be moved into the above switch cases and - * calculate_mask() could be simplified a bit... - */ - for (x = block(minleft); x <= block(maxright); x += 2) { - emit_quad( setup, x, setup->span.y, - calculate_mask( setup, x ) ); - } - - setup->span.y = 0; - setup->span.y_flags = 0; - setup->span.right[0] = 0; - setup->span.right[1] = 0; -} - -#if DEBUG_VERTS -static void print_vertex(const struct setup_stage *setup, - const struct vertex_header *v) -{ - int i; - debug_printf("Vertex: (%p)\n", v); - for (i = 0; i < setup->quad.nr_attrs; i++) { - debug_printf(" %d: %f %f %f %f\n", i, - v->data[i][0], v->data[i][1], v->data[i][2], v->data[i][3]); - } -} -#endif - -static boolean setup_sort_vertices( struct setup_stage *setup, - const struct prim_header *prim ) -{ - const struct vertex_header *v0 = prim->v[0]; - const struct vertex_header *v1 = prim->v[1]; - const struct vertex_header *v2 = prim->v[2]; - -#if DEBUG_VERTS - debug_printf("Triangle:\n"); - print_vertex(setup, v0); - print_vertex(setup, v1); - print_vertex(setup, v2); -#endif - - setup->vprovoke = v2; - - /* determine bottom to top order of vertices */ - { - float y0 = v0->data[0][1]; - float y1 = v1->data[0][1]; - float y2 = v2->data[0][1]; - if (y0 <= y1) { - if (y1 <= y2) { - /* y0<=y1<=y2 */ - setup->vmin = v0; - setup->vmid = v1; - setup->vmax = v2; - } - else if (y2 <= y0) { - /* y2<=y0<=y1 */ - setup->vmin = v2; - setup->vmid = v0; - setup->vmax = v1; - } - else { - /* y0<=y2<=y1 */ - setup->vmin = v0; - setup->vmid = v2; - setup->vmax = v1; - } - } - else { - if (y0 <= y2) { - /* y1<=y0<=y2 */ - setup->vmin = v1; - setup->vmid = v0; - setup->vmax = v2; - } - else if (y2 <= y1) { - /* y2<=y1<=y0 */ - setup->vmin = v2; - setup->vmid = v1; - setup->vmax = v0; - } - else { - /* y1<=y2<=y0 */ - setup->vmin = v1; - setup->vmid = v2; - setup->vmax = v0; - } - } - } - - setup->ebot.dx = setup->vmid->data[0][0] - setup->vmin->data[0][0]; - setup->ebot.dy = setup->vmid->data[0][1] - setup->vmin->data[0][1]; - setup->emaj.dx = setup->vmax->data[0][0] - setup->vmin->data[0][0]; - setup->emaj.dy = setup->vmax->data[0][1] - setup->vmin->data[0][1]; - setup->etop.dx = setup->vmax->data[0][0] - setup->vmid->data[0][0]; - setup->etop.dy = setup->vmax->data[0][1] - setup->vmid->data[0][1]; - - /* - * Compute triangle's area. Use 1/area to compute partial - * derivatives of attributes later. - * - * The area will be the same as prim->det, but the sign may be - * different depending on how the vertices get sorted above. - * - * To determine whether the primitive is front or back facing we - * use the prim->det value because its sign is correct. - */ - { - const float area = (setup->emaj.dx * setup->ebot.dy - - setup->ebot.dx * setup->emaj.dy); - - setup->oneoverarea = 1.0f / area; - /* - debug_printf("%s one-over-area %f area %f det %f\n", - __FUNCTION__, setup->oneoverarea, area, prim->det ); - */ - } - - /* We need to know if this is a front or back-facing triangle for: - * - the GLSL gl_FrontFacing fragment attribute (bool) - * - two-sided stencil test - */ - setup->quad.facing = (prim->det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW); - - return TRUE; -} - - -/** - * Compute a0 for a constant-valued coefficient (GL_FLAT shading). - * The value value comes from vertex->data[slot][i]. - * The result will be put into setup->coef[slot].a0[i]. - * \param slot which attribute slot - * \param i which component of the slot (0..3) - */ -static void const_coeff( struct setup_stage *setup, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) -{ - assert(i <= 3); - - coef->dadx[i] = 0; - coef->dady[i] = 0; - /* need provoking vertex info! - */ - coef->a0[i] = setup->vprovoke->data[vertSlot][i]; -} - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a triangle. - */ -static void tri_linear_coeff( struct setup_stage *setup, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) -{ - float botda = setup->vmid->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; - float majda = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; - float a = setup->ebot.dy * majda - botda * setup->emaj.dy; - float b = setup->emaj.dx * botda - majda * setup->ebot.dx; - float dadx = a * setup->oneoverarea; - float dady = b * setup->oneoverarea; - - assert(i <= 3); - - coef->dadx[i] = dadx; - coef->dady[i] = dady; - - /* calculate a0 as the value which would be sampled for the - * fragment at (0,0), taking into account that we want to sample at - * pixel centers, in other words (0.5, 0.5). - * - * this is neat but unfortunately not a good way to do things for - * triangles with very large values of dadx or dady as it will - * result in the subtraction and re-addition from a0 of a very - * large number, which means we'll end up loosing a lot of the - * fractional bits and precision from a0. the way to fix this is - * to define a0 as the sample at a pixel center somewhere near vmin - * instead - i'll switch to this later. - */ - coef->a0[i] = (setup->vmin->data[vertSlot][i] - - (dadx * (setup->vmin->data[0][0] - 0.5f) + - dady * (setup->vmin->data[0][1] - 0.5f))); - - /* - debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", - slot, "xyzw"[i], - setup->coef[slot].a0[i], - setup->coef[slot].dadx[i], - setup->coef[slot].dady[i]); - */ -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a triangle. - * We basically multiply the vertex value by 1/w before computing - * the plane coefficients (a0, dadx, dady). - * Later, when we compute the value at a particular fragment position we'll - * divide the interpolated value by the interpolated W at that fragment. - */ -static void tri_persp_coeff( struct setup_stage *setup, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) -{ - /* premultiply by 1/w (v->data[0][3] is always W): - */ - float mina = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3]; - float mida = setup->vmid->data[vertSlot][i] * setup->vmid->data[0][3]; - float maxa = setup->vmax->data[vertSlot][i] * setup->vmax->data[0][3]; - float botda = mida - mina; - float majda = maxa - mina; - float a = setup->ebot.dy * majda - botda * setup->emaj.dy; - float b = setup->emaj.dx * botda - majda * setup->ebot.dx; - float dadx = a * setup->oneoverarea; - float dady = b * setup->oneoverarea; - - /* - debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, - setup->vmin->data[vertSlot][i], - setup->vmid->data[vertSlot][i], - setup->vmax->data[vertSlot][i] - ); - */ - assert(i <= 3); - - coef->dadx[i] = dadx; - coef->dady[i] = dady; - coef->a0[i] = (mina - - (dadx * (setup->vmin->data[0][0] - 0.5f) + - dady * (setup->vmin->data[0][1] - 0.5f))); -} - - -/** - * Special coefficient setup for gl_FragCoord. - * X and Y are trivial, though Y has to be inverted for OpenGL. - * Z and W are copied from posCoef which should have already been computed. - * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. - */ static void -setup_fragcoord_coeff(struct setup_stage *setup, uint slot) -{ - /*X*/ - setup->coef[slot].a0[0] = 0; - setup->coef[slot].dadx[0] = 1.0; - setup->coef[slot].dady[0] = 0.0; - /*Y*/ - if (setup->softpipe->rasterizer->origin_lower_left) { - /* y=0=bottom */ - const int winHeight = setup->softpipe->framebuffer.height; - setup->coef[slot].a0[1] = (float) (winHeight - 1); - setup->coef[slot].dady[1] = -1.0; - } - else { - /* y=0=top */ - setup->coef[slot].a0[1] = 0.0; - setup->coef[slot].dady[1] = 1.0; - } - setup->coef[slot].dadx[1] = 0.0; - /*Z*/ - setup->coef[slot].a0[2] = setup->posCoef.a0[2]; - setup->coef[slot].dadx[2] = setup->posCoef.dadx[2]; - setup->coef[slot].dady[2] = setup->posCoef.dady[2]; - /*W*/ - setup->coef[slot].a0[3] = setup->posCoef.a0[3]; - setup->coef[slot].dadx[3] = setup->posCoef.dadx[3]; - setup->coef[slot].dady[3] = setup->posCoef.dady[3]; -} - - - -/** - * Compute the setup->coef[] array dadx, dady, a0 values. - * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized. - */ -static void setup_tri_coefficients( struct setup_stage *setup ) -{ - struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; - const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); - uint fragSlot; - - /* z and w are done by linear interpolation: - */ - tri_linear_coeff(setup, &setup->posCoef, 0, 2); - tri_linear_coeff(setup, &setup->posCoef, 0, 3); - - /* setup interpolation for all the remaining attributes: - */ - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->src_index[fragSlot]; - uint j; - - switch (vinfo->interp_mode[fragSlot]) { - case INTERP_CONSTANT: - for (j = 0; j < NUM_CHANNELS; j++) - const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_LINEAR: - for (j = 0; j < NUM_CHANNELS; j++) - tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_PERSPECTIVE: - for (j = 0; j < NUM_CHANNELS; j++) - tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } - - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { - /* FOG.y = front/back facing XXX fix this */ - setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; - setup->coef[fragSlot].dadx[1] = 0.0; - setup->coef[fragSlot].dady[1] = 0.0; - } - } -} - - - -static void setup_tri_edges( struct setup_stage *setup ) -{ - float vmin_x = setup->vmin->data[0][0] + 0.5f; - float vmid_x = setup->vmid->data[0][0] + 0.5f; - - float vmin_y = setup->vmin->data[0][1] - 0.5f; - float vmid_y = setup->vmid->data[0][1] - 0.5f; - float vmax_y = setup->vmax->data[0][1] - 0.5f; - - setup->emaj.sy = CEILF(vmin_y); - setup->emaj.lines = (int) CEILF(vmax_y - setup->emaj.sy); - setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy; - setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy; - - setup->etop.sy = CEILF(vmid_y); - setup->etop.lines = (int) CEILF(vmax_y - setup->etop.sy); - setup->etop.dxdy = setup->etop.dx / setup->etop.dy; - setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy; - - setup->ebot.sy = CEILF(vmin_y); - setup->ebot.lines = (int) CEILF(vmid_y - setup->ebot.sy); - setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy; - setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy; -} - - -/** - * Render the upper or lower half of a triangle. - * Scissoring/cliprect is applied here too. - */ -static void subtriangle( struct setup_stage *setup, - struct edge *eleft, - struct edge *eright, - unsigned lines ) -{ - const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; - const int minx = (int) cliprect->minx; - const int maxx = (int) cliprect->maxx; - const int miny = (int) cliprect->miny; - const int maxy = (int) cliprect->maxy; - int y, start_y, finish_y; - int sy = (int)eleft->sy; - - assert((int)eleft->sy == (int) eright->sy); - - /* clip top/bottom */ - start_y = sy; - finish_y = sy + lines; - - if (start_y < miny) - start_y = miny; - - if (finish_y > maxy) - finish_y = maxy; - - start_y -= sy; - finish_y -= sy; - - /* - debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); - */ - - for (y = start_y; y < finish_y; y++) { - - /* avoid accumulating adds as floats don't have the precision to - * accurately iterate large triangle edges that way. luckily we - * can just multiply these days. - * - * this is all drowned out by the attribute interpolation anyway. - */ - int left = (int)(eleft->sx + y * eleft->dxdy); - int right = (int)(eright->sx + y * eright->dxdy); - - /* clip left/right */ - if (left < minx) - left = minx; - if (right > maxx) - right = maxx; - - if (left < right) { - int _y = sy + y; - if (block(_y) != setup->span.y) { - flush_spans(setup); - setup->span.y = block(_y); - } - - setup->span.left[_y&1] = left; - setup->span.right[_y&1] = right; - setup->span.y_flags |= 1<<(_y&1); - } - } - - - /* save the values so that emaj can be restarted: - */ - eleft->sx += lines * eleft->dxdy; - eright->sx += lines * eright->dxdy; - eleft->sy += lines; - eright->sy += lines; -} - - -/** - * Do setup for triangle rasterization, then render the triangle. - */ -static void setup_tri( struct draw_stage *stage, - struct prim_header *prim ) +do_tri(struct draw_stage *stage, struct prim_header *prim) { struct setup_stage *setup = setup_stage( stage ); - /* - debug_printf("%s\n", __FUNCTION__ ); - */ - -#if DEBUG_FRAGS - setup->numFragsEmitted = 0; - setup->numFragsWritten = 0; -#endif - - setup_sort_vertices( setup, prim ); - setup_tri_coefficients( setup ); - setup_tri_edges( setup ); - - setup->quad.prim = PRIM_TRI; - - setup->span.y = 0; - setup->span.y_flags = 0; - setup->span.right[0] = 0; - setup->span.right[1] = 0; - /* setup->span.z_mode = tri_z_mode( setup->ctx ); */ - - /* init_constant_attribs( setup ); */ - - if (setup->oneoverarea < 0.0) { - /* emaj on left: - */ - subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines ); - subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines ); - } - else { - /* emaj on right: - */ - subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines ); - subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines ); - } - - flush_spans( setup ); - -#if DEBUG_FRAGS - printf("Tri: %u frags emitted, %u written\n", - setup->numFragsEmitted, - setup->numFragsWritten); -#endif -} - - - -/** - * Compute a0, dadx and dady for a linearly interpolated coefficient, - * for a line. - */ -static void -line_linear_coeff(struct setup_stage *setup, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) -{ - const float da = setup->vmax->data[vertSlot][i] - setup->vmin->data[vertSlot][i]; - const float dadx = da * setup->emaj.dx * setup->oneoverarea; - const float dady = da * setup->emaj.dy * setup->oneoverarea; - coef->dadx[i] = dadx; - coef->dady[i] = dady; - coef->a0[i] = (setup->vmin->data[vertSlot][i] - - (dadx * (setup->vmin->data[0][0] - 0.5f) + - dady * (setup->vmin->data[0][1] - 0.5f))); -} - - -/** - * Compute a0, dadx and dady for a perspective-corrected interpolant, - * for a line. - */ -static void -line_persp_coeff(struct setup_stage *setup, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) -{ - /* XXX double-check/verify this arithmetic */ - const float a0 = setup->vmin->data[vertSlot][i] * setup->vmin->data[0][3]; - const float a1 = setup->vmax->data[vertSlot][i] * setup->vmax->data[0][3]; - const float da = a1 - a0; - const float dadx = da * setup->emaj.dx * setup->oneoverarea; - const float dady = da * setup->emaj.dy * setup->oneoverarea; - coef->dadx[i] = dadx; - coef->dady[i] = dady; - coef->a0[i] = (setup->vmin->data[vertSlot][i] - - (dadx * (setup->vmin->data[0][0] - 0.5f) + - dady * (setup->vmin->data[0][1] - 0.5f))); -} - - -/** - * Compute the setup->coef[] array dadx, dady, a0 values. - * Must be called after setup->vmin,vmax are initialized. - */ -static INLINE void -setup_line_coefficients(struct setup_stage *setup, struct prim_header *prim) -{ - struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; - const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); - uint fragSlot; - - /* use setup->vmin, vmax to point to vertices */ - setup->vprovoke = prim->v[1]; - setup->vmin = prim->v[0]; - setup->vmax = prim->v[1]; - - setup->emaj.dx = setup->vmax->data[0][0] - setup->vmin->data[0][0]; - setup->emaj.dy = setup->vmax->data[0][1] - setup->vmin->data[0][1]; - /* NOTE: this is not really 1/area */ - setup->oneoverarea = 1.0f / (setup->emaj.dx * setup->emaj.dx + - setup->emaj.dy * setup->emaj.dy); - - /* z and w are done by linear interpolation: - */ - line_linear_coeff(setup, &setup->posCoef, 0, 2); - line_linear_coeff(setup, &setup->posCoef, 0, 3); - - /* setup interpolation for all the remaining attributes: - */ - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->src_index[fragSlot]; - uint j; - - switch (vinfo->interp_mode[fragSlot]) { - case INTERP_CONSTANT: - for (j = 0; j < NUM_CHANNELS; j++) - const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_LINEAR: - for (j = 0; j < NUM_CHANNELS; j++) - line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_PERSPECTIVE: - for (j = 0; j < NUM_CHANNELS; j++) - line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } - - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { - /* FOG.y = front/back facing XXX fix this */ - setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; - setup->coef[fragSlot].dadx[1] = 0.0; - setup->coef[fragSlot].dady[1] = 0.0; - } - } + setup_tri( setup->setup, + prim->det, + prim->v[0]->data, + prim->v[1]->data, + prim->v[2]->data ); } - -/** - * Plot a pixel in a line segment. - */ -static INLINE void -plot(struct setup_stage *setup, int x, int y) -{ - const int iy = y & 1; - const int ix = x & 1; - const int quadX = x - ix; - const int quadY = y - iy; - const int mask = (1 << ix) << (2 * iy); - - if (quadX != setup->quad.x0 || - quadY != setup->quad.y0) - { - /* flush prev quad, start new quad */ - - if (setup->quad.x0 != -1) - clip_emit_quad(setup); - - setup->quad.x0 = quadX; - setup->quad.y0 = quadY; - setup->quad.mask = 0x0; - } - - setup->quad.mask |= mask; -} - - -/** - * Do setup for line rasterization, then render the line. - * Single-pixel width, no stipple, etc. We rely on the 'draw' module - * to handle stippling and wide lines. - */ static void -setup_line(struct draw_stage *stage, struct prim_header *prim) +do_line(struct draw_stage *stage, struct prim_header *prim) { - const struct vertex_header *v0 = prim->v[0]; - const struct vertex_header *v1 = prim->v[1]; struct setup_stage *setup = setup_stage( stage ); - int x0 = (int) v0->data[0][0]; - int x1 = (int) v1->data[0][0]; - int y0 = (int) v0->data[0][1]; - int y1 = (int) v1->data[0][1]; - int dx = x1 - x0; - int dy = y1 - y0; - int xstep, ystep; - if (dx == 0 && dy == 0) - return; - - setup_line_coefficients(setup, prim); - - if (dx < 0) { - dx = -dx; /* make positive */ - xstep = -1; - } - else { - xstep = 1; - } - - if (dy < 0) { - dy = -dy; /* make positive */ - ystep = -1; - } - else { - ystep = 1; - } - - assert(dx >= 0); - assert(dy >= 0); - - setup->quad.x0 = setup->quad.y0 = -1; - setup->quad.mask = 0x0; - setup->quad.prim = PRIM_LINE; - /* XXX temporary: set coverage to 1.0 so the line appears - * if AA mode happens to be enabled. - */ - setup->quad.coverage[0] = - setup->quad.coverage[1] = - setup->quad.coverage[2] = - setup->quad.coverage[3] = 1.0; - - if (dx > dy) { - /*** X-major line ***/ - int i; - const int errorInc = dy + dy; - int error = errorInc - dx; - const int errorDec = error - dx; - - for (i = 0; i < dx; i++) { - plot(setup, x0, y0); - - x0 += xstep; - if (error < 0) { - error += errorInc; - } - else { - error += errorDec; - y0 += ystep; - } - } - } - else { - /*** Y-major line ***/ - int i; - const int errorInc = dx + dx; - int error = errorInc - dy; - const int errorDec = error - dy; - - for (i = 0; i < dy; i++) { - plot(setup, x0, y0); - - y0 += ystep; - if (error < 0) { - error += errorInc; - } - else { - error += errorDec; - x0 += xstep; - } - } - } - - /* draw final quad */ - if (setup->quad.mask) { - clip_emit_quad(setup); - } + setup_line( setup->setup, + prim->v[0]->data, + prim->v[1]->data ); } - static void -point_persp_coeff(struct setup_stage *setup, - const struct vertex_header *vert, - struct tgsi_interp_coef *coef, - uint vertSlot, uint i) -{ - assert(i <= 3); - coef->dadx[i] = 0.0F; - coef->dady[i] = 0.0F; - coef->a0[i] = vert->data[vertSlot][i] * vert->data[0][3]; -} - - -/** - * Do setup for point rasterization, then render the point. - * Round or square points... - * XXX could optimize a lot for 1-pixel points. - */ -static void -setup_point(struct draw_stage *stage, struct prim_header *prim) +do_point(struct draw_stage *stage, struct prim_header *prim) { struct setup_stage *setup = setup_stage( stage ); - struct softpipe_context *softpipe = setup->softpipe; - const struct sp_fragment_shader *spfs = softpipe->fs; - const struct vertex_header *v0 = prim->v[0]; - const int sizeAttr = setup->softpipe->psize_slot; - const float size - = sizeAttr > 0 ? v0->data[sizeAttr][0] - : setup->softpipe->rasterizer->point_size; - const float halfSize = 0.5F * size; - const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth; - const float x = v0->data[0][0]; /* Note: data[0] is always position */ - const float y = v0->data[0][1]; - const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); - uint fragSlot; - - /* For points, all interpolants are constant-valued. - * However, for point sprites, we'll need to setup texcoords appropriately. - * XXX: which coefficients are the texcoords??? - * We may do point sprites as textured quads... - * - * KW: We don't know which coefficients are texcoords - ultimately - * the choice of what interpolation mode to use for each attribute - * should be determined by the fragment program, using - * per-attribute declaration statements that include interpolation - * mode as a parameter. So either the fragment program will have - * to be adjusted for pointsprite vs normal point behaviour, or - * otherwise a special interpolation mode will have to be defined - * which matches the required behaviour for point sprites. But - - * the latter is not a feature of normal hardware, and as such - * probably should be ruled out on that basis. - */ - setup->vprovoke = prim->v[0]; - - /* setup Z, W */ - const_coeff(setup, &setup->posCoef, 0, 2); - const_coeff(setup, &setup->posCoef, 0, 3); - - for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->src_index[fragSlot]; - uint j; - - switch (vinfo->interp_mode[fragSlot]) { - case INTERP_CONSTANT: - /* fall-through */ - case INTERP_LINEAR: - for (j = 0; j < NUM_CHANNELS; j++) - const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_PERSPECTIVE: - for (j = 0; j < NUM_CHANNELS; j++) - point_persp_coeff(setup, setup->vprovoke, - &setup->coef[fragSlot], vertSlot, j); - break; - case INTERP_POS: - setup_fragcoord_coeff(setup, fragSlot); - break; - default: - assert(0); - } - - if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { - /* FOG.y = front/back facing XXX fix this */ - setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; - setup->coef[fragSlot].dadx[1] = 0.0; - setup->coef[fragSlot].dady[1] = 0.0; - } - } - - setup->quad.prim = PRIM_POINT; - - if (halfSize <= 0.5 && !round) { - /* special case for 1-pixel points */ - const int ix = ((int) x) & 1; - const int iy = ((int) y) & 1; - setup->quad.x0 = (int) x - ix; - setup->quad.y0 = (int) y - iy; - setup->quad.mask = (1 << ix) << (2 * iy); - clip_emit_quad(setup); - } - else { - if (round) { - /* rounded points */ - const int ixmin = block((int) (x - halfSize)); - const int ixmax = block((int) (x + halfSize)); - const int iymin = block((int) (y - halfSize)); - const int iymax = block((int) (y + halfSize)); - const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */ - const float rmax = halfSize + 0.7071F; - const float rmin2 = MAX2(0.0F, rmin * rmin); - const float rmax2 = rmax * rmax; - const float cscale = 1.0F / (rmax2 - rmin2); - int ix, iy; - - for (iy = iymin; iy <= iymax; iy += 2) { - for (ix = ixmin; ix <= ixmax; ix += 2) { - float dx, dy, dist2, cover; - - setup->quad.mask = 0x0; - - dx = (ix + 0.5f) - x; - dy = (iy + 0.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); - setup->quad.mask |= MASK_TOP_LEFT; - } - - dx = (ix + 1.5f) - x; - dy = (iy + 0.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); - setup->quad.mask |= MASK_TOP_RIGHT; - } - dx = (ix + 0.5f) - x; - dy = (iy + 1.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); - setup->quad.mask |= MASK_BOTTOM_LEFT; - } - - dx = (ix + 1.5f) - x; - dy = (iy + 1.5f) - y; - dist2 = dx * dx + dy * dy; - if (dist2 <= rmax2) { - cover = 1.0F - (dist2 - rmin2) * cscale; - setup->quad.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); - setup->quad.mask |= MASK_BOTTOM_RIGHT; - } - - if (setup->quad.mask) { - setup->quad.x0 = ix; - setup->quad.y0 = iy; - clip_emit_quad(setup); - } - } - } - } - else { - /* square points */ - const int xmin = (int) (x + 0.75 - halfSize); - const int ymin = (int) (y + 0.25 - halfSize); - const int xmax = xmin + (int) size; - const int ymax = ymin + (int) size; - /* XXX could apply scissor to xmin,ymin,xmax,ymax now */ - const int ixmin = block(xmin); - const int ixmax = block(xmax - 1); - const int iymin = block(ymin); - const int iymax = block(ymax - 1); - int ix, iy; - - /* - debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax); - */ - for (iy = iymin; iy <= iymax; iy += 2) { - uint rowMask = 0xf; - if (iy < ymin) { - /* above the top edge */ - rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); - } - if (iy + 1 >= ymax) { - /* below the bottom edge */ - rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); - } - - for (ix = ixmin; ix <= ixmax; ix += 2) { - uint mask = rowMask; - - if (ix < xmin) { - /* fragment is past left edge of point, turn off left bits */ - mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); - } - if (ix + 1 >= xmax) { - /* past the right edge */ - mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); - } - - setup->quad.mask = mask; - setup->quad.x0 = ix; - setup->quad.y0 = iy; - clip_emit_quad(setup); - } - } - } - } + setup_point( setup->setup, + prim->v[0]->data ); } + static void setup_begin( struct draw_stage *stage ) { struct setup_stage *setup = setup_stage(stage); - struct softpipe_context *sp = setup->softpipe; - const struct sp_fragment_shader *fs = setup->softpipe->fs; - uint i; - - if (sp->dirty) { - softpipe_update_derived(sp); - } - /* Mark surfaces as defined now */ - for (i = 0; i < sp->framebuffer.num_cbufs; i++){ - if (sp->framebuffer.cbufs[i]) { - sp->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; - } - } - if (sp->framebuffer.zsbuf) { - sp->framebuffer.zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; - } + setup_prepare( setup->setup ); - setup->quad.nr_attrs = fs->info.num_inputs; - - sp->quad.first->begin(sp->quad.first); - - stage->point = setup_point; - stage->line = setup_line; - stage->tri = setup_tri; + stage->point = do_point; + stage->line = do_line; + stage->tri = do_tri; } @@ -1269,19 +160,29 @@ static void render_destroy( struct draw_stage *stage ) */ struct draw_stage *sp_draw_render_stage( struct softpipe_context *softpipe ) { - struct setup_stage *setup = CALLOC_STRUCT(setup_stage); + struct setup_stage *sstage = CALLOC_STRUCT(setup_stage); - setup->softpipe = softpipe; - setup->stage.draw = softpipe->draw; - setup->stage.point = setup_first_point; - setup->stage.line = setup_first_line; - setup->stage.tri = setup_first_tri; - setup->stage.flush = setup_flush; - setup->stage.reset_stipple_counter = reset_stipple_counter; - setup->stage.destroy = render_destroy; + sstage->setup = setup_create_context(softpipe); + sstage->stage.draw = softpipe->draw; + sstage->stage.point = setup_first_point; + sstage->stage.line = setup_first_line; + sstage->stage.tri = setup_first_tri; + sstage->stage.flush = setup_flush; + sstage->stage.reset_stipple_counter = reset_stipple_counter; + sstage->stage.destroy = render_destroy; - setup->quad.coef = setup->coef; - setup->quad.posCoef = &setup->posCoef; + return (struct draw_stage *)sstage; +} - return &setup->stage; +struct setup_context * +sp_draw_setup_context( struct draw_stage *stage ) +{ + struct setup_stage *ssetup = setup_stage(stage); + return ssetup->setup; +} + +void +sp_draw_flush( struct draw_stage *stage ) +{ + stage->flush( stage, 0 ); } diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.h b/src/gallium/drivers/softpipe/sp_prim_setup.h index f3e8a79dd9..49bdd98ed8 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.h +++ b/src/gallium/drivers/softpipe/sp_prim_setup.h @@ -69,6 +69,12 @@ typedef void (*vbuf_draw_func)( struct pipe_context *pipe, extern struct draw_stage * sp_draw_render_stage( struct softpipe_context *softpipe ); +extern struct setup_context * +sp_draw_setup_context( struct draw_stage * ); + +extern void +sp_draw_flush( struct draw_stage * ); + extern struct draw_stage * sp_draw_vbuf_stage( struct draw_context *draw_context, diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index d940718ed2..025a0113bf 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -1,8 +1,8 @@ /************************************************************************** - * + * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,7 +22,7 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ /** @@ -30,7 +30,7 @@ * softpipe rendering pipeline. * Probably not desired in general, but useful for testing/debuggin. * Enabled/Disabled with SP_VBUF env var. - * + * * Authors * Brian Paul */ @@ -39,14 +39,16 @@ #include "sp_context.h" #include "sp_state.h" #include "sp_prim_vbuf.h" +#include "sp_prim_setup.h" +#include "sp_setup.h" #include "draw/draw_context.h" -#include "draw/draw_private.h" #include "draw/draw_vbuf.h" #define SP_MAX_VBUF_INDEXES 1024 #define SP_MAX_VBUF_SIZE 4096 +typedef const float (*cptrf4)[4]; /** * Subclass of vbuf_render. @@ -69,7 +71,6 @@ softpipe_vbuf_render(struct vbuf_render *vbr) } - static const struct vertex_info * sp_vbuf_get_vertex_info(struct vbuf_render *vbr) { @@ -91,7 +92,7 @@ sp_vbuf_allocate_vertices(struct vbuf_render *vbr, static void -sp_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, +sp_vbuf_release_vertices(struct vbuf_render *vbr, void *vertices, unsigned vertex_size, unsigned vertices_used) { struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); @@ -114,7 +115,7 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) else { return FALSE; } - + } @@ -122,22 +123,19 @@ sp_vbuf_set_primitive(struct vbuf_render *vbr, unsigned prim) * Recalculate prim's determinant. * XXX is this needed? */ -static void -calc_det(struct prim_header *header) +static float +calc_det( const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) { - /* Window coords: */ - const float *v0 = header->v[0]->data[0]; - const float *v1 = header->v[1]->data[0]; - const float *v2 = header->v[2]->data[0]; - /* edge vectors e = v0 - v2, f = v1 - v2 */ - const float ex = v0[0] - v2[0]; - const float ey = v0[1] - v2[1]; - const float fx = v1[0] - v2[0]; - const float fy = v1[1] - v2[1]; - + const float ex = v0[0][0] - v2[0][0]; + const float ey = v0[0][1] - v2[0][1]; + const float fx = v1[0][0] - v2[0][0]; + const float fy = v1[0][1] - v2[0][1]; + /* det = cross(e,f).z */ - header->det = ex * fy - ey * fx; + return ex * fy - ey * fx; } @@ -147,48 +145,51 @@ sp_vbuf_draw(struct vbuf_render *vbr, const ushort *indices, uint nr_indices) struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); struct softpipe_context *softpipe = cvbr->softpipe; struct draw_stage *setup = softpipe->setup; - struct prim_header prim; unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); unsigned i, j; void *vertex_buffer = cvbr->vertex_buffer; - - prim.det = 0; - prim.reset_line_stipple = 0; - prim.edgeflags = 0; - prim.pad = 0; + cptrf4 v[3]; + struct setup_context *setup_ctx = sp_draw_setup_context(setup); switch (cvbr->prim) { case PIPE_PRIM_TRIANGLES: for (i = 0; i < nr_indices; i += 3) { - for (j = 0; j < 3; j++) - prim.v[j] = (struct vertex_header *)((char *)vertex_buffer + - indices[i+j] * vertex_size); - - calc_det(&prim); - setup->tri( setup, &prim ); + for (j = 0; j < 3; j++) + v[j] = (cptrf4)((char *)vertex_buffer + + indices[i+j] * vertex_size); + + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2]); } break; case PIPE_PRIM_LINES: for (i = 0; i < nr_indices; i += 2) { - for (j = 0; j < 2; j++) - prim.v[j] = (struct vertex_header *)((char *)vertex_buffer + - indices[i+j] * vertex_size); - - setup->line( setup, &prim ); + for (j = 0; j < 2; j++) + v[j] = (cptrf4)((char *)vertex_buffer + + indices[i+j] * vertex_size); + + setup_line( setup_ctx, + v[0], + v[1] ); } break; case PIPE_PRIM_POINTS: for (i = 0; i < nr_indices; i++) { - prim.v[0] = (struct vertex_header *)((char *)vertex_buffer + - indices[i] * vertex_size); - setup->point( setup, &prim ); + v[0] = (cptrf4)((char *)vertex_buffer + + indices[i] * vertex_size); + + setup_point( setup_ctx, + v[0] ); } break; } - setup->flush( setup, 0 ); + sp_draw_flush( setup ); } @@ -202,111 +203,131 @@ sp_vbuf_draw_arrays(struct vbuf_render *vbr, uint start, uint nr) struct softpipe_vbuf_render *cvbr = softpipe_vbuf_render(vbr); struct softpipe_context *softpipe = cvbr->softpipe; struct draw_stage *setup = softpipe->setup; - struct prim_header prim; const void *vertex_buffer = cvbr->vertex_buffer; const unsigned vertex_size = softpipe->vertex_info_vbuf.size * sizeof(float); unsigned i; - - prim.det = 0; - prim.reset_line_stipple = 0; - prim.edgeflags = 0; - prim.pad = 0; + struct setup_context *setup_ctx = sp_draw_setup_context(setup); + cptrf4 v[3]; #define VERTEX(I) \ - (struct vertex_header *) ((char *) vertex_buffer + (I) * vertex_size) + (cptrf4) ((char *) vertex_buffer + (I) * vertex_size) switch (cvbr->prim) { case PIPE_PRIM_POINTS: for (i = 0; i < nr; i++) { - prim.v[0] = VERTEX(i); - setup->point( setup, &prim ); + v[0] = VERTEX(i); + setup_point( setup_ctx, v[0] ); } break; case PIPE_PRIM_LINES: assert(nr % 2 == 0); for (i = 0; i < nr; i += 2) { - prim.v[0] = VERTEX(i); - prim.v[1] = VERTEX(i + 1); - setup->line( setup, &prim ); + v[0] = VERTEX(i); + v[1] = VERTEX(i + 1); + setup_line( setup_ctx, v[0], v[1] ); } break; case PIPE_PRIM_LINE_STRIP: for (i = 1; i < nr; i++) { - prim.v[0] = VERTEX(i - 1); - prim.v[1] = VERTEX(i); - setup->line( setup, &prim ); + v[0] = VERTEX(i - 1); + v[1] = VERTEX(i); + setup_line( setup_ctx, v[0], v[1] ); } break; case PIPE_PRIM_TRIANGLES: assert(nr % 3 == 0); for (i = 0; i < nr; i += 3) { - prim.v[0] = VERTEX(i + 0); - prim.v[1] = VERTEX(i + 1); - prim.v[2] = VERTEX(i + 2); - calc_det(&prim); - setup->tri( setup, &prim ); + v[0] = VERTEX(i + 0); + v[1] = VERTEX(i + 1); + v[2] = VERTEX(i + 2); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); } break; case PIPE_PRIM_TRIANGLE_STRIP: assert(nr >= 3); for (i = 2; i < nr; i++) { - prim.v[0] = VERTEX(i - 2); - prim.v[1] = VERTEX(i - 1); - prim.v[2] = VERTEX(i); - calc_det(&prim); - setup->tri( setup, &prim ); + v[0] = VERTEX(i - 2); + v[1] = VERTEX(i - 1); + v[2] = VERTEX(i); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); } break; case PIPE_PRIM_TRIANGLE_FAN: assert(nr >= 3); for (i = 2; i < nr; i++) { - prim.v[0] = VERTEX(0); - prim.v[1] = VERTEX(i - 1); - prim.v[2] = VERTEX(i); - calc_det(&prim); - setup->tri( setup, &prim ); + v[0] = VERTEX(0); + v[1] = VERTEX(i - 1); + v[2] = VERTEX(i); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); } break; case PIPE_PRIM_QUADS: assert(nr % 4 == 0); for (i = 0; i < nr; i += 4) { - prim.v[0] = VERTEX(i + 0); - prim.v[1] = VERTEX(i + 1); - prim.v[2] = VERTEX(i + 2); - calc_det(&prim); - setup->tri( setup, &prim ); - - prim.v[0] = VERTEX(i + 0); - prim.v[1] = VERTEX(i + 2); - prim.v[2] = VERTEX(i + 3); - calc_det(&prim); - setup->tri( setup, &prim ); + v[0] = VERTEX(i + 0); + v[1] = VERTEX(i + 1); + v[2] = VERTEX(i + 2); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); + + v[0] = VERTEX(i + 0); + v[1] = VERTEX(i + 2); + v[2] = VERTEX(i + 3); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); } break; case PIPE_PRIM_QUAD_STRIP: assert(nr >= 4); for (i = 2; i < nr; i += 2) { - prim.v[0] = VERTEX(i - 2); - prim.v[1] = VERTEX(i); - prim.v[2] = VERTEX(i + 1); - calc_det(&prim); - setup->tri( setup, &prim ); - - prim.v[0] = VERTEX(i - 2); - prim.v[1] = VERTEX(i + 1); - prim.v[2] = VERTEX(i - 1); - calc_det(&prim); - setup->tri( setup, &prim ); + v[0] = VERTEX(i - 2); + v[1] = VERTEX(i); + v[2] = VERTEX(i + 1); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); + + v[0] = VERTEX(i - 2); + v[1] = VERTEX(i + 1); + v[2] = VERTEX(i - 1); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); } break; case PIPE_PRIM_POLYGON: /* draw as tri fan */ for (i = 2; i < nr; i++) { - prim.v[0] = VERTEX(0); - prim.v[1] = VERTEX(i - 1); - prim.v[2] = VERTEX(i); - calc_det(&prim); - setup->tri( setup, &prim ); + v[0] = VERTEX(0); + v[1] = VERTEX(i - 1); + v[2] = VERTEX(i); + setup_tri( setup_ctx, + calc_det(v[0], v[1], v[2]), + v[0], + v[1], + v[2] ); } break; default: diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c new file mode 100644 index 0000000000..48617a66ed --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -0,0 +1,1249 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Primitive rasterization/rendering (points, lines, triangles) + * + * \author Keith Whitwell + * \author Brian Paul + */ + +#include "sp_setup.h" + +#include "sp_context.h" +#include "sp_headers.h" +#include "sp_quad.h" +#include "sp_state.h" +#include "sp_prim_setup.h" +#include "draw/draw_private.h" +#include "draw/draw_vertex.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" + +#define DEBUG_VERTS 0 +#define DEBUG_FRAGS 0 + +/** + * Triangle edge info + */ +struct edge { + float dx; /**< X(v1) - X(v0), used only during setup */ + float dy; /**< Y(v1) - Y(v0), used only during setup */ + float dxdy; /**< dx/dy */ + float sx, sy; /**< first sample point coord */ + int lines; /**< number of lines on this edge */ +}; + + +/** + * Triangle setup info (derived from draw_stage). + * Also used for line drawing (taking some liberties). + */ +struct setup_context { + struct softpipe_context *softpipe; + + /* Vertices are just an array of floats making up each attribute in + * turn. Currently fixed at 4 floats, but should change in time. + * Codegen will help cope with this. + */ + const float (*vmax)[4]; + const float (*vmid)[4]; + const float (*vmin)[4]; + const float (*vprovoke)[4]; + + struct edge ebot; + struct edge etop; + struct edge emaj; + + float oneoverarea; + + struct tgsi_interp_coef coef[PIPE_MAX_SHADER_INPUTS]; + struct tgsi_interp_coef posCoef; /* For Z, W */ + struct quad_header quad; + + struct { + int left[2]; /**< [0] = row0, [1] = row1 */ + int right[2]; + int y; + unsigned y_flags; + unsigned mask; /**< mask of MASK_BOTTOM/TOP_LEFT/RIGHT bits */ + } span; + +#if DEBUG_FRAGS + uint numFragsEmitted; /**< per primitive */ + uint numFragsWritten; /**< per primitive */ +#endif +}; + + + + + +/** + * Recalculate prim's determinant. + * XXX is this needed? + */ +static INLINE float +calc_det( const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + /* edge vectors e = v0 - v2, f = v1 - v2 */ + const float ex = v0[0][0] - v2[0][0]; + const float ey = v0[0][1] - v2[0][1]; + const float fx = v1[0][0] - v2[0][0]; + const float fy = v1[0][1] - v2[0][1]; + + /* det = cross(e,f).z */ + return ex * fy - ey * fx; +} + +/** + * Clip setup->quad against the scissor/surface bounds. + */ +static INLINE void +quad_clip(struct setup_context *setup) +{ + const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; + const int minx = (int) cliprect->minx; + const int maxx = (int) cliprect->maxx; + const int miny = (int) cliprect->miny; + const int maxy = (int) cliprect->maxy; + + if (setup->quad.x0 >= maxx || + setup->quad.y0 >= maxy || + setup->quad.x0 + 1 < minx || + setup->quad.y0 + 1 < miny) { + /* totally clipped */ + setup->quad.mask = 0x0; + return; + } + if (setup->quad.x0 < minx) + setup->quad.mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + if (setup->quad.y0 < miny) + setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + if (setup->quad.x0 == maxx - 1) + setup->quad.mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + if (setup->quad.y0 == maxy - 1) + setup->quad.mask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); +} + + +/** + * Emit a quad (pass to next stage) with clipping. + */ +static INLINE void +clip_emit_quad(struct setup_context *setup) +{ + quad_clip(setup); + if (setup->quad.mask) { + struct softpipe_context *sp = setup->softpipe; + sp->quad.first->run(sp->quad.first, &setup->quad); + } +} + + +/** + * Emit a quad (pass to next stage). No clipping is done. + */ +static INLINE void +emit_quad( struct setup_context *setup, int x, int y, unsigned mask ) +{ + struct softpipe_context *sp = setup->softpipe; + setup->quad.x0 = x; + setup->quad.y0 = y; + setup->quad.mask = mask; +#if DEBUG_FRAGS + if (mask & 1) setup->numFragsEmitted++; + if (mask & 2) setup->numFragsEmitted++; + if (mask & 4) setup->numFragsEmitted++; + if (mask & 8) setup->numFragsEmitted++; +#endif + sp->quad.first->run(sp->quad.first, &setup->quad); +#if DEBUG_FRAGS + mask = setup->quad.mask; + if (mask & 1) setup->numFragsWritten++; + if (mask & 2) setup->numFragsWritten++; + if (mask & 4) setup->numFragsWritten++; + if (mask & 8) setup->numFragsWritten++; +#endif +} + + +/** + * Given an X or Y coordinate, return the block/quad coordinate that it + * belongs to. + */ +static INLINE int block( int x ) +{ + return x & ~1; +} + + +/** + * Compute mask which indicates which pixels in the 2x2 quad are actually inside + * the triangle's bounds. + * + * this is pretty nasty... may need to rework flush_spans again to + * fix it, if possible. + */ +static unsigned calculate_mask( struct setup_context *setup, int x ) +{ + unsigned mask = 0x0; + + if (x >= setup->span.left[0] && x < setup->span.right[0]) + mask |= MASK_TOP_LEFT; + + if (x >= setup->span.left[1] && x < setup->span.right[1]) + mask |= MASK_BOTTOM_LEFT; + + if (x+1 >= setup->span.left[0] && x+1 < setup->span.right[0]) + mask |= MASK_TOP_RIGHT; + + if (x+1 >= setup->span.left[1] && x+1 < setup->span.right[1]) + mask |= MASK_BOTTOM_RIGHT; + + return mask; +} + + +/** + * Render a horizontal span of quads + */ +static void flush_spans( struct setup_context *setup ) +{ + int minleft, maxright; + int x; + + switch (setup->span.y_flags) { + case 0x3: + /* both odd and even lines written (both quad rows) */ + minleft = MIN2(setup->span.left[0], setup->span.left[1]); + maxright = MAX2(setup->span.right[0], setup->span.right[1]); + break; + + case 0x1: + /* only even line written (quad top row) */ + minleft = setup->span.left[0]; + maxright = setup->span.right[0]; + break; + + case 0x2: + /* only odd line written (quad bottom row) */ + minleft = setup->span.left[1]; + maxright = setup->span.right[1]; + break; + + default: + return; + } + + /* XXX this loop could be moved into the above switch cases and + * calculate_mask() could be simplified a bit... + */ + for (x = block(minleft); x <= block(maxright); x += 2) { + emit_quad( setup, x, setup->span.y, + calculate_mask( setup, x ) ); + } + + setup->span.y = 0; + setup->span.y_flags = 0; + setup->span.right[0] = 0; + setup->span.right[1] = 0; +} + +#if DEBUG_VERTS +static void print_vertex(const struct setup_context *setup, + const float (*v)[4]) +{ + int i; + debug_printf("Vertex: (%p)\n", v); + for (i = 0; i < setup->quad.nr_attrs; i++) { + debug_printf(" %d: %f %f %f %f\n", i, + v[i][0], v[i][1], v[i][2], v[i][3]); + } +} +#endif + +static boolean setup_sort_vertices( struct setup_context *setup, + float det, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ +#if DEBUG_VERTS + debug_printf("Triangle:\n"); + print_vertex(setup, v0); + print_vertex(setup, v1); + print_vertex(setup, v2); +#endif + + setup->vprovoke = v2; + + /* determine bottom to top order of vertices */ + { + float y0 = v0[0][1]; + float y1 = v1[0][1]; + float y2 = v2[0][1]; + if (y0 <= y1) { + if (y1 <= y2) { + /* y0<=y1<=y2 */ + setup->vmin = v0; + setup->vmid = v1; + setup->vmax = v2; + } + else if (y2 <= y0) { + /* y2<=y0<=y1 */ + setup->vmin = v2; + setup->vmid = v0; + setup->vmax = v1; + } + else { + /* y0<=y2<=y1 */ + setup->vmin = v0; + setup->vmid = v2; + setup->vmax = v1; + } + } + else { + if (y0 <= y2) { + /* y1<=y0<=y2 */ + setup->vmin = v1; + setup->vmid = v0; + setup->vmax = v2; + } + else if (y2 <= y1) { + /* y2<=y1<=y0 */ + setup->vmin = v2; + setup->vmid = v1; + setup->vmax = v0; + } + else { + /* y1<=y2<=y0 */ + setup->vmin = v1; + setup->vmid = v2; + setup->vmax = v0; + } + } + } + + setup->ebot.dx = setup->vmid[0][0] - setup->vmin[0][0]; + setup->ebot.dy = setup->vmid[0][1] - setup->vmin[0][1]; + setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; + setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; + setup->etop.dx = setup->vmax[0][0] - setup->vmid[0][0]; + setup->etop.dy = setup->vmax[0][1] - setup->vmid[0][1]; + + /* + * Compute triangle's area. Use 1/area to compute partial + * derivatives of attributes later. + * + * The area will be the same as prim->det, but the sign may be + * different depending on how the vertices get sorted above. + * + * To determine whether the primitive is front or back facing we + * use the prim->det value because its sign is correct. + */ + { + const float area = (setup->emaj.dx * setup->ebot.dy - + setup->ebot.dx * setup->emaj.dy); + + setup->oneoverarea = 1.0f / area; + /* + debug_printf("%s one-over-area %f area %f det %f\n", + __FUNCTION__, setup->oneoverarea, area, det ); + */ + } + + /* We need to know if this is a front or back-facing triangle for: + * - the GLSL gl_FrontFacing fragment attribute (bool) + * - two-sided stencil test + */ + setup->quad.facing = (det > 0.0) ^ (setup->softpipe->rasterizer->front_winding == PIPE_WINDING_CW); + + return TRUE; +} + + +/** + * Compute a0 for a constant-valued coefficient (GL_FLAT shading). + * The value value comes from vertex[slot][i]. + * The result will be put into setup->coef[slot].a0[i]. + * \param slot which attribute slot + * \param i which component of the slot (0..3) + */ +static void const_coeff( struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + assert(i <= 3); + + coef->dadx[i] = 0; + coef->dady[i] = 0; + + /* need provoking vertex info! + */ + coef->a0[i] = setup->vprovoke[vertSlot][i]; +} + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a triangle. + */ +static void tri_linear_coeff( struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + float botda = setup->vmid[vertSlot][i] - setup->vmin[vertSlot][i]; + float majda = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; + float a = setup->ebot.dy * majda - botda * setup->emaj.dy; + float b = setup->emaj.dx * botda - majda * setup->ebot.dx; + float dadx = a * setup->oneoverarea; + float dady = b * setup->oneoverarea; + + assert(i <= 3); + + coef->dadx[i] = dadx; + coef->dady[i] = dady; + + /* calculate a0 as the value which would be sampled for the + * fragment at (0,0), taking into account that we want to sample at + * pixel centers, in other words (0.5, 0.5). + * + * this is neat but unfortunately not a good way to do things for + * triangles with very large values of dadx or dady as it will + * result in the subtraction and re-addition from a0 of a very + * large number, which means we'll end up loosing a lot of the + * fractional bits and precision from a0. the way to fix this is + * to define a0 as the sample at a pixel center somewhere near vmin + * instead - i'll switch to this later. + */ + coef->a0[i] = (setup->vmin[vertSlot][i] - + (dadx * (setup->vmin[0][0] - 0.5f) + + dady * (setup->vmin[0][1] - 0.5f))); + + /* + debug_printf("attr[%d].%c: %f dx:%f dy:%f\n", + slot, "xyzw"[i], + setup->coef[slot].a0[i], + setup->coef[slot].dadx[i], + setup->coef[slot].dady[i]); + */ +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a triangle. + * We basically multiply the vertex value by 1/w before computing + * the plane coefficients (a0, dadx, dady). + * Later, when we compute the value at a particular fragment position we'll + * divide the interpolated value by the interpolated W at that fragment. + */ +static void tri_persp_coeff( struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + /* premultiply by 1/w (v[0][3] is always W): + */ + float mina = setup->vmin[vertSlot][i] * setup->vmin[0][3]; + float mida = setup->vmid[vertSlot][i] * setup->vmid[0][3]; + float maxa = setup->vmax[vertSlot][i] * setup->vmax[0][3]; + float botda = mida - mina; + float majda = maxa - mina; + float a = setup->ebot.dy * majda - botda * setup->emaj.dy; + float b = setup->emaj.dx * botda - majda * setup->ebot.dx; + float dadx = a * setup->oneoverarea; + float dady = b * setup->oneoverarea; + + /* + debug_printf("tri persp %d,%d: %f %f %f\n", vertSlot, i, + setup->vmin[vertSlot][i], + setup->vmid[vertSlot][i], + setup->vmax[vertSlot][i] + ); + */ + assert(i <= 3); + + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (mina - + (dadx * (setup->vmin[0][0] - 0.5f) + + dady * (setup->vmin[0][1] - 0.5f))); +} + + +/** + * Special coefficient setup for gl_FragCoord. + * X and Y are trivial, though Y has to be inverted for OpenGL. + * Z and W are copied from posCoef which should have already been computed. + * We could do a bit less work if we'd examine gl_FragCoord's swizzle mask. + */ +static void +setup_fragcoord_coeff(struct setup_context *setup, uint slot) +{ + /*X*/ + setup->coef[slot].a0[0] = 0; + setup->coef[slot].dadx[0] = 1.0; + setup->coef[slot].dady[0] = 0.0; + /*Y*/ + if (setup->softpipe->rasterizer->origin_lower_left) { + /* y=0=bottom */ + const int winHeight = setup->softpipe->framebuffer.height; + setup->coef[slot].a0[1] = (float) (winHeight - 1); + setup->coef[slot].dady[1] = -1.0; + } + else { + /* y=0=top */ + setup->coef[slot].a0[1] = 0.0; + setup->coef[slot].dady[1] = 1.0; + } + setup->coef[slot].dadx[1] = 0.0; + /*Z*/ + setup->coef[slot].a0[2] = setup->posCoef.a0[2]; + setup->coef[slot].dadx[2] = setup->posCoef.dadx[2]; + setup->coef[slot].dady[2] = setup->posCoef.dady[2]; + /*W*/ + setup->coef[slot].a0[3] = setup->posCoef.a0[3]; + setup->coef[slot].dadx[3] = setup->posCoef.dadx[3]; + setup->coef[slot].dady[3] = setup->posCoef.dady[3]; +} + + + +/** + * Compute the setup->coef[] array dadx, dady, a0 values. + * Must be called after setup->vmin,vmid,vmax,vprovoke are initialized. + */ +static void setup_tri_coefficients( struct setup_context *setup ) +{ + struct softpipe_context *softpipe = setup->softpipe; + const struct sp_fragment_shader *spfs = softpipe->fs; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + + /* z and w are done by linear interpolation: + */ + tri_linear_coeff(setup, &setup->posCoef, 0, 2); + tri_linear_coeff(setup, &setup->posCoef, 0, 3); + + /* setup interpolation for all the remaining attributes: + */ + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + const uint vertSlot = vinfo->src_index[fragSlot]; + uint j; + + switch (vinfo->interp_mode[fragSlot]) { + case INTERP_CONSTANT: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + tri_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + tri_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + setup_fragcoord_coeff(setup, fragSlot); + break; + default: + assert(0); + } + + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + } + } +} + + + +static void setup_tri_edges( struct setup_context *setup ) +{ + float vmin_x = setup->vmin[0][0] + 0.5f; + float vmid_x = setup->vmid[0][0] + 0.5f; + + float vmin_y = setup->vmin[0][1] - 0.5f; + float vmid_y = setup->vmid[0][1] - 0.5f; + float vmax_y = setup->vmax[0][1] - 0.5f; + + setup->emaj.sy = CEILF(vmin_y); + setup->emaj.lines = (int) CEILF(vmax_y - setup->emaj.sy); + setup->emaj.dxdy = setup->emaj.dx / setup->emaj.dy; + setup->emaj.sx = vmin_x + (setup->emaj.sy - vmin_y) * setup->emaj.dxdy; + + setup->etop.sy = CEILF(vmid_y); + setup->etop.lines = (int) CEILF(vmax_y - setup->etop.sy); + setup->etop.dxdy = setup->etop.dx / setup->etop.dy; + setup->etop.sx = vmid_x + (setup->etop.sy - vmid_y) * setup->etop.dxdy; + + setup->ebot.sy = CEILF(vmin_y); + setup->ebot.lines = (int) CEILF(vmid_y - setup->ebot.sy); + setup->ebot.dxdy = setup->ebot.dx / setup->ebot.dy; + setup->ebot.sx = vmin_x + (setup->ebot.sy - vmin_y) * setup->ebot.dxdy; +} + + +/** + * Render the upper or lower half of a triangle. + * Scissoring/cliprect is applied here too. + */ +static void subtriangle( struct setup_context *setup, + struct edge *eleft, + struct edge *eright, + unsigned lines ) +{ + const struct pipe_scissor_state *cliprect = &setup->softpipe->cliprect; + const int minx = (int) cliprect->minx; + const int maxx = (int) cliprect->maxx; + const int miny = (int) cliprect->miny; + const int maxy = (int) cliprect->maxy; + int y, start_y, finish_y; + int sy = (int)eleft->sy; + + assert((int)eleft->sy == (int) eright->sy); + + /* clip top/bottom */ + start_y = sy; + finish_y = sy + lines; + + if (start_y < miny) + start_y = miny; + + if (finish_y > maxy) + finish_y = maxy; + + start_y -= sy; + finish_y -= sy; + + /* + debug_printf("%s %d %d\n", __FUNCTION__, start_y, finish_y); + */ + + for (y = start_y; y < finish_y; y++) { + + /* avoid accumulating adds as floats don't have the precision to + * accurately iterate large triangle edges that way. luckily we + * can just multiply these days. + * + * this is all drowned out by the attribute interpolation anyway. + */ + int left = (int)(eleft->sx + y * eleft->dxdy); + int right = (int)(eright->sx + y * eright->dxdy); + + /* clip left/right */ + if (left < minx) + left = minx; + if (right > maxx) + right = maxx; + + if (left < right) { + int _y = sy + y; + if (block(_y) != setup->span.y) { + flush_spans(setup); + setup->span.y = block(_y); + } + + setup->span.left[_y&1] = left; + setup->span.right[_y&1] = right; + setup->span.y_flags |= 1<<(_y&1); + } + } + + + /* save the values so that emaj can be restarted: + */ + eleft->sx += lines * eleft->dxdy; + eright->sx += lines * eright->dxdy; + eleft->sy += lines; + eright->sy += lines; +} + + +/** + * Do setup for triangle rasterization, then render the triangle. + */ +void setup_tri( struct setup_context *setup, + float det, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ) +{ + /* + debug_printf("%s\n", __FUNCTION__ ); + */ + +#if DEBUG_FRAGS + setup->numFragsEmitted = 0; + setup->numFragsWritten = 0; +#endif + + setup_sort_vertices( setup, calc_det(v0, v1, v2), + v0, v1, v2 ); + setup_tri_coefficients( setup ); + setup_tri_edges( setup ); + + setup->quad.prim = PRIM_TRI; + + setup->span.y = 0; + setup->span.y_flags = 0; + setup->span.right[0] = 0; + setup->span.right[1] = 0; + /* setup->span.z_mode = tri_z_mode( setup->ctx ); */ + + /* init_constant_attribs( setup ); */ + + if (setup->oneoverarea < 0.0) { + /* emaj on left: + */ + subtriangle( setup, &setup->emaj, &setup->ebot, setup->ebot.lines ); + subtriangle( setup, &setup->emaj, &setup->etop, setup->etop.lines ); + } + else { + /* emaj on right: + */ + subtriangle( setup, &setup->ebot, &setup->emaj, setup->ebot.lines ); + subtriangle( setup, &setup->etop, &setup->emaj, setup->etop.lines ); + } + + flush_spans( setup ); + +#if DEBUG_FRAGS + printf("Tri: %u frags emitted, %u written\n", + setup->numFragsEmitted, + setup->numFragsWritten); +#endif +} + + + +/** + * Compute a0, dadx and dady for a linearly interpolated coefficient, + * for a line. + */ +static void +line_linear_coeff(struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + const float da = setup->vmax[vertSlot][i] - setup->vmin[vertSlot][i]; + const float dadx = da * setup->emaj.dx * setup->oneoverarea; + const float dady = da * setup->emaj.dy * setup->oneoverarea; + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (setup->vmin[vertSlot][i] - + (dadx * (setup->vmin[0][0] - 0.5f) + + dady * (setup->vmin[0][1] - 0.5f))); +} + + +/** + * Compute a0, dadx and dady for a perspective-corrected interpolant, + * for a line. + */ +static void +line_persp_coeff(struct setup_context *setup, + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + /* XXX double-check/verify this arithmetic */ + const float a0 = setup->vmin[vertSlot][i] * setup->vmin[0][3]; + const float a1 = setup->vmax[vertSlot][i] * setup->vmax[0][3]; + const float da = a1 - a0; + const float dadx = da * setup->emaj.dx * setup->oneoverarea; + const float dady = da * setup->emaj.dy * setup->oneoverarea; + coef->dadx[i] = dadx; + coef->dady[i] = dady; + coef->a0[i] = (setup->vmin[vertSlot][i] - + (dadx * (setup->vmin[0][0] - 0.5f) + + dady * (setup->vmin[0][1] - 0.5f))); +} + + +/** + * Compute the setup->coef[] array dadx, dady, a0 values. + * Must be called after setup->vmin,vmax are initialized. + */ +static INLINE void +setup_line_coefficients(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]) +{ + struct softpipe_context *softpipe = setup->softpipe; + const struct sp_fragment_shader *spfs = softpipe->fs; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + + /* use setup->vmin, vmax to point to vertices */ + setup->vprovoke = v1; + setup->vmin = v0; + setup->vmax = v1; + + setup->emaj.dx = setup->vmax[0][0] - setup->vmin[0][0]; + setup->emaj.dy = setup->vmax[0][1] - setup->vmin[0][1]; + /* NOTE: this is not really 1/area */ + setup->oneoverarea = 1.0f / (setup->emaj.dx * setup->emaj.dx + + setup->emaj.dy * setup->emaj.dy); + + /* z and w are done by linear interpolation: + */ + line_linear_coeff(setup, &setup->posCoef, 0, 2); + line_linear_coeff(setup, &setup->posCoef, 0, 3); + + /* setup interpolation for all the remaining attributes: + */ + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + const uint vertSlot = vinfo->src_index[fragSlot]; + uint j; + + switch (vinfo->interp_mode[fragSlot]) { + case INTERP_CONSTANT: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + line_linear_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + line_persp_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + setup_fragcoord_coeff(setup, fragSlot); + break; + default: + assert(0); + } + + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + } + } +} + + +/** + * Plot a pixel in a line segment. + */ +static INLINE void +plot(struct setup_context *setup, int x, int y) +{ + const int iy = y & 1; + const int ix = x & 1; + const int quadX = x - ix; + const int quadY = y - iy; + const int mask = (1 << ix) << (2 * iy); + + if (quadX != setup->quad.x0 || + quadY != setup->quad.y0) + { + /* flush prev quad, start new quad */ + + if (setup->quad.x0 != -1) + clip_emit_quad(setup); + + setup->quad.x0 = quadX; + setup->quad.y0 = quadY; + setup->quad.mask = 0x0; + } + + setup->quad.mask |= mask; +} + + +/** + * Do setup for line rasterization, then render the line. + * Single-pixel width, no stipple, etc. We rely on the 'draw' module + * to handle stippling and wide lines. + */ +void +setup_line(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]) +{ + int x0 = (int) v0[0][0]; + int x1 = (int) v1[0][0]; + int y0 = (int) v0[0][1]; + int y1 = (int) v1[0][1]; + int dx = x1 - x0; + int dy = y1 - y0; + int xstep, ystep; + + if (dx == 0 && dy == 0) + return; + + setup_line_coefficients(setup, v0, v1); + + if (dx < 0) { + dx = -dx; /* make positive */ + xstep = -1; + } + else { + xstep = 1; + } + + if (dy < 0) { + dy = -dy; /* make positive */ + ystep = -1; + } + else { + ystep = 1; + } + + assert(dx >= 0); + assert(dy >= 0); + + setup->quad.x0 = setup->quad.y0 = -1; + setup->quad.mask = 0x0; + setup->quad.prim = PRIM_LINE; + /* XXX temporary: set coverage to 1.0 so the line appears + * if AA mode happens to be enabled. + */ + setup->quad.coverage[0] = + setup->quad.coverage[1] = + setup->quad.coverage[2] = + setup->quad.coverage[3] = 1.0; + + if (dx > dy) { + /*** X-major line ***/ + int i; + const int errorInc = dy + dy; + int error = errorInc - dx; + const int errorDec = error - dx; + + for (i = 0; i < dx; i++) { + plot(setup, x0, y0); + + x0 += xstep; + if (error < 0) { + error += errorInc; + } + else { + error += errorDec; + y0 += ystep; + } + } + } + else { + /*** Y-major line ***/ + int i; + const int errorInc = dx + dx; + int error = errorInc - dy; + const int errorDec = error - dy; + + for (i = 0; i < dy; i++) { + plot(setup, x0, y0); + + y0 += ystep; + if (error < 0) { + error += errorInc; + } + else { + error += errorDec; + x0 += xstep; + } + } + } + + /* draw final quad */ + if (setup->quad.mask) { + clip_emit_quad(setup); + } +} + + +static void +point_persp_coeff(struct setup_context *setup, + const float (*vert)[4], + struct tgsi_interp_coef *coef, + uint vertSlot, uint i) +{ + assert(i <= 3); + coef->dadx[i] = 0.0F; + coef->dady[i] = 0.0F; + coef->a0[i] = vert[vertSlot][i] * vert[0][3]; +} + + +/** + * Do setup for point rasterization, then render the point. + * Round or square points... + * XXX could optimize a lot for 1-pixel points. + */ +void +setup_point( struct setup_context *setup, + const float (*v0)[4] ) +{ + struct softpipe_context *softpipe = setup->softpipe; + const struct sp_fragment_shader *spfs = softpipe->fs; + const int sizeAttr = setup->softpipe->psize_slot; + const float size + = sizeAttr > 0 ? v0[sizeAttr][0] + : setup->softpipe->rasterizer->point_size; + const float halfSize = 0.5F * size; + const boolean round = (boolean) setup->softpipe->rasterizer->point_smooth; + const float x = v0[0][0]; /* Note: data[0] is always position */ + const float y = v0[0][1]; + const struct vertex_info *vinfo = softpipe_get_vertex_info(softpipe); + uint fragSlot; + + /* For points, all interpolants are constant-valued. + * However, for point sprites, we'll need to setup texcoords appropriately. + * XXX: which coefficients are the texcoords??? + * We may do point sprites as textured quads... + * + * KW: We don't know which coefficients are texcoords - ultimately + * the choice of what interpolation mode to use for each attribute + * should be determined by the fragment program, using + * per-attribute declaration statements that include interpolation + * mode as a parameter. So either the fragment program will have + * to be adjusted for pointsprite vs normal point behaviour, or + * otherwise a special interpolation mode will have to be defined + * which matches the required behaviour for point sprites. But - + * the latter is not a feature of normal hardware, and as such + * probably should be ruled out on that basis. + */ + setup->vprovoke = v0; + + /* setup Z, W */ + const_coeff(setup, &setup->posCoef, 0, 2); + const_coeff(setup, &setup->posCoef, 0, 3); + + for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { + const uint vertSlot = vinfo->src_index[fragSlot]; + uint j; + + switch (vinfo->interp_mode[fragSlot]) { + case INTERP_CONSTANT: + /* fall-through */ + case INTERP_LINEAR: + for (j = 0; j < NUM_CHANNELS; j++) + const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_PERSPECTIVE: + for (j = 0; j < NUM_CHANNELS; j++) + point_persp_coeff(setup, setup->vprovoke, + &setup->coef[fragSlot], vertSlot, j); + break; + case INTERP_POS: + setup_fragcoord_coeff(setup, fragSlot); + break; + default: + assert(0); + } + + if (spfs->info.input_semantic_name[fragSlot] == TGSI_SEMANTIC_FOG) { + /* FOG.y = front/back facing XXX fix this */ + setup->coef[fragSlot].a0[1] = 1.0f - setup->quad.facing; + setup->coef[fragSlot].dadx[1] = 0.0; + setup->coef[fragSlot].dady[1] = 0.0; + } + } + + setup->quad.prim = PRIM_POINT; + + if (halfSize <= 0.5 && !round) { + /* special case for 1-pixel points */ + const int ix = ((int) x) & 1; + const int iy = ((int) y) & 1; + setup->quad.x0 = (int) x - ix; + setup->quad.y0 = (int) y - iy; + setup->quad.mask = (1 << ix) << (2 * iy); + clip_emit_quad(setup); + } + else { + if (round) { + /* rounded points */ + const int ixmin = block((int) (x - halfSize)); + const int ixmax = block((int) (x + halfSize)); + const int iymin = block((int) (y - halfSize)); + const int iymax = block((int) (y + halfSize)); + const float rmin = halfSize - 0.7071F; /* 0.7071 = sqrt(2)/2 */ + const float rmax = halfSize + 0.7071F; + const float rmin2 = MAX2(0.0F, rmin * rmin); + const float rmax2 = rmax * rmax; + const float cscale = 1.0F / (rmax2 - rmin2); + int ix, iy; + + for (iy = iymin; iy <= iymax; iy += 2) { + for (ix = ixmin; ix <= ixmax; ix += 2) { + float dx, dy, dist2, cover; + + setup->quad.mask = 0x0; + + dx = (ix + 0.5f) - x; + dy = (iy + 0.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.coverage[QUAD_TOP_LEFT] = MIN2(cover, 1.0f); + setup->quad.mask |= MASK_TOP_LEFT; + } + + dx = (ix + 1.5f) - x; + dy = (iy + 0.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.coverage[QUAD_TOP_RIGHT] = MIN2(cover, 1.0f); + setup->quad.mask |= MASK_TOP_RIGHT; + } + + dx = (ix + 0.5f) - x; + dy = (iy + 1.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.coverage[QUAD_BOTTOM_LEFT] = MIN2(cover, 1.0f); + setup->quad.mask |= MASK_BOTTOM_LEFT; + } + + dx = (ix + 1.5f) - x; + dy = (iy + 1.5f) - y; + dist2 = dx * dx + dy * dy; + if (dist2 <= rmax2) { + cover = 1.0F - (dist2 - rmin2) * cscale; + setup->quad.coverage[QUAD_BOTTOM_RIGHT] = MIN2(cover, 1.0f); + setup->quad.mask |= MASK_BOTTOM_RIGHT; + } + + if (setup->quad.mask) { + setup->quad.x0 = ix; + setup->quad.y0 = iy; + clip_emit_quad(setup); + } + } + } + } + else { + /* square points */ + const int xmin = (int) (x + 0.75 - halfSize); + const int ymin = (int) (y + 0.25 - halfSize); + const int xmax = xmin + (int) size; + const int ymax = ymin + (int) size; + /* XXX could apply scissor to xmin,ymin,xmax,ymax now */ + const int ixmin = block(xmin); + const int ixmax = block(xmax - 1); + const int iymin = block(ymin); + const int iymax = block(ymax - 1); + int ix, iy; + + /* + debug_printf("(%f, %f) -> X:%d..%d Y:%d..%d\n", x, y, xmin, xmax,ymin,ymax); + */ + for (iy = iymin; iy <= iymax; iy += 2) { + uint rowMask = 0xf; + if (iy < ymin) { + /* above the top edge */ + rowMask &= (MASK_BOTTOM_LEFT | MASK_BOTTOM_RIGHT); + } + if (iy + 1 >= ymax) { + /* below the bottom edge */ + rowMask &= (MASK_TOP_LEFT | MASK_TOP_RIGHT); + } + + for (ix = ixmin; ix <= ixmax; ix += 2) { + uint mask = rowMask; + + if (ix < xmin) { + /* fragment is past left edge of point, turn off left bits */ + mask &= (MASK_BOTTOM_RIGHT | MASK_TOP_RIGHT); + } + if (ix + 1 >= xmax) { + /* past the right edge */ + mask &= (MASK_BOTTOM_LEFT | MASK_TOP_LEFT); + } + + setup->quad.mask = mask; + setup->quad.x0 = ix; + setup->quad.y0 = iy; + clip_emit_quad(setup); + } + } + } + } +} + +void setup_prepare( struct setup_context *setup ) +{ + struct softpipe_context *sp = setup->softpipe; + unsigned i; + + if (sp->dirty) { + softpipe_update_derived(sp); + } + + /* Mark surfaces as defined now */ + for (i = 0; i < sp->framebuffer.num_cbufs; i++){ + if (sp->framebuffer.cbufs[i]) { + sp->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; + } + } + if (sp->framebuffer.zsbuf) { + sp->framebuffer.zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; + } + + { + const struct sp_fragment_shader *fs = setup->softpipe->fs; + setup->quad.nr_attrs = fs->info.num_inputs; + sp->quad.first->begin(sp->quad.first); + } +} + + + +void setup_destroy_context( struct setup_context *setup ) +{ + FREE( setup ); +} + + +/** + * Create a new primitive setup/render stage. + */ +struct setup_context *setup_create_context( struct softpipe_context *softpipe ) +{ + struct setup_context *setup = CALLOC_STRUCT(setup_context); + + setup->softpipe = softpipe; + + setup->quad.coef = setup->coef; + setup->quad.posCoef = &setup->posCoef; + + return setup; +} diff --git a/src/gallium/drivers/softpipe/sp_setup.h b/src/gallium/drivers/softpipe/sp_setup.h new file mode 100644 index 0000000000..3133fc2a3d --- /dev/null +++ b/src/gallium/drivers/softpipe/sp_setup.h @@ -0,0 +1,53 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ +#ifndef SP_SETUP_H +#define SP_SETUP_H + +struct setup_context; +struct softpipe_context; + +void setup_tri( struct setup_context *setup, + float det, + const float (*v0)[4], + const float (*v1)[4], + const float (*v2)[4] ); + +void +setup_line(struct setup_context *setup, + const float (*v0)[4], + const float (*v1)[4]); + +void +setup_point( struct setup_context *setup, + const float (*v0)[4] ); + + +struct setup_context *setup_create_context( struct softpipe_context *softpipe ); +void setup_prepare( struct setup_context *setup ); +void setup_destroy_context( struct setup_context *setup ); + +#endif diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index 14abb20eeb..f10a1fa471 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -71,25 +71,16 @@ softpipe_get_vertex_info(struct softpipe_context *softpipe) * simply emit the whole post-xform vertex as-is: */ struct vertex_info *vinfo_vbuf = &softpipe->vertex_info_vbuf; -#if 0 - vinfo_vbuf->num_attribs = 0; - /* special-case to allow memcpy of whole vertex */ - draw_emit_vertex_attr(vinfo_vbuf, EMIT_ALL, INTERP_NONE, 0); - /* size in dwords or floats */ - vinfo_vbuf->size = 4 * draw_num_vs_outputs(softpipe->draw) - + sizeof(struct vertex_header) / 4; -#else - /* for pass-through mode, we need a more explicit list of attribs */ const uint num = draw_num_vs_outputs(softpipe->draw); uint i; + /* No longer any need to try and emit draw vertex_header info. + */ vinfo_vbuf->num_attribs = 0; - draw_emit_vertex_attr(vinfo_vbuf, EMIT_HEADER, INTERP_NONE, 0); for (i = 0; i < num; i++) { draw_emit_vertex_attr(vinfo_vbuf, EMIT_4F, INTERP_PERSPECTIVE, i); } draw_compute_vertex_size(vinfo_vbuf); -#endif } /* -- cgit v1.2.3 From bc56e87ce180ddca63bcb9774366fc380214a2ef Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 10 Apr 2008 22:57:21 +0900 Subject: gallium: Attribute realloc leaks to the first malloc call. --- src/gallium/auxiliary/util/p_debug_mem.c | 71 +++++++++++++++++++++++++----- src/gallium/auxiliary/util/u_double_list.h | 8 ++++ 2 files changed, 68 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/p_debug_mem.c index 97ec9c5b39..aba69c0294 100644 --- a/src/gallium/auxiliary/util/p_debug_mem.c +++ b/src/gallium/auxiliary/util/p_debug_mem.c @@ -73,6 +73,25 @@ static struct list_head list = { &list, &list }; static unsigned long last_no = 0; +static INLINE struct debug_memory_header * +header_from_data(void *data) +{ + if(data) + return (struct debug_memory_header *)((char *)data - sizeof(struct debug_memory_header)); + else + return NULL; +} + +static INLINE void * +data_from_header(struct debug_memory_header *hdr) +{ + if(hdr) + return (void *)((char *)hdr + sizeof(struct debug_memory_header)); + else + return NULL; +} + + void * debug_malloc(const char *file, unsigned line, const char *function, size_t size) @@ -92,7 +111,7 @@ debug_malloc(const char *file, unsigned line, const char *function, LIST_ADDTAIL(&hdr->head, &list); - return (void *)((char *)hdr + sizeof(*hdr)); + return data_from_header(hdr); } void @@ -104,7 +123,7 @@ debug_free(const char *file, unsigned line, const char *function, if(!ptr) return; - hdr = (struct debug_memory_header *)((char *)ptr - sizeof(*hdr)); + hdr = header_from_data(ptr); if(hdr->magic != DEBUG_MEMORY_MAGIC) { debug_printf("%s:%u:%s: freeing bad or corrupted memory %p\n", file, line, function, @@ -133,16 +152,46 @@ void * debug_realloc(const char *file, unsigned line, const char *function, void *old_ptr, size_t old_size, size_t new_size ) { - void *new_ptr = NULL; - - if (new_size != 0) { - new_ptr = debug_malloc( file, line, function, new_size ); - - if( new_ptr && old_ptr ) - memcpy( new_ptr, old_ptr, old_size ); + struct debug_memory_header *old_hdr, *new_hdr; + void *new_ptr; + + if(!old_ptr) + return debug_malloc( file, line, function, new_size ); + + if(!new_size) { + debug_free( file, line, function, old_ptr ); + return NULL; } + + old_hdr = header_from_data(old_ptr); + if(old_hdr->magic != DEBUG_MEMORY_MAGIC) { + debug_printf("%s:%u:%s: reallocating bad or corrupted memory %p\n", + file, line, function, + old_ptr); + debug_assert(0); + return NULL; + } + + /* alloc new */ + new_hdr = real_malloc(sizeof(*new_hdr) + new_size); + if(!new_hdr) + return NULL; + new_hdr->no = old_hdr->no; + new_hdr->file = old_hdr->file; + new_hdr->line = old_hdr->line; + new_hdr->function = old_hdr->function; + new_hdr->size = new_size; + new_hdr->magic = DEBUG_MEMORY_MAGIC; + LIST_REPLACE(&old_hdr->head, &new_hdr->head); + + /* copy data */ + new_ptr = data_from_header(new_hdr); + memcpy( new_ptr, old_ptr, old_size < new_size ? old_size : new_size ); + + /* free old */ + old_hdr->magic = 0; + real_free(old_hdr); - debug_free( file, line, function, old_ptr ); return new_ptr; } @@ -162,7 +211,7 @@ debug_memory_end(unsigned long start_no) struct debug_memory_header *hdr; void *ptr; hdr = LIST_ENTRY(struct debug_memory_header, entry, head); - ptr = (void *)((char *)hdr + sizeof(*hdr)); + ptr = data_from_header(hdr); if(start_no <= hdr->no && hdr->no < last_no || last_no < start_no && (hdr->no < last_no || start_no <= hdr->no)) debug_printf("%s:%u:%s: %u bytes at %p not freed\n", diff --git a/src/gallium/auxiliary/util/u_double_list.h b/src/gallium/auxiliary/util/u_double_list.h index 8cb10c9820..d108d92e52 100644 --- a/src/gallium/auxiliary/util/u_double_list.h +++ b/src/gallium/auxiliary/util/u_double_list.h @@ -70,6 +70,14 @@ struct list_head (__list)->prev = (__item); \ } while(0) +#define LIST_REPLACE(__from, __to) \ + do { \ + (__to)->prev = (__from)->prev; \ + (__to)->next = (__from)->next; \ + (__from)->next->prev = (__to); \ + (__from)->prev->next = (__to); \ + } while (0) + #define LIST_DEL(__item) \ do { \ (__item)->prev->next = (__item)->next; \ -- cgit v1.2.3 From 2483062f10e93fbbc5e3f629627b9e8addcc3f84 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 7 Apr 2008 11:44:34 +0200 Subject: tgsi: Dump semantics before interpolator. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 7d292778ad..ad9770e474 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -746,17 +746,19 @@ dump_declaration_short( } } - if( decl->Declaration.Interpolate ) { + if (decl->Declaration.Semantic) { TXT( ", " ); - ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES_SHORT ); + ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS_SHORT ); + if (decl->Semantic.SemanticIndex != 0) { + CHR( '[' ); + UID( decl->Semantic.SemanticIndex ); + CHR( ']' ); + } } - if( decl->Declaration.Semantic ) { + if (decl->Declaration.Interpolate) { TXT( ", " ); - ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS_SHORT ); - CHR( '[' ); - UID( decl->Semantic.SemanticIndex ); - CHR( ']' ); + ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES_SHORT ); } } -- cgit v1.2.3 From f41cc50ef063e9a56dc2d0fc6564a0c0f4344b83 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 7 Apr 2008 11:46:57 +0200 Subject: tgsi: Dump processor type and version as a single token. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index ad9770e474..1d6e8b155d 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -1402,7 +1402,6 @@ dump_gen( CHR( '\n' ); ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES_SHORT ); - CHR( ' ' ); UID( parse.FullVersion.Version.MajorVersion ); CHR( '.' ); UID( parse.FullVersion.Version.MinorVersion ); -- cgit v1.2.3 From 4d184cc33131b440f9aafbcdd2d657050411db49 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 11 Apr 2008 13:20:52 -0600 Subject: gallium: fix broken x86_call() --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 4d33950e99..aea8b28e58 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -317,7 +317,7 @@ void x86_call( struct x86_function *p, void (*label)()) void x86_call( struct x86_function *p, struct x86_reg reg) { emit_1ub(p, 0xff); - emit_modrm(p, reg, reg); + emit_modrm_noreg(p, 2, reg); } #endif -- cgit v1.2.3 From 9e8a85ef670dfb7d356d6066bad4710683a07fd9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Apr 2008 13:21:39 -0600 Subject: gallium: fix SCS codegen (sin scalar src comes from X, not Y) --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 4e80597b3f..35c63feccc 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -2020,7 +2020,7 @@ emit_instruction( STORE( func, *inst, 0, 0, CHAN_X ); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { - FETCH( func, *inst, 0, 0, CHAN_Y ); + FETCH( func, *inst, 0, 0, CHAN_X ); emit_sin( func, 0 ); STORE( func, *inst, 0, 0, CHAN_Y ); } -- cgit v1.2.3 From 097301395d33d57d19bc942f236b4a8c912cc0cc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Apr 2008 13:44:47 -0600 Subject: gallium: comments --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 31 +++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 35c63feccc..93813f36cb 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -548,6 +548,12 @@ emit_xorps( * Data fetch helpers. */ +/** + * Copy a shader constant to xmm register + * \param xmm the destination xmm register + * \param vec the src const buffer index + * \param chan src channel to fetch (X, Y, Z or W) + */ static void emit_const( struct x86_function *func, @@ -566,6 +572,12 @@ emit_const( SHUF( 0, 0, 0, 0 ) ); } +/** + * Copy a shader input to xmm register + * \param xmm the destination xmm register + * \param vec the src input attrib + * \param chan src channel to fetch (X, Y, Z or W) + */ static void emit_inputf( struct x86_function *func, @@ -579,6 +591,12 @@ emit_inputf( get_input( vec, chan ) ); } +/** + * Store an xmm register to a shader output + * \param xmm the source xmm register + * \param vec the dest output attrib + * \param chan src dest channel to store (X, Y, Z or W) + */ static void emit_output( struct x86_function *func, @@ -592,6 +610,12 @@ emit_output( make_xmm( xmm ) ); } +/** + * Copy a shader temporary to xmm register + * \param xmm the destination xmm register + * \param vec the src temp register + * \param chan src channel to fetch (X, Y, Z or W) + */ static void emit_tempf( struct x86_function *func, @@ -605,6 +629,13 @@ emit_tempf( get_temp( vec, chan ) ); } +/** + * Load an xmm register with an input attrib coefficient (a0, dadx or dady) + * \param xmm the destination xmm register + * \param vec the src input/attribute coefficient index + * \param chan src channel to fetch (X, Y, Z or W) + * \param member 0=a0, 1=dadx, 2=dady + */ static void emit_coef( struct x86_function *func, -- cgit v1.2.3 From e3cf0cd6a9f3f072594e5712763b98ce7e579bcf Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Apr 2008 14:18:07 -0600 Subject: gallium: implement immediates (aka literals) for SSE fragment shaders --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 82 +++++++++++++++++++++++++++-- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h | 4 +- src/gallium/drivers/softpipe/sp_fs_sse.c | 10 ++-- 3 files changed, 88 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 93813f36cb..748c53dbfc 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -225,6 +225,15 @@ get_coef_base( void ) return get_output_base(); } +static struct x86_reg +get_immediate_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_DI ); +} + + /** * Data access helpers. */ @@ -238,6 +247,16 @@ get_argument( (index + 1) * 4 ); } +static struct x86_reg +get_immediate( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_immediate_base(), + (vec * 4 + chan) * 4 ); +} + static struct x86_reg get_const( unsigned vec, @@ -572,6 +591,25 @@ emit_const( SHUF( 0, 0, 0, 0 ) ); } +static void +emit_immediate( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_movss( + func, + make_xmm( xmm ), + get_immediate( vec, chan ) ); + emit_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + + /** * Copy a shader input to xmm register * \param xmm the destination xmm register @@ -1191,6 +1229,14 @@ emit_fetch( swizzle ); break; + case TGSI_FILE_IMMEDIATE: + emit_immediate( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + case TGSI_FILE_INPUT: emit_inputf( func, @@ -2343,15 +2389,21 @@ tgsi_emit_sse2( * DECLARATION and INSTRUCTION phase. * GP register holding the output argument is aliased with the coeff argument, * as outputs are not needed in the DECLARATION phase. + * + * \param tokens the TGSI input shader + * \param func the output SSE code/function + * \param immediates buffer to place immediates, later passed to SSE func */ unsigned tgsi_emit_sse2_fs( struct tgsi_token *tokens, - struct x86_function *func ) + struct x86_function *func, + float (*immediates)[4]) { struct tgsi_parse_context parse; boolean instruction_phase = FALSE; unsigned ok = 1; + uint num_immediates = 0; DUMP_START(); @@ -2362,6 +2414,7 @@ tgsi_emit_sse2_fs( func, get_input_base(), get_argument( 0 ) ); + /* skipping outputs argument here */ emit_mov( func, get_const_base(), @@ -2374,6 +2427,10 @@ tgsi_emit_sse2_fs( func, get_coef_base(), get_argument( 4 ) ); + emit_mov( + func, + get_immediate_base(), + get_argument( 5 ) ); tgsi_parse_init( &parse, tokens ); @@ -2407,9 +2464,26 @@ tgsi_emit_sse2_fs( break; case TGSI_TOKEN_TYPE_IMMEDIATE: - /* XXX implement this */ - ok = 0; - debug_printf("failed to emit immediate value to SSE\n"); + /* simply copy the immediate values into the next immediates[] slot */ + { + const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + uint i; + assert(size <= 4); + assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); + for( i = 0; i < size; i++ ) { + immediates[num_immediates][i] = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + } +#if 0 + debug_printf("SSE FS immediate[%d] = %f %f %f %f\n", + num_immediates, + immediates[num_immediates][0], + immediates[num_immediates][1], + immediates[num_immediates][2], + immediates[num_immediates][3]); +#endif + num_immediates++; + } break; default: diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h index 63b8ef3911..fde06047fe 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h @@ -16,7 +16,9 @@ tgsi_emit_sse2( unsigned tgsi_emit_sse2_fs( struct tgsi_token *tokens, - struct x86_function *function ); + struct x86_function *function, + float (*immediates)[4] + ); #if defined __cplusplus } diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 8095d662ee..9c7948264f 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -51,7 +51,8 @@ typedef void (XSTDCALL *codegen_function)( struct tgsi_exec_vector *output, float (*constant)[4], struct tgsi_exec_vector *temporary, - const struct tgsi_interp_coef *coef + const struct tgsi_interp_coef *coef, + float (*immediates)[4] //, const struct tgsi_exec_vector *quadPos ); @@ -60,6 +61,7 @@ struct sp_sse_fragment_shader { struct sp_fragment_shader base; struct x86_function sse2_program; codegen_function func; + float immediates[TGSI_EXEC_NUM_IMMEDIATES][4]; }; @@ -96,7 +98,8 @@ fs_sse_run( struct sp_fragment_shader *base, machine->Outputs, machine->Consts, machine->Temps, - machine->InterpCoefs + machine->InterpCoefs, + shader->immediates // , &machine->QuadPos ); @@ -129,7 +132,8 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe, x86_init_func( &shader->sse2_program ); - if (!tgsi_emit_sse2_fs( templ->tokens, &shader->sse2_program )) { + if (!tgsi_emit_sse2_fs( templ->tokens, &shader->sse2_program, + shader->immediates)) { FREE(shader); return NULL; } -- cgit v1.2.3 From 7c2416f06e518bc1491fe13e145dcc9487d75449 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Apr 2008 15:02:21 -0600 Subject: gallium: handle TGSI immediates in SSE code for vertex shaders --- src/gallium/auxiliary/draw/draw_vs_sse.c | 9 ++++++--- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 23 +++++++++++++++++++---- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h | 4 +++- 3 files changed, 28 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index f40d65df08..13394129bc 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -50,13 +50,15 @@ typedef void (XSTDCALL *codegen_function) ( const struct tgsi_exec_vector *input, struct tgsi_exec_vector *output, float (*constant)[4], - struct tgsi_exec_vector *temporary ); + struct tgsi_exec_vector *temporary, + float (*immediates)[4] ); struct draw_sse_vertex_shader { struct draw_vertex_shader base; struct x86_function sse2_program; codegen_function func; + float immediates[TGSI_EXEC_NUM_IMMEDIATES][4]; }; @@ -149,7 +151,8 @@ vs_sse_run( struct draw_vertex_shader *base, shader->func(machine->Inputs, machine->Outputs, machine->Consts, - machine->Temps ); + machine->Temps, + shader->immediates); } @@ -243,7 +246,7 @@ draw_create_vs_sse(struct draw_context *draw, x86_init_func( &vs->sse2_program ); if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens, - &vs->sse2_program )) + &vs->sse2_program, vs->immediates )) goto fail; vs->func = (codegen_function) x86_get_func( &vs->sse2_program ); diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 748c53dbfc..e55fae6047 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -2316,10 +2316,12 @@ emit_declaration( unsigned tgsi_emit_sse2( struct tgsi_token *tokens, - struct x86_function *func ) + struct x86_function *func, + float (*immediates)[4] ) { struct tgsi_parse_context parse; unsigned ok = 1; + uint num_immediates = 0; DUMP_START(); @@ -2341,6 +2343,10 @@ tgsi_emit_sse2( func, get_temp_base(), get_argument( 3 ) ); + emit_mov( + func, + get_immediate_base(), + get_argument( 4 ) ); tgsi_parse_init( &parse, tokens ); @@ -2363,9 +2369,18 @@ tgsi_emit_sse2( break; case TGSI_TOKEN_TYPE_IMMEDIATE: - /* XXX implement this */ - ok = 0; - debug_printf("failed to emit immediate value to SSE\n"); + /* simply copy the immediate values into the next immediates[] slot */ + { + const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + uint i; + assert(size <= 4); + assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); + for( i = 0; i < size; i++ ) { + immediates[num_immediates][i] = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + } + num_immediates++; + } break; default: diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h index fde06047fe..d1190727d0 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h @@ -11,7 +11,9 @@ struct x86_function; unsigned tgsi_emit_sse2( struct tgsi_token *tokens, - struct x86_function *function ); + struct x86_function *function, + float (*immediates)[4] + ); unsigned tgsi_emit_sse2_fs( -- cgit v1.2.3 From 593cf5a6b55eb9b490a2aee2c3850d2d493fc4df Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 11 Apr 2008 15:27:00 -0600 Subject: gallium: merge the tgsi_emit_sse2() and tgsi_emit_sse2_fs() functions. The two functions were mostly the same. We can look at the shader header info to determine if it's a vertex or fragment shader. --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 201 +++++++++++----------------- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h | 7 - src/gallium/drivers/softpipe/sp_fs_sse.c | 4 +- 3 files changed, 80 insertions(+), 132 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index e55fae6047..c37e201b2b 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -2313,104 +2313,25 @@ emit_declaration( } } -unsigned -tgsi_emit_sse2( - struct tgsi_token *tokens, - struct x86_function *func, - float (*immediates)[4] ) -{ - struct tgsi_parse_context parse; - unsigned ok = 1; - uint num_immediates = 0; - - DUMP_START(); - - func->csr = func->store; - - emit_mov( - func, - get_input_base(), - get_argument( 0 ) ); - emit_mov( - func, - get_output_base(), - get_argument( 1 ) ); - emit_mov( - func, - get_const_base(), - get_argument( 2 ) ); - emit_mov( - func, - get_temp_base(), - get_argument( 3 ) ); - emit_mov( - func, - get_immediate_base(), - get_argument( 4 ) ); - - tgsi_parse_init( &parse, tokens ); - - while( !tgsi_parse_end_of_tokens( &parse ) && ok ) { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - ok = emit_instruction( - func, - &parse.FullToken.FullInstruction ); - - if (!ok) { - debug_printf("failed to translate tgsi opcode %d to SSE\n", - parse.FullToken.FullInstruction.Instruction.Opcode ); - } - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - /* simply copy the immediate values into the next immediates[] slot */ - { - const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; - uint i; - assert(size <= 4); - assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); - for( i = 0; i < size; i++ ) { - immediates[num_immediates][i] = - parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; - } - num_immediates++; - } - break; - - default: - assert( 0 ); - ok = 0; - break; - } - } - - tgsi_parse_free( &parse ); - - DUMP_END(); - - return ok; -} /** - * Fragment shaders are responsible for interpolating shader inputs. Because on - * x86 we have only 4 GP registers, and here we have 5 shader arguments (input, - * output, const, temp and coef), the code is split into two phases -- - * DECLARATION and INSTRUCTION phase. - * GP register holding the output argument is aliased with the coeff argument, - * as outputs are not needed in the DECLARATION phase. + * Translate a TGSI vertex/fragment shader to SSE2 code. + * Slightly different things are done for vertex vs. fragment shaders. + * + * Note that fragment shaders are responsible for interpolating shader + * inputs. Because on x86 we have only 4 GP registers, and here we + * have 5 shader arguments (input, output, const, temp and coef), the + * code is split into two phases -- DECLARATION and INSTRUCTION phase. + * GP register holding the output argument is aliased with the coeff + * argument, as outputs are not needed in the DECLARATION phase. * * \param tokens the TGSI input shader * \param func the output SSE code/function * \param immediates buffer to place immediates, later passed to SSE func + * \param return 1 for success, 0 if translation failed */ unsigned -tgsi_emit_sse2_fs( +tgsi_emit_sse2( struct tgsi_token *tokens, struct x86_function *func, float (*immediates)[4]) @@ -2424,50 +2345,84 @@ tgsi_emit_sse2_fs( func->csr = func->store; - /* DECLARATION phase, do not load output argument. */ - emit_mov( - func, - get_input_base(), - get_argument( 0 ) ); - /* skipping outputs argument here */ - emit_mov( - func, - get_const_base(), - get_argument( 2 ) ); - emit_mov( - func, - get_temp_base(), - get_argument( 3 ) ); - emit_mov( - func, - get_coef_base(), - get_argument( 4 ) ); - emit_mov( - func, - get_immediate_base(), - get_argument( 5 ) ); - tgsi_parse_init( &parse, tokens ); + /* + * Different function args for vertex/fragment shaders: + */ + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + /* DECLARATION phase, do not load output argument. */ + emit_mov( + func, + get_input_base(), + get_argument( 0 ) ); + /* skipping outputs argument here */ + emit_mov( + func, + get_const_base(), + get_argument( 2 ) ); + emit_mov( + func, + get_temp_base(), + get_argument( 3 ) ); + emit_mov( + func, + get_coef_base(), + get_argument( 4 ) ); + emit_mov( + func, + get_immediate_base(), + get_argument( 5 ) ); + } + else { + assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX); + + emit_mov( + func, + get_input_base(), + get_argument( 0 ) ); + emit_mov( + func, + get_output_base(), + get_argument( 1 ) ); + emit_mov( + func, + get_const_base(), + get_argument( 2 ) ); + emit_mov( + func, + get_temp_base(), + get_argument( 3 ) ); + emit_mov( + func, + get_immediate_base(), + get_argument( 4 ) ); + } + while( !tgsi_parse_end_of_tokens( &parse ) && ok ) { tgsi_parse_token( &parse ); switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: - emit_declaration( - func, - &parse.FullToken.FullDeclaration ); + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + emit_declaration( + func, + &parse.FullToken.FullDeclaration ); + } break; case TGSI_TOKEN_TYPE_INSTRUCTION: - if( !instruction_phase ) { - /* INSTRUCTION phase, overwrite coeff with output. */ - instruction_phase = TRUE; - emit_mov( - func, - get_output_base(), - get_argument( 1 ) ); + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + if( !instruction_phase ) { + /* INSTRUCTION phase, overwrite coeff with output. */ + instruction_phase = TRUE; + emit_mov( + func, + get_output_base(), + get_argument( 1 ) ); + } } + ok = emit_instruction( func, &parse.FullToken.FullInstruction ); diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h index d1190727d0..d56bf7f98a 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h @@ -15,13 +15,6 @@ tgsi_emit_sse2( float (*immediates)[4] ); -unsigned -tgsi_emit_sse2_fs( - struct tgsi_token *tokens, - struct x86_function *function, - float (*immediates)[4] - ); - #if defined __cplusplus } #endif diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 9c7948264f..5ef02a7142 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -132,8 +132,8 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe, x86_init_func( &shader->sse2_program ); - if (!tgsi_emit_sse2_fs( templ->tokens, &shader->sse2_program, - shader->immediates)) { + if (!tgsi_emit_sse2( templ->tokens, &shader->sse2_program, + shader->immediates)) { FREE(shader); return NULL; } -- cgit v1.2.3 From 2ebc99fcbc0c8fc6f6ce50e2ee674312e214ea2f Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 12 Apr 2008 11:03:56 -0600 Subject: gallium: move duplicated compute_clipmask() code to draw_vs.h --- src/gallium/auxiliary/draw/draw_vs.h | 33 +++++++++++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_vs_exec.c | 26 ------------------------ src/gallium/auxiliary/draw/draw_vs_llvm.c | 27 ------------------------- src/gallium/auxiliary/draw/draw_vs_sse.c | 28 -------------------------- 4 files changed, 33 insertions(+), 81 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 4ee7e705e9..33ce1e335e 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -31,6 +31,10 @@ #ifndef DRAW_VS_H #define DRAW_VS_H +#include "draw_context.h" +#include "draw_private.h" + + struct draw_vertex_shader; struct draw_context; struct pipe_shader_state; @@ -47,4 +51,33 @@ struct draw_vertex_shader * draw_create_vs_llvm(struct draw_context *draw, const struct pipe_shader_state *templ); + +/* Should be part of the generated shader: + */ +static INLINE unsigned +compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) +{ + unsigned mask = 0x0; + unsigned i; + + /* Do the hardwired planes first: + */ + if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; + if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; + if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; + if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; + if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; + if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; + + /* Followed by any remaining ones: + */ + for (i = 6; i < nr; i++) { + if (dot4(clip, plane[i]) < 0) + mask |= (1< Date: Sat, 12 Apr 2008 22:59:17 +0200 Subject: tgsi: Fix source register short dump code. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 62 +++++++++++++++-------------- 1 file changed, 32 insertions(+), 30 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 1d6e8b155d..cb3573ceb6 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -968,18 +968,18 @@ dump_instruction_short( } CHR( ' ' ); - if( src->SrcRegisterExtMod.Complement ) { - TXT( "(1 - " ); - } - if( src->SrcRegisterExtMod.Negate ) { - CHR( '-' ); - } - if( src->SrcRegisterExtMod.Absolute ) { + if (src->SrcRegisterExtMod.Negate) + TXT( "-(" ); + if (src->SrcRegisterExtMod.Absolute) CHR( '|' ); - } - if( src->SrcRegister.Negate ) { + if (src->SrcRegisterExtMod.Scale2X) + TXT( "2*(" ); + if (src->SrcRegisterExtMod.Bias) + CHR( '(' ); + if (src->SrcRegisterExtMod.Complement) + TXT( "1-(" ); + if (src->SrcRegister.Negate) CHR( '-' ); - } ENM( src->SrcRegister.File, TGSI_FILES_SHORT ); @@ -987,35 +987,37 @@ dump_instruction_short( SID( src->SrcRegister.Index ); CHR( ']' ); - if (src->SrcRegister.Extended) { - if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || - src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || - src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || - src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) { - CHR( '.' ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES_SHORT ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES_SHORT ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES_SHORT ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES_SHORT ); - } - } - else if( src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || - src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || - src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || - src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W ) { + if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || + src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || + src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || + src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W) { CHR( '.' ); ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES_SHORT ); ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES_SHORT ); ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES_SHORT ); ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES_SHORT ); } + if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || + src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || + src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || + src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) { + CHR( '.' ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES_SHORT ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES_SHORT ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES_SHORT ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES_SHORT ); + } - if( src->SrcRegisterExtMod.Absolute ) { + if (src->SrcRegisterExtMod.Complement) + CHR( ')' ); + if (src->SrcRegisterExtMod.Bias) + TXT( ")-.5" ); + if (src->SrcRegisterExtMod.Scale2X) + CHR( ')' ); + if (src->SrcRegisterExtMod.Absolute) CHR( '|' ); - } - if( src->SrcRegisterExtMod.Complement ) { + if (src->SrcRegisterExtMod.Negate) CHR( ')' ); - } first_reg = FALSE; } -- cgit v1.2.3 From cd5931240688cb8bd12834e3ba23f858f26dbf8c Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 13 Apr 2008 12:23:26 +0900 Subject: gallium: OS independent time-manipulation functions. --- src/gallium/auxiliary/util/Makefile | 3 +- src/gallium/auxiliary/util/SConscript | 1 + src/gallium/auxiliary/util/u_time.c | 152 ++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_time.h | 99 ++++++++++++++++++++++ 4 files changed, 254 insertions(+), 1 deletion(-) create mode 100644 src/gallium/auxiliary/util/u_time.c create mode 100644 src/gallium/auxiliary/util/u_time.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 9b6c2708b6..fe82fa6378 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -14,7 +14,8 @@ C_SOURCES = \ u_hash_table.c \ u_mm.c \ u_simple_shaders.c \ - u_snprintf.c + u_snprintf.c \ + u_mm.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 9b3929eb2d..d55d2c7081 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -15,6 +15,7 @@ util = env.ConvenienceLibrary( 'u_mm.c', 'u_simple_shaders.c', 'u_snprintf.c', + 'u_time.c', ]) auxiliaries.insert(0, util) diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c new file mode 100644 index 0000000000..e6c0b19ff6 --- /dev/null +++ b/src/gallium/auxiliary/util/u_time.c @@ -0,0 +1,152 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * OS independent time-manipulation functions. + * + * @author Jose Fonseca + */ + + +#ifndef WIN32 +#include +#else +#include +#include +#endif + +#include "util/u_time.h" + + +#ifdef WIN32 +static LONGLONG frequency = 0; +#endif + + +void +util_time_get(struct util_time *t) +{ +#ifndef WIN32 + gettimeofday(&t->tv, NULL); +#else + EngQueryPerformanceCounter(&t->counter); +#endif +} + + +void +util_time_add(const struct util_time *t1, + int64_t usecs, + struct util_time *t2) +{ +#ifndef WIN32 + t2->tv.tv_sec = t1->tv.tv_sec + usecs / 1000000; + t2->tv.tv_usec = t1->tv.tv_usec + usecs % 1000000; +#else + if(!frequency) + EngQueryPerformanceFrequency(&frequency); + t2->counter = t1->counter + (usecs * frequency + 999999LL)/1000000LL; +#endif +} + + +int64_t +util_time_diff(const struct util_time *t1, + const struct util_time *t2) +{ +#ifndef WIN32 + return (t2->tv.tv_usec - t1->tv.tv_usec) + + (t2->tv.tv_sec - t1->tv.tv_sec)*1000000; +#else + return (t2->counter - t1->counter)*1000000LL/frequency; +#endif +} + + +/** + * Compare two time values. + * + * Not publicly available because it does not take in account wrap-arounds. + * Use util_time_timeout instead. + */ +static INLINE int +util_time_compare(const struct util_time *t1, + const struct util_time *t2) +{ +#ifndef WIN32 + if (t1->tv.tv_sec < t2->tv.tv_sec) + return -1; + else if(t1->tv.tv_sec > t2->tv.tv_sec) + return 1; + else if (t1->tv.tv_usec < t2->tv.tv_usec) + return -1; + else if(t1->tv.tv_usec > t2->tv.tv_usec) + return 1; + else + return 0; +#else + if (t1->counter < t2->counter) + return -1; + else if(t1->counter > t2->counter) + return 1; + else + return 0; +#endif +} + + +int +util_time_timeout(const struct util_time *start, + const struct util_time *end, + const struct util_time *curr) +{ + if(util_time_compare(start, end) <= 0) + return util_time_compare(start, curr) <= 0 && util_time_compare(curr, end) < 0; + else + return util_time_compare(start, curr) <= 0 || util_time_compare(curr, end) < 0; +} + + +#ifdef WIN32 +void util_time_usleep(unsigned usecs) +{ + LONGLONG start, curr, end; + + EngQueryPerformanceCounter(&start); + + if(!frequency) + EngQueryPerformanceFrequency(&frequency); + + end = start + (usecs * frequency + 999999LL)/1000000LL; + + do { + EngQueryPerformanceCounter(&curr); + } while(start <= curr && curr < end || + end < start && (curr < end || start <= curr)); +} +#endif diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h new file mode 100644 index 0000000000..32035cceb5 --- /dev/null +++ b/src/gallium/auxiliary/util/u_time.h @@ -0,0 +1,99 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * OS independent time-manipulation functions. + * + * @author Jose Fonseca + */ + +#ifndef U_TIME_H_ +#define U_TIME_H_ + + +#ifndef WIN32 +#include /* timeval */ +#include /* usleep */ +#endif + +#include "pipe/p_compiler.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Time abstraction. + * + * Do not access this structure directly. Use the provided function instead. + */ +struct util_time +{ +#ifndef WIN32 + struct timeval tv; +#else + long long counter; +#endif +}; + + +void +util_time_get(struct util_time *t); + +void +util_time_add(const struct util_time *t1, + int64_t usecs, + struct util_time *t2); + +int64_t +util_time_diff(const struct util_time *t1, + const struct util_time *t2); + +/** + * Returns zero when the timeout expires, non zero otherwise. + */ +int +util_time_timeout(const struct util_time *start, + const struct util_time *end, + const struct util_time *curr); + +#ifndef WIN32 +#define util_time_sleep usleep +#else +int +util_time_sleep(unsigned usecs); +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* U_TIME_H_ */ -- cgit v1.2.3 From 21c302b0ec39480a7eaab7827cce5b609d196606 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 13 Apr 2008 14:10:46 +0900 Subject: gallium: Initial port of Thomas slab suballocator to pipebuffer. Not tested yet -- just compiles. This includes only the slab algorithm. Fencing is already implemented in pb_bufmgr_fence and time-based caching will be commited in a separate module shortly. --- src/gallium/auxiliary/pipebuffer/Makefile | 1 + src/gallium/auxiliary/pipebuffer/SConscript | 1 + src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 9 + src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c | 429 ++++++++++++++++++++++ 4 files changed, 440 insertions(+) create mode 100644 src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile index a9fa518c67..0770cad45a 100644 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -9,6 +9,7 @@ C_SOURCES = \ pb_bufmgr_fenced.c \ pb_bufmgr_mm.c \ pb_bufmgr_pool.c \ + pb_bufmgr_slab.c \ pb_winsys.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript index 3d41fd84a6..14c3c77e37 100644 --- a/src/gallium/auxiliary/pipebuffer/SConscript +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -8,6 +8,7 @@ pipebuffer = env.ConvenienceLibrary( 'pb_bufmgr_fenced.c', 'pb_bufmgr_mm.c', 'pb_bufmgr_pool.c', + 'pb_bufmgr_slab.c', 'pb_winsys.c', ]) diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 0cf8e92e37..a2377f70e2 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -114,6 +114,15 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, size_t size, size_t align2); +struct pb_manager * +pb_slab_manager_create(struct pb_manager *provider, + const struct pb_desc *desc, + size_t smallestSize, + size_t numSizes, + size_t desiredNumBuffers, + size_t maxSlabSize, + size_t pageAlignment); + /** * Fenced buffer manager. * diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c new file mode 100644 index 0000000000..676e8e29b9 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -0,0 +1,429 @@ +/************************************************************************** + * + * Copyright 2006-2008 Tungsten Graphics, Inc., Cedar Park, TX., USA + * All Rights Reserved. + * + * Permission is hereby granted, FREE of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ + +/** + * @file + * S-lab pool implementation. + * + * @author Thomas Hellstrom + * @author Jose Fonseca + */ + +#include "pipe/p_compiler.h" +#include "pipe/p_error.h" +#include "pipe/p_debug.h" +#include "pipe/p_thread.h" +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "util/u_double_list.h" +#include "util/u_time.h" + +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +#define DRI_SLABPOOL_ALLOC_RETRIES 100 + + +struct pb_slab; + +struct pb_slab_buffer +{ + struct pb_buffer base; + + struct pb_slab *slab; + struct list_head head; + unsigned mapCount; + size_t start; + _glthread_Cond event; +}; + +struct pb_slab +{ + struct list_head head; + struct list_head freeBuffers; + size_t numBuffers; + size_t numFree; + struct pb_slab_buffer *buffers; + struct pb_slab_size_header *header; + + struct pb_buffer *bo; + size_t pageAlignment; + void *virtual; +}; + +struct pb_slab_size_header +{ + struct list_head slabs; + struct list_head freeSlabs; + struct pb_slab_manager *pool; + size_t bufSize; + _glthread_Mutex mutex; +}; + +/** + * The data of this structure remains constant after + * initialization and thus needs no mutex protection. + */ +struct pb_slab_manager +{ + struct pb_manager base; + + struct pb_desc desc; + size_t *bucketSizes; + size_t numBuckets; + size_t pageSize; + struct pb_manager *provider; + unsigned pageAlignment; + unsigned maxSlabSize; + unsigned desiredNumBuffers; + struct pb_slab_size_header *headers; +}; + + +static INLINE struct pb_slab_buffer * +pb_slab_buffer(struct pb_buffer *buf) +{ + assert(buf); + return (struct pb_slab_buffer *)buf; +} + + +static INLINE struct pb_slab_manager * +pb_slab_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pb_slab_manager *)mgr; +} + + +/** + * Delete a buffer from the slab header delayed list and put + * it on the slab FREE list. + */ +static void +pb_slab_buffer_destroy(struct pb_buffer *_buf) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + struct pb_slab *slab = buf->slab; + struct pb_slab_size_header *header = slab->header; + struct list_head *list = &buf->head; + + _glthread_LOCK_MUTEX(header->mutex); + + assert(buf->base.base.refcount == 0); + + buf->mapCount = 0; + + LIST_DEL(list); + LIST_ADDTAIL(list, &slab->freeBuffers); + slab->numFree++; + + if (slab->head.next == &slab->head) + LIST_ADDTAIL(&slab->head, &header->slabs); + + if (slab->numFree == slab->numBuffers) { + list = &slab->head; + LIST_DEL(list); + LIST_ADDTAIL(list, &header->freeSlabs); + } + + if (header->slabs.next == &header->slabs || slab->numFree + != slab->numBuffers) { + + struct list_head *next; + + for (list = header->freeSlabs.next, next = list->next; list + != &header->freeSlabs; list = next, next = list->next) { + + slab = LIST_ENTRY(struct pb_slab, list, head); + + LIST_DELINIT(list); + pb_reference(&slab->bo, NULL); + FREE(slab->buffers); + FREE(slab); + } + } + + _glthread_UNLOCK_MUTEX(header->mutex); +} + + +static void * +pb_slab_buffer_map(struct pb_buffer *_buf, + unsigned flags) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + + ++buf->mapCount; + return (void *) ((uint8_t *) buf->slab->virtual + buf->start); +} + + +static void +pb_slab_buffer_unmap(struct pb_buffer *_buf) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + + --buf->mapCount; + if (buf->mapCount == 0) + _glthread_COND_BROADCAST(buf->event); +} + + +static void +pb_slab_buffer_get_base_buffer(struct pb_buffer *_buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + pb_get_base_buffer(buf->slab->bo, base_buf, offset); + *offset += buf->start; +} + + +static const struct pb_vtbl +pb_slab_buffer_vtbl = { + pb_slab_buffer_destroy, + pb_slab_buffer_map, + pb_slab_buffer_unmap, + pb_slab_buffer_get_base_buffer +}; + + +static enum pipe_error +pb_slab_create(struct pb_slab_size_header *header) +{ + struct pb_slab_manager *pool = header->pool; + size_t size = header->bufSize * pool->desiredNumBuffers; + struct pb_slab *slab; + struct pb_slab_buffer *buf; + size_t numBuffers; + int ret; + unsigned i; + + slab = CALLOC_STRUCT(pb_slab); + if (!slab) + return PIPE_ERROR_OUT_OF_MEMORY; + + /* + * FIXME: We should perhaps allow some variation in slabsize in order + * to efficiently reuse slabs. + */ + + size = (size <= pool->maxSlabSize) ? size : pool->maxSlabSize; + size = (size + pool->pageSize - 1) & ~(pool->pageSize - 1); + + slab->bo = pool->provider->create_buffer(pool->provider, size, &pool->desc); + if(!slab->bo) + goto out_err0; + + slab->virtual = pb_map(slab->bo, + PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE); + if(!slab->virtual) + goto out_err1; + + pb_unmap(slab->bo); + + numBuffers = slab->bo->base.size / header->bufSize; + + slab->buffers = CALLOC(numBuffers, sizeof(*slab->buffers)); + if (!slab->buffers) { + ret = PIPE_ERROR_OUT_OF_MEMORY; + goto out_err1; + } + + LIST_INITHEAD(&slab->head); + LIST_INITHEAD(&slab->freeBuffers); + slab->numBuffers = numBuffers; + slab->numFree = 0; + slab->header = header; + + buf = slab->buffers; + for (i=0; i < numBuffers; ++i) { + buf->base.base.refcount = 0; + buf->base.base.size = header->bufSize; + buf->base.base.alignment = 0; + buf->base.base.usage = 0; + buf->base.vtbl = &pb_slab_buffer_vtbl; + buf->slab = slab; + buf->start = i* header->bufSize; + buf->mapCount = 0; + _glthread_INIT_COND(buf->event); + LIST_ADDTAIL(&buf->head, &slab->freeBuffers); + slab->numFree++; + buf++; + } + + LIST_ADDTAIL(&slab->head, &header->slabs); + + return PIPE_OK; + +out_err1: + pb_reference(&slab->bo, NULL); +out_err0: + FREE(slab); + return ret; +} + + +static struct pb_buffer * +pb_slab_manager_create_buffer(struct pb_manager *_pool, + size_t size, + const struct pb_desc *desc) +{ + struct pb_slab_manager *pool = pb_slab_manager(_pool); + struct pb_slab_size_header *header; + unsigned i; + static struct pb_slab_buffer *buf; + struct pb_slab *slab; + struct list_head *list; + int count = DRI_SLABPOOL_ALLOC_RETRIES; + + /* + * FIXME: Check for compatibility. + */ + + header = pool->headers; + for (i=0; inumBuckets; ++i) { + if (header->bufSize >= size) + break; + header++; + } + + if (i >= pool->numBuckets) + /* Fall back to allocate a buffer object directly from the provider. */ + return pool->provider->create_buffer(pool->provider, size, desc); + + + _glthread_LOCK_MUTEX(header->mutex); + while (header->slabs.next == &header->slabs && count > 0) { + if (header->slabs.next != &header->slabs) + break; + + _glthread_UNLOCK_MUTEX(header->mutex); + if (count != DRI_SLABPOOL_ALLOC_RETRIES) + util_time_sleep(1); + _glthread_LOCK_MUTEX(header->mutex); + (void) pb_slab_create(header); + count--; + } + + list = header->slabs.next; + if (list == &header->slabs) { + _glthread_UNLOCK_MUTEX(header->mutex); + return NULL; + } + slab = LIST_ENTRY(struct pb_slab, list, head); + if (--slab->numFree == 0) + LIST_DELINIT(list); + + list = slab->freeBuffers.next; + LIST_DELINIT(list); + + _glthread_UNLOCK_MUTEX(header->mutex); + buf = LIST_ENTRY(struct pb_slab_buffer, list, head); + ++buf->base.base.refcount; + return &buf->base; +} + + +static void +pb_slab_manager_destroy(struct pb_manager *_pool) +{ + struct pb_slab_manager *pool = pb_slab_manager(_pool); + + FREE(pool->headers); + FREE(pool->bucketSizes); + FREE(pool); +} + + +struct pb_manager * +pb_slab_manager_create(struct pb_manager *provider, + const struct pb_desc *desc, + size_t smallestSize, + size_t numSizes, + size_t desiredNumBuffers, + size_t maxSlabSize, + size_t pageAlignment) +{ + struct pb_slab_manager *pool; + size_t i; + + pool = CALLOC_STRUCT(pb_slab_manager); + if (!pool) + goto out_err0; + + pool->bucketSizes = CALLOC(numSizes, sizeof(*pool->bucketSizes)); + if (!pool->bucketSizes) + goto out_err1; + + pool->headers = CALLOC(numSizes, sizeof(*pool->headers)); + if (!pool->headers) + goto out_err2; + + pool->desc = *desc; + pool->numBuckets = numSizes; +#ifdef WIN32 + pool->pageSize = 4096; +#else + pool->pageSize = getpagesize(); +#endif + pool->provider = provider; + pool->pageAlignment = pageAlignment; + pool->maxSlabSize = maxSlabSize; + pool->desiredNumBuffers = desiredNumBuffers; + + for (i=0; inumBuckets; ++i) { + struct pb_slab_size_header *header = &pool->headers[i]; + + pool->bucketSizes[i] = (smallestSize << i); + + _glthread_INIT_MUTEX(header->mutex); + + LIST_INITHEAD(&header->slabs); + LIST_INITHEAD(&header->freeSlabs); + + header->pool = pool; + header->bufSize = (smallestSize << i); + } + + pool->base.destroy = pb_slab_manager_destroy; + pool->base.create_buffer = pb_slab_manager_create_buffer; + + return &pool->base; + +out_err2: + FREE(pool->bucketSizes); +out_err1: + FREE(pool); +out_err0: + return NULL; +} -- cgit v1.2.3 From fb2b5f7a4ac411a5bb5cde12ba15265b30c032e8 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 13 Apr 2008 15:10:01 +0900 Subject: gallium: Buffer cache. --- src/gallium/auxiliary/pipebuffer/Makefile | 1 + src/gallium/auxiliary/pipebuffer/SConscript | 1 + src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 20 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 299 +++++++++++++++++++++ 4 files changed, 318 insertions(+), 3 deletions(-) create mode 100644 src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile index 0770cad45a..d654dbcc91 100644 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -6,6 +6,7 @@ LIBNAME = pipebuffer C_SOURCES = \ pb_buffer_fenced.c \ pb_buffer_malloc.c \ + pb_bufmgr_cache.c \ pb_bufmgr_fenced.c \ pb_bufmgr_mm.c \ pb_bufmgr_pool.c \ diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript index 14c3c77e37..604a217982 100644 --- a/src/gallium/auxiliary/pipebuffer/SConscript +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -5,6 +5,7 @@ pipebuffer = env.ConvenienceLibrary( source = [ 'pb_buffer_fenced.c', 'pb_buffer_malloc.c', + 'pb_bufmgr_cache.c', 'pb_bufmgr_fenced.c', 'pb_bufmgr_mm.c', 'pb_bufmgr_pool.c', diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index a2377f70e2..b2d2520b67 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -80,7 +80,7 @@ struct pb_manager /** - * Static buffer pool manager. + * Static buffer pool sub-allocator. * * Manages the allocation of equally sized buffers. It does so by allocating * a single big buffer and divide it equally sized buffers. @@ -94,7 +94,7 @@ pool_bufmgr_create(struct pb_manager *provider, /** - * Wraper around the old memory manager. + * Static sub-allocator based the old memory manager. * * It managers buffers of different sizes. It does so by allocating a buffer * with the size of the heap, and then using the old mm memory manager to manage @@ -114,6 +114,9 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, size_t size, size_t align2); +/** + * Slab sub-allocator. + */ struct pb_manager * pb_slab_manager_create(struct pb_manager *provider, const struct pb_desc *desc, @@ -122,7 +125,18 @@ pb_slab_manager_create(struct pb_manager *provider, size_t desiredNumBuffers, size_t maxSlabSize, size_t pageAlignment); - + + +/** + * Time-based buffer cache. + * + * This manager keeps a cache of destroyed buffers during a time interval. + */ +struct pb_manager * +pb_cache_manager_create(struct pb_manager *provider, + unsigned usecs); + + /** * Fenced buffer manager. * diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c new file mode 100644 index 0000000000..06de0bb6c3 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -0,0 +1,299 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Buffer cache. + * + * \author José Fonseca + * \author Thomas Hellström + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_winsys.h" +#include "pipe/p_thread.h" +#include "pipe/p_util.h" +#include "util/u_double_list.h" +#include "util/u_time.h" + +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +/** + * Convenience macro (type safe). + */ +#define SUPER(__derived) (&(__derived)->base) + + +struct pb_cache_manager; + + +/** + * Wrapper around a pipe buffer which adds delayed destruction. + */ +struct pb_cache_buffer +{ + struct pb_buffer base; + + struct pb_buffer *buffer; + struct pb_cache_manager *mgr; + + /** Caching time interval */ + struct util_time start, end; + + struct list_head head; +}; + + +struct pb_cache_manager +{ + struct pb_manager base; + + struct pb_manager *provider; + unsigned usecs; + + _glthread_Mutex mutex; + + struct list_head delayed; + size_t numDelayed; +}; + + +static INLINE struct pb_cache_buffer * +pb_cache_buffer(struct pb_buffer *buf) +{ + assert(buf); + return (struct pb_cache_buffer *)buf; +} + + +static INLINE struct pb_cache_manager * +pb_cache_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pb_cache_manager *)mgr; +} + + +/** + * Actually destroy the buffer. + */ +static INLINE void +_pb_cache_buffer_destroy(struct pb_cache_buffer *buf) +{ + struct pb_cache_manager *mgr = buf->mgr; + + LIST_DEL(&buf->head); + assert(mgr->numDelayed); + --mgr->numDelayed; + assert(!buf->base.base.refcount); + pb_reference(&buf->buffer, NULL); + FREE(buf); +} + + +/** + * Free as many cache buffers from the list head as possible. + */ +static void +_pb_cache_buffer_list_check_free(struct pb_cache_manager *mgr) +{ + struct list_head *curr, *next; + struct pb_cache_buffer *buf; + struct util_time now; + + util_time_get(&now); + + curr = mgr->delayed.next; + next = curr->next; + while(curr != &mgr->delayed) { + buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); + + if(util_time_timeout(&buf->start, &buf->end, &now) != 0) + break; + + _pb_cache_buffer_destroy(buf); + + curr = next; + next = curr->next; + } +} + + +static void +pb_cache_buffer_destroy(struct pb_buffer *_buf) +{ + struct pb_cache_buffer *buf = pb_cache_buffer(_buf); + struct pb_cache_manager *mgr = buf->mgr; + + _glthread_LOCK_MUTEX(mgr->mutex); + assert(buf->base.base.refcount == 0); + + _pb_cache_buffer_list_check_free(mgr); + + util_time_get(&buf->start); + util_time_add(&buf->start, mgr->usecs, &buf->end); + LIST_ADDTAIL(&buf->head, &mgr->delayed); + ++mgr->numDelayed; + _glthread_UNLOCK_MUTEX(mgr->mutex); +} + + +static void * +pb_cache_buffer_map(struct pb_buffer *_buf, + unsigned flags) +{ + struct pb_cache_buffer *buf = pb_cache_buffer(_buf); + return pb_map(buf->buffer, flags); +} + + +static void +pb_cache_buffer_unmap(struct pb_buffer *_buf) +{ + struct pb_cache_buffer *buf = pb_cache_buffer(_buf); + pb_unmap(buf->buffer); +} + + +static void +pb_cache_buffer_get_base_buffer(struct pb_buffer *_buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + struct pb_cache_buffer *buf = pb_cache_buffer(_buf); + pb_get_base_buffer(buf->buffer, base_buf, offset); +} + + +const struct pb_vtbl +pb_cache_buffer_vtbl = { + pb_cache_buffer_destroy, + pb_cache_buffer_map, + pb_cache_buffer_unmap, + pb_cache_buffer_get_base_buffer +}; + + +static struct pb_buffer * +pb_cache_manager_create_buffer(struct pb_manager *_mgr, + size_t size, + const struct pb_desc *desc) +{ + struct pb_cache_manager *mgr = pb_cache_manager(_mgr); + struct pb_cache_buffer *buf; + struct list_head *curr, *next; + struct util_time now; + + util_time_get(&now); + curr = mgr->delayed.next; + next = curr->next; + while(curr != &mgr->delayed) { + buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); + + if(buf->base.base.size == size && + buf->base.base.alignment >= desc->alignment && + (buf->base.base.alignment % desc->alignment) == 0 && + /* buf->base.base.usage == usage */ 1) { + ++buf->base.base.refcount; + return &buf->base; + } + + if(util_time_timeout(&buf->start, &buf->end, &now) != 0) + _pb_cache_buffer_destroy(buf); + + curr = next; + next = curr->next; + } + + buf = CALLOC_STRUCT(pb_cache_buffer); + if(!buf) + return NULL; + + buf->buffer = mgr->provider->create_buffer(mgr->provider, size, desc); + if(!buf->buffer) { + FREE(buf); + return NULL; + } + + buf->base.base.refcount = 1; + buf->base.base.alignment = buf->buffer->base.alignment; + buf->base.base.usage = buf->buffer->base.usage; + buf->base.base.size = buf->buffer->base.size; + + buf->base.vtbl = &pb_cache_buffer_vtbl; + buf->mgr = mgr; + + return &buf->base; +} + + +static void +pb_cache_manager_destroy(struct pb_manager *_mgr) +{ + struct pb_cache_manager *mgr = pb_cache_manager(_mgr); + struct list_head *curr, *next; + struct pb_cache_buffer *buf; + + _glthread_LOCK_MUTEX(mgr->mutex); + curr = mgr->delayed.next; + next = curr->next; + while(curr != &mgr->delayed) { + buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); + _pb_cache_buffer_destroy(buf); + curr = next; + next = curr->next; + } + _glthread_UNLOCK_MUTEX(mgr->mutex); + + FREE(mgr); +} + + +struct pb_manager * +pb_cache_manager_create(struct pb_manager *provider, + unsigned usecs) +{ + struct pb_cache_manager *mgr; + + mgr = (struct pb_cache_manager *)CALLOC(1, sizeof(*mgr)); + if (!mgr) + return NULL; + + mgr->base.destroy = pb_cache_manager_destroy; + mgr->base.create_buffer = pb_cache_manager_create_buffer; + mgr->provider = provider; + mgr->usecs = usecs; + LIST_INITHEAD(&mgr->delayed); + mgr->numDelayed = 0; + _glthread_INIT_MUTEX(mgr->mutex); + + return &mgr->base; +} -- cgit v1.2.3 From 84994693f53668d5ca72d57765a0729fe343593b Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 13 Apr 2008 18:52:54 +0900 Subject: gallium: Add u_time.c --- src/gallium/auxiliary/util/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index fe82fa6378..05bc43131a 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -15,6 +15,7 @@ C_SOURCES = \ u_mm.c \ u_simple_shaders.c \ u_snprintf.c \ + u_time.c \ u_mm.c include ../../Makefile.template -- cgit v1.2.3 From 4f550ab821f9aef9f19d9f1e10785f8c1f511ad4 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 7 Apr 2008 20:38:39 -0400 Subject: introduce a define to maxout the processed vertices --- src/gallium/auxiliary/draw/draw_vertex_shader.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c index 133418baca..d5f37bca21 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c @@ -37,7 +37,7 @@ #include "draw_context.h" #include "draw_vs.h" - +#define MAX_SHADER_VERTICES 4 /** * Run the vertex shader on all vertices in the vertex queue. @@ -58,23 +58,23 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) // fprintf(stderr, "%s %d\n", __FUNCTION__, draw->vs.queue_nr ); /* run vertex shader on vertex cache entries, four per invokation */ - for (i = 0; i < draw->vs.queue_nr; i += 4) { - struct vertex_header *dests[4]; - unsigned elts[4]; - int j, n = MIN2(4, draw->vs.queue_nr - i); + for (i = 0; i < draw->vs.queue_nr; i += MAX_SHADER_VERTICES) { + struct vertex_header *dests[MAX_SHADER_VERTICES]; + unsigned elts[MAX_SHADER_VERTICES]; + int j, n = MIN2(MAX_SHADER_VERTICES, - i); for (j = 0; j < n; j++) { elts[j] = draw->vs.queue[i + j].elt; dests[j] = draw->vs.queue[i + j].vertex; } - for ( ; j < 4; j++) { + for ( ; j < MAX_SHADER_VERTICES; j++) { elts[j] = elts[0]; dests[j] = draw->vs.queue[i + j].vertex; } assert(n > 0); - assert(n <= 4); + assert(n <= MAX_SHADER_VERTICES); shader->run(shader, draw, elts, n, dests); } -- cgit v1.2.3 From aadbb1d7fbbaada6e378cb60194e5861cadf98d1 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sat, 12 Apr 2008 15:45:28 -0400 Subject: pass arbitrary number of vertices to the shader execution cycle --- src/gallium/auxiliary/draw/draw_private.h | 2 + src/gallium/auxiliary/draw/draw_vertex_shader.c | 4 +- src/gallium/auxiliary/draw/draw_vs_exec.c | 144 ++++++++++++------------ src/gallium/auxiliary/draw/draw_vs_sse.c | 139 +++++++++++------------ 4 files changed, 146 insertions(+), 143 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 4d056f6dba..f9aea9f355 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -56,6 +56,8 @@ struct gallivm_cpu_engine; struct draw_pt_middle_end; struct draw_pt_front_end; +#define MAX_SHADER_VERTICES 128 + /** * Basic vertex info. * Carry some useful information around with the vertices in the prim pipe. diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c index d5f37bca21..726921d77b 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c @@ -37,8 +37,6 @@ #include "draw_context.h" #include "draw_vs.h" -#define MAX_SHADER_VERTICES 4 - /** * Run the vertex shader on all vertices in the vertex queue. * Called by the draw module when the vertx cache needs to be flushed. @@ -61,7 +59,7 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) for (i = 0; i < draw->vs.queue_nr; i += MAX_SHADER_VERTICES) { struct vertex_header *dests[MAX_SHADER_VERTICES]; unsigned elts[MAX_SHADER_VERTICES]; - int j, n = MIN2(MAX_SHADER_VERTICES, - i); + int j, n = MIN2(MAX_SHADER_VERTICES, draw->vs.queue_nr - i); for (j = 0; j < n; j++) { elts[j] = draw->vs.queue[i + j].elt; diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 9629410abb..df0051d693 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -40,6 +40,7 @@ #include "tgsi/util/tgsi_parse.h" +#define MAX_TGSI_VERTICES 4 static void vs_exec_prepare( struct draw_vertex_shader *shader, @@ -71,14 +72,13 @@ vs_exec_run( struct draw_vertex_shader *shader, struct vertex_header *vOut[] ) { struct tgsi_exec_machine *machine = &draw->machine; - unsigned int j; + unsigned int i, j; ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; - assert(count <= 4); assert(draw->vertex_shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION); @@ -92,80 +92,82 @@ vs_exec_run( struct draw_vertex_shader *shader, machine->Outputs = ALIGN16_ASSIGN(outputs); } - draw->vertex_fetch.fetch_func( draw, machine, elts, count ); + for (i = 0; i < count; i += MAX_TGSI_VERTICES) { + unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); + draw->vertex_fetch.fetch_func( draw, machine, &elts[i], max_vertices ); - if (!draw->rasterizer->bypass_vs) { - /* run interpreter */ - tgsi_exec_machine_run( machine ); - } - - /* store machine results */ - for (j = 0; j < count; j++) { - unsigned slot; - float x, y, z, w; - - /* Handle attr[0] (position) specially: - * - * XXX: Computing the clipmask should be done in the vertex - * program as a set of DP4 instructions appended to the - * user-provided code. - */ - x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - - if (!draw->rasterizer->bypass_clipping) { - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - } - else { - vOut[j]->clipmask = 0; - } - vOut[j]->edgeflag = 1; - - if (!draw->identity_viewport) { - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; - } - else { - vOut[j]->data[0][0] = x; - vOut[j]->data[0][1] = y; - vOut[j]->data[0][2] = z; - vOut[j]->data[0][3] = w; + if (!draw->rasterizer->bypass_vs) { + /* run interpreter */ + tgsi_exec_machine_run( machine ); } - /* Remaining attributes are packed into sequential post-transform - * vertex attrib slots. - */ - for (slot = 1; slot < draw->num_vs_outputs; slot++) { - vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; - } + /* store machine results */ + for (j = 0; j < max_vertices; j++) { + unsigned slot; + float x, y, z, w; + + /* Handle attr[0] (position) specially: + * + * XXX: Computing the clipmask should be done in the vertex + * program as a set of DP4 instructions appended to the + * user-provided code. + */ + x = vOut[i + j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = vOut[i + j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = vOut[i + j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = vOut[i + j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + + if (!draw->rasterizer->bypass_clipping) { + vOut[i + j]->clipmask = compute_clipmask(vOut[i + j]->clip, draw->plane, + draw->nr_planes); + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + } + else { + vOut[i + j]->clipmask = 0; + } + vOut[i + j]->edgeflag = 1; + + if (!draw->identity_viewport) { + /* Viewport mapping */ + vOut[i + j]->data[0][0] = x * scale[0] + trans[0]; + vOut[i + j]->data[0][1] = y * scale[1] + trans[1]; + vOut[i + j]->data[0][2] = z * scale[2] + trans[2]; + vOut[i + j]->data[0][3] = w; + } + else { + vOut[i + j]->data[0][0] = x; + vOut[i + j]->data[0][1] = y; + vOut[i + j]->data[0][2] = z; + vOut[i + j]->data[0][3] = w; + } + + /* Remaining attributes are packed into sequential post-transform + * vertex attrib slots. + */ + for (slot = 1; slot < draw->num_vs_outputs; slot++) { + vOut[i + j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + vOut[i + j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + vOut[i + j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + vOut[i + j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } #if 0 /*DEBUG*/ - printf("Post xform vert:\n"); - for (slot = 0; slot < draw->num_vs_outputs; slot++) { - printf("%d: %f %f %f %f\n", slot, - vOut[j]->data[slot][0], - vOut[j]->data[slot][1], - vOut[j]->data[slot][2], - vOut[j]->data[slot][3]); - } -#endif - - - } /* loop over vertices */ + printf("%d) Post xform vert:\n", i + j); + for (slot = 0; slot < draw->num_vs_outputs; slot++) { + printf("\t%d: %f %f %f %f\n", slot, + vOut[i + j]->data[slot][0], + vOut[i + j]->data[slot][1], + vOut[i + j]->data[slot][2], + vOut[i + j]->data[slot][3]); + } +#endif + } /* loop over vertices */ + } } diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 0ee991d764..bfec89254e 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -45,6 +45,7 @@ #include "tgsi/exec/tgsi_sse2.h" #include "tgsi/util/tgsi_parse.h" +#define SSE_MAX_VERTICES 4 typedef void (XSTDCALL *codegen_function) ( const struct tgsi_exec_vector *input, @@ -86,14 +87,13 @@ vs_sse_run( struct draw_vertex_shader *base, { struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base; struct tgsi_exec_machine *machine = &draw->machine; - unsigned int j; + unsigned int i, j; ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; - assert(count <= 4); assert(draw->vertex_shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION); @@ -108,77 +108,78 @@ vs_sse_run( struct draw_vertex_shader *base, machine->Outputs = ALIGN16_ASSIGN(outputs); } - - /* Fetch vertices. This may at some point be integrated into the - * compiled shader -- that would require a reorganization where - * multiple versions of the compiled shader might exist, - * specialized for each fetch state. - */ - draw->vertex_fetch.fetch_func( draw, machine, elts, count ); - - - if (!draw->rasterizer->bypass_vs) { - /* run compiled shader - */ - shader->func(machine->Inputs, - machine->Outputs, - machine->Consts, - machine->Temps, - shader->immediates); - } - - - /* XXX: Computing the clipmask and emitting results should be done - * in the vertex program as a set of instructions appended to - * the user-provided code. - */ - for (j = 0; j < count; j++) { - unsigned slot; - float x, y, z, w; - - x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; - - if (!draw->rasterizer->bypass_clipping) { - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - } - else { - vOut[j]->clipmask = 0; - } - vOut[j]->edgeflag = 1; - - if (!draw->identity_viewport) { - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; - } - else { - vOut[j]->data[0][0] = x; - vOut[j]->data[0][1] = y; - vOut[j]->data[0][2] = z; - vOut[j]->data[0][3] = w; + for (i = 0; i < count; i += SSE_MAX_VERTICES) { + unsigned int max_vertices = MIN2(SSE_MAX_VERTICES, count - i); + /* Fetch vertices. This may at some point be integrated into the + * compiled shader -- that would require a reorganization where + * multiple versions of the compiled shader might exist, + * specialized for each fetch state. + */ + draw->vertex_fetch.fetch_func(draw, machine, &elts[i], max_vertices); + + if (!draw->rasterizer->bypass_vs) { + /* run compiled shader + */ + shader->func(machine->Inputs, + machine->Outputs, + machine->Consts, + machine->Temps, + shader->immediates); } - /* Remaining attributes are packed into sequential post-transform - * vertex attrib slots. + /* XXX: Computing the clipmask and emitting results should be done + * in the vertex program as a set of instructions appended to + * the user-provided code. */ - for (slot = 1; slot < draw->num_vs_outputs; slot++) { - vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + for (j = 0; j < max_vertices; j++) { + unsigned slot; + float x, y, z, w; + + x = vOut[i + j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = vOut[i + j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = vOut[i + j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = vOut[i + j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + + if (!draw->rasterizer->bypass_clipping) { + vOut[i + j]->clipmask = compute_clipmask(vOut[i + j]->clip, draw->plane, + draw->nr_planes); + + /* divide by w */ + w = 1.0f / w; + x *= w; + y *= w; + z *= w; + } + else { + vOut[i + j]->clipmask = 0; + } + vOut[j]->edgeflag = 1; + + if (!draw->identity_viewport) { + /* Viewport mapping */ + vOut[i + j]->data[0][0] = x * scale[0] + trans[0]; + vOut[i + j]->data[0][1] = y * scale[1] + trans[1]; + vOut[i + j]->data[0][2] = z * scale[2] + trans[2]; + vOut[i + j]->data[0][3] = w; + } + else { + vOut[i + j]->data[0][0] = x; + vOut[i + j]->data[0][1] = y; + vOut[i + j]->data[0][2] = z; + vOut[i + j]->data[0][3] = w; + } + + /* Remaining attributes are packed into sequential post-transform + * vertex attrib slots. + */ + for (slot = 1; slot < draw->num_vs_outputs; slot++) { + vOut[i + j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + vOut[i + j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + vOut[i + j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + vOut[i + j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } } - } + } } -- cgit v1.2.3 From 808f968f3ad0cb32e86f517753d5715d00e9ec2c Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 11 Apr 2008 19:58:22 -0400 Subject: return true if one of the vertices has been clipped --- src/gallium/auxiliary/draw/draw_private.h | 12 ++++++------ src/gallium/auxiliary/draw/draw_vs_exec.c | 5 ++++- src/gallium/auxiliary/draw/draw_vs_llvm.c | 18 +++++++++++------- src/gallium/auxiliary/draw/draw_vs_sse.c | 5 ++++- 4 files changed, 25 insertions(+), 15 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index f9aea9f355..5c710667fc 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -148,12 +148,12 @@ struct draw_vertex_shader { /* Run the shader - this interface will get cleaned up in the * future: */ - void (*run)( struct draw_vertex_shader *shader, - struct draw_context *draw, - const unsigned *elts, - unsigned count, - struct vertex_header *vOut[] ); - + boolean (*run)( struct draw_vertex_shader *shader, + struct draw_context *draw, + const unsigned *elts, + unsigned count, + struct vertex_header *vOut[] ); + void (*delete)( struct draw_vertex_shader * ); }; diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index df0051d693..09e0d0eaab 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -64,7 +64,7 @@ vs_exec_prepare( struct draw_vertex_shader *shader, * \param count number of vertices to shade [1..4] * \param vOut array of pointers to four output vertices */ -static void +static boolean vs_exec_run( struct draw_vertex_shader *shader, struct draw_context *draw, const unsigned *elts, @@ -73,6 +73,7 @@ vs_exec_run( struct draw_vertex_shader *shader, { struct tgsi_exec_machine *machine = &draw->machine; unsigned int i, j; + unsigned int clipped = 0; ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); @@ -120,6 +121,7 @@ vs_exec_run( struct draw_vertex_shader *shader, if (!draw->rasterizer->bypass_clipping) { vOut[i + j]->clipmask = compute_clipmask(vOut[i + j]->clip, draw->plane, draw->nr_planes); + clipped += vOut[i + j]->clipmask; /* divide by w */ w = 1.0f / w; @@ -168,6 +170,7 @@ vs_exec_run( struct draw_vertex_shader *shader, #endif } /* loop over vertices */ } + return clipped != 0; } diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 7e48428cdd..6db1e65a2d 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -67,18 +67,19 @@ vs_llvm_prepare( struct draw_vertex_shader *base, * \param count number of vertices to shade [1..4] * \param vOut array of pointers to four output vertices */ -static void +static boolean vs_llvm_run( struct draw_vertex_shader *base, - struct draw_context *draw, - const unsigned *elts, + struct draw_context *draw, + const unsigned *elts, unsigned count, struct vertex_header *vOut[] ) { - struct draw_llvm_vertex_shader *shader = + struct draw_llvm_vertex_shader *shader = (struct draw_llvm_vertex_shader *)base; struct tgsi_exec_machine *machine = &draw->machine; unsigned int j; + unsigned int clipped = 0; ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); @@ -125,19 +126,21 @@ vs_llvm_run( struct draw_vertex_shader *base, w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; if (!draw->rasterizer->bypass_clipping) { - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, draw->nr_planes); + vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, + draw->nr_planes); + clipped += vOut[j]->clipmask; /* divide by w */ w = 1.0f / w; x *= w; y *= w; - z *= w; + z *= w; } else { vOut[j]->clipmask = 0; } vOut[j]->edgeflag = 1; - + if (!draw->identity_viewport) { /* Viewport mapping */ vOut[j]->data[0][0] = x * scale[0] + trans[0]; @@ -162,6 +165,7 @@ vs_llvm_run( struct draw_vertex_shader *base, vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; } } /* loop over vertices */ + return clipped != 0; } static void diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index bfec89254e..6e8d2021f5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -78,7 +78,7 @@ vs_sse_prepare( struct draw_vertex_shader *base, * \param count number of vertices to shade [1..4] * \param vOut array of pointers to four output vertices */ -static void +static boolean vs_sse_run( struct draw_vertex_shader *base, struct draw_context *draw, const unsigned *elts, @@ -88,6 +88,7 @@ vs_sse_run( struct draw_vertex_shader *base, struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base; struct tgsi_exec_machine *machine = &draw->machine; unsigned int i, j; + unsigned int clipped = 0; ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); @@ -143,6 +144,7 @@ vs_sse_run( struct draw_vertex_shader *base, if (!draw->rasterizer->bypass_clipping) { vOut[i + j]->clipmask = compute_clipmask(vOut[i + j]->clip, draw->plane, draw->nr_planes); + clipped += vOut[i + j]->clipmask; /* divide by w */ w = 1.0f / w; @@ -180,6 +182,7 @@ vs_sse_run( struct draw_vertex_shader *base, } } } + return clipped != 0; } -- cgit v1.2.3 From 3f7a3dd58c0ce2719af83ff1d89a26185d08c04c Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sat, 12 Apr 2008 21:52:46 -0400 Subject: Make shaders operate on a block of memory instead of arrays of vertex_header's --- src/gallium/auxiliary/draw/draw_context.c | 10 ++--- src/gallium/auxiliary/draw/draw_private.h | 15 ++++--- src/gallium/auxiliary/draw/draw_vertex_cache.c | 33 +++++++++------ src/gallium/auxiliary/draw/draw_vertex_shader.c | 7 ++-- src/gallium/auxiliary/draw/draw_vs_exec.c | 54 ++++++++++++------------ src/gallium/auxiliary/draw/draw_vs_llvm.c | 2 +- src/gallium/auxiliary/draw/draw_vs_sse.c | 56 +++++++++++++++---------- 7 files changed, 100 insertions(+), 77 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index b3c65c90d6..cdca556fbd 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -86,14 +86,12 @@ struct draw_context *draw_create( void ) /* Statically allocate maximum sized vertices for the cache - could be cleverer... */ { - uint i; const unsigned size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f; - char *tmp = align_malloc(Elements(draw->vs.queue) * size, 16); + char *tmp = align_malloc(VS_QUEUE_LENGTH * size, 16); if (!tmp) goto fail; - for (i = 0; i < Elements(draw->vs.queue); i++) - draw->vs.queue[i].vertex = (struct vertex_header *)(tmp + i * size); + draw->vs.vertex_cache = tmp; } draw->shader_queue_flush = draw_vertex_shader_queue_flush; @@ -156,8 +154,8 @@ void draw_destroy( struct draw_context *draw ) tgsi_exec_machine_free_data(&draw->machine); - if (draw->vs.queue[0].vertex) - align_free( draw->vs.queue[0].vertex ); /* Frees all the vertices. */ + if (draw->vs.vertex_cache) + align_free( draw->vs.vertex_cache ); /* Frees all the vertices. */ /* Not so fast -- we're just borrowing this at the moment. * diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 5c710667fc..1115166192 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -152,7 +152,7 @@ struct draw_vertex_shader { struct draw_context *draw, const unsigned *elts, unsigned count, - struct vertex_header *vOut[] ); + void *out ); void (*delete)( struct draw_vertex_shader * ); @@ -321,10 +321,8 @@ struct draw_context /* Vertex shader queue: */ struct { - struct { - unsigned elt; /**< index into the user's vertex arrays */ - struct vertex_header *vertex; - } queue[VS_QUEUE_LENGTH]; + unsigned elts[VS_QUEUE_LENGTH]; /**< index into the user's vertex arrays */ + char *vertex_cache; unsigned queue_nr; unsigned post_nr; } vs; @@ -450,4 +448,11 @@ dot4(const float *a, const float *b) return result; } +static INLINE struct vertex_header * +draw_header_from_block(char *block, int num) +{ + static const unsigned size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f; + return (struct vertex_header*)(block + num * size); +} + #endif /* DRAW_PRIVATE_H */ diff --git a/src/gallium/auxiliary/draw/draw_vertex_cache.c b/src/gallium/auxiliary/draw/draw_vertex_cache.c index c0248979e2..9256dec4ea 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_cache.c +++ b/src/gallium/auxiliary/draw/draw_vertex_cache.c @@ -71,25 +71,27 @@ static struct vertex_header *get_vertex( struct draw_context *draw, /* Cache hit? */ if (draw->vcache.idx[slot].in == i) { -// _mesa_printf("HIT %d %d\n", slot, i); + /*debug_printf("HIT %d %d\n", slot, i);*/ assert(draw->vcache.idx[slot].out < draw->vs.queue_nr); - return draw->vs.queue[draw->vcache.idx[slot].out].vertex; + return draw_header_from_block(draw->vs.vertex_cache, + draw->vcache.idx[slot].out); } /* Otherwise a collision */ slot = VCACHE_SIZE + draw->vcache.overflow++; -// _mesa_printf("XXX %d --> %d\n", i, slot); + /*debug_printf("XXX %d --> %d\n", i, slot);*/ } /* Deal with the cache miss: */ { unsigned out; - + struct vertex_header *header; + assert(slot < Elements(draw->vcache.idx)); -// _mesa_printf("NEW %d %d\n", slot, i); + /*debug_printf("NEW %d %d\n", slot, i);*/ draw->vcache.idx[slot].in = i; draw->vcache.idx[slot].out = out = draw->vs.queue_nr++; draw->vcache.referenced |= (1 << slot); @@ -99,17 +101,19 @@ static struct vertex_header *get_vertex( struct draw_context *draw, */ assert(draw->vs.queue_nr < VS_QUEUE_LENGTH); - draw->vs.queue[out].elt = i; - draw->vs.queue[out].vertex->clipmask = 0; - draw->vs.queue[out].vertex->edgeflag = draw_get_edgeflag(draw, i); - draw->vs.queue[out].vertex->pad = 0; - draw->vs.queue[out].vertex->vertex_id = UNDEFINED_VERTEX_ID; + header = draw_header_from_block(draw->vs.vertex_cache, out); + draw->vs.elts[out] = i; + header->clipmask = 0; + header->edgeflag = draw_get_edgeflag(draw, i); + header->pad = 0; + header->vertex_id = UNDEFINED_VERTEX_ID; /* Need to set the vertex's edge flag here. If we're being called * by do_ef_triangle(), that function needs edge flag info! */ - return draw->vs.queue[draw->vcache.idx[slot].out].vertex; + return draw_header_from_block(draw->vs.vertex_cache, + draw->vcache.idx[slot].out); } } @@ -142,8 +146,11 @@ void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw ) { unsigned i; - for (i = 0; i < draw->vs.post_nr; i++) - draw->vs.queue[i].vertex->vertex_id = UNDEFINED_VERTEX_ID; + for (i = 0; i < draw->vs.post_nr; i++) { + struct vertex_header * header = + draw_header_from_block(draw->vs.vertex_cache, i); + header->vertex_id = UNDEFINED_VERTEX_ID; + } } diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c index 726921d77b..452d0175c3 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c @@ -57,18 +57,17 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) /* run vertex shader on vertex cache entries, four per invokation */ for (i = 0; i < draw->vs.queue_nr; i += MAX_SHADER_VERTICES) { - struct vertex_header *dests[MAX_SHADER_VERTICES]; unsigned elts[MAX_SHADER_VERTICES]; int j, n = MIN2(MAX_SHADER_VERTICES, draw->vs.queue_nr - i); + struct vertex_header *dests = + draw_header_from_block(draw->vs.vertex_cache, i); for (j = 0; j < n; j++) { - elts[j] = draw->vs.queue[i + j].elt; - dests[j] = draw->vs.queue[i + j].vertex; + elts[j] = draw->vs.elts[i + j]; } for ( ; j < MAX_SHADER_VERTICES; j++) { elts[j] = elts[0]; - dests[j] = draw->vs.queue[i + j].vertex; } assert(n > 0); diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 09e0d0eaab..6fe4e554d5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -69,7 +69,7 @@ vs_exec_run( struct draw_vertex_shader *shader, struct draw_context *draw, const unsigned *elts, unsigned count, - struct vertex_header *vOut[] ) + void *vOut ) { struct tgsi_exec_machine *machine = &draw->machine; unsigned int i, j; @@ -106,6 +106,8 @@ vs_exec_run( struct draw_vertex_shader *shader, for (j = 0; j < max_vertices; j++) { unsigned slot; float x, y, z, w; + struct vertex_header *out = + draw_header_from_block(vOut, i + j); /* Handle attr[0] (position) specially: * @@ -113,15 +115,15 @@ vs_exec_run( struct draw_vertex_shader *shader, * program as a set of DP4 instructions appended to the * user-provided code. */ - x = vOut[i + j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = vOut[i + j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = vOut[i + j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = vOut[i + j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + x = out->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = out->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = out->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = out->clip[3] = machine->Outputs[0].xyzw[3].f[j]; if (!draw->rasterizer->bypass_clipping) { - vOut[i + j]->clipmask = compute_clipmask(vOut[i + j]->clip, draw->plane, - draw->nr_planes); - clipped += vOut[i + j]->clipmask; + out->clipmask = compute_clipmask(out->clip, draw->plane, + draw->nr_planes); + clipped += out->clipmask; /* divide by w */ w = 1.0f / w; @@ -130,42 +132,42 @@ vs_exec_run( struct draw_vertex_shader *shader, z *= w; } else { - vOut[i + j]->clipmask = 0; + out->clipmask = 0; } - vOut[i + j]->edgeflag = 1; + out->edgeflag = 1; if (!draw->identity_viewport) { /* Viewport mapping */ - vOut[i + j]->data[0][0] = x * scale[0] + trans[0]; - vOut[i + j]->data[0][1] = y * scale[1] + trans[1]; - vOut[i + j]->data[0][2] = z * scale[2] + trans[2]; - vOut[i + j]->data[0][3] = w; + out->data[0][0] = x * scale[0] + trans[0]; + out->data[0][1] = y * scale[1] + trans[1]; + out->data[0][2] = z * scale[2] + trans[2]; + out->data[0][3] = w; } else { - vOut[i + j]->data[0][0] = x; - vOut[i + j]->data[0][1] = y; - vOut[i + j]->data[0][2] = z; - vOut[i + j]->data[0][3] = w; + out->data[0][0] = x; + out->data[0][1] = y; + out->data[0][2] = z; + out->data[0][3] = w; } /* Remaining attributes are packed into sequential post-transform * vertex attrib slots. */ for (slot = 1; slot < draw->num_vs_outputs; slot++) { - vOut[i + j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - vOut[i + j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - vOut[i + j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - vOut[i + j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + out->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + out->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + out->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + out->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; } #if 0 /*DEBUG*/ printf("%d) Post xform vert:\n", i + j); for (slot = 0; slot < draw->num_vs_outputs; slot++) { printf("\t%d: %f %f %f %f\n", slot, - vOut[i + j]->data[slot][0], - vOut[i + j]->data[slot][1], - vOut[i + j]->data[slot][2], - vOut[i + j]->data[slot][3]); + out->data[slot][0], + out->data[slot][1], + out->data[slot][2], + out->data[slot][3]); } #endif } /* loop over vertices */ diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 6db1e65a2d..72317c67ae 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -72,7 +72,7 @@ vs_llvm_run( struct draw_vertex_shader *base, struct draw_context *draw, const unsigned *elts, unsigned count, - struct vertex_header *vOut[] ) + void *vOut ) { struct draw_llvm_vertex_shader *shader = (struct draw_llvm_vertex_shader *)base; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 6e8d2021f5..c877f5ee3a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -83,7 +83,7 @@ vs_sse_run( struct draw_vertex_shader *base, struct draw_context *draw, const unsigned *elts, unsigned count, - struct vertex_header *vOut[] ) + void *vOut ) { struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base; struct tgsi_exec_machine *machine = &draw->machine; @@ -135,16 +135,18 @@ vs_sse_run( struct draw_vertex_shader *base, for (j = 0; j < max_vertices; j++) { unsigned slot; float x, y, z, w; + struct vertex_header *out = + draw_header_from_block(vOut, i + j); - x = vOut[i + j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = vOut[i + j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = vOut[i + j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = vOut[i + j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + x = out->clip[0] = machine->Outputs[0].xyzw[0].f[j]; + y = out->clip[1] = machine->Outputs[0].xyzw[1].f[j]; + z = out->clip[2] = machine->Outputs[0].xyzw[2].f[j]; + w = out->clip[3] = machine->Outputs[0].xyzw[3].f[j]; if (!draw->rasterizer->bypass_clipping) { - vOut[i + j]->clipmask = compute_clipmask(vOut[i + j]->clip, draw->plane, - draw->nr_planes); - clipped += vOut[i + j]->clipmask; + out->clipmask = compute_clipmask(out->clip, draw->plane, + draw->nr_planes); + clipped += out->clipmask; /* divide by w */ w = 1.0f / w; @@ -153,33 +155,43 @@ vs_sse_run( struct draw_vertex_shader *base, z *= w; } else { - vOut[i + j]->clipmask = 0; + out->clipmask = 0; } - vOut[j]->edgeflag = 1; + out->edgeflag = 1; if (!draw->identity_viewport) { /* Viewport mapping */ - vOut[i + j]->data[0][0] = x * scale[0] + trans[0]; - vOut[i + j]->data[0][1] = y * scale[1] + trans[1]; - vOut[i + j]->data[0][2] = z * scale[2] + trans[2]; - vOut[i + j]->data[0][3] = w; + out->data[0][0] = x * scale[0] + trans[0]; + out->data[0][1] = y * scale[1] + trans[1]; + out->data[0][2] = z * scale[2] + trans[2]; + out->data[0][3] = w; } else { - vOut[i + j]->data[0][0] = x; - vOut[i + j]->data[0][1] = y; - vOut[i + j]->data[0][2] = z; - vOut[i + j]->data[0][3] = w; + out->data[0][0] = x; + out->data[0][1] = y; + out->data[0][2] = z; + out->data[0][3] = w; } /* Remaining attributes are packed into sequential post-transform * vertex attrib slots. */ for (slot = 1; slot < draw->num_vs_outputs; slot++) { - vOut[i + j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - vOut[i + j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - vOut[i + j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - vOut[i + j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + out->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + out->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + out->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + out->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; } +#if 0 /*DEBUG*/ + printf("%d) Post xform vert:\n", i + j); + for (slot = 0; slot < draw->num_vs_outputs; slot++) { + printf("\t%d: %f %f %f %f\n", slot, + out->data[slot][0], + out->data[slot][1], + out->data[slot][2], + out->data[slot][3]); + } +#endif } } return clipped != 0; -- cgit v1.2.3 From 0c1cb54923f3ab31caa2821e095685277174dd2f Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sun, 13 Apr 2008 01:47:07 -0400 Subject: Implement fetch/shade/pipeline or emit vertex passthrough. --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/draw_pt.c | 54 +++-- src/gallium/auxiliary/draw/draw_pt.h | 2 +- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 253 +++++++++++++++++++++ 4 files changed, 287 insertions(+), 23 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 28262a92c6..5ab3cfe5ce 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -19,6 +19,7 @@ C_SOURCES = \ draw_pt_vcache.c \ draw_pt_fetch_emit.c \ draw_pt_fetch_pipeline.c \ + draw_pt_fetch_shade_pipeline.c \ draw_pt_pipeline.c \ draw_pt_elts.c \ draw_prim.c \ diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index f59fb86f78..c8663c0e84 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -38,19 +38,23 @@ /* XXX: Shouldn't those two functions below use the '>' operator??? */ - -static boolean too_many_verts( struct draw_context *draw, - unsigned verts ) +static boolean too_many_elts( struct draw_context *draw, + unsigned elts ) { - return verts < 1024; + return elts > (8 * 1024); } -static boolean too_many_elts( struct draw_context *draw, - unsigned elts ) +static INLINE unsigned reduced_prim(unsigned prim) { - return elts < (16 * 1024); + /*FIXME*/ + return prim; } +static INLINE boolean good_prim(unsigned prim) +{ + /*FIXME*/ + return FALSE; +} boolean draw_pt_arrays(struct draw_context *draw, @@ -64,6 +68,9 @@ draw_pt_arrays(struct draw_context *draw, struct draw_pt_front_end *frontend = NULL; struct draw_pt_middle_end *middle = NULL; + if (!draw->render) + return FALSE; + /*debug_printf("XXXXXXXXXX needs_pipeline = %d\n", pipeline);*/ /* Overall we do: * - frontend -- prepare fetch_elts, draw_elts - eg vcache @@ -87,7 +94,6 @@ draw_pt_arrays(struct draw_context *draw, */ middle = draw->pt.middle.fetch_pipeline; } -#if 0 else if (!cliptest && !pipeline) { /* Fetch user verts, run vertex shader, emit hw verts: */ @@ -111,23 +117,15 @@ draw_pt_arrays(struct draw_context *draw, */ middle = draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit; } - else if (!cliptest) { - /* Fetch user verts, run vertex shader, run pipeline: - */ - middle = draw->pt.middle.fetch_shade_pipeline; - } else { /* This is what we're currently always doing: */ - /* Fetch user verts, run vertex shader, cliptest, run pipeline: + /* Fetch user verts, run vertex shader, cliptest, run pipeline + * or + * Fetch user verts, run vertex shader, run pipeline */ - middle = draw->pt.middle.fetch_shade_cliptest_pipeline; - } -#else - else { - return FALSE; + middle = draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit; } -#endif /* If !pipeline, need to make sure we respect the driver's limited @@ -143,7 +141,7 @@ draw_pt_arrays(struct draw_context *draw, frontend = draw->pt.front.vcache; hw_prim = reduced_prim(prim); } - +#if 0 if (too_many_verts(nr_verts)) { /* if (is_verts(draw) && can_split(prim)) { draw = draw_arrays_split; @@ -153,6 +151,7 @@ draw_pt_arrays(struct draw_context *draw, hw_prim = reduced_prim(prim); } } +#endif if (too_many_elts(count)) { @@ -166,7 +165,7 @@ draw_pt_arrays(struct draw_context *draw, } if (!good_prim(hw_prim)) { - draw = draw->pt.front.vcache; + frontend = draw->pt.front.vcache; } } #else @@ -200,6 +199,11 @@ boolean draw_pt_init( struct draw_context *draw ) if (!draw->pt.middle.fetch_pipeline) return FALSE; + draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit = + draw_pt_fetch_pipeline_or_emit( draw ); + if (!draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit) + return FALSE; + draw->pt.front.vcache = draw_pt_vcache( draw ); if (!draw->pt.front.vcache) return FALSE; @@ -220,6 +224,12 @@ void draw_pt_destroy( struct draw_context *draw ) draw->pt.middle.fetch_pipeline = NULL; } + if (draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit) { + draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit->destroy( + draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit ); + draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit = NULL; + } + if (draw->pt.front.vcache) { draw->pt.front.vcache->destroy( draw->pt.front.vcache ); draw->pt.front.vcache = NULL; diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 590823fd33..48413b648a 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -123,7 +123,7 @@ const void *draw_pt_elt_ptr( struct draw_context *draw, struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_fetch_pipeline( struct draw_context *draw ); - +struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw); #endif diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c new file mode 100644 index 0000000000..e7550fe328 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -0,0 +1,253 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" + +struct fetch_pipeline_middle_end { + struct draw_pt_middle_end base; + struct draw_context *draw; + + struct { + const ubyte *ptr; + unsigned pitch; + void (*fetch)( const void *from, float *attrib); + void (*emit)( const float *attrib, float **out ); + } fetch[PIPE_MAX_ATTRIBS]; + + unsigned nr_fetch; + unsigned pipeline_vertex_size; + unsigned hw_vertex_size; + unsigned prim; +}; + +static void emit_R32_FLOAT( const float *attrib, + float **out ) +{ + (*out)[0] = attrib[0]; + (*out) += 1; +} + +static void emit_R32G32_FLOAT( const float *attrib, + float **out ) +{ + (*out)[0] = attrib[0]; + (*out)[1] = attrib[1]; + (*out) += 2; +} + +static void emit_R32G32B32_FLOAT( const float *attrib, + float **out ) +{ + (*out)[0] = attrib[0]; + (*out)[1] = attrib[1]; + (*out)[2] = attrib[2]; + (*out) += 3; +} + +static void emit_R32G32B32A32_FLOAT( const float *attrib, + float **out ) +{ + (*out)[0] = attrib[0]; + (*out)[1] = attrib[1]; + (*out)[2] = attrib[2]; + (*out)[3] = attrib[3]; + (*out) += 4; +} + +static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, + unsigned prim ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_context *draw = fpme->draw; + unsigned i, nr = 0; + boolean ok; + const struct vertex_info *vinfo; + + fpme->prim = prim; + + ok = draw->render->set_primitive(draw->render, prim); + if (!ok) { + assert(0); + return; + } + /* Must do this after set_primitive() above: + */ + vinfo = draw->render->get_vertex_info(draw->render); + + /* Need to look at vertex shader inputs (we know it is a + * passthrough shader, so these define the outputs too). If we + * were running a shader, we'd still be looking at the inputs at + * this point. + */ + for (i = 0; i < draw->vertex_shader->info.num_inputs; i++) { + unsigned buf = draw->vertex_element[i].vertex_buffer_index; + enum pipe_format format = draw->vertex_element[i].src_format; + + fpme->fetch[nr].ptr = ((const ubyte *) draw->user.vbuffer[buf] + + draw->vertex_buffer[buf].buffer_offset + + draw->vertex_element[i].src_offset); + + fpme->fetch[nr].pitch = draw->vertex_buffer[buf].pitch; + fpme->fetch[nr].fetch = draw_get_fetch_func( format ); + + /* Always do this -- somewhat redundant... + */ + fpme->fetch[nr].emit = emit_R32G32B32A32_FLOAT; + nr++; + } + + fpme->nr_fetch = nr; + //fpme->pipeline_vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); + fpme->pipeline_vertex_size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f; + fpme->hw_vertex_size = vinfo->size * 4; +} + + + + +static void fetch_pipeline_run( struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *shader = draw->vertex_shader; + char *pipeline_verts; + unsigned i; + + //debug_printf("fc = %d, VS = %d\n", fetch_count, VS_QUEUE_LENGTH); + if (fetch_count < VS_QUEUE_LENGTH) { + pipeline_verts = draw->vs.vertex_cache; + } else { + pipeline_verts = MALLOC(fpme->pipeline_vertex_size * + fetch_count); + } + + if (!pipeline_verts) { + assert(0); + return; + } + + /*FIXME: this init phase should go away */ + for (i = 0; i < fetch_count; ++i) { + struct vertex_header *header = + (struct vertex_header*)(pipeline_verts + (fpme->pipeline_vertex_size * i)); + header->clipmask = 0; + header->edgeflag = draw_get_edgeflag(draw, i); + header->pad = 0; + header->vertex_id = UNDEFINED_VERTEX_ID; + } + + /* Shade + */ + shader->prepare(shader, draw); + if (shader->run(shader, draw, fetch_elts, fetch_count, pipeline_verts)) { + /* Run the pipeline */ + draw_pt_run_pipeline( fpme->draw, + fpme->prim, + pipeline_verts, + fpme->pipeline_vertex_size, + fetch_count, + draw_elts, + draw_count ); + } else { + unsigned i, j; + void *hw_verts; + float *out; + + hw_verts = draw->render->allocate_vertices(draw->render, + (ushort)fpme->hw_vertex_size, + (ushort)fetch_count); + if (!hw_verts) { + assert(0); + return; + } + + out = (float *)hw_verts; + for (i = 0; i < fetch_count; i++) { + struct vertex_header *header = + (struct vertex_header*)(pipeline_verts + (fpme->pipeline_vertex_size * i)); + + for (j = 0; j < fpme->nr_fetch; j++) { + float *attrib = header->data[j]; + /*debug_printf("emiting [%f, %f, %f, %f]\n", + attrib[0], attrib[1], + attrib[2], attrib[3]);*/ + fpme->fetch[j].emit(attrib, &out); + } + } + /* XXX: Draw arrays path to avoid re-emitting index list again and + * again. + */ + draw->render->draw(draw->render, + draw_elts, + draw_count); + + draw->render->release_vertices(draw->render, + hw_verts, + fpme->hw_vertex_size, + fetch_count); + } + + + if (pipeline_verts != draw->vs.vertex_cache) + FREE(pipeline_verts); +} + + + +static void fetch_pipeline_finish( struct draw_pt_middle_end *middle ) +{ + /* nothing to do */ +} + +static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle ) +{ + FREE(middle); +} + + +struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *draw ) +{ + struct fetch_pipeline_middle_end *fetch_pipeline = CALLOC_STRUCT( fetch_pipeline_middle_end ); + + fetch_pipeline->base.prepare = fetch_pipeline_prepare; + fetch_pipeline->base.run = fetch_pipeline_run; + fetch_pipeline->base.finish = fetch_pipeline_finish; + fetch_pipeline->base.destroy = fetch_pipeline_destroy; + + fetch_pipeline->draw = draw; + + return &fetch_pipeline->base; +} -- cgit v1.2.3 From caf293343fd236e97ce399533ac0ada3c7afee7a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 14 Apr 2008 12:08:46 +0100 Subject: draw: hide passthrough shading paths behind an environment variable --- src/gallium/auxiliary/draw/draw_context.c | 2 ++ src/gallium/auxiliary/draw/draw_private.h | 1 + src/gallium/auxiliary/draw/draw_pt.c | 3 +++ 3 files changed, 6 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index cdca556fbd..1e70a77523 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -49,6 +49,8 @@ struct draw_context *draw_create( void ) draw->use_sse = FALSE; #endif + draw->use_pt_shaders = GETENV( "GALLIUM_PT_SHADERS" ) != NULL; + /* create pipeline stages */ draw->pipeline.wide_line = draw_wide_line_stage( draw ); draw->pipeline.wide_point = draw_wide_point_stage( draw ); diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 1115166192..3042d26847 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -276,6 +276,7 @@ struct draw_context boolean line_stipple; /**< do line stipple? */ boolean point_sprite; /**< convert points to quads for sprites? */ boolean use_sse; + boolean use_pt_shaders; /* temporary flag to switch on pt shader paths */ /* If a prim stage introduces new vertex attributes, they'll be stored here */ diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index c8663c0e84..c67a217982 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -79,6 +79,9 @@ draw_pt_arrays(struct draw_context *draw, * - backend -- the vbuf_render provided by the driver. */ + if (shading && !draw->use_pt_shaders) + return FALSE; + if (!cliptest && !pipeline && !shading) { /* This is the 'passthrough' path: -- cgit v1.2.3 From e106b2d3d65585b0aaa0e60afd541da020d9e220 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 14 Apr 2008 12:27:25 +0100 Subject: draw: move vertex header init out of fetch_shade_pipeline.c --- src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c | 2 -- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 9 --------- src/gallium/auxiliary/draw/draw_vs_exec.c | 1 + src/gallium/auxiliary/draw/draw_vs_llvm.c | 1 + src/gallium/auxiliary/draw/draw_vs_sse.c | 1 + 5 files changed, 3 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c index 4c2a281b29..0914a90440 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c @@ -211,7 +211,6 @@ fetch_store_general( struct fetch_pipeline_middle_end *fpme, static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, unsigned prim ) { - static const float zero = 0; struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; unsigned i, nr = 0; @@ -264,7 +263,6 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, unsigned draw_count ) { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; - struct draw_context *draw = fpme->draw; char *pipeline_verts; pipeline_verts = MALLOC( fpme->pipeline_vertex_size * diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index e7550fe328..fb9b2da5c0 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -159,15 +159,6 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, return; } - /*FIXME: this init phase should go away */ - for (i = 0; i < fetch_count; ++i) { - struct vertex_header *header = - (struct vertex_header*)(pipeline_verts + (fpme->pipeline_vertex_size * i)); - header->clipmask = 0; - header->edgeflag = draw_get_edgeflag(draw, i); - header->pad = 0; - header->vertex_id = UNDEFINED_VERTEX_ID; - } /* Shade */ diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 6fe4e554d5..27cf060cc9 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -135,6 +135,7 @@ vs_exec_run( struct draw_vertex_shader *shader, out->clipmask = 0; } out->edgeflag = 1; + out->vertex_id = UNDEFINED_VERTEX_ID; if (!draw->identity_viewport) { /* Viewport mapping */ diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 72317c67ae..73076d2467 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -140,6 +140,7 @@ vs_llvm_run( struct draw_vertex_shader *base, vOut[j]->clipmask = 0; } vOut[j]->edgeflag = 1; + vOut[j]->vertex_id = UNDEFINED_VERTEX_ID; if (!draw->identity_viewport) { /* Viewport mapping */ diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index c877f5ee3a..92b9947e9f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -158,6 +158,7 @@ vs_sse_run( struct draw_vertex_shader *base, out->clipmask = 0; } out->edgeflag = 1; + out->vertex_id = UNDEFINED_VERTEX_ID; if (!draw->identity_viewport) { /* Viewport mapping */ -- cgit v1.2.3 From a82e4996a13ef3cae1497fef95c2fca7631cd889 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 14 Apr 2008 12:32:53 +0100 Subject: draw: flush pipeline before trying to allocate more hw vertices --- src/gallium/auxiliary/draw/draw_pt.c | 4 ---- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 4 ++++ src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 4 ++++ 3 files changed, 8 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index c67a217982..5c16165c15 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -175,10 +175,6 @@ draw_pt_arrays(struct draw_context *draw, frontend = draw->pt.front.vcache; #endif - /* XXX: need to flush to get prim_vbuf.c to release its allocation?? - */ - draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - frontend->prepare( frontend, prim, middle ); frontend->run( frontend, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index e4e144ef19..2f8b08db79 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -243,6 +243,10 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, struct draw_context *draw = feme->draw; void *hw_verts; + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + hw_verts = draw->render->allocate_vertices( draw->render, (ushort)feme->hw_vertex_size, (ushort)fetch_count ); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index fb9b2da5c0..b5d2f81a14 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -177,6 +177,10 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, void *hw_verts; float *out; + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + hw_verts = draw->render->allocate_vertices(draw->render, (ushort)fpme->hw_vertex_size, (ushort)fetch_count); -- cgit v1.2.3 From 36bacf97a6b10f7274f0d3fcf37bf7ebf9388161 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 14 Apr 2008 12:46:47 +0100 Subject: draw: always malloc verts for fetch_shade_pipeline --- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 13 +++---------- 1 file changed, 3 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index b5d2f81a14..427128f335 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -144,15 +144,9 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vertex_shader; char *pipeline_verts; - unsigned i; - //debug_printf("fc = %d, VS = %d\n", fetch_count, VS_QUEUE_LENGTH); - if (fetch_count < VS_QUEUE_LENGTH) { - pipeline_verts = draw->vs.vertex_cache; - } else { - pipeline_verts = MALLOC(fpme->pipeline_vertex_size * - fetch_count); - } + pipeline_verts = MALLOC(fpme->pipeline_vertex_size * + fetch_count); if (!pipeline_verts) { assert(0); @@ -216,8 +210,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, } - if (pipeline_verts != draw->vs.vertex_cache) - FREE(pipeline_verts); + FREE(pipeline_verts); } -- cgit v1.2.3 From 8cbda9f1088718e5dbb97b9a6ddcc43737f94351 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 14 Apr 2008 16:15:39 +0100 Subject: draw: remove dead code --- src/gallium/auxiliary/draw/draw_vf.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vf.h b/src/gallium/auxiliary/draw/draw_vf.h index 7555d1bd58..0ef98d6257 100644 --- a/src/gallium/auxiliary/draw/draw_vf.h +++ b/src/gallium/auxiliary/draw/draw_vf.h @@ -128,9 +128,6 @@ draw_vf_destroy( struct draw_vertex_fetch *vf ); struct draw_vf_attr; -typedef void (*draw_vf_extract_func)( const struct draw_vf_attr *a, - float *out, - const uint8_t *v ); typedef void (*draw_vf_insert_func)( const struct draw_vf_attr *a, uint8_t *v, @@ -164,7 +161,6 @@ struct draw_vf_attr uint8_t *inputptr; const draw_vf_insert_func *insert; draw_vf_insert_func do_insert; - draw_vf_extract_func extract; }; struct draw_vertex_fetch -- cgit v1.2.3 From e3309197855b5caf7c4c167d1e7beedf33ed2fdd Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 14 Apr 2008 12:27:24 -0400 Subject: pass vertex size to shaders so that callee can decide on the size of the vertices and not always have to use the maximum vertex allocation size for them --- src/gallium/auxiliary/draw/draw_private.h | 7 ++++--- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 3 ++- src/gallium/auxiliary/draw/draw_vertex_cache.c | 8 ++++++-- src/gallium/auxiliary/draw/draw_vertex_shader.c | 5 +++-- src/gallium/auxiliary/draw/draw_vs_exec.c | 5 +++-- src/gallium/auxiliary/draw/draw_vs_sse.c | 5 +++-- 6 files changed, 21 insertions(+), 12 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 3042d26847..c8cb96c8ba 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -78,6 +78,7 @@ struct vertex_header { /* XXX This is too large */ #define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) +#define MAX_VERTEX_ALLOCATION ((MAX_VERTEX_SIZE + 0x0f) & ~0x0f) @@ -152,7 +153,8 @@ struct draw_vertex_shader { struct draw_context *draw, const unsigned *elts, unsigned count, - void *out ); + void *out, + unsigned vertex_size); void (*delete)( struct draw_vertex_shader * ); @@ -450,9 +452,8 @@ dot4(const float *a, const float *b) } static INLINE struct vertex_header * -draw_header_from_block(char *block, int num) +draw_header_from_block(char *block, int size, int num) { - static const unsigned size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f; return (struct vertex_header*)(block + num * size); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 427128f335..3fa6dcc5e7 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -157,7 +157,8 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, /* Shade */ shader->prepare(shader, draw); - if (shader->run(shader, draw, fetch_elts, fetch_count, pipeline_verts)) { + if (shader->run(shader, draw, fetch_elts, fetch_count, pipeline_verts, + fpme->pipeline_vertex_size)) { /* Run the pipeline */ draw_pt_run_pipeline( fpme->draw, fpme->prim, diff --git a/src/gallium/auxiliary/draw/draw_vertex_cache.c b/src/gallium/auxiliary/draw/draw_vertex_cache.c index 9256dec4ea..730c18bcb3 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_cache.c +++ b/src/gallium/auxiliary/draw/draw_vertex_cache.c @@ -74,6 +74,7 @@ static struct vertex_header *get_vertex( struct draw_context *draw, /*debug_printf("HIT %d %d\n", slot, i);*/ assert(draw->vcache.idx[slot].out < draw->vs.queue_nr); return draw_header_from_block(draw->vs.vertex_cache, + MAX_VERTEX_ALLOCATION, draw->vcache.idx[slot].out); } @@ -101,7 +102,8 @@ static struct vertex_header *get_vertex( struct draw_context *draw, */ assert(draw->vs.queue_nr < VS_QUEUE_LENGTH); - header = draw_header_from_block(draw->vs.vertex_cache, out); + header = draw_header_from_block(draw->vs.vertex_cache, MAX_VERTEX_ALLOCATION, + out); draw->vs.elts[out] = i; header->clipmask = 0; header->edgeflag = draw_get_edgeflag(draw, i); @@ -113,6 +115,7 @@ static struct vertex_header *get_vertex( struct draw_context *draw, */ return draw_header_from_block(draw->vs.vertex_cache, + MAX_VERTEX_ALLOCATION, draw->vcache.idx[slot].out); } } @@ -148,7 +151,8 @@ void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw ) for (i = 0; i < draw->vs.post_nr; i++) { struct vertex_header * header = - draw_header_from_block(draw->vs.vertex_cache, i); + draw_header_from_block(draw->vs.vertex_cache, + MAX_VERTEX_ALLOCATION, i); header->vertex_id = UNDEFINED_VERTEX_ID; } } diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c index 452d0175c3..8572a6d40c 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c @@ -60,7 +60,8 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) unsigned elts[MAX_SHADER_VERTICES]; int j, n = MIN2(MAX_SHADER_VERTICES, draw->vs.queue_nr - i); struct vertex_header *dests = - draw_header_from_block(draw->vs.vertex_cache, i); + draw_header_from_block(draw->vs.vertex_cache, + MAX_VERTEX_ALLOCATION, i); for (j = 0; j < n; j++) { elts[j] = draw->vs.elts[i + j]; @@ -73,7 +74,7 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) assert(n > 0); assert(n <= MAX_SHADER_VERTICES); - shader->run(shader, draw, elts, n, dests); + shader->run(shader, draw, elts, n, dests, MAX_VERTEX_ALLOCATION); } draw->vs.post_nr = draw->vs.queue_nr; diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 27cf060cc9..5c88c2e24e 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -69,7 +69,8 @@ vs_exec_run( struct draw_vertex_shader *shader, struct draw_context *draw, const unsigned *elts, unsigned count, - void *vOut ) + void *vOut, + unsigned vertex_size) { struct tgsi_exec_machine *machine = &draw->machine; unsigned int i, j; @@ -107,7 +108,7 @@ vs_exec_run( struct draw_vertex_shader *shader, unsigned slot; float x, y, z, w; struct vertex_header *out = - draw_header_from_block(vOut, i + j); + draw_header_from_block(vOut, vertex_size, i + j); /* Handle attr[0] (position) specially: * diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 92b9947e9f..ee0a3105b9 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -83,7 +83,8 @@ vs_sse_run( struct draw_vertex_shader *base, struct draw_context *draw, const unsigned *elts, unsigned count, - void *vOut ) + void *vOut, + unsigned vertex_size ) { struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base; struct tgsi_exec_machine *machine = &draw->machine; @@ -136,7 +137,7 @@ vs_sse_run( struct draw_vertex_shader *base, unsigned slot; float x, y, z, w; struct vertex_header *out = - draw_header_from_block(vOut, i + j); + draw_header_from_block(vOut, vertex_size, i + j); x = out->clip[0] = machine->Outputs[0].xyzw[0].f[j]; y = out->clip[1] = machine->Outputs[0].xyzw[1].f[j]; -- cgit v1.2.3 From 2ba6e1fa71be07a2d75abe2d085d485046c0932b Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 14 Apr 2008 12:29:23 -0400 Subject: silence some warnings --- src/gallium/auxiliary/draw/draw_pt.c | 4 ++-- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 2 -- src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c | 4 ++-- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 3 ++- 4 files changed, 6 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 5c16165c15..3d2e7bf7b8 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -36,13 +36,13 @@ #include "draw/draw_pt.h" -/* XXX: Shouldn't those two functions below use the '>' operator??? - */ +#if 0 static boolean too_many_elts( struct draw_context *draw, unsigned elts ) { return elts > (8 * 1024); } +#endif static INLINE unsigned reduced_prim(unsigned prim) { diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 2f8b08db79..3a26a5d712 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -145,7 +145,6 @@ fetch_store_general( struct fetch_emit_middle_end *feme, unsigned count ) { float *out = (float *)out_ptr; - struct vbuf_render *render = feme->draw->render; uint i, j; for (i = 0; i < count; i++) { @@ -167,7 +166,6 @@ fetch_store_general( struct fetch_emit_middle_end *feme, static void fetch_emit_prepare( struct draw_pt_middle_end *middle, unsigned prim ) { - static const float zero = 0; struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; struct draw_context *draw = feme->draw; const struct vertex_info *vinfo; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c index 0914a90440..a70d129c93 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c @@ -87,7 +87,7 @@ struct fetch_pipeline_middle_end { }; - +#if 0 static void emit_R32_FLOAT( const float *attrib, float **out ) { @@ -111,7 +111,7 @@ static void emit_R32G32B32_FLOAT( const float *attrib, (*out)[2] = attrib[2]; (*out) += 3; } - +#endif static void emit_R32G32B32A32_FLOAT( const float *attrib, float **out ) { diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 3fa6dcc5e7..b49c9efa65 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -49,6 +49,7 @@ struct fetch_pipeline_middle_end { unsigned prim; }; +#if 0 static void emit_R32_FLOAT( const float *attrib, float **out ) { @@ -72,7 +73,7 @@ static void emit_R32G32B32_FLOAT( const float *attrib, (*out)[2] = attrib[2]; (*out) += 3; } - +#endif static void emit_R32G32B32A32_FLOAT( const float *attrib, float **out ) { -- cgit v1.2.3 From 983b6a73e1842b436d158dc1d018bd483a1c9929 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 14 Apr 2008 12:32:25 -0400 Subject: use the new macro --- src/gallium/auxiliary/draw/draw_context.c | 3 +-- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 1e70a77523..0c314f6e1d 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -88,8 +88,7 @@ struct draw_context *draw_create( void ) /* Statically allocate maximum sized vertices for the cache - could be cleverer... */ { - const unsigned size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f; - char *tmp = align_malloc(VS_QUEUE_LENGTH * size, 16); + char *tmp = align_malloc(VS_QUEUE_LENGTH * MAX_VERTEX_ALLOCATION, 16); if (!tmp) goto fail; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index b49c9efa65..04b3d2c4cf 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -128,7 +128,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, fpme->nr_fetch = nr; //fpme->pipeline_vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); - fpme->pipeline_vertex_size = (MAX_VERTEX_SIZE + 0x0f) & ~0x0f; + fpme->pipeline_vertex_size = MAX_VERTEX_ALLOCATION; fpme->hw_vertex_size = vinfo->size * 4; } -- cgit v1.2.3 From 21ae3d2721326d56c76370fd8bfcc1536203925d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 14 Apr 2008 22:39:33 +0900 Subject: gallium: Serialize buffers writes. Allow concurrent reads from buffers by the CPU/GPU, but serialize all writes. --- .../auxiliary/pipebuffer/pb_buffer_fenced.c | 81 ++++++++++++++++++++-- 1 file changed, 76 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index b1f7d93057..65b6584003 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -35,6 +35,7 @@ #include "pipe/p_compiler.h" +#include "pipe/p_error.h" #include "pipe/p_debug.h" #include "pipe/p_winsys.h" #include "pipe/p_thread.h" @@ -54,6 +55,13 @@ */ #define SUPER(__derived) (&(__derived)->base) +#define PIPE_BUFFER_USAGE_CPU_READ_WRITE \ + ( PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE ) +#define PIPE_BUFFER_USAGE_GPU_READ_WRITE \ + ( PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE ) +#define PIPE_BUFFER_USAGE_WRITE \ + ( PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_GPU_WRITE ) + struct fenced_buffer_list { @@ -76,6 +84,15 @@ struct fenced_buffer struct pb_buffer *buffer; + /* FIXME: protect access with mutex */ + + /** + * A bitmask of PIPE_BUFFER_USAGE_CPU/GPU_READ/WRITE describing the current + * buffer usage. + */ + unsigned flags; + + unsigned mapcount; struct pipe_fence_handle *fence; struct list_head head; @@ -98,7 +115,9 @@ _fenced_buffer_add(struct fenced_buffer *fenced_buf) struct fenced_buffer_list *fenced_list = fenced_buf->list; assert(fenced_buf->base.base.refcount); + assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); assert(fenced_buf->fence); + assert(!fenced_buf->head.prev); assert(!fenced_buf->head.next); LIST_ADDTAIL(&fenced_buf->head, &fenced_list->delayed); @@ -130,6 +149,7 @@ _fenced_buffer_remove(struct fenced_buffer *fenced_buf) assert(fenced_buf->fence); winsys->fence_reference(winsys, &fenced_buf->fence, NULL); + fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; assert(fenced_buf->head.prev); assert(fenced_buf->head.next); @@ -186,6 +206,34 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, } +/** + * Serialize writes, but allow concurrent reads. + */ +static INLINE enum pipe_error +fenced_buffer_serialize(struct fenced_buffer *fenced_buf, unsigned flags) +{ + struct fenced_buffer_list *fenced_list = fenced_buf->list; + struct pipe_winsys *winsys = fenced_list->winsys; + + if(((fenced_buf->flags | flags) & PIPE_BUFFER_USAGE_WRITE) == 0) + return PIPE_OK; + + if(fenced_buf->mapcount) { + /* FIXME */ + debug_warning("attemp to write concurrently to buffer"); + return PIPE_ERROR_RETRY; + } + + if(fenced_buf->fence) { + if(winsys->fence_finish(winsys, fenced_buf->fence, 0) != 0) + return PIPE_ERROR_RETRY; + _fenced_buffer_remove(fenced_buf); + } + + return PIPE_OK; +} + + static void fenced_buffer_destroy(struct pb_buffer *buf) { @@ -223,16 +271,30 @@ static void * fenced_buffer_map(struct pb_buffer *buf, unsigned flags) { - struct fenced_buffer *fenced_buf = fenced_buffer(buf); - return pb_map(fenced_buf->buffer, flags); + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + void *map; + assert((flags & ~PIPE_BUFFER_USAGE_CPU_READ_WRITE) == 0); + + if(fenced_buffer_serialize(fenced_buf, flags) != PIPE_OK) + return NULL; + + map = pb_map(fenced_buf->buffer, flags); + if(map) + ++fenced_buf->mapcount; + fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE; + return map; } static void fenced_buffer_unmap(struct pb_buffer *buf) { - struct fenced_buffer *fenced_buf = fenced_buffer(buf); + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + assert(fenced_buf->mapcount); pb_unmap(fenced_buf->buffer); + --fenced_buf->mapcount; + if(!fenced_buf->mapcount) + fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE; } @@ -288,13 +350,22 @@ buffer_fence(struct pb_buffer *buf, struct fenced_buffer *fenced_buf = fenced_buffer(buf); struct fenced_buffer_list *fenced_list = fenced_buf->list; struct pipe_winsys *winsys = fenced_list->winsys; + /* FIXME: receive this as a parameter */ + unsigned flags = fence ? PIPE_BUFFER_USAGE_GPU_READ_WRITE : 0; + + if(fenced_buffer_serialize(fenced_buf, flags) != PIPE_OK) { + /* FIXME: propagate error */ + (void)0; + } _glthread_LOCK_MUTEX(fenced_list->mutex); if (fenced_buf->fence) _fenced_buffer_remove(fenced_buf); - winsys->fence_reference(winsys, &fenced_buf->fence, fence); - if (fenced_buf->fence) + if (fence) { + winsys->fence_reference(winsys, &fenced_buf->fence, fence); + fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE; _fenced_buffer_add(fenced_buf); + } _glthread_UNLOCK_MUTEX(fenced_list->mutex); } -- cgit v1.2.3 From 5b8fa518476868530d748ce6d03674e9cca3d89f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 14 Apr 2008 23:55:36 +0900 Subject: gallium: Don't assume snprintf are always available. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 9 ++-- src/gallium/auxiliary/util/p_debug.c | 7 +-- src/gallium/auxiliary/util/u_snprintf.c | 16 +++--- src/gallium/auxiliary/util/u_string.h | 63 ++++++++++++++++++++++ .../drivers/i915simple/i915_fpc_translate.c | 3 +- src/gallium/drivers/i915simple/i915_screen.c | 3 +- src/gallium/drivers/i965simple/brw_screen.c | 3 +- src/gallium/include/pipe/p_format.h | 4 +- src/gallium/include/pipe/p_util.h | 8 --- 9 files changed, 88 insertions(+), 28 deletions(-) create mode 100644 src/gallium/auxiliary/util/u_string.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index cb3573ceb6..ff6a2c4194 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -30,6 +30,7 @@ #include "pipe/p_debug.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" +#include "util/u_string.h" #include "tgsi_dump.h" #include "tgsi_parse.h" #include "tgsi_build.h" @@ -147,7 +148,7 @@ gen_dump_uix( { char str[36]; - sprintf( str, "0x%x", ui ); + util_snprintf( str, sizeof(str), "0x%x", ui ); gen_dump_str( dump, str ); } @@ -158,7 +159,7 @@ gen_dump_uid( { char str[16]; - sprintf( str, "%u", ui ); + util_snprintf( str, sizeof(str), "%u", ui ); gen_dump_str( dump, str ); } @@ -169,7 +170,7 @@ gen_dump_sid( { char str[16]; - sprintf( str, "%d", si ); + util_snprintf( str, sizeof(str), "%d", si ); gen_dump_str( dump, str ); } @@ -180,7 +181,7 @@ gen_dump_flt( { char str[48]; - sprintf( str, "%10.4f", flt ); + util_snprintf( str, sizeof(str), "%10.4f", flt ); gen_dump_str( dump, str ); } diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 090e3b7794..f9366467cd 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -39,6 +39,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_util.h" #include "pipe/p_debug.h" +#include "util/u_string.h" #ifdef WIN32 @@ -60,7 +61,7 @@ void _debug_vprintf(const char *format, va_list ap) /* EngDebugPrint does not handle float point arguments, so we need to use * our own vsnprintf implementation */ char buf[512 + 1]; - vsnprintf(buf, sizeof(buf), format, ap); + util_vsnprintf(buf, sizeof(buf), format, ap); _EngDebugPrint("%s", buf); #else /* TODO: Implement debug print for WINCE */ @@ -311,7 +312,7 @@ debug_dump_enum(const struct debug_named_value *names, ++names; } - snprintf(rest, sizeof(rest), "0x%08lx", value); + util_snprintf(rest, sizeof(rest), "0x%08lx", value); return rest; } @@ -344,7 +345,7 @@ debug_dump_flags(const struct debug_named_value *names, else first = 0; - snprintf(rest, sizeof(rest), "0x%08lx", value); + util_snprintf(rest, sizeof(rest), "0x%08lx", value); strncat(output, rest, sizeof(output)); } diff --git a/src/gallium/auxiliary/util/u_snprintf.c b/src/gallium/auxiliary/util/u_snprintf.c index 48426abcb7..c4f4bbd30c 100644 --- a/src/gallium/auxiliary/util/u_snprintf.c +++ b/src/gallium/auxiliary/util/u_snprintf.c @@ -166,8 +166,8 @@ #include #else #ifdef WIN32 -#define vsnprintf rpl_vsnprintf -#define snprintf rpl_snprintf +#define vsnprintf util_vsnprintf +#define snprintf util_snprintf #define HAVE_VSNPRINTF 0 #define HAVE_SNPRINTF 0 #define HAVE_VASPRINTF 1 /* not needed */ @@ -445,7 +445,7 @@ static UINTMAX_T myround(LDOUBLE); static LDOUBLE mypow10(int); int -rpl_vsnprintf(char *str, size_t size, const char *format, va_list args) +util_vsnprintf(char *str, size_t size, const char *format, va_list args) { LDOUBLE fvalue; INTMAX_T value; @@ -1404,7 +1404,7 @@ mymemcpy(void *dst, void *src, size_t len) #endif /* NEED_MYMEMCPY */ int -rpl_vasprintf(char **ret, const char *format, va_list ap) +util_vasprintf(char **ret, const char *format, va_list ap) { size_t size; int len; @@ -1422,10 +1422,10 @@ rpl_vasprintf(char **ret, const char *format, va_list ap) #if !HAVE_SNPRINTF #if HAVE_STDARG_H int -rpl_snprintf(char *str, size_t size, const char *format, ...) +util_snprintf(char *str, size_t size, const char *format, ...) #else int -rpl_snprintf(va_alist) va_dcl +util_snprintf(va_alist) va_dcl #endif /* HAVE_STDARG_H */ { #if !HAVE_STDARG_H @@ -1449,10 +1449,10 @@ rpl_snprintf(va_alist) va_dcl #if !HAVE_ASPRINTF #if HAVE_STDARG_H int -rpl_asprintf(char **ret, const char *format, ...) +util_asprintf(char **ret, const char *format, ...) #else int -rpl_asprintf(va_alist) va_dcl +util_asprintf(va_alist) va_dcl #endif /* HAVE_STDARG_H */ { #if !HAVE_STDARG_H diff --git a/src/gallium/auxiliary/util/u_string.h b/src/gallium/auxiliary/util/u_string.h new file mode 100644 index 0000000000..b99d4e8021 --- /dev/null +++ b/src/gallium/auxiliary/util/u_string.h @@ -0,0 +1,63 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Platform independent functions for string manipulation. + * + * @author Jose Fonseca + */ + +#ifndef U_STRING_H_ +#define U_STRING_H_ + +#ifndef WIN32 +#include +#endif +#include +#include + + +#ifdef __cplusplus +extern "C" { +#endif + + +#ifdef WIN32 +int util_vsnprintf(char *, size_t, const char *, va_list); +int util_snprintf(char *str, size_t size, const char *format, ...); +#else +#define util_vsnprintf vsnprintf +#define util_snprintf snprintf +#endif + + +#ifdef __cplusplus +} +#endif + +#endif /* U_STRING_H_ */ diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 7b4fca5db1..3ccf74c72c 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -33,6 +33,7 @@ #include "i915_fpc.h" #include "pipe/p_shader_tokens.h" +#include "util/u_string.h" #include "tgsi/util/tgsi_parse.h" #include "tgsi/util/tgsi_dump.h" @@ -122,7 +123,7 @@ i915_program_error(struct i915_fp_compile *p, const char *msg, ...) debug_printf("i915_program_error: "); va_start( args, msg ); - vsprintf( buffer, msg, args ); + util_vsnprintf( buffer, sizeof(buffer), msg, args ); va_end( args ); debug_printf(buffer); debug_printf("\n"); diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 839b98c0ce..9ae594ce54 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -28,6 +28,7 @@ #include "pipe/p_util.h" #include "pipe/p_winsys.h" +#include "util/u_string.h" #include "i915_reg.h" #include "i915_context.h" @@ -78,7 +79,7 @@ i915_get_name( struct pipe_screen *pscreen ) break; } - sprintf(buffer, "i915 (chipset: %s)", chipset); + util_snprintf(buffer, sizeof(buffer), "i915 (chipset: %s)", chipset); return buffer; } diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c index 5be369fe52..6845c7abde 100644 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -28,6 +28,7 @@ #include "pipe/p_util.h" #include "pipe/p_winsys.h" +#include "util/u_string.h" #include "brw_context.h" #include "brw_screen.h" @@ -66,7 +67,7 @@ brw_get_name( struct pipe_screen *screen ) break; } - sprintf(buffer, "i965 (chipset: %s)", chipset); + util_snprintf(buffer, sizeof(buffer), "i965 (chipset: %s)", chipset); return buffer; } diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index 9e0f91f202..ef9e3a3d6c 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -28,7 +28,7 @@ #ifndef PIPE_FORMAT_H #define PIPE_FORMAT_H -#include /* for sprintf */ +#include "util/u_string.h" #include "p_compiler.h" #include "p_debug.h" @@ -367,7 +367,7 @@ static INLINE char *pf_sprint_name( char *str, enum pipe_format format ) strcat( str, "S" ); break; } - sprintf( &str[strlen( str )], "%u", size * scale ); + util_snprintf( &str[strlen( str )], 32, "%u", size * scale ); } if (i != 0) { strcat( str, "_" ); diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index 8e3aaee496..dbca080a4b 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -138,14 +138,6 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) #define GETENV( X ) debug_get_option( X, NULL ) -#ifdef WIN32 -int rpl_vsnprintf(char *, size_t, const char *, va_list); -int rpl_snprintf(char *str, size_t size, const char *format, ...); -#define vsnprintf rpl_vsnprintf -#define snprintf rpl_snprintf -#endif - - /** * Return memory on given byte alignment */ -- cgit v1.2.3 From 01c7dd2629d161bf87af679a3045e1e2d54259fc Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 15 Apr 2008 10:38:05 +0900 Subject: gallium: Add draw_pt_fetch_shade_pipeline.c to scons build. --- src/gallium/auxiliary/draw/SConscript | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 52107912f5..a7fb5dbd61 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -18,6 +18,7 @@ draw = env.ConvenienceLibrary( 'draw_pt_vcache.c', 'draw_pt_fetch_emit.c', 'draw_pt_fetch_pipeline.c', + 'draw_pt_fetch_shade_pipeline.c', 'draw_pt_pipeline.c', 'draw_pt_elts.c', 'draw_prim.c', -- cgit v1.2.3 From a175e15f20b2a231cc9d09099e7b6d8aea6c624e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 15 Apr 2008 12:34:33 +0900 Subject: gallium: Allow to use a single slab. We often want to use a pool of equally sized buffers, so this makes the slab suballocator a drop-in replacement. --- src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 22 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c | 315 +++++++++++++--------- 2 files changed, 209 insertions(+), 128 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index b2d2520b67..96f9af3825 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -118,13 +118,21 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, * Slab sub-allocator. */ struct pb_manager * -pb_slab_manager_create(struct pb_manager *provider, - const struct pb_desc *desc, - size_t smallestSize, - size_t numSizes, - size_t desiredNumBuffers, - size_t maxSlabSize, - size_t pageAlignment); +pb_slab_manager_create(struct pb_manager *provider, + size_t bufSize, + size_t slabSize, + const struct pb_desc *desc); + +/** + * Allow a range of buffer size, by aggregating multiple slabs sub-allocators + * with different bucket sizes. + */ +struct pb_manager * +pb_slab_range_manager_create(struct pb_manager *provider, + size_t minBufSize, + size_t maxBufSize, + size_t slabSize, + const struct pb_desc *desc); /** diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index 676e8e29b9..9506ac9ae1 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -70,19 +70,24 @@ struct pb_slab size_t numBuffers; size_t numFree; struct pb_slab_buffer *buffers; - struct pb_slab_size_header *header; + struct pb_slab_manager *mgr; struct pb_buffer *bo; - size_t pageAlignment; void *virtual; }; -struct pb_slab_size_header +struct pb_slab_manager { + struct pb_manager base; + + struct pb_manager *provider; + size_t bufSize; + size_t slabSize; + struct pb_desc desc; + struct list_head slabs; struct list_head freeSlabs; - struct pb_slab_manager *pool; - size_t bufSize; + _glthread_Mutex mutex; }; @@ -90,19 +95,18 @@ struct pb_slab_size_header * The data of this structure remains constant after * initialization and thus needs no mutex protection. */ -struct pb_slab_manager +struct pb_slab_range_manager { struct pb_manager base; + struct pb_manager *provider; + size_t minBufSize; + size_t maxBufSize; struct pb_desc desc; + + unsigned numBuckets; size_t *bucketSizes; - size_t numBuckets; - size_t pageSize; - struct pb_manager *provider; - unsigned pageAlignment; - unsigned maxSlabSize; - unsigned desiredNumBuffers; - struct pb_slab_size_header *headers; + struct pb_manager **buckets; }; @@ -122,8 +126,16 @@ pb_slab_manager(struct pb_manager *mgr) } +static INLINE struct pb_slab_range_manager * +pb_slab_range_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pb_slab_range_manager *)mgr; +} + + /** - * Delete a buffer from the slab header delayed list and put + * Delete a buffer from the slab delayed list and put * it on the slab FREE list. */ static void @@ -131,10 +143,10 @@ pb_slab_buffer_destroy(struct pb_buffer *_buf) { struct pb_slab_buffer *buf = pb_slab_buffer(_buf); struct pb_slab *slab = buf->slab; - struct pb_slab_size_header *header = slab->header; + struct pb_slab_manager *mgr = slab->mgr; struct list_head *list = &buf->head; - _glthread_LOCK_MUTEX(header->mutex); + _glthread_LOCK_MUTEX(mgr->mutex); assert(buf->base.base.refcount == 0); @@ -145,21 +157,21 @@ pb_slab_buffer_destroy(struct pb_buffer *_buf) slab->numFree++; if (slab->head.next == &slab->head) - LIST_ADDTAIL(&slab->head, &header->slabs); + LIST_ADDTAIL(&slab->head, &mgr->slabs); if (slab->numFree == slab->numBuffers) { list = &slab->head; LIST_DEL(list); - LIST_ADDTAIL(list, &header->freeSlabs); + LIST_ADDTAIL(list, &mgr->freeSlabs); } - if (header->slabs.next == &header->slabs || slab->numFree + if (mgr->slabs.next == &mgr->slabs || slab->numFree != slab->numBuffers) { struct list_head *next; - for (list = header->freeSlabs.next, next = list->next; list - != &header->freeSlabs; list = next, next = list->next) { + for (list = mgr->freeSlabs.next, next = list->next; list + != &mgr->freeSlabs; list = next, next = list->next) { slab = LIST_ENTRY(struct pb_slab, list, head); @@ -170,7 +182,7 @@ pb_slab_buffer_destroy(struct pb_buffer *_buf) } } - _glthread_UNLOCK_MUTEX(header->mutex); + _glthread_UNLOCK_MUTEX(mgr->mutex); } @@ -217,15 +229,13 @@ pb_slab_buffer_vtbl = { static enum pipe_error -pb_slab_create(struct pb_slab_size_header *header) +pb_slab_create(struct pb_slab_manager *mgr) { - struct pb_slab_manager *pool = header->pool; - size_t size = header->bufSize * pool->desiredNumBuffers; struct pb_slab *slab; struct pb_slab_buffer *buf; - size_t numBuffers; - int ret; + unsigned numBuffers; unsigned i; + enum pipe_error ret; slab = CALLOC_STRUCT(pb_slab); if (!slab) @@ -236,22 +246,23 @@ pb_slab_create(struct pb_slab_size_header *header) * to efficiently reuse slabs. */ - size = (size <= pool->maxSlabSize) ? size : pool->maxSlabSize; - size = (size + pool->pageSize - 1) & ~(pool->pageSize - 1); - - slab->bo = pool->provider->create_buffer(pool->provider, size, &pool->desc); - if(!slab->bo) + slab->bo = mgr->provider->create_buffer(mgr->provider, mgr->slabSize, &mgr->desc); + if(!slab->bo) { + ret = PIPE_ERROR_OUT_OF_MEMORY; goto out_err0; + } slab->virtual = pb_map(slab->bo, - PIPE_BUFFER_USAGE_CPU_READ | - PIPE_BUFFER_USAGE_CPU_WRITE); - if(!slab->virtual) + PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE); + if(!slab->virtual) { + ret = PIPE_ERROR_OUT_OF_MEMORY; goto out_err1; + } pb_unmap(slab->bo); - numBuffers = slab->bo->base.size / header->bufSize; + numBuffers = slab->bo->base.size / mgr->bufSize; slab->buffers = CALLOC(numBuffers, sizeof(*slab->buffers)); if (!slab->buffers) { @@ -263,17 +274,17 @@ pb_slab_create(struct pb_slab_size_header *header) LIST_INITHEAD(&slab->freeBuffers); slab->numBuffers = numBuffers; slab->numFree = 0; - slab->header = header; + slab->mgr = mgr; buf = slab->buffers; for (i=0; i < numBuffers; ++i) { buf->base.base.refcount = 0; - buf->base.base.size = header->bufSize; + buf->base.base.size = mgr->bufSize; buf->base.base.alignment = 0; buf->base.base.usage = 0; buf->base.vtbl = &pb_slab_buffer_vtbl; buf->slab = slab; - buf->start = i* header->bufSize; + buf->start = i* mgr->bufSize; buf->mapCount = 0; _glthread_INIT_COND(buf->event); LIST_ADDTAIL(&buf->head, &slab->freeBuffers); @@ -281,7 +292,7 @@ pb_slab_create(struct pb_slab_size_header *header) buf++; } - LIST_ADDTAIL(&slab->head, &header->slabs); + LIST_ADDTAIL(&slab->head, &mgr->slabs); return PIPE_OK; @@ -293,51 +304,55 @@ out_err0: } +static int +check_alignment(size_t requested, size_t provided) +{ + return requested <= provided && (provided % requested) == 0; +} + + static struct pb_buffer * -pb_slab_manager_create_buffer(struct pb_manager *_pool, +pb_slab_manager_create_buffer(struct pb_manager *_mgr, size_t size, const struct pb_desc *desc) { - struct pb_slab_manager *pool = pb_slab_manager(_pool); - struct pb_slab_size_header *header; - unsigned i; + struct pb_slab_manager *mgr = pb_slab_manager(_mgr); static struct pb_slab_buffer *buf; struct pb_slab *slab; struct list_head *list; int count = DRI_SLABPOOL_ALLOC_RETRIES; - /* - * FIXME: Check for compatibility. - */ - - header = pool->headers; - for (i=0; inumBuckets; ++i) { - if (header->bufSize >= size) - break; - header++; - } - - if (i >= pool->numBuckets) - /* Fall back to allocate a buffer object directly from the provider. */ - return pool->provider->create_buffer(pool->provider, size, desc); - + /* check size */ + assert(size == mgr->bufSize); + if(size != mgr->bufSize) + return NULL; + + /* check if we can provide the requested alignment */ + assert(check_alignment(desc->alignment, mgr->desc.alignment)); + if(!check_alignment(desc->alignment, mgr->desc.alignment)) + return NULL; + assert(check_alignment(desc->alignment, mgr->bufSize)); + if(!check_alignment(desc->alignment, mgr->bufSize)) + return NULL; - _glthread_LOCK_MUTEX(header->mutex); - while (header->slabs.next == &header->slabs && count > 0) { - if (header->slabs.next != &header->slabs) + /* XXX: check for compatible buffer usage too? */ + + _glthread_LOCK_MUTEX(mgr->mutex); + while (mgr->slabs.next == &mgr->slabs && count > 0) { + if (mgr->slabs.next != &mgr->slabs) break; - _glthread_UNLOCK_MUTEX(header->mutex); + _glthread_UNLOCK_MUTEX(mgr->mutex); if (count != DRI_SLABPOOL_ALLOC_RETRIES) util_time_sleep(1); - _glthread_LOCK_MUTEX(header->mutex); - (void) pb_slab_create(header); + _glthread_LOCK_MUTEX(mgr->mutex); + (void) pb_slab_create(mgr); count--; } - list = header->slabs.next; - if (list == &header->slabs) { - _glthread_UNLOCK_MUTEX(header->mutex); + list = mgr->slabs.next; + if (list == &mgr->slabs) { + _glthread_UNLOCK_MUTEX(mgr->mutex); return NULL; } slab = LIST_ENTRY(struct pb_slab, list, head); @@ -347,83 +362,141 @@ pb_slab_manager_create_buffer(struct pb_manager *_pool, list = slab->freeBuffers.next; LIST_DELINIT(list); - _glthread_UNLOCK_MUTEX(header->mutex); + _glthread_UNLOCK_MUTEX(mgr->mutex); buf = LIST_ENTRY(struct pb_slab_buffer, list, head); + ++buf->base.base.refcount; + buf->base.base.alignment = desc->alignment; + buf->base.base.usage = desc->usage; + return &buf->base; } static void -pb_slab_manager_destroy(struct pb_manager *_pool) +pb_slab_manager_destroy(struct pb_manager *_mgr) { - struct pb_slab_manager *pool = pb_slab_manager(_pool); + struct pb_slab_manager *mgr = pb_slab_manager(_mgr); - FREE(pool->headers); - FREE(pool->bucketSizes); - FREE(pool); + /* TODO: cleanup all allocated buffers */ + FREE(mgr); } struct pb_manager * -pb_slab_manager_create(struct pb_manager *provider, - const struct pb_desc *desc, - size_t smallestSize, - size_t numSizes, - size_t desiredNumBuffers, - size_t maxSlabSize, - size_t pageAlignment) +pb_slab_manager_create(struct pb_manager *provider, + size_t bufSize, + size_t slabSize, + const struct pb_desc *desc) +{ + struct pb_slab_manager *mgr; + + mgr = CALLOC_STRUCT(pb_slab_manager); + if (!mgr) + return NULL; + + mgr->base.destroy = pb_slab_manager_destroy; + mgr->base.create_buffer = pb_slab_manager_create_buffer; + + mgr->provider = provider; + mgr->bufSize = bufSize; + mgr->slabSize = slabSize; + mgr->desc = *desc; + + LIST_INITHEAD(&mgr->slabs); + LIST_INITHEAD(&mgr->freeSlabs); + + _glthread_INIT_MUTEX(mgr->mutex); + + return &mgr->base; +} + + +static struct pb_buffer * +pb_slab_range_manager_create_buffer(struct pb_manager *_mgr, + size_t size, + const struct pb_desc *desc) { - struct pb_slab_manager *pool; - size_t i; + struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr); + size_t bufSize; + unsigned i; - pool = CALLOC_STRUCT(pb_slab_manager); - if (!pool) + bufSize = mgr->minBufSize; + for (i = 0; i < mgr->numBuckets; ++i) { + if(bufSize >= size) + return mgr->buckets[i]->create_buffer(mgr->buckets[i], size, desc); + bufSize *= 2; + } + + /* Fall back to allocate a buffer object directly from the provider. */ + return mgr->provider->create_buffer(mgr->provider, size, desc); +} + + +static void +pb_slab_range_manager_destroy(struct pb_manager *_mgr) +{ + struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr); + unsigned i; + + for (i = 0; i < mgr->numBuckets; ++i) + mgr->buckets[i]->destroy(mgr->buckets[i]); + FREE(mgr->buckets); + FREE(mgr->bucketSizes); + FREE(mgr); +} + + +struct pb_manager * +pb_slab_range_manager_create(struct pb_manager *provider, + size_t minBufSize, + size_t maxBufSize, + size_t slabSize, + const struct pb_desc *desc) +{ + struct pb_slab_range_manager *mgr; + size_t bufSize; + unsigned i; + + mgr = CALLOC_STRUCT(pb_slab_range_manager); + if (!mgr) goto out_err0; - pool->bucketSizes = CALLOC(numSizes, sizeof(*pool->bucketSizes)); - if (!pool->bucketSizes) - goto out_err1; + mgr->base.destroy = pb_slab_range_manager_destroy; + mgr->base.create_buffer = pb_slab_range_manager_create_buffer; - pool->headers = CALLOC(numSizes, sizeof(*pool->headers)); - if (!pool->headers) - goto out_err2; - - pool->desc = *desc; - pool->numBuckets = numSizes; -#ifdef WIN32 - pool->pageSize = 4096; -#else - pool->pageSize = getpagesize(); -#endif - pool->provider = provider; - pool->pageAlignment = pageAlignment; - pool->maxSlabSize = maxSlabSize; - pool->desiredNumBuffers = desiredNumBuffers; - - for (i=0; inumBuckets; ++i) { - struct pb_slab_size_header *header = &pool->headers[i]; - - pool->bucketSizes[i] = (smallestSize << i); - - _glthread_INIT_MUTEX(header->mutex); - - LIST_INITHEAD(&header->slabs); - LIST_INITHEAD(&header->freeSlabs); - - header->pool = pool; - header->bufSize = (smallestSize << i); + mgr->provider = provider; + mgr->minBufSize = minBufSize; + mgr->maxBufSize = maxBufSize; + + mgr->numBuckets = 1; + bufSize = minBufSize; + while(bufSize < maxBufSize) { + bufSize *= 2; + ++mgr->numBuckets; } + + mgr->buckets = CALLOC(mgr->numBuckets, sizeof(*mgr->buckets)); + if (!mgr->buckets) + goto out_err1; - pool->base.destroy = pb_slab_manager_destroy; - pool->base.create_buffer = pb_slab_manager_create_buffer; + bufSize = minBufSize; + for (i = 0; i < mgr->numBuckets; ++i) { + mgr->buckets[i] = pb_slab_manager_create(provider, bufSize, slabSize, desc); + if(!mgr->buckets[i]) + goto out_err2; + bufSize *= 2; + } - return &pool->base; + return &mgr->base; out_err2: - FREE(pool->bucketSizes); + for (i = 0; i < mgr->numBuckets; ++i) + if(mgr->buckets[i]) + mgr->buckets[i]->destroy(mgr->buckets[i]); + FREE(mgr->buckets); out_err1: - FREE(pool); + FREE(mgr); out_err0: return NULL; } -- cgit v1.2.3 From 0b995b44e5a02dd4a3abdb6b2a0821a32e597e7c Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 15 Apr 2008 12:35:00 +0900 Subject: gallium: Fix mismatching prototypes. --- src/gallium/auxiliary/util/u_time.c | 2 +- src/gallium/auxiliary/util/u_time.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c index e6c0b19ff6..04f85c42ef 100644 --- a/src/gallium/auxiliary/util/u_time.c +++ b/src/gallium/auxiliary/util/u_time.c @@ -133,7 +133,7 @@ util_time_timeout(const struct util_time *start, #ifdef WIN32 -void util_time_usleep(unsigned usecs) +void util_time_sleep(unsigned usecs) { LONGLONG start, curr, end; diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h index 32035cceb5..2133958446 100644 --- a/src/gallium/auxiliary/util/u_time.h +++ b/src/gallium/auxiliary/util/u_time.h @@ -87,7 +87,7 @@ util_time_timeout(const struct util_time *start, #ifndef WIN32 #define util_time_sleep usleep #else -int +void util_time_sleep(unsigned usecs); #endif -- cgit v1.2.3 From d005befcb9e191ae90619fbdd3c37e262ae3b03e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 15 Apr 2008 15:40:35 +0900 Subject: gallium: Less confusing interface for timeouts. --- src/gallium/auxiliary/util/u_time.c | 6 +++--- src/gallium/auxiliary/util/u_time.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c index 04f85c42ef..01112ebe5a 100644 --- a/src/gallium/auxiliary/util/u_time.c +++ b/src/gallium/auxiliary/util/u_time.c @@ -120,15 +120,15 @@ util_time_compare(const struct util_time *t1, } -int +boolean util_time_timeout(const struct util_time *start, const struct util_time *end, const struct util_time *curr) { if(util_time_compare(start, end) <= 0) - return util_time_compare(start, curr) <= 0 && util_time_compare(curr, end) < 0; + return !(util_time_compare(start, curr) <= 0 && util_time_compare(curr, end) < 0); else - return util_time_compare(start, curr) <= 0 || util_time_compare(curr, end) < 0; + return !(util_time_compare(start, curr) <= 0 || util_time_compare(curr, end) < 0); } diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h index 2133958446..c8836c137f 100644 --- a/src/gallium/auxiliary/util/u_time.h +++ b/src/gallium/auxiliary/util/u_time.h @@ -77,9 +77,9 @@ util_time_diff(const struct util_time *t1, const struct util_time *t2); /** - * Returns zero when the timeout expires, non zero otherwise. + * Returns non-zero when the timeout expires. */ -int +boolean util_time_timeout(const struct util_time *start, const struct util_time *end, const struct util_time *curr); -- cgit v1.2.3 From 95aeeb6d746e57473116ef4d72c05330902f68a5 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 15 Apr 2008 15:41:08 +0900 Subject: gallium: Several fixes to buffer caching. --- src/gallium/auxiliary/pipebuffer/pb_buffer.h | 11 ++++ src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 67 +++++++++++++++++----- src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c | 15 ++--- 3 files changed, 68 insertions(+), 25 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index 4b09c80b2a..49705cb862 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -192,6 +192,17 @@ pb_reference(struct pb_buffer **dst, } +/** + * Utility function to check whether a requested alignment is consistent with + * the provided alignment or not. + */ +static INLINE int +pb_check_alignment(size_t requested, size_t provided) +{ + return requested <= provided && (provided % requested) == 0; +} + + /** * Malloc-based buffer to store data that can't be used by the graphics * hardware. diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 06de0bb6c3..543fd51253 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -136,7 +136,7 @@ _pb_cache_buffer_list_check_free(struct pb_cache_manager *mgr) while(curr != &mgr->delayed) { buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); - if(util_time_timeout(&buf->start, &buf->end, &now) != 0) + if(!util_time_timeout(&buf->start, &buf->end, &now)) break; _pb_cache_buffer_destroy(buf); @@ -202,6 +202,24 @@ pb_cache_buffer_vtbl = { }; +static INLINE boolean +pb_cache_is_buffer_compat(struct pb_cache_buffer *buf, + size_t size, + const struct pb_desc *desc) +{ + /* TODO: be more lenient with size */ + if(buf->base.base.size != size) + return FALSE; + + if(!pb_check_alignment(desc->alignment, buf->base.base.alignment)) + return FALSE; + + /* XXX: check usage too? */ + + return TRUE; +} + + static struct pb_buffer * pb_cache_manager_create_buffer(struct pb_manager *_mgr, size_t size, @@ -209,29 +227,45 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, { struct pb_cache_manager *mgr = pb_cache_manager(_mgr); struct pb_cache_buffer *buf; + struct pb_cache_buffer *curr_buf; struct list_head *curr, *next; struct util_time now; - util_time_get(&now); + _glthread_LOCK_MUTEX(mgr->mutex); + + buf = NULL; curr = mgr->delayed.next; next = curr->next; + + /* search in the expired buffers, freeing them in the process */ + util_time_get(&now); while(curr != &mgr->delayed) { - buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); - - if(buf->base.base.size == size && - buf->base.base.alignment >= desc->alignment && - (buf->base.base.alignment % desc->alignment) == 0 && - /* buf->base.base.usage == usage */ 1) { - ++buf->base.base.refcount; - return &buf->base; - } - - if(util_time_timeout(&buf->start, &buf->end, &now) != 0) - _pb_cache_buffer_destroy(buf); + curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); + if(!buf && pb_cache_is_buffer_compat(curr_buf, size, desc)) + buf = curr_buf; + else if(util_time_timeout(&curr_buf->start, &curr_buf->end, &now)) + _pb_cache_buffer_destroy(curr_buf); + curr = next; + next = curr->next; + } + /* keep searching in the hot buffers */ + while(!buf && curr != &mgr->delayed) { + curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); + if(pb_cache_is_buffer_compat(curr_buf, size, desc)) + buf = curr_buf; curr = next; next = curr->next; } + + if(buf) { + LIST_DEL(&buf->head); + _glthread_UNLOCK_MUTEX(mgr->mutex); + ++buf->base.base.refcount; + return &buf->base; + } + + _glthread_UNLOCK_MUTEX(mgr->mutex); buf = CALLOC_STRUCT(pb_cache_buffer); if(!buf) @@ -243,6 +277,11 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, return NULL; } + assert(buf->buffer->base.refcount >= 1); + assert(pb_check_alignment(desc->alignment, buf->buffer->base.alignment)); + assert((buf->buffer->base.usage & desc->usage) == desc->usage); + assert(buf->buffer->base.size >= size); + buf->base.base.refcount = 1; buf->base.base.alignment = buf->buffer->base.alignment; buf->base.base.usage = buf->buffer->base.usage; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index 9506ac9ae1..b931455056 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -304,13 +304,6 @@ out_err0: } -static int -check_alignment(size_t requested, size_t provided) -{ - return requested <= provided && (provided % requested) == 0; -} - - static struct pb_buffer * pb_slab_manager_create_buffer(struct pb_manager *_mgr, size_t size, @@ -328,11 +321,11 @@ pb_slab_manager_create_buffer(struct pb_manager *_mgr, return NULL; /* check if we can provide the requested alignment */ - assert(check_alignment(desc->alignment, mgr->desc.alignment)); - if(!check_alignment(desc->alignment, mgr->desc.alignment)) + assert(pb_check_alignment(desc->alignment, mgr->desc.alignment)); + if(!pb_check_alignment(desc->alignment, mgr->desc.alignment)) return NULL; - assert(check_alignment(desc->alignment, mgr->bufSize)); - if(!check_alignment(desc->alignment, mgr->bufSize)) + assert(pb_check_alignment(desc->alignment, mgr->bufSize)); + if(!pb_check_alignment(desc->alignment, mgr->bufSize)) return NULL; /* XXX: check for compatible buffer usage too? */ -- cgit v1.2.3 From 7619240cc0c24d3ad4d2424e65110c0326a12dad Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 15 Apr 2008 15:58:14 +0900 Subject: gallium: Fix seg fault (James Vogt). --- src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c index bffca5b244..9d809e2f9b 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c @@ -30,7 +30,7 @@ * \file * A buffer manager that wraps buffers in fenced buffers. * - * \author José Fonseca + * \author José Fonseca */ @@ -101,7 +101,8 @@ fenced_bufmgr_destroy(struct pb_manager *mgr) fenced_buffer_list_destroy(fenced_mgr->fenced_list); - fenced_mgr->provider->destroy(fenced_mgr->provider); + if(fenced_mgr->provider) + fenced_mgr->provider->destroy(fenced_mgr->provider); FREE(fenced_mgr); } @@ -113,6 +114,9 @@ fenced_bufmgr_create(struct pb_manager *provider, { struct fenced_pb_manager *fenced_mgr; + if(!provider) + return NULL; + fenced_mgr = (struct fenced_pb_manager *)CALLOC(1, sizeof(*fenced_mgr)); if (!fenced_mgr) return NULL; -- cgit v1.2.3 From 3c4f1ba5a2edefd69b2c47abaf534fb3af3f259d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 15 Apr 2008 17:08:10 +0900 Subject: gallium: Eliminate stdio file usage. Remove unused stuff. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 334 +++------------------------- src/gallium/auxiliary/tgsi/util/tgsi_dump.h | 10 - 2 files changed, 34 insertions(+), 310 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index ff6a2c4194..26bfc2051f 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -25,8 +25,6 @@ * **************************************************************************/ -#include - #include "pipe/p_debug.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" @@ -35,196 +33,28 @@ #include "tgsi_parse.h" #include "tgsi_build.h" -struct gen_dump -{ - unsigned tabs; - void (* write)( - struct gen_dump *dump, - const void *data, - unsigned size ); -}; - -struct text_dump -{ - struct gen_dump base; - char *text; - unsigned length; - unsigned capacity; -}; - -static void -_text_dump_write( - struct gen_dump *dump, - const void *data, - unsigned size ) -{ - struct text_dump *td = (struct text_dump *) dump; - unsigned new_length = td->length + size; - - if( new_length >= td->capacity ) { - unsigned new_capacity = td->capacity; - - do { - if( new_capacity == 0 ) { - new_capacity = 256; - } - else { - new_capacity *= 2; - } - } while( new_length >= new_capacity ); - td->text = (char *) REALLOC( - td->text, - td->capacity, - new_capacity ); - td->capacity = new_capacity; - } - memcpy( - &td->text[td->length], - data, - size ); - td->length = new_length; - td->text[td->length] = '\0'; -} - -struct file_dump -{ - struct gen_dump base; - FILE *file; -}; - -static void -_file_dump_write( - struct gen_dump *dump, - const void *data, - unsigned size ) -{ - struct file_dump *fd = (struct file_dump *) dump; - -#if 0 - fwrite( data, 1, size, fd->file ); -#else - { - unsigned i; - - for (i = 0; i < size; i++ ) { - fprintf( fd->file, "%c", ((const char *) data)[i] ); - } - } -#endif -} - -static void -gen_dump_str( - struct gen_dump *dump, - const char *str ) -{ - unsigned i; - size_t len = strlen( str ); - - for (i = 0; i < len; i++) { - dump->write( dump, &str[i], 1 ); - if (str[i] == '\n') { - unsigned i; - - for (i = 0; i < dump->tabs; i++) { - dump->write( dump, " ", 4 ); - } - } - } -} - -static void -gen_dump_chr( - struct gen_dump *dump, - const char chr ) -{ - dump->write( dump, &chr, 1 ); -} - -static void -gen_dump_uix( - struct gen_dump *dump, - const unsigned ui ) -{ - char str[36]; - - util_snprintf( str, sizeof(str), "0x%x", ui ); - gen_dump_str( dump, str ); -} - -static void -gen_dump_uid( - struct gen_dump *dump, - const unsigned ui ) -{ - char str[16]; - - util_snprintf( str, sizeof(str), "%u", ui ); - gen_dump_str( dump, str ); -} - -static void -gen_dump_sid( - struct gen_dump *dump, - const int si ) -{ - char str[16]; - - util_snprintf( str, sizeof(str), "%d", si ); - gen_dump_str( dump, str ); -} - static void -gen_dump_flt( - struct gen_dump *dump, - const float flt ) -{ - char str[48]; - - util_snprintf( str, sizeof(str), "%10.4f", flt ); - gen_dump_str( dump, str ); -} - -static void -gen_dump_enum( - struct gen_dump *dump, +dump_enum( const unsigned e, const char **enums, const unsigned enums_count ) { if (e >= enums_count) { - gen_dump_uid( dump, e ); + debug_printf( "%u", e ); } else { - gen_dump_str( dump, enums[e] ); + debug_printf( "%s", enums[e] ); } } -static void -gen_dump_tab( - struct gen_dump *dump ) -{ - ++dump->tabs; -} - -static void -gen_dump_untab( - struct gen_dump *dump ) -{ - assert( dump->tabs > 0 ); - - --dump->tabs; -} - -#define TXT(S) gen_dump_str( dump, S ) -#define CHR(C) gen_dump_chr( dump, C ) -#define UIX(I) gen_dump_uix( dump, I ) -#define UID(I) gen_dump_uid( dump, I ) -#define SID(I) gen_dump_sid( dump, I ) -#define FLT(F) gen_dump_flt( dump, F ) -#define TAB() gen_dump_tab( dump ) -#define UNT() gen_dump_untab( dump ) -#define ENM(E,ENUMS) gen_dump_enum( dump, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) +#define EOL() debug_printf( "\n" ) +#define TXT(S) debug_printf( "%s", S ) +#define CHR(C) debug_printf( "%c", C ) +#define UIX(I) debug_printf( "0x%x", I ) +#define UID(I) debug_printf( "%u", I ) +#define SID(I) debug_printf( "%d", I ) +#define FLT(F) debug_printf( "%10.4f", F ) +#define ENM(E,ENUMS) dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) static const char *TGSI_PROCESSOR_TYPES[] = { @@ -711,7 +541,6 @@ static const char *TGSI_MODULATES[] = static void dump_declaration_short( - struct gen_dump *dump, struct tgsi_full_declaration *decl ) { TXT( "\nDCL " ); @@ -765,7 +594,6 @@ dump_declaration_short( static void dump_declaration_verbose( - struct gen_dump *dump, struct tgsi_full_declaration *decl, unsigned ignored, unsigned deflt, @@ -803,7 +631,7 @@ dump_declaration_verbose( UIX( decl->Declaration.Padding ); } - CHR( '\n' ); + EOL(); switch( decl->Declaration.Declare ) { case TGSI_DECLARE_RANGE: TXT( "\nFirst: " ); @@ -822,7 +650,7 @@ dump_declaration_verbose( } if( decl->Declaration.Interpolate ) { - CHR( '\n' ); + EOL(); TXT( "\nInterpolate: " ); ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES ); if( ignored ) { @@ -832,7 +660,7 @@ dump_declaration_verbose( } if( decl->Declaration.Semantic ) { - CHR( '\n' ); + EOL(); TXT( "\nSemanticName : " ); ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS ); TXT( "\nSemanticIndex: " ); @@ -846,7 +674,6 @@ dump_declaration_verbose( static void dump_immediate_short( - struct gen_dump *dump, struct tgsi_full_immediate *imm ) { unsigned i; @@ -874,7 +701,6 @@ dump_immediate_short( static void dump_immediate_verbose( - struct gen_dump *dump, struct tgsi_full_immediate *imm, unsigned ignored ) { @@ -888,7 +714,7 @@ dump_immediate_verbose( } for( i = 0; i < imm->Immediate.Size - 1; i++ ) { - CHR( '\n' ); + EOL(); switch( imm->Immediate.DataType ) { case TGSI_IMM_FLOAT32: TXT( "\nFloat: " ); @@ -903,14 +729,13 @@ dump_immediate_verbose( static void dump_instruction_short( - struct gen_dump *dump, struct tgsi_full_instruction *inst, unsigned instno ) { unsigned i; boolean first_reg = TRUE; - CHR( '\n' ); + EOL(); UID( instno ); CHR( ':' ); ENM( inst->Instruction.Opcode, TGSI_OPCODES_SHORT ); @@ -1042,7 +867,6 @@ dump_instruction_short( static void dump_instruction_verbose( - struct gen_dump *dump, struct tgsi_full_instruction *inst, unsigned ignored, unsigned deflt, @@ -1070,7 +894,7 @@ dump_instruction_verbose( } if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) { - CHR( '\n' ); + EOL(); TXT( "\nType : " ); ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS ); if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) { @@ -1124,7 +948,7 @@ dump_instruction_verbose( } if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) { - CHR( '\n' ); + EOL(); TXT( "\nType : " ); ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS ); if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) { @@ -1142,7 +966,7 @@ dump_instruction_verbose( } if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) { - CHR( '\n' ); + EOL(); TXT( "\nType : " ); ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS ); if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) { @@ -1163,7 +987,7 @@ dump_instruction_verbose( struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i]; - CHR( '\n' ); + EOL(); TXT( "\nFile : " ); ENM( dst->DstRegister.File, TGSI_FILES ); if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) { @@ -1194,7 +1018,7 @@ dump_instruction_verbose( } if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) { - CHR( '\n' ); + EOL(); TXT( "\nType : " ); ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS ); if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) { @@ -1232,7 +1056,7 @@ dump_instruction_verbose( } if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) { - CHR( '\n' ); + EOL(); TXT( "\nType : " ); ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS ); if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) { @@ -1254,7 +1078,7 @@ dump_instruction_verbose( struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i]; - CHR( '\n' ); + EOL(); TXT( "\nFile : "); ENM( src->SrcRegister.File, TGSI_FILES ); if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) { @@ -1299,7 +1123,7 @@ dump_instruction_verbose( } if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) { - CHR( '\n' ); + EOL(); TXT( "\nType : " ); ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS ); if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) { @@ -1345,7 +1169,7 @@ dump_instruction_verbose( } if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) { - CHR( '\n' ); + EOL(); TXT( "\nType : " ); ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS ); if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) { @@ -1380,9 +1204,8 @@ dump_instruction_verbose( } } -static void -dump_gen( - struct gen_dump *dump, +void +tgsi_dump( const struct tgsi_token *tokens, unsigned flags ) { @@ -1394,16 +1217,16 @@ dump_gen( unsigned deflt = !(flags & TGSI_DUMP_NO_DEFAULT); unsigned instno = 0; - dump->tabs = 0; - - /* sanity check */ + /* sanity checks */ assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0); + assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0); + assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "END") == 0); tgsi_parse_init( &parse, tokens ); TXT( "tgsi-dump begin -----------------" ); - CHR( '\n' ); + EOL(); ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES_SHORT ); UID( parse.FullVersion.Version.MajorVersion ); CHR( '.' ); @@ -1414,7 +1237,7 @@ dump_gen( UID( parse.FullVersion.Version.MajorVersion ); TXT( "\nMinorVersion: " ); UID( parse.FullVersion.Version.MinorVersion ); - CHR( '\n' ); + EOL(); TXT( "\nHeaderSize: " ); UID( parse.FullHeader.Header.HeaderSize ); @@ -1422,7 +1245,7 @@ dump_gen( UID( parse.FullHeader.Header.BodySize ); TXT( "\nProcessor : " ); ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES ); - CHR( '\n' ); + EOL(); } fi = tgsi_default_full_instruction(); @@ -1434,19 +1257,16 @@ dump_gen( switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: dump_declaration_short( - dump, &parse.FullToken.FullDeclaration ); break; case TGSI_TOKEN_TYPE_IMMEDIATE: dump_immediate_short( - dump, &parse.FullToken.FullImmediate ); break; case TGSI_TOKEN_TYPE_INSTRUCTION: dump_instruction_short( - dump, &parse.FullToken.FullInstruction, instno ); instno++; @@ -1471,7 +1291,6 @@ dump_gen( switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: dump_declaration_verbose( - dump, &parse.FullToken.FullDeclaration, ignored, deflt, @@ -1480,14 +1299,12 @@ dump_gen( case TGSI_TOKEN_TYPE_IMMEDIATE: dump_immediate_verbose( - dump, &parse.FullToken.FullImmediate, ignored ); break; case TGSI_TOKEN_TYPE_INSTRUCTION: dump_instruction_verbose( - dump, &parse.FullToken.FullInstruction, ignored, deflt, @@ -1498,7 +1315,7 @@ dump_gen( assert( 0 ); } - CHR( '\n' ); + EOL(); } } @@ -1506,86 +1323,3 @@ dump_gen( tgsi_parse_free( &parse ); } - - -static void -sanity_checks(void) -{ - assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0); - assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "END") == 0); -} - - -void -tgsi_dump( - const struct tgsi_token *tokens, - unsigned flags ) -{ - struct file_dump dump; - - sanity_checks(); - - dump.base.write = _file_dump_write; -#if 0 - { - static unsigned counter = 0; - char buffer[64]; - sprintf( buffer, "tgsi-dump-%.4u.txt", counter++ ); - dump.file = fopen( buffer, "wt" ); - } -#else - dump.file = stderr; -#endif - - dump_gen( - &dump.base, - tokens, - flags ); - -#if 0 - fclose( dump.file ); -#endif -} - -void -tgsi_dump_str( - char **str, - const struct tgsi_token *tokens, - unsigned flags ) -{ - struct text_dump dump; - - dump.base.write = _text_dump_write; - dump.text = NULL; - dump.length = 0; - dump.capacity = 0; - - dump_gen( - &dump.base, - tokens, - flags ); - - *str = dump.text; -} - - -void tgsi_debug_dump( struct tgsi_token *tokens ) -{ - char *str, *p; - - tgsi_dump_str( &str, tokens, 0 ); - - p = str; - while (p != NULL) - { - char *end = strchr( p, '\n' ); - if (end != NULL) - { - *end++ = '\0'; - } - debug_printf( "%s\n", p ); - p = end; - } - - FREE( str ); -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h index 51d79a0362..beb0155d56 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h @@ -14,16 +14,6 @@ tgsi_dump( const struct tgsi_token *tokens, unsigned flags ); -void -tgsi_dump_str( - char **str, - const struct tgsi_token *tokens, - unsigned flags ); - -/* Dump to debug_printf() - */ -void tgsi_debug_dump( struct tgsi_token *tokens ); - #if defined __cplusplus } #endif -- cgit v1.2.3 From a68f664124592829a3b715388e6cfa43f82900c8 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 15 Apr 2008 18:11:47 +0900 Subject: gallium: Cache one line worth of debug output on windows. The windbg connection seems synchronous, so this speeds up when printing little text at a time (e.g., tgsi output). --- src/gallium/auxiliary/util/p_debug.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index f9366467cd..c195f61820 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -59,10 +59,15 @@ void _debug_vprintf(const char *format, va_list ap) #ifdef WIN32 #ifndef WINCE /* EngDebugPrint does not handle float point arguments, so we need to use - * our own vsnprintf implementation */ - char buf[512 + 1]; - util_vsnprintf(buf, sizeof(buf), format, ap); - _EngDebugPrint("%s", buf); + * our own vsnprintf implementation. It is also very slow, so buffer until + * we find a newline. */ + static char buf[512 + 1] = {'\0'}; + size_t len = strlen(buf); + int ret = util_vsnprintf(buf + len, sizeof(buf) - len, format, ap); + if(ret > (int)(sizeof(buf) - len - 1) || strchr(buf + len, '\n')) { + _EngDebugPrint("%s", buf); + buf[0] = '\0'; + } #else /* TODO: Implement debug print for WINCE */ #endif -- cgit v1.2.3 From 6a26a9c58cc38ff636ee88ce01fed40eea500fc0 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 15 Apr 2008 14:28:41 +0100 Subject: draw: fetch_shade_pipeline needs to translate to hw vertex format (from get_vertex_info) --- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 158 +++++++++++++-------- 1 file changed, 102 insertions(+), 56 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 04b3d2c4cf..557cd43f5b 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -36,52 +36,60 @@ struct fetch_pipeline_middle_end { struct draw_pt_middle_end base; struct draw_context *draw; + const ubyte *input_buf[2]; + struct { - const ubyte *ptr; - unsigned pitch; - void (*fetch)( const void *from, float *attrib); - void (*emit)( const float *attrib, float **out ); - } fetch[PIPE_MAX_ATTRIBS]; + const ubyte **input_buf; + unsigned input_offset; + unsigned output_offset; + + void (*emit)( const float *attrib, void *ptr ); + } translate[PIPE_MAX_ATTRIBS]; + unsigned nr_translate; - unsigned nr_fetch; unsigned pipeline_vertex_size; unsigned hw_vertex_size; unsigned prim; }; -#if 0 + +static void emit_NULL( const float *attrib, + void *ptr ) +{ +} + static void emit_R32_FLOAT( const float *attrib, - float **out ) + void *ptr ) { - (*out)[0] = attrib[0]; - (*out) += 1; + float *out = (float *)ptr; + out[0] = attrib[0]; } static void emit_R32G32_FLOAT( const float *attrib, - float **out ) + void *ptr ) { - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out) += 2; + float *out = (float *)ptr; + out[0] = attrib[0]; + out[1] = attrib[1]; } static void emit_R32G32B32_FLOAT( const float *attrib, - float **out ) + void *ptr ) { - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out)[2] = attrib[2]; - (*out) += 3; + float *out = (float *)ptr; + out[0] = attrib[0]; + out[1] = attrib[1]; + out[2] = attrib[2]; } -#endif + static void emit_R32G32B32A32_FLOAT( const float *attrib, - float **out ) + void *ptr ) { - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out)[2] = attrib[2]; - (*out)[3] = attrib[3]; - (*out) += 4; + float *out = (float *)ptr; + out[0] = attrib[0]; + out[1] = attrib[1]; + out[2] = attrib[2]; + out[3] = attrib[3]; } static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, @@ -89,9 +97,10 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; - unsigned i, nr = 0; + unsigned i; boolean ok; const struct vertex_info *vinfo; + unsigned dst_offset; fpme->prim = prim; @@ -100,33 +109,63 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, assert(0); return; } + /* Must do this after set_primitive() above: */ vinfo = draw->render->get_vertex_info(draw->render); - /* Need to look at vertex shader inputs (we know it is a - * passthrough shader, so these define the outputs too). If we - * were running a shader, we'd still be looking at the inputs at - * this point. - */ - for (i = 0; i < draw->vertex_shader->info.num_inputs; i++) { - unsigned buf = draw->vertex_element[i].vertex_buffer_index; - enum pipe_format format = draw->vertex_element[i].src_format; - - fpme->fetch[nr].ptr = ((const ubyte *) draw->user.vbuffer[buf] + - draw->vertex_buffer[buf].buffer_offset + - draw->vertex_element[i].src_offset); - fpme->fetch[nr].pitch = draw->vertex_buffer[buf].pitch; - fpme->fetch[nr].fetch = draw_get_fetch_func( format ); + /* In passthrough mode, need to translate from vertex shader + * outputs to hw vertices. + */ + dst_offset = 0; + for (i = 0; i < vinfo->num_attribs; i++) { + unsigned emit_sz = 0; + unsigned src_buffer = 0; + unsigned src_offset = (sizeof(struct vertex_header) + + vinfo->src_index[i] * 4 * sizeof(float) ); + + + + switch (vinfo->emit[i]) { + case EMIT_4F: + fpme->translate[i].emit = emit_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); + break; + case EMIT_3F: + fpme->translate[i].emit = emit_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); + break; + case EMIT_2F: + fpme->translate[i].emit = emit_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); + break; + case EMIT_1F: + fpme->translate[i].emit = emit_R32_FLOAT; + emit_sz = 1 * sizeof(float); + break; + case EMIT_1F_PSIZE: + fpme->translate[i].emit = emit_R32_FLOAT; + emit_sz = 1 * sizeof(float); + src_buffer = 1; + src_offset = 0; + break; + default: + assert(0); + fpme->translate[i].emit = emit_NULL; + emit_sz = 0; + break; + } - /* Always do this -- somewhat redundant... - */ - fpme->fetch[nr].emit = emit_R32G32B32A32_FLOAT; - nr++; + fpme->translate[i].input_buf = &fpme->input_buf[src_buffer]; + fpme->translate[i].input_offset = src_offset; + fpme->translate[i].output_offset = dst_offset; + dst_offset += emit_sz; } - fpme->nr_fetch = nr; + fpme->nr_translate = vinfo->num_attribs; + fpme->hw_vertex_size = vinfo->size * 4; + //fpme->pipeline_vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); fpme->pipeline_vertex_size = MAX_VERTEX_ALLOCATION; fpme->hw_vertex_size = vinfo->size * 4; @@ -171,7 +210,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, } else { unsigned i, j; void *hw_verts; - float *out; + char *out_buf; /* XXX: need to flush to get prim_vbuf.c to release its allocation?? */ @@ -185,22 +224,29 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, return; } - out = (float *)hw_verts; + out_buf = (char *)hw_verts; + fpme->input_buf[0] = (const ubyte *)pipeline_verts; + fpme->input_buf[1] = (const ubyte *)&fpme->draw->rasterizer->point_size; + for (i = 0; i < fetch_count; i++) { - struct vertex_header *header = - (struct vertex_header*)(pipeline_verts + (fpme->pipeline_vertex_size * i)); - for (j = 0; j < fpme->nr_fetch; j++) { - float *attrib = header->data[j]; + for (j = 0; j < fpme->nr_translate; j++) { + + const float *attrib = (const float *)( (*fpme->translate[i].input_buf) + + fpme->translate[i].input_offset ); + + char *dest = out_buf + fpme->translate[i].output_offset; + /*debug_printf("emiting [%f, %f, %f, %f]\n", attrib[0], attrib[1], attrib[2], attrib[3]);*/ - fpme->fetch[j].emit(attrib, &out); + + fpme->translate[j].emit(attrib, dest); } + + fpme->input_buf[0] += fpme->pipeline_vertex_size; } - /* XXX: Draw arrays path to avoid re-emitting index list again and - * again. - */ + draw->render->draw(draw->render, draw_elts, draw_count); -- cgit v1.2.3 From c81bbab6f6c0413996799800cac6fb49a698e765 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 15 Apr 2008 14:35:29 +0100 Subject: gallium: add a generic vertex (or other) buffer translation module --- src/gallium/auxiliary/translate/Makefile | 12 + src/gallium/auxiliary/translate/SConscript | 9 + src/gallium/auxiliary/translate/translate.h | 82 +++ .../auxiliary/translate/translate_generic.c | 630 +++++++++++++++++++++ 4 files changed, 733 insertions(+) create mode 100644 src/gallium/auxiliary/translate/Makefile create mode 100644 src/gallium/auxiliary/translate/SConscript create mode 100644 src/gallium/auxiliary/translate/translate.h create mode 100644 src/gallium/auxiliary/translate/translate_generic.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/Makefile b/src/gallium/auxiliary/translate/Makefile new file mode 100644 index 0000000000..051987bb7e --- /dev/null +++ b/src/gallium/auxiliary/translate/Makefile @@ -0,0 +1,12 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = translate + +C_SOURCES = \ + translate_generic.c + +include ../../Makefile.template + +symlinks: + diff --git a/src/gallium/auxiliary/translate/SConscript b/src/gallium/auxiliary/translate/SConscript new file mode 100644 index 0000000000..947b9438cb --- /dev/null +++ b/src/gallium/auxiliary/translate/SConscript @@ -0,0 +1,9 @@ +Import('*') + +cso_cache = env.ConvenienceLibrary( + target = 'translate', + source = [ + 'translate_generic.c', + ]) + +auxiliaries.insert(0, translate) diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h new file mode 100644 index 0000000000..ac6bc7088b --- /dev/null +++ b/src/gallium/auxiliary/translate/translate.h @@ -0,0 +1,82 @@ +/* + * Copyright 2008 Tungsten Graphics, inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + + +/** + * Vertex fetch/store/convert code. This functionality is used in two places: + * 1. Vertex fetch/convert - to grab vertex data from incoming vertex + * arrays and convert to format needed by vertex shaders. + * 2. Vertex store/emit - to convert simple float[][4] vertex attributes + * (which is the organization used throughout the draw/prim pipeline) to + * hardware-specific formats and emit into hardware vertex buffers. + * + * + * Authors: + * Keith Whitwell + */ + +#ifndef _TRANSLATE_H +#define _TRANSLATE_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_format.h" + +struct translate_element +{ + enum pipe_format input_format; + unsigned input_buffer; + unsigned input_offset; + + enum pipe_format output_format; + unsigned output_offset; +}; + + +struct translate { + void (*destroy)( struct translate * ); + + void (*set_buffer)( struct translate *, + unsigned i, + const void *ptr, + unsigned stride ); + + void (*run_elts)( struct translate *, + const unsigned *elts, + unsigned count, + void *output_buffer); +}; + + + +struct translate *translate_sse2_create( unsigned output_stride, + const struct translate_element *elements, + unsigned nr_elements ); + +struct translate *translate_generic_create( unsigned output_stride, + const struct translate_element *elements, + unsigned nr_elements ); + + +#endif diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c new file mode 100644 index 0000000000..643f6430c5 --- /dev/null +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -0,0 +1,630 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "pipe/p_state.h" +#include "translate.h" + + +#define DRAW_DBG 0 + +typedef void (*fetch_func)(const void *ptr, float *attrib); +typedef void (*emit_func)(const float *attrib, void *ptr); + + + +struct translate_generic { + struct translate translate; + + struct { + fetch_func fetch; + unsigned buffer; + unsigned input_offset; + + emit_func emit; + unsigned output_offset; + + char *input_ptr; + unsigned input_stride; + + } attrib[PIPE_MAX_ATTRIBS]; + + unsigned nr_attrib; + unsigned output_stride; +}; + + +static struct translate_generic *translate_generic( struct translate *translate ) +{ + return (struct translate_generic *)translate; +} + +/** + * Fetch a float[4] vertex attribute from memory, doing format/type + * conversion as needed. + * + * This is probably needed/dupliocated elsewhere, eg format + * conversion, texture sampling etc. + */ +#define ATTRIB( NAME, SZ, TYPE, FROM, TO ) \ +static void \ +fetch_##NAME(const void *ptr, float *attrib) \ +{ \ + const float defaults[4] = { 0,0,0,1 }; \ + int i; \ + \ + for (i = 0; i < SZ; i++) { \ + attrib[i] = FROM(i); \ + } \ + \ + for (; i < 4; i++) { \ + attrib[i] = defaults[i]; \ + } \ +} \ + \ +static void \ +emit_##NAME(const float *attrib, void *ptr) \ +{ \ + unsigned i; \ + TYPE *out = (TYPE *)ptr; \ + \ + for (i = 0; i < SZ; i++) { \ + out[i] = TO(attrib[i]); \ + } \ +} + + +#define FROM_64_FLOAT(i) ((float) ((double *) ptr)[i]) +#define FROM_32_FLOAT(i) (((float *) ptr)[i]) + +#define FROM_8_USCALED(i) ((float) ((unsigned char *) ptr)[i]) +#define FROM_16_USCALED(i) ((float) ((unsigned short *) ptr)[i]) +#define FROM_32_USCALED(i) ((float) ((unsigned int *) ptr)[i]) + +#define FROM_8_SSCALED(i) ((float) ((char *) ptr)[i]) +#define FROM_16_SSCALED(i) ((float) ((short *) ptr)[i]) +#define FROM_32_SSCALED(i) ((float) ((int *) ptr)[i]) + +#define FROM_8_UNORM(i) ((float) ((unsigned char *) ptr)[i] / 255.0f) +#define FROM_16_UNORM(i) ((float) ((unsigned short *) ptr)[i] / 65535.0f) +#define FROM_32_UNORM(i) ((float) ((unsigned int *) ptr)[i] / 4294967295.0f) + +#define FROM_8_SNORM(i) ((float) ((char *) ptr)[i] / 127.0f) +#define FROM_16_SNORM(i) ((float) ((short *) ptr)[i] / 32767.0f) +#define FROM_32_SNORM(i) ((float) ((int *) ptr)[i] / 2147483647.0f) + +#define TO_64_FLOAT(f) ((double) f) +#define TO_32_FLOAT(f) (f) + +#define TO_8_USCALED(f) ((unsigned char) f) +#define TO_16_USCALED(f) ((unsigned short) f) +#define TO_32_USCALED(f) ((unsigned int) f) + +#define TO_8_SSCALED(f) ((char) f) +#define TO_16_SSCALED(f) ((short) f) +#define TO_32_SSCALED(f) ((int) f) + +#define TO_8_UNORM(f) ((unsigned char) (f * 255.0f)) +#define TO_16_UNORM(f) ((unsigned short) (f * 65535.0f)) +#define TO_32_UNORM(f) ((unsigned int) (f * 4294967295.0f)) + +#define TO_8_SNORM(f) ((char) (f * 127.0f)) +#define TO_16_SNORM(f) ((short) (f * 32767.0f)) +#define TO_32_SNORM(f) ((int) (f * 2147483647.0f)) + + + +ATTRIB( R64G64B64A64_FLOAT, 4, double, FROM_64_FLOAT, TO_64_FLOAT ) +ATTRIB( R64G64B64_FLOAT, 3, double, FROM_64_FLOAT, TO_64_FLOAT ) +ATTRIB( R64G64_FLOAT, 2, double, FROM_64_FLOAT, TO_64_FLOAT ) +ATTRIB( R64_FLOAT, 1, double, FROM_64_FLOAT, TO_64_FLOAT ) + +ATTRIB( R32G32B32A32_FLOAT, 4, float, FROM_32_FLOAT, TO_32_FLOAT ) +ATTRIB( R32G32B32_FLOAT, 3, float, FROM_32_FLOAT, TO_32_FLOAT ) +ATTRIB( R32G32_FLOAT, 2, float, FROM_32_FLOAT, TO_32_FLOAT ) +ATTRIB( R32_FLOAT, 1, float, FROM_32_FLOAT, TO_32_FLOAT ) + +ATTRIB( R32G32B32A32_USCALED, 4, unsigned, FROM_32_USCALED, TO_32_USCALED ) +ATTRIB( R32G32B32_USCALED, 3, unsigned, FROM_32_USCALED, TO_32_USCALED ) +ATTRIB( R32G32_USCALED, 2, unsigned, FROM_32_USCALED, TO_32_USCALED ) +ATTRIB( R32_USCALED, 1, unsigned, FROM_32_USCALED, TO_32_USCALED ) + +ATTRIB( R32G32B32A32_SSCALED, 4, int, FROM_32_SSCALED, TO_32_SSCALED ) +ATTRIB( R32G32B32_SSCALED, 3, int, FROM_32_SSCALED, TO_32_SSCALED ) +ATTRIB( R32G32_SSCALED, 2, int, FROM_32_SSCALED, TO_32_SSCALED ) +ATTRIB( R32_SSCALED, 1, int, FROM_32_SSCALED, TO_32_SSCALED ) + +ATTRIB( R32G32B32A32_UNORM, 4, unsigned, FROM_32_UNORM, TO_32_UNORM ) +ATTRIB( R32G32B32_UNORM, 3, unsigned, FROM_32_UNORM, TO_32_UNORM ) +ATTRIB( R32G32_UNORM, 2, unsigned, FROM_32_UNORM, TO_32_UNORM ) +ATTRIB( R32_UNORM, 1, unsigned, FROM_32_UNORM, TO_32_UNORM ) + +ATTRIB( R32G32B32A32_SNORM, 4, int, FROM_32_SNORM, TO_32_SNORM ) +ATTRIB( R32G32B32_SNORM, 3, int, FROM_32_SNORM, TO_32_SNORM ) +ATTRIB( R32G32_SNORM, 2, int, FROM_32_SNORM, TO_32_SNORM ) +ATTRIB( R32_SNORM, 1, int, FROM_32_SNORM, TO_32_SNORM ) + +ATTRIB( R16G16B16A16_USCALED, 4, ushort, FROM_16_USCALED, TO_16_USCALED ) +ATTRIB( R16G16B16_USCALED, 3, ushort, FROM_16_USCALED, TO_16_USCALED ) +ATTRIB( R16G16_USCALED, 2, ushort, FROM_16_USCALED, TO_16_USCALED ) +ATTRIB( R16_USCALED, 1, ushort, FROM_16_USCALED, TO_16_USCALED ) + +ATTRIB( R16G16B16A16_SSCALED, 4, short, FROM_16_SSCALED, TO_16_SSCALED ) +ATTRIB( R16G16B16_SSCALED, 3, short, FROM_16_SSCALED, TO_16_SSCALED ) +ATTRIB( R16G16_SSCALED, 2, short, FROM_16_SSCALED, TO_16_SSCALED ) +ATTRIB( R16_SSCALED, 1, short, FROM_16_SSCALED, TO_16_SSCALED ) + +ATTRIB( R16G16B16A16_UNORM, 4, ushort, FROM_16_UNORM, TO_16_UNORM ) +ATTRIB( R16G16B16_UNORM, 3, ushort, FROM_16_UNORM, TO_16_UNORM ) +ATTRIB( R16G16_UNORM, 2, ushort, FROM_16_UNORM, TO_16_UNORM ) +ATTRIB( R16_UNORM, 1, ushort, FROM_16_UNORM, TO_16_UNORM ) + +ATTRIB( R16G16B16A16_SNORM, 4, short, FROM_16_SNORM, TO_16_SNORM ) +ATTRIB( R16G16B16_SNORM, 3, short, FROM_16_SNORM, TO_16_SNORM ) +ATTRIB( R16G16_SNORM, 2, short, FROM_16_SNORM, TO_16_SNORM ) +ATTRIB( R16_SNORM, 1, short, FROM_16_SNORM, TO_16_SNORM ) + +ATTRIB( R8G8B8A8_USCALED, 4, ubyte, FROM_8_USCALED, TO_8_USCALED ) +ATTRIB( R8G8B8_USCALED, 3, ubyte, FROM_8_USCALED, TO_8_USCALED ) +ATTRIB( R8G8_USCALED, 2, ubyte, FROM_8_USCALED, TO_8_USCALED ) +ATTRIB( R8_USCALED, 1, ubyte, FROM_8_USCALED, TO_8_USCALED ) + +ATTRIB( R8G8B8A8_SSCALED, 4, char, FROM_8_SSCALED, TO_8_SSCALED ) +ATTRIB( R8G8B8_SSCALED, 3, char, FROM_8_SSCALED, TO_8_SSCALED ) +ATTRIB( R8G8_SSCALED, 2, char, FROM_8_SSCALED, TO_8_SSCALED ) +ATTRIB( R8_SSCALED, 1, char, FROM_8_SSCALED, TO_8_SSCALED ) + +ATTRIB( R8G8B8A8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM ) +ATTRIB( R8G8B8_UNORM, 3, ubyte, FROM_8_UNORM, TO_8_UNORM ) +ATTRIB( R8G8_UNORM, 2, ubyte, FROM_8_UNORM, TO_8_UNORM ) +ATTRIB( R8_UNORM, 1, ubyte, FROM_8_UNORM, TO_8_UNORM ) + +ATTRIB( R8G8B8A8_SNORM, 4, char, FROM_8_SNORM, TO_8_SNORM ) +ATTRIB( R8G8B8_SNORM, 3, char, FROM_8_SNORM, TO_8_SNORM ) +ATTRIB( R8G8_SNORM, 2, char, FROM_8_SNORM, TO_8_SNORM ) +ATTRIB( R8_SNORM, 1, char, FROM_8_SNORM, TO_8_SNORM ) + +ATTRIB( A8R8G8B8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM ) +//ATTRIB( R8G8B8A8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM ) + + + +static void +fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib) +{ + attrib[2] = FROM_8_UNORM(0); + attrib[1] = FROM_8_UNORM(1); + attrib[0] = FROM_8_UNORM(2); + attrib[3] = FROM_8_UNORM(3); +} + +static void +emit_B8G8R8A8_UNORM( const float *attrib, void *ptr) +{ + ubyte *out = (ubyte *)ptr; + out[2] = TO_8_UNORM(out[0]); + out[1] = TO_8_UNORM(out[1]); + out[0] = TO_8_UNORM(out[2]); + out[3] = TO_8_UNORM(out[3]); +} + +static void +fetch_NULL( const void *ptr, float *attrib ) +{ + attrib[0] = 0; + attrib[1] = 0; + attrib[2] = 0; + attrib[3] = 1; +} + +static void +emit_NULL( const float *attrib, void *ptr ) +{ + /* do nothing is the only sensible option */ +} + +static fetch_func get_fetch_func( enum pipe_format format ) +{ + switch (format) { + case PIPE_FORMAT_R64_FLOAT: + return fetch_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return fetch_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return fetch_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return fetch_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return fetch_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return fetch_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return fetch_R32G32B32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return fetch_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return fetch_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return fetch_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return fetch_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return fetch_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return fetch_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return fetch_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return fetch_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return fetch_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return fetch_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return fetch_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return fetch_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return fetch_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return fetch_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return fetch_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return fetch_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return fetch_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return fetch_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return fetch_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return fetch_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return fetch_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return fetch_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return fetch_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return fetch_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return fetch_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return fetch_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return fetch_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return fetch_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return fetch_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return fetch_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return fetch_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return fetch_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return fetch_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return fetch_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return fetch_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return fetch_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return fetch_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return fetch_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return fetch_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return fetch_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return fetch_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return fetch_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return fetch_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return fetch_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return fetch_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return fetch_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return fetch_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return fetch_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return fetch_R8G8B8A8_SSCALED; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + return fetch_A8R8G8B8_UNORM; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + return fetch_B8G8R8A8_UNORM; + + default: + assert(0); + return fetch_NULL; + } +} + + + + +static emit_func get_emit_func( enum pipe_format format ) +{ + switch (format) { + case PIPE_FORMAT_R64_FLOAT: + return emit_R64_FLOAT; + case PIPE_FORMAT_R64G64_FLOAT: + return emit_R64G64_FLOAT; + case PIPE_FORMAT_R64G64B64_FLOAT: + return emit_R64G64B64_FLOAT; + case PIPE_FORMAT_R64G64B64A64_FLOAT: + return emit_R64G64B64A64_FLOAT; + + case PIPE_FORMAT_R32_FLOAT: + return emit_R32_FLOAT; + case PIPE_FORMAT_R32G32_FLOAT: + return emit_R32G32_FLOAT; + case PIPE_FORMAT_R32G32B32_FLOAT: + return emit_R32G32B32_FLOAT; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + return emit_R32G32B32A32_FLOAT; + + case PIPE_FORMAT_R32_UNORM: + return emit_R32_UNORM; + case PIPE_FORMAT_R32G32_UNORM: + return emit_R32G32_UNORM; + case PIPE_FORMAT_R32G32B32_UNORM: + return emit_R32G32B32_UNORM; + case PIPE_FORMAT_R32G32B32A32_UNORM: + return emit_R32G32B32A32_UNORM; + + case PIPE_FORMAT_R32_USCALED: + return emit_R32_USCALED; + case PIPE_FORMAT_R32G32_USCALED: + return emit_R32G32_USCALED; + case PIPE_FORMAT_R32G32B32_USCALED: + return emit_R32G32B32_USCALED; + case PIPE_FORMAT_R32G32B32A32_USCALED: + return emit_R32G32B32A32_USCALED; + + case PIPE_FORMAT_R32_SNORM: + return emit_R32_SNORM; + case PIPE_FORMAT_R32G32_SNORM: + return emit_R32G32_SNORM; + case PIPE_FORMAT_R32G32B32_SNORM: + return emit_R32G32B32_SNORM; + case PIPE_FORMAT_R32G32B32A32_SNORM: + return emit_R32G32B32A32_SNORM; + + case PIPE_FORMAT_R32_SSCALED: + return emit_R32_SSCALED; + case PIPE_FORMAT_R32G32_SSCALED: + return emit_R32G32_SSCALED; + case PIPE_FORMAT_R32G32B32_SSCALED: + return emit_R32G32B32_SSCALED; + case PIPE_FORMAT_R32G32B32A32_SSCALED: + return emit_R32G32B32A32_SSCALED; + + case PIPE_FORMAT_R16_UNORM: + return emit_R16_UNORM; + case PIPE_FORMAT_R16G16_UNORM: + return emit_R16G16_UNORM; + case PIPE_FORMAT_R16G16B16_UNORM: + return emit_R16G16B16_UNORM; + case PIPE_FORMAT_R16G16B16A16_UNORM: + return emit_R16G16B16A16_UNORM; + + case PIPE_FORMAT_R16_USCALED: + return emit_R16_USCALED; + case PIPE_FORMAT_R16G16_USCALED: + return emit_R16G16_USCALED; + case PIPE_FORMAT_R16G16B16_USCALED: + return emit_R16G16B16_USCALED; + case PIPE_FORMAT_R16G16B16A16_USCALED: + return emit_R16G16B16A16_USCALED; + + case PIPE_FORMAT_R16_SNORM: + return emit_R16_SNORM; + case PIPE_FORMAT_R16G16_SNORM: + return emit_R16G16_SNORM; + case PIPE_FORMAT_R16G16B16_SNORM: + return emit_R16G16B16_SNORM; + case PIPE_FORMAT_R16G16B16A16_SNORM: + return emit_R16G16B16A16_SNORM; + + case PIPE_FORMAT_R16_SSCALED: + return emit_R16_SSCALED; + case PIPE_FORMAT_R16G16_SSCALED: + return emit_R16G16_SSCALED; + case PIPE_FORMAT_R16G16B16_SSCALED: + return emit_R16G16B16_SSCALED; + case PIPE_FORMAT_R16G16B16A16_SSCALED: + return emit_R16G16B16A16_SSCALED; + + case PIPE_FORMAT_R8_UNORM: + return emit_R8_UNORM; + case PIPE_FORMAT_R8G8_UNORM: + return emit_R8G8_UNORM; + case PIPE_FORMAT_R8G8B8_UNORM: + return emit_R8G8B8_UNORM; + case PIPE_FORMAT_R8G8B8A8_UNORM: + return emit_R8G8B8A8_UNORM; + + case PIPE_FORMAT_R8_USCALED: + return emit_R8_USCALED; + case PIPE_FORMAT_R8G8_USCALED: + return emit_R8G8_USCALED; + case PIPE_FORMAT_R8G8B8_USCALED: + return emit_R8G8B8_USCALED; + case PIPE_FORMAT_R8G8B8A8_USCALED: + return emit_R8G8B8A8_USCALED; + + case PIPE_FORMAT_R8_SNORM: + return emit_R8_SNORM; + case PIPE_FORMAT_R8G8_SNORM: + return emit_R8G8_SNORM; + case PIPE_FORMAT_R8G8B8_SNORM: + return emit_R8G8B8_SNORM; + case PIPE_FORMAT_R8G8B8A8_SNORM: + return emit_R8G8B8A8_SNORM; + + case PIPE_FORMAT_R8_SSCALED: + return emit_R8_SSCALED; + case PIPE_FORMAT_R8G8_SSCALED: + return emit_R8G8_SSCALED; + case PIPE_FORMAT_R8G8B8_SSCALED: + return emit_R8G8B8_SSCALED; + case PIPE_FORMAT_R8G8B8A8_SSCALED: + return emit_R8G8B8A8_SSCALED; + + case PIPE_FORMAT_A8R8G8B8_UNORM: + return emit_A8R8G8B8_UNORM; + + case PIPE_FORMAT_B8G8R8A8_UNORM: + return emit_B8G8R8A8_UNORM; + + default: + assert(0); + return emit_NULL; + } +} + + + +/** + * Fetch vertex attributes for 'count' vertices. + */ +static void generic_run_elts( struct translate *translate, + const unsigned *elts, + unsigned count, + void *output_buffer ) +{ + struct translate_generic *tg = translate_generic(translate); + char *vert = output_buffer; + unsigned nr_attrs = tg->nr_attrib; + unsigned attr; + unsigned i; + + /* loop over vertex attributes (vertex shader inputs) + */ + for (i = 0; i < count; i++) { + unsigned elt = *elts++; + + for (attr = 0; attr < nr_attrs; attr++) { + float data[4]; + + const char *src = (tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt); + + char *dst = (vert + + tg->attrib[attr].output_offset); + + tg->attrib[attr].fetch( src, data ); + tg->attrib[attr].emit( data, dst ); + } + + vert += tg->output_stride; + } +} + + + +static void generic_set_buffer( struct translate *translate, + unsigned buf, + const void *ptr, + unsigned stride ) +{ + struct translate_generic *tg = translate_generic(translate); + unsigned i; + + for (i = 0; i < tg->nr_attrib; i++) { + if (tg->attrib[i].buffer == buf) { + tg->attrib[i].input_ptr = ((char *)ptr + + tg->attrib[i].input_offset); + tg->attrib[i].input_stride = stride; + } + } +} + + +static void generic_destroy( struct translate *translate ) +{ + FREE(translate); +} + +struct translate *translate_generic_create( unsigned output_stride, + const struct translate_element *elements, + unsigned nr_elements ) +{ + struct translate_generic *tg = CALLOC_STRUCT(translate_generic); + unsigned i; + + if (tg == NULL) + return NULL; + + tg->translate.destroy = generic_destroy; + tg->translate.set_buffer = generic_set_buffer; + tg->translate.run_elts = generic_run_elts; + + for (i = 0; i < nr_elements; i++) { + tg->attrib[i].fetch = get_fetch_func(elements[i].input_format); + tg->attrib[i].buffer = elements[i].input_buffer; + tg->attrib[i].input_offset = elements[i].input_offset; + + tg->attrib[i].emit = get_emit_func(elements[i].output_format); + tg->attrib[i].output_offset = elements[i].output_offset; + } + + tg->nr_attrib = nr_elements; + + + return &tg->translate; +} -- cgit v1.2.3 From 59f68f36c4ad37496e3ed5d7d3142e3ae35dd351 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 15 Apr 2008 14:40:16 +0100 Subject: translate: typo in emit_B8G8R8A8_UNORM --- src/gallium/auxiliary/translate/translate_generic.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 643f6430c5..7e75ba8365 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -231,10 +231,10 @@ static void emit_B8G8R8A8_UNORM( const float *attrib, void *ptr) { ubyte *out = (ubyte *)ptr; - out[2] = TO_8_UNORM(out[0]); - out[1] = TO_8_UNORM(out[1]); - out[0] = TO_8_UNORM(out[2]); - out[3] = TO_8_UNORM(out[3]); + out[2] = TO_8_UNORM(attrib[0]); + out[1] = TO_8_UNORM(attrib[1]); + out[0] = TO_8_UNORM(attrib[2]); + out[3] = TO_8_UNORM(attrib[3]); } static void -- cgit v1.2.3 From 7eb6f130a1dfb8179ff371eb4e75b47d6ee45d2a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 15 Apr 2008 14:44:39 +0100 Subject: draw: fix first glitch in vertex emit --- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 557cd43f5b..889cab700c 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -92,6 +92,16 @@ static void emit_R32G32B32A32_FLOAT( const float *attrib, out[3] = attrib[3]; } +static void +emit_B8G8R8A8_UNORM( const float *attrib, void *ptr) +{ + ubyte *out = (ubyte *)ptr; + out[2] = float_to_ubyte(attrib[0]); + out[1] = float_to_ubyte(attrib[1]); + out[0] = float_to_ubyte(attrib[2]); + out[3] = float_to_ubyte(attrib[3]); +} + static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, unsigned prim ) { @@ -150,6 +160,9 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, src_buffer = 1; src_offset = 0; break; + case EMIT_4UB: + fpme->translate[i].emit = emit_B8G8R8A8_UNORM; + emit_sz = 4 * sizeof(ubyte); default: assert(0); fpme->translate[i].emit = emit_NULL; @@ -232,10 +245,10 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, for (j = 0; j < fpme->nr_translate; j++) { - const float *attrib = (const float *)( (*fpme->translate[i].input_buf) + - fpme->translate[i].input_offset ); + const float *attrib = (const float *)( (*fpme->translate[j].input_buf) + + fpme->translate[j].input_offset ); - char *dest = out_buf + fpme->translate[i].output_offset; + char *dest = out_buf + fpme->translate[j].output_offset; /*debug_printf("emiting [%f, %f, %f, %f]\n", attrib[0], attrib[1], -- cgit v1.2.3 From 8cac6f3fcf9d6c08959efc20f8fce9eddbdcd0ef Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 15 Apr 2008 14:52:56 +0100 Subject: draw: increment output vertex properly --- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 889cab700c..75bbcc8d41 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -250,14 +250,16 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, char *dest = out_buf + fpme->translate[j].output_offset; - /*debug_printf("emiting [%f, %f, %f, %f]\n", - attrib[0], attrib[1], - attrib[2], attrib[3]);*/ + if (0) + debug_printf("emiting [%f, %f, %f, %f]\n", + attrib[0], attrib[1], + attrib[2], attrib[3]); fpme->translate[j].emit(attrib, dest); } fpme->input_buf[0] += fpme->pipeline_vertex_size; + out_buf += fpme->hw_vertex_size; } draw->render->draw(draw->render, -- cgit v1.2.3 From a5a7dc24ba2dcf9bbdd73709c4c182e324bdc3a5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 15 Apr 2008 19:14:31 +0100 Subject: gallium: Switch one vertex path over to new translate module Will eventually do this for all instances where we are converting vertices from one format to another. --- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 146 +++++++-------------- src/gallium/auxiliary/translate/translate.h | 36 ++++- .../auxiliary/translate/translate_generic.c | 66 ++++++++-- 3 files changed, 128 insertions(+), 120 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 75bbcc8d41..2d83e11940 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -31,77 +31,20 @@ #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" +#include "translate/translate.h" + struct fetch_pipeline_middle_end { struct draw_pt_middle_end base; struct draw_context *draw; - const ubyte *input_buf[2]; - - struct { - const ubyte **input_buf; - unsigned input_offset; - unsigned output_offset; - - void (*emit)( const float *attrib, void *ptr ); - } translate[PIPE_MAX_ATTRIBS]; - unsigned nr_translate; + struct translate *translate; unsigned pipeline_vertex_size; - unsigned hw_vertex_size; unsigned prim; }; -static void emit_NULL( const float *attrib, - void *ptr ) -{ -} - -static void emit_R32_FLOAT( const float *attrib, - void *ptr ) -{ - float *out = (float *)ptr; - out[0] = attrib[0]; -} - -static void emit_R32G32_FLOAT( const float *attrib, - void *ptr ) -{ - float *out = (float *)ptr; - out[0] = attrib[0]; - out[1] = attrib[1]; -} - -static void emit_R32G32B32_FLOAT( const float *attrib, - void *ptr ) -{ - float *out = (float *)ptr; - out[0] = attrib[0]; - out[1] = attrib[1]; - out[2] = attrib[2]; -} - -static void emit_R32G32B32A32_FLOAT( const float *attrib, - void *ptr ) -{ - float *out = (float *)ptr; - out[0] = attrib[0]; - out[1] = attrib[1]; - out[2] = attrib[2]; - out[3] = attrib[3]; -} - -static void -emit_B8G8R8A8_UNORM( const float *attrib, void *ptr) -{ - ubyte *out = (ubyte *)ptr; - out[2] = float_to_ubyte(attrib[0]); - out[1] = float_to_ubyte(attrib[1]); - out[0] = float_to_ubyte(attrib[2]); - out[3] = float_to_ubyte(attrib[3]); -} - static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, unsigned prim ) { @@ -111,6 +54,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, boolean ok; const struct vertex_info *vinfo; unsigned dst_offset; + struct translate_key hw_key; fpme->prim = prim; @@ -132,6 +76,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, for (i = 0; i < vinfo->num_attribs; i++) { unsigned emit_sz = 0; unsigned src_buffer = 0; + unsigned output_format; unsigned src_offset = (sizeof(struct vertex_header) + vinfo->src_index[i] * 4 * sizeof(float) ); @@ -139,49 +84,64 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, switch (vinfo->emit[i]) { case EMIT_4F: - fpme->translate[i].emit = emit_R32G32B32A32_FLOAT; + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit_sz = 4 * sizeof(float); break; case EMIT_3F: - fpme->translate[i].emit = emit_R32G32B32_FLOAT; + output_format = PIPE_FORMAT_R32G32B32_FLOAT; emit_sz = 3 * sizeof(float); break; case EMIT_2F: - fpme->translate[i].emit = emit_R32G32_FLOAT; + output_format = PIPE_FORMAT_R32G32_FLOAT; emit_sz = 2 * sizeof(float); break; case EMIT_1F: - fpme->translate[i].emit = emit_R32_FLOAT; + output_format = PIPE_FORMAT_R32_FLOAT; emit_sz = 1 * sizeof(float); break; case EMIT_1F_PSIZE: - fpme->translate[i].emit = emit_R32_FLOAT; + output_format = PIPE_FORMAT_R32_FLOAT; emit_sz = 1 * sizeof(float); src_buffer = 1; src_offset = 0; break; case EMIT_4UB: - fpme->translate[i].emit = emit_B8G8R8A8_UNORM; + output_format = PIPE_FORMAT_B8G8R8A8_UNORM; emit_sz = 4 * sizeof(ubyte); default: assert(0); - fpme->translate[i].emit = emit_NULL; + output_format = PIPE_FORMAT_NONE; emit_sz = 0; break; } + + hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + hw_key.element[i].input_buffer = src_buffer; + hw_key.element[i].input_offset = src_offset; + hw_key.element[i].output_format = output_format; + hw_key.element[i].output_offset = dst_offset; - fpme->translate[i].input_buf = &fpme->input_buf[src_buffer]; - fpme->translate[i].input_offset = src_offset; - fpme->translate[i].output_offset = dst_offset; dst_offset += emit_sz; } - fpme->nr_translate = vinfo->num_attribs; - fpme->hw_vertex_size = vinfo->size * 4; + hw_key.nr_elements = vinfo->num_attribs; + hw_key.output_stride = vinfo->size * 4; + + /* Don't bother with caching at this stage: + */ + if (!fpme->translate || + memcmp(&fpme->translate->key, &hw_key, sizeof(hw_key)) != 0) + { + if (fpme->translate) + fpme->translate->release(fpme->translate); + + fpme->translate = translate_generic_create( &hw_key ); + } + + //fpme->pipeline_vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); fpme->pipeline_vertex_size = MAX_VERTEX_ALLOCATION; - fpme->hw_vertex_size = vinfo->size * 4; } @@ -221,46 +181,34 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, draw_elts, draw_count ); } else { - unsigned i, j; + struct translate *translate = fpme->translate; void *hw_verts; - char *out_buf; /* XXX: need to flush to get prim_vbuf.c to release its allocation?? */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); hw_verts = draw->render->allocate_vertices(draw->render, - (ushort)fpme->hw_vertex_size, + (ushort)fpme->translate->key.output_stride, (ushort)fetch_count); if (!hw_verts) { assert(0); return; } - out_buf = (char *)hw_verts; - fpme->input_buf[0] = (const ubyte *)pipeline_verts; - fpme->input_buf[1] = (const ubyte *)&fpme->draw->rasterizer->point_size; - - for (i = 0; i < fetch_count; i++) { - - for (j = 0; j < fpme->nr_translate; j++) { + translate->set_buffer(translate, + 0, + pipeline_verts, + fpme->pipeline_vertex_size ); - const float *attrib = (const float *)( (*fpme->translate[j].input_buf) + - fpme->translate[j].input_offset ); + translate->set_buffer(translate, + 1, + &fpme->draw->rasterizer->point_size, + 0); - char *dest = out_buf + fpme->translate[j].output_offset; - - if (0) - debug_printf("emiting [%f, %f, %f, %f]\n", - attrib[0], attrib[1], - attrib[2], attrib[3]); - - fpme->translate[j].emit(attrib, dest); - } - - fpme->input_buf[0] += fpme->pipeline_vertex_size; - out_buf += fpme->hw_vertex_size; - } + translate->run( translate, + 0, fetch_count, + hw_verts ); draw->render->draw(draw->render, draw_elts, @@ -268,7 +216,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, draw->render->release_vertices(draw->render, hw_verts, - fpme->hw_vertex_size, + fpme->translate->key.output_stride, fetch_count); } diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index ac6bc7088b..4f9f40e51a 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -54,8 +54,17 @@ struct translate_element }; +struct translate_key { + unsigned output_stride; + unsigned nr_elements; + struct translate_element element[PIPE_MAX_ATTRIBS]; +}; + + struct translate { - void (*destroy)( struct translate * ); + struct translate_key key; + + void (*release)( struct translate * ); void (*set_buffer)( struct translate *, unsigned i, @@ -66,17 +75,30 @@ struct translate { const unsigned *elts, unsigned count, void *output_buffer); + + void (*run)( struct translate *, + unsigned start, + unsigned count, + void *output_buffer); }; -struct translate *translate_sse2_create( unsigned output_stride, - const struct translate_element *elements, - unsigned nr_elements ); +#if 0 +struct translate_context *translate_context_create( void ); +void translate_context_destroy( struct translate_context * ); + +struct translate *translate_lookup_or_create( struct translate_context *tctx, + const struct translate_key *key ); +#endif + + +/******************************************************************************* + * Private: + */ +struct translate *translate_sse2_create( const struct translate_key *key ); -struct translate *translate_generic_create( unsigned output_stride, - const struct translate_element *elements, - unsigned nr_elements ); +struct translate *translate_generic_create( const struct translate_key *key ); #endif diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 7e75ba8365..fc9060900b 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -59,7 +59,6 @@ struct translate_generic { } attrib[PIPE_MAX_ATTRIBS]; unsigned nr_attrib; - unsigned output_stride; }; @@ -571,7 +570,42 @@ static void generic_run_elts( struct translate *translate, tg->attrib[attr].emit( data, dst ); } - vert += tg->output_stride; + vert += tg->translate.key.output_stride; + } +} + + + +static void generic_run( struct translate *translate, + unsigned start, + unsigned count, + void *output_buffer ) +{ + struct translate_generic *tg = translate_generic(translate); + char *vert = output_buffer; + unsigned nr_attrs = tg->nr_attrib; + unsigned attr; + unsigned i; + + /* loop over vertex attributes (vertex shader inputs) + */ + for (i = 0; i < count; i++) { + unsigned elt = start + i; + + for (attr = 0; attr < nr_attrs; attr++) { + float data[4]; + + const char *src = (tg->attrib[attr].input_ptr + + tg->attrib[attr].input_stride * elt); + + char *dst = (vert + + tg->attrib[attr].output_offset); + + tg->attrib[attr].fetch( src, data ); + tg->attrib[attr].emit( data, dst ); + } + + vert += tg->translate.key.output_stride; } } @@ -595,14 +629,14 @@ static void generic_set_buffer( struct translate *translate, } -static void generic_destroy( struct translate *translate ) +static void generic_release( struct translate *translate ) { + /* Refcount? + */ FREE(translate); } -struct translate *translate_generic_create( unsigned output_stride, - const struct translate_element *elements, - unsigned nr_elements ) +struct translate *translate_generic_create( const struct translate_key *key ) { struct translate_generic *tg = CALLOC_STRUCT(translate_generic); unsigned i; @@ -610,20 +644,24 @@ struct translate *translate_generic_create( unsigned output_stride, if (tg == NULL) return NULL; - tg->translate.destroy = generic_destroy; + tg->translate.key = *key; + tg->translate.release = generic_release; tg->translate.set_buffer = generic_set_buffer; tg->translate.run_elts = generic_run_elts; + tg->translate.run = generic_run; + + for (i = 0; i < key->nr_elements; i++) { + + tg->attrib[i].fetch = get_fetch_func(key->element[i].input_format); + tg->attrib[i].buffer = key->element[i].input_buffer; + tg->attrib[i].input_offset = key->element[i].input_offset; - for (i = 0; i < nr_elements; i++) { - tg->attrib[i].fetch = get_fetch_func(elements[i].input_format); - tg->attrib[i].buffer = elements[i].input_buffer; - tg->attrib[i].input_offset = elements[i].input_offset; + tg->attrib[i].emit = get_emit_func(key->element[i].output_format); + tg->attrib[i].output_offset = key->element[i].output_offset; - tg->attrib[i].emit = get_emit_func(elements[i].output_format); - tg->attrib[i].output_offset = elements[i].output_offset; } - tg->nr_attrib = nr_elements; + tg->nr_attrib = key->nr_elements; return &tg->translate; -- cgit v1.2.3 From e2269e94c0ecaa97c7153815489e59c9ca64c243 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 16 Apr 2008 11:39:22 +0900 Subject: gallium: Build and link the translate module. --- src/gallium/SConscript | 1 + src/gallium/auxiliary/translate/SConscript | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/SConscript b/src/gallium/SConscript index f09778ce99..2653f91bd2 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -18,6 +18,7 @@ SConscript([ 'auxiliary/rtasm/SConscript', 'auxiliary/tgsi/SConscript', 'auxiliary/cso_cache/SConscript', + 'auxiliary/translate/SConscript', 'auxiliary/draw/SConscript', 'auxiliary/pipebuffer/SConscript', ]) diff --git a/src/gallium/auxiliary/translate/SConscript b/src/gallium/auxiliary/translate/SConscript index 947b9438cb..42513ba3b0 100644 --- a/src/gallium/auxiliary/translate/SConscript +++ b/src/gallium/auxiliary/translate/SConscript @@ -1,6 +1,6 @@ Import('*') -cso_cache = env.ConvenienceLibrary( +translate = env.ConvenienceLibrary( target = 'translate', source = [ 'translate_generic.c', -- cgit v1.2.3 From fd6acabd2f62fe006b078ae7640a944c7f65903c Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 16 Apr 2008 11:42:08 +0900 Subject: gallium: Get the translate module to build on msvc. Appearently MSVC c-preprocessor parses "255.0f" as two tokens: "255.0" and "f", and performs variable substitution on "f". --- .../auxiliary/translate/translate_generic.c | 32 +++++++++++----------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index fc9060900b..e7fb1dcb2d 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -78,8 +78,8 @@ static struct translate_generic *translate_generic( struct translate *translate static void \ fetch_##NAME(const void *ptr, float *attrib) \ { \ - const float defaults[4] = { 0,0,0,1 }; \ - int i; \ + const float defaults[4] = { 0.0f,0.0f,0.0f,1.0f }; \ + unsigned i; \ \ for (i = 0; i < SZ; i++) { \ attrib[i] = FROM(i); \ @@ -121,24 +121,24 @@ emit_##NAME(const float *attrib, void *ptr) \ #define FROM_16_SNORM(i) ((float) ((short *) ptr)[i] / 32767.0f) #define FROM_32_SNORM(i) ((float) ((int *) ptr)[i] / 2147483647.0f) -#define TO_64_FLOAT(f) ((double) f) -#define TO_32_FLOAT(f) (f) +#define TO_64_FLOAT(x) ((double) x) +#define TO_32_FLOAT(x) (x) -#define TO_8_USCALED(f) ((unsigned char) f) -#define TO_16_USCALED(f) ((unsigned short) f) -#define TO_32_USCALED(f) ((unsigned int) f) +#define TO_8_USCALED(x) ((unsigned char) x) +#define TO_16_USCALED(x) ((unsigned short) x) +#define TO_32_USCALED(x) ((unsigned int) x) -#define TO_8_SSCALED(f) ((char) f) -#define TO_16_SSCALED(f) ((short) f) -#define TO_32_SSCALED(f) ((int) f) +#define TO_8_SSCALED(x) ((char) x) +#define TO_16_SSCALED(x) ((short) x) +#define TO_32_SSCALED(x) ((int) x) -#define TO_8_UNORM(f) ((unsigned char) (f * 255.0f)) -#define TO_16_UNORM(f) ((unsigned short) (f * 65535.0f)) -#define TO_32_UNORM(f) ((unsigned int) (f * 4294967295.0f)) +#define TO_8_UNORM(x) ((unsigned char) (x * 255.0f)) +#define TO_16_UNORM(x) ((unsigned short) (x * 65535.0f)) +#define TO_32_UNORM(x) ((unsigned int) (x * 4294967295.0f)) -#define TO_8_SNORM(f) ((char) (f * 127.0f)) -#define TO_16_SNORM(f) ((short) (f * 32767.0f)) -#define TO_32_SNORM(f) ((int) (f * 2147483647.0f)) +#define TO_8_SNORM(x) ((char) (x * 127.0f)) +#define TO_16_SNORM(x) ((short) (x * 32767.0f)) +#define TO_32_SNORM(x) ((int) (x * 2147483647.0f)) -- cgit v1.2.3 From a8582efaca35d09c8ca18918a243a9284583356d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 16 Apr 2008 10:03:18 +0100 Subject: draw: make pt run pipeline when need_pipeline is true, not just when clipped --- src/gallium/auxiliary/draw/draw_private.h | 13 +- src/gallium/auxiliary/draw/draw_pt.c | 182 +++++---------------- src/gallium/auxiliary/draw/draw_pt.h | 6 +- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 3 +- .../auxiliary/draw/draw_pt_fetch_pipeline.c | 3 +- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 18 +- src/gallium/auxiliary/draw/draw_pt_vcache.c | 5 +- 7 files changed, 75 insertions(+), 155 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index c8cb96c8ba..8ac061cc9f 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -182,6 +182,12 @@ typedef void (*pt_fetch_func)( struct draw_context *draw, struct vbuf_render; + +#define PT_SHADE 0x1 +#define PT_CLIPTEST 0x2 +#define PT_PIPELINE 0x4 +#define PT_MAX_MIDDLE 0x8 + /** * Private context for the drawing module. */ @@ -219,15 +225,10 @@ struct draw_context unsigned hw_vertex_size; /* XXX: to be removed */ struct { - struct draw_pt_middle_end *fetch_emit; - struct draw_pt_middle_end *fetch_pipeline; - struct draw_pt_middle_end *fetch_shade_emit; - struct draw_pt_middle_end *fetch_shade_cliptest_pipeline_or_emit; + struct draw_pt_middle_end *opt[PT_MAX_MIDDLE]; } middle; struct { - struct draw_pt_front_end *noop; - struct draw_pt_front_end *split_arrays; struct draw_pt_front_end *vcache; } front; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 3d2e7bf7b8..895224c952 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -36,146 +36,58 @@ #include "draw/draw_pt.h" -#if 0 -static boolean too_many_elts( struct draw_context *draw, - unsigned elts ) -{ - return elts > (8 * 1024); -} -#endif - -static INLINE unsigned reduced_prim(unsigned prim) -{ - /*FIXME*/ - return prim; -} -static INLINE boolean good_prim(unsigned prim) -{ - /*FIXME*/ - return FALSE; -} +/* Overall we split things into: + * - frontend -- prepare fetch_elts, draw_elts - eg vcache + * - middle -- fetch, shade, cliptest, viewport + * - pipeline -- the prim pipeline: clipping, wide lines, etc + * - backend -- the vbuf_render provided by the driver. + */ boolean draw_pt_arrays(struct draw_context *draw, unsigned prim, unsigned start, unsigned count) { - const boolean pipeline = draw_need_pipeline(draw, prim); - const boolean cliptest = !draw->rasterizer->bypass_clipping; - const boolean shading = !draw->rasterizer->bypass_vs; struct draw_pt_front_end *frontend = NULL; struct draw_pt_middle_end *middle = NULL; + unsigned opt = 0; if (!draw->render) return FALSE; /*debug_printf("XXXXXXXXXX needs_pipeline = %d\n", pipeline);*/ - /* Overall we do: - * - frontend -- prepare fetch_elts, draw_elts - eg vcache - * - middle -- fetch, shade, cliptest, viewport - * - pipeline -- the prim pipeline: clipping, wide lines, etc - * - backend -- the vbuf_render provided by the driver. - */ - - if (shading && !draw->use_pt_shaders) - return FALSE; - - if (!cliptest && !pipeline && !shading) { - /* This is the 'passthrough' path: - */ - /* Fetch user verts, emit hw verts: - */ - middle = draw->pt.middle.fetch_emit; - } - else if (!cliptest && !shading) { - /* This is the 'passthrough' path targetting the pipeline backend. - */ - /* Fetch user verts, emit pipeline verts, run pipeline: - */ - middle = draw->pt.middle.fetch_pipeline; - } - else if (!cliptest && !pipeline) { - /* Fetch user verts, run vertex shader, emit hw verts: - */ - middle = draw->pt.middle.fetch_shade_emit; - } - else if (!pipeline) { - /* Even though !pipeline, we have to run it to get clipping. We - * do know that the pipeline is just the clipping operation, but - * that probably doesn't help much. - * - * This is going to be the most important path for a lot of - * swtnl cards. - */ - /* Fetch user verts, - * run vertex shader, - * cliptest and viewport trasform - * if no clipped vertices, - * emit hw verts - * else - * run pipline - */ - middle = draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit; - } - else { - /* This is what we're currently always doing: - */ - /* Fetch user verts, run vertex shader, cliptest, run pipeline - * or - * Fetch user verts, run vertex shader, run pipeline - */ - middle = draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit; + if (draw_need_pipeline(draw, prim)) { + opt |= PT_PIPELINE; } + if (!draw->rasterizer->bypass_clipping) { + opt |= PT_CLIPTEST; + } - /* If !pipeline, need to make sure we respect the driver's limited - * capabilites to receive blocks of vertex data and elements. - */ -#if 0 - if (!pipeline) { - unsigned vertex_mode = passthrough; - unsigned nr_verts = count_vertices( draw, start, count ); - unsigned hw_prim = prim; - - if (is_elts(draw)) { - frontend = draw->pt.front.vcache; - hw_prim = reduced_prim(prim); - } -#if 0 - if (too_many_verts(nr_verts)) { - /* if (is_verts(draw) && can_split(prim)) { - draw = draw_arrays_split; - } - else */ { - frontend = draw->pt.front.vcache; - hw_prim = reduced_prim(prim); - } - } -#endif + if (!draw->rasterizer->bypass_vs) { + opt |= PT_SHADE; - if (too_many_elts(count)) { + if (!draw->use_pt_shaders) + return FALSE; + } - /* if (is_elts(draw) && can_split(prim)) { - draw = draw_elts_split; - } - else */ { - frontend = draw->pt.front.vcache; - hw_prim = reduced_prim(prim); - } - } - if (!good_prim(hw_prim)) { - frontend = draw->pt.front.vcache; - } + if (draw->pt.middle.opt[opt] == NULL) { + opt = PT_PIPELINE | PT_CLIPTEST | PT_SHADE; } -#else + + middle = draw->pt.middle.opt[opt]; + assert(middle); + + /* May create a short-circuited version of this for small primitives: + */ frontend = draw->pt.front.vcache; -#endif - frontend->prepare( frontend, prim, middle ); + + frontend->prepare( frontend, prim, middle, opt ); frontend->run( frontend, draw_pt_elt_func( draw ), @@ -190,21 +102,19 @@ draw_pt_arrays(struct draw_context *draw, boolean draw_pt_init( struct draw_context *draw ) { - draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw ); - if (!draw->pt.middle.fetch_emit) - return FALSE; - - draw->pt.middle.fetch_pipeline = draw_pt_fetch_pipeline( draw ); - if (!draw->pt.middle.fetch_pipeline) + draw->pt.front.vcache = draw_pt_vcache( draw ); + if (!draw->pt.front.vcache) return FALSE; - draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit = + draw->pt.middle.opt[0] = draw_pt_fetch_emit( draw ); + draw->pt.middle.opt[PT_PIPELINE] = draw_pt_fetch_pipeline( draw ); +// draw->pt.middle.opt[PT_SHADE] = draw_pt_shade_emit( draw ); +// draw->pt.middle.opt[PT_SHADE | PT_PIPELINE] = draw_pt_shade_pipeline( draw ); +// draw->pt.middle.opt[PT_SHADE | PT_CLIPTEST] = draw_pt_shade_clip_either( draw ); + draw->pt.middle.opt[PT_SHADE | PT_CLIPTEST | PT_PIPELINE] = draw_pt_fetch_pipeline_or_emit( draw ); - if (!draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit) - return FALSE; - draw->pt.front.vcache = draw_pt_vcache( draw ); - if (!draw->pt.front.vcache) + if (!draw->pt.middle.opt[PT_SHADE | PT_CLIPTEST | PT_PIPELINE]) return FALSE; return TRUE; @@ -213,21 +123,13 @@ boolean draw_pt_init( struct draw_context *draw ) void draw_pt_destroy( struct draw_context *draw ) { - if (draw->pt.middle.fetch_emit) { - draw->pt.middle.fetch_emit->destroy( draw->pt.middle.fetch_emit ); - draw->pt.middle.fetch_emit = NULL; - } + int i; - if (draw->pt.middle.fetch_pipeline) { - draw->pt.middle.fetch_pipeline->destroy( draw->pt.middle.fetch_pipeline ); - draw->pt.middle.fetch_pipeline = NULL; - } - - if (draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit) { - draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit->destroy( - draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit ); - draw->pt.middle.fetch_shade_cliptest_pipeline_or_emit = NULL; - } + for (i = 0; i < PT_MAX_MIDDLE; i++) + if (draw->pt.middle.opt[i]) { + draw->pt.middle.opt[i]->destroy( draw->pt.middle.opt[i] ); + draw->pt.middle.opt[i] = NULL; + } if (draw->pt.front.vcache) { draw->pt.front.vcache->destroy( draw->pt.front.vcache ); diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 48413b648a..08afb60645 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -64,7 +64,8 @@ struct draw_context; struct draw_pt_front_end { void (*prepare)( struct draw_pt_front_end *, unsigned prim, - struct draw_pt_middle_end * ); + struct draw_pt_middle_end *, + unsigned opt ); void (*run)( struct draw_pt_front_end *, pt_elt_func elt_func, @@ -90,7 +91,8 @@ struct draw_pt_front_end { */ struct draw_pt_middle_end { void (*prepare)( struct draw_pt_middle_end *, - unsigned prim ); + unsigned prim, + unsigned opt ); void (*run)( struct draw_pt_middle_end *, const unsigned *fetch_elts, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 3a26a5d712..1a9d331a06 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -164,7 +164,8 @@ fetch_store_general( struct fetch_emit_middle_end *feme, static void fetch_emit_prepare( struct draw_pt_middle_end *middle, - unsigned prim ) + unsigned prim, + unsigned opt ) { struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; struct draw_context *draw = feme->draw; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c index a70d129c93..db6d6c76ea 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c @@ -209,7 +209,8 @@ fetch_store_general( struct fetch_pipeline_middle_end *fpme, * */ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, - unsigned prim ) + unsigned prim, + unsigned opt ) { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 2d83e11940..52be4a64aa 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -42,11 +42,13 @@ struct fetch_pipeline_middle_end { unsigned pipeline_vertex_size; unsigned prim; + unsigned opt; }; static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, - unsigned prim ) + unsigned prim, + unsigned opt ) { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; @@ -57,6 +59,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, struct translate_key hw_key; fpme->prim = prim; + fpme->opt = opt; ok = draw->render->set_primitive(draw->render, prim); if (!ok) { @@ -157,6 +160,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vertex_shader; char *pipeline_verts; + unsigned pipeline = PT_PIPELINE; pipeline_verts = MALLOC(fpme->pipeline_vertex_size * fetch_count); @@ -170,9 +174,17 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, /* Shade */ shader->prepare(shader, draw); + if (shader->run(shader, draw, fetch_elts, fetch_count, pipeline_verts, - fpme->pipeline_vertex_size)) { - /* Run the pipeline */ + fpme->pipeline_vertex_size)) + { + pipeline |= PT_CLIPTEST; + } + + + /* Do we need to run the pipeline? + */ + if (fpme->opt & pipeline) { draw_pt_run_pipeline( fpme->draw, fpme->prim, pipeline_verts, diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 107dcfc269..5561f2b6fb 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -448,7 +448,8 @@ static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { static void vcache_prepare( struct draw_pt_front_end *frontend, unsigned prim, - struct draw_pt_middle_end *middle ) + struct draw_pt_middle_end *middle, + unsigned opt ) { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; @@ -464,7 +465,7 @@ static void vcache_prepare( struct draw_pt_front_end *frontend, vcache->output_prim = reduced_prim[prim]; vcache->middle = middle; - middle->prepare( middle, vcache->output_prim ); + middle->prepare( middle, vcache->output_prim, opt ); } -- cgit v1.2.3 From 909e8ce543a6c1e97d55791d2069cbdc56ea9db6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 16 Apr 2008 10:12:12 +0100 Subject: draw: update debug code --- src/gallium/auxiliary/draw/draw_vbuf.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vbuf.c b/src/gallium/auxiliary/draw/draw_vbuf.c index e3216ff711..eeb194afaa 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_vbuf.c @@ -113,12 +113,12 @@ check_space( struct vbuf_stage *vbuf, unsigned nr ) } -#if 0 +#if 1 static INLINE void dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) { - assert(vinfo == vbuf->render->get_vertex_info(vbuf->render)); - unsigned i, j, k; +// assert(vinfo == vbuf->render->get_vertex_info(vbuf->render)); + unsigned i, j; for (i = 0; i < vinfo->num_attribs; i++) { j = vinfo->src_index[i]; @@ -264,6 +264,8 @@ emit_vertex( struct vbuf_stage *vbuf, } else { draw_vf_emit_vertex(vbuf->vf, vertex, vbuf->vertex_ptr); + + if (0) dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); vbuf->vertex_ptr += vbuf->vertex_size/4; } -- cgit v1.2.3 From d8c389171872b69af3c94ebab02ad5f4bcd2d4df Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 16 Apr 2008 12:31:19 +0100 Subject: draw: remove vbuf non-vf debug path, fix some failure modes in draw_vbuf_create() --- src/gallium/auxiliary/draw/draw_vbuf.c | 106 ++++++--------------------------- 1 file changed, 19 insertions(+), 87 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vbuf.c b/src/gallium/auxiliary/draw/draw_vbuf.c index eeb194afaa..59b63f5bfa 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_vbuf.c @@ -113,7 +113,6 @@ check_space( struct vbuf_stage *vbuf, unsigned nr ) } -#if 1 static INLINE void dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) { @@ -167,7 +166,6 @@ dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) } debug_printf("\n"); } -#endif /** @@ -181,94 +179,22 @@ static INLINE void emit_vertex( struct vbuf_stage *vbuf, struct vertex_header *vertex ) { -#if 0 - debug_printf("emit vertex %d to %p\n", - vbuf->nr_vertices, vbuf->vertex_ptr); -#endif - if(vertex->vertex_id != UNDEFINED_VERTEX_ID) { if(vertex->vertex_id < vbuf->nr_vertices) return; else debug_printf("Bad vertex id 0x%04x (>= 0x%04x)\n", - vertex->vertex_id, vbuf->nr_vertices); + vertex->vertex_id, vbuf->nr_vertices); return; } vertex->vertex_id = vbuf->nr_vertices++; - if(!vbuf->vf) { - const struct vertex_info *vinfo = vbuf->vinfo; - uint i; - uint count = 0; /* for debug/sanity */ - - assert(vinfo == vbuf->render->get_vertex_info(vbuf->render)); - - for (i = 0; i < vinfo->num_attribs; i++) { - uint j = vinfo->src_index[i]; - switch (vinfo->emit[i]) { - case EMIT_OMIT: - /* no-op */ - break; - case EMIT_1F: - *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); - count++; - break; - case EMIT_1F_PSIZE: - *vbuf->vertex_ptr++ = fui(vbuf->stage.draw->rasterizer->point_size); - count++; - break; - case EMIT_2F: - *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][1]); - count += 2; - break; - case EMIT_3F: - *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][1]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][2]); - count += 3; - break; - case EMIT_4F: - *vbuf->vertex_ptr++ = fui(vertex->data[j][0]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][1]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][2]); - *vbuf->vertex_ptr++ = fui(vertex->data[j][3]); - count += 4; - break; - case EMIT_4UB: - *vbuf->vertex_ptr++ = pack_ub4(float_to_ubyte( vertex->data[j][2] ), - float_to_ubyte( vertex->data[j][1] ), - float_to_ubyte( vertex->data[j][0] ), - float_to_ubyte( vertex->data[j][3] )); - count += 1; - break; - default: - assert(0); - } - } - assert(count == vinfo->size); -#if 0 - { - static float data[256]; - draw_vf_emit_vertex(vbuf->vf, vertex, data); - if(memcmp((uint8_t *)vbuf->vertex_ptr - vbuf->vertex_size, data, vbuf->vertex_size)) { - debug_printf("With VF:\n"); - dump_emitted_vertex(vbuf->vinfo, (uint8_t *)data); - debug_printf("Without VF:\n"); - dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr - vbuf->vertex_size); - assert(0); - } - } -#endif - } - else { - draw_vf_emit_vertex(vbuf->vf, vertex, vbuf->vertex_ptr); + draw_vf_emit_vertex(vbuf->vf, vertex, vbuf->vertex_ptr); - if (0) dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); + if (0) dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); - vbuf->vertex_ptr += vbuf->vertex_size/4; - } + vbuf->vertex_ptr += vbuf->vertex_size/4; } @@ -526,7 +452,7 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw, struct vbuf_stage *vbuf = CALLOC_STRUCT(vbuf_stage); if(!vbuf) - return NULL; + goto fail; vbuf->stage.draw = draw; vbuf->stage.point = vbuf_first_point; @@ -537,21 +463,27 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw, vbuf->stage.destroy = vbuf_destroy; vbuf->render = render; + vbuf->max_indices = MAX2(render->max_indices, UNDEFINED_VERTEX_ID-1); - assert(render->max_indices < UNDEFINED_VERTEX_ID); - vbuf->max_indices = render->max_indices; - vbuf->indices = (ushort *) - align_malloc( vbuf->max_indices * sizeof(vbuf->indices[0]), 16 ); + vbuf->indices = (ushort *) align_malloc( vbuf->max_indices * + sizeof(vbuf->indices[0]), + 16 ); if(!vbuf->indices) - vbuf_destroy(&vbuf->stage); + goto fail; vbuf->vertices = NULL; vbuf->vertex_ptr = vbuf->vertices; vbuf->prim = ~0; - - if(!GETENV("GALLIUM_NOVF")) - vbuf->vf = draw_vf_create(); + vbuf->vf = draw_vf_create(); + if (!vbuf->vf) + goto fail; return &vbuf->stage; + + fail: + if (vbuf) + vbuf_destroy(&vbuf->stage); + + return NULL; } -- cgit v1.2.3 From 3be453bf7fcffd945ee0238e66ff4247a4491f1c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 16 Apr 2008 12:55:47 +0100 Subject: draw: allow pt paths to run without a vbuf render stage --- src/gallium/auxiliary/draw/draw_pt.c | 7 +- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 137 +++++++++++---------- 2 files changed, 72 insertions(+), 72 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 895224c952..c0125c906f 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -54,10 +54,9 @@ draw_pt_arrays(struct draw_context *draw, struct draw_pt_middle_end *middle = NULL; unsigned opt = 0; - if (!draw->render) - return FALSE; - /*debug_printf("XXXXXXXXXX needs_pipeline = %d\n", pipeline);*/ - + if (!draw->render) { + opt |= PT_PIPELINE; + } if (draw_need_pipeline(draw, prim)) { opt |= PT_PIPELINE; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 52be4a64aa..a66e8b0ba2 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -61,88 +61,89 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, fpme->prim = prim; fpme->opt = opt; - ok = draw->render->set_primitive(draw->render, prim); - if (!ok) { - assert(0); - return; - } + if (!(opt & PT_PIPELINE)) { + ok = draw->render->set_primitive(draw->render, prim); + if (!ok) { + assert(0); + return; + } - /* Must do this after set_primitive() above: - */ - vinfo = draw->render->get_vertex_info(draw->render); + /* Must do this after set_primitive() above: + */ + vinfo = draw->render->get_vertex_info(draw->render); - /* In passthrough mode, need to translate from vertex shader - * outputs to hw vertices. - */ - dst_offset = 0; - for (i = 0; i < vinfo->num_attribs; i++) { - unsigned emit_sz = 0; - unsigned src_buffer = 0; - unsigned output_format; - unsigned src_offset = (sizeof(struct vertex_header) + - vinfo->src_index[i] * 4 * sizeof(float) ); + /* In passthrough mode, need to translate from vertex shader + * outputs to hw vertices. + */ + dst_offset = 0; + for (i = 0; i < vinfo->num_attribs; i++) { + unsigned emit_sz = 0; + unsigned src_buffer = 0; + unsigned output_format; + unsigned src_offset = (sizeof(struct vertex_header) + + vinfo->src_index[i] * 4 * sizeof(float) ); - switch (vinfo->emit[i]) { - case EMIT_4F: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - emit_sz = 4 * sizeof(float); - break; - case EMIT_3F: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - emit_sz = 3 * sizeof(float); - break; - case EMIT_2F: - output_format = PIPE_FORMAT_R32G32_FLOAT; - emit_sz = 2 * sizeof(float); - break; - case EMIT_1F: - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); - break; - case EMIT_1F_PSIZE: - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); - src_buffer = 1; - src_offset = 0; - break; - case EMIT_4UB: - output_format = PIPE_FORMAT_B8G8R8A8_UNORM; - emit_sz = 4 * sizeof(ubyte); - default: - assert(0); - output_format = PIPE_FORMAT_NONE; - emit_sz = 0; - break; - } + switch (vinfo->emit[i]) { + case EMIT_4F: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); + break; + case EMIT_3F: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); + break; + case EMIT_2F: + output_format = PIPE_FORMAT_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); + break; + case EMIT_1F: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + break; + case EMIT_1F_PSIZE: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + src_buffer = 1; + src_offset = 0; + break; + case EMIT_4UB: + output_format = PIPE_FORMAT_B8G8R8A8_UNORM; + emit_sz = 4 * sizeof(ubyte); + default: + assert(0); + output_format = PIPE_FORMAT_NONE; + emit_sz = 0; + break; + } - hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - hw_key.element[i].input_buffer = src_buffer; - hw_key.element[i].input_offset = src_offset; - hw_key.element[i].output_format = output_format; - hw_key.element[i].output_offset = dst_offset; + hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + hw_key.element[i].input_buffer = src_buffer; + hw_key.element[i].input_offset = src_offset; + hw_key.element[i].output_format = output_format; + hw_key.element[i].output_offset = dst_offset; - dst_offset += emit_sz; - } + dst_offset += emit_sz; + } - hw_key.nr_elements = vinfo->num_attribs; - hw_key.output_stride = vinfo->size * 4; + hw_key.nr_elements = vinfo->num_attribs; + hw_key.output_stride = vinfo->size * 4; - /* Don't bother with caching at this stage: - */ - if (!fpme->translate || - memcmp(&fpme->translate->key, &hw_key, sizeof(hw_key)) != 0) - { - if (fpme->translate) - fpme->translate->release(fpme->translate); + /* Don't bother with caching at this stage: + */ + if (!fpme->translate || + memcmp(&fpme->translate->key, &hw_key, sizeof(hw_key)) != 0) + { + if (fpme->translate) + fpme->translate->release(fpme->translate); - fpme->translate = translate_generic_create( &hw_key ); + fpme->translate = translate_generic_create( &hw_key ); + } } - //fpme->pipeline_vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); fpme->pipeline_vertex_size = MAX_VERTEX_ALLOCATION; } -- cgit v1.2.3 From 3a765bbe00940c4dffce72b659f625d97ea17971 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 16 Apr 2008 22:32:10 +0900 Subject: gallium: Use debug_get_bool_option for controlling assert failure behaviour. Add GALLIUM_ABORT_ON_ASSERT=no to C:\gallium.cfg instead. --- src/gallium/auxiliary/util/p_debug.c | 47 +++--------------------------------- 1 file changed, 4 insertions(+), 43 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index c195f61820..25b132b40c 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -200,6 +200,8 @@ debug_get_bool_option(const char *name, boolean dfault) if(str == NULL) result = dfault; + else if(!strcmp(str, "n")) + result = FALSE; else if(!strcmp(str, "no")) result = FALSE; else if(!strcmp(str, "0")) @@ -251,57 +253,16 @@ debug_get_flags_option(const char *name, } -#if defined(WIN32) -ULONG_PTR debug_config_file = 0; -void *mapped_config_file = 0; - -enum { - eAssertAbortEn = 0x1, -}; - -/* Check for aborts enabled. */ -static unsigned abort_en(void) -{ - if (!mapped_config_file) - { - /* Open an 8 byte file for configuration data. */ - mapped_config_file = EngMapFile(L"\\??\\c:\\gaDebug.cfg", 8, &debug_config_file); - } - - /* A value of "0" (ascii) in the configuration file will clear the - * first 8 bits in the test byte. - * - * A value of "1" (ascii) in the configuration file will set the - * first bit in the test byte. - * - * A value of "2" (ascii) in the configuration file will set the - * second bit in the test byte. - * - * Currently the only interesting values are 0 and 1, which clear - * and set abort-on-assert behaviour respectively. - */ - return ((((char *)mapped_config_file)[0]) - 0x30) & eAssertAbortEn; -} -#else /* WIN32 */ -static unsigned abort_en(void) -{ - return !GETENV("GALLIUM_ABORT_ON_ASSERT"); -} -#endif - void _debug_assert_fail(const char *expr, const char *file, unsigned line, const char *function) { _debug_printf("%s:%u:%s: Assertion `%s' failed.\n", file, line, function, expr); - if (abort_en()) - { + if (debug_get_bool_option("GALLIUM_ABORT_ON_ASSERT", TRUE)) debug_break(); - } else - { + else _debug_printf("continuing...\n"); - } } -- cgit v1.2.3 From 52544aa23bebe68d6fc9b13dc55f6bb8c03430ff Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 16 Apr 2008 09:46:38 -0600 Subject: gallium: added util_pack_color_ub() --- src/gallium/auxiliary/util/u_pack_color.h | 39 +++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index cd13823985..1f6604c554 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -39,6 +39,45 @@ #include "pipe/p_format.h" +/** + * Pack ubyte R,G,B,A into dest pixel. + */ +static INLINE void +util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a, + enum pipe_format format, void *dest) +{ + switch (format) { + case PIPE_FORMAT_R8G8B8A8_UNORM: + { + uint *d = (uint *) dest; + *d = (r << 24) | (g << 16) | (b << 8) | a; + } + return; + case PIPE_FORMAT_A8R8G8B8_UNORM: + { + uint *d = (uint *) dest; + *d = (a << 24) | (r << 16) | (g << 8) | b; + } + return; + case PIPE_FORMAT_B8G8R8A8_UNORM: + { + uint *d = (uint *) dest; + *d = (b << 24) | (g << 16) | (r << 8) | a; + } + return; + case PIPE_FORMAT_R5G6B5_UNORM: + { + ushort *d = (ushort *) dest; + *d = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); + } + return; + /* XXX lots more cases to add */ + default: + debug_printf("gallium: unhandled format in util_pack_color_ub()"); + } +} + + /** * Note rgba outside [0,1] will be clamped for int pixel formats. */ -- cgit v1.2.3 From 8abfcea690c66f75d61905f0ec5497b8fd7950d4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 17 Apr 2008 10:37:53 +0100 Subject: draw: remove misleading comments --- src/gallium/auxiliary/draw/draw_pt_pipeline.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_pipeline.c index e70e63d08f..17ce6febec 100644 --- a/src/gallium/auxiliary/draw/draw_pt_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_pipeline.c @@ -36,11 +36,6 @@ #include "draw/draw_vertex.h" #include "draw/draw_pt.h" - -/** - * Add a point to the primitive queue. - * \param i0 index into user's vertex arrays - */ static void do_point( struct draw_context *draw, const char *v0 ) { @@ -55,11 +50,6 @@ static void do_point( struct draw_context *draw, } -/** - * Add a line to the primitive queue. - * \param i0 index into user's vertex arrays - * \param i1 index into user's vertex arrays - */ static void do_line( struct draw_context *draw, const char *v0, const char *v1 ) @@ -75,9 +65,7 @@ static void do_line( struct draw_context *draw, draw->pipeline.first->line( draw->pipeline.first, &prim ); } -/** - * Add a triangle to the primitive queue. - */ + static void do_triangle( struct draw_context *draw, char *v0, char *v1, -- cgit v1.2.3 From c96d565643de271c6bda066e892b25d0a97ea4d0 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 17 Apr 2008 13:14:22 +0100 Subject: draw: keep record of number of active vertex buffers --- src/gallium/auxiliary/draw/draw_context.c | 1 + src/gallium/auxiliary/draw/draw_private.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 0c314f6e1d..cdcbfdc323 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -249,6 +249,7 @@ draw_set_vertex_buffers(struct draw_context *draw, draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); memcpy(draw->vertex_buffer, buffers, count * sizeof(buffers[0])); + draw->nr_vertex_buffers = count; } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 8ac061cc9f..359a8b8a3d 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -245,7 +245,10 @@ struct draw_context /* pipe state that we need: */ const struct pipe_rasterizer_state *rasterizer; struct pipe_viewport_state viewport; + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_buffers; + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; struct draw_vertex_shader *vertex_shader; -- cgit v1.2.3 From 49becd2d7c751e563ce6be9051dd8e6dad88d1f7 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 17 Apr 2008 13:14:55 +0100 Subject: draw: add comment --- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 1a9d331a06..d696afa1aa 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -185,6 +185,11 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, */ vinfo = draw->render->get_vertex_info(draw->render); + + /* This is unique as it transforms straight from API vertex + * information to HW vertices. All other cases go through our + * intermediate float[4] format. + */ for (i = 0; i < vinfo->num_attribs; i++) { unsigned src_element = vinfo->src_index[i]; unsigned src_buffer = draw->vertex_element[src_element].vertex_buffer_index; -- cgit v1.2.3 From 938ec19210c5b4e19dcb2b606c9ade415f2c1f84 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 17 Apr 2008 14:19:03 +0100 Subject: tsgi: make Consts const --- src/gallium/auxiliary/tgsi/exec/tgsi_exec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h index 45c49dd007..92e2e5e985 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h @@ -166,7 +166,7 @@ struct tgsi_exec_machine float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; unsigned ImmLimit; - float (*Consts)[4]; + const float (*Consts)[4]; struct tgsi_exec_vector *Inputs; struct tgsi_exec_vector *Outputs; const struct tgsi_token *Tokens; -- cgit v1.2.3 From 280bcff1fa200b790d8712946a4ffbaa47a67433 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 17 Apr 2008 14:20:00 +0100 Subject: draw: add vertex shader run_linear function --- src/gallium/auxiliary/draw/draw_aapoint.c | 2 +- src/gallium/auxiliary/draw/draw_clip.c | 2 +- src/gallium/auxiliary/draw/draw_context.c | 17 +++- src/gallium/auxiliary/draw/draw_flatshade.c | 2 +- src/gallium/auxiliary/draw/draw_private.h | 28 +------ .../auxiliary/draw/draw_pt_fetch_pipeline.c | 2 +- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 2 +- src/gallium/auxiliary/draw/draw_twoside.c | 2 +- src/gallium/auxiliary/draw/draw_vertex_fetch.c | 2 +- src/gallium/auxiliary/draw/draw_vertex_shader.c | 7 +- src/gallium/auxiliary/draw/draw_vs.h | 40 ++++++++- src/gallium/auxiliary/draw/draw_vs_exec.c | 95 +++++++++++++++++++--- src/gallium/auxiliary/draw/draw_vs_llvm.c | 66 ++++++++++++++- src/gallium/auxiliary/draw/draw_vs_sse.c | 76 ++++++++++++++++- src/gallium/auxiliary/draw/draw_wide_point.c | 2 +- 15 files changed, 286 insertions(+), 59 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c index fcebe3e7a0..e84d380e50 100644 --- a/src/gallium/auxiliary/draw/draw_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_aapoint.c @@ -48,7 +48,7 @@ #include "tgsi/util/tgsi_dump.h" #include "draw_context.h" -#include "draw_private.h" +#include "draw_vs.h" /* diff --git a/src/gallium/auxiliary/draw/draw_clip.c b/src/gallium/auxiliary/draw/draw_clip.c index e24c5d8032..0ac3a240e5 100644 --- a/src/gallium/auxiliary/draw/draw_clip.c +++ b/src/gallium/auxiliary/draw/draw_clip.c @@ -36,7 +36,7 @@ #include "pipe/p_shader_tokens.h" #include "draw_context.h" -#include "draw_private.h" +#include "draw_vs.h" #ifndef IS_NEGATIVE diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index cdcbfdc323..9b2dcc0b57 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -33,8 +33,8 @@ #include "pipe/p_util.h" #include "draw_context.h" -#include "draw_private.h" #include "draw_vbuf.h" +#include "draw_vs.h" struct draw_context *draw_create( void ) @@ -108,6 +108,8 @@ struct draw_context *draw_create( void ) draw_vertex_cache_invalidate( draw ); draw_set_mapped_element_buffer( draw, 0, NULL ); + tgsi_exec_machine_init(&draw->machine); + if (!draw_pt_init( draw )) goto fail; @@ -460,3 +462,16 @@ boolean draw_get_edgeflag( struct draw_context *draw, else return 1; } + + +#if 0 +/* Crufty init function. Fix me. + */ +boolean draw_init_machine( struct draw_context *draw ) +{ + ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); + ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); + machine->Inputs = ALIGN16_ASSIGN(inputs); + machine->Outputs = ALIGN16_ASSIGN(outputs); +} +#endif diff --git a/src/gallium/auxiliary/draw/draw_flatshade.c b/src/gallium/auxiliary/draw/draw_flatshade.c index af2cb05c98..54baa1fbc9 100644 --- a/src/gallium/auxiliary/draw/draw_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_flatshade.c @@ -30,7 +30,7 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "draw_private.h" +#include "draw_vs.h" /** subclass of draw_stage */ diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 359a8b8a3d..7953dbb7d9 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -55,6 +55,7 @@ struct gallivm_cpu_engine; struct draw_pt_middle_end; struct draw_pt_front_end; +struct draw_vertex_shader; #define MAX_SHADER_VERTICES 128 @@ -132,33 +133,6 @@ struct draw_stage #define VCACHE_OVERFLOW 4 #define VS_QUEUE_LENGTH (VCACHE_SIZE + VCACHE_OVERFLOW + 1) /* can never fill up */ -/** - * Private version of the compiled vertex_shader - */ -struct draw_vertex_shader { - - /* This member will disappear shortly: - */ - struct pipe_shader_state state; - - struct tgsi_shader_info info; - - void (*prepare)( struct draw_vertex_shader *shader, - struct draw_context *draw ); - - /* Run the shader - this interface will get cleaned up in the - * future: - */ - boolean (*run)( struct draw_vertex_shader *shader, - struct draw_context *draw, - const unsigned *elts, - unsigned count, - void *out, - unsigned vertex_size); - - - void (*delete)( struct draw_vertex_shader * ); -}; /* Internal function for vertex fetch. diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c index db6d6c76ea..79548d4156 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c @@ -32,9 +32,9 @@ #include "pipe/p_util.h" #include "draw/draw_context.h" -#include "draw/draw_private.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" +#include "draw/draw_vs.h" /* The simplest 'middle end' in the new vertex code. * diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index a66e8b0ba2..cf37493eb6 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -27,10 +27,10 @@ #include "pipe/p_util.h" #include "draw/draw_context.h" -#include "draw/draw_private.h" #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" +#include "draw/draw_vs.h" #include "translate/translate.h" diff --git a/src/gallium/auxiliary/draw/draw_twoside.c b/src/gallium/auxiliary/draw/draw_twoside.c index 3debaac282..01d905c153 100644 --- a/src/gallium/auxiliary/draw/draw_twoside.c +++ b/src/gallium/auxiliary/draw/draw_twoside.c @@ -31,7 +31,7 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "draw_private.h" +#include "draw_vs.h" struct twoside_stage { diff --git a/src/gallium/auxiliary/draw/draw_vertex_fetch.c b/src/gallium/auxiliary/draw/draw_vertex_fetch.c index 9041041006..0bc2fcb424 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_fetch.c +++ b/src/gallium/auxiliary/draw/draw_vertex_fetch.c @@ -32,7 +32,7 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "draw_private.h" +#include "draw_vs.h" #include "draw_context.h" diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c index 8572a6d40c..c0c17c116e 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c @@ -95,10 +95,8 @@ draw_create_vertex_shader(struct draw_context *draw, vs = draw_create_vs_exec( draw, shader ); } } - assert(vs); - - tgsi_scan_shader(shader->tokens, &vs->info); + assert(vs); return vs; } @@ -113,9 +111,6 @@ draw_bind_vertex_shader(struct draw_context *draw, { draw->vertex_shader = dvs; draw->num_vs_outputs = dvs->info.num_outputs; - - tgsi_exec_machine_init(&draw->machine); - dvs->prepare( dvs, draw ); } else { diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 33ce1e335e..e88d00e247 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -35,10 +35,46 @@ #include "draw_private.h" -struct draw_vertex_shader; struct draw_context; struct pipe_shader_state; +/** + * Private version of the compiled vertex_shader + */ +struct draw_vertex_shader { + + /* This member will disappear shortly: + */ + struct pipe_shader_state state; + + struct tgsi_shader_info info; + + void (*prepare)( struct draw_vertex_shader *shader, + struct draw_context *draw ); + + /* Run the shader - this interface will get cleaned up in the + * future: + */ + boolean (*run)( struct draw_vertex_shader *shader, + struct draw_context *draw, + const unsigned *elts, + unsigned count, + void *out, + unsigned vertex_size); + + void (*run_linear)( struct draw_vertex_shader *shader, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ); + + + void (*delete)( struct draw_vertex_shader * ); +}; + + struct draw_vertex_shader * draw_create_vs_exec(struct draw_context *draw, const struct pipe_shader_state *templ); @@ -79,5 +115,7 @@ compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) return mask; } +#define MAX_TGSI_VERTICES 4 + #endif diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 5c88c2e24e..0e05b79715 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -40,8 +40,20 @@ #include "tgsi/util/tgsi_parse.h" -#define MAX_TGSI_VERTICES 4 +struct exec_vertex_shader { + struct draw_vertex_shader base; + struct tgsi_exec_machine *machine; +}; + +static struct exec_vertex_shader *exec_vertex_shader( struct draw_vertex_shader *vs ) +{ + return (struct exec_vertex_shader *)vs; +} + + +/* Not required for run_linear. + */ static void vs_exec_prepare( struct draw_vertex_shader *shader, struct draw_context *draw ) @@ -81,10 +93,9 @@ vs_exec_run( struct draw_vertex_shader *shader, const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; - assert(draw->vertex_shader->info.output_semantic_name[0] - == TGSI_SEMANTIC_POSITION); + assert(shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION); - machine->Consts = (float (*)[4]) draw->user.constants; + machine->Consts = (const float (*)[4]) draw->user.constants; machine->Inputs = ALIGN16_ASSIGN(inputs); if (draw->rasterizer->bypass_vs) { /* outputs are just the inputs */ @@ -179,6 +190,64 @@ vs_exec_run( struct draw_vertex_shader *shader, +/* Simplified vertex shader interface for the pt paths. Given the + * complexity of code-generating all the above operations together, + * it's time to try doing all the other stuff separately. + */ +static void +vs_exec_run_linear( struct draw_vertex_shader *shader, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) +{ + struct exec_vertex_shader *evs = exec_vertex_shader(shader); + struct tgsi_exec_machine *machine = evs->machine; + unsigned int i, j; + unsigned slot; + + machine->Consts = constants; + + for (i = 0; i < count; i += MAX_TGSI_VERTICES) { + unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); + + /* Swizzle inputs. + */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < shader->info.num_inputs; slot++) { + machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; + machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; + machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; + machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; + } + } + + /* run interpreter */ + tgsi_exec_machine_run( machine ); + + /* Unswizzle all output results. + */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < shader->info.num_outputs; slot++) { + output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } + } + + /* Advance input, output pointers: + */ + input = (const float (*)[4])((const char *)input + input_stride); + output = (float (*)[4])((char *)output + output_stride); + } +} + + + + static void vs_exec_delete( struct draw_vertex_shader *dvs ) { @@ -191,17 +260,23 @@ struct draw_vertex_shader * draw_create_vs_exec(struct draw_context *draw, const struct pipe_shader_state *state) { - struct draw_vertex_shader *vs = CALLOC_STRUCT( draw_vertex_shader ); + struct exec_vertex_shader *vs = CALLOC_STRUCT( exec_vertex_shader ); uint nt = tgsi_num_tokens(state->tokens); if (vs == NULL) return NULL; /* we make a private copy of the tokens */ - vs->state.tokens = mem_dup(state->tokens, nt * sizeof(state->tokens[0])); - vs->prepare = vs_exec_prepare; - vs->run = vs_exec_run; - vs->delete = vs_exec_delete; + vs->base.state.tokens = mem_dup(state->tokens, nt * sizeof(state->tokens[0])); + tgsi_scan_shader(state->tokens, &vs->base.info); + + + vs->base.prepare = vs_exec_prepare; + vs->base.run = vs_exec_run; + vs->base.run_linear = vs_exec_run_linear; + vs->base.delete = vs_exec_delete; + vs->machine = &draw->machine; + - return vs; + return &vs->base; } diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 73076d2467..d0baca715e 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -88,8 +88,7 @@ vs_llvm_run( struct draw_vertex_shader *base, assert(count <= 4); - assert(draw->vertex_shader->state->output_semantic_name[0] - == TGSI_SEMANTIC_POSITION); + assert(base->state->output_semantic_name[0] == TGSI_SEMANTIC_POSITION); /* Consts does not require 16 byte alignment. */ machine->Consts = (float (*)[4]) draw->user.constants; @@ -169,6 +168,65 @@ vs_llvm_run( struct draw_vertex_shader *base, return clipped != 0; } + + + +static void +vs_llvm_run_linear( struct draw_vertex_shader *base, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) +{ + struct draw_llvm_vertex_shader *shader = + (struct draw_llvm_vertex_shader *)base; + + struct tgsi_exec_machine *machine = &draw->machine; + unsigned int j; + + + for (i = 0; i < count; i += MAX_TGSI_VERTICES) { + unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); + + /* Swizzle inputs. + */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < draw->num_vs_inputs; slot++) { + machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; + machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; + machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; + machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; + } + } + + /* run shader */ + gallivm_cpu_vs_exec(shader->llvm_prog, + machine->Inputs, + machine->Outputs, + (float (*)[4]) constants, + machine->Temps); + + + /* Unswizzle all output results + */ + for (slot = 1; slot < draw->num_vs_outputs; slot++) { + output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } + + /* Advance input, output pointers: + */ + input = (const float (*)[4])((const char *)input + input_stride); + output = (float (*)[4])((char *)output + output_stride); + } +} + + + static void vs_llvm_delete( struct draw_vertex_shader *base ) { @@ -198,8 +256,12 @@ draw_create_vs_llvm(struct draw_context *draw, /* we make a private copy of the tokens */ vs->base.state.tokens = mem_dup(templ->tokens, nt * sizeof(templ->tokens[0])); + + tgsi_scan_shader(shader->tokens, &vs->base.info); + vs->base.prepare = vs_llvm_prepare; vs->base.run = vs_llvm_run; + vs->base.run_linear = vs_llvm_run_linear; vs->base.delete = vs_llvm_delete; { diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index ee0a3105b9..873ecfdc5d 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -58,7 +58,11 @@ typedef void (XSTDCALL *codegen_function) ( struct draw_sse_vertex_shader { struct draw_vertex_shader base; struct x86_function sse2_program; + codegen_function func; + + struct tgsi_exec_machine *machine; + float immediates[TGSI_EXEC_NUM_IMMEDIATES][4]; }; @@ -96,11 +100,10 @@ vs_sse_run( struct draw_vertex_shader *base, const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; - assert(draw->vertex_shader->info.output_semantic_name[0] - == TGSI_SEMANTIC_POSITION); + assert(base->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION); /* Consts does not require 16 byte alignment. */ - machine->Consts = (float (*)[4]) draw->user.constants; + machine->Consts = (const float (*)[4]) draw->user.constants; machine->Inputs = ALIGN16_ASSIGN(inputs); if (draw->rasterizer->bypass_vs) { /* outputs are just the inputs */ @@ -124,7 +127,7 @@ vs_sse_run( struct draw_vertex_shader *base, */ shader->func(machine->Inputs, machine->Outputs, - machine->Consts, + (float (*)[4])machine->Consts, machine->Temps, shader->immediates); } @@ -200,6 +203,67 @@ vs_sse_run( struct draw_vertex_shader *base, } +/* Simplified vertex shader interface for the pt paths. Given the + * complexity of code-generating all the above operations together, + * it's time to try doing all the other stuff separately. + */ +static void +vs_sse_run_linear( struct draw_vertex_shader *base, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) +{ + struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base; + struct tgsi_exec_machine *machine = shader->machine; + unsigned int i, j; + unsigned slot; + + for (i = 0; i < count; i += MAX_TGSI_VERTICES) { + unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); + + /* Swizzle inputs. + */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < base->info.num_inputs; slot++) { + machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; + machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; + machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; + machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; + } + } + + /* run compiled shader + */ + shader->func(machine->Inputs, + machine->Outputs, + (float (*)[4])constants, + machine->Temps, + shader->immediates); + + + /* Unswizzle all output results. + */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < base->info.num_outputs; slot++) { + output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } + } + + /* Advance input, output pointers: + */ + input = (const float (*)[4])((const char *)input + input_stride); + output = (float (*)[4])((char *)output + output_stride); + } +} + + + static void vs_sse_delete( struct draw_vertex_shader *base ) @@ -229,8 +293,12 @@ draw_create_vs_sse(struct draw_context *draw, /* we make a private copy of the tokens */ vs->base.state.tokens = mem_dup(templ->tokens, nt * sizeof(templ->tokens[0])); + + tgsi_scan_shader(templ->tokens, &vs->base.info); + vs->base.prepare = vs_sse_prepare; vs->base.run = vs_sse_run; + vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; x86_init_func( &vs->sse2_program ); diff --git a/src/gallium/auxiliary/draw/draw_wide_point.c b/src/gallium/auxiliary/draw/draw_wide_point.c index 6fc7c9fcd7..0fa7c9313c 100644 --- a/src/gallium/auxiliary/draw/draw_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_wide_point.c @@ -31,7 +31,7 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "draw_private.h" +#include "draw_vs.h" struct widepoint_stage { -- cgit v1.2.3 From fe8af141246bdfc0e98beec0089acdb92407bcfa Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 17 Apr 2008 14:42:05 +0100 Subject: draw: don't always run pipeline if clipping --- src/gallium/auxiliary/draw/draw_pt.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index c0125c906f..60a47f3911 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -74,11 +74,11 @@ draw_pt_arrays(struct draw_context *draw, } - if (draw->pt.middle.opt[opt] == NULL) { - opt = PT_PIPELINE | PT_CLIPTEST | PT_SHADE; + middle = draw->pt.middle.opt[opt]; + if (middle == NULL) { + middle = draw->pt.middle.opt[PT_PIPELINE | PT_CLIPTEST | PT_SHADE]; } - middle = draw->pt.middle.opt[opt]; assert(middle); /* May create a short-circuited version of this for small primitives: -- cgit v1.2.3 From c503e55d74cf84f87f82b3dab3cb4d38b201d47a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 17 Apr 2008 14:43:40 +0100 Subject: draw: move hw vertex emit to a new module --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/draw_private.h | 8 - src/gallium/auxiliary/draw/draw_pt.h | 33 ++++ src/gallium/auxiliary/draw/draw_pt_emit.c | 208 +++++++++++++++++++++ .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 161 +++------------- 5 files changed, 270 insertions(+), 141 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pt_emit.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 5ab3cfe5ce..836e98f086 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -21,6 +21,7 @@ C_SOURCES = \ draw_pt_fetch_pipeline.c \ draw_pt_fetch_shade_pipeline.c \ draw_pt_pipeline.c \ + draw_pt_emit.c \ draw_pt_elts.c \ draw_prim.c \ draw_pstipple.c \ diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 7953dbb7d9..9407217fd3 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -376,14 +376,6 @@ boolean draw_pt_arrays( struct draw_context *draw, unsigned count ); void draw_pt_reset_vertex_ids( struct draw_context *draw ); -void draw_pt_run_pipeline( struct draw_context *draw, - unsigned prim, - char *verts, - unsigned vertex_stride, - unsigned vertex_count, - const ushort *elts, - unsigned count ); - #define DRAW_FLUSH_SHADER_QUEUE 0x1 /* sized not to overflow, never raised */ #define DRAW_FLUSH_PRIM_QUEUE 0x2 diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 08afb60645..31d18ec62b 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -128,4 +128,37 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw); +/* More helpers: + */ +void draw_pt_run_pipeline( struct draw_context *draw, + unsigned prim, + char *verts, + unsigned vertex_stride, + unsigned vertex_count, + const ushort *elts, + unsigned count ); + + +/* HW vertex emit: + */ +struct pt_emit; + +void draw_pt_emit_prepare( struct pt_emit *emit, + unsigned prim, + unsigned opt ); + +void draw_pt_emit( struct pt_emit *emit, + char *verts, + unsigned stride, + unsigned vertex_count, + const ushort *elts, + unsigned count ); + +void draw_pt_emit_destroy( struct pt_emit *emit ); + +struct pt_emit *draw_pt_emit_create( struct draw_context *draw ); + + + + #endif diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c new file mode 100644 index 0000000000..e9ed29450a --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -0,0 +1,208 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "translate/translate.h" + + +struct pt_emit { + struct draw_context *draw; + + struct translate *translate; + + unsigned pipeline_vertex_size; + unsigned prim; + unsigned opt; +}; + + +void draw_pt_emit_prepare( struct pt_emit *emit, + unsigned prim, + unsigned opt ) +{ + struct draw_context *draw = emit->draw; + const struct vertex_info *vinfo; + unsigned dst_offset; + struct translate_key hw_key; + unsigned i; + boolean ok; + + ok = draw->render->set_primitive(draw->render, prim); + if (!ok) { + assert(0); + return; + } + + /* Must do this after set_primitive() above: + */ + vinfo = draw->render->get_vertex_info(draw->render); + + + /* In passthrough mode, need to translate from vertex shader + * outputs to hw vertices. + */ + dst_offset = 0; + for (i = 0; i < vinfo->num_attribs; i++) { + unsigned emit_sz = 0; + unsigned src_buffer = 0; + unsigned output_format; + unsigned src_offset = (sizeof(struct vertex_header) + + vinfo->src_index[i] * 4 * sizeof(float) ); + + + + switch (vinfo->emit[i]) { + case EMIT_4F: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); + break; + case EMIT_3F: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); + break; + case EMIT_2F: + output_format = PIPE_FORMAT_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); + break; + case EMIT_1F: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + break; + case EMIT_1F_PSIZE: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + src_buffer = 1; + src_offset = 0; + break; + case EMIT_4UB: + output_format = PIPE_FORMAT_B8G8R8A8_UNORM; + emit_sz = 4 * sizeof(ubyte); + default: + assert(0); + output_format = PIPE_FORMAT_NONE; + emit_sz = 0; + break; + } + + hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + hw_key.element[i].input_buffer = src_buffer; + hw_key.element[i].input_offset = src_offset; + hw_key.element[i].output_format = output_format; + hw_key.element[i].output_offset = dst_offset; + + dst_offset += emit_sz; + } + + hw_key.nr_elements = vinfo->num_attribs; + hw_key.output_stride = vinfo->size * 4; + + /* Don't bother with caching at this stage: + */ + if (!emit->translate || + memcmp(&emit->translate->key, &hw_key, sizeof(hw_key)) != 0) + { + if (emit->translate) + emit->translate->release(emit->translate); + + emit->translate = translate_generic_create( &hw_key ); + } +} + + +void draw_pt_emit( struct pt_emit *emit, + char *verts, + unsigned stride, + unsigned vertex_count, + const ushort *elts, + unsigned count ) +{ + struct draw_context *draw = emit->draw; + struct translate *translate = emit->translate; + struct vbuf_render *render = draw->render; + void *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = render->allocate_vertices(render, + (ushort)translate->key.output_stride, + (ushort)count); + if (!hw_verts) { + assert(0); + return; + } + + translate->set_buffer(translate, + 0, + verts, + stride ); + + translate->set_buffer(translate, + 1, + &draw->rasterizer->point_size, + 0); + + translate->run( translate, + 0, + vertex_count, + hw_verts ); + + render->draw(render, + elts, + count); + + render->release_vertices(render, + hw_verts, + translate->key.output_stride, + vertex_count); +} + + +struct pt_emit *draw_pt_emit_create( struct draw_context *draw ) +{ + struct pt_emit *emit = CALLOC_STRUCT(pt_emit); + if (!emit) + return NULL; + + emit->draw = draw; + + return emit; +} + +void draw_pt_emit_destroy( struct pt_emit *emit ) +{ + if (emit->translate) + emit->translate->release( emit->translate ); + + FREE(emit); +} diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index cf37493eb6..8db9e31e2d 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -38,7 +38,7 @@ struct fetch_pipeline_middle_end { struct draw_pt_middle_end base; struct draw_context *draw; - struct translate *translate; + struct pt_emit *emit; unsigned pipeline_vertex_size; unsigned prim; @@ -51,98 +51,12 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, unsigned opt ) { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; - struct draw_context *draw = fpme->draw; - unsigned i; - boolean ok; - const struct vertex_info *vinfo; - unsigned dst_offset; - struct translate_key hw_key; fpme->prim = prim; fpme->opt = opt; - if (!(opt & PT_PIPELINE)) { - ok = draw->render->set_primitive(draw->render, prim); - if (!ok) { - assert(0); - return; - } - - /* Must do this after set_primitive() above: - */ - vinfo = draw->render->get_vertex_info(draw->render); - - - /* In passthrough mode, need to translate from vertex shader - * outputs to hw vertices. - */ - dst_offset = 0; - for (i = 0; i < vinfo->num_attribs; i++) { - unsigned emit_sz = 0; - unsigned src_buffer = 0; - unsigned output_format; - unsigned src_offset = (sizeof(struct vertex_header) + - vinfo->src_index[i] * 4 * sizeof(float) ); - - - - switch (vinfo->emit[i]) { - case EMIT_4F: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - emit_sz = 4 * sizeof(float); - break; - case EMIT_3F: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - emit_sz = 3 * sizeof(float); - break; - case EMIT_2F: - output_format = PIPE_FORMAT_R32G32_FLOAT; - emit_sz = 2 * sizeof(float); - break; - case EMIT_1F: - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); - break; - case EMIT_1F_PSIZE: - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); - src_buffer = 1; - src_offset = 0; - break; - case EMIT_4UB: - output_format = PIPE_FORMAT_B8G8R8A8_UNORM; - emit_sz = 4 * sizeof(ubyte); - default: - assert(0); - output_format = PIPE_FORMAT_NONE; - emit_sz = 0; - break; - } - - hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - hw_key.element[i].input_buffer = src_buffer; - hw_key.element[i].input_offset = src_offset; - hw_key.element[i].output_format = output_format; - hw_key.element[i].output_offset = dst_offset; - - dst_offset += emit_sz; - } - - hw_key.nr_elements = vinfo->num_attribs; - hw_key.output_stride = vinfo->size * 4; - - /* Don't bother with caching at this stage: - */ - if (!fpme->translate || - memcmp(&fpme->translate->key, &hw_key, sizeof(hw_key)) != 0) - { - if (fpme->translate) - fpme->translate->release(fpme->translate); - - fpme->translate = translate_generic_create( &hw_key ); - } - } - + if (!(opt & PT_PIPELINE)) + draw_pt_emit_prepare( fpme->emit, prim, opt ); //fpme->pipeline_vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); fpme->pipeline_vertex_size = MAX_VERTEX_ALLOCATION; @@ -194,43 +108,12 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, draw_elts, draw_count ); } else { - struct translate *translate = fpme->translate; - void *hw_verts; - - /* XXX: need to flush to get prim_vbuf.c to release its allocation?? - */ - draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - - hw_verts = draw->render->allocate_vertices(draw->render, - (ushort)fpme->translate->key.output_stride, - (ushort)fetch_count); - if (!hw_verts) { - assert(0); - return; - } - - translate->set_buffer(translate, - 0, - pipeline_verts, - fpme->pipeline_vertex_size ); - - translate->set_buffer(translate, - 1, - &fpme->draw->rasterizer->point_size, - 0); - - translate->run( translate, - 0, fetch_count, - hw_verts ); - - draw->render->draw(draw->render, - draw_elts, - draw_count); - - draw->render->release_vertices(draw->render, - hw_verts, - fpme->translate->key.output_stride, - fetch_count); + draw_pt_emit( fpme->emit, + pipeline_verts, + fpme->pipeline_vertex_size, + fetch_count, + draw_elts, + draw_count ); } @@ -252,14 +135,26 @@ static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle ) struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context *draw ) { - struct fetch_pipeline_middle_end *fetch_pipeline = CALLOC_STRUCT( fetch_pipeline_middle_end ); + struct fetch_pipeline_middle_end *fpme = CALLOC_STRUCT( fetch_pipeline_middle_end ); + if (!fpme) + goto fail; + + fpme->base.prepare = fetch_pipeline_prepare; + fpme->base.run = fetch_pipeline_run; + fpme->base.finish = fetch_pipeline_finish; + fpme->base.destroy = fetch_pipeline_destroy; + + fpme->draw = draw; + + fpme->emit = draw_pt_emit_create( draw ); + if (!fpme->emit) + goto fail; - fetch_pipeline->base.prepare = fetch_pipeline_prepare; - fetch_pipeline->base.run = fetch_pipeline_run; - fetch_pipeline->base.finish = fetch_pipeline_finish; - fetch_pipeline->base.destroy = fetch_pipeline_destroy; + return &fpme->base; - fetch_pipeline->draw = draw; + fail: + if (fpme) + fetch_pipeline_destroy( &fpme->base ); - return &fetch_pipeline->base; + return NULL; } -- cgit v1.2.3 From 4dcb09d48981f07a82d7f609fe492453c8fdbcf4 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Thu, 17 Apr 2008 17:53:33 +0100 Subject: gallium: Add new file to scons build. --- src/gallium/auxiliary/draw/SConscript | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index a7fb5dbd61..62e0cd9e50 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -16,6 +16,7 @@ draw = env.ConvenienceLibrary( 'draw_offset.c', 'draw_pt.c', 'draw_pt_vcache.c', + 'draw_pt_emit.c', 'draw_pt_fetch_emit.c', 'draw_pt_fetch_pipeline.c', 'draw_pt_fetch_shade_pipeline.c', -- cgit v1.2.3 From 01b6354e72a84f8c3c22be1f77eab8d9c05920a3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 17 Apr 2008 15:26:37 -0600 Subject: gallium: tweak x/ybias values --- src/gallium/auxiliary/draw/draw_wide_point.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_wide_point.c b/src/gallium/auxiliary/draw/draw_wide_point.c index 0fa7c9313c..3d0add0c1a 100644 --- a/src/gallium/auxiliary/draw/draw_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_wide_point.c @@ -199,7 +199,7 @@ static void widepoint_first_point( struct draw_stage *stage, wide->ybias = 0.0; if (draw->rasterizer->gl_rasterization_rules) { - wide->ybias = -0.125; + wide->xbias = 0.125; } /* XXX we won't know the real size if it's computed by the vertex shader! */ -- cgit v1.2.3 From a773f06e969a3992451dd7fe6fd55ea96b2774fa Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 17 Apr 2008 23:44:32 +0100 Subject: draw: split off all the extra functionality in the vertex shader This will at least allow us to make the initial gains to get decent vertex performance much more quickly & with higher confidence of getting it right. At some later point can look again at code-generating all the fetch/cliptest/viewport extras in the same block as the vertex shader. For now, just need to get some decent baseline performance. --- src/gallium/auxiliary/draw/Makefile | 4 +- src/gallium/auxiliary/draw/draw_context.c | 26 +-- src/gallium/auxiliary/draw/draw_private.h | 2 + src/gallium/auxiliary/draw/draw_pt.h | 51 +++++- src/gallium/auxiliary/draw/draw_pt_emit.c | 16 +- src/gallium/auxiliary/draw/draw_pt_fetch.c | 175 ++++++++++++++++++ .../auxiliary/draw/draw_pt_fetch_pipeline.c | 4 +- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 109 +++++++++-- src/gallium/auxiliary/draw/draw_pt_pipeline.c | 5 +- src/gallium/auxiliary/draw/draw_pt_post_vs.c | 202 +++++++++++++++++++++ src/gallium/auxiliary/draw/draw_vs_exec.c | 82 ++++++--- src/gallium/auxiliary/draw/draw_vs_llvm.c | 39 ++-- src/gallium/auxiliary/draw/draw_vs_sse.c | 38 ++-- 13 files changed, 635 insertions(+), 118 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pt_fetch.c create mode 100644 src/gallium/auxiliary/draw/draw_pt_post_vs.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 836e98f086..154c8a99b5 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -20,8 +20,10 @@ C_SOURCES = \ draw_pt_fetch_emit.c \ draw_pt_fetch_pipeline.c \ draw_pt_fetch_shade_pipeline.c \ - draw_pt_pipeline.c \ + draw_pt_fetch.c \ + draw_pt_post_vs.c \ draw_pt_emit.c \ + draw_pt_pipeline.c \ draw_pt_elts.c \ draw_prim.c \ draw_pstipple.c \ diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 9b2dcc0b57..4838b68ed1 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -110,6 +110,12 @@ struct draw_context *draw_create( void ) tgsi_exec_machine_init(&draw->machine); + /* FIXME: give this machine thing a proper constructor: + */ + draw->machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); + draw->machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); + + if (!draw_pt_init( draw )) goto fail; @@ -155,8 +161,13 @@ void draw_destroy( struct draw_context *draw ) if (draw->pipeline.rasterize) draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); + if (draw->machine.Inputs) + align_free(draw->machine.Inputs); + if (draw->machine.Outputs) + align_free(draw->machine.Outputs); tgsi_exec_machine_free_data(&draw->machine); - + + if (draw->vs.vertex_cache) align_free( draw->vs.vertex_cache ); /* Frees all the vertices. */ @@ -265,6 +276,7 @@ draw_set_vertex_elements(struct draw_context *draw, draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); memcpy(draw->vertex_element, elements, count * sizeof(elements[0])); + draw->nr_vertex_elements = count; } @@ -463,15 +475,3 @@ boolean draw_get_edgeflag( struct draw_context *draw, return 1; } - -#if 0 -/* Crufty init function. Fix me. - */ -boolean draw_init_machine( struct draw_context *draw ) -{ - ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); - ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); - machine->Inputs = ALIGN16_ASSIGN(inputs); - machine->Outputs = ALIGN16_ASSIGN(outputs); -} -#endif diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 9407217fd3..da94e69781 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -224,6 +224,8 @@ struct draw_context unsigned nr_vertex_buffers; struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_elements; + struct draw_vertex_shader *vertex_shader; boolean identity_viewport; diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 31d18ec62b..316289969b 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -112,6 +112,7 @@ struct draw_pt_middle_end { * mode... */ struct vbuf_render; +struct vertex_header; /* Helper functions. @@ -132,25 +133,25 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *d */ void draw_pt_run_pipeline( struct draw_context *draw, unsigned prim, - char *verts, - unsigned vertex_stride, + struct vertex_header *verts, unsigned vertex_count, + unsigned vertex_stride, const ushort *elts, unsigned count ); -/* HW vertex emit: +/******************************************************************************* + * HW vertex emit: */ struct pt_emit; void draw_pt_emit_prepare( struct pt_emit *emit, - unsigned prim, - unsigned opt ); + unsigned prim ); void draw_pt_emit( struct pt_emit *emit, - char *verts, - unsigned stride, + const float (*vertex_data)[4], unsigned vertex_count, + unsigned stride, const ushort *elts, unsigned count ); @@ -159,6 +160,42 @@ void draw_pt_emit_destroy( struct pt_emit *emit ); struct pt_emit *draw_pt_emit_create( struct draw_context *draw ); +/******************************************************************************* + * API vertex fetch: + */ + +struct pt_fetch; +void draw_pt_fetch_prepare( struct pt_fetch *fetch, + boolean emit_header, + unsigned vertex_size ); + +void draw_pt_fetch_run( struct pt_fetch *fetch, + const unsigned *elts, + unsigned count, + char *verts ); + +void draw_pt_fetch_destroy( struct pt_fetch *fetch ); + +struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ); + +/******************************************************************************* + * Post-VS: cliptest, rhw, viewport + */ +struct pt_post_vs; + +boolean draw_pt_post_vs_run( struct pt_post_vs *pvs, + struct vertex_header *pipeline_verts, + unsigned stride, + unsigned count ); + +void draw_pt_post_vs_prepare( struct pt_post_vs *pvs, + boolean bypass_clipping, + boolean identity_viewport, + boolean opengl ); + +struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw ); + +void draw_pt_post_vs_destroy( struct pt_post_vs *pvs ); #endif diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index e9ed29450a..ef9db70a02 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -38,16 +38,11 @@ struct pt_emit { struct draw_context *draw; struct translate *translate; - - unsigned pipeline_vertex_size; - unsigned prim; - unsigned opt; }; void draw_pt_emit_prepare( struct pt_emit *emit, - unsigned prim, - unsigned opt ) + unsigned prim ) { struct draw_context *draw = emit->draw; const struct vertex_info *vinfo; @@ -75,8 +70,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit, unsigned emit_sz = 0; unsigned src_buffer = 0; unsigned output_format; - unsigned src_offset = (sizeof(struct vertex_header) + - vinfo->src_index[i] * 4 * sizeof(float) ); + unsigned src_offset = (vinfo->src_index[i] * 4 * sizeof(float) ); @@ -139,9 +133,9 @@ void draw_pt_emit_prepare( struct pt_emit *emit, void draw_pt_emit( struct pt_emit *emit, - char *verts, - unsigned stride, + const float (*vertex_data)[4], unsigned vertex_count, + unsigned stride, const ushort *elts, unsigned count ) { @@ -164,7 +158,7 @@ void draw_pt_emit( struct pt_emit *emit, translate->set_buffer(translate, 0, - verts, + vertex_data, stride ); translate->set_buffer(translate, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c new file mode 100644 index 0000000000..a7553023b8 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -0,0 +1,175 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "translate/translate.h" + + +struct pt_fetch { + struct draw_context *draw; + + struct translate *translate; + + unsigned vertex_size; +}; + + + +/* Perform the fetch from API vertex elements & vertex buffers, to a + * contiguous set of float[4] attributes as required for the + * vertex_shader->run_linear() method. + * + * This is used in all cases except pure passthrough + * (draw_pt_fetch_emit.c) which has its own version to translate + * directly to hw vertices. + * + */ +void draw_pt_fetch_prepare( struct pt_fetch *fetch, + boolean emit_header, + unsigned vertex_size ) +{ + struct draw_context *draw = fetch->draw; + unsigned i, nr = 0; + unsigned dst_offset = 0; + struct translate_key key; + + fetch->vertex_size = vertex_size; + + memset(&key, 0, sizeof(key)); + + /* If PT_SHADE is not set, then we are creating post-shader + * vertices, meaning that we need to emit/leave space for a vertex + * header. + * + * It's worth considering whether the vertex headers should contain + * a pointer to the 'data', rather than having it inline. + * Something to look at after we've fully switched over to the pt + * paths. + */ + if (emit_header) + { + /* Need to set header->vertex_id = 0xffff somehow. + */ + key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; + key.element[nr].input_buffer = draw->nr_vertex_buffers; + key.element[nr].input_offset = 0; + key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT; + key.element[nr].output_offset = dst_offset; + dst_offset += 1 * sizeof(float); + nr++; + + + /* Just leave the clip[] array untouched. + */ + dst_offset += 4 * sizeof(float); + } + + + for (i = 0; i < draw->nr_vertex_elements; i++) { + key.element[nr].input_format = draw->vertex_element[i].src_format; + key.element[nr].input_buffer = draw->vertex_element[i].vertex_buffer_index; + key.element[nr].input_offset = draw->vertex_element[i].src_offset; + key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + key.element[nr].output_offset = dst_offset; + + dst_offset += 4 * sizeof(float); + nr++; + } + + assert(dst_offset <= vertex_size); + + key.nr_elements = nr; + key.output_stride = vertex_size; + + + /* Don't bother with caching at this stage: + */ + if (!fetch->translate || + memcmp(&fetch->translate->key, &key, sizeof(key)) != 0) + { + if (fetch->translate) + fetch->translate->release(fetch->translate); + + fetch->translate = translate_generic_create( &key ); + + if (emit_header) { + static struct vertex_header vh = { 0, 0, 0, 0xffff }; + fetch->translate->set_buffer(fetch->translate, + draw->nr_vertex_buffers, + &vh, + 0); + } + } +} + + + + +void draw_pt_fetch_run( struct pt_fetch *fetch, + const unsigned *elts, + unsigned count, + char *verts ) +{ + struct draw_context *draw = fetch->draw; + struct translate *translate = fetch->translate; + unsigned i; + + for (i = 0; i < draw->nr_vertex_buffers; i++) { + translate->set_buffer(translate, + i, + ((char *)draw->user.vbuffer[i] + + draw->vertex_buffer[i].buffer_offset), + draw->vertex_buffer[i].pitch ); + } + + translate->run_elts( translate, + elts, + count, + verts ); +} + + +struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ) +{ + struct pt_fetch *fetch = CALLOC_STRUCT(pt_fetch); + if (!fetch) + return NULL; + + fetch->draw = draw; + return fetch; +} + +void draw_pt_fetch_destroy( struct pt_fetch *fetch ) +{ + FREE(fetch); +} + diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c index 79548d4156..26d0b37286 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c @@ -286,9 +286,9 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, */ draw_pt_run_pipeline( fpme->draw, fpme->prim, - pipeline_verts, - fpme->pipeline_vertex_size, + (struct vertex_header *)pipeline_verts, fetch_count, + fpme->pipeline_vertex_size, draw_elts, draw_count ); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 8db9e31e2d..0b9e8d15ba 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -39,8 +39,11 @@ struct fetch_pipeline_middle_end { struct draw_context *draw; struct pt_emit *emit; + struct pt_fetch *fetch; + struct pt_post_vs *post_vs; - unsigned pipeline_vertex_size; + unsigned vertex_data_offset; + unsigned vertex_size; unsigned prim; unsigned opt; }; @@ -51,15 +54,43 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, unsigned opt ) { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *vs = draw->vertex_shader; + unsigned nr = MAX2( vs->info.num_inputs, + vs->info.num_outputs ); fpme->prim = prim; fpme->opt = opt; + /* Always leave room for the vertex header whether we need it or + * not. It's hard to get rid of it in particular because of the + * viewport code in draw_pt_post_vs.c. + */ + fpme->vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); + + + + draw_pt_fetch_prepare( fpme->fetch, + (opt & (PT_CLIPTEST | PT_PIPELINE)) != 0, + fpme->vertex_size ); + + /* XXX: it's not really gl rasterization rules we care about here, + * but gl vs dx9 clip spaces. + */ + draw_pt_post_vs_prepare( fpme->post_vs, + draw->rasterizer->bypass_clipping, + draw->identity_viewport, + draw->rasterizer->gl_rasterization_rules ); + + if (!(opt & PT_PIPELINE)) - draw_pt_emit_prepare( fpme->emit, prim, opt ); + draw_pt_emit_prepare( fpme->emit, + prim ); + + /* No need to prepare the shader. + */ + vs->prepare(vs, draw); - //fpme->pipeline_vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); - fpme->pipeline_vertex_size = MAX_VERTEX_ALLOCATION; } @@ -74,44 +105,63 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vertex_shader; - char *pipeline_verts; - unsigned pipeline = PT_PIPELINE; + unsigned opt = fpme->opt; - pipeline_verts = MALLOC(fpme->pipeline_vertex_size * - fetch_count); + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * fetch_count); if (!pipeline_verts) { assert(0); return; } - - /* Shade + /* Fetch into our vertex buffer */ - shader->prepare(shader, draw); - - if (shader->run(shader, draw, fetch_elts, fetch_count, pipeline_verts, - fpme->pipeline_vertex_size)) + draw_pt_fetch_run( fpme->fetch, + fetch_elts, + fetch_count, + (char *)pipeline_verts ); + + /* Run the shader, note that this overwrites the data[] parts of + * the pipeline verts. If there is no shader, ie a bypass shader, + * then the inputs == outputs, and are already in the correct + * place. + */ + if (opt & PT_SHADE) { - pipeline |= PT_CLIPTEST; + shader->run_linear(shader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->user.constants, + fetch_count, + fpme->vertex_size, + fpme->vertex_size); } + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + fetch_count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } /* Do we need to run the pipeline? */ - if (fpme->opt & pipeline) { + if (opt & PT_PIPELINE) { draw_pt_run_pipeline( fpme->draw, fpme->prim, pipeline_verts, - fpme->pipeline_vertex_size, fetch_count, + fpme->vertex_size, draw_elts, draw_count ); - } else { + } + else { draw_pt_emit( fpme->emit, - pipeline_verts, - fpme->pipeline_vertex_size, + (const float (*)[4])pipeline_verts->data, fetch_count, + fpme->vertex_size, draw_elts, draw_count ); } @@ -129,6 +179,17 @@ static void fetch_pipeline_finish( struct draw_pt_middle_end *middle ) static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle ) { + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + + if (fpme->fetch) + draw_pt_fetch_destroy( fpme->fetch ); + + if (fpme->emit) + draw_pt_emit_destroy( fpme->emit ); + + if (fpme->post_vs) + draw_pt_post_vs_destroy( fpme->post_vs ); + FREE(middle); } @@ -146,6 +207,14 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context * fpme->draw = draw; + fpme->fetch = draw_pt_fetch_create( draw ); + if (!fpme->fetch) + goto fail; + + fpme->post_vs = draw_pt_post_vs_create( draw ); + if (!fpme->post_vs) + goto fail; + fpme->emit = draw_pt_emit_create( draw ); if (!fpme->emit) goto fail; diff --git a/src/gallium/auxiliary/draw/draw_pt_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_pipeline.c index 17ce6febec..1a9a3adb03 100644 --- a/src/gallium/auxiliary/draw/draw_pt_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_pipeline.c @@ -117,12 +117,13 @@ void draw_pt_reset_vertex_ids( struct draw_context *draw ) */ void draw_pt_run_pipeline( struct draw_context *draw, unsigned prim, - char *verts, - unsigned stride, + struct vertex_header *pipeline_verts, unsigned vertex_count, + unsigned stride, const ushort *elts, unsigned count ) { + char *verts = (char *)pipeline_verts; unsigned i; draw->pt.pipeline.verts = verts; diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c new file mode 100644 index 0000000000..315b02f4ee --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -0,0 +1,202 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "pipe/p_context.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" + +struct pt_post_vs { + struct draw_context *draw; + + boolean (*run)( struct pt_post_vs *pvs, + struct vertex_header *vertices, + unsigned count, + unsigned stride ); +}; + + + +static INLINE unsigned +compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr) +{ + unsigned mask = 0x0; + unsigned i; + + /* Do the hardwired planes first: + */ + if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; + if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; + if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; + if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; + if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; + if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; + + /* Followed by any remaining ones: + */ + for (i = 6; i < nr; i++) { + if (dot4(clip, plane[i]) < 0) + mask |= (1<draw->viewport.scale; + const float *trans = pvs->draw->viewport.translate; + unsigned j; + unsigned clipped = 0; + + for (j = 0; j < count; j++) { + out->clip[0] = out->data[0][0]; + out->clip[1] = out->data[0][1]; + out->clip[2] = out->data[0][2]; + out->clip[3] = out->data[0][3]; + + out->vertex_id = 0xffff; + out->edgeflag = 1; + out->clipmask = compute_clipmask_gl(out->clip, + pvs->draw->plane, + pvs->draw->nr_planes); + clipped += out->clipmask; + + if (out->clipmask == 0) + { + /* divide by w */ + float w = 1.0f / out->data[0][3]; + + /* Viewport mapping */ + out->data[0][0] = out->data[0][0] * w * scale[0] + trans[0]; + out->data[0][1] = out->data[0][1] * w * scale[1] + trans[1]; + out->data[0][2] = out->data[0][2] * w * scale[2] + trans[2]; + out->data[0][3] = w; + } + + out = (struct vertex_header *)( (char *)out + stride ); + } + + return clipped != 0; +} + + + +/* If bypass_clipping is set, skip cliptest and rhw divide. + */ +static boolean post_vs_viewport( struct pt_post_vs *pvs, + struct vertex_header *vertices, + unsigned count, + unsigned stride ) +{ + struct vertex_header *out = vertices; + const float *scale = pvs->draw->viewport.scale; + const float *trans = pvs->draw->viewport.translate; + unsigned j; + + debug_printf("%s\n", __FUNCTION__); + for (j = 0; j < count; j++) { + /* Viewport mapping only, no cliptest/rhw divide + */ + out->data[0][0] = out->data[0][0] * scale[0] + trans[0]; + out->data[0][1] = out->data[0][1] * scale[1] + trans[1]; + out->data[0][2] = out->data[0][2] * scale[2] + trans[2]; + + out = (struct vertex_header *)((char *)out + stride); + } + + return FALSE; +} + + +/* If bypass_clipping is set and we have an identity viewport, nothing + * to do. + */ +static boolean post_vs_none( struct pt_post_vs *pvs, + struct vertex_header *vertices, + unsigned count, + unsigned stride ) +{ + debug_printf("%s\n", __FUNCTION__); + return FALSE; +} + +boolean draw_pt_post_vs_run( struct pt_post_vs *pvs, + struct vertex_header *pipeline_verts, + unsigned count, + unsigned stride ) +{ + return pvs->run( pvs, pipeline_verts, count, stride ); +} + + +void draw_pt_post_vs_prepare( struct pt_post_vs *pvs, + boolean bypass_clipping, + boolean identity_viewport, + boolean opengl ) +{ + if (bypass_clipping) { + if (identity_viewport) + pvs->run = post_vs_none; + else + pvs->run = post_vs_viewport; + } + else { + //if (opengl) + pvs->run = post_vs_cliptest_viewport_gl; + } +} + + +struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw ) +{ + struct pt_post_vs *pvs = CALLOC_STRUCT( pt_post_vs ); + if (!pvs) + return NULL; + + pvs->draw = draw; + + return pvs; +} + +void draw_pt_post_vs_destroy( struct pt_post_vs *pvs ) +{ + FREE(pvs); +} diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 0e05b79715..184151b9b1 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -58,8 +58,10 @@ static void vs_exec_prepare( struct draw_vertex_shader *shader, struct draw_context *draw ) { + struct exec_vertex_shader *evs = exec_vertex_shader(shader); + /* specify the vertex program to interpret/execute */ - tgsi_exec_machine_bind_shader(&draw->machine, + tgsi_exec_machine_bind_shader(evs->machine, shader->state.tokens, PIPE_MAX_SAMPLERS, NULL /*samplers*/ ); @@ -84,31 +86,45 @@ vs_exec_run( struct draw_vertex_shader *shader, void *vOut, unsigned vertex_size) { - struct tgsi_exec_machine *machine = &draw->machine; + struct exec_vertex_shader *evs = exec_vertex_shader(shader); + struct tgsi_exec_machine *machine = evs->machine; unsigned int i, j; unsigned int clipped = 0; - - ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); - ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); + struct tgsi_exec_vector *outputs = 0; const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; assert(shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION); machine->Consts = (const float (*)[4]) draw->user.constants; - machine->Inputs = ALIGN16_ASSIGN(inputs); + if (draw->rasterizer->bypass_vs) { /* outputs are just the inputs */ - machine->Outputs = machine->Inputs; + outputs = machine->Inputs; } else { - machine->Outputs = ALIGN16_ASSIGN(outputs); + outputs = machine->Outputs; } for (i = 0; i < count; i += MAX_TGSI_VERTICES) { unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); draw->vertex_fetch.fetch_func( draw, machine, &elts[i], max_vertices ); +#if 0 + for (j = 0; j < max_vertices; j++) { + unsigned slot; + debug_printf("%d) Input vert:\n", i + j); + for (slot = 0; slot < shader->info.num_inputs; slot++) { + debug_printf("\t%d: %f %f %f %f\n", slot, + machine->Inputs[slot].xyzw[0].f[j], + machine->Inputs[slot].xyzw[1].f[j], + machine->Inputs[slot].xyzw[2].f[j], + machine->Inputs[slot].xyzw[3].f[j]); + } + } +#endif + + if (!draw->rasterizer->bypass_vs) { /* run interpreter */ tgsi_exec_machine_run( machine ); @@ -127,10 +143,10 @@ vs_exec_run( struct draw_vertex_shader *shader, * program as a set of DP4 instructions appended to the * user-provided code. */ - x = out->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = out->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = out->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = out->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + x = out->clip[0] = outputs[0].xyzw[0].f[j]; + y = out->clip[1] = outputs[0].xyzw[1].f[j]; + z = out->clip[2] = outputs[0].xyzw[2].f[j]; + w = out->clip[3] = outputs[0].xyzw[3].f[j]; if (!draw->rasterizer->bypass_clipping) { out->clipmask = compute_clipmask(out->clip, draw->plane, @@ -156,7 +172,8 @@ vs_exec_run( struct draw_vertex_shader *shader, out->data[0][2] = z * scale[2] + trans[2]; out->data[0][3] = w; } - else { + else + { out->data[0][0] = x; out->data[0][1] = y; out->data[0][2] = z; @@ -167,10 +184,10 @@ vs_exec_run( struct draw_vertex_shader *shader, * vertex attrib slots. */ for (slot = 1; slot < draw->num_vs_outputs; slot++) { - out->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - out->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - out->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - out->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + out->data[slot][0] = outputs[slot].xyzw[0].f[j]; + out->data[slot][1] = outputs[slot].xyzw[1].f[j]; + out->data[slot][2] = outputs[slot].xyzw[2].f[j]; + out->data[slot][3] = outputs[slot].xyzw[3].f[j]; } #if 0 /*DEBUG*/ @@ -216,12 +233,25 @@ vs_exec_run_linear( struct draw_vertex_shader *shader, /* Swizzle inputs. */ for (j = 0; j < max_vertices; j++) { +#if 0 + debug_printf("%d) Input vert:\n", i + j); + for (slot = 0; slot < shader->info.num_inputs; slot++) { + debug_printf("\t%d: %f %f %f %f\n", slot, + input[slot][0], + input[slot][1], + input[slot][2], + input[slot][3]); + } +#endif + for (slot = 0; slot < shader->info.num_inputs; slot++) { machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; } + + input = (const float (*)[4])((const char *)input + input_stride); } /* run interpreter */ @@ -235,13 +265,23 @@ vs_exec_run_linear( struct draw_vertex_shader *shader, output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } + +#if 0 + debug_printf("%d) Post xform vert:\n", i + j); + for (slot = 0; slot < shader->info.num_outputs; slot++) { + debug_printf("\t%d: %f %f %f %f\n", slot, + output[slot][0], + output[slot][1], + output[slot][2], + output[slot][3]); + } +#endif + + output = (float (*)[4])((char *)output + output_stride); } - /* Advance input, output pointers: - */ - input = (const float (*)[4])((const char *)input + input_stride); - output = (float (*)[4])((char *)output + output_stride); } } diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index d0baca715e..4dbfa955a4 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -47,6 +47,7 @@ struct draw_llvm_vertex_shader { struct draw_vertex_shader base; struct gallivm_prog *llvm_prog; + struct tgsi_exec_machine *machine; }; @@ -77,12 +78,9 @@ vs_llvm_run( struct draw_vertex_shader *base, struct draw_llvm_vertex_shader *shader = (struct draw_llvm_vertex_shader *)base; - struct tgsi_exec_machine *machine = &draw->machine; + struct tgsi_exec_machine *machine = shader->machine; unsigned int j; unsigned int clipped = 0; - - ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); - ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; @@ -93,13 +91,12 @@ vs_llvm_run( struct draw_vertex_shader *base, /* Consts does not require 16 byte alignment. */ machine->Consts = (float (*)[4]) draw->user.constants; - machine->Inputs = ALIGN16_ASSIGN(inputs); if (draw->rasterizer->bypass_vs) { /* outputs are just the inputs */ - machine->Outputs = machine->Inputs; + outputs = machine->Inputs; } else { - machine->Outputs = ALIGN16_ASSIGN(outputs); + outputs = machine->Outputs; } @@ -119,10 +116,10 @@ vs_llvm_run( struct draw_vertex_shader *base, unsigned slot; float x, y, z, w; - x = vOut[j]->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = vOut[j]->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = vOut[j]->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = vOut[j]->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + x = vOut[j]->clip[0] = outputs[0].xyzw[0].f[j]; + y = vOut[j]->clip[1] = outputs[0].xyzw[1].f[j]; + z = vOut[j]->clip[2] = outputs[0].xyzw[2].f[j]; + w = vOut[j]->clip[3] = outputs[0].xyzw[3].f[j]; if (!draw->rasterizer->bypass_clipping) { vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, @@ -159,10 +156,10 @@ vs_llvm_run( struct draw_vertex_shader *base, * vertex attrib slots. */ for (slot = 1; slot < draw->num_vs_outputs; slot++) { - vOut[j]->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - vOut[j]->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - vOut[j]->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - vOut[j]->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + vOut[j]->data[slot][0] = outputs[slot].xyzw[0].f[j]; + vOut[j]->data[slot][1] = outputs[slot].xyzw[1].f[j]; + vOut[j]->data[slot][2] = outputs[slot].xyzw[2].f[j]; + vOut[j]->data[slot][3] = outputs[slot].xyzw[3].f[j]; } } /* loop over vertices */ return clipped != 0; @@ -183,7 +180,7 @@ vs_llvm_run_linear( struct draw_vertex_shader *base, struct draw_llvm_vertex_shader *shader = (struct draw_llvm_vertex_shader *)base; - struct tgsi_exec_machine *machine = &draw->machine; + struct tgsi_exec_machine *machine = shader->machine; unsigned int j; @@ -199,6 +196,8 @@ vs_llvm_run_linear( struct draw_vertex_shader *base, machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; } + + input = (const float (*)[4])((const char *)input + input_stride); } /* run shader */ @@ -216,12 +215,9 @@ vs_llvm_run_linear( struct draw_vertex_shader *base, output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; - } - /* Advance input, output pointers: - */ - input = (const float (*)[4])((const char *)input + input_stride); - output = (float (*)[4])((char *)output + output_stride); + output = (float (*)[4])((char *)output + output_stride); + } } } @@ -263,6 +259,7 @@ draw_create_vs_llvm(struct draw_context *draw, vs->base.run = vs_llvm_run; vs->base.run_linear = vs_llvm_run_linear; vs->base.delete = vs_llvm_delete; + vs->machine = &draw->machine; { struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS); diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 873ecfdc5d..a763f3845c 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -91,12 +91,10 @@ vs_sse_run( struct draw_vertex_shader *base, unsigned vertex_size ) { struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base; - struct tgsi_exec_machine *machine = &draw->machine; + struct tgsi_exec_machine *machine = shader->machine; unsigned int i, j; unsigned int clipped = 0; - - ALIGN16_DECL(struct tgsi_exec_vector, inputs, PIPE_MAX_ATTRIBS); - ALIGN16_DECL(struct tgsi_exec_vector, outputs, PIPE_MAX_ATTRIBS); + struct tgsi_exec_vector *outputs = 0; const float *scale = draw->viewport.scale; const float *trans = draw->viewport.translate; @@ -104,13 +102,13 @@ vs_sse_run( struct draw_vertex_shader *base, /* Consts does not require 16 byte alignment. */ machine->Consts = (const float (*)[4]) draw->user.constants; - machine->Inputs = ALIGN16_ASSIGN(inputs); + if (draw->rasterizer->bypass_vs) { /* outputs are just the inputs */ - machine->Outputs = machine->Inputs; + outputs = machine->Inputs; } else { - machine->Outputs = ALIGN16_ASSIGN(outputs); + outputs = machine->Outputs; } for (i = 0; i < count; i += SSE_MAX_VERTICES) { @@ -142,10 +140,10 @@ vs_sse_run( struct draw_vertex_shader *base, struct vertex_header *out = draw_header_from_block(vOut, vertex_size, i + j); - x = out->clip[0] = machine->Outputs[0].xyzw[0].f[j]; - y = out->clip[1] = machine->Outputs[0].xyzw[1].f[j]; - z = out->clip[2] = machine->Outputs[0].xyzw[2].f[j]; - w = out->clip[3] = machine->Outputs[0].xyzw[3].f[j]; + x = out->clip[0] = outputs[0].xyzw[0].f[j]; + y = out->clip[1] = outputs[0].xyzw[1].f[j]; + z = out->clip[2] = outputs[0].xyzw[2].f[j]; + w = out->clip[3] = outputs[0].xyzw[3].f[j]; if (!draw->rasterizer->bypass_clipping) { out->clipmask = compute_clipmask(out->clip, draw->plane, @@ -182,10 +180,10 @@ vs_sse_run( struct draw_vertex_shader *base, * vertex attrib slots. */ for (slot = 1; slot < draw->num_vs_outputs; slot++) { - out->data[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - out->data[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - out->data[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - out->data[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + out->data[slot][0] = outputs[slot].xyzw[0].f[j]; + out->data[slot][1] = outputs[slot].xyzw[1].f[j]; + out->data[slot][2] = outputs[slot].xyzw[2].f[j]; + out->data[slot][3] = outputs[slot].xyzw[3].f[j]; } #if 0 /*DEBUG*/ printf("%d) Post xform vert:\n", i + j); @@ -233,6 +231,8 @@ vs_sse_run_linear( struct draw_vertex_shader *base, machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; } + + input = (const float (*)[4])((const char *)input + input_stride); } /* run compiled shader @@ -253,12 +253,9 @@ vs_sse_run_linear( struct draw_vertex_shader *base, output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; } - } - /* Advance input, output pointers: - */ - input = (const float (*)[4])((const char *)input + input_stride); - output = (float (*)[4])((char *)output + output_stride); + output = (float (*)[4])((char *)output + output_stride); + } } } @@ -300,6 +297,7 @@ draw_create_vs_sse(struct draw_context *draw, vs->base.run = vs_sse_run; vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; + vs->machine = &draw->machine; x86_init_func( &vs->sse2_program ); -- cgit v1.2.3 From da9079b936684f88da79425a810d7902e4d6e7ad Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 18 Apr 2008 20:59:28 +0900 Subject: gallium: Add missing files to scons. --- src/gallium/auxiliary/draw/SConscript | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 62e0cd9e50..0274d48e7f 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -6,24 +6,23 @@ draw = env.ConvenienceLibrary( 'draw_aaline.c', 'draw_aapoint.c', 'draw_clip.c', - 'draw_vs_exec.c', - 'draw_vs_sse.c', - 'draw_vs_llvm.c', 'draw_context.c', 'draw_cull.c', 'draw_debug.c', 'draw_flatshade.c', 'draw_offset.c', + 'draw_prim.c', + 'draw_pstipple.c', 'draw_pt.c', - 'draw_pt_vcache.c', + 'draw_pt_elts.c', 'draw_pt_emit.c', + 'draw_pt_fetch.c', 'draw_pt_fetch_emit.c', 'draw_pt_fetch_pipeline.c', 'draw_pt_fetch_shade_pipeline.c', 'draw_pt_pipeline.c', - 'draw_pt_elts.c', - 'draw_prim.c', - 'draw_pstipple.c', + 'draw_pt_post_vs.c', + 'draw_pt_vcache.c', 'draw_stipple.c', 'draw_twoside.c', 'draw_unfilled.c', @@ -36,8 +35,11 @@ draw = env.ConvenienceLibrary( 'draw_vf.c', 'draw_vf_generic.c', 'draw_vf_sse.c', + 'draw_vs_exec.c', + 'draw_vs_llvm.c', + 'draw_vs_sse.c', + 'draw_wide_line.c', 'draw_wide_point.c', - 'draw_wide_line.c' ]) auxiliaries.insert(0, draw) -- cgit v1.2.3 From 5b97c762ed9882dd922f48c2fbf13b14ad86a96e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 18 Apr 2008 17:32:39 +0100 Subject: rtasm: add a couple more insns, clean up x86_mul --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 34 ++++++++++++++++++++++++------ src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 2 ++ 2 files changed, 29 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index aea8b28e58..5c25fa155d 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -33,11 +33,6 @@ #define DISASSEM 0 #define X86_TWOB 0x0f -static unsigned char *cptr( void (*label)() ) -{ - return (unsigned char *) label; -} - static void do_realloc( struct x86_function *p ) { @@ -304,6 +299,11 @@ void x86_jmp( struct x86_function *p, unsigned char *label) } #if 0 +static unsigned char *cptr( void (*label)() ) +{ + return (unsigned char *) label; +} + /* This doesn't work once we start reallocating & copying the * generated code on buffer fills, because the call is relative to the * current pc. @@ -417,11 +417,14 @@ void x86_add( struct x86_function *p, emit_op_modrm(p, 0x03, 0x01, dst, src ); } +/* Calculate EAX * src, results in EDX:EAX. + */ void x86_mul( struct x86_function *p, struct x86_reg src ) { - assert (src.file == file_REG32 && src.mod == mod_REG); - emit_op_modrm(p, 0xf7, 0, x86_make_reg (file_REG32, reg_SP), src ); +// assert (src.file == file_REG32 && src.mod == mod_REG); + emit_1ub(p, 0xf7); + emit_modrm_noreg(p, 4, src ); } void x86_sub( struct x86_function *p, @@ -646,6 +649,14 @@ void sse_cvtps2pi( struct x86_function *p, emit_modrm( p, dst, src ); } +void sse2_cvtdq2ps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0x5b); + emit_modrm( p, dst, src ); +} + /* Shufps can also be used to implement a reduced swizzle when dest == * arg0. @@ -735,6 +746,15 @@ void sse2_packuswb( struct x86_function *p, emit_modrm( p, dst, src ); } +void sse2_punpcklbw( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_3ub(p, 0x66, X86_TWOB, 0x60); + emit_modrm( p, dst, src ); +} + + void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 606b41eb35..dfde661f46 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -165,6 +165,7 @@ void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg sr void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); @@ -202,6 +203,7 @@ void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, unsigned char shuf ); void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); +void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -- cgit v1.2.3 From c5f0158a9179463593d63b33cf3b5490167faac9 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 18 Apr 2008 17:35:32 +0100 Subject: tgsi: add const qualifier to tokens on sse emit --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 2 +- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index c37e201b2b..0a3a7559ca 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -2332,7 +2332,7 @@ emit_declaration( */ unsigned tgsi_emit_sse2( - struct tgsi_token *tokens, + const struct tgsi_token *tokens, struct x86_function *func, float (*immediates)[4]) { diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h index d56bf7f98a..063287dc5e 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h @@ -10,7 +10,7 @@ struct x86_function; unsigned tgsi_emit_sse2( - struct tgsi_token *tokens, + const struct tgsi_token *tokens, struct x86_function *function, float (*immediates)[4] ); -- cgit v1.2.3 From 363f7abf2000c1cf5993ae8f83ba81b2054bf6e0 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 18 Apr 2008 18:30:41 +0100 Subject: rtasm: add x86_imul --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 11 ++++++++++- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 1 + 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 5c25fa155d..7f8cc23d15 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -422,11 +422,20 @@ void x86_add( struct x86_function *p, void x86_mul( struct x86_function *p, struct x86_reg src ) { -// assert (src.file == file_REG32 && src.mod == mod_REG); emit_1ub(p, 0xf7); emit_modrm_noreg(p, 4, src ); } + +void x86_imul( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src ) +{ + emit_2ub(p, X86_TWOB, 0xAF); + emit_modrm(p, dst, src); +} + + void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index dfde661f46..5e99ceea70 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -213,6 +213,7 @@ void x86_inc( struct x86_function *p, struct x86_reg reg ); void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_mul( struct x86_function *p, struct x86_reg src ); +void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_pop( struct x86_function *p, struct x86_reg reg ); void x86_push( struct x86_function *p, struct x86_reg reg ); -- cgit v1.2.3 From 7400bc4b6fb0c20a935cd108afa92814eeafec6d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 18 Apr 2008 18:31:45 +0100 Subject: translate: add sse version based on old draw_vf_sse.c --- src/gallium/auxiliary/draw/draw_pt_emit.c | 2 +- src/gallium/auxiliary/draw/draw_pt_fetch.c | 2 +- src/gallium/auxiliary/translate/Makefile | 4 +- src/gallium/auxiliary/translate/SConscript | 2 + src/gallium/auxiliary/translate/translate.h | 4 + src/gallium/auxiliary/translate/translate_sse.c | 615 ++++++++++++++++++++++++ 6 files changed, 626 insertions(+), 3 deletions(-) create mode 100644 src/gallium/auxiliary/translate/translate_sse.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index ef9db70a02..a6a3ff6cac 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -127,7 +127,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit, if (emit->translate) emit->translate->release(emit->translate); - emit->translate = translate_generic_create( &hw_key ); + emit->translate = translate_create( &hw_key ); } } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index a7553023b8..1aed251c85 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -119,7 +119,7 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, if (fetch->translate) fetch->translate->release(fetch->translate); - fetch->translate = translate_generic_create( &key ); + fetch->translate = translate_create( &key ); if (emit_header) { static struct vertex_header vh = { 0, 0, 0, 0xffff }; diff --git a/src/gallium/auxiliary/translate/Makefile b/src/gallium/auxiliary/translate/Makefile index 051987bb7e..39dfb0de30 100644 --- a/src/gallium/auxiliary/translate/Makefile +++ b/src/gallium/auxiliary/translate/Makefile @@ -4,7 +4,9 @@ include $(TOP)/configs/current LIBNAME = translate C_SOURCES = \ - translate_generic.c + translate_generic.c \ + translate_sse.c \ + translate.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/translate/SConscript b/src/gallium/auxiliary/translate/SConscript index 42513ba3b0..7608908915 100644 --- a/src/gallium/auxiliary/translate/SConscript +++ b/src/gallium/auxiliary/translate/SConscript @@ -4,6 +4,8 @@ translate = env.ConvenienceLibrary( target = 'translate', source = [ 'translate_generic.c', + 'translate_sse.c', + 'translate.c', ]) auxiliaries.insert(0, translate) diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index 4f9f40e51a..d95d1ac4f3 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -42,6 +42,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_format.h" +#include "pipe/p_state.h" struct translate_element { @@ -93,6 +94,9 @@ struct translate *translate_lookup_or_create( struct translate_context *tctx, #endif +struct translate *translate_create( const struct translate_key *key ); + + /******************************************************************************* * Private: */ diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c new file mode 100644 index 0000000000..cb8815c173 --- /dev/null +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -0,0 +1,615 @@ +/* + * Copyright 2003 Tungsten Graphics, inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Authors: + * Keith Whitwell + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_util.h" +#include "util/u_simple_list.h" + +#include "translate.h" + + +#if defined(__i386__) || defined(__386__) || defined(i386) + +#include "rtasm/rtasm_cpu.h" +#include "rtasm/rtasm_x86sse.h" + + +#define X 0 +#define Y 1 +#define Z 2 +#define W 3 + + +typedef void (*run_func)( struct translate *translate, + unsigned start, + unsigned count, + void *output_buffer ); + +typedef void (*run_elts_func)( struct translate *translate, + const unsigned *elts, + unsigned count, + void *output_buffer ); + + + +struct translate_sse { + struct translate translate; + + struct x86_function linear_func; + struct x86_function elt_func; + struct x86_function *func; + + boolean loaded_identity; + boolean loaded_255; + boolean loaded_inv_255; + + float identity[4]; + float float_255[4]; + float inv_255[4]; + + struct { + char *input_ptr; + unsigned input_stride; + } attrib[PIPE_MAX_ATTRIBS]; + + run_func gen_run; + run_elts_func gen_run_elts; + +}; + +static int get_offset( const void *a, const void *b ) +{ + return (const char *)b - (const char *)a; +} + + + +static struct x86_reg get_identity( struct translate_sse *p ) +{ + struct x86_reg reg = x86_make_reg(file_XMM, 6); + + if (!p->loaded_identity) { + /* Nasty: + */ + struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI); + + p->loaded_identity = TRUE; + p->identity[0] = 0; + p->identity[1] = 0; + p->identity[2] = 0; + p->identity[3] = 1; + + sse_movups(p->func, reg, + x86_make_disp(translateESI, + get_offset(p, &p->identity[0]))); + } + + return reg; +} + +static struct x86_reg get_255( struct translate_sse *p ) +{ + struct x86_reg reg = x86_make_reg(file_XMM, 6); + + if (!p->loaded_255) { + struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI); + + p->loaded_255 = TRUE; + p->float_255[0] = + p->float_255[1] = + p->float_255[2] = + p->float_255[3] = 255.0f; + + sse_movups(p->func, reg, + x86_make_disp(translateESI, + get_offset(p, &p->float_255[0]))); + } + + return reg; + return x86_make_reg(file_XMM, 7); +} + +static struct x86_reg get_inv_255( struct translate_sse *p ) +{ + struct x86_reg reg = x86_make_reg(file_XMM, 5); + + if (!p->loaded_inv_255) { + struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI); + + p->loaded_inv_255 = TRUE; + p->inv_255[0] = + p->inv_255[1] = + p->inv_255[2] = + p->inv_255[3] = 1.0 / 255.0f; + + sse_movups(p->func, reg, + x86_make_disp(translateESI, + get_offset(p, &p->inv_255[0]))); + } + + return reg; +} + + +static void emit_load_R32G32B32A32( struct translate_sse *p, + struct x86_reg data, + struct x86_reg arg0 ) +{ + sse_movups(p->func, data, arg0); +} + +static void emit_load_R32G32B32( struct translate_sse *p, + struct x86_reg data, + struct x86_reg arg0 ) +{ + /* Have to jump through some hoops: + * + * c 0 0 0 + * c 0 0 1 + * 0 0 c 1 + * a b c 1 + */ + sse_movss(p->func, data, x86_make_disp(arg0, 8)); + sse_shufps(p->func, data, get_identity(p), SHUF(X,Y,Z,W) ); + sse_shufps(p->func, data, data, SHUF(Y,Z,X,W) ); + sse_movlps(p->func, data, arg0); +} + +static void emit_load_R32G32( struct translate_sse *p, + struct x86_reg data, + struct x86_reg arg0 ) +{ + /* 0 0 0 1 + * a b 0 1 + */ + sse_movups(p->func, data, get_identity(p) ); + sse_movlps(p->func, data, arg0); +} + + +static void emit_load_R32( struct translate_sse *p, + struct x86_reg data, + struct x86_reg arg0 ) +{ + /* a 0 0 0 + * a 0 0 1 + */ + sse_movss(p->func, data, arg0); + sse_orps(p->func, data, get_identity(p) ); +} + + +static void emit_load_R8G8B8A8_UNORM( struct translate_sse *p, + struct x86_reg data, + struct x86_reg src ) +{ + + /* Load and unpack twice: + */ + sse_movss(p->func, data, src); + sse2_punpcklbw(p->func, src, get_identity(p)); + sse2_punpcklbw(p->func, src, get_identity(p)); + + /* Convert to float: + */ + sse2_cvtdq2ps(p->func, src, src); + + + /* Scale by 1/255.0 + */ + sse_mulps(p->func, src, get_inv_255(p)); +} + + + + +static void emit_store_R32G32B32A32( struct translate_sse *p, + struct x86_reg dest, + struct x86_reg dataXMM ) +{ + sse_movups(p->func, dest, dataXMM); +} + +static void emit_store_R32G32B32( struct translate_sse *p, + struct x86_reg dest, + struct x86_reg dataXMM ) +{ + /* Emit two, shuffle, emit one. + */ + sse_movlps(p->func, dest, dataXMM); + sse_shufps(p->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */ + sse_movss(p->func, x86_make_disp(dest,8), dataXMM); +} + +static void emit_store_R32G32( struct translate_sse *p, + struct x86_reg dest, + struct x86_reg dataXMM ) +{ + sse_movlps(p->func, dest, dataXMM); +} + +static void emit_store_R32( struct translate_sse *p, + struct x86_reg dest, + struct x86_reg dataXMM ) +{ + sse_movss(p->func, dest, dataXMM); +} + + + +static void emit_store_R8G8B8A8_UNORM( struct translate_sse *p, + struct x86_reg dest, + struct x86_reg dataXMM ) +{ + /* Scale by 255.0 + */ + sse_mulps(p->func, dataXMM, get_255(p)); + + /* Pack and emit: + */ + sse2_cvtps2dq(p->func, dataXMM, dataXMM); + sse2_packssdw(p->func, dataXMM, dataXMM); + sse2_packuswb(p->func, dataXMM, dataXMM); + sse_movss(p->func, dest, dataXMM); +} + + + + + +static void get_src_ptr( struct translate_sse *p, + struct x86_reg srcEAX, + struct x86_reg translateREG, + struct x86_reg eltREG, + unsigned a ) +{ + struct x86_reg input_ptr = + x86_make_disp(translateREG, + get_offset(p, &p->attrib[a].input_ptr)); + + struct x86_reg input_stride = + x86_make_disp(translateREG, + get_offset(p, &p->attrib[a].input_stride)); + + /* Calculate pointer to current attrib: + */ + x86_mov(p->func, srcEAX, input_stride); + x86_imul(p->func, srcEAX, eltREG); + x86_add(p->func, srcEAX, input_ptr); +} + + +/* Extended swizzles? Maybe later. + */ +static void emit_swizzle( struct translate_sse *p, + struct x86_reg dest, + struct x86_reg src, + unsigned shuffle ) +{ + sse_shufps(p->func, dest, src, shuffle); +} + + +static boolean translate_attr( struct translate_sse *p, + const struct translate_element *a, + struct x86_reg srcECX, + struct x86_reg dstEAX) +{ + struct x86_reg dataXMM = x86_make_reg(file_XMM, 0); + + switch (a->input_format) { + case PIPE_FORMAT_R32_FLOAT: + emit_load_R32(p, dataXMM, srcECX); + break; + case PIPE_FORMAT_R32G32_FLOAT: + emit_load_R32G32(p, dataXMM, srcECX); + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + emit_load_R32G32B32(p, dataXMM, srcECX); + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + emit_load_R32G32B32A32(p, dataXMM, srcECX); + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX); + emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W)); + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + emit_load_R8G8B8A8_UNORM(p, dataXMM, srcECX); + break; + default: + return FALSE; + } + + switch (a->output_format) { + case PIPE_FORMAT_R32_FLOAT: + emit_store_R32(p, dstEAX, dataXMM); + break; + case PIPE_FORMAT_R32G32_FLOAT: + emit_store_R32G32(p, dstEAX, dataXMM); + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + emit_store_R32G32B32(p, dstEAX, dataXMM); + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + emit_store_R32G32B32A32(p, dstEAX, dataXMM); + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + emit_swizzle(p, dataXMM, dataXMM, SHUF(Z,Y,X,W)); + emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM); + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + emit_store_R8G8B8A8_UNORM(p, dstEAX, dataXMM); + break; + default: + return FALSE; + } + + return TRUE; +} + +/* Build run( struct translate *translate, + * unsigned start, + * unsigned count, + * void *output_buffer ) + * or + * run_elts( struct translate *translate, + * unsigned *elts, + * unsigned count, + * void *output_buffer ) + * + * Lots of hardcoding + * + * EAX -- pointer to current output vertex + * ECX -- pointer to current attribute + * + */ +static boolean build_vertex_emit( struct translate_sse *p, + struct x86_function *func, + boolean linear ) +{ + struct x86_reg vertexECX = x86_make_reg(file_REG32, reg_AX); + struct x86_reg idxEBX = x86_make_reg(file_REG32, reg_BX); + struct x86_reg srcEAX = x86_make_reg(file_REG32, reg_CX); + struct x86_reg countEBP = x86_make_reg(file_REG32, reg_BP); + struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI); + uint8_t *fixup, *label; + unsigned j; + + p->func = func; + p->loaded_inv_255 = FALSE; + p->loaded_255 = FALSE; + p->loaded_identity = FALSE; + + x86_init_func(p->func); + + /* Push a few regs? + */ + x86_push(p->func, countEBP); + x86_push(p->func, translateESI); + x86_push(p->func, idxEBX); + + /* Get vertex count, compare to zero + */ + x86_xor(p->func, idxEBX, idxEBX); + x86_mov(p->func, countEBP, x86_fn_arg(p->func, 3)); + x86_cmp(p->func, countEBP, idxEBX); + fixup = x86_jcc_forward(p->func, cc_E); + + /* If linear, idx is the current element, otherwise it is a pointer + * to the current element. + */ + x86_mov(p->func, idxEBX, x86_fn_arg(p->func, 2)); + + /* Initialize destination register. + */ + x86_mov(p->func, vertexECX, x86_fn_arg(p->func, 4)); + + /* Move argument 1 (translate_sse pointer) into a reg: + */ + x86_mov(p->func, translateESI, x86_fn_arg(p->func, 1)); + + + /* always load, needed or not: + */ + + /* Note address for loop jump */ + label = x86_get_label(p->func); + + + for (j = 0; j < p->translate.key.nr_elements; j++) { + const struct translate_element *a = &p->translate.key.element[j]; + + struct x86_reg destEAX = x86_make_disp(vertexECX, + a->output_offset); + + /* Figure out source pointer address: + */ + if (linear) { + get_src_ptr(p, srcEAX, translateESI, idxEBX, j); + } + else { + get_src_ptr(p, srcEAX, translateESI, x86_deref(idxEBX), j); + } + + if (!translate_attr( p, a, x86_deref(srcEAX), destEAX )) + return FALSE; + } + + /* Next vertex: + */ + x86_lea(p->func, vertexECX, x86_make_disp(vertexECX, p->translate.key.output_stride)); + + /* Incr index + */ /* Emit code for each of the attributes. Currently routes + * everything through SSE registers, even when it might be more + * efficient to stick with regular old x86. No optimization or + * other tricks - enough new ground to cover here just getting + * things working. + */ + + if (linear) { + x86_inc(p->func, idxEBX); + } + else { + x86_lea(p->func, idxEBX, x86_make_disp(idxEBX, 4)); + } + + /* decr count, loop if not zero + */ + x86_dec(p->func, countEBP); + x86_test(p->func, countEBP, countEBP); + x86_jcc(p->func, cc_NZ, label); + + /* Exit mmx state? + */ + if (p->func->need_emms) + mmx_emms(p->func); + + /* Land forward jump here: + */ + x86_fixup_fwd_jump(p->func, fixup); + + /* Pop regs and return + */ + + x86_pop(p->func, idxEBX); + x86_pop(p->func, translateESI); + x86_pop(p->func, countEBP); + x86_ret(p->func); + + return TRUE; +} + + + + + + + +static void translate_sse_set_buffer( struct translate *translate, + unsigned buf, + const void *ptr, + unsigned stride ) +{ + struct translate_sse *p = (struct translate_sse *)translate; + unsigned i; + + for (i = 0; i < p->translate.key.nr_elements; i++) { + if (p->translate.key.element[i].input_buffer == buf) { + p->attrib[i].input_ptr = ((char *)ptr + + p->translate.key.element[i].input_offset); + p->attrib[i].input_stride = stride; + } + } +} + + +static void translate_sse_release( struct translate *translate ) +{ + struct translate_sse *p = (struct translate_sse *)translate; + + x86_release_func( &p->linear_func ); + x86_release_func( &p->elt_func ); + + FREE(p); +} + +static void translate_sse_run_elts( struct translate *translate, + const unsigned *elts, + unsigned count, + void *output_buffer ) +{ + struct translate_sse *p = (struct translate_sse *)translate; + + p->gen_run_elts( translate, + elts, + count, + output_buffer ); + +} + +static void translate_sse_run( struct translate *translate, + unsigned start, + unsigned count, + void *output_buffer ) +{ + struct translate_sse *p = (struct translate_sse *)translate; + + p->gen_run( translate, + start, + count, + output_buffer ); +} + + +struct translate *translate_sse2_create( const struct translate_key *key ) +{ + struct translate_sse *p = CALLOC_STRUCT( translate_sse ); + + if (p == NULL) + goto fail; + + if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2()) + goto fail; + + + p->translate.key = *key; + p->translate.release = translate_sse_release; + p->translate.set_buffer = translate_sse_set_buffer; + p->translate.run_elts = translate_sse_run_elts; + p->translate.run = translate_sse_run; + + if (!build_vertex_emit(p, &p->linear_func, TRUE)) + goto fail; + + if (!build_vertex_emit(p, &p->elt_func, FALSE)) + goto fail; + + p->gen_run = (run_func)x86_get_func(&p->linear_func); + p->gen_run_elts = (run_elts_func)x86_get_func(&p->elt_func); + + return &p->translate; + + fail: + if (p) + p->translate.release( &p->translate ); + + return NULL; +} + + + +#else + +void translate_create_sse( const struct translate_key *key ) +{ + return NULL; +} + +#endif -- cgit v1.2.3 From 26c27f6636069ca849a740c3969c577d841484e2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 18 Apr 2008 18:42:41 +0100 Subject: draw: remove fetch_pipeline middle end -- just use the general path --- src/gallium/auxiliary/draw/Makefile | 1 - src/gallium/auxiliary/draw/SConscript | 1 - src/gallium/auxiliary/draw/draw_pt.c | 4 - src/gallium/auxiliary/draw/draw_pt.h | 26 +- .../auxiliary/draw/draw_pt_fetch_pipeline.c | 327 --------------------- 5 files changed, 15 insertions(+), 344 deletions(-) delete mode 100644 src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 154c8a99b5..916e134719 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -18,7 +18,6 @@ C_SOURCES = \ draw_pt.c \ draw_pt_vcache.c \ draw_pt_fetch_emit.c \ - draw_pt_fetch_pipeline.c \ draw_pt_fetch_shade_pipeline.c \ draw_pt_fetch.c \ draw_pt_post_vs.c \ diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 0274d48e7f..6dc0195d93 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -18,7 +18,6 @@ draw = env.ConvenienceLibrary( 'draw_pt_emit.c', 'draw_pt_fetch.c', 'draw_pt_fetch_emit.c', - 'draw_pt_fetch_pipeline.c', 'draw_pt_fetch_shade_pipeline.c', 'draw_pt_pipeline.c', 'draw_pt_post_vs.c', diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 60a47f3911..b48d0cae9f 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -106,10 +106,6 @@ boolean draw_pt_init( struct draw_context *draw ) return FALSE; draw->pt.middle.opt[0] = draw_pt_fetch_emit( draw ); - draw->pt.middle.opt[PT_PIPELINE] = draw_pt_fetch_pipeline( draw ); -// draw->pt.middle.opt[PT_SHADE] = draw_pt_shade_emit( draw ); -// draw->pt.middle.opt[PT_SHADE | PT_PIPELINE] = draw_pt_shade_pipeline( draw ); -// draw->pt.middle.opt[PT_SHADE | PT_CLIPTEST] = draw_pt_shade_clip_either( draw ); draw->pt.middle.opt[PT_SHADE | PT_CLIPTEST | PT_PIPELINE] = draw_pt_fetch_pipeline_or_emit( draw ); diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 316289969b..eb23ee1c1e 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -83,11 +83,6 @@ struct draw_pt_front_end { * Currently two versions of this: * - fetch, vertex shade, cliptest, prim-pipeline * - fetch, emit (ie passthrough) - * Later: - * - fetch, vertex shade, cliptest, maybe-pipeline, maybe-emit - * - fetch, vertex shade, emit - * - * Currenly only using the passthrough version. */ struct draw_pt_middle_end { void (*prepare)( struct draw_pt_middle_end *, @@ -106,10 +101,6 @@ struct draw_pt_middle_end { /* The "back end" - supplied by the driver, defined in draw_vbuf.h. - * - * Not sure whether to wrap the prim pipeline up as an alternate - * backend. Would be a win for everything except pure passthrough - * mode... */ struct vbuf_render; struct vertex_header; @@ -121,11 +112,24 @@ pt_elt_func draw_pt_elt_func( struct draw_context *draw ); const void *draw_pt_elt_ptr( struct draw_context *draw, unsigned start ); -/* Implementations: +/* Frontends: + * + * Currently only the general-purpose vcache implementation, could add + * a special case for tiny vertex buffers. */ struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ); + +/* Middle-ends: + * + * Currently one general-purpose case which can do all possibilities, + * at the slight expense of creating a vertex_header in some cases + * unecessarily. + * + * The special case fetch_emit code avoids pipeline vertices + * altogether and builds hardware vertices directly from API + * vertex_elements. + */ struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); -struct draw_pt_middle_end *draw_pt_fetch_pipeline( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c deleted file mode 100644 index 26d0b37286..0000000000 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_pipeline.c +++ /dev/null @@ -1,327 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell - */ - -#include "pipe/p_util.h" -#include "draw/draw_context.h" -#include "draw/draw_vertex.h" -#include "draw/draw_pt.h" -#include "draw/draw_vs.h" - -/* The simplest 'middle end' in the new vertex code. - * - * The responsibilities of a middle end are to: - * - perform vertex fetch using - * - draw vertex element/buffer state - * - a list of fetch indices we received as an input - * - run the vertex shader - * - cliptest, - * - clip coord calculation - * - viewport transformation - * - if necessary, run the primitive pipeline, passing it: - * - a linear array of vertex_header vertices constructed here - * - a set of draw indices we received as an input - * - otherwise, drive the hw backend, - * - allocate space for hardware format vertices - * - translate the vertex-shader output vertices to hw format - * - calling the backend draw functions. - * - * For convenience, we provide a helper function to drive the hardware - * backend given similar inputs to those required to run the pipeline. - * - * In the case of passthrough mode, many of these actions are disabled - * or noops, so we end up doing: - * - * - perform vertex fetch - * - drive the hw backend - * - * IE, basically just vertex fetch to post-vs-format vertices, - * followed by a call to the backend helper function. - */ - - -struct fetch_pipeline_middle_end { - struct draw_pt_middle_end base; - struct draw_context *draw; - - void (*header)( unsigned idx, float **out); - - struct { - const ubyte *ptr; - unsigned pitch; - void (*fetch)( const void *from, float *attrib); - void (*emit)( const float *attrib, float **out ); - } fetch[PIPE_MAX_ATTRIBS]; - - unsigned nr_fetch; - unsigned pipeline_vertex_size; - unsigned prim; -}; - - -#if 0 -static void emit_R32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out) += 1; -} - -static void emit_R32G32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out) += 2; -} - -static void emit_R32G32B32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out)[2] = attrib[2]; - (*out) += 3; -} -#endif -static void emit_R32G32B32A32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out)[2] = attrib[2]; - (*out)[3] = attrib[3]; - (*out) += 4; -} - -static void header( unsigned idx, - float **out ) -{ - struct vertex_header *header = (struct vertex_header *) (*out); - - header->clipmask = 0; - header->edgeflag = 1; - header->pad = 0; - header->vertex_id = UNDEFINED_VERTEX_ID; - - (*out)[1] = 0; - (*out)[2] = 0; - (*out)[3] = 0; - (*out)[3] = 1; - (*out) += 5; -} - - -static void header_ef( unsigned idx, - float **out ) -{ - struct vertex_header *header = (struct vertex_header *) (*out); - - /* XXX: need a reset_stipple flag in the vertex header too? - */ - header->clipmask = 0; - header->edgeflag = (idx & DRAW_PT_EDGEFLAG) != 0; - header->pad = 0; - header->vertex_id = UNDEFINED_VERTEX_ID; - - (*out)[1] = 0; - (*out)[2] = 0; - (*out)[3] = 0; - (*out)[3] = 1; - (*out) += 5; -} - - -/** - * General-purpose fetch from user's vertex arrays, emit to driver's - * vertex buffer. - * - * XXX this is totally temporary. - */ -static void -fetch_store_general( struct fetch_pipeline_middle_end *fpme, - void *out_ptr, - const unsigned *fetch_elts, - unsigned count ) -{ - float *out = (float *)out_ptr; - uint i, j; - - for (i = 0; i < count; i++) { - unsigned elt = fetch_elts[i]; - - fpme->header( elt, &out ); - elt &= ~DRAW_PT_FLAG_MASK; - - for (j = 0; j < fpme->nr_fetch; j++) { - float attrib[4]; - const ubyte *from = (fpme->fetch[j].ptr + - fpme->fetch[j].pitch * elt); - - fpme->fetch[j].fetch( from, attrib ); - fpme->fetch[j].emit( attrib, &out ); - } - } -} - - -/* We aren't running a vertex shader, but are running the pipeline. - * That means the vertices we need to build look like: - * - * dw0: vertex header (zero?) - * dw1: clip coord 0 - * dw2: clip coord 1 - * dw3: clip coord 2 - * dw4: clip coord 4 - * dw5: screen coord 0 - * dw6: screen coord 0 - * dw7: screen coord 0 - * dw8: screen coord 0 - * dw9: other attribs... - * - */ -static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, - unsigned prim, - unsigned opt ) -{ - struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; - struct draw_context *draw = fpme->draw; - unsigned i, nr = 0; - - fpme->prim = prim; - - /* Emit the vertex header and empty clipspace coord field: - */ - if (draw->user.edgeflag) { - fpme->header = header_ef; - } - else { - fpme->header = header; - } - - - /* Need to look at vertex shader inputs (we know it is a - * passthrough shader, so these define the outputs too). If we - * were running a shader, we'd still be looking at the inputs at - * this point. - */ - for (i = 0; i < draw->vertex_shader->info.num_inputs; i++) { - unsigned buf = draw->vertex_element[i].vertex_buffer_index; - enum pipe_format format = draw->vertex_element[i].src_format; - - fpme->fetch[nr].ptr = ((const ubyte *) draw->user.vbuffer[buf] + - draw->vertex_buffer[buf].buffer_offset + - draw->vertex_element[i].src_offset); - - fpme->fetch[nr].pitch = draw->vertex_buffer[buf].pitch; - fpme->fetch[nr].fetch = draw_get_fetch_func( format ); - - /* Always do this -- somewhat redundant... - */ - fpme->fetch[nr].emit = emit_R32G32B32A32_FLOAT; - nr++; - } - - fpme->nr_fetch = nr; - fpme->pipeline_vertex_size = sizeof(struct vertex_header) + nr * 4 * sizeof(float); -} - - - - -static void fetch_pipeline_run( struct draw_pt_middle_end *middle, - const unsigned *fetch_elts, - unsigned fetch_count, - const ushort *draw_elts, - unsigned draw_count ) -{ - struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; - char *pipeline_verts; - - pipeline_verts = MALLOC( fpme->pipeline_vertex_size * - fetch_count ); - if (!pipeline_verts) { - assert(0); - return; - } - - - /* Single routine to fetch vertices and emit pipeline verts. - */ - fetch_store_general( fpme, - pipeline_verts, - fetch_elts, - fetch_count ); - - - /* Run the pipeline - */ - draw_pt_run_pipeline( fpme->draw, - fpme->prim, - (struct vertex_header *)pipeline_verts, - fetch_count, - fpme->pipeline_vertex_size, - draw_elts, - draw_count ); - - - /* Done -- that was easy, wasn't it: - */ - FREE( pipeline_verts ); -} - - - -static void fetch_pipeline_finish( struct draw_pt_middle_end *middle ) -{ - /* nothing to do */ -} - -static void fetch_pipeline_destroy( struct draw_pt_middle_end *middle ) -{ - FREE(middle); -} - - -struct draw_pt_middle_end *draw_pt_fetch_pipeline( struct draw_context *draw ) -{ - struct fetch_pipeline_middle_end *fetch_pipeline = CALLOC_STRUCT( fetch_pipeline_middle_end ); - - fetch_pipeline->base.prepare = fetch_pipeline_prepare; - fetch_pipeline->base.run = fetch_pipeline_run; - fetch_pipeline->base.finish = fetch_pipeline_finish; - fetch_pipeline->base.destroy = fetch_pipeline_destroy; - - fetch_pipeline->draw = draw; - - return &fetch_pipeline->base; -} - -- cgit v1.2.3 From 19218e2195f3dffc9403f16a742ba8c63edbf8b4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 18 Apr 2008 11:15:18 -0600 Subject: gallium: implement recip sqrt() with C code for now. Some conformance lighting tests fail with the SSE rsqrt instruction. --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 30 +++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 0a3a7559ca..6f785be3f5 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -36,6 +36,8 @@ #if defined(__i386__) || defined(__386__) +#define HIGH_PRECISION 1 /* for 1/sqrt() */ + #define DUMP_SSE 0 #if DUMP_SSE @@ -1137,16 +1139,44 @@ emit_rcp ( make_xmm( xmm_src ) ); } +#if HIGH_PRECISION +static void XSTDCALL +rsqrt4f( + float *store ) +{ +#ifdef WIN32 + store[0] = 1.0F / (float) sqrt( (double) store[0] ); + store[1] = 1.0F / (float) sqrt( (double) store[1] ); + store[2] = 1.0F / (float) sqrt( (double) store[2] ); + store[3] = 1.0F / (float) sqrt( (double) store[3] ); +#else + const unsigned X = TEMP_R0 * 16; + store[X + 0] = 1.0F / sqrt( store[X + 0] ); + store[X + 1] = 1.0F / sqrt( store[X + 1] ); + store[X + 2] = 1.0F / sqrt( store[X + 2] ); + store[X + 3] = 1.0F / sqrt( store[X + 3] ); +#endif +} +#endif + static void emit_rsqrt( struct x86_function *func, unsigned xmm_dst, unsigned xmm_src ) { +#if HIGH_PRECISION + emit_func_call_dst_src( + func, + xmm_dst, + xmm_src, + rsqrt4f ); +#else emit_rsqrtps( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) ); +#endif } static void -- cgit v1.2.3 From e430d885e0d819172068805b1492cb6f10eb5d7f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 18 Apr 2008 11:15:53 -0600 Subject: gallium: a few comments --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 6f785be3f5..d47935e982 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -1598,20 +1598,25 @@ emit_instruction( STORE( func, *inst, 0, 0, CHAN_Y ); } if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + /* XMM[1] = SrcReg[0].yyyy */ FETCH( func, *inst, 1, 0, CHAN_Y ); + /* XMM[1] = max(XMM[1], 0) */ emit_maxps( func, make_xmm( 1 ), get_temp( TGSI_EXEC_TEMP_00000000_I, TGSI_EXEC_TEMP_00000000_C ) ); + /* XMM[2] = SrcReg[0].wwww */ FETCH( func, *inst, 2, 0, CHAN_W ); + /* XMM[2] = min(XMM[2], 128.0) */ emit_minps( func, make_xmm( 2 ), get_temp( TGSI_EXEC_TEMP_128_I, TGSI_EXEC_TEMP_128_C ) ); + /* XMM[2] = max(XMM[2], -128.0) */ emit_maxps( func, make_xmm( 2 ), -- cgit v1.2.3 From 7b34a43d1ab74974bd157a2339d7a491aed9c9b4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 18 Apr 2008 18:51:43 +0100 Subject: translate: missing file --- src/gallium/auxiliary/translate/translate.c | 46 +++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) create mode 100644 src/gallium/auxiliary/translate/translate.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c new file mode 100644 index 0000000000..86ebeae2a7 --- /dev/null +++ b/src/gallium/auxiliary/translate/translate.c @@ -0,0 +1,46 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "pipe/p_state.h" +#include "translate.h" + +struct translate *translate_create( const struct translate_key *key ) +{ + struct translate *translate = NULL; + + translate = translate_sse2_create( key ); + if (translate) + return translate; + + return translate_generic_create( key ); +} -- cgit v1.2.3 From cb9f0a589623397c3437911aeef39f189213527a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 18 Apr 2008 20:05:36 +0100 Subject: draw: remove draw_vf code, use translate instead --- src/gallium/auxiliary/draw/Makefile | 3 - src/gallium/auxiliary/draw/SConscript | 3 - src/gallium/auxiliary/draw/draw_pt_emit.c | 3 +- src/gallium/auxiliary/draw/draw_vbuf.c | 188 ++++---- src/gallium/auxiliary/draw/draw_vf.c | 378 ----------------- src/gallium/auxiliary/draw/draw_vf.h | 232 ---------- src/gallium/auxiliary/draw/draw_vf_generic.c | 585 ------------------------- src/gallium/auxiliary/draw/draw_vf_sse.c | 613 --------------------------- 8 files changed, 115 insertions(+), 1890 deletions(-) delete mode 100644 src/gallium/auxiliary/draw/draw_vf.c delete mode 100644 src/gallium/auxiliary/draw/draw_vf.h delete mode 100644 src/gallium/auxiliary/draw/draw_vf_generic.c delete mode 100644 src/gallium/auxiliary/draw/draw_vf_sse.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 916e134719..27464e5c86 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -35,9 +35,6 @@ C_SOURCES = \ draw_vertex_cache.c \ draw_vertex_fetch.c \ draw_vertex_shader.c \ - draw_vf.c \ - draw_vf_generic.c \ - draw_vf_sse.c \ draw_wide_line.c \ draw_wide_point.c diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 6dc0195d93..7af65c3c05 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -31,9 +31,6 @@ draw = env.ConvenienceLibrary( 'draw_vertex_cache.c', 'draw_vertex_fetch.c', 'draw_vertex_shader.c', - 'draw_vf.c', - 'draw_vf_generic.c', - 'draw_vf_sse.c', 'draw_vs_exec.c', 'draw_vs_llvm.c', 'draw_vs_sse.c', diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index a6a3ff6cac..490da4cca3 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -62,8 +62,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit, vinfo = draw->render->get_vertex_info(draw->render); - /* In passthrough mode, need to translate from vertex shader - * outputs to hw vertices. + /* Translate from pipeline vertices to hw vertices. */ dst_offset = 0; for (i = 0; i < vinfo->num_attribs; i++) { diff --git a/src/gallium/auxiliary/draw/draw_vbuf.c b/src/gallium/auxiliary/draw/draw_vbuf.c index 59b63f5bfa..dbbcf05c3c 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_vbuf.c @@ -40,7 +40,7 @@ #include "draw_vbuf.h" #include "draw_private.h" #include "draw_vertex.h" -#include "draw_vf.h" +#include "translate/translate.h" /** @@ -56,7 +56,7 @@ struct vbuf_stage { /** Vertex size in bytes */ unsigned vertex_size; - struct draw_vertex_fetch *vf; + struct translate *translate; /* FIXME: we have no guarantee that 'unsigned' is 32bit */ @@ -71,8 +71,9 @@ struct vbuf_stage { unsigned max_indices; unsigned nr_indices; - /** Pipe primitive */ - unsigned prim; + /* Cache point size somewhere it's address won't change: + */ + float point_size; }; @@ -175,26 +176,25 @@ dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) * have a couple of slots at the beginning (1-dword header, 4-dword * clip pos) that we ignore here. We only use the vertex->data[] fields. */ -static INLINE void +static INLINE ushort emit_vertex( struct vbuf_stage *vbuf, struct vertex_header *vertex ) { - if(vertex->vertex_id != UNDEFINED_VERTEX_ID) { - if(vertex->vertex_id < vbuf->nr_vertices) - return; - else - debug_printf("Bad vertex id 0x%04x (>= 0x%04x)\n", - vertex->vertex_id, vbuf->nr_vertices); - return; - } + if(vertex->vertex_id == UNDEFINED_VERTEX_ID) { + /* Hmm - vertices are emitted one at a time - better make sure + * set_buffer is efficient. Consider a special one-shot mode for + * translate. + */ + vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0); + vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr); + + if (0) dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); - vertex->vertex_id = vbuf->nr_vertices++; - - draw_vf_emit_vertex(vbuf->vf, vertex, vbuf->vertex_ptr); + vbuf->vertex_ptr += vbuf->vertex_size/4; + vertex->vertex_id = vbuf->nr_vertices++; + } - if (0) dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); - - vbuf->vertex_ptr += vbuf->vertex_size/4; + return vertex->vertex_id; } @@ -208,9 +208,7 @@ vbuf_tri( struct draw_stage *stage, check_space( vbuf, 3 ); for (i = 0; i < 3; i++) { - emit_vertex( vbuf, prim->v[i] ); - - vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[i]->vertex_id; + vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] ); } } @@ -225,9 +223,7 @@ vbuf_line( struct draw_stage *stage, check_space( vbuf, 2 ); for (i = 0; i < 2; i++) { - emit_vertex( vbuf, prim->v[i] ); - - vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[i]->vertex_id; + vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] ); } } @@ -240,43 +236,112 @@ vbuf_point( struct draw_stage *stage, check_space( vbuf, 1 ); - emit_vertex( vbuf, prim->v[0] ); - - vbuf->indices[vbuf->nr_indices++] = (ushort) prim->v[0]->vertex_id; + vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[0] ); } + + /** * Set the prim type for subsequent vertices. * This may result in a new vertex size. The existing vbuffer (if any) * will be flushed if needed and a new one allocated. */ static void -vbuf_set_prim( struct vbuf_stage *vbuf, uint newprim ) +vbuf_set_prim( struct vbuf_stage *vbuf, uint prim ) { - const struct vertex_info *vinfo; - unsigned vertex_size; - - assert(newprim == PIPE_PRIM_POINTS || - newprim == PIPE_PRIM_LINES || - newprim == PIPE_PRIM_TRIANGLES); + struct translate_key hw_key; + unsigned dst_offset; + unsigned i; - vbuf->prim = newprim; - vbuf->render->set_primitive(vbuf->render, newprim); + vbuf->render->set_primitive(vbuf->render, prim); - vinfo = vbuf->render->get_vertex_info(vbuf->render); - vertex_size = vinfo->size * sizeof(float); + /* Must do this after set_primitive() above: + * + * XXX: need some state managment to track when this needs to be + * recalculated. The driver should tell us whether there was a + * state change. + */ + vbuf->vinfo = vbuf->render->get_vertex_info(vbuf->render); - if (vertex_size != vbuf->vertex_size) + if (vbuf->vertex_size != vbuf->vinfo->size * sizeof(float)) { vbuf_flush_vertices(vbuf); + vbuf->vertex_size = vbuf->vinfo->size * sizeof(float); + } - vbuf->vinfo = vinfo; - vbuf->vertex_size = vertex_size; - if(vbuf->vf) - draw_vf_set_vertex_info(vbuf->vf, - vbuf->vinfo, - vbuf->stage.draw->rasterizer->point_size); - + /* Translate from pipeline vertices to hw vertices. + */ + dst_offset = 0; + memset(&hw_key, 0, sizeof(hw_key)); + + for (i = 0; i < vbuf->vinfo->num_attribs; i++) { + unsigned emit_sz = 0; + unsigned src_buffer = 0; + unsigned output_format; + unsigned src_offset = (vbuf->vinfo->src_index[i] * 4 * sizeof(float) ); + + switch (vbuf->vinfo->emit[i]) { + case EMIT_4F: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); + break; + case EMIT_3F: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); + break; + case EMIT_2F: + output_format = PIPE_FORMAT_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); + break; + case EMIT_1F: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + break; + case EMIT_1F_PSIZE: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + src_buffer = 1; + src_offset = 0; + break; + case EMIT_4UB: + output_format = PIPE_FORMAT_B8G8R8A8_UNORM; + emit_sz = 4 * sizeof(ubyte); + default: + assert(0); + output_format = PIPE_FORMAT_NONE; + emit_sz = 0; + break; + } + + hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + hw_key.element[i].input_buffer = src_buffer; + hw_key.element[i].input_offset = src_offset; + hw_key.element[i].output_format = output_format; + hw_key.element[i].output_offset = dst_offset; + + dst_offset += emit_sz; + } + + hw_key.nr_elements = vbuf->vinfo->num_attribs; + hw_key.output_stride = vbuf->vinfo->size * 4; + + /* Don't bother with caching at this stage: + */ + if (!vbuf->translate || + memcmp(&vbuf->translate->key, &hw_key, sizeof(hw_key)) != 0) + { + if (vbuf->translate) + vbuf->translate->release(vbuf->translate); + + vbuf->translate = translate_create( &hw_key ); + + vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0); + } + + vbuf->point_size = vbuf->stage.draw->rasterizer->point_size; + + /* Allocate new buffer? + */ if (!vbuf->vertices) vbuf_alloc_vertices(vbuf); } @@ -330,29 +395,9 @@ vbuf_flush_indices( struct vbuf_stage *vbuf ) assert((uint) (vbuf->vertex_ptr - vbuf->vertices) == vbuf->nr_vertices * vbuf->vertex_size / sizeof(unsigned)); - switch(vbuf->prim) { - case PIPE_PRIM_POINTS: - break; - case PIPE_PRIM_LINES: - assert(vbuf->nr_indices % 2 == 0); - break; - case PIPE_PRIM_TRIANGLES: - assert(vbuf->nr_indices % 3 == 0); - break; - default: - assert(0); - } - vbuf->render->draw(vbuf->render, vbuf->indices, vbuf->nr_indices); vbuf->nr_indices = 0; - - /* don't need to reset point/line/tri functions */ -#if 0 - stage->point = vbuf_first_point; - stage->line = vbuf_first_line; - stage->tri = vbuf_first_tri; -#endif } @@ -433,8 +478,8 @@ static void vbuf_destroy( struct draw_stage *stage ) if(vbuf->indices) align_free( vbuf->indices ); - if(vbuf->vf) - draw_vf_destroy( vbuf->vf ); + if(vbuf->translate) + vbuf->translate->release( vbuf->translate ); if (vbuf->render) vbuf->render->destroy( vbuf->render ); @@ -473,11 +518,6 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw, vbuf->vertices = NULL; vbuf->vertex_ptr = vbuf->vertices; - - vbuf->prim = ~0; - vbuf->vf = draw_vf_create(); - if (!vbuf->vf) - goto fail; return &vbuf->stage; diff --git a/src/gallium/auxiliary/draw/draw_vf.c b/src/gallium/auxiliary/draw/draw_vf.c deleted file mode 100644 index 9d0154c50d..0000000000 --- a/src/gallium/auxiliary/draw/draw_vf.c +++ /dev/null @@ -1,378 +0,0 @@ -/* - * Copyright 2003 Tungsten Graphics, inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Keith Whitwell - */ - - -#include - -#include "pipe/p_compiler.h" -#include "pipe/p_util.h" -#include "rtasm/rtasm_execmem.h" - -#include "draw_vf.h" - - -#define DRAW_VF_DBG 0 - - -static boolean match_fastpath( struct draw_vertex_fetch *vf, - const struct draw_vf_fastpath *fp) -{ - unsigned j; - - if (vf->attr_count != fp->attr_count) - return FALSE; - - for (j = 0; j < vf->attr_count; j++) - if (vf->attr[j].format != fp->attr[j].format || - vf->attr[j].inputsize != fp->attr[j].size || - vf->attr[j].vertoffset != fp->attr[j].offset) - return FALSE; - - if (fp->match_strides) { - if (vf->vertex_stride != fp->vertex_stride) - return FALSE; - - for (j = 0; j < vf->attr_count; j++) - if (vf->attr[j].inputstride != fp->attr[j].stride) - return FALSE; - } - - return TRUE; -} - -static boolean search_fastpath_emit( struct draw_vertex_fetch *vf ) -{ - struct draw_vf_fastpath *fp = vf->fastpath; - - for ( ; fp ; fp = fp->next) { - if (match_fastpath(vf, fp)) { - vf->emit = fp->func; - return TRUE; - } - } - - return FALSE; -} - -void draw_vf_register_fastpath( struct draw_vertex_fetch *vf, - boolean match_strides ) -{ - struct draw_vf_fastpath *fastpath = CALLOC_STRUCT(draw_vf_fastpath); - unsigned i; - - fastpath->vertex_stride = vf->vertex_stride; - fastpath->attr_count = vf->attr_count; - fastpath->match_strides = match_strides; - fastpath->func = vf->emit; - fastpath->attr = (struct draw_vf_attr_type *) - MALLOC(vf->attr_count * sizeof(fastpath->attr[0])); - - for (i = 0; i < vf->attr_count; i++) { - fastpath->attr[i].format = vf->attr[i].format; - fastpath->attr[i].stride = vf->attr[i].inputstride; - fastpath->attr[i].size = vf->attr[i].inputsize; - fastpath->attr[i].offset = vf->attr[i].vertoffset; - } - - fastpath->next = vf->fastpath; - vf->fastpath = fastpath; -} - - - - -/*********************************************************************** - * Build codegen functions or return generic ones: - */ -static void choose_emit_func( struct draw_vertex_fetch *vf, - unsigned count, - uint8_t *dest) -{ - vf->emit = NULL; - - /* Does this match an existing (hardwired, codegen or known-bad) - * fastpath? - */ - if (search_fastpath_emit(vf)) { - /* Use this result. If it is null, then it is already known - * that the current state will fail for codegen and there is no - * point trying again. - */ - } - else if (vf->codegen_emit) { - vf->codegen_emit( vf ); - } - - if (!vf->emit) { - draw_vf_generate_hardwired_emit(vf); - } - - /* Otherwise use the generic version: - */ - if (!vf->emit) - vf->emit = draw_vf_generic_emit; - - vf->emit( vf, count, dest ); -} - - - - - -/*********************************************************************** - * Public entrypoints, mostly dispatch to the above: - */ - - - -static unsigned -draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf, - const struct draw_vf_attr_map *map, - unsigned nr, - unsigned vertex_stride ) -{ - unsigned offset = 0; - unsigned i, j; - - assert(nr < PIPE_MAX_ATTRIBS); - - for (j = 0, i = 0; i < nr; i++) { - const unsigned format = map[i].format; - if (format == DRAW_EMIT_PAD) { -#if (DRAW_VF_DBG) - debug_printf("%d: pad %d, offset %d\n", i, - map[i].offset, offset); -#endif - - offset += map[i].offset; - - } - else { - vf->attr[j].attrib = map[i].attrib; - vf->attr[j].format = format; - vf->attr[j].insert = draw_vf_format_info[format].insert; - vf->attr[j].vertattrsize = draw_vf_format_info[format].attrsize; - vf->attr[j].vertoffset = offset; - vf->attr[j].isconst = draw_vf_format_info[format].isconst; - if(vf->attr[j].isconst) - memcpy(vf->attr[j].data, &map[i].data, vf->attr[j].vertattrsize); - -#if (DRAW_VF_DBG) - debug_printf("%d: %s, offset %d\n", i, - draw_vf_format_info[format].name, - vf->attr[j].vertoffset); -#endif - - offset += draw_vf_format_info[format].attrsize; - j++; - } - } - - vf->attr_count = j; - vf->vertex_stride = vertex_stride ? vertex_stride : offset; - vf->emit = choose_emit_func; - - assert(vf->vertex_stride >= offset); - return vf->vertex_stride; -} - - -void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf, - const struct vertex_info *vinfo, - float point_size ) -{ - unsigned i, j; - struct draw_vf_attr *a = vf->attr; - struct draw_vf_attr_map attrs[PIPE_MAX_SHADER_INPUTS]; - unsigned count = 0; /* for debug/sanity */ - unsigned nr_attrs = 0; - - for (i = 0; i < vinfo->num_attribs; i++) { - j = vinfo->src_index[i]; - switch (vinfo->emit[i]) { - case EMIT_OMIT: - /* no-op */ - break; - case EMIT_1F: - attrs[nr_attrs].attrib = j; - attrs[nr_attrs].format = DRAW_EMIT_1F; - attrs[nr_attrs].offset = 0; - nr_attrs++; - count++; - break; - case EMIT_1F_PSIZE: - attrs[nr_attrs].attrib = j; - attrs[nr_attrs].format = DRAW_EMIT_1F_CONST; - attrs[nr_attrs].offset = 0; - attrs[nr_attrs].data.f[0] = point_size; - nr_attrs++; - count++; - break; - case EMIT_2F: - attrs[nr_attrs].attrib = j; - attrs[nr_attrs].format = DRAW_EMIT_2F; - attrs[nr_attrs].offset = 0; - nr_attrs++; - count += 2; - break; - case EMIT_3F: - attrs[nr_attrs].attrib = j; - attrs[nr_attrs].format = DRAW_EMIT_3F; - attrs[nr_attrs].offset = 0; - nr_attrs++; - count += 3; - break; - case EMIT_4F: - attrs[nr_attrs].attrib = j; - attrs[nr_attrs].format = DRAW_EMIT_4F; - attrs[nr_attrs].offset = 0; - nr_attrs++; - count += 4; - break; - case EMIT_4UB: - attrs[nr_attrs].attrib = j; - attrs[nr_attrs].format = DRAW_EMIT_4UB_4F_BGRA; - attrs[nr_attrs].offset = 0; - nr_attrs++; - count += 1; - break; - default: - assert(0); - } - } - - assert(count == vinfo->size); - - draw_vf_set_vertex_attributes(vf, - attrs, - nr_attrs, - vinfo->size * sizeof(float) ); - - for (j = 0; j < vf->attr_count; j++) { - a[j].inputsize = 4; - a[j].do_insert = a[j].insert[4 - 1]; - if(a[j].isconst) { - a[j].inputptr = a[j].data; - a[j].inputstride = 0; - } - } -} - - -#if 0 -/* Set attribute pointers, adjusted for start position: - */ -void draw_vf_set_sources( struct draw_vertex_fetch *vf, - GLvector4f * const sources[], - unsigned start ) -{ - struct draw_vf_attr *a = vf->attr; - unsigned j; - - for (j = 0; j < vf->attr_count; j++) { - const GLvector4f *vptr = sources[a[j].attrib]; - - if ((a[j].inputstride != vptr->stride) || - (a[j].inputsize != vptr->size)) - vf->emit = choose_emit_func; - - a[j].inputstride = vptr->stride; - a[j].inputsize = vptr->size; - a[j].do_insert = a[j].insert[vptr->size - 1]; - a[j].inputptr = ((uint8_t *)vptr->data) + start * vptr->stride; - } -} -#endif - - -/** - * Emit a vertex to dest. - */ -void draw_vf_emit_vertex( struct draw_vertex_fetch *vf, - struct vertex_header *vertex, - void *dest ) -{ - struct draw_vf_attr *a = vf->attr; - unsigned j; - - for (j = 0; j < vf->attr_count; j++) { - if (!a[j].isconst) { - a[j].inputptr = (uint8_t *)&vertex->data[a[j].attrib][0]; - a[j].inputstride = 0; /* XXX: one-vertex-max ATM */ - } - } - - vf->emit( vf, 1, (uint8_t*) dest ); -} - - - -struct draw_vertex_fetch *draw_vf_create( void ) -{ - struct draw_vertex_fetch *vf = CALLOC_STRUCT(draw_vertex_fetch); - unsigned i; - - for (i = 0; i < PIPE_MAX_ATTRIBS; i++) - vf->attr[i].vf = vf; - - vf->identity[0] = 0.0; - vf->identity[1] = 0.0; - vf->identity[2] = 0.0; - vf->identity[3] = 1.0; - - vf->codegen_emit = NULL; - -#ifdef USE_SSE_ASM - if (!GETENV("GALLIUM_NO_CODEGEN")) - vf->codegen_emit = draw_vf_generate_sse_emit; -#endif - - return vf; -} - - -void draw_vf_destroy( struct draw_vertex_fetch *vf ) -{ - struct draw_vf_fastpath *fp, *tmp; - - for (fp = vf->fastpath ; fp ; fp = tmp) { - tmp = fp->next; - FREE(fp->attr); - - /* KW: At the moment, fp->func is constrained to be allocated by - * rtasm_exec_alloc(), as the hardwired fastpaths in - * t_vertex_generic.c are handled specially. It would be nice - * to unify them, but this probably won't change until this - * module gets another overhaul. - */ - //rtasm_exec_free((void *) fp->func); - FREE(fp); - } - - vf->fastpath = NULL; - FREE(vf); -} diff --git a/src/gallium/auxiliary/draw/draw_vf.h b/src/gallium/auxiliary/draw/draw_vf.h deleted file mode 100644 index 0ef98d6257..0000000000 --- a/src/gallium/auxiliary/draw/draw_vf.h +++ /dev/null @@ -1,232 +0,0 @@ -/* - * Copyright 2008 Tungsten Graphics, inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - */ - - -/** - * Vertex fetch/store/convert code. This functionality is used in two places: - * 1. Vertex fetch/convert - to grab vertex data from incoming vertex - * arrays and convert to format needed by vertex shaders. - * 2. Vertex store/emit - to convert simple float[][4] vertex attributes - * (which is the organization used throughout the draw/prim pipeline) to - * hardware-specific formats and emit into hardware vertex buffers. - * - * - * Authors: - * Keith Whitwell - */ - -#ifndef DRAW_VF_H -#define DRAW_VF_H - - -#include "pipe/p_compiler.h" -#include "pipe/p_state.h" - -#include "draw_vertex.h" -#include "draw_private.h" /* for vertex_header */ - - -enum draw_vf_attr_format { - DRAW_EMIT_1F, - DRAW_EMIT_2F, - DRAW_EMIT_3F, - DRAW_EMIT_4F, - DRAW_EMIT_3F_XYW, /**< for projective texture */ - DRAW_EMIT_1UB_1F, /**< for fog coordinate */ - DRAW_EMIT_3UB_3F_RGB, /**< for specular color */ - DRAW_EMIT_3UB_3F_BGR, /**< for specular color */ - DRAW_EMIT_4UB_4F_RGBA, /**< for color */ - DRAW_EMIT_4UB_4F_BGRA, /**< for color */ - DRAW_EMIT_4UB_4F_ARGB, /**< for color */ - DRAW_EMIT_4UB_4F_ABGR, /**< for color */ - DRAW_EMIT_1F_CONST, - DRAW_EMIT_2F_CONST, - DRAW_EMIT_3F_CONST, - DRAW_EMIT_4F_CONST, - DRAW_EMIT_PAD, /**< leave a hole of 'offset' bytes */ - DRAW_EMIT_MAX -}; - -struct draw_vf_attr_map -{ - /** Input attribute number */ - unsigned attrib; - - enum draw_vf_attr_format format; - - unsigned offset; - - /** - * Constant data for DRAW_EMIT_*_CONST - */ - union { - uint8_t ub[4]; - float f[4]; - } data; -}; - -struct draw_vertex_fetch; - - - -#if 0 -unsigned -draw_vf_set_vertex_attributes( struct draw_vertex_fetch *vf, - const struct draw_vf_attr_map *map, - unsigned nr, - unsigned vertex_stride ); -#endif - -void draw_vf_set_vertex_info( struct draw_vertex_fetch *vf, - const struct vertex_info *vinfo, - float point_size ); - -#if 0 -void -draw_vf_set_sources( struct draw_vertex_fetch *vf, - GLvector4f * const attrib[], - unsigned start ); -#endif - -void -draw_vf_emit_vertex( struct draw_vertex_fetch *vf, - struct vertex_header *vertex, - void *dest ); - -struct draw_vertex_fetch * -draw_vf_create( void ); - -void -draw_vf_destroy( struct draw_vertex_fetch *vf ); - - - -/*********************************************************************** - * Internal functions and structs: - */ - -struct draw_vf_attr; - - -typedef void (*draw_vf_insert_func)( const struct draw_vf_attr *a, - uint8_t *v, - const float *in ); - -typedef void (*draw_vf_emit_func)( struct draw_vertex_fetch *vf, - unsigned count, - uint8_t *dest ); - - - -/** - * Describes how to convert/move a vertex attribute from a vertex - * array to a vertex structure. - */ -struct draw_vf_attr -{ - struct draw_vertex_fetch *vf; - - unsigned format; - unsigned inputsize; - unsigned inputstride; - unsigned vertoffset; /**< position of the attrib in the vertex struct */ - - boolean isconst; /**< read from const data below */ - uint8_t data[16]; - - unsigned attrib; /**< which vertex attrib (0=position, etc) */ - unsigned vertattrsize; /**< size of the attribute in bytes */ - - uint8_t *inputptr; - const draw_vf_insert_func *insert; - draw_vf_insert_func do_insert; -}; - -struct draw_vertex_fetch -{ - struct draw_vf_attr attr[PIPE_MAX_ATTRIBS]; - unsigned attr_count; - unsigned vertex_stride; - - draw_vf_emit_func emit; - - /* Parameters and constants for codegen: - */ - float identity[4]; - - struct draw_vf_fastpath *fastpath; - - void (*codegen_emit)( struct draw_vertex_fetch *vf ); -}; - - -struct draw_vf_attr_type { - unsigned format; - unsigned size; - unsigned stride; - unsigned offset; -}; - -/** XXX this could be moved into draw_vf.c */ -struct draw_vf_fastpath { - unsigned vertex_stride; - unsigned attr_count; - boolean match_strides; - - struct draw_vf_attr_type *attr; - - draw_vf_emit_func func; - struct draw_vf_fastpath *next; -}; - - -void -draw_vf_register_fastpath( struct draw_vertex_fetch *vtx, - boolean match_strides ); - -void -draw_vf_generic_emit( struct draw_vertex_fetch *vf, - unsigned count, - uint8_t *v ); - -void -draw_vf_generate_hardwired_emit( struct draw_vertex_fetch *vf ); - -void -draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ); - - -/** XXX this type and function could probably be moved into draw_vf.c */ -struct draw_vf_format_info { - const char *name; - draw_vf_insert_func insert[4]; - const unsigned attrsize; - const boolean isconst; -}; - -extern const struct draw_vf_format_info -draw_vf_format_info[DRAW_EMIT_MAX]; - - -#endif diff --git a/src/gallium/auxiliary/draw/draw_vf_generic.c b/src/gallium/auxiliary/draw/draw_vf_generic.c deleted file mode 100644 index 7a60a9db9c..0000000000 --- a/src/gallium/auxiliary/draw/draw_vf_generic.c +++ /dev/null @@ -1,585 +0,0 @@ - -/* - * Copyright 2003 Tungsten Graphics, inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Keith Whitwell - */ - - -#include "pipe/p_compiler.h" -#include "pipe/p_debug.h" -#include "pipe/p_util.h" - -#include "draw_vf.h" - - - -static INLINE void insert_4f_4( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; - out[3] = in[3]; -} - -static INLINE void insert_4f_3( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; - out[3] = 1; -} - -static INLINE void insert_4f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = 0; - out[3] = 1; -} - -static INLINE void insert_4f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = 0; - out[2] = 0; - out[3] = 1; -} - -static INLINE void insert_3f_xyw_4( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[3]; -} - -static INLINE void insert_3f_xyw_err( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - (void) a; (void) v; (void) in; - assert(0); -} - -static INLINE void insert_3f_3( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = in[2]; -} - -static INLINE void insert_3f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; - out[2] = 0; -} - -static INLINE void insert_3f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = 0; - out[2] = 0; -} - - -static INLINE void insert_2f_2( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = in[1]; -} - -static INLINE void insert_2f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; - out[1] = 0; -} - -static INLINE void insert_1f_1( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - float *out = (float *)(v); - (void) a; - - out[0] = in[0]; -} - -static INLINE void insert_null( const struct draw_vf_attr *a, uint8_t *v, const float *in ) -{ - (void) a; (void) v; (void) in; -} - -static INLINE void insert_4ub_4f_rgba_4( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]); - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]); -} - -static INLINE void insert_4ub_4f_rgba_3( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]); - v[3] = 0xff; -} - -static INLINE void insert_4ub_4f_rgba_2( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - v[2] = 0; - v[3] = 0xff; -} - -static INLINE void insert_4ub_4f_rgba_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - v[1] = 0; - v[2] = 0; - v[3] = 0xff; -} - -static INLINE void insert_4ub_4f_bgra_4( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]); - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[3]); -} - -static INLINE void insert_4ub_4f_bgra_3( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]); - v[3] = 0xff; -} - -static INLINE void insert_4ub_4f_bgra_2( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - v[0] = 0; - v[3] = 0xff; -} - -static INLINE void insert_4ub_4f_bgra_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - v[1] = 0; - v[0] = 0; - v[3] = 0xff; -} - -static INLINE void insert_4ub_4f_argb_4( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]); - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]); -} - -static INLINE void insert_4ub_4f_argb_3( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[2]); - v[0] = 0xff; -} - -static INLINE void insert_4ub_4f_argb_2( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); - v[3] = 0x00; - v[0] = 0xff; -} - -static INLINE void insert_4ub_4f_argb_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[0]); - v[2] = 0x00; - v[3] = 0x00; - v[0] = 0xff; -} - -static INLINE void insert_4ub_4f_abgr_4( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]); - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[3]); -} - -static INLINE void insert_4ub_4f_abgr_3( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[2]); - v[0] = 0xff; -} - -static INLINE void insert_4ub_4f_abgr_2( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[1]); - v[1] = 0x00; - v[0] = 0xff; -} - -static INLINE void insert_4ub_4f_abgr_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[3], in[0]); - v[2] = 0x00; - v[1] = 0x00; - v[0] = 0xff; -} - -static INLINE void insert_3ub_3f_rgb_3( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[2]); -} - -static INLINE void insert_3ub_3f_rgb_2( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - v[2] = 0; -} - -static INLINE void insert_3ub_3f_rgb_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); - v[1] = 0; - v[2] = 0; -} - -static INLINE void insert_3ub_3f_bgr_3( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[2]); -} - -static INLINE void insert_3ub_3f_bgr_2( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - UNCLAMPED_FLOAT_TO_UBYTE(v[1], in[1]); - v[0] = 0; -} - -static INLINE void insert_3ub_3f_bgr_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[2], in[0]); - v[1] = 0; - v[0] = 0; -} - - -static INLINE void insert_1ub_1f_1( const struct draw_vf_attr *a, uint8_t *v, - const float *in ) -{ - (void) a; - UNCLAMPED_FLOAT_TO_UBYTE(v[0], in[0]); -} - - -const struct draw_vf_format_info draw_vf_format_info[DRAW_EMIT_MAX] = -{ - { "1f", - { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 }, - sizeof(float), FALSE }, - - { "2f", - { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 }, - 2 * sizeof(float), FALSE }, - - { "3f", - { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 }, - 3 * sizeof(float), FALSE }, - - { "4f", - { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 }, - 4 * sizeof(float), FALSE }, - - { "3f_xyw", - { insert_3f_xyw_err, insert_3f_xyw_err, insert_3f_xyw_err, - insert_3f_xyw_4 }, - 3 * sizeof(float), FALSE }, - - { "1ub_1f", - { insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1, insert_1ub_1f_1 }, - sizeof(uint8_t), FALSE }, - - { "3ub_3f_rgb", - { insert_3ub_3f_rgb_1, insert_3ub_3f_rgb_2, insert_3ub_3f_rgb_3, - insert_3ub_3f_rgb_3 }, - 3 * sizeof(uint8_t), FALSE }, - - { "3ub_3f_bgr", - { insert_3ub_3f_bgr_1, insert_3ub_3f_bgr_2, insert_3ub_3f_bgr_3, - insert_3ub_3f_bgr_3 }, - 3 * sizeof(uint8_t), FALSE }, - - { "4ub_4f_rgba", - { insert_4ub_4f_rgba_1, insert_4ub_4f_rgba_2, insert_4ub_4f_rgba_3, - insert_4ub_4f_rgba_4 }, - 4 * sizeof(uint8_t), FALSE }, - - { "4ub_4f_bgra", - { insert_4ub_4f_bgra_1, insert_4ub_4f_bgra_2, insert_4ub_4f_bgra_3, - insert_4ub_4f_bgra_4 }, - 4 * sizeof(uint8_t), FALSE }, - - { "4ub_4f_argb", - { insert_4ub_4f_argb_1, insert_4ub_4f_argb_2, insert_4ub_4f_argb_3, - insert_4ub_4f_argb_4 }, - 4 * sizeof(uint8_t), FALSE }, - - { "4ub_4f_abgr", - { insert_4ub_4f_abgr_1, insert_4ub_4f_abgr_2, insert_4ub_4f_abgr_3, - insert_4ub_4f_abgr_4 }, - 4 * sizeof(uint8_t), FALSE }, - - { "1f_const", - { insert_1f_1, insert_1f_1, insert_1f_1, insert_1f_1 }, - sizeof(float), TRUE }, - - { "2f_const", - { insert_2f_1, insert_2f_2, insert_2f_2, insert_2f_2 }, - 2 * sizeof(float), TRUE }, - - { "3f_const", - { insert_3f_1, insert_3f_2, insert_3f_3, insert_3f_3 }, - 3 * sizeof(float), TRUE }, - - { "4f_const", - { insert_4f_1, insert_4f_2, insert_4f_3, insert_4f_4 }, - 4 * sizeof(float), TRUE }, - - { "pad", - { NULL, NULL, NULL, NULL }, - 0, FALSE }, - -}; - - - - -/*********************************************************************** - * Hardwired fastpaths for emitting whole vertices or groups of - * vertices - */ -#define EMIT5(NR, F0, F1, F2, F3, F4, NAME) \ -static void NAME( struct draw_vertex_fetch *vf, \ - unsigned count, \ - uint8_t *v ) \ -{ \ - struct draw_vf_attr *a = vf->attr; \ - unsigned i; \ - \ - for (i = 0 ; i < count ; i++, v += vf->vertex_stride) { \ - if (NR > 0) { \ - F0( &a[0], v + a[0].vertoffset, (float *)a[0].inputptr ); \ - a[0].inputptr += a[0].inputstride; \ - } \ - \ - if (NR > 1) { \ - F1( &a[1], v + a[1].vertoffset, (float *)a[1].inputptr ); \ - a[1].inputptr += a[1].inputstride; \ - } \ - \ - if (NR > 2) { \ - F2( &a[2], v + a[2].vertoffset, (float *)a[2].inputptr ); \ - a[2].inputptr += a[2].inputstride; \ - } \ - \ - if (NR > 3) { \ - F3( &a[3], v + a[3].vertoffset, (float *)a[3].inputptr ); \ - a[3].inputptr += a[3].inputstride; \ - } \ - \ - if (NR > 4) { \ - F4( &a[4], v + a[4].vertoffset, (float *)a[4].inputptr ); \ - a[4].inputptr += a[4].inputstride; \ - } \ - } \ -} - - -#define EMIT2(F0, F1, NAME) EMIT5(2, F0, F1, insert_null, \ - insert_null, insert_null, NAME) - -#define EMIT3(F0, F1, F2, NAME) EMIT5(3, F0, F1, F2, insert_null, \ - insert_null, NAME) - -#define EMIT4(F0, F1, F2, F3, NAME) EMIT5(4, F0, F1, F2, F3, \ - insert_null, NAME) - - -EMIT2(insert_3f_3, insert_4ub_4f_rgba_4, emit_xyz3_rgba4) - -EMIT3(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, emit_xyzw4_rgba4_st2) - -EMIT4(insert_4f_4, insert_4ub_4f_rgba_4, insert_2f_2, insert_2f_2, emit_xyzw4_rgba4_st2_st2) - - -/* Use the codegen paths to select one of a number of hardwired - * fastpaths. - */ -void draw_vf_generate_hardwired_emit( struct draw_vertex_fetch *vf ) -{ - draw_vf_emit_func func = NULL; - - /* Does it fit a hardwired fastpath? Help! this is growing out of - * control! - */ - switch (vf->attr_count) { - case 2: - if (vf->attr[0].do_insert == insert_3f_3 && - vf->attr[1].do_insert == insert_4ub_4f_rgba_4) { - func = emit_xyz3_rgba4; - } - break; - case 3: - if (vf->attr[2].do_insert == insert_2f_2) { - if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4) { - if (vf->attr[0].do_insert == insert_4f_4) - func = emit_xyzw4_rgba4_st2; - } - } - break; - case 4: - if (vf->attr[2].do_insert == insert_2f_2 && - vf->attr[3].do_insert == insert_2f_2) { - if (vf->attr[1].do_insert == insert_4ub_4f_rgba_4) { - if (vf->attr[0].do_insert == insert_4f_4) - func = emit_xyzw4_rgba4_st2_st2; - } - } - break; - } - - vf->emit = func; -} - -/*********************************************************************** - * Generic (non-codegen) functions for whole vertices or groups of - * vertices - */ - -void draw_vf_generic_emit( struct draw_vertex_fetch *vf, - unsigned count, - uint8_t *v ) -{ - struct draw_vf_attr *a = vf->attr; - const unsigned attr_count = vf->attr_count; - const unsigned stride = vf->vertex_stride; - unsigned i, j; - - for (i = 0 ; i < count ; i++, v += stride) { - for (j = 0; j < attr_count; j++) { - float *in = (float *)a[j].inputptr; - a[j].inputptr += a[j].inputstride; - a[j].do_insert( &a[j], v + a[j].vertoffset, in ); - } - } -} - - diff --git a/src/gallium/auxiliary/draw/draw_vf_sse.c b/src/gallium/auxiliary/draw/draw_vf_sse.c deleted file mode 100644 index aff4ffd985..0000000000 --- a/src/gallium/auxiliary/draw/draw_vf_sse.c +++ /dev/null @@ -1,613 +0,0 @@ -/* - * Copyright 2003 Tungsten Graphics, inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the "Software"), - * to deal in the Software without restriction, including without limitation - * on the rights to use, copy, modify, merge, publish, distribute, sub - * license, and/or sell copies of the Software, and to permit persons to whom - * the Software is furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice (including the next - * paragraph) shall be included in all copies or substantial portions of the - * Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL - * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, - * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR - * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE - * USE OR OTHER DEALINGS IN THE SOFTWARE. - * - * Authors: - * Keith Whitwell - */ - - -#include "pipe/p_compiler.h" -#include "util/u_simple_list.h" - -#include "draw_vf.h" - - -#if defined(USE_SSE_ASM) - -#include "rtasm/rtasm_cpu.h" -#include "rtasm/rtasm_x86sse.h" - - -#define X 0 -#define Y 1 -#define Z 2 -#define W 3 - - -struct x86_program { - struct x86_function func; - - struct draw_vertex_fetch *vf; - boolean inputs_safe; - boolean outputs_safe; - boolean have_sse2; - - struct x86_reg identity; - struct x86_reg chan0; -}; - - -static struct x86_reg get_identity( struct x86_program *p ) -{ - return p->identity; -} - -static void emit_load4f_4( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - sse_movups(&p->func, dest, arg0); -} - -static void emit_load4f_3( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - /* Have to jump through some hoops: - * - * c 0 0 0 - * c 0 0 1 - * 0 0 c 1 - * a b c 1 - */ - sse_movss(&p->func, dest, x86_make_disp(arg0, 8)); - sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) ); - sse_shufps(&p->func, dest, dest, SHUF(Y,Z,X,W) ); - sse_movlps(&p->func, dest, arg0); -} - -static void emit_load4f_2( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - /* Initialize from identity, then pull in low two words: - */ - sse_movups(&p->func, dest, get_identity(p)); - sse_movlps(&p->func, dest, arg0); -} - -static void emit_load4f_1( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - /* Pull in low word, then swizzle in identity */ - sse_movss(&p->func, dest, arg0); - sse_shufps(&p->func, dest, get_identity(p), SHUF(X,Y,Z,W) ); -} - - - -static void emit_load3f_3( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - /* Over-reads by 1 dword - potential SEGV if input is a vertex - * array. - */ - if (p->inputs_safe) { - sse_movups(&p->func, dest, arg0); - } - else { - /* c 0 0 0 - * c c c c - * a b c c - */ - sse_movss(&p->func, dest, x86_make_disp(arg0, 8)); - sse_shufps(&p->func, dest, dest, SHUF(X,X,X,X)); - sse_movlps(&p->func, dest, arg0); - } -} - -static void emit_load3f_2( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - emit_load4f_2(p, dest, arg0); -} - -static void emit_load3f_1( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - emit_load4f_1(p, dest, arg0); -} - -static void emit_load2f_2( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - sse_movlps(&p->func, dest, arg0); -} - -static void emit_load2f_1( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - emit_load4f_1(p, dest, arg0); -} - -static void emit_load1f_1( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - sse_movss(&p->func, dest, arg0); -} - -static void (*load[4][4])( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) = { - { emit_load1f_1, - emit_load1f_1, - emit_load1f_1, - emit_load1f_1 }, - - { emit_load2f_1, - emit_load2f_2, - emit_load2f_2, - emit_load2f_2 }, - - { emit_load3f_1, - emit_load3f_2, - emit_load3f_3, - emit_load3f_3 }, - - { emit_load4f_1, - emit_load4f_2, - emit_load4f_3, - emit_load4f_4 } -}; - -static void emit_load( struct x86_program *p, - struct x86_reg dest, - unsigned sz, - struct x86_reg src, - unsigned src_sz) -{ - load[sz-1][src_sz-1](p, dest, src); -} - -static void emit_store4f( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - sse_movups(&p->func, dest, arg0); -} - -static void emit_store3f( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - if (p->outputs_safe) { - /* Emit the extra dword anyway. This may hurt writecombining, - * may cause other problems. - */ - sse_movups(&p->func, dest, arg0); - } - else { - /* Alternate strategy - emit two, shuffle, emit one. - */ - sse_movlps(&p->func, dest, arg0); - sse_shufps(&p->func, arg0, arg0, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */ - sse_movss(&p->func, x86_make_disp(dest,8), arg0); - } -} - -static void emit_store2f( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - sse_movlps(&p->func, dest, arg0); -} - -static void emit_store1f( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) -{ - sse_movss(&p->func, dest, arg0); -} - - -static void (*store[4])( struct x86_program *p, - struct x86_reg dest, - struct x86_reg arg0 ) = -{ - emit_store1f, - emit_store2f, - emit_store3f, - emit_store4f -}; - -static void emit_store( struct x86_program *p, - struct x86_reg dest, - unsigned sz, - struct x86_reg temp ) - -{ - store[sz-1](p, dest, temp); -} - -static void emit_pack_store_4ub( struct x86_program *p, - struct x86_reg dest, - struct x86_reg temp ) -{ - /* Scale by 255.0 - */ - sse_mulps(&p->func, temp, p->chan0); - - if (p->have_sse2) { - sse2_cvtps2dq(&p->func, temp, temp); - sse2_packssdw(&p->func, temp, temp); - sse2_packuswb(&p->func, temp, temp); - sse_movss(&p->func, dest, temp); - } - else { - struct x86_reg mmx0 = x86_make_reg(file_MMX, 0); - struct x86_reg mmx1 = x86_make_reg(file_MMX, 1); - sse_cvtps2pi(&p->func, mmx0, temp); - sse_movhlps(&p->func, temp, temp); - sse_cvtps2pi(&p->func, mmx1, temp); - mmx_packssdw(&p->func, mmx0, mmx1); - mmx_packuswb(&p->func, mmx0, mmx0); - mmx_movd(&p->func, dest, mmx0); - } -} - -static int get_offset( const void *a, const void *b ) -{ - return (const char *)b - (const char *)a; -} - -/* Not much happens here. Eventually use this function to try and - * avoid saving/reloading the source pointers each vertex (if some of - * them can fit in registers). - */ -static void get_src_ptr( struct x86_program *p, - struct x86_reg srcREG, - struct x86_reg vfREG, - struct draw_vf_attr *a ) -{ - struct draw_vertex_fetch *vf = p->vf; - struct x86_reg ptr_to_src = x86_make_disp(vfREG, get_offset(vf, &a->inputptr)); - - /* Load current a[j].inputptr - */ - x86_mov(&p->func, srcREG, ptr_to_src); -} - -static void update_src_ptr( struct x86_program *p, - struct x86_reg srcREG, - struct x86_reg vfREG, - struct draw_vf_attr *a ) -{ - if (a->inputstride) { - struct draw_vertex_fetch *vf = p->vf; - struct x86_reg ptr_to_src = x86_make_disp(vfREG, get_offset(vf, &a->inputptr)); - - /* add a[j].inputstride (hardcoded value - could just as easily - * pull the stride value from memory each time). - */ - x86_lea(&p->func, srcREG, x86_make_disp(srcREG, a->inputstride)); - - /* save new value of a[j].inputptr - */ - x86_mov(&p->func, ptr_to_src, srcREG); - } -} - - -/* Lots of hardcoding - * - * EAX -- pointer to current output vertex - * ECX -- pointer to current attribute - * - */ -static boolean build_vertex_emit( struct x86_program *p ) -{ - struct draw_vertex_fetch *vf = p->vf; - unsigned j = 0; - - struct x86_reg vertexEAX = x86_make_reg(file_REG32, reg_AX); - struct x86_reg srcECX = x86_make_reg(file_REG32, reg_CX); - struct x86_reg countEBP = x86_make_reg(file_REG32, reg_BP); - struct x86_reg vfESI = x86_make_reg(file_REG32, reg_SI); - struct x86_reg temp = x86_make_reg(file_XMM, 0); - uint8_t *fixup, *label; - - /* Push a few regs? - */ - x86_push(&p->func, countEBP); - x86_push(&p->func, vfESI); - - - /* Get vertex count, compare to zero - */ - x86_xor(&p->func, srcECX, srcECX); - x86_mov(&p->func, countEBP, x86_fn_arg(&p->func, 2)); - x86_cmp(&p->func, countEBP, srcECX); - fixup = x86_jcc_forward(&p->func, cc_E); - - /* Initialize destination register. - */ - x86_mov(&p->func, vertexEAX, x86_fn_arg(&p->func, 3)); - - /* Move argument 1 (vf) into a reg: - */ - x86_mov(&p->func, vfESI, x86_fn_arg(&p->func, 1)); - - - /* always load, needed or not: - */ - sse_movups(&p->func, p->identity, x86_make_disp(vfESI, get_offset(vf, &vf->identity[0]))); - - /* Note address for loop jump */ - label = x86_get_label(&p->func); - - /* Emit code for each of the attributes. Currently routes - * everything through SSE registers, even when it might be more - * efficient to stick with regular old x86. No optimization or - * other tricks - enough new ground to cover here just getting - * things working. - */ - while (j < vf->attr_count) { - struct draw_vf_attr *a = &vf->attr[j]; - struct x86_reg dest = x86_make_disp(vertexEAX, a->vertoffset); - - /* Now, load an XMM reg from src, perhaps transform, then save. - * Could be shortcircuited in specific cases: - */ - switch (a->format) { - case DRAW_EMIT_1F: - case DRAW_EMIT_1F_CONST: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize); - emit_store(p, dest, 1, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - case DRAW_EMIT_2F: - case DRAW_EMIT_2F_CONST: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize); - emit_store(p, dest, 2, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - case DRAW_EMIT_3F: - case DRAW_EMIT_3F_CONST: - /* Potentially the worst case - hardcode 2+1 copying: - */ - if (0) { - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); - emit_store(p, dest, 3, temp); - update_src_ptr(p, srcECX, vfESI, a); - } - else { - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 2, x86_deref(srcECX), a->inputsize); - emit_store(p, dest, 2, temp); - if (a->inputsize > 2) { - emit_load(p, temp, 1, x86_make_disp(srcECX, 8), 1); - emit_store(p, x86_make_disp(dest,8), 1, temp); - } - else { - sse_movss(&p->func, x86_make_disp(dest,8), get_identity(p)); - } - update_src_ptr(p, srcECX, vfESI, a); - } - break; - case DRAW_EMIT_4F: - case DRAW_EMIT_4F_CONST: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); - emit_store(p, dest, 4, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - case DRAW_EMIT_3F_XYW: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); - sse_shufps(&p->func, temp, temp, SHUF(X,Y,W,Z)); - emit_store(p, dest, 3, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - - case DRAW_EMIT_1UB_1F: - /* Test for PAD3 + 1UB: - */ - if (j > 0 && - a[-1].vertoffset + a[-1].vertattrsize <= a->vertoffset - 3) - { - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 1, x86_deref(srcECX), a->inputsize); - sse_shufps(&p->func, temp, temp, SHUF(X,X,X,X)); - emit_pack_store_4ub(p, x86_make_disp(dest, -3), temp); /* overkill! */ - update_src_ptr(p, srcECX, vfESI, a); - } - else { - debug_printf("Can't emit 1ub %x %x %d\n", - a->vertoffset, a[-1].vertoffset, a[-1].vertattrsize ); - return FALSE; - } - break; - case DRAW_EMIT_3UB_3F_RGB: - case DRAW_EMIT_3UB_3F_BGR: - /* Test for 3UB + PAD1: - */ - if (j == vf->attr_count - 1 || - a[1].vertoffset >= a->vertoffset + 4) { - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); - if (a->format == DRAW_EMIT_3UB_3F_BGR) - sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W)); - emit_pack_store_4ub(p, dest, temp); - update_src_ptr(p, srcECX, vfESI, a); - } - /* Test for 3UB + 1UB: - */ - else if (j < vf->attr_count - 1 && - a[1].format == DRAW_EMIT_1UB_1F && - a[1].vertoffset == a->vertoffset + 3) { - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 3, x86_deref(srcECX), a->inputsize); - update_src_ptr(p, srcECX, vfESI, a); - - /* Make room for incoming value: - */ - sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z)); - - get_src_ptr(p, srcECX, vfESI, &a[1]); - emit_load(p, temp, 1, x86_deref(srcECX), a[1].inputsize); - update_src_ptr(p, srcECX, vfESI, &a[1]); - - /* Rearrange and possibly do BGR conversion: - */ - if (a->format == DRAW_EMIT_3UB_3F_BGR) - sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X)); - else - sse_shufps(&p->func, temp, temp, SHUF(Y,Z,W,X)); - - emit_pack_store_4ub(p, dest, temp); - j++; /* NOTE: two attrs consumed */ - } - else { - debug_printf("Can't emit 3ub\n"); - } - return FALSE; /* add this later */ - break; - - case DRAW_EMIT_4UB_4F_RGBA: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); - emit_pack_store_4ub(p, dest, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - case DRAW_EMIT_4UB_4F_BGRA: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); - sse_shufps(&p->func, temp, temp, SHUF(Z,Y,X,W)); - emit_pack_store_4ub(p, dest, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - case DRAW_EMIT_4UB_4F_ARGB: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); - sse_shufps(&p->func, temp, temp, SHUF(W,X,Y,Z)); - emit_pack_store_4ub(p, dest, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - case DRAW_EMIT_4UB_4F_ABGR: - get_src_ptr(p, srcECX, vfESI, a); - emit_load(p, temp, 4, x86_deref(srcECX), a->inputsize); - sse_shufps(&p->func, temp, temp, SHUF(W,Z,Y,X)); - emit_pack_store_4ub(p, dest, temp); - update_src_ptr(p, srcECX, vfESI, a); - break; - default: - debug_printf("unknown a[%d].format %d\n", j, a->format); - return FALSE; /* catch any new opcodes */ - } - - /* Increment j by at least 1 - may have been incremented above also: - */ - j++; - } - - /* Next vertex: - */ - x86_lea(&p->func, vertexEAX, x86_make_disp(vertexEAX, vf->vertex_stride)); - - /* decr count, loop if not zero - */ - x86_dec(&p->func, countEBP); - x86_test(&p->func, countEBP, countEBP); - x86_jcc(&p->func, cc_NZ, label); - - /* Exit mmx state? - */ - if (p->func.need_emms) - mmx_emms(&p->func); - - /* Land forward jump here: - */ - x86_fixup_fwd_jump(&p->func, fixup); - - /* Pop regs and return - */ - x86_pop(&p->func, x86_get_base_reg(vfESI)); - x86_pop(&p->func, countEBP); - x86_ret(&p->func); - - vf->emit = (draw_vf_emit_func)x86_get_func(&p->func); - return TRUE; -} - - - -void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ) -{ - struct x86_program p; - - if (!rtasm_cpu_has_sse()) { - vf->codegen_emit = NULL; - return; - } - - memset(&p, 0, sizeof(p)); - - p.vf = vf; - p.inputs_safe = 0; /* for now */ - p.outputs_safe = 1; /* for now */ - p.have_sse2 = rtasm_cpu_has_sse2(); - p.identity = x86_make_reg(file_XMM, 6); - p.chan0 = x86_make_reg(file_XMM, 7); - - x86_init_func(&p.func); - - if (build_vertex_emit(&p)) { - draw_vf_register_fastpath( vf, TRUE ); - } - else { - /* Note the failure so that we don't keep trying to codegen an - * impossible state: - */ - draw_vf_register_fastpath( vf, FALSE ); - x86_release_func(&p.func); - } -} - -#else - -void draw_vf_generate_sse_emit( struct draw_vertex_fetch *vf ) -{ - /* Dummy version for when USE_SSE_ASM not defined */ -} - -#endif -- cgit v1.2.3 From a41c05b20a36d2160aa232d08ed57d3095438025 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 18 Apr 2008 20:11:16 +0100 Subject: draw: switch over to draw_pt paths, will remove old code shortly --- src/gallium/auxiliary/draw/draw_context.c | 2 -- src/gallium/auxiliary/draw/draw_private.h | 1 - src/gallium/auxiliary/draw/draw_pt.c | 3 --- 3 files changed, 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 4838b68ed1..5dc3358bd1 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -49,8 +49,6 @@ struct draw_context *draw_create( void ) draw->use_sse = FALSE; #endif - draw->use_pt_shaders = GETENV( "GALLIUM_PT_SHADERS" ) != NULL; - /* create pipeline stages */ draw->pipeline.wide_line = draw_wide_line_stage( draw ); draw->pipeline.wide_point = draw_wide_point_stage( draw ); diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index da94e69781..18ce6c0ec5 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -258,7 +258,6 @@ struct draw_context boolean line_stipple; /**< do line stipple? */ boolean point_sprite; /**< convert points to quads for sprites? */ boolean use_sse; - boolean use_pt_shaders; /* temporary flag to switch on pt shader paths */ /* If a prim stage introduces new vertex attributes, they'll be stored here */ diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index b48d0cae9f..941f6efbe4 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -68,9 +68,6 @@ draw_pt_arrays(struct draw_context *draw, if (!draw->rasterizer->bypass_vs) { opt |= PT_SHADE; - - if (!draw->use_pt_shaders) - return FALSE; } -- cgit v1.2.3 From b11d89dc6d230f7f945f9eb420d39921c648ec20 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 18 Apr 2008 20:36:38 +0100 Subject: draw: remove draw_prim.c --- src/gallium/auxiliary/draw/Makefile | 1 - src/gallium/auxiliary/draw/SConscript | 1 - src/gallium/auxiliary/draw/draw_prim.c | 23 -------------- src/gallium/auxiliary/draw/draw_pt.c | 55 ++++++++++++++++++++++++++++++++++ 4 files changed, 55 insertions(+), 25 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 27464e5c86..080311a667 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -24,7 +24,6 @@ C_SOURCES = \ draw_pt_emit.c \ draw_pt_pipeline.c \ draw_pt_elts.c \ - draw_prim.c \ draw_pstipple.c \ draw_stipple.c \ draw_twoside.c \ diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 7af65c3c05..238e3f7d28 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -11,7 +11,6 @@ draw = env.ConvenienceLibrary( 'draw_debug.c', 'draw_flatshade.c', 'draw_offset.c', - 'draw_prim.c', 'draw_pstipple.c', 'draw_pt.c', 'draw_pt_elts.c', diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c index 51b6950334..d61cd25243 100644 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ b/src/gallium/auxiliary/draw/draw_prim.c @@ -497,27 +497,4 @@ draw_prim( struct draw_context *draw, -/** - * Draw vertex arrays - * This is the main entrypoint into the drawing module. - * \param prim one of PIPE_PRIM_x - * \param start index of first vertex to draw - * \param count number of vertices to draw - */ -void -draw_arrays(struct draw_context *draw, unsigned prim, - unsigned start, unsigned count) -{ - if (reduced_prim[prim] != draw->reduced_prim) { - draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->reduced_prim = reduced_prim[prim]; - } - - /* drawing done here: */ - if (!draw_pt_arrays(draw, prim, start, count)) { - /* we have to run the whole pipeline */ - draw_prim(draw, prim, start, count); - } -} - diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 941f6efbe4..ecaed84070 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -128,3 +128,58 @@ void draw_pt_destroy( struct draw_context *draw ) draw->pt.front.vcache = NULL; } } + + + +static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES +}; + + +/** + * Draw vertex arrays + * This is the main entrypoint into the drawing module. + * \param prim one of PIPE_PRIM_x + * \param start index of first vertex to draw + * \param count number of vertices to draw + */ +void +draw_arrays(struct draw_context *draw, unsigned prim, + unsigned start, unsigned count) +{ + if (reduced_prim[prim] != draw->reduced_prim) { + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->reduced_prim = reduced_prim[prim]; + } + + /* drawing done here: */ + draw_pt_arrays(draw, prim, start, count); +} + + +/* Revamp me please: + */ +void draw_do_flush( struct draw_context *draw, unsigned flags ) +{ + if (!draw->flushing) + { + draw->flushing = TRUE; + + if (flags >= DRAW_FLUSH_STATE_CHANGE) { + draw->pipeline.first->flush( draw->pipeline.first, flags ); + draw->pipeline.first = draw->pipeline.validate; + draw->reduced_prim = ~0; + } + + draw->flushing = FALSE; + } +} -- cgit v1.2.3 From 66891826421d5b774e081f7a2a85580cd0523fab Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 18 Apr 2008 20:39:13 +0100 Subject: draw: remove draw_vertex_cache.c --- src/gallium/auxiliary/draw/Makefile | 1 - src/gallium/auxiliary/draw/draw_context.c | 20 +- src/gallium/auxiliary/draw/draw_prim.c | 500 ------------------------- src/gallium/auxiliary/draw/draw_vertex_cache.c | 219 ----------- 4 files changed, 18 insertions(+), 722 deletions(-) delete mode 100644 src/gallium/auxiliary/draw/draw_prim.c delete mode 100644 src/gallium/auxiliary/draw/draw_vertex_cache.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 080311a667..03210f6a4a 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -31,7 +31,6 @@ C_SOURCES = \ draw_validate.c \ draw_vbuf.c \ draw_vertex.c \ - draw_vertex_cache.c \ draw_vertex_fetch.c \ draw_vertex_shader.c \ draw_wide_line.c \ diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 5dc3358bd1..c611300841 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -103,7 +103,6 @@ struct draw_context *draw_create( void ) draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ - draw_vertex_cache_invalidate( draw ); draw_set_mapped_element_buffer( draw, 0, NULL ); tgsi_exec_machine_init(&draw->machine); @@ -446,7 +445,6 @@ void draw_reset_vertex_ids(struct draw_context *draw) stage = stage->next; } - draw_vertex_cache_reset_vertex_ids(draw); /* going away soon */ draw_pt_reset_vertex_ids(draw); } @@ -473,3 +471,21 @@ boolean draw_get_edgeflag( struct draw_context *draw, return 1; } + +/** + * Tell the drawing context about the index/element buffer to use + * (ala glDrawElements) + * If no element buffer is to be used (i.e. glDrawArrays) then this + * should be called with eltSize=0 and elements=NULL. + * + * \param draw the drawing context + * \param eltSize size of each element (1, 2 or 4 bytes) + * \param elements the element buffer ptr + */ +void +draw_set_mapped_element_buffer( struct draw_context *draw, + unsigned eltSize, void *elements ) +{ + draw->user.elts = elements; + draw->user.eltSize = eltSize; +} diff --git a/src/gallium/auxiliary/draw/draw_prim.c b/src/gallium/auxiliary/draw/draw_prim.c deleted file mode 100644 index d61cd25243..0000000000 --- a/src/gallium/auxiliary/draw/draw_prim.c +++ /dev/null @@ -1,500 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell - */ - -#include "pipe/p_debug.h" -#include "pipe/p_util.h" - -#include "draw_private.h" -#include "draw_context.h" - - - -#define RP_NONE 0 -#define RP_POINT 1 -#define RP_LINE 2 -#define RP_TRI 3 - - -static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { - RP_POINT, - RP_LINE, - RP_LINE, - RP_LINE, - RP_TRI, - RP_TRI, - RP_TRI, - RP_TRI, - RP_TRI, - RP_TRI -}; - - -static void draw_prim_queue_flush( struct draw_context *draw ) -{ - unsigned i; - - if (0) - debug_printf("Flushing with %d prims, %d verts\n", - draw->pq.queue_nr, draw->vs.queue_nr); - - assert (draw->pq.queue_nr != 0); - - /* NOTE: we cannot save draw->pipeline->first in a local var because - * draw->pipeline->first is often changed by the first call to tri(), - * line(), etc. - */ - if (draw->rasterizer->line_stipple_enable) { - switch (draw->reduced_prim) { - case RP_TRI: - for (i = 0; i < draw->pq.queue_nr; i++) { - if (draw->pq.queue[i].reset_line_stipple) - draw->pipeline.first->reset_stipple_counter( draw->pipeline.first ); - - draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] ); - } - break; - case RP_LINE: - for (i = 0; i < draw->pq.queue_nr; i++) { - if (draw->pq.queue[i].reset_line_stipple) - draw->pipeline.first->reset_stipple_counter( draw->pipeline.first ); - - draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] ); - } - break; - case RP_POINT: - draw->pipeline.first->reset_stipple_counter( draw->pipeline.first ); - for (i = 0; i < draw->pq.queue_nr; i++) - draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] ); - break; - } - } - else { - switch (draw->reduced_prim) { - case RP_TRI: - for (i = 0; i < draw->pq.queue_nr; i++) - draw->pipeline.first->tri( draw->pipeline.first, &draw->pq.queue[i] ); - break; - case RP_LINE: - for (i = 0; i < draw->pq.queue_nr; i++) - draw->pipeline.first->line( draw->pipeline.first, &draw->pq.queue[i] ); - break; - case RP_POINT: - for (i = 0; i < draw->pq.queue_nr; i++) - draw->pipeline.first->point( draw->pipeline.first, &draw->pq.queue[i] ); - break; - } - } - - draw->pq.queue_nr = 0; - draw->vs.post_nr = 0; - draw_vertex_cache_unreference( draw ); -} - -void draw_do_flush( struct draw_context *draw, unsigned flags ) -{ - if (0) - debug_printf("Flushing with %d verts, %d prims\n", - draw->vs.queue_nr, - draw->pq.queue_nr ); - - if (draw->flushing) - return; - - draw->flushing = TRUE; - - if (flags >= DRAW_FLUSH_SHADER_QUEUE) { - if (draw->vs.queue_nr) { - (*draw->shader_queue_flush)(draw); - } - - if (flags >= DRAW_FLUSH_PRIM_QUEUE) { - if (draw->pq.queue_nr) - draw_prim_queue_flush(draw); - - if (flags >= DRAW_FLUSH_VERTEX_CACHE) { - draw_vertex_cache_invalidate(draw); - - if (flags >= DRAW_FLUSH_STATE_CHANGE) { - draw->pipeline.first->flush( draw->pipeline.first, flags ); - draw->pipeline.first = draw->pipeline.validate; - draw->reduced_prim = ~0; - } - } - } - } - - draw->flushing = FALSE; -} - - - -/* Return a pointer to a freshly queued primitive header. Ensure that - * there is room in the vertex cache for a maximum of "nr_verts" new - * vertices. Flush primitive and/or vertex queues if necessary to - * make space. - */ -static struct prim_header *get_queued_prim( struct draw_context *draw, - unsigned nr_verts ) -{ - if (!draw_vertex_cache_check_space( draw, nr_verts )) { -// debug_printf("v"); - draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE ); - } - else if (draw->pq.queue_nr == PRIM_QUEUE_LENGTH) { -// debug_printf("p"); - draw_do_flush( draw, DRAW_FLUSH_PRIM_QUEUE ); - } - - assert(draw->pq.queue_nr < PRIM_QUEUE_LENGTH); - - return &draw->pq.queue[draw->pq.queue_nr++]; -} - - - -/** - * Add a point to the primitive queue. - * \param i0 index into user's vertex arrays - */ -static void do_point( struct draw_context *draw, - unsigned i0 ) -{ - struct prim_header *prim = get_queued_prim( draw, 1 ); - - prim->reset_line_stipple = 0; - prim->edgeflags = 1; - prim->pad = 0; - prim->v[0] = draw->vcache.get_vertex( draw, i0 ); -} - - -/** - * Add a line to the primitive queue. - * \param i0 index into user's vertex arrays - * \param i1 index into user's vertex arrays - */ -static void do_line( struct draw_context *draw, - boolean reset_stipple, - unsigned i0, - unsigned i1 ) -{ - struct prim_header *prim = get_queued_prim( draw, 2 ); - - prim->reset_line_stipple = reset_stipple; - prim->edgeflags = 1; - prim->pad = 0; - prim->v[0] = draw->vcache.get_vertex( draw, i0 ); - prim->v[1] = draw->vcache.get_vertex( draw, i1 ); -} - -/** - * Add a triangle to the primitive queue. - */ -static void do_triangle( struct draw_context *draw, - unsigned i0, - unsigned i1, - unsigned i2 ) -{ - struct prim_header *prim = get_queued_prim( draw, 3 ); - -// _mesa_printf("tri %d %d %d\n", i0, i1, i2); - prim->reset_line_stipple = 1; - prim->edgeflags = ~0; - prim->pad = 0; - prim->v[0] = draw->vcache.get_vertex( draw, i0 ); - prim->v[1] = draw->vcache.get_vertex( draw, i1 ); - prim->v[2] = draw->vcache.get_vertex( draw, i2 ); -} - -static void do_ef_triangle( struct draw_context *draw, - boolean reset_stipple, - unsigned ef_mask, - unsigned i0, - unsigned i1, - unsigned i2 ) -{ - struct prim_header *prim = get_queued_prim( draw, 3 ); - struct vertex_header *v0 = draw->vcache.get_vertex( draw, i0 ); - struct vertex_header *v1 = draw->vcache.get_vertex( draw, i1 ); - struct vertex_header *v2 = draw->vcache.get_vertex( draw, i2 ); - - prim->reset_line_stipple = reset_stipple; - - prim->edgeflags = ef_mask & ((v0->edgeflag << 0) | - (v1->edgeflag << 1) | - (v2->edgeflag << 2)); - prim->pad = 0; - prim->v[0] = v0; - prim->v[1] = v1; - prim->v[2] = v2; -} - - -static void do_ef_quad( struct draw_context *draw, - unsigned v0, - unsigned v1, - unsigned v2, - unsigned v3 ) -{ - const unsigned omitEdge2 = ~(1 << 1); - const unsigned omitEdge3 = ~(1 << 2); - do_ef_triangle( draw, 1, omitEdge2, v0, v1, v3 ); - do_ef_triangle( draw, 0, omitEdge3, v1, v2, v3 ); -} - -static void do_quad( struct draw_context *draw, - unsigned v0, - unsigned v1, - unsigned v2, - unsigned v3 ) -{ - do_triangle( draw, v0, v1, v3 ); - do_triangle( draw, v1, v2, v3 ); -} - - -/** - * Main entrypoint to draw some number of points/lines/triangles - */ -static void -draw_prim( struct draw_context *draw, - unsigned prim, unsigned start, unsigned count ) -{ - unsigned i; - boolean unfilled = (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || - draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL); - boolean flatfirst = - (draw->rasterizer->flatshade & draw->rasterizer->flatshade_first) ? TRUE : FALSE; - -// debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count ); - - switch (prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i ++) { - do_point( draw, - start + i ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) { - do_line( draw, - TRUE, - start + i + 0, - start + i + 1); - } - break; - - case PIPE_PRIM_LINE_LOOP: - if (count >= 2) { - for (i = 1; i < count; i++) { - do_line( draw, - i == 1, /* XXX: only if vb not split */ - start + i - 1, - start + i ); - } - - do_line( draw, - 0, - start + count - 1, - start + 0 ); - } - break; - - case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < count; i++) { - do_line( draw, - i == 1, - start + i - 1, - start + i ); - } - break; - - case PIPE_PRIM_TRIANGLES: - if (unfilled) { - for (i = 0; i+2 < count; i += 3) { - do_ef_triangle( draw, - 1, - ~0, - start + i + 0, - start + i + 1, - start + i + 2 ); - } - } - else { - for (i = 0; i+2 < count; i += 3) { - do_triangle( draw, - start + i + 0, - start + i + 1, - start + i + 2 ); - } - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - if (i & 1) { - do_triangle( draw, - start + i + 0, - start + i + 2, - start + i + 1 ); - } - else { - do_triangle( draw, - start + i + 0, - start + i + 1, - start + i + 2 ); - } - } - } - else { - for (i = 0; i+2 < count; i++) { - if (i & 1) { - do_triangle( draw, - start + i + 1, - start + i + 0, - start + i + 2 ); - } - else { - do_triangle( draw, - start + i + 0, - start + i + 1, - start + i + 2 ); - } - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (count >= 3) { - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - do_triangle( draw, - start + i + 1, - start + i + 2, - start + 0 ); - } - } - else { - for (i = 0; i+2 < count; i++) { - do_triangle( draw, - start + 0, - start + i + 1, - start + i + 2 ); - } - } - } - break; - - - case PIPE_PRIM_QUADS: - if (unfilled) { - for (i = 0; i+3 < count; i += 4) { - do_ef_quad( draw, - start + i + 0, - start + i + 1, - start + i + 2, - start + i + 3); - } - } - else { - for (i = 0; i+3 < count; i += 4) { - do_quad( draw, - start + i + 0, - start + i + 1, - start + i + 2, - start + i + 3); - } - } - break; - - case PIPE_PRIM_QUAD_STRIP: - if (unfilled) { - for (i = 0; i+3 < count; i += 2) { - do_ef_quad( draw, - start + i + 2, - start + i + 0, - start + i + 1, - start + i + 3); - } - } - else { - for (i = 0; i+3 < count; i += 2) { - do_quad( draw, - start + i + 2, - start + i + 0, - start + i + 1, - start + i + 3); - } - } - break; - - case PIPE_PRIM_POLYGON: - if (unfilled) { - unsigned ef_mask = (1<<2) | (1<<0); - - for (i = 0; i+2 < count; i++) { - - if (i + 3 >= count) - ef_mask |= (1<<1); - - do_ef_triangle( draw, - i == 0, - ef_mask, - start + i + 1, - start + i + 2, - start + 0); - - ef_mask &= ~(1<<2); - } - } - else { - for (i = 0; i+2 < count; i++) { - do_triangle( draw, - start + i + 1, - start + i + 2, - start + 0); - } - } - break; - - default: - assert(0); - break; - } -} - - - - - diff --git a/src/gallium/auxiliary/draw/draw_vertex_cache.c b/src/gallium/auxiliary/draw/draw_vertex_cache.c deleted file mode 100644 index 730c18bcb3..0000000000 --- a/src/gallium/auxiliary/draw/draw_vertex_cache.c +++ /dev/null @@ -1,219 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell - */ - -#include "pipe/p_util.h" -#include "draw_private.h" -#include "draw_context.h" - - -void draw_vertex_cache_invalidate( struct draw_context *draw ) -{ - assert(draw->pq.queue_nr == 0); - assert(draw->vs.queue_nr == 0); - assert(draw->vcache.referenced == 0); - - /* There's an error somewhere in the vcache code that requires this - * memset. The bug is exposed in q3demo demo001, but probably - * elsewhere as well. Will track it down later. - */ - memset(draw->vcache.idx, ~0, sizeof(draw->vcache.idx)); -} - - -/** - * Check if vertex is in cache, otherwise add it. It won't go through - * VS yet, not until there is a flush operation or the VS queue fills up. - * - * Note that cache entries are basically just two pointers: the first - * an index into the user's vertex arrays, the second a location in - * the vertex shader cache for the post-transformed vertex. - * - * \return pointer to location of (post-transformed) vertex header in the cache - */ -static struct vertex_header *get_vertex( struct draw_context *draw, - unsigned i ) -{ - unsigned slot = (i + (i>>5)) % VCACHE_SIZE; - - assert(slot < 32); /* so we don't exceed the bitfield size below */ - - if (draw->vcache.referenced & (1<vcache.idx[slot].in == i) { - /*debug_printf("HIT %d %d\n", slot, i);*/ - assert(draw->vcache.idx[slot].out < draw->vs.queue_nr); - return draw_header_from_block(draw->vs.vertex_cache, - MAX_VERTEX_ALLOCATION, - draw->vcache.idx[slot].out); - } - - /* Otherwise a collision - */ - slot = VCACHE_SIZE + draw->vcache.overflow++; - /*debug_printf("XXX %d --> %d\n", i, slot);*/ - } - - /* Deal with the cache miss: - */ - { - unsigned out; - struct vertex_header *header; - - assert(slot < Elements(draw->vcache.idx)); - - /*debug_printf("NEW %d %d\n", slot, i);*/ - draw->vcache.idx[slot].in = i; - draw->vcache.idx[slot].out = out = draw->vs.queue_nr++; - draw->vcache.referenced |= (1 << slot); - - - /* Add to vertex shader queue: - */ - assert(draw->vs.queue_nr < VS_QUEUE_LENGTH); - - header = draw_header_from_block(draw->vs.vertex_cache, MAX_VERTEX_ALLOCATION, - out); - draw->vs.elts[out] = i; - header->clipmask = 0; - header->edgeflag = draw_get_edgeflag(draw, i); - header->pad = 0; - header->vertex_id = UNDEFINED_VERTEX_ID; - - /* Need to set the vertex's edge flag here. If we're being called - * by do_ef_triangle(), that function needs edge flag info! - */ - - return draw_header_from_block(draw->vs.vertex_cache, - MAX_VERTEX_ALLOCATION, - draw->vcache.idx[slot].out); - } -} - - -static struct vertex_header *get_uint_elt_vertex( struct draw_context *draw, - unsigned i ) -{ - const unsigned *elts = (const unsigned *) draw->user.elts; - return get_vertex( draw, elts[i] ); -} - - -static struct vertex_header *get_ushort_elt_vertex( struct draw_context *draw, - unsigned i ) -{ - const ushort *elts = (const ushort *) draw->user.elts; - return get_vertex( draw, elts[i] ); -} - - -static struct vertex_header *get_ubyte_elt_vertex( struct draw_context *draw, - unsigned i ) -{ - const ubyte *elts = (const ubyte *) draw->user.elts; - return get_vertex( draw, elts[i] ); -} - - -void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw ) -{ - unsigned i; - - for (i = 0; i < draw->vs.post_nr; i++) { - struct vertex_header * header = - draw_header_from_block(draw->vs.vertex_cache, - MAX_VERTEX_ALLOCATION, i); - header->vertex_id = UNDEFINED_VERTEX_ID; - } -} - - -void draw_vertex_cache_unreference( struct draw_context *draw ) -{ - draw->vcache.referenced = 0; - draw->vcache.overflow = 0; -} - - -int draw_vertex_cache_check_space( struct draw_context *draw, - unsigned nr_verts ) -{ - if (draw->vcache.overflow + nr_verts < VCACHE_OVERFLOW) { - /* The vs queue is sized so that this can never happen: - */ - assert(draw->vs.queue_nr + nr_verts < VS_QUEUE_LENGTH); - return TRUE; - } - else - return FALSE; -} - - - -/** - * Tell the drawing context about the index/element buffer to use - * (ala glDrawElements) - * If no element buffer is to be used (i.e. glDrawArrays) then this - * should be called with eltSize=0 and elements=NULL. - * - * \param draw the drawing context - * \param eltSize size of each element (1, 2 or 4 bytes) - * \param elements the element buffer ptr - */ -void -draw_set_mapped_element_buffer( struct draw_context *draw, - unsigned eltSize, void *elements ) -{ -// draw_statechange( draw ); - - /* choose the get_vertex() function to use */ - switch (eltSize) { - case 0: - draw->vcache.get_vertex = get_vertex; - break; - case 1: - draw->vcache.get_vertex = get_ubyte_elt_vertex; - break; - case 2: - draw->vcache.get_vertex = get_ushort_elt_vertex; - break; - case 4: - draw->vcache.get_vertex = get_uint_elt_vertex; - break; - default: - assert(0); - } - draw->user.elts = elements; - draw->user.eltSize = eltSize; -} - -- cgit v1.2.3 From dd903d83b3ff8dd19f75ac7542b96bc8f1387fe6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 18 Apr 2008 20:41:39 +0100 Subject: draw: remove old vertex_shader->run() functions --- src/gallium/auxiliary/draw/draw_vertex_shader.c | 2 +- src/gallium/auxiliary/draw/draw_vs.h | 7 -- src/gallium/auxiliary/draw/draw_vs_exec.c | 137 ------------------------ src/gallium/auxiliary/draw/draw_vs_llvm.c | 108 ------------------- src/gallium/auxiliary/draw/draw_vs_sse.c | 126 ---------------------- 5 files changed, 1 insertion(+), 379 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c index c0c17c116e..118664d619 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c @@ -74,7 +74,7 @@ draw_vertex_shader_queue_flush(struct draw_context *draw) assert(n > 0); assert(n <= MAX_SHADER_VERTICES); - shader->run(shader, draw, elts, n, dests, MAX_VERTEX_ALLOCATION); +// shader->run(shader, draw, elts, n, dests, MAX_VERTEX_ALLOCATION); } draw->vs.post_nr = draw->vs.queue_nr; diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index e88d00e247..b6f3bbdaea 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -55,13 +55,6 @@ struct draw_vertex_shader { /* Run the shader - this interface will get cleaned up in the * future: */ - boolean (*run)( struct draw_vertex_shader *shader, - struct draw_context *draw, - const unsigned *elts, - unsigned count, - void *out, - unsigned vertex_size); - void (*run_linear)( struct draw_vertex_shader *shader, const float (*input)[4], float (*output)[4], diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 184151b9b1..54a2b2ab04 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -66,145 +66,9 @@ vs_exec_prepare( struct draw_vertex_shader *shader, PIPE_MAX_SAMPLERS, NULL /*samplers*/ ); - draw_update_vertex_fetch( draw ); } -/** - * Transform vertices with the current vertex program/shader - * Up to four vertices can be shaded at a time. - * \param vbuffer the input vertex data - * \param elts indexes of four input vertices - * \param count number of vertices to shade [1..4] - * \param vOut array of pointers to four output vertices - */ -static boolean -vs_exec_run( struct draw_vertex_shader *shader, - struct draw_context *draw, - const unsigned *elts, - unsigned count, - void *vOut, - unsigned vertex_size) -{ - struct exec_vertex_shader *evs = exec_vertex_shader(shader); - struct tgsi_exec_machine *machine = evs->machine; - unsigned int i, j; - unsigned int clipped = 0; - struct tgsi_exec_vector *outputs = 0; - const float *scale = draw->viewport.scale; - const float *trans = draw->viewport.translate; - - assert(shader->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION); - - machine->Consts = (const float (*)[4]) draw->user.constants; - - if (draw->rasterizer->bypass_vs) { - /* outputs are just the inputs */ - outputs = machine->Inputs; - } - else { - outputs = machine->Outputs; - } - - for (i = 0; i < count; i += MAX_TGSI_VERTICES) { - unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); - draw->vertex_fetch.fetch_func( draw, machine, &elts[i], max_vertices ); - -#if 0 - for (j = 0; j < max_vertices; j++) { - unsigned slot; - debug_printf("%d) Input vert:\n", i + j); - for (slot = 0; slot < shader->info.num_inputs; slot++) { - debug_printf("\t%d: %f %f %f %f\n", slot, - machine->Inputs[slot].xyzw[0].f[j], - machine->Inputs[slot].xyzw[1].f[j], - machine->Inputs[slot].xyzw[2].f[j], - machine->Inputs[slot].xyzw[3].f[j]); - } - } -#endif - - - if (!draw->rasterizer->bypass_vs) { - /* run interpreter */ - tgsi_exec_machine_run( machine ); - } - - /* store machine results */ - for (j = 0; j < max_vertices; j++) { - unsigned slot; - float x, y, z, w; - struct vertex_header *out = - draw_header_from_block(vOut, vertex_size, i + j); - - /* Handle attr[0] (position) specially: - * - * XXX: Computing the clipmask should be done in the vertex - * program as a set of DP4 instructions appended to the - * user-provided code. - */ - x = out->clip[0] = outputs[0].xyzw[0].f[j]; - y = out->clip[1] = outputs[0].xyzw[1].f[j]; - z = out->clip[2] = outputs[0].xyzw[2].f[j]; - w = out->clip[3] = outputs[0].xyzw[3].f[j]; - - if (!draw->rasterizer->bypass_clipping) { - out->clipmask = compute_clipmask(out->clip, draw->plane, - draw->nr_planes); - clipped += out->clipmask; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - } - else { - out->clipmask = 0; - } - out->edgeflag = 1; - out->vertex_id = UNDEFINED_VERTEX_ID; - - if (!draw->identity_viewport) { - /* Viewport mapping */ - out->data[0][0] = x * scale[0] + trans[0]; - out->data[0][1] = y * scale[1] + trans[1]; - out->data[0][2] = z * scale[2] + trans[2]; - out->data[0][3] = w; - } - else - { - out->data[0][0] = x; - out->data[0][1] = y; - out->data[0][2] = z; - out->data[0][3] = w; - } - - /* Remaining attributes are packed into sequential post-transform - * vertex attrib slots. - */ - for (slot = 1; slot < draw->num_vs_outputs; slot++) { - out->data[slot][0] = outputs[slot].xyzw[0].f[j]; - out->data[slot][1] = outputs[slot].xyzw[1].f[j]; - out->data[slot][2] = outputs[slot].xyzw[2].f[j]; - out->data[slot][3] = outputs[slot].xyzw[3].f[j]; - } - -#if 0 /*DEBUG*/ - printf("%d) Post xform vert:\n", i + j); - for (slot = 0; slot < draw->num_vs_outputs; slot++) { - printf("\t%d: %f %f %f %f\n", slot, - out->data[slot][0], - out->data[slot][1], - out->data[slot][2], - out->data[slot][3]); - } -#endif - } /* loop over vertices */ - } - return clipped != 0; -} - /* Simplified vertex shader interface for the pt paths. Given the @@ -312,7 +176,6 @@ draw_create_vs_exec(struct draw_context *draw, vs->base.prepare = vs_exec_prepare; - vs->base.run = vs_exec_run; vs->base.run_linear = vs_exec_run_linear; vs->base.delete = vs_exec_delete; vs->machine = &draw->machine; diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 4dbfa955a4..5e27bc9ff0 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -60,113 +60,6 @@ vs_llvm_prepare( struct draw_vertex_shader *base, -/** - * Transform vertices with the current vertex program/shader - * Up to four vertices can be shaded at a time. - * \param vbuffer the input vertex data - * \param elts indexes of four input vertices - * \param count number of vertices to shade [1..4] - * \param vOut array of pointers to four output vertices - */ -static boolean -vs_llvm_run( struct draw_vertex_shader *base, - struct draw_context *draw, - const unsigned *elts, - unsigned count, - void *vOut ) -{ - struct draw_llvm_vertex_shader *shader = - (struct draw_llvm_vertex_shader *)base; - - struct tgsi_exec_machine *machine = shader->machine; - unsigned int j; - unsigned int clipped = 0; - const float *scale = draw->viewport.scale; - const float *trans = draw->viewport.translate; - - - assert(count <= 4); - assert(base->state->output_semantic_name[0] == TGSI_SEMANTIC_POSITION); - - /* Consts does not require 16 byte alignment. */ - machine->Consts = (float (*)[4]) draw->user.constants; - - if (draw->rasterizer->bypass_vs) { - /* outputs are just the inputs */ - outputs = machine->Inputs; - } - else { - outputs = machine->Outputs; - } - - - draw->vertex_fetch.fetch_func( draw, machine, elts, count ); - - if (!draw->rasterizer->bypass_vs) { - /* run shader */ - gallivm_cpu_vs_exec(shader->llvm_prog, - machine->Inputs, - machine->Outputs, - machine->Consts, - machine->Temps); - } - - /* store machine results */ - for (j = 0; j < count; j++) { - unsigned slot; - float x, y, z, w; - - x = vOut[j]->clip[0] = outputs[0].xyzw[0].f[j]; - y = vOut[j]->clip[1] = outputs[0].xyzw[1].f[j]; - z = vOut[j]->clip[2] = outputs[0].xyzw[2].f[j]; - w = vOut[j]->clip[3] = outputs[0].xyzw[3].f[j]; - - if (!draw->rasterizer->bypass_clipping) { - vOut[j]->clipmask = compute_clipmask(vOut[j]->clip, draw->plane, - draw->nr_planes); - clipped += vOut[j]->clipmask; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - } - else { - vOut[j]->clipmask = 0; - } - vOut[j]->edgeflag = 1; - vOut[j]->vertex_id = UNDEFINED_VERTEX_ID; - - if (!draw->identity_viewport) { - /* Viewport mapping */ - vOut[j]->data[0][0] = x * scale[0] + trans[0]; - vOut[j]->data[0][1] = y * scale[1] + trans[1]; - vOut[j]->data[0][2] = z * scale[2] + trans[2]; - vOut[j]->data[0][3] = w; - } - else { - vOut[j]->data[0][0] = x; - vOut[j]->data[0][1] = y; - vOut[j]->data[0][2] = z; - vOut[j]->data[0][3] = w; - } - - /* Remaining attributes are packed into sequential post-transform - * vertex attrib slots. - */ - for (slot = 1; slot < draw->num_vs_outputs; slot++) { - vOut[j]->data[slot][0] = outputs[slot].xyzw[0].f[j]; - vOut[j]->data[slot][1] = outputs[slot].xyzw[1].f[j]; - vOut[j]->data[slot][2] = outputs[slot].xyzw[2].f[j]; - vOut[j]->data[slot][3] = outputs[slot].xyzw[3].f[j]; - } - } /* loop over vertices */ - return clipped != 0; -} - - - static void vs_llvm_run_linear( struct draw_vertex_shader *base, @@ -256,7 +149,6 @@ draw_create_vs_llvm(struct draw_context *draw, tgsi_scan_shader(shader->tokens, &vs->base.info); vs->base.prepare = vs_llvm_prepare; - vs->base.run = vs_llvm_run; vs->base.run_linear = vs_llvm_run_linear; vs->base.delete = vs_llvm_delete; vs->machine = &draw->machine; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index a763f3845c..60f60a5b53 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -74,131 +74,6 @@ vs_sse_prepare( struct draw_vertex_shader *base, draw_update_vertex_fetch( draw ); } -/** - * Transform vertices with the current vertex program/shader - * Up to four vertices can be shaded at a time. - * \param vbuffer the input vertex data - * \param elts indexes of four input vertices - * \param count number of vertices to shade [1..4] - * \param vOut array of pointers to four output vertices - */ -static boolean -vs_sse_run( struct draw_vertex_shader *base, - struct draw_context *draw, - const unsigned *elts, - unsigned count, - void *vOut, - unsigned vertex_size ) -{ - struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base; - struct tgsi_exec_machine *machine = shader->machine; - unsigned int i, j; - unsigned int clipped = 0; - struct tgsi_exec_vector *outputs = 0; - const float *scale = draw->viewport.scale; - const float *trans = draw->viewport.translate; - - assert(base->info.output_semantic_name[0] == TGSI_SEMANTIC_POSITION); - - /* Consts does not require 16 byte alignment. */ - machine->Consts = (const float (*)[4]) draw->user.constants; - - if (draw->rasterizer->bypass_vs) { - /* outputs are just the inputs */ - outputs = machine->Inputs; - } - else { - outputs = machine->Outputs; - } - - for (i = 0; i < count; i += SSE_MAX_VERTICES) { - unsigned int max_vertices = MIN2(SSE_MAX_VERTICES, count - i); - /* Fetch vertices. This may at some point be integrated into the - * compiled shader -- that would require a reorganization where - * multiple versions of the compiled shader might exist, - * specialized for each fetch state. - */ - draw->vertex_fetch.fetch_func(draw, machine, &elts[i], max_vertices); - - if (!draw->rasterizer->bypass_vs) { - /* run compiled shader - */ - shader->func(machine->Inputs, - machine->Outputs, - (float (*)[4])machine->Consts, - machine->Temps, - shader->immediates); - } - - /* XXX: Computing the clipmask and emitting results should be done - * in the vertex program as a set of instructions appended to - * the user-provided code. - */ - for (j = 0; j < max_vertices; j++) { - unsigned slot; - float x, y, z, w; - struct vertex_header *out = - draw_header_from_block(vOut, vertex_size, i + j); - - x = out->clip[0] = outputs[0].xyzw[0].f[j]; - y = out->clip[1] = outputs[0].xyzw[1].f[j]; - z = out->clip[2] = outputs[0].xyzw[2].f[j]; - w = out->clip[3] = outputs[0].xyzw[3].f[j]; - - if (!draw->rasterizer->bypass_clipping) { - out->clipmask = compute_clipmask(out->clip, draw->plane, - draw->nr_planes); - clipped += out->clipmask; - - /* divide by w */ - w = 1.0f / w; - x *= w; - y *= w; - z *= w; - } - else { - out->clipmask = 0; - } - out->edgeflag = 1; - out->vertex_id = UNDEFINED_VERTEX_ID; - - if (!draw->identity_viewport) { - /* Viewport mapping */ - out->data[0][0] = x * scale[0] + trans[0]; - out->data[0][1] = y * scale[1] + trans[1]; - out->data[0][2] = z * scale[2] + trans[2]; - out->data[0][3] = w; - } - else { - out->data[0][0] = x; - out->data[0][1] = y; - out->data[0][2] = z; - out->data[0][3] = w; - } - - /* Remaining attributes are packed into sequential post-transform - * vertex attrib slots. - */ - for (slot = 1; slot < draw->num_vs_outputs; slot++) { - out->data[slot][0] = outputs[slot].xyzw[0].f[j]; - out->data[slot][1] = outputs[slot].xyzw[1].f[j]; - out->data[slot][2] = outputs[slot].xyzw[2].f[j]; - out->data[slot][3] = outputs[slot].xyzw[3].f[j]; - } -#if 0 /*DEBUG*/ - printf("%d) Post xform vert:\n", i + j); - for (slot = 0; slot < draw->num_vs_outputs; slot++) { - printf("\t%d: %f %f %f %f\n", slot, - out->data[slot][0], - out->data[slot][1], - out->data[slot][2], - out->data[slot][3]); - } -#endif - } - } - return clipped != 0; -} /* Simplified vertex shader interface for the pt paths. Given the @@ -294,7 +169,6 @@ draw_create_vs_sse(struct draw_context *draw, tgsi_scan_shader(templ->tokens, &vs->base.info); vs->base.prepare = vs_sse_prepare; - vs->base.run = vs_sse_run; vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; vs->machine = &draw->machine; -- cgit v1.2.3 From 709e33cf0bfd552220e46f44e8cfa2063c3cef69 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 18 Apr 2008 20:44:13 +0100 Subject: draw: remove old draw_vertex_shader_queue_flush function --- src/gallium/auxiliary/draw/draw_context.c | 2 -- src/gallium/auxiliary/draw/draw_private.h | 6 ---- src/gallium/auxiliary/draw/draw_vertex_shader.c | 43 ------------------------- 3 files changed, 51 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index c611300841..9c3ae9bc0d 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -93,8 +93,6 @@ struct draw_context *draw_create( void ) draw->vs.vertex_cache = tmp; } - draw->shader_queue_flush = draw_vertex_shader_queue_flush; - /* these defaults are oriented toward the needs of softpipe */ draw->wide_point_threshold = 1000000.0; /* infinity */ draw->wide_line_threshold = 1.0; diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 18ce6c0ec5..93add257a3 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -309,11 +309,6 @@ struct draw_context unsigned post_nr; } vs; - /** - * Run the vertex shader on all vertices in the vertex queue. - */ - void (*shader_queue_flush)(struct draw_context *draw); - /* Prim pipeline queue: */ struct { @@ -359,7 +354,6 @@ extern void draw_vertex_cache_invalidate( struct draw_context *draw ); extern void draw_vertex_cache_unreference( struct draw_context *draw ); extern void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw ); -extern void draw_vertex_shader_queue_flush( struct draw_context *draw ); extern void draw_update_vertex_fetch( struct draw_context *draw ); diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c index 118664d619..03fe00a951 100644 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ b/src/gallium/auxiliary/draw/draw_vertex_shader.c @@ -37,49 +37,6 @@ #include "draw_context.h" #include "draw_vs.h" -/** - * Run the vertex shader on all vertices in the vertex queue. - * Called by the draw module when the vertx cache needs to be flushed. - */ -void -draw_vertex_shader_queue_flush(struct draw_context *draw) -{ - struct draw_vertex_shader *shader = draw->vertex_shader; - unsigned i; - - assert(draw->vs.queue_nr != 0); - - /* XXX: do this on statechange: - */ - shader->prepare( shader, draw ); - -// fprintf(stderr, "%s %d\n", __FUNCTION__, draw->vs.queue_nr ); - - /* run vertex shader on vertex cache entries, four per invokation */ - for (i = 0; i < draw->vs.queue_nr; i += MAX_SHADER_VERTICES) { - unsigned elts[MAX_SHADER_VERTICES]; - int j, n = MIN2(MAX_SHADER_VERTICES, draw->vs.queue_nr - i); - struct vertex_header *dests = - draw_header_from_block(draw->vs.vertex_cache, - MAX_VERTEX_ALLOCATION, i); - - for (j = 0; j < n; j++) { - elts[j] = draw->vs.elts[i + j]; - } - - for ( ; j < MAX_SHADER_VERTICES; j++) { - elts[j] = elts[0]; - } - - assert(n > 0); - assert(n <= MAX_SHADER_VERTICES); - -// shader->run(shader, draw, elts, n, dests, MAX_VERTEX_ALLOCATION); - } - - draw->vs.post_nr = draw->vs.queue_nr; - draw->vs.queue_nr = 0; -} struct draw_vertex_shader * -- cgit v1.2.3 From 415e8e039ba38716336e8de3d7b3cdc23b9a9d8e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 18 Apr 2008 20:46:06 +0100 Subject: draw: remove draw_vertex_fetch.c --- src/gallium/auxiliary/draw/Makefile | 1 - src/gallium/auxiliary/draw/SConscript | 2 - src/gallium/auxiliary/draw/draw_vertex_fetch.c | 528 ------------------------- src/gallium/auxiliary/draw/draw_vs_llvm.c | 1 - src/gallium/auxiliary/draw/draw_vs_sse.c | 1 - 5 files changed, 533 deletions(-) delete mode 100644 src/gallium/auxiliary/draw/draw_vertex_fetch.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 03210f6a4a..60bb9dfe88 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -31,7 +31,6 @@ C_SOURCES = \ draw_validate.c \ draw_vbuf.c \ draw_vertex.c \ - draw_vertex_fetch.c \ draw_vertex_shader.c \ draw_wide_line.c \ draw_wide_point.c diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 238e3f7d28..246083a962 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -27,8 +27,6 @@ draw = env.ConvenienceLibrary( 'draw_validate.c', 'draw_vbuf.c', 'draw_vertex.c', - 'draw_vertex_cache.c', - 'draw_vertex_fetch.c', 'draw_vertex_shader.c', 'draw_vs_exec.c', 'draw_vs_llvm.c', diff --git a/src/gallium/auxiliary/draw/draw_vertex_fetch.c b/src/gallium/auxiliary/draw/draw_vertex_fetch.c deleted file mode 100644 index 0bc2fcb424..0000000000 --- a/src/gallium/auxiliary/draw/draw_vertex_fetch.c +++ /dev/null @@ -1,528 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell - */ - -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" -#include "draw_vs.h" -#include "draw_context.h" - - -#define DRAW_DBG 0 - - -/** - * Fetch a float[4] vertex attribute from memory, doing format/type - * conversion as needed. - * - * This is probably needed/dupliocated elsewhere, eg format - * conversion, texture sampling etc. - */ -#define FETCH_ATTRIB( NAME, SZ, CVT ) \ -static void \ -fetch_##NAME(const void *ptr, float *attrib) \ -{ \ - static const float defaults[4] = { 0,0,0,1 }; \ - int i; \ - \ - for (i = 0; i < SZ; i++) { \ - attrib[i] = CVT(i); \ - } \ - \ - for (; i < 4; i++) { \ - attrib[i] = defaults[i]; \ - } \ -} - -#define CVT_64_FLOAT(i) (float) ((double *) ptr)[i] -#define CVT_32_FLOAT(i) ((float *) ptr)[i] - -#define CVT_8_USCALED(i) (float) ((unsigned char *) ptr)[i] -#define CVT_16_USCALED(i) (float) ((unsigned short *) ptr)[i] -#define CVT_32_USCALED(i) (float) ((unsigned int *) ptr)[i] - -#define CVT_8_SSCALED(i) (float) ((char *) ptr)[i] -#define CVT_16_SSCALED(i) (float) ((short *) ptr)[i] -#define CVT_32_SSCALED(i) (float) ((int *) ptr)[i] - -#define CVT_8_UNORM(i) (float) ((unsigned char *) ptr)[i] / 255.0f -#define CVT_16_UNORM(i) (float) ((unsigned short *) ptr)[i] / 65535.0f -#define CVT_32_UNORM(i) (float) ((unsigned int *) ptr)[i] / 4294967295.0f - -#define CVT_8_SNORM(i) (float) ((char *) ptr)[i] / 127.0f -#define CVT_16_SNORM(i) (float) ((short *) ptr)[i] / 32767.0f -#define CVT_32_SNORM(i) (float) ((int *) ptr)[i] / 2147483647.0f - -FETCH_ATTRIB( R64G64B64A64_FLOAT, 4, CVT_64_FLOAT ) -FETCH_ATTRIB( R64G64B64_FLOAT, 3, CVT_64_FLOAT ) -FETCH_ATTRIB( R64G64_FLOAT, 2, CVT_64_FLOAT ) -FETCH_ATTRIB( R64_FLOAT, 1, CVT_64_FLOAT ) - -FETCH_ATTRIB( R32G32B32A32_FLOAT, 4, CVT_32_FLOAT ) -FETCH_ATTRIB( R32G32B32_FLOAT, 3, CVT_32_FLOAT ) -FETCH_ATTRIB( R32G32_FLOAT, 2, CVT_32_FLOAT ) -FETCH_ATTRIB( R32_FLOAT, 1, CVT_32_FLOAT ) - -FETCH_ATTRIB( R32G32B32A32_USCALED, 4, CVT_32_USCALED ) -FETCH_ATTRIB( R32G32B32_USCALED, 3, CVT_32_USCALED ) -FETCH_ATTRIB( R32G32_USCALED, 2, CVT_32_USCALED ) -FETCH_ATTRIB( R32_USCALED, 1, CVT_32_USCALED ) - -FETCH_ATTRIB( R32G32B32A32_SSCALED, 4, CVT_32_SSCALED ) -FETCH_ATTRIB( R32G32B32_SSCALED, 3, CVT_32_SSCALED ) -FETCH_ATTRIB( R32G32_SSCALED, 2, CVT_32_SSCALED ) -FETCH_ATTRIB( R32_SSCALED, 1, CVT_32_SSCALED ) - -FETCH_ATTRIB( R32G32B32A32_UNORM, 4, CVT_32_UNORM ) -FETCH_ATTRIB( R32G32B32_UNORM, 3, CVT_32_UNORM ) -FETCH_ATTRIB( R32G32_UNORM, 2, CVT_32_UNORM ) -FETCH_ATTRIB( R32_UNORM, 1, CVT_32_UNORM ) - -FETCH_ATTRIB( R32G32B32A32_SNORM, 4, CVT_32_SNORM ) -FETCH_ATTRIB( R32G32B32_SNORM, 3, CVT_32_SNORM ) -FETCH_ATTRIB( R32G32_SNORM, 2, CVT_32_SNORM ) -FETCH_ATTRIB( R32_SNORM, 1, CVT_32_SNORM ) - -FETCH_ATTRIB( R16G16B16A16_USCALED, 4, CVT_16_USCALED ) -FETCH_ATTRIB( R16G16B16_USCALED, 3, CVT_16_USCALED ) -FETCH_ATTRIB( R16G16_USCALED, 2, CVT_16_USCALED ) -FETCH_ATTRIB( R16_USCALED, 1, CVT_16_USCALED ) - -FETCH_ATTRIB( R16G16B16A16_SSCALED, 4, CVT_16_SSCALED ) -FETCH_ATTRIB( R16G16B16_SSCALED, 3, CVT_16_SSCALED ) -FETCH_ATTRIB( R16G16_SSCALED, 2, CVT_16_SSCALED ) -FETCH_ATTRIB( R16_SSCALED, 1, CVT_16_SSCALED ) - -FETCH_ATTRIB( R16G16B16A16_UNORM, 4, CVT_16_UNORM ) -FETCH_ATTRIB( R16G16B16_UNORM, 3, CVT_16_UNORM ) -FETCH_ATTRIB( R16G16_UNORM, 2, CVT_16_UNORM ) -FETCH_ATTRIB( R16_UNORM, 1, CVT_16_UNORM ) - -FETCH_ATTRIB( R16G16B16A16_SNORM, 4, CVT_16_SNORM ) -FETCH_ATTRIB( R16G16B16_SNORM, 3, CVT_16_SNORM ) -FETCH_ATTRIB( R16G16_SNORM, 2, CVT_16_SNORM ) -FETCH_ATTRIB( R16_SNORM, 1, CVT_16_SNORM ) - -FETCH_ATTRIB( R8G8B8A8_USCALED, 4, CVT_8_USCALED ) -FETCH_ATTRIB( R8G8B8_USCALED, 3, CVT_8_USCALED ) -FETCH_ATTRIB( R8G8_USCALED, 2, CVT_8_USCALED ) -FETCH_ATTRIB( R8_USCALED, 1, CVT_8_USCALED ) - -FETCH_ATTRIB( R8G8B8A8_SSCALED, 4, CVT_8_SSCALED ) -FETCH_ATTRIB( R8G8B8_SSCALED, 3, CVT_8_SSCALED ) -FETCH_ATTRIB( R8G8_SSCALED, 2, CVT_8_SSCALED ) -FETCH_ATTRIB( R8_SSCALED, 1, CVT_8_SSCALED ) - -FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM ) -FETCH_ATTRIB( R8G8B8_UNORM, 3, CVT_8_UNORM ) -FETCH_ATTRIB( R8G8_UNORM, 2, CVT_8_UNORM ) -FETCH_ATTRIB( R8_UNORM, 1, CVT_8_UNORM ) - -FETCH_ATTRIB( R8G8B8A8_SNORM, 4, CVT_8_SNORM ) -FETCH_ATTRIB( R8G8B8_SNORM, 3, CVT_8_SNORM ) -FETCH_ATTRIB( R8G8_SNORM, 2, CVT_8_SNORM ) -FETCH_ATTRIB( R8_SNORM, 1, CVT_8_SNORM ) - -FETCH_ATTRIB( A8R8G8B8_UNORM, 4, CVT_8_UNORM ) -//FETCH_ATTRIB( R8G8B8A8_UNORM, 4, CVT_8_UNORM ) - - - -static void -fetch_B8G8R8A8_UNORM(const void *ptr, float *attrib) -{ - attrib[2] = CVT_8_UNORM(0); - attrib[1] = CVT_8_UNORM(1); - attrib[0] = CVT_8_UNORM(2); - attrib[3] = CVT_8_UNORM(3); -} - - -fetch_func draw_get_fetch_func( enum pipe_format format ) -{ -#if 0 - { - char tmp[80]; - pf_sprint_name(tmp, format); - debug_printf("%s: %s\n", __FUNCTION__, tmp); - } -#endif - - switch (format) { - case PIPE_FORMAT_R64_FLOAT: - return fetch_R64_FLOAT; - case PIPE_FORMAT_R64G64_FLOAT: - return fetch_R64G64_FLOAT; - case PIPE_FORMAT_R64G64B64_FLOAT: - return fetch_R64G64B64_FLOAT; - case PIPE_FORMAT_R64G64B64A64_FLOAT: - return fetch_R64G64B64A64_FLOAT; - - case PIPE_FORMAT_R32_FLOAT: - return fetch_R32_FLOAT; - case PIPE_FORMAT_R32G32_FLOAT: - return fetch_R32G32_FLOAT; - case PIPE_FORMAT_R32G32B32_FLOAT: - return fetch_R32G32B32_FLOAT; - case PIPE_FORMAT_R32G32B32A32_FLOAT: - return fetch_R32G32B32A32_FLOAT; - - case PIPE_FORMAT_R32_UNORM: - return fetch_R32_UNORM; - case PIPE_FORMAT_R32G32_UNORM: - return fetch_R32G32_UNORM; - case PIPE_FORMAT_R32G32B32_UNORM: - return fetch_R32G32B32_UNORM; - case PIPE_FORMAT_R32G32B32A32_UNORM: - return fetch_R32G32B32A32_UNORM; - - case PIPE_FORMAT_R32_USCALED: - return fetch_R32_USCALED; - case PIPE_FORMAT_R32G32_USCALED: - return fetch_R32G32_USCALED; - case PIPE_FORMAT_R32G32B32_USCALED: - return fetch_R32G32B32_USCALED; - case PIPE_FORMAT_R32G32B32A32_USCALED: - return fetch_R32G32B32A32_USCALED; - - case PIPE_FORMAT_R32_SNORM: - return fetch_R32_SNORM; - case PIPE_FORMAT_R32G32_SNORM: - return fetch_R32G32_SNORM; - case PIPE_FORMAT_R32G32B32_SNORM: - return fetch_R32G32B32_SNORM; - case PIPE_FORMAT_R32G32B32A32_SNORM: - return fetch_R32G32B32A32_SNORM; - - case PIPE_FORMAT_R32_SSCALED: - return fetch_R32_SSCALED; - case PIPE_FORMAT_R32G32_SSCALED: - return fetch_R32G32_SSCALED; - case PIPE_FORMAT_R32G32B32_SSCALED: - return fetch_R32G32B32_SSCALED; - case PIPE_FORMAT_R32G32B32A32_SSCALED: - return fetch_R32G32B32A32_SSCALED; - - case PIPE_FORMAT_R16_UNORM: - return fetch_R16_UNORM; - case PIPE_FORMAT_R16G16_UNORM: - return fetch_R16G16_UNORM; - case PIPE_FORMAT_R16G16B16_UNORM: - return fetch_R16G16B16_UNORM; - case PIPE_FORMAT_R16G16B16A16_UNORM: - return fetch_R16G16B16A16_UNORM; - - case PIPE_FORMAT_R16_USCALED: - return fetch_R16_USCALED; - case PIPE_FORMAT_R16G16_USCALED: - return fetch_R16G16_USCALED; - case PIPE_FORMAT_R16G16B16_USCALED: - return fetch_R16G16B16_USCALED; - case PIPE_FORMAT_R16G16B16A16_USCALED: - return fetch_R16G16B16A16_USCALED; - - case PIPE_FORMAT_R16_SNORM: - return fetch_R16_SNORM; - case PIPE_FORMAT_R16G16_SNORM: - return fetch_R16G16_SNORM; - case PIPE_FORMAT_R16G16B16_SNORM: - return fetch_R16G16B16_SNORM; - case PIPE_FORMAT_R16G16B16A16_SNORM: - return fetch_R16G16B16A16_SNORM; - - case PIPE_FORMAT_R16_SSCALED: - return fetch_R16_SSCALED; - case PIPE_FORMAT_R16G16_SSCALED: - return fetch_R16G16_SSCALED; - case PIPE_FORMAT_R16G16B16_SSCALED: - return fetch_R16G16B16_SSCALED; - case PIPE_FORMAT_R16G16B16A16_SSCALED: - return fetch_R16G16B16A16_SSCALED; - - case PIPE_FORMAT_R8_UNORM: - return fetch_R8_UNORM; - case PIPE_FORMAT_R8G8_UNORM: - return fetch_R8G8_UNORM; - case PIPE_FORMAT_R8G8B8_UNORM: - return fetch_R8G8B8_UNORM; - case PIPE_FORMAT_R8G8B8A8_UNORM: - return fetch_R8G8B8A8_UNORM; - - case PIPE_FORMAT_R8_USCALED: - return fetch_R8_USCALED; - case PIPE_FORMAT_R8G8_USCALED: - return fetch_R8G8_USCALED; - case PIPE_FORMAT_R8G8B8_USCALED: - return fetch_R8G8B8_USCALED; - case PIPE_FORMAT_R8G8B8A8_USCALED: - return fetch_R8G8B8A8_USCALED; - - case PIPE_FORMAT_R8_SNORM: - return fetch_R8_SNORM; - case PIPE_FORMAT_R8G8_SNORM: - return fetch_R8G8_SNORM; - case PIPE_FORMAT_R8G8B8_SNORM: - return fetch_R8G8B8_SNORM; - case PIPE_FORMAT_R8G8B8A8_SNORM: - return fetch_R8G8B8A8_SNORM; - - case PIPE_FORMAT_R8_SSCALED: - return fetch_R8_SSCALED; - case PIPE_FORMAT_R8G8_SSCALED: - return fetch_R8G8_SSCALED; - case PIPE_FORMAT_R8G8B8_SSCALED: - return fetch_R8G8B8_SSCALED; - case PIPE_FORMAT_R8G8B8A8_SSCALED: - return fetch_R8G8B8A8_SSCALED; - - case PIPE_FORMAT_A8R8G8B8_UNORM: - return fetch_A8R8G8B8_UNORM; - - - case PIPE_FORMAT_B8G8R8A8_UNORM: - return fetch_B8G8R8A8_UNORM; - - case 0: - return NULL; /* not sure why this is needed */ - - default: - /* This can get hit because draw-state-validation is too eager, - and can jump in here validating stuff before the state tracker has set - up everything. - */ - /* assert(0); */ - return NULL; - } -} - - -static void -transpose_4x4( float *out, const float *in ) -{ - /* This can be achieved in 12 sse instructions, plus the final - * stores I guess. This is probably a bit more than that - maybe - * 32 or so? - */ - out[0] = in[0]; out[1] = in[4]; out[2] = in[8]; out[3] = in[12]; - out[4] = in[1]; out[5] = in[5]; out[6] = in[9]; out[7] = in[13]; - out[8] = in[2]; out[9] = in[6]; out[10] = in[10]; out[11] = in[14]; - out[12] = in[3]; out[13] = in[7]; out[14] = in[11]; out[15] = in[15]; -} - - - -static void fetch_xyz_rgb( struct draw_context *draw, - struct tgsi_exec_machine *machine, - const unsigned *elts, - unsigned count ) -{ - const unsigned *pitch = draw->vertex_fetch.pitch; - const ubyte **src = draw->vertex_fetch.src_ptr; - int i; - - assert(count <= 4); - -// debug_printf("%s\n", __FUNCTION__); - - /* loop over vertex attributes (vertex shader inputs) - */ - - for (i = 0; i < 4; i++) { - { - const float *in = (const float *)(src[0] + elts[i] * pitch[0]); - float *out = &machine->Inputs[0].xyzw[0].f[i]; - out[0] = in[0]; - out[4] = in[1]; - out[8] = in[2]; - out[12] = 1.0f; - } - - { - const float *in = (const float *)(src[1] + elts[i] * pitch[1]); - float *out = &machine->Inputs[1].xyzw[0].f[i]; - out[0] = in[0]; - out[4] = in[1]; - out[8] = in[2]; - out[12] = 1.0f; - } - } -} - - - - -static void fetch_xyz_rgb_st( struct draw_context *draw, - struct tgsi_exec_machine *machine, - const unsigned *elts, - unsigned count ) -{ - const unsigned *pitch = draw->vertex_fetch.pitch; - const ubyte **src = draw->vertex_fetch.src_ptr; - int i; - - assert(count <= 4); - - /* loop over vertex attributes (vertex shader inputs) - */ - - for (i = 0; i < 4; i++) { - { - const float *in = (const float *)(src[0] + elts[i] * pitch[0]); - float *out = &machine->Inputs[0].xyzw[0].f[i]; - out[0] = in[0]; - out[4] = in[1]; - out[8] = in[2]; - out[12] = 1.0f; - } - - { - const float *in = (const float *)(src[1] + elts[i] * pitch[1]); - float *out = &machine->Inputs[1].xyzw[0].f[i]; - out[0] = in[0]; - out[4] = in[1]; - out[8] = in[2]; - out[12] = 1.0f; - } - - { - const float *in = (const float *)(src[2] + elts[i] * pitch[2]); - float *out = &machine->Inputs[2].xyzw[0].f[i]; - out[0] = in[0]; - out[4] = in[1]; - out[8] = 0.0f; - out[12] = 1.0f; - } - } -} - - - - -/** - * Fetch vertex attributes for 'count' vertices. - */ -static void generic_vertex_fetch( struct draw_context *draw, - struct tgsi_exec_machine *machine, - const unsigned *elts, - unsigned count ) -{ - unsigned nr_attrs = draw->vertex_fetch.nr_attrs; - unsigned attr; - - assert(count <= 4); - -// debug_printf("%s %d\n", __FUNCTION__, count); - - /* loop over vertex attributes (vertex shader inputs) - */ - for (attr = 0; attr < nr_attrs; attr++) { - - const unsigned pitch = draw->vertex_fetch.pitch[attr]; - const ubyte *src = draw->vertex_fetch.src_ptr[attr]; - const fetch_func fetch = draw->vertex_fetch.fetch[attr]; - unsigned i; - float p[4][4]; - - - /* Fetch four attributes for four vertices. - * - * Could fetch directly into AOS format, but this is meant to be - * a prototype for an sse implementation, which would have - * difficulties doing that. - */ - for (i = 0; i < count; i++) - fetch( src + elts[i] * pitch, p[i] ); - - /* Be nice and zero out any missing vertices: - */ - for ( ; i < 4; i++) - p[i][0] = p[i][1] = p[i][2] = p[i][3] = 0; - - /* Transpose/swizzle into sse-friendly format. Currently - * assuming that all vertex shader inputs are float[4], but this - * isn't true -- if the vertex shader only wants tex0.xy, we - * could optimize for that. - * - * To do so fully without codegen would probably require an - * excessive number of fetch functions, but we could at least - * minimize the transpose step: - */ - transpose_4x4( (float *)&machine->Inputs[attr].xyzw[0].f[0], (float *)p ); - } -} - - - -void draw_update_vertex_fetch( struct draw_context *draw ) -{ - unsigned nr_attrs, i; - -// debug_printf("%s\n", __FUNCTION__); - - /* this may happend during context init */ - if (!draw->vertex_shader) - return; - - nr_attrs = draw->vertex_shader->info.num_inputs; - - for (i = 0; i < nr_attrs; i++) { - unsigned buf = draw->vertex_element[i].vertex_buffer_index; - enum pipe_format format = draw->vertex_element[i].src_format; - - draw->vertex_fetch.src_ptr[i] = (const ubyte *) draw->user.vbuffer[buf] + - draw->vertex_buffer[buf].buffer_offset + - draw->vertex_element[i].src_offset; - - draw->vertex_fetch.pitch[i] = draw->vertex_buffer[buf].pitch; - draw->vertex_fetch.fetch[i] = draw_get_fetch_func( format ); - } - - draw->vertex_fetch.nr_attrs = nr_attrs; - - draw->vertex_fetch.fetch_func = generic_vertex_fetch; - - switch (nr_attrs) { - case 2: - if (draw->vertex_element[0].src_format == PIPE_FORMAT_R32G32B32_FLOAT && - draw->vertex_element[1].src_format == PIPE_FORMAT_R32G32B32_FLOAT) - draw->vertex_fetch.fetch_func = fetch_xyz_rgb; - break; - case 3: - if (draw->vertex_element[0].src_format == PIPE_FORMAT_R32G32B32_FLOAT && - draw->vertex_element[1].src_format == PIPE_FORMAT_R32G32B32_FLOAT && - draw->vertex_element[2].src_format == PIPE_FORMAT_R32G32_FLOAT) - draw->vertex_fetch.fetch_func = fetch_xyz_rgb_st; - break; - default: - break; - } - -} diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 5e27bc9ff0..c219a91156 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -55,7 +55,6 @@ static void vs_llvm_prepare( struct draw_vertex_shader *base, struct draw_context *draw ) { - draw_update_vertex_fetch( draw ); } diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 60f60a5b53..8e2d381f14 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -71,7 +71,6 @@ static void vs_sse_prepare( struct draw_vertex_shader *base, struct draw_context *draw ) { - draw_update_vertex_fetch( draw ); } -- cgit v1.2.3 From 6094e79f4e3350d123c7532b1c73faa60834a62d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 00:27:12 +0100 Subject: draw: remove dead data structures --- src/gallium/auxiliary/draw/draw_context.c | 20 ----------- src/gallium/auxiliary/draw/draw_private.h | 59 ------------------------------- 2 files changed, 79 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 9c3ae9bc0d..6012bc155e 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -83,16 +83,6 @@ struct draw_context *draw_create( void ) ASSIGN_4V( draw->plane[5], 0, 0, -1, 1 ); /* mesa's a bit wonky */ draw->nr_planes = 6; - /* Statically allocate maximum sized vertices for the cache - could be cleverer... - */ - { - char *tmp = align_malloc(VS_QUEUE_LENGTH * MAX_VERTEX_ALLOCATION, 16); - if (!tmp) - goto fail; - - draw->vs.vertex_cache = tmp; - } - /* these defaults are oriented toward the needs of softpipe */ draw->wide_point_threshold = 1000000.0; /* infinity */ draw->wide_line_threshold = 1.0; @@ -162,10 +152,6 @@ void draw_destroy( struct draw_context *draw ) align_free(draw->machine.Outputs); tgsi_exec_machine_free_data(&draw->machine); - - if (draw->vs.vertex_cache) - align_free( draw->vs.vertex_cache ); /* Frees all the vertices. */ - /* Not so fast -- we're just borrowing this at the moment. * if (draw->render) @@ -254,8 +240,6 @@ draw_set_vertex_buffers(struct draw_context *draw, { assert(count <= PIPE_MAX_ATTRIBS); - draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); - memcpy(draw->vertex_buffer, buffers, count * sizeof(buffers[0])); draw->nr_vertex_buffers = count; } @@ -268,8 +252,6 @@ draw_set_vertex_elements(struct draw_context *draw, { assert(count <= PIPE_MAX_ATTRIBS); - draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); - memcpy(draw->vertex_element, elements, count * sizeof(elements[0])); draw->nr_vertex_elements = count; } @@ -282,7 +264,6 @@ void draw_set_mapped_vertex_buffer(struct draw_context *draw, unsigned attr, const void *buffer) { - draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); draw->user.vbuffer[attr] = buffer; } @@ -291,7 +272,6 @@ void draw_set_mapped_constant_buffer(struct draw_context *draw, const void *buffer) { - draw_do_flush( draw, DRAW_FLUSH_VERTEX_CACHE/*STATE_CHANGE*/ ); draw->user.constants = buffer; } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 93add257a3..78265c64a6 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -195,9 +195,6 @@ struct draw_context /* Support prototype passthrough path: */ struct { - unsigned prim; /* XXX: to be removed */ - unsigned hw_vertex_size; /* XXX: to be removed */ - struct { struct draw_pt_middle_end *opt[PT_MAX_MIDDLE]; } middle; @@ -272,54 +269,6 @@ struct draw_context /** TGSI program interpreter runtime state */ struct tgsi_exec_machine machine; - /* Vertex fetch internal state - */ - struct { - const ubyte *src_ptr[PIPE_MAX_ATTRIBS]; - unsigned pitch[PIPE_MAX_ATTRIBS]; - fetch_func fetch[PIPE_MAX_ATTRIBS]; - unsigned nr_attrs; - full_fetch_func fetch_func; - pt_fetch_func pt_fetch; - } vertex_fetch; - - /* Post-tnl vertex cache: - */ - struct { - unsigned referenced; /**< bitfield */ - - struct { - unsigned in; /* client array element */ - unsigned out; /* index in vs queue/array */ - } idx[VCACHE_SIZE + VCACHE_OVERFLOW]; - - unsigned overflow; - - /** To find space in the vertex cache: */ - struct vertex_header *(*get_vertex)( struct draw_context *draw, - unsigned i ); - } vcache; - - /* Vertex shader queue: - */ - struct { - unsigned elts[VS_QUEUE_LENGTH]; /**< index into the user's vertex arrays */ - char *vertex_cache; - unsigned queue_nr; - unsigned post_nr; - } vs; - - /* Prim pipeline queue: - */ - struct { - /* Need to queue up primitives until their vertices have been - * transformed by a vs queue flush. - */ - struct prim_header queue[PRIM_QUEUE_LENGTH]; - unsigned queue_nr; - } pq; - - /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private. */ struct gallivm_cpu_engine *engine; @@ -372,9 +321,6 @@ boolean draw_pt_arrays( struct draw_context *draw, void draw_pt_reset_vertex_ids( struct draw_context *draw ); -#define DRAW_FLUSH_SHADER_QUEUE 0x1 /* sized not to overflow, never raised */ -#define DRAW_FLUSH_PRIM_QUEUE 0x2 -#define DRAW_FLUSH_VERTEX_CACHE 0x4 #define DRAW_FLUSH_STATE_CHANGE 0x8 #define DRAW_FLUSH_BACKEND 0x10 @@ -416,10 +362,5 @@ dot4(const float *a, const float *b) return result; } -static INLINE struct vertex_header * -draw_header_from_block(char *block, int size, int num) -{ - return (struct vertex_header*)(block + num * size); -} #endif /* DRAW_PRIVATE_H */ -- cgit v1.2.3 From 251ebcc175d479dda8d0d5b64fc42f44e747197e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 00:29:25 +0100 Subject: draw: remove more dead data structures --- src/gallium/auxiliary/draw/draw_private.h | 25 ------------------------- 1 file changed, 25 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 78265c64a6..476b1184d4 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -128,31 +128,6 @@ struct draw_stage }; -#define PRIM_QUEUE_LENGTH 32 -#define VCACHE_SIZE 32 -#define VCACHE_OVERFLOW 4 -#define VS_QUEUE_LENGTH (VCACHE_SIZE + VCACHE_OVERFLOW + 1) /* can never fill up */ - - - -/* Internal function for vertex fetch. - */ -typedef void (*fetch_func)(const void *ptr, float *attrib); - -fetch_func draw_get_fetch_func( enum pipe_format format ); - - - -typedef void (*full_fetch_func)( struct draw_context *draw, - struct tgsi_exec_machine *machine, - const unsigned *elts, - unsigned count ); - -typedef void (*pt_fetch_func)( struct draw_context *draw, - float *out, - unsigned start, - unsigned count ); - struct vbuf_render; -- cgit v1.2.3 From 6d9132de04fc190fea56978849dfc427e5359912 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 00:31:06 +0100 Subject: draw: make draw_pt_fetch_emit use translate facility --- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 186 +++++++++--------------- src/gallium/auxiliary/draw/draw_vbuf.c | 4 +- 2 files changed, 72 insertions(+), 118 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index d696afa1aa..6e4fea460b 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -36,6 +36,7 @@ #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" +#include "translate/translate.h" /* The simplest 'middle end' in the new vertex code. * @@ -72,94 +73,15 @@ struct fetch_emit_middle_end { struct draw_pt_middle_end base; struct draw_context *draw; - - struct { - const ubyte *ptr; - unsigned pitch; - void (*fetch)( const void *from, float *attrib); - void (*emit)( const float *attrib, float **out ); - } fetch[PIPE_MAX_ATTRIBS]; - unsigned nr_fetch; - unsigned hw_vertex_size; -}; - - - -static void fetch_R32_FLOAT( const void *from, - float *attrib ) -{ - float *f = (float *) from; - attrib[0] = f[0]; - attrib[1] = 0.0; - attrib[2] = 0.0; - attrib[3] = 1.0; -} - - -static void emit_R32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out) += 1; -} - -static void emit_R32G32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out) += 2; -} - -static void emit_R32G32B32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out)[2] = attrib[2]; - (*out) += 3; -} - -static void emit_R32G32B32A32_FLOAT( const float *attrib, - float **out ) -{ - (*out)[0] = attrib[0]; - (*out)[1] = attrib[1]; - (*out)[2] = attrib[2]; - (*out)[3] = attrib[3]; - (*out) += 4; -} + struct translate *translate; + /* Cache point size somewhere it's address won't change: + */ + float point_size; -/** - * General-purpose fetch from user's vertex arrays, emit to driver's - * vertex buffer. - * - * XXX this is totally temporary. - */ -static void -fetch_store_general( struct fetch_emit_middle_end *feme, - void *out_ptr, - const unsigned *fetch_elts, - unsigned count ) -{ - float *out = (float *)out_ptr; - uint i, j; +}; - for (i = 0; i < count; i++) { - unsigned elt = fetch_elts[i] & ~DRAW_PT_FLAG_MASK; - - for (j = 0; j < feme->nr_fetch; j++) { - float attrib[4]; - const ubyte *from = (feme->fetch[j].ptr + - feme->fetch[j].pitch * elt); - - feme->fetch[j].fetch( from, attrib ); - feme->fetch[j].emit( attrib, &out ); - } - } -} @@ -170,8 +92,9 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; struct draw_context *draw = feme->draw; const struct vertex_info *vinfo; - unsigned i; + unsigned i, dst_offset; boolean ok; + struct translate_key key; ok = draw->render->set_primitive( draw->render, @@ -186,51 +109,82 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, vinfo = draw->render->get_vertex_info(draw->render); - /* This is unique as it transforms straight from API vertex - * information to HW vertices. All other cases go through our - * intermediate float[4] format. + + /* Transform from API vertices to HW vertices, skipping the + * pipeline_vertex intermediate step. */ - for (i = 0; i < vinfo->num_attribs; i++) { - unsigned src_element = vinfo->src_index[i]; - unsigned src_buffer = draw->vertex_element[src_element].vertex_buffer_index; - - feme->fetch[i].ptr = ((const ubyte *)draw->user.vbuffer[src_buffer] + - draw->vertex_buffer[src_buffer].buffer_offset + - draw->vertex_element[src_element].src_offset); + dst_offset = 0; + memset(&key, 0, sizeof(key)); - feme->fetch[i].pitch = draw->vertex_buffer[src_buffer].pitch; - - feme->fetch[i].fetch = draw_get_fetch_func(draw->vertex_element[src_element].src_format); + for (i = 0; i < vinfo->num_attribs; i++) { + const struct pipe_vertex_element *src = &draw->vertex_element[vinfo->src_index[i]]; + unsigned emit_sz = 0; + unsigned input_format = src->src_format; + unsigned input_buffer = src->vertex_buffer_index; + unsigned input_offset = src->src_offset; + unsigned output_format; switch (vinfo->emit[i]) { case EMIT_4F: - feme->fetch[i].emit = emit_R32G32B32A32_FLOAT; + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); break; case EMIT_3F: - feme->fetch[i].emit = emit_R32G32B32_FLOAT; + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); break; case EMIT_2F: - feme->fetch[i].emit = emit_R32G32_FLOAT; + output_format = PIPE_FORMAT_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); break; case EMIT_1F: - feme->fetch[i].emit = emit_R32_FLOAT; + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); break; case EMIT_1F_PSIZE: - feme->fetch[i].ptr = (const ubyte *)&feme->draw->rasterizer->point_size; - feme->fetch[i].pitch = 0; - feme->fetch[i].fetch = fetch_R32_FLOAT; - feme->fetch[i].emit = emit_R32_FLOAT; + input_format = PIPE_FORMAT_R32_FLOAT; + input_buffer = draw->nr_vertex_buffers; + input_offset = 0; + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); break; default: assert(0); - feme->fetch[i].emit = NULL; - break; + output_format = PIPE_FORMAT_NONE; + emit_sz = 0; + continue; } + + key.element[i].input_format = input_format; + key.element[i].input_buffer = input_buffer; + key.element[i].input_offset = input_offset; + key.element[i].output_format = output_format; + key.element[i].output_offset = dst_offset; + + dst_offset += emit_sz; } - feme->nr_fetch = vinfo->num_attribs; - feme->hw_vertex_size = vinfo->size * 4; + key.nr_elements = vinfo->num_attribs; + key.output_stride = vinfo->size * 4; + + /* Don't bother with caching at this stage: + */ + if (!feme->translate || + memcmp(&feme->translate->key, &key, sizeof(key)) != 0) + { + if (feme->translate) + feme->translate->release(feme->translate); + + feme->translate = translate_create( &key ); + + feme->translate->set_buffer(feme->translate, + draw->nr_vertex_buffers, + &feme->point_size, + 0); + } + + feme->point_size = draw->rasterizer->point_size; } @@ -252,7 +206,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, draw_do_flush( draw, DRAW_FLUSH_BACKEND ); hw_verts = draw->render->allocate_vertices( draw->render, - (ushort)feme->hw_vertex_size, + (ushort)feme->translate->key.output_stride, (ushort)fetch_count ); if (!hw_verts) { assert(0); @@ -262,10 +216,10 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, /* Single routine to fetch vertices and emit HW verts. */ - fetch_store_general( feme, - hw_verts, - fetch_elts, - fetch_count ); + feme->translate->run_elts( feme->translate, + fetch_elts, + fetch_count, + hw_verts ); /* XXX: Draw arrays path to avoid re-emitting index list again and * again. @@ -278,7 +232,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, */ draw->render->release_vertices( draw->render, hw_verts, - feme->hw_vertex_size, + feme->translate->key.output_stride, fetch_count ); } diff --git a/src/gallium/auxiliary/draw/draw_vbuf.c b/src/gallium/auxiliary/draw/draw_vbuf.c index dbbcf05c3c..30dceeb43d 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_vbuf.c @@ -439,8 +439,8 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf ) /* Allocate a new vertex buffer */ vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size; vbuf->vertices = (uint *) vbuf->render->allocate_vertices(vbuf->render, - (ushort) vbuf->vertex_size, - (ushort) vbuf->max_vertices); + (ushort) vbuf->vertex_size, + (ushort) vbuf->max_vertices); vbuf->vertex_ptr = vbuf->vertices; } -- cgit v1.2.3 From 6494946db66b62d280e34a0486b83cca15f5b457 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 00:36:35 +0100 Subject: draw: remove draw_debug.c --- src/gallium/auxiliary/draw/Makefile | 1 - src/gallium/auxiliary/draw/SConscript | 1 - src/gallium/auxiliary/draw/draw_debug.c | 113 -------------------------------- 3 files changed, 115 deletions(-) delete mode 100644 src/gallium/auxiliary/draw/draw_debug.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 60bb9dfe88..45def9153c 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -12,7 +12,6 @@ C_SOURCES = \ draw_vs_llvm.c \ draw_context.c\ draw_cull.c \ - draw_debug.c \ draw_flatshade.c \ draw_offset.c \ draw_pt.c \ diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 246083a962..bff32d2c8b 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -8,7 +8,6 @@ draw = env.ConvenienceLibrary( 'draw_clip.c', 'draw_context.c', 'draw_cull.c', - 'draw_debug.c', 'draw_flatshade.c', 'draw_offset.c', 'draw_pstipple.c', diff --git a/src/gallium/auxiliary/draw/draw_debug.c b/src/gallium/auxiliary/draw/draw_debug.c deleted file mode 100644 index d6220b5f62..0000000000 --- a/src/gallium/auxiliary/draw/draw_debug.c +++ /dev/null @@ -1,113 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell - */ - -#include "draw_private.h" -#include "draw_context.h" - - - -static void -draw_prim_info(unsigned prim, unsigned *first, unsigned *incr) -{ - assert(prim >= PIPE_PRIM_POINTS); - assert(prim <= PIPE_PRIM_POLYGON); - - switch (prim) { - case PIPE_PRIM_POINTS: - *first = 1; - *incr = 1; - break; - case PIPE_PRIM_LINES: - *first = 2; - *incr = 2; - break; - case PIPE_PRIM_LINE_STRIP: - *first = 2; - *incr = 1; - break; - case PIPE_PRIM_LINE_LOOP: - *first = 2; - *incr = 1; - break; - case PIPE_PRIM_TRIANGLES: - *first = 3; - *incr = 3; - break; - case PIPE_PRIM_TRIANGLE_STRIP: - *first = 3; - *incr = 1; - break; - case PIPE_PRIM_TRIANGLE_FAN: - case PIPE_PRIM_POLYGON: - *first = 3; - *incr = 1; - break; - case PIPE_PRIM_QUADS: - *first = 4; - *incr = 4; - break; - case PIPE_PRIM_QUAD_STRIP: - *first = 4; - *incr = 2; - break; - default: - assert(0); - *first = 1; - *incr = 1; - break; - } -} - - -unsigned -draw_trim_prim( unsigned mode, unsigned count ) -{ - unsigned length, first, incr; - - draw_prim_info( mode, &first, &incr ); - - if (count < first) - length = 0; - else - length = count - (count - first) % incr; - - return length; -} - - -boolean -draw_validate_prim( unsigned mode, unsigned count ) -{ - return (count > 0 && - count == draw_trim_prim( mode, count )); -} - -- cgit v1.2.3 From 6c38c600ff1212699e2e8e0f2928cd9e69559ac5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 00:37:51 +0100 Subject: draw: rename draw_vertex_shader.c -> draw_vs.c --- src/gallium/auxiliary/draw/Makefile | 2 +- src/gallium/auxiliary/draw/SConscript | 2 +- src/gallium/auxiliary/draw/draw_vertex_shader.c | 85 ------------------------- src/gallium/auxiliary/draw/draw_vs.c | 85 +++++++++++++++++++++++++ 4 files changed, 87 insertions(+), 87 deletions(-) delete mode 100644 src/gallium/auxiliary/draw/draw_vertex_shader.c create mode 100644 src/gallium/auxiliary/draw/draw_vs.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 45def9153c..9c41649883 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -7,6 +7,7 @@ C_SOURCES = \ draw_aaline.c \ draw_aapoint.c \ draw_clip.c \ + draw_vs.c \ draw_vs_exec.c \ draw_vs_sse.c \ draw_vs_llvm.c \ @@ -30,7 +31,6 @@ C_SOURCES = \ draw_validate.c \ draw_vbuf.c \ draw_vertex.c \ - draw_vertex_shader.c \ draw_wide_line.c \ draw_wide_point.c diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index bff32d2c8b..388d7879dd 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -26,7 +26,7 @@ draw = env.ConvenienceLibrary( 'draw_validate.c', 'draw_vbuf.c', 'draw_vertex.c', - 'draw_vertex_shader.c', + 'draw_vs.c', 'draw_vs_exec.c', 'draw_vs_llvm.c', 'draw_vs_sse.c', diff --git a/src/gallium/auxiliary/draw/draw_vertex_shader.c b/src/gallium/auxiliary/draw/draw_vertex_shader.c deleted file mode 100644 index 03fe00a951..0000000000 --- a/src/gallium/auxiliary/draw/draw_vertex_shader.c +++ /dev/null @@ -1,85 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell - * Brian Paul - */ - -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" -#include "draw_private.h" -#include "draw_context.h" -#include "draw_vs.h" - - - -struct draw_vertex_shader * -draw_create_vertex_shader(struct draw_context *draw, - const struct pipe_shader_state *shader) -{ - struct draw_vertex_shader *vs; - - vs = draw_create_vs_llvm( draw, shader ); - if (!vs) { - vs = draw_create_vs_sse( draw, shader ); - if (!vs) { - vs = draw_create_vs_exec( draw, shader ); - } - } - - assert(vs); - return vs; -} - - -void -draw_bind_vertex_shader(struct draw_context *draw, - struct draw_vertex_shader *dvs) -{ - draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - - if (dvs) - { - draw->vertex_shader = dvs; - draw->num_vs_outputs = dvs->info.num_outputs; - dvs->prepare( dvs, draw ); - } - else { - draw->vertex_shader = NULL; - draw->num_vs_outputs = 0; - } -} - - -void -draw_delete_vertex_shader(struct draw_context *draw, - struct draw_vertex_shader *dvs) -{ - dvs->delete( dvs ); -} diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c new file mode 100644 index 0000000000..03fe00a951 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -0,0 +1,85 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + * Brian Paul + */ + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "draw_private.h" +#include "draw_context.h" +#include "draw_vs.h" + + + +struct draw_vertex_shader * +draw_create_vertex_shader(struct draw_context *draw, + const struct pipe_shader_state *shader) +{ + struct draw_vertex_shader *vs; + + vs = draw_create_vs_llvm( draw, shader ); + if (!vs) { + vs = draw_create_vs_sse( draw, shader ); + if (!vs) { + vs = draw_create_vs_exec( draw, shader ); + } + } + + assert(vs); + return vs; +} + + +void +draw_bind_vertex_shader(struct draw_context *draw, + struct draw_vertex_shader *dvs) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + + if (dvs) + { + draw->vertex_shader = dvs; + draw->num_vs_outputs = dvs->info.num_outputs; + dvs->prepare( dvs, draw ); + } + else { + draw->vertex_shader = NULL; + draw->num_vs_outputs = 0; + } +} + + +void +draw_delete_vertex_shader(struct draw_context *draw, + struct draw_vertex_shader *dvs) +{ + dvs->delete( dvs ); +} -- cgit v1.2.3 From 1b411f894369f6a55c6f11cf650511eaa18a8510 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 00:39:38 +0100 Subject: draw: sort makefile sources --- src/gallium/auxiliary/draw/Makefile | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 9c41649883..0b0578a179 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -7,30 +7,30 @@ C_SOURCES = \ draw_aaline.c \ draw_aapoint.c \ draw_clip.c \ - draw_vs.c \ - draw_vs_exec.c \ - draw_vs_sse.c \ - draw_vs_llvm.c \ draw_context.c\ draw_cull.c \ draw_flatshade.c \ draw_offset.c \ + draw_pstipple.c \ draw_pt.c \ - draw_pt_vcache.c \ + draw_pt_elts.c \ + draw_pt_emit.c \ + draw_pt_fetch.c \ draw_pt_fetch_emit.c \ draw_pt_fetch_shade_pipeline.c \ - draw_pt_fetch.c \ - draw_pt_post_vs.c \ - draw_pt_emit.c \ draw_pt_pipeline.c \ - draw_pt_elts.c \ - draw_pstipple.c \ + draw_pt_post_vs.c \ + draw_pt_vcache.c \ draw_stipple.c \ draw_twoside.c \ draw_unfilled.c \ draw_validate.c \ draw_vbuf.c \ draw_vertex.c \ + draw_vs.c \ + draw_vs_exec.c \ + draw_vs_llvm.c \ + draw_vs_sse.c \ draw_wide_line.c \ draw_wide_point.c -- cgit v1.2.3 From 26831bdac594a11e51b6c4b09df78bb11444f5dd Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 00:45:05 +0100 Subject: draw: rename pipeline files to draw_pipe_* --- src/gallium/auxiliary/draw/Makefile | 32 +- src/gallium/auxiliary/draw/SConscript | 30 +- src/gallium/auxiliary/draw/draw_aaline.c | 859 ---------------------- src/gallium/auxiliary/draw/draw_aapoint.c | 846 --------------------- src/gallium/auxiliary/draw/draw_clip.c | 503 ------------- src/gallium/auxiliary/draw/draw_cull.c | 150 ---- src/gallium/auxiliary/draw/draw_flatshade.c | 248 ------- src/gallium/auxiliary/draw/draw_offset.c | 186 ----- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 859 ++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 846 +++++++++++++++++++++ src/gallium/auxiliary/draw/draw_pipe_clip.c | 503 +++++++++++++ src/gallium/auxiliary/draw/draw_pipe_cull.c | 150 ++++ src/gallium/auxiliary/draw/draw_pipe_flatshade.c | 248 +++++++ src/gallium/auxiliary/draw/draw_pipe_offset.c | 186 +++++ src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 746 +++++++++++++++++++ src/gallium/auxiliary/draw/draw_pipe_stipple.c | 239 ++++++ src/gallium/auxiliary/draw/draw_pipe_twoside.c | 203 +++++ src/gallium/auxiliary/draw/draw_pipe_unfilled.c | 206 ++++++ src/gallium/auxiliary/draw/draw_pipe_validate.c | 312 ++++++++ src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 529 +++++++++++++ src/gallium/auxiliary/draw/draw_pipe_wide_line.c | 190 +++++ src/gallium/auxiliary/draw/draw_pipe_wide_point.c | 281 +++++++ src/gallium/auxiliary/draw/draw_pipe_wide_prims.c | 366 +++++++++ src/gallium/auxiliary/draw/draw_pstipple.c | 746 ------------------- src/gallium/auxiliary/draw/draw_stipple.c | 239 ------ src/gallium/auxiliary/draw/draw_twoside.c | 203 ----- src/gallium/auxiliary/draw/draw_unfilled.c | 206 ------ src/gallium/auxiliary/draw/draw_validate.c | 312 -------- src/gallium/auxiliary/draw/draw_vbuf.c | 529 ------------- src/gallium/auxiliary/draw/draw_wide_line.c | 190 ----- src/gallium/auxiliary/draw/draw_wide_point.c | 281 ------- src/gallium/auxiliary/draw/draw_wide_prims.c | 366 --------- 32 files changed, 5895 insertions(+), 5895 deletions(-) delete mode 100644 src/gallium/auxiliary/draw/draw_aaline.c delete mode 100644 src/gallium/auxiliary/draw/draw_aapoint.c delete mode 100644 src/gallium/auxiliary/draw/draw_clip.c delete mode 100644 src/gallium/auxiliary/draw/draw_cull.c delete mode 100644 src/gallium/auxiliary/draw/draw_flatshade.c delete mode 100644 src/gallium/auxiliary/draw/draw_offset.c create mode 100644 src/gallium/auxiliary/draw/draw_pipe_aaline.c create mode 100644 src/gallium/auxiliary/draw/draw_pipe_aapoint.c create mode 100644 src/gallium/auxiliary/draw/draw_pipe_clip.c create mode 100644 src/gallium/auxiliary/draw/draw_pipe_cull.c create mode 100644 src/gallium/auxiliary/draw/draw_pipe_flatshade.c create mode 100644 src/gallium/auxiliary/draw/draw_pipe_offset.c create mode 100644 src/gallium/auxiliary/draw/draw_pipe_pstipple.c create mode 100644 src/gallium/auxiliary/draw/draw_pipe_stipple.c create mode 100644 src/gallium/auxiliary/draw/draw_pipe_twoside.c create mode 100644 src/gallium/auxiliary/draw/draw_pipe_unfilled.c create mode 100644 src/gallium/auxiliary/draw/draw_pipe_validate.c create mode 100644 src/gallium/auxiliary/draw/draw_pipe_vbuf.c create mode 100644 src/gallium/auxiliary/draw/draw_pipe_wide_line.c create mode 100644 src/gallium/auxiliary/draw/draw_pipe_wide_point.c create mode 100644 src/gallium/auxiliary/draw/draw_pipe_wide_prims.c delete mode 100644 src/gallium/auxiliary/draw/draw_pstipple.c delete mode 100644 src/gallium/auxiliary/draw/draw_stipple.c delete mode 100644 src/gallium/auxiliary/draw/draw_twoside.c delete mode 100644 src/gallium/auxiliary/draw/draw_unfilled.c delete mode 100644 src/gallium/auxiliary/draw/draw_validate.c delete mode 100644 src/gallium/auxiliary/draw/draw_vbuf.c delete mode 100644 src/gallium/auxiliary/draw/draw_wide_line.c delete mode 100644 src/gallium/auxiliary/draw/draw_wide_point.c delete mode 100644 src/gallium/auxiliary/draw/draw_wide_prims.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 0b0578a179..62f46c6db1 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -4,14 +4,21 @@ include $(TOP)/configs/current LIBNAME = draw C_SOURCES = \ - draw_aaline.c \ - draw_aapoint.c \ - draw_clip.c \ - draw_context.c\ - draw_cull.c \ - draw_flatshade.c \ - draw_offset.c \ - draw_pstipple.c \ + draw_context.c \ + draw_pipe_aaline.c \ + draw_pipe_aapoint.c \ + draw_pipe_clip.c \ + draw_pipe_cull.c \ + draw_pipe_flatshade.c \ + draw_pipe_offset.c \ + draw_pipe_pstipple.c \ + draw_pipe_stipple.c \ + draw_pipe_twoside.c \ + draw_pipe_unfilled.c \ + draw_pipe_validate.c \ + draw_pipe_vbuf.c \ + draw_pipe_wide_line.c \ + draw_pipe_wide_point.c \ draw_pt.c \ draw_pt_elts.c \ draw_pt_emit.c \ @@ -21,18 +28,11 @@ C_SOURCES = \ draw_pt_pipeline.c \ draw_pt_post_vs.c \ draw_pt_vcache.c \ - draw_stipple.c \ - draw_twoside.c \ - draw_unfilled.c \ - draw_validate.c \ - draw_vbuf.c \ draw_vertex.c \ draw_vs.c \ draw_vs_exec.c \ draw_vs_llvm.c \ - draw_vs_sse.c \ - draw_wide_line.c \ - draw_wide_point.c + draw_vs_sse.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 388d7879dd..5fa35d3005 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -3,14 +3,22 @@ Import('*') draw = env.ConvenienceLibrary( target = 'draw', source = [ - 'draw_aaline.c', - 'draw_aapoint.c', - 'draw_clip.c', 'draw_context.c', - 'draw_cull.c', - 'draw_flatshade.c', - 'draw_offset.c', - 'draw_pstipple.c', + 'draw_pipe_aaline.c', + 'draw_pipe_aapoint.c', + 'draw_pipe_clip.c', + 'draw_pipe_cull.c', + 'draw_pipe_flatshade.c', + 'draw_pipe_offset.c', + 'draw_pipe_pstipple.c', + 'draw_pipe_stipple.c', + 'draw_pipe_twoside.c', + 'draw_pipe_unfilled.c', + 'draw_pipe_validate.c', + 'draw_pipe_vbuf.c', + 'draw_pipe_vertex.c', + 'draw_pipe_wide_line.c', + 'draw_pipe_wide_point.c', 'draw_pt.c', 'draw_pt_elts.c', 'draw_pt_emit.c', @@ -20,18 +28,10 @@ draw = env.ConvenienceLibrary( 'draw_pt_pipeline.c', 'draw_pt_post_vs.c', 'draw_pt_vcache.c', - 'draw_stipple.c', - 'draw_twoside.c', - 'draw_unfilled.c', - 'draw_validate.c', - 'draw_vbuf.c', - 'draw_vertex.c', 'draw_vs.c', 'draw_vs_exec.c', 'draw_vs_llvm.c', 'draw_vs_sse.c', - 'draw_wide_line.c', - 'draw_wide_point.c', ]) auxiliaries.insert(0, draw) diff --git a/src/gallium/auxiliary/draw/draw_aaline.c b/src/gallium/auxiliary/draw/draw_aaline.c deleted file mode 100644 index e8d2a45102..0000000000 --- a/src/gallium/auxiliary/draw/draw_aaline.c +++ /dev/null @@ -1,859 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * AA line stage: AA lines are converted to texture mapped triangles. - * - * Authors: Brian Paul - */ - - -#include "pipe/p_util.h" -#include "pipe/p_inlines.h" -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" - -#include "tgsi/util/tgsi_transform.h" -#include "tgsi/util/tgsi_dump.h" - -#include "draw_context.h" -#include "draw_private.h" - - -/** - * Max texture level for the alpha texture used for antialiasing - */ -#define MAX_TEXTURE_LEVEL 5 /* 32 x 32 */ - - -/** - * Subclass of pipe_shader_state to carry extra fragment shader info. - */ -struct aaline_fragment_shader -{ - struct pipe_shader_state state; - void *driver_fs; - void *aaline_fs; - void *aapoint_fs; /* not yet */ - void *sprite_fs; /* not yet */ - uint sampler_unit; - int generic_attrib; /**< texcoord/generic used for texture */ -}; - - -/** - * Subclass of draw_stage - */ -struct aaline_stage -{ - struct draw_stage stage; - - float half_line_width; - - /** For AA lines, this is the vertex attrib slot for the new texcoords */ - uint tex_slot; - - void *sampler_cso; - struct pipe_texture *texture; - uint num_samplers; - uint num_textures; - - - /* - * Currently bound state - */ - struct aaline_fragment_shader *fs; - struct { - void *sampler[PIPE_MAX_SAMPLERS]; - struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; - } state; - - /* - * Driver interface/override functions - */ - void * (*driver_create_fs_state)(struct pipe_context *, - const struct pipe_shader_state *); - void (*driver_bind_fs_state)(struct pipe_context *, void *); - void (*driver_delete_fs_state)(struct pipe_context *, void *); - - void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, - void **); - void (*driver_set_sampler_textures)(struct pipe_context *, unsigned, - struct pipe_texture **); - - struct pipe_context *pipe; -}; - - - -/** - * Subclass of tgsi_transform_context, used for transforming the - * user's fragment shader to add the special AA instructions. - */ -struct aa_transform_context { - struct tgsi_transform_context base; - uint tempsUsed; /**< bitmask */ - int colorOutput; /**< which output is the primary color */ - uint samplersUsed; /**< bitfield of samplers used */ - int freeSampler; /** an available sampler for the pstipple */ - int maxInput, maxGeneric; /**< max input index found */ - int colorTemp, texTemp; /**< temp registers */ - boolean firstInstruction; -}; - - -/** - * TGSI declaration transform callback. - * Look for a free sampler, a free input attrib, and two free temp regs. - */ -static void -aa_transform_decl(struct tgsi_transform_context *ctx, - struct tgsi_full_declaration *decl) -{ - struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; - - if (decl->Declaration.File == TGSI_FILE_OUTPUT && - decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && - decl->Semantic.SemanticIndex == 0) { - aactx->colorOutput = decl->u.DeclarationRange.First; - } - else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { - uint i; - for (i = decl->u.DeclarationRange.First; - i <= decl->u.DeclarationRange.Last; i++) { - aactx->samplersUsed |= 1 << i; - } - } - else if (decl->Declaration.File == TGSI_FILE_INPUT) { - if ((int) decl->u.DeclarationRange.Last > aactx->maxInput) - aactx->maxInput = decl->u.DeclarationRange.Last; - if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && - (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { - aactx->maxGeneric = decl->Semantic.SemanticIndex; - } - } - else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { - uint i; - for (i = decl->u.DeclarationRange.First; - i <= decl->u.DeclarationRange.Last; i++) { - aactx->tempsUsed |= (1 << i); - } - } - - ctx->emit_declaration(ctx, decl); -} - - -/** - * Find the lowest zero bit in the given word, or -1 if bitfield is all ones. - */ -static int -free_bit(uint bitfield) -{ - int i; - for (i = 0; i < 32; i++) { - if ((bitfield & (1 << i)) == 0) - return i; - } - return -1; -} - - -/** - * TGSI instruction transform callback. - * Replace writes to result.color w/ a temp reg. - * Upon END instruction, insert texture sampling code for antialiasing. - */ -static void -aa_transform_inst(struct tgsi_transform_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; - - if (aactx->firstInstruction) { - /* emit our new declarations before the first instruction */ - - struct tgsi_full_declaration decl; - uint i; - - /* find free sampler */ - aactx->freeSampler = free_bit(aactx->samplersUsed); - if (aactx->freeSampler >= PIPE_MAX_SAMPLERS) - aactx->freeSampler = PIPE_MAX_SAMPLERS - 1; - - /* find two free temp regs */ - for (i = 0; i < 32; i++) { - if ((aactx->tempsUsed & (1 << i)) == 0) { - /* found a free temp */ - if (aactx->colorTemp < 0) - aactx->colorTemp = i; - else if (aactx->texTemp < 0) - aactx->texTemp = i; - else - break; - } - } - assert(aactx->colorTemp >= 0); - assert(aactx->texTemp >= 0); - - /* declare new generic input/texcoord */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; - decl.Declaration.Interpolate = 1; - /* XXX this could be linear... */ - decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = aactx->maxInput + 1; - ctx->emit_declaration(ctx, &decl); - - /* declare new sampler */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = aactx->freeSampler; - ctx->emit_declaration(ctx, &decl); - - /* declare new temp regs */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = aactx->texTemp; - ctx->emit_declaration(ctx, &decl); - - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = aactx->colorTemp; - ctx->emit_declaration(ctx, &decl); - - aactx->firstInstruction = FALSE; - } - - if (inst->Instruction.Opcode == TGSI_OPCODE_END && - aactx->colorOutput != -1) { - struct tgsi_full_instruction newInst; - - /* TEX */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_TEX; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = aactx->texTemp; - newInst.Instruction.NumSrcRegs = 2; - newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->maxInput + 1; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->freeSampler; - - ctx->emit_instruction(ctx, &newInst); - - /* MOV rgb */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_MOV; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; - newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; - ctx->emit_instruction(ctx, &newInst); - - /* MUL alpha */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_MUL; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; - newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->texTemp; - ctx->emit_instruction(ctx, &newInst); - - /* END */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_END; - newInst.Instruction.NumDstRegs = 0; - newInst.Instruction.NumSrcRegs = 0; - ctx->emit_instruction(ctx, &newInst); - } - else { - /* Not an END instruction. - * Look for writes to result.color and replace with colorTemp reg. - */ - uint i; - - for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - if (dst->DstRegister.File == TGSI_FILE_OUTPUT && - dst->DstRegister.Index == aactx->colorOutput) { - dst->DstRegister.File = TGSI_FILE_TEMPORARY; - dst->DstRegister.Index = aactx->colorTemp; - } - } - - ctx->emit_instruction(ctx, inst); - } -} - - -/** - * Generate the frag shader we'll use for drawing AA lines. - * This will be the user's shader plus some texture/modulate instructions. - */ -static void -generate_aaline_fs(struct aaline_stage *aaline) -{ - const struct pipe_shader_state *orig_fs = &aaline->fs->state; - //struct draw_context *draw = aaline->stage.draw; - struct pipe_shader_state aaline_fs; - struct aa_transform_context transform; - -#define MAX 1000 - - aaline_fs = *orig_fs; /* copy to init */ - aaline_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); - - memset(&transform, 0, sizeof(transform)); - transform.colorOutput = -1; - transform.maxInput = -1; - transform.maxGeneric = -1; - transform.colorTemp = -1; - transform.texTemp = -1; - transform.firstInstruction = TRUE; - transform.base.transform_instruction = aa_transform_inst; - transform.base.transform_declaration = aa_transform_decl; - - tgsi_transform_shader(orig_fs->tokens, - (struct tgsi_token *) aaline_fs.tokens, - MAX, &transform.base); - -#if 0 /* DEBUG */ - tgsi_dump(orig_fs->tokens, 0); - tgsi_dump(aaline_fs.tokens, 0); -#endif - - aaline->fs->sampler_unit = transform.freeSampler; - - aaline->fs->aaline_fs - = aaline->driver_create_fs_state(aaline->pipe, &aaline_fs); - - aaline->fs->generic_attrib = transform.maxGeneric + 1; -} - - -/** - * Create the texture map we'll use for antialiasing the lines. - */ -static void -aaline_create_texture(struct aaline_stage *aaline) -{ - struct pipe_context *pipe = aaline->pipe; - struct pipe_screen *screen = pipe->screen; - struct pipe_texture texTemp; - uint level; - - memset(&texTemp, 0, sizeof(texTemp)); - texTemp.target = PIPE_TEXTURE_2D; - texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */ - texTemp.last_level = MAX_TEXTURE_LEVEL; - texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL; - texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL; - texTemp.depth[0] = 1; - texTemp.cpp = 1; - - aaline->texture = screen->texture_create(screen, &texTemp); - - /* Fill in mipmap images. - * Basically each level is solid opaque, except for the outermost - * texels which are zero. Special case the 1x1 and 2x2 levels. - */ - for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) { - struct pipe_surface *surface; - const uint size = aaline->texture->width[level]; - ubyte *data; - uint i, j; - - assert(aaline->texture->width[level] == aaline->texture->height[level]); - - surface = screen->get_tex_surface(screen, aaline->texture, 0, level, 0); - data = pipe_surface_map(surface); - - for (i = 0; i < size; i++) { - for (j = 0; j < size; j++) { - ubyte d; - if (size == 1) { - d = 255; - } - else if (size == 2) { - d = 200; /* tuneable */ - } - else if (i == 0 || j == 0 || i == size - 1 || j == size - 1) { - d = 0; - } - else { - d = 255; - } - data[i * surface->pitch + j] = d; - } - } - - /* unmap */ - pipe_surface_unmap(surface); - pipe_surface_reference(&surface, NULL); - pipe->texture_update(pipe, aaline->texture, 0, (1 << level)); - } -} - - -/** - * Create the sampler CSO that'll be used for antialiasing. - * By using a mipmapped texture, we don't have to generate a different - * texture image for each line size. - */ -static void -aaline_create_sampler(struct aaline_stage *aaline) -{ - struct pipe_sampler_state sampler; - struct pipe_context *pipe = aaline->pipe; - - memset(&sampler, 0, sizeof(sampler)); - sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR; - sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; - sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; - sampler.normalized_coords = 1; - sampler.min_lod = 0.0f; - sampler.max_lod = MAX_TEXTURE_LEVEL; - - aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler); -} - - -/** - * When we're about to draw our first AA line in a batch, this function is - * called to tell the driver to bind our modified fragment shader. - */ -static void -bind_aaline_fragment_shader(struct aaline_stage *aaline) -{ - if (!aaline->fs->aaline_fs) { - generate_aaline_fs(aaline); - } - aaline->driver_bind_fs_state(aaline->pipe, aaline->fs->aaline_fs); -} - - - -static INLINE struct aaline_stage * -aaline_stage( struct draw_stage *stage ) -{ - return (struct aaline_stage *) stage; -} - - -static void -passthrough_point(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->point(stage->next, header); -} - - -static void -passthrough_tri(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->tri(stage->next, header); -} - - -/** - * Draw a wide line by drawing a quad, using geometry which will - * fullfill GL's antialiased line requirements. - */ -static void -aaline_line(struct draw_stage *stage, struct prim_header *header) -{ - const struct aaline_stage *aaline = aaline_stage(stage); - const float half_width = aaline->half_line_width; - struct prim_header tri; - struct vertex_header *v[8]; - uint texPos = aaline->tex_slot; - float *pos, *tex; - float dx = header->v[1]->data[0][0] - header->v[0]->data[0][0]; - float dy = header->v[1]->data[0][1] - header->v[0]->data[0][1]; - double a = atan2(dy, dx); - float c_a = (float) cos(a), s_a = (float) sin(a); - uint i; - - /* XXX the ends of lines aren't quite perfect yet, but probably passable */ - dx = 0.5F * half_width; - dy = half_width; - - /* allocate/dup new verts */ - for (i = 0; i < 8; i++) { - v[i] = dup_vert(stage, header->v[i/4], i); - } - - /* - * Quad strip for line from v0 to v1 (*=endpoints): - * - * 1 3 5 7 - * +---+---------------------+---+ - * | | - * | *v0 v1* | - * | | - * +---+---------------------+---+ - * 0 2 4 6 - */ - - /* new verts */ - pos = v[0]->data[0]; - pos[0] += (-dx * c_a - dy * s_a); - pos[1] += (-dx * s_a + dy * c_a); - - pos = v[1]->data[0]; - pos[0] += (-dx * c_a - -dy * s_a); - pos[1] += (-dx * s_a + -dy * c_a); - - pos = v[2]->data[0]; - pos[0] += ( dx * c_a - dy * s_a); - pos[1] += ( dx * s_a + dy * c_a); - - pos = v[3]->data[0]; - pos[0] += ( dx * c_a - -dy * s_a); - pos[1] += ( dx * s_a + -dy * c_a); - - pos = v[4]->data[0]; - pos[0] += (-dx * c_a - dy * s_a); - pos[1] += (-dx * s_a + dy * c_a); - - pos = v[5]->data[0]; - pos[0] += (-dx * c_a - -dy * s_a); - pos[1] += (-dx * s_a + -dy * c_a); - - pos = v[6]->data[0]; - pos[0] += ( dx * c_a - dy * s_a); - pos[1] += ( dx * s_a + dy * c_a); - - pos = v[7]->data[0]; - pos[0] += ( dx * c_a - -dy * s_a); - pos[1] += ( dx * s_a + -dy * c_a); - - /* new texcoords */ - tex = v[0]->data[texPos]; - ASSIGN_4V(tex, 0, 0, 0, 1); - - tex = v[1]->data[texPos]; - ASSIGN_4V(tex, 0, 1, 0, 1); - - tex = v[2]->data[texPos]; - ASSIGN_4V(tex, .5, 0, 0, 1); - - tex = v[3]->data[texPos]; - ASSIGN_4V(tex, .5, 1, 0, 1); - - tex = v[4]->data[texPos]; - ASSIGN_4V(tex, .5, 0, 0, 1); - - tex = v[5]->data[texPos]; - ASSIGN_4V(tex, .5, 1, 0, 1); - - tex = v[6]->data[texPos]; - ASSIGN_4V(tex, 1, 0, 0, 1); - - tex = v[7]->data[texPos]; - ASSIGN_4V(tex, 1, 1, 0, 1); - - /* emit 6 tris for the quad strip */ - tri.v[0] = v[2]; tri.v[1] = v[1]; tri.v[2] = v[0]; - stage->next->tri( stage->next, &tri ); - - tri.v[0] = v[3]; tri.v[1] = v[1]; tri.v[2] = v[2]; - stage->next->tri( stage->next, &tri ); - - tri.v[0] = v[4]; tri.v[1] = v[3]; tri.v[2] = v[2]; - stage->next->tri( stage->next, &tri ); - - tri.v[0] = v[5]; tri.v[1] = v[3]; tri.v[2] = v[4]; - stage->next->tri( stage->next, &tri ); - - tri.v[0] = v[6]; tri.v[1] = v[5]; tri.v[2] = v[4]; - stage->next->tri( stage->next, &tri ); - - tri.v[0] = v[7]; tri.v[1] = v[5]; tri.v[2] = v[6]; - stage->next->tri( stage->next, &tri ); -} - - -static void -aaline_first_line(struct draw_stage *stage, struct prim_header *header) -{ - auto struct aaline_stage *aaline = aaline_stage(stage); - struct draw_context *draw = stage->draw; - struct pipe_context *pipe = aaline->pipe; - uint num_samplers; - - assert(draw->rasterizer->line_smooth); - - if (draw->rasterizer->line_width <= 3.0) - aaline->half_line_width = 1.5f; - else - aaline->half_line_width = 0.5f * draw->rasterizer->line_width; - - /* - * Bind (generate) our fragprog, sampler and texture - */ - bind_aaline_fragment_shader(aaline); - - /* update vertex attrib info */ - aaline->tex_slot = draw->num_vs_outputs; - assert(aaline->tex_slot > 0); /* output[0] is vertex pos */ - - /* advertise the extra post-transformed vertex attribute */ - draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; - draw->extra_vp_outputs.semantic_index = aaline->fs->generic_attrib; - draw->extra_vp_outputs.slot = aaline->tex_slot; - - /* how many samplers? */ - /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ - num_samplers = MAX2(aaline->num_textures, aaline->num_samplers); - num_samplers = MAX2(num_samplers, aaline->fs->sampler_unit + 1); - - aaline->state.sampler[aaline->fs->sampler_unit] = aaline->sampler_cso; - pipe_texture_reference(&aaline->state.texture[aaline->fs->sampler_unit], - aaline->texture); - - aaline->driver_bind_sampler_states(pipe, num_samplers, aaline->state.sampler); - aaline->driver_set_sampler_textures(pipe, num_samplers, aaline->state.texture); - - /* now really draw first line */ - stage->line = aaline_line; - stage->line(stage, header); -} - - -static void -aaline_flush(struct draw_stage *stage, unsigned flags) -{ - struct draw_context *draw = stage->draw; - struct aaline_stage *aaline = aaline_stage(stage); - struct pipe_context *pipe = aaline->pipe; - - stage->line = aaline_first_line; - stage->next->flush( stage->next, flags ); - - /* restore original frag shader */ - aaline->driver_bind_fs_state(pipe, aaline->fs->driver_fs); - - /* XXX restore original texture, sampler state */ - aaline->driver_bind_sampler_states(pipe, aaline->num_samplers, - aaline->state.sampler); - aaline->driver_set_sampler_textures(pipe, aaline->num_textures, - aaline->state.texture); - - draw->extra_vp_outputs.slot = 0; -} - - -static void -aaline_reset_stipple_counter(struct draw_stage *stage) -{ - stage->next->reset_stipple_counter( stage->next ); -} - - -static void -aaline_destroy(struct draw_stage *stage) -{ - struct aaline_stage *aaline = aaline_stage(stage); - - aaline->pipe->delete_sampler_state(aaline->pipe, aaline->sampler_cso); - - pipe_texture_release(&aaline->texture); - - draw_free_temp_verts( stage ); - - FREE( stage ); -} - - -static struct aaline_stage * -draw_aaline_stage(struct draw_context *draw) -{ - struct aaline_stage *aaline = CALLOC_STRUCT(aaline_stage); - - draw_alloc_temp_verts( &aaline->stage, 8 ); - - aaline->stage.draw = draw; - aaline->stage.next = NULL; - aaline->stage.point = passthrough_point; - aaline->stage.line = aaline_first_line; - aaline->stage.tri = passthrough_tri; - aaline->stage.flush = aaline_flush; - aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter; - aaline->stage.destroy = aaline_destroy; - - return aaline; -} - - -static struct aaline_stage * -aaline_stage_from_pipe(struct pipe_context *pipe) -{ - struct draw_context *draw = (struct draw_context *) pipe->draw; - return aaline_stage(draw->pipeline.aaline); -} - - -/** - * This function overrides the driver's create_fs_state() function and - * will typically be called by the state tracker. - */ -static void * -aaline_create_fs_state(struct pipe_context *pipe, - const struct pipe_shader_state *fs) -{ - struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); - struct aaline_fragment_shader *aafs = CALLOC_STRUCT(aaline_fragment_shader); - - if (aafs) { - aafs->state = *fs; - - /* pass-through */ - aafs->driver_fs = aaline->driver_create_fs_state(aaline->pipe, fs); - } - - return aafs; -} - - -static void -aaline_bind_fs_state(struct pipe_context *pipe, void *fs) -{ - struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); - struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs; - /* save current */ - aaline->fs = aafs; - /* pass-through */ - aaline->driver_bind_fs_state(aaline->pipe, - (aafs ? aafs->driver_fs : NULL)); -} - - -static void -aaline_delete_fs_state(struct pipe_context *pipe, void *fs) -{ - struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); - struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs; - /* pass-through */ - aaline->driver_delete_fs_state(aaline->pipe, aafs->driver_fs); - FREE(aafs); -} - - -static void -aaline_bind_sampler_states(struct pipe_context *pipe, - unsigned num, void **sampler) -{ - struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); - /* save current */ - memcpy(aaline->state.sampler, sampler, num * sizeof(void *)); - aaline->num_samplers = num; - /* pass-through */ - aaline->driver_bind_sampler_states(aaline->pipe, num, sampler); -} - - -static void -aaline_set_sampler_textures(struct pipe_context *pipe, - unsigned num, struct pipe_texture **texture) -{ - struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); - uint i; - - /* save current */ - for (i = 0; i < num; i++) { - pipe_texture_reference(&aaline->state.texture[i], texture[i]); - } - aaline->num_textures = num; - - /* pass-through */ - aaline->driver_set_sampler_textures(aaline->pipe, num, texture); -} - - -/** - * Called by drivers that want to install this AA line prim stage - * into the draw module's pipeline. This will not be used if the - * hardware has native support for AA lines. - */ -void -draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) -{ - struct aaline_stage *aaline; - - pipe->draw = (void *) draw; - - /* - * Create / install AA line drawing / prim stage - */ - aaline = draw_aaline_stage( draw ); - assert(aaline); - draw->pipeline.aaline = &aaline->stage; - - aaline->pipe = pipe; - - /* create special texture, sampler state */ - aaline_create_texture(aaline); - aaline_create_sampler(aaline); - - /* save original driver functions */ - aaline->driver_create_fs_state = pipe->create_fs_state; - aaline->driver_bind_fs_state = pipe->bind_fs_state; - aaline->driver_delete_fs_state = pipe->delete_fs_state; - - aaline->driver_bind_sampler_states = pipe->bind_sampler_states; - aaline->driver_set_sampler_textures = pipe->set_sampler_textures; - - /* override the driver's functions */ - pipe->create_fs_state = aaline_create_fs_state; - pipe->bind_fs_state = aaline_bind_fs_state; - pipe->delete_fs_state = aaline_delete_fs_state; - - pipe->bind_sampler_states = aaline_bind_sampler_states; - pipe->set_sampler_textures = aaline_set_sampler_textures; -} diff --git a/src/gallium/auxiliary/draw/draw_aapoint.c b/src/gallium/auxiliary/draw/draw_aapoint.c deleted file mode 100644 index e84d380e50..0000000000 --- a/src/gallium/auxiliary/draw/draw_aapoint.c +++ /dev/null @@ -1,846 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * AA point stage: AA points are converted to quads and rendered with a - * special fragment shader. Another approach would be to use a texture - * map image of a point, but experiments indicate the quality isn't nearly - * as good as this approach. - * - * Note: this looks a lot like draw_aaline.c but there's actually little - * if any code that can be shared. - * - * Authors: Brian Paul - */ - - -#include "pipe/p_util.h" -#include "pipe/p_inlines.h" -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" - -#include "tgsi/util/tgsi_transform.h" -#include "tgsi/util/tgsi_dump.h" - -#include "draw_context.h" -#include "draw_vs.h" - - -/* - * Enabling NORMALIZE might give _slightly_ better results. - * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or - * d=x*x+y*y. Since we're working with a unit circle, the later seems - * close enough and saves some costly instructions. - */ -#define NORMALIZE 0 - - -/** - * Subclass of pipe_shader_state to carry extra fragment shader info. - */ -struct aapoint_fragment_shader -{ - struct pipe_shader_state state; - void *driver_fs; /**< the regular shader */ - void *aapoint_fs; /**< the aa point-augmented shader */ - int generic_attrib; /**< The generic input attrib/texcoord we'll use */ -}; - - -/** - * Subclass of draw_stage - */ -struct aapoint_stage -{ - struct draw_stage stage; - - int psize_slot; - float radius; - - /** this is the vertex attrib slot for the new texcoords */ - uint tex_slot; - - /* - * Currently bound state - */ - struct aapoint_fragment_shader *fs; - - /* - * Driver interface/override functions - */ - void * (*driver_create_fs_state)(struct pipe_context *, - const struct pipe_shader_state *); - void (*driver_bind_fs_state)(struct pipe_context *, void *); - void (*driver_delete_fs_state)(struct pipe_context *, void *); - - struct pipe_context *pipe; -}; - - - -/** - * Subclass of tgsi_transform_context, used for transforming the - * user's fragment shader to add the special AA instructions. - */ -struct aa_transform_context { - struct tgsi_transform_context base; - uint tempsUsed; /**< bitmask */ - int colorOutput; /**< which output is the primary color */ - int maxInput, maxGeneric; /**< max input index found */ - int tmp0, colorTemp; /**< temp registers */ - boolean firstInstruction; -}; - - -/** - * TGSI declaration transform callback. - * Look for two free temp regs and available input reg for new texcoords. - */ -static void -aa_transform_decl(struct tgsi_transform_context *ctx, - struct tgsi_full_declaration *decl) -{ - struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; - - if (decl->Declaration.File == TGSI_FILE_OUTPUT && - decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && - decl->Semantic.SemanticIndex == 0) { - aactx->colorOutput = decl->u.DeclarationRange.First; - } - else if (decl->Declaration.File == TGSI_FILE_INPUT) { - if ((int) decl->u.DeclarationRange.Last > aactx->maxInput) - aactx->maxInput = decl->u.DeclarationRange.Last; - if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && - (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { - aactx->maxGeneric = decl->Semantic.SemanticIndex; - } - } - else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { - uint i; - for (i = decl->u.DeclarationRange.First; - i <= decl->u.DeclarationRange.Last; i++) { - aactx->tempsUsed |= (1 << i); - } - } - - ctx->emit_declaration(ctx, decl); -} - - -/** - * TGSI instruction transform callback. - * Replace writes to result.color w/ a temp reg. - * Upon END instruction, insert texture sampling code for antialiasing. - */ -static void -aa_transform_inst(struct tgsi_transform_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; - struct tgsi_full_instruction newInst; - - if (aactx->firstInstruction) { - /* emit our new declarations before the first instruction */ - - struct tgsi_full_declaration decl; - const int texInput = aactx->maxInput + 1; - int tmp0; - uint i; - - /* find two free temp regs */ - for (i = 0; i < 32; i++) { - if ((aactx->tempsUsed & (1 << i)) == 0) { - /* found a free temp */ - if (aactx->tmp0 < 0) - aactx->tmp0 = i; - else if (aactx->colorTemp < 0) - aactx->colorTemp = i; - else - break; - } - } - - assert(aactx->colorTemp != aactx->tmp0); - - tmp0 = aactx->tmp0; - - /* declare new generic input/texcoord */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; - decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; - decl.Declaration.Interpolate = 1; - /* XXX this could be linear... */ - decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = texInput; - ctx->emit_declaration(ctx, &decl); - - /* declare new temp regs */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = tmp0; - ctx->emit_declaration(ctx, &decl); - - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = aactx->colorTemp; - ctx->emit_declaration(ctx, &decl); - - aactx->firstInstruction = FALSE; - - - /* - * Emit code to compute fragment coverage, kill if outside point radius - * - * Temp reg0 usage: - * t0.x = distance of fragment from center point - * t0.y = boolean, is t0.x > 1.0, also misc temp usage - * t0.z = temporary for computing 1/(1-k) value - * t0.w = final coverage value - */ - - /* MUL t0.xy, tex, tex; # compute x^2, y^2 */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_MUL; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY; - newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; - ctx->emit_instruction(ctx, &newInst); - - /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_ADD; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; - newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - ctx->emit_instruction(ctx, &newInst); - -#if NORMALIZE /* OPTIONAL normalization of length */ - /* RSQ t0.x, t0.x; */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_RSQ; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; - newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - ctx->emit_instruction(ctx, &newInst); - - /* RCP t0.x, t0.x; */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_RCP; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; - newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - ctx->emit_instruction(ctx, &newInst); -#endif - - /* SGT t0.y, t0.xxxx, t0.wwww; # bool b = d > 1 (NOTE t0.w == 1) */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_SGT; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; - newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - ctx->emit_instruction(ctx, &newInst); - - /* KILP -t0.yyyy; # if b, KILL */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_KILP; - newInst.Instruction.NumDstRegs = 0; - newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; - ctx->emit_instruction(ctx, &newInst); - - - /* compute coverage factor = (1-d)/(1-k) */ - - /* SUB t0.z, tex.w, tex.z; # m = 1 - k */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_SUB; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; - newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; - ctx->emit_instruction(ctx, &newInst); - - /* RCP t0.z, t0.z; # t0.z = 1 / m */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_RCP; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; - newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z; - ctx->emit_instruction(ctx, &newInst); - - /* SUB t0.y, 1, t0.x; # d = 1 - d */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_SUB; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; - newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - ctx->emit_instruction(ctx, &newInst); - - /* MUL t0.w, t0.y, t0.z; # coverage = d * m */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_MUL; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; - newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z; - ctx->emit_instruction(ctx, &newInst); - - /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_SLE; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; - newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z; - ctx->emit_instruction(ctx, &newInst); - - /* CMP t0.w, -t0.y, tex.w, t0.w; - * # if -t0.y < 0 then - * t0.w = 1 - * else - * t0.w = t0.w - */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_CMP; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = tmp0; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; - newInst.Instruction.NumSrcRegs = 3; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; - newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[2].SrcRegister.Index = tmp0; - newInst.FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; - newInst.FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; - ctx->emit_instruction(ctx, &newInst); - - } - - if (inst->Instruction.Opcode == TGSI_OPCODE_END) { - /* add alpha modulation code at tail of program */ - - /* MOV result.color.xyz, colorTemp; */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_MOV; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; - newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; - ctx->emit_instruction(ctx, &newInst); - - /* MUL result.color.w, colorTemp, tmp0.w; */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_MUL; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; - newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; - newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; - newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->tmp0; - ctx->emit_instruction(ctx, &newInst); - } - else { - /* Not an END instruction. - * Look for writes to result.color and replace with colorTemp reg. - */ - uint i; - - for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - if (dst->DstRegister.File == TGSI_FILE_OUTPUT && - dst->DstRegister.Index == aactx->colorOutput) { - dst->DstRegister.File = TGSI_FILE_TEMPORARY; - dst->DstRegister.Index = aactx->colorTemp; - } - } - } - - ctx->emit_instruction(ctx, inst); -} - - -/** - * Generate the frag shader we'll use for drawing AA lines. - * This will be the user's shader plus some texture/modulate instructions. - */ -static void -generate_aapoint_fs(struct aapoint_stage *aapoint) -{ - const struct pipe_shader_state *orig_fs = &aapoint->fs->state; - struct pipe_shader_state aapoint_fs; - struct aa_transform_context transform; - -#define MAX 1000 - - aapoint_fs = *orig_fs; /* copy to init */ - aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); - - memset(&transform, 0, sizeof(transform)); - transform.colorOutput = -1; - transform.maxInput = -1; - transform.maxGeneric = -1; - transform.colorTemp = -1; - transform.tmp0 = -1; - transform.firstInstruction = TRUE; - transform.base.transform_instruction = aa_transform_inst; - transform.base.transform_declaration = aa_transform_decl; - - tgsi_transform_shader(orig_fs->tokens, - (struct tgsi_token *) aapoint_fs.tokens, - MAX, &transform.base); - -#if 0 /* DEBUG */ - printf("draw_aapoint, orig shader:\n"); - tgsi_dump(orig_fs->tokens, 0); - printf("draw_aapoint, new shader:\n"); - tgsi_dump(aapoint_fs.tokens, 0); -#endif - - aapoint->fs->aapoint_fs - = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs); - - aapoint->fs->generic_attrib = transform.maxGeneric + 1; -} - - -/** - * When we're about to draw our first AA line in a batch, this function is - * called to tell the driver to bind our modified fragment shader. - */ -static void -bind_aapoint_fragment_shader(struct aapoint_stage *aapoint) -{ - if (!aapoint->fs->aapoint_fs) { - generate_aapoint_fs(aapoint); - } - aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs); -} - - - -static INLINE struct aapoint_stage * -aapoint_stage( struct draw_stage *stage ) -{ - return (struct aapoint_stage *) stage; -} - - -static void -passthrough_line(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->line(stage->next, header); -} - - -static void -passthrough_tri(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->tri(stage->next, header); -} - - -/** - * Draw an AA point by drawing a quad. - */ -static void -aapoint_point(struct draw_stage *stage, struct prim_header *header) -{ - const struct aapoint_stage *aapoint = aapoint_stage(stage); - struct prim_header tri; - struct vertex_header *v[4]; - uint texPos = aapoint->tex_slot; - float radius, *pos, *tex; - uint i; - float k; - - if (aapoint->psize_slot >= 0) { - radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0]; - } - else { - radius = aapoint->radius; - } - - /* - * Note: the texcoords (generic attrib, really) we use are special: - * The S and T components simply vary from -1 to +1. - * The R component is k, below. - * The Q component is 1.0 and will used as a handy constant in the - * fragment shader. - */ - - /* - * k is the threshold distance from the point's center at which - * we begin alpha attenuation (the coverage value). - * Operating within a unit circle, we'll compute the fragment's - * distance 'd' from the center point using the texcoords. - * IF d > 1.0 THEN - * KILL fragment - * ELSE IF d > k THEN - * compute coverage in [0,1] proportional to d in [k, 1]. - * ELSE - * coverage = 1.0; // full coverage - * ENDIF - * - * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to - * avoid using IF/ELSE/ENDIF TGSI opcodes. - */ - -#if !NORMALIZE - k = 1.0f / radius; - k = 1.0f - 2.0f * k + k * k; -#else - k = 1.0f - 1.0f / radius; -#endif - - /* allocate/dup new verts */ - for (i = 0; i < 4; i++) { - v[i] = dup_vert(stage, header->v[0], i); - } - - /* new verts */ - pos = v[0]->data[0]; - pos[0] -= radius; - pos[1] -= radius; - - pos = v[1]->data[0]; - pos[0] += radius; - pos[1] -= radius; - - pos = v[2]->data[0]; - pos[0] += radius; - pos[1] += radius; - - pos = v[3]->data[0]; - pos[0] -= radius; - pos[1] += radius; - - /* new texcoords */ - tex = v[0]->data[texPos]; - ASSIGN_4V(tex, -1, -1, k, 1); - - tex = v[1]->data[texPos]; - ASSIGN_4V(tex, 1, -1, k, 1); - - tex = v[2]->data[texPos]; - ASSIGN_4V(tex, 1, 1, k, 1); - - tex = v[3]->data[texPos]; - ASSIGN_4V(tex, -1, 1, k, 1); - - /* emit 2 tris for the quad strip */ - tri.v[0] = v[0]; - tri.v[1] = v[1]; - tri.v[2] = v[2]; - stage->next->tri( stage->next, &tri ); - - tri.v[0] = v[0]; - tri.v[1] = v[2]; - tri.v[2] = v[3]; - stage->next->tri( stage->next, &tri ); -} - - -static void -aapoint_first_point(struct draw_stage *stage, struct prim_header *header) -{ - auto struct aapoint_stage *aapoint = aapoint_stage(stage); - struct draw_context *draw = stage->draw; - - assert(draw->rasterizer->point_smooth); - - if (draw->rasterizer->point_size <= 2.0) - aapoint->radius = 1.0; - else - aapoint->radius = 0.5f * draw->rasterizer->point_size; - - /* - * Bind (generate) our fragprog. - */ - bind_aapoint_fragment_shader(aapoint); - - /* update vertex attrib info */ - aapoint->tex_slot = draw->num_vs_outputs; - assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */ - - draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; - draw->extra_vp_outputs.semantic_index = aapoint->fs->generic_attrib; - draw->extra_vp_outputs.slot = aapoint->tex_slot; - - /* find psize slot in post-transform vertex */ - aapoint->psize_slot = -1; - if (draw->rasterizer->point_size_per_vertex) { - /* find PSIZ vertex output */ - const struct draw_vertex_shader *vs = draw->vertex_shader; - uint i; - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { - aapoint->psize_slot = i; - break; - } - } - } - - /* now really draw first line */ - stage->point = aapoint_point; - stage->point(stage, header); -} - - -static void -aapoint_flush(struct draw_stage *stage, unsigned flags) -{ - struct draw_context *draw = stage->draw; - struct aapoint_stage *aapoint = aapoint_stage(stage); - struct pipe_context *pipe = aapoint->pipe; - - stage->point = aapoint_first_point; - stage->next->flush( stage->next, flags ); - - /* restore original frag shader */ - aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs); - - draw->extra_vp_outputs.slot = 0; -} - - -static void -aapoint_reset_stipple_counter(struct draw_stage *stage) -{ - stage->next->reset_stipple_counter( stage->next ); -} - - -static void -aapoint_destroy(struct draw_stage *stage) -{ - draw_free_temp_verts( stage ); - FREE( stage ); -} - - -static struct aapoint_stage * -draw_aapoint_stage(struct draw_context *draw) -{ - struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage); - - draw_alloc_temp_verts( &aapoint->stage, 4 ); - - aapoint->stage.draw = draw; - aapoint->stage.next = NULL; - aapoint->stage.point = aapoint_first_point; - aapoint->stage.line = passthrough_line; - aapoint->stage.tri = passthrough_tri; - aapoint->stage.flush = aapoint_flush; - aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter; - aapoint->stage.destroy = aapoint_destroy; - - return aapoint; -} - - -static struct aapoint_stage * -aapoint_stage_from_pipe(struct pipe_context *pipe) -{ - struct draw_context *draw = (struct draw_context *) pipe->draw; - return aapoint_stage(draw->pipeline.aapoint); -} - - -/** - * This function overrides the driver's create_fs_state() function and - * will typically be called by the state tracker. - */ -static void * -aapoint_create_fs_state(struct pipe_context *pipe, - const struct pipe_shader_state *fs) -{ - struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); - struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader); - - if (aafs) { - aafs->state = *fs; - - /* pass-through */ - aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs); - } - - return aafs; -} - - -static void -aapoint_bind_fs_state(struct pipe_context *pipe, void *fs) -{ - struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); - struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs; - /* save current */ - aapoint->fs = aafs; - /* pass-through */ - aapoint->driver_bind_fs_state(aapoint->pipe, - (aafs ? aafs->driver_fs : NULL)); -} - - -static void -aapoint_delete_fs_state(struct pipe_context *pipe, void *fs) -{ - struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); - struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs; - /* pass-through */ - aapoint->driver_delete_fs_state(aapoint->pipe, aafs->driver_fs); - FREE(aafs); -} - - -/** - * Called by drivers that want to install this AA point prim stage - * into the draw module's pipeline. This will not be used if the - * hardware has native support for AA points. - */ -void -draw_install_aapoint_stage(struct draw_context *draw, - struct pipe_context *pipe) -{ - struct aapoint_stage *aapoint; - - pipe->draw = (void *) draw; - - /* - * Create / install AA point drawing / prim stage - */ - aapoint = draw_aapoint_stage( draw ); - assert(aapoint); - draw->pipeline.aapoint = &aapoint->stage; - - aapoint->pipe = pipe; - - /* save original driver functions */ - aapoint->driver_create_fs_state = pipe->create_fs_state; - aapoint->driver_bind_fs_state = pipe->bind_fs_state; - aapoint->driver_delete_fs_state = pipe->delete_fs_state; - - /* override the driver's functions */ - pipe->create_fs_state = aapoint_create_fs_state; - pipe->bind_fs_state = aapoint_bind_fs_state; - pipe->delete_fs_state = aapoint_delete_fs_state; -} diff --git a/src/gallium/auxiliary/draw/draw_clip.c b/src/gallium/auxiliary/draw/draw_clip.c deleted file mode 100644 index 0ac3a240e5..0000000000 --- a/src/gallium/auxiliary/draw/draw_clip.c +++ /dev/null @@ -1,503 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \brief Clipping stage - * - * \author Keith Whitwell - */ - - -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" - -#include "draw_context.h" -#include "draw_vs.h" - - -#ifndef IS_NEGATIVE -#define IS_NEGATIVE(X) ((X) < 0.0) -#endif - -#ifndef DIFFERENT_SIGNS -#define DIFFERENT_SIGNS(x, y) ((x) * (y) <= 0.0F && (x) - (y) != 0.0F) -#endif - -#ifndef MAX_CLIPPED_VERTICES -#define MAX_CLIPPED_VERTICES ((2 * (6 + PIPE_MAX_CLIP_PLANES))+1) -#endif - - - -struct clipper { - struct draw_stage stage; /**< base class */ - - /* Basically duplicate some of the flatshading logic here: - */ - boolean flat; - uint num_color_attribs; - uint color_attribs[4]; /* front/back primary/secondary colors */ - - float (*plane)[4]; -}; - - -/* This is a bit confusing: - */ -static INLINE struct clipper *clipper_stage( struct draw_stage *stage ) -{ - return (struct clipper *)stage; -} - - -#define LINTERP(T, OUT, IN) ((OUT) + (T) * ((IN) - (OUT))) - - -/* All attributes are float[4], so this is easy: - */ -static void interp_attr( float *fdst, - float t, - const float *fin, - const float *fout ) -{ - fdst[0] = LINTERP( t, fout[0], fin[0] ); - fdst[1] = LINTERP( t, fout[1], fin[1] ); - fdst[2] = LINTERP( t, fout[2], fin[2] ); - fdst[3] = LINTERP( t, fout[3], fin[3] ); -} - -static void copy_colors( struct draw_stage *stage, - struct vertex_header *dst, - const struct vertex_header *src ) -{ - const struct clipper *clipper = clipper_stage(stage); - uint i; - for (i = 0; i < clipper->num_color_attribs; i++) { - const uint attr = clipper->color_attribs[i]; - COPY_4FV(dst->data[attr], src->data[attr]); - } -} - - - -/* Interpolate between two vertices to produce a third. - */ -static void interp( const struct clipper *clip, - struct vertex_header *dst, - float t, - const struct vertex_header *out, - const struct vertex_header *in ) -{ - const unsigned nr_attrs = clip->stage.draw->num_vs_outputs; - unsigned j; - - /* Vertex header. - */ - { - dst->clipmask = 0; - dst->edgeflag = 0; - dst->pad = 0; - dst->vertex_id = UNDEFINED_VERTEX_ID; - } - - /* Clip coordinates: interpolate normally - */ - { - interp_attr(dst->clip, t, in->clip, out->clip); - } - - /* Do the projective divide and insert window coordinates: - */ - { - const float *pos = dst->clip; - const float *scale = clip->stage.draw->viewport.scale; - const float *trans = clip->stage.draw->viewport.translate; - const float oow = 1.0f / pos[3]; - - dst->data[0][0] = pos[0] * oow * scale[0] + trans[0]; - dst->data[0][1] = pos[1] * oow * scale[1] + trans[1]; - dst->data[0][2] = pos[2] * oow * scale[2] + trans[2]; - dst->data[0][3] = oow; - } - - /* Other attributes - * Note: start at 1 to skip winpos (data[0]) since we just computed - * it above. - */ - for (j = 1; j < nr_attrs; j++) { - interp_attr(dst->data[j], t, in->data[j], out->data[j]); - } -} - - -static void emit_poly( struct draw_stage *stage, - struct vertex_header **inlist, - unsigned n, - const struct prim_header *origPrim) -{ - struct prim_header header; - unsigned i; - - /* later stages may need the determinant, but only the sign matters */ - header.det = origPrim->det; - - for (i = 2; i < n; i++) { - header.v[0] = inlist[i-1]; - header.v[1] = inlist[i]; - header.v[2] = inlist[0]; /* keep in v[2] for flatshading */ - - { - unsigned tmp1 = header.v[1]->edgeflag; - unsigned tmp2 = header.v[2]->edgeflag; - - if (i != n-1) header.v[1]->edgeflag = 0; - if (i != 2) header.v[2]->edgeflag = 0; - - header.edgeflags = ((header.v[0]->edgeflag << 0) | - (header.v[1]->edgeflag << 1) | - (header.v[2]->edgeflag << 2)); - - if (0) { - const struct draw_vertex_shader *vs = stage->draw->vertex_shader; - uint j, k; - debug_printf("Clipped tri:\n"); - for (j = 0; j < 3; j++) { - for (k = 0; k < vs->info.num_outputs; k++) { - debug_printf(" Vert %d: Attr %d: %f %f %f %f\n", j, k, - header.v[j]->data[k][0], - header.v[j]->data[k][1], - header.v[j]->data[k][2], - header.v[j]->data[k][3]); - } - } - } - - stage->next->tri( stage->next, &header ); - - header.v[1]->edgeflag = tmp1; - header.v[2]->edgeflag = tmp2; - } - } -} - - - - -/* Clip a triangle against the viewport and user clip planes. - */ -static void -do_clip_tri( struct draw_stage *stage, - struct prim_header *header, - unsigned clipmask ) -{ - struct clipper *clipper = clipper_stage( stage ); - struct vertex_header *a[MAX_CLIPPED_VERTICES]; - struct vertex_header *b[MAX_CLIPPED_VERTICES]; - struct vertex_header **inlist = a; - struct vertex_header **outlist = b; - unsigned tmpnr = 0; - unsigned n = 3; - unsigned i; - - inlist[0] = header->v[0]; - inlist[1] = header->v[1]; - inlist[2] = header->v[2]; - - while (clipmask && n >= 3) { - const unsigned plane_idx = ffs(clipmask)-1; - const float *plane = clipper->plane[plane_idx]; - struct vertex_header *vert_prev = inlist[0]; - float dp_prev = dot4( vert_prev->clip, plane ); - unsigned outcount = 0; - - clipmask &= ~(1<clip, plane ); - - if (!IS_NEGATIVE(dp_prev)) { - outlist[outcount++] = vert_prev; - } - - if (DIFFERENT_SIGNS(dp, dp_prev)) { - struct vertex_header *new_vert = clipper->stage.tmp[tmpnr++]; - outlist[outcount++] = new_vert; - - if (IS_NEGATIVE(dp)) { - /* Going out of bounds. Avoid division by zero as we - * know dp != dp_prev from DIFFERENT_SIGNS, above. - */ - float t = dp / (dp - dp_prev); - interp( clipper, new_vert, t, vert, vert_prev ); - - /* Force edgeflag true in this case: - */ - new_vert->edgeflag = 1; - } else { - /* Coming back in. - */ - float t = dp_prev / (dp_prev - dp); - interp( clipper, new_vert, t, vert_prev, vert ); - - /* Copy starting vert's edgeflag: - */ - new_vert->edgeflag = vert_prev->edgeflag; - } - } - - vert_prev = vert; - dp_prev = dp; - } - - { - struct vertex_header **tmp = inlist; - inlist = outlist; - outlist = tmp; - n = outcount; - } - } - - /* If flat-shading, copy color to new provoking vertex. - */ - if (clipper->flat && inlist[0] != header->v[2]) { - if (1) { - inlist[0] = dup_vert(stage, inlist[0], tmpnr++); - } - - copy_colors(stage, inlist[0], header->v[2]); - } - - - - /* Emit the polygon as triangles to the setup stage: - */ - if (n >= 3) - emit_poly( stage, inlist, n, header ); -} - - -/* Clip a line against the viewport and user clip planes. - */ -static void -do_clip_line( struct draw_stage *stage, - struct prim_header *header, - unsigned clipmask ) -{ - const struct clipper *clipper = clipper_stage( stage ); - struct vertex_header *v0 = header->v[0]; - struct vertex_header *v1 = header->v[1]; - const float *pos0 = v0->clip; - const float *pos1 = v1->clip; - float t0 = 0.0F; - float t1 = 0.0F; - struct prim_header newprim; - - while (clipmask) { - const unsigned plane_idx = ffs(clipmask)-1; - const float *plane = clipper->plane[plane_idx]; - const float dp0 = dot4( pos0, plane ); - const float dp1 = dot4( pos1, plane ); - - if (dp1 < 0.0F) { - float t = dp1 / (dp1 - dp0); - t1 = MAX2(t1, t); - } - - if (dp0 < 0.0F) { - float t = dp0 / (dp0 - dp1); - t0 = MAX2(t0, t); - } - - if (t0 + t1 >= 1.0F) - return; /* discard */ - - clipmask &= ~(1 << plane_idx); /* turn off this plane's bit */ - } - - if (v0->clipmask) { - interp( clipper, stage->tmp[0], t0, v0, v1 ); - - if (clipper->flat) - copy_colors(stage, stage->tmp[0], v0); - - newprim.v[0] = stage->tmp[0]; - } - else { - newprim.v[0] = v0; - } - - if (v1->clipmask) { - interp( clipper, stage->tmp[1], t1, v1, v0 ); - newprim.v[1] = stage->tmp[1]; - } - else { - newprim.v[1] = v1; - } - - stage->next->line( stage->next, &newprim ); -} - - -static void -clip_point( struct draw_stage *stage, - struct prim_header *header ) -{ - if (header->v[0]->clipmask == 0) - stage->next->point( stage->next, header ); -} - - -static void -clip_line( struct draw_stage *stage, - struct prim_header *header ) -{ - unsigned clipmask = (header->v[0]->clipmask | - header->v[1]->clipmask); - - if (clipmask == 0) { - /* no clipping needed */ - stage->next->line( stage->next, header ); - } - else if ((header->v[0]->clipmask & - header->v[1]->clipmask) == 0) { - do_clip_line(stage, header, clipmask); - } - /* else, totally clipped */ -} - - -static void -clip_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - unsigned clipmask = (header->v[0]->clipmask | - header->v[1]->clipmask | - header->v[2]->clipmask); - - if (clipmask == 0) { - /* no clipping needed */ - stage->next->tri( stage->next, header ); - } - else if ((header->v[0]->clipmask & - header->v[1]->clipmask & - header->v[2]->clipmask) == 0) { - do_clip_tri(stage, header, clipmask); - } -} - -/* Update state. Could further delay this until we hit the first - * primitive that really requires clipping. - */ -static void -clip_init_state( struct draw_stage *stage ) -{ - struct clipper *clipper = clipper_stage( stage ); - - clipper->flat = stage->draw->rasterizer->flatshade ? TRUE : FALSE; - - if (clipper->flat) { - const struct draw_vertex_shader *vs = stage->draw->vertex_shader; - uint i; - - clipper->num_color_attribs = 0; - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR || - vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { - clipper->color_attribs[clipper->num_color_attribs++] = i; - } - } - } - - stage->tri = clip_tri; - stage->line = clip_line; -} - - - -static void clip_first_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - clip_init_state( stage ); - stage->tri( stage, header ); -} - -static void clip_first_line( struct draw_stage *stage, - struct prim_header *header ) -{ - clip_init_state( stage ); - stage->line( stage, header ); -} - - -static void clip_flush( struct draw_stage *stage, - unsigned flags ) -{ - stage->tri = clip_first_tri; - stage->line = clip_first_line; - stage->next->flush( stage->next, flags ); -} - - -static void clip_reset_stipple_counter( struct draw_stage *stage ) -{ - stage->next->reset_stipple_counter( stage->next ); -} - - -static void clip_destroy( struct draw_stage *stage ) -{ - draw_free_temp_verts( stage ); - FREE( stage ); -} - - -/** - * Allocate a new clipper stage. - * \return pointer to new stage object - */ -struct draw_stage *draw_clip_stage( struct draw_context *draw ) -{ - struct clipper *clipper = CALLOC_STRUCT(clipper); - - draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 ); - - clipper->stage.draw = draw; - clipper->stage.point = clip_point; - clipper->stage.line = clip_first_line; - clipper->stage.tri = clip_first_tri; - clipper->stage.flush = clip_flush; - clipper->stage.reset_stipple_counter = clip_reset_stipple_counter; - clipper->stage.destroy = clip_destroy; - - clipper->plane = draw->plane; - - return &clipper->stage; -} diff --git a/src/gallium/auxiliary/draw/draw_cull.c b/src/gallium/auxiliary/draw/draw_cull.c deleted file mode 100644 index 8177b0ac86..0000000000 --- a/src/gallium/auxiliary/draw/draw_cull.c +++ /dev/null @@ -1,150 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \brief Drawing stage for polygon culling - */ - -/* Authors: Keith Whitwell - */ - - -#include "pipe/p_util.h" -#include "pipe/p_defines.h" -#include "draw_private.h" - - -struct cull_stage { - struct draw_stage stage; - unsigned winding; /**< which winding(s) to cull (one of PIPE_WINDING_x) */ -}; - - -static INLINE struct cull_stage *cull_stage( struct draw_stage *stage ) -{ - return (struct cull_stage *)stage; -} - - - - -static void cull_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - /* Window coords: */ - const float *v0 = header->v[0]->data[0]; - const float *v1 = header->v[1]->data[0]; - const float *v2 = header->v[2]->data[0]; - - /* edge vectors e = v0 - v2, f = v1 - v2 */ - const float ex = v0[0] - v2[0]; - const float ey = v0[1] - v2[1]; - const float fx = v1[0] - v2[0]; - const float fy = v1[1] - v2[1]; - - /* det = cross(e,f).z */ - header->det = ex * fy - ey * fx; - - if (header->det != 0) { - /* if (det < 0 then Z points toward camera and triangle is - * counter-clockwise winding. - */ - unsigned winding = (header->det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW; - - if ((winding & cull_stage(stage)->winding) == 0) { - /* triangle is not culled, pass to next stage */ - stage->next->tri( stage->next, header ); - } - } -} - -static void cull_first_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - struct cull_stage *cull = cull_stage(stage); - - cull->winding = stage->draw->rasterizer->cull_mode; - - stage->tri = cull_tri; - stage->tri( stage, header ); -} - - - -static void cull_line( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->line( stage->next, header ); -} - - -static void cull_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - - -static void cull_flush( struct draw_stage *stage, unsigned flags ) -{ - stage->tri = cull_first_tri; - stage->next->flush( stage->next, flags ); -} - -static void cull_reset_stipple_counter( struct draw_stage *stage ) -{ - stage->next->reset_stipple_counter( stage->next ); -} - - -static void cull_destroy( struct draw_stage *stage ) -{ - draw_free_temp_verts( stage ); - FREE( stage ); -} - - -/** - * Create a new polygon culling stage. - */ -struct draw_stage *draw_cull_stage( struct draw_context *draw ) -{ - struct cull_stage *cull = CALLOC_STRUCT(cull_stage); - - draw_alloc_temp_verts( &cull->stage, 0 ); - - cull->stage.draw = draw; - cull->stage.next = NULL; - cull->stage.point = cull_point; - cull->stage.line = cull_line; - cull->stage.tri = cull_first_tri; - cull->stage.flush = cull_flush; - cull->stage.reset_stipple_counter = cull_reset_stipple_counter; - cull->stage.destroy = cull_destroy; - - return &cull->stage; -} diff --git a/src/gallium/auxiliary/draw/draw_flatshade.c b/src/gallium/auxiliary/draw/draw_flatshade.c deleted file mode 100644 index 54baa1fbc9..0000000000 --- a/src/gallium/auxiliary/draw/draw_flatshade.c +++ /dev/null @@ -1,248 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell - */ - -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" -#include "draw_vs.h" - - -/** subclass of draw_stage */ -struct flat_stage -{ - struct draw_stage stage; - - uint num_color_attribs; - uint color_attribs[4]; /* front/back primary/secondary colors */ -}; - - -static INLINE struct flat_stage * -flat_stage(struct draw_stage *stage) -{ - return (struct flat_stage *) stage; -} - - -/** Copy all the color attributes from 'src' vertex to 'dst' vertex */ -static INLINE void copy_colors( struct draw_stage *stage, - struct vertex_header *dst, - const struct vertex_header *src ) -{ - const struct flat_stage *flat = flat_stage(stage); - uint i; - for (i = 0; i < flat->num_color_attribs; i++) { - const uint attr = flat->color_attribs[i]; - COPY_4FV(dst->data[attr], src->data[attr]); - } -} - - -/** Copy all the color attributes from src vertex to dst0 & dst1 vertices */ -static INLINE void copy_colors2( struct draw_stage *stage, - struct vertex_header *dst0, - struct vertex_header *dst1, - const struct vertex_header *src ) -{ - const struct flat_stage *flat = flat_stage(stage); - uint i; - for (i = 0; i < flat->num_color_attribs; i++) { - const uint attr = flat->color_attribs[i]; - COPY_4FV(dst0->data[attr], src->data[attr]); - COPY_4FV(dst1->data[attr], src->data[attr]); - } -} - - -/** - * Flatshade tri. Required for clipping and when unfilled tris are - * active, otherwise handled by hardware. - */ -static void flatshade_tri_0( struct draw_stage *stage, - struct prim_header *header ) -{ - struct prim_header tmp; - - tmp.det = header->det; - tmp.edgeflags = header->edgeflags; - tmp.v[0] = header->v[0]; - tmp.v[1] = dup_vert(stage, header->v[1], 0); - tmp.v[2] = dup_vert(stage, header->v[2], 1); - - copy_colors2(stage, tmp.v[1], tmp.v[2], tmp.v[0]); - - stage->next->tri( stage->next, &tmp ); -} - - -static void flatshade_tri_2( struct draw_stage *stage, - struct prim_header *header ) -{ - struct prim_header tmp; - - tmp.det = header->det; - tmp.edgeflags = header->edgeflags; - tmp.v[0] = dup_vert(stage, header->v[0], 0); - tmp.v[1] = dup_vert(stage, header->v[1], 1); - tmp.v[2] = header->v[2]; - - copy_colors2(stage, tmp.v[0], tmp.v[1], tmp.v[2]); - - stage->next->tri( stage->next, &tmp ); -} - - - - - -/** - * Flatshade line. Required for clipping. - */ -static void flatshade_line_0( struct draw_stage *stage, - struct prim_header *header ) -{ - struct prim_header tmp; - - tmp.v[0] = header->v[0]; - tmp.v[1] = dup_vert(stage, header->v[1], 0); - - copy_colors(stage, tmp.v[1], tmp.v[0]); - - stage->next->line( stage->next, &tmp ); -} - -static void flatshade_line_1( struct draw_stage *stage, - struct prim_header *header ) -{ - struct prim_header tmp; - - tmp.v[0] = dup_vert(stage, header->v[0], 0); - tmp.v[1] = header->v[1]; - - copy_colors(stage, tmp.v[0], tmp.v[1]); - - stage->next->line( stage->next, &tmp ); -} - - -/* Flatshade point -- passthrough. - */ -static void flatshade_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - - -static void flatshade_init_state( struct draw_stage *stage ) -{ - struct flat_stage *flat = flat_stage(stage); - const struct draw_vertex_shader *vs = stage->draw->vertex_shader; - uint i; - - /* Find which vertex shader outputs are colors, make a list */ - flat->num_color_attribs = 0; - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR || - vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { - flat->color_attribs[flat->num_color_attribs++] = i; - } - } - - /* Choose flatshade routine according to provoking vertex: - */ - if (stage->draw->rasterizer->flatshade_first) { - stage->line = flatshade_line_0; - stage->tri = flatshade_tri_0; - } - else { - stage->line = flatshade_line_1; - stage->tri = flatshade_tri_2; - } -} - -static void flatshade_first_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - flatshade_init_state( stage ); - stage->tri( stage, header ); -} - -static void flatshade_first_line( struct draw_stage *stage, - struct prim_header *header ) -{ - flatshade_init_state( stage ); - stage->line( stage, header ); -} - - -static void flatshade_flush( struct draw_stage *stage, - unsigned flags ) -{ - stage->tri = flatshade_first_tri; - stage->line = flatshade_first_line; - stage->next->flush( stage->next, flags ); -} - - -static void flatshade_reset_stipple_counter( struct draw_stage *stage ) -{ - stage->next->reset_stipple_counter( stage->next ); -} - - -static void flatshade_destroy( struct draw_stage *stage ) -{ - draw_free_temp_verts( stage ); - FREE( stage ); -} - - -/** - * Create flatshading drawing stage. - */ -struct draw_stage *draw_flatshade_stage( struct draw_context *draw ) -{ - struct flat_stage *flatshade = CALLOC_STRUCT(flat_stage); - - draw_alloc_temp_verts( &flatshade->stage, 2 ); - - flatshade->stage.draw = draw; - flatshade->stage.next = NULL; - flatshade->stage.point = flatshade_point; - flatshade->stage.line = flatshade_first_line; - flatshade->stage.tri = flatshade_first_tri; - flatshade->stage.flush = flatshade_flush; - flatshade->stage.reset_stipple_counter = flatshade_reset_stipple_counter; - flatshade->stage.destroy = flatshade_destroy; - - return &flatshade->stage; -} - - diff --git a/src/gallium/auxiliary/draw/draw_offset.c b/src/gallium/auxiliary/draw/draw_offset.c deleted file mode 100644 index dbc676deae..0000000000 --- a/src/gallium/auxiliary/draw/draw_offset.c +++ /dev/null @@ -1,186 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \brief polygon offset state - * - * \author Keith Whitwell - * \author Brian Paul - */ - -#include "pipe/p_util.h" -#include "draw_private.h" - - - -struct offset_stage { - struct draw_stage stage; - - float scale; - float units; -}; - - - -static INLINE struct offset_stage *offset_stage( struct draw_stage *stage ) -{ - return (struct offset_stage *) stage; -} - - - - - -/** - * Offset tri Z. Some hardware can handle this, but not usually when - * doing unfilled rendering. - */ -static void do_offset_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - struct offset_stage *offset = offset_stage(stage); - float inv_det = 1.0f / header->det; - - /* Window coords: - */ - float *v0 = header->v[0]->data[0]; - float *v1 = header->v[1]->data[0]; - float *v2 = header->v[2]->data[0]; - - /* edge vectors e = v0 - v2, f = v1 - v2 */ - float ex = v0[0] - v2[0]; - float ey = v0[1] - v2[1]; - float ez = v0[2] - v2[2]; - float fx = v1[0] - v2[0]; - float fy = v1[1] - v2[1]; - float fz = v1[2] - v2[2]; - - /* (a,b) = cross(e,f).xy */ - float a = ey*fz - ez*fy; - float b = ez*fx - ex*fz; - - float dzdx = FABSF(a * inv_det); - float dzdy = FABSF(b * inv_det); - - float zoffset = offset->units + MAX2(dzdx, dzdy) * offset->scale; - - /* - * Note: we're applying the offset and clamping per-vertex. - * Ideally, the offset is applied per-fragment prior to fragment shading. - */ - v0[2] = CLAMP(v0[2] + zoffset, 0.0f, 1.0f); - v1[2] = CLAMP(v1[2] + zoffset, 0.0f, 1.0f); - v2[2] = CLAMP(v2[2] + zoffset, 0.0f, 1.0f); - - stage->next->tri( stage->next, header ); -} - - -static void offset_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - struct prim_header tmp; - - tmp.det = header->det; - tmp.edgeflags = header->edgeflags; - tmp.v[0] = dup_vert(stage, header->v[0], 0); - tmp.v[1] = dup_vert(stage, header->v[1], 1); - tmp.v[2] = dup_vert(stage, header->v[2], 2); - - do_offset_tri( stage, &tmp ); -} - - -static void offset_first_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - struct offset_stage *offset = offset_stage(stage); - float mrd = 1.0f / 65535.0f; /* XXX this depends on depthbuffer bits! */ - - offset->units = stage->draw->rasterizer->offset_units * mrd; - offset->scale = stage->draw->rasterizer->offset_scale; - - stage->tri = offset_tri; - stage->tri( stage, header ); -} - - -static void offset_line( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->line( stage->next, header ); -} - - -static void offset_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - - -static void offset_flush( struct draw_stage *stage, - unsigned flags ) -{ - stage->tri = offset_first_tri; - stage->next->flush( stage->next, flags ); -} - - -static void offset_reset_stipple_counter( struct draw_stage *stage ) -{ - stage->next->reset_stipple_counter( stage->next ); -} - - -static void offset_destroy( struct draw_stage *stage ) -{ - draw_free_temp_verts( stage ); - FREE( stage ); -} - - -/** - * Create polygon offset drawing stage. - */ -struct draw_stage *draw_offset_stage( struct draw_context *draw ) -{ - struct offset_stage *offset = CALLOC_STRUCT(offset_stage); - - draw_alloc_temp_verts( &offset->stage, 3 ); - - offset->stage.draw = draw; - offset->stage.next = NULL; - offset->stage.point = offset_point; - offset->stage.line = offset_line; - offset->stage.tri = offset_first_tri; - offset->stage.flush = offset_flush; - offset->stage.reset_stipple_counter = offset_reset_stipple_counter; - offset->stage.destroy = offset_destroy; - - return &offset->stage; -} diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c new file mode 100644 index 0000000000..e8d2a45102 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -0,0 +1,859 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * AA line stage: AA lines are converted to texture mapped triangles. + * + * Authors: Brian Paul + */ + + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/util/tgsi_transform.h" +#include "tgsi/util/tgsi_dump.h" + +#include "draw_context.h" +#include "draw_private.h" + + +/** + * Max texture level for the alpha texture used for antialiasing + */ +#define MAX_TEXTURE_LEVEL 5 /* 32 x 32 */ + + +/** + * Subclass of pipe_shader_state to carry extra fragment shader info. + */ +struct aaline_fragment_shader +{ + struct pipe_shader_state state; + void *driver_fs; + void *aaline_fs; + void *aapoint_fs; /* not yet */ + void *sprite_fs; /* not yet */ + uint sampler_unit; + int generic_attrib; /**< texcoord/generic used for texture */ +}; + + +/** + * Subclass of draw_stage + */ +struct aaline_stage +{ + struct draw_stage stage; + + float half_line_width; + + /** For AA lines, this is the vertex attrib slot for the new texcoords */ + uint tex_slot; + + void *sampler_cso; + struct pipe_texture *texture; + uint num_samplers; + uint num_textures; + + + /* + * Currently bound state + */ + struct aaline_fragment_shader *fs; + struct { + void *sampler[PIPE_MAX_SAMPLERS]; + struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; + } state; + + /* + * Driver interface/override functions + */ + void * (*driver_create_fs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*driver_bind_fs_state)(struct pipe_context *, void *); + void (*driver_delete_fs_state)(struct pipe_context *, void *); + + void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, + void **); + void (*driver_set_sampler_textures)(struct pipe_context *, unsigned, + struct pipe_texture **); + + struct pipe_context *pipe; +}; + + + +/** + * Subclass of tgsi_transform_context, used for transforming the + * user's fragment shader to add the special AA instructions. + */ +struct aa_transform_context { + struct tgsi_transform_context base; + uint tempsUsed; /**< bitmask */ + int colorOutput; /**< which output is the primary color */ + uint samplersUsed; /**< bitfield of samplers used */ + int freeSampler; /** an available sampler for the pstipple */ + int maxInput, maxGeneric; /**< max input index found */ + int colorTemp, texTemp; /**< temp registers */ + boolean firstInstruction; +}; + + +/** + * TGSI declaration transform callback. + * Look for a free sampler, a free input attrib, and two free temp regs. + */ +static void +aa_transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + + if (decl->Declaration.File == TGSI_FILE_OUTPUT && + decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && + decl->Semantic.SemanticIndex == 0) { + aactx->colorOutput = decl->u.DeclarationRange.First; + } + else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { + uint i; + for (i = decl->u.DeclarationRange.First; + i <= decl->u.DeclarationRange.Last; i++) { + aactx->samplersUsed |= 1 << i; + } + } + else if (decl->Declaration.File == TGSI_FILE_INPUT) { + if ((int) decl->u.DeclarationRange.Last > aactx->maxInput) + aactx->maxInput = decl->u.DeclarationRange.Last; + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && + (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { + aactx->maxGeneric = decl->Semantic.SemanticIndex; + } + } + else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + uint i; + for (i = decl->u.DeclarationRange.First; + i <= decl->u.DeclarationRange.Last; i++) { + aactx->tempsUsed |= (1 << i); + } + } + + ctx->emit_declaration(ctx, decl); +} + + +/** + * Find the lowest zero bit in the given word, or -1 if bitfield is all ones. + */ +static int +free_bit(uint bitfield) +{ + int i; + for (i = 0; i < 32; i++) { + if ((bitfield & (1 << i)) == 0) + return i; + } + return -1; +} + + +/** + * TGSI instruction transform callback. + * Replace writes to result.color w/ a temp reg. + * Upon END instruction, insert texture sampling code for antialiasing. + */ +static void +aa_transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + + if (aactx->firstInstruction) { + /* emit our new declarations before the first instruction */ + + struct tgsi_full_declaration decl; + uint i; + + /* find free sampler */ + aactx->freeSampler = free_bit(aactx->samplersUsed); + if (aactx->freeSampler >= PIPE_MAX_SAMPLERS) + aactx->freeSampler = PIPE_MAX_SAMPLERS - 1; + + /* find two free temp regs */ + for (i = 0; i < 32; i++) { + if ((aactx->tempsUsed & (1 << i)) == 0) { + /* found a free temp */ + if (aactx->colorTemp < 0) + aactx->colorTemp = i; + else if (aactx->texTemp < 0) + aactx->texTemp = i; + else + break; + } + } + assert(aactx->colorTemp >= 0); + assert(aactx->texTemp >= 0); + + /* declare new generic input/texcoord */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; + decl.Declaration.Interpolate = 1; + /* XXX this could be linear... */ + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->maxInput + 1; + ctx->emit_declaration(ctx, &decl); + + /* declare new sampler */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->freeSampler; + ctx->emit_declaration(ctx, &decl); + + /* declare new temp regs */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->texTemp; + ctx->emit_declaration(ctx, &decl); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->colorTemp; + ctx->emit_declaration(ctx, &decl); + + aactx->firstInstruction = FALSE; + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_END && + aactx->colorOutput != -1) { + struct tgsi_full_instruction newInst; + + /* TEX */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_TEX; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->maxInput + 1; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->freeSampler; + + ctx->emit_instruction(ctx, &newInst); + + /* MOV rgb */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MOV; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + ctx->emit_instruction(ctx, &newInst); + + /* MUL alpha */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->texTemp; + ctx->emit_instruction(ctx, &newInst); + + /* END */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_END; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 0; + ctx->emit_instruction(ctx, &newInst); + } + else { + /* Not an END instruction. + * Look for writes to result.color and replace with colorTemp reg. + */ + uint i; + + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + if (dst->DstRegister.File == TGSI_FILE_OUTPUT && + dst->DstRegister.Index == aactx->colorOutput) { + dst->DstRegister.File = TGSI_FILE_TEMPORARY; + dst->DstRegister.Index = aactx->colorTemp; + } + } + + ctx->emit_instruction(ctx, inst); + } +} + + +/** + * Generate the frag shader we'll use for drawing AA lines. + * This will be the user's shader plus some texture/modulate instructions. + */ +static void +generate_aaline_fs(struct aaline_stage *aaline) +{ + const struct pipe_shader_state *orig_fs = &aaline->fs->state; + //struct draw_context *draw = aaline->stage.draw; + struct pipe_shader_state aaline_fs; + struct aa_transform_context transform; + +#define MAX 1000 + + aaline_fs = *orig_fs; /* copy to init */ + aaline_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + + memset(&transform, 0, sizeof(transform)); + transform.colorOutput = -1; + transform.maxInput = -1; + transform.maxGeneric = -1; + transform.colorTemp = -1; + transform.texTemp = -1; + transform.firstInstruction = TRUE; + transform.base.transform_instruction = aa_transform_inst; + transform.base.transform_declaration = aa_transform_decl; + + tgsi_transform_shader(orig_fs->tokens, + (struct tgsi_token *) aaline_fs.tokens, + MAX, &transform.base); + +#if 0 /* DEBUG */ + tgsi_dump(orig_fs->tokens, 0); + tgsi_dump(aaline_fs.tokens, 0); +#endif + + aaline->fs->sampler_unit = transform.freeSampler; + + aaline->fs->aaline_fs + = aaline->driver_create_fs_state(aaline->pipe, &aaline_fs); + + aaline->fs->generic_attrib = transform.maxGeneric + 1; +} + + +/** + * Create the texture map we'll use for antialiasing the lines. + */ +static void +aaline_create_texture(struct aaline_stage *aaline) +{ + struct pipe_context *pipe = aaline->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_texture texTemp; + uint level; + + memset(&texTemp, 0, sizeof(texTemp)); + texTemp.target = PIPE_TEXTURE_2D; + texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */ + texTemp.last_level = MAX_TEXTURE_LEVEL; + texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL; + texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL; + texTemp.depth[0] = 1; + texTemp.cpp = 1; + + aaline->texture = screen->texture_create(screen, &texTemp); + + /* Fill in mipmap images. + * Basically each level is solid opaque, except for the outermost + * texels which are zero. Special case the 1x1 and 2x2 levels. + */ + for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) { + struct pipe_surface *surface; + const uint size = aaline->texture->width[level]; + ubyte *data; + uint i, j; + + assert(aaline->texture->width[level] == aaline->texture->height[level]); + + surface = screen->get_tex_surface(screen, aaline->texture, 0, level, 0); + data = pipe_surface_map(surface); + + for (i = 0; i < size; i++) { + for (j = 0; j < size; j++) { + ubyte d; + if (size == 1) { + d = 255; + } + else if (size == 2) { + d = 200; /* tuneable */ + } + else if (i == 0 || j == 0 || i == size - 1 || j == size - 1) { + d = 0; + } + else { + d = 255; + } + data[i * surface->pitch + j] = d; + } + } + + /* unmap */ + pipe_surface_unmap(surface); + pipe_surface_reference(&surface, NULL); + pipe->texture_update(pipe, aaline->texture, 0, (1 << level)); + } +} + + +/** + * Create the sampler CSO that'll be used for antialiasing. + * By using a mipmapped texture, we don't have to generate a different + * texture image for each line size. + */ +static void +aaline_create_sampler(struct aaline_stage *aaline) +{ + struct pipe_sampler_state sampler; + struct pipe_context *pipe = aaline->pipe; + + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_LINEAR; + sampler.min_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.mag_img_filter = PIPE_TEX_FILTER_LINEAR; + sampler.normalized_coords = 1; + sampler.min_lod = 0.0f; + sampler.max_lod = MAX_TEXTURE_LEVEL; + + aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler); +} + + +/** + * When we're about to draw our first AA line in a batch, this function is + * called to tell the driver to bind our modified fragment shader. + */ +static void +bind_aaline_fragment_shader(struct aaline_stage *aaline) +{ + if (!aaline->fs->aaline_fs) { + generate_aaline_fs(aaline); + } + aaline->driver_bind_fs_state(aaline->pipe, aaline->fs->aaline_fs); +} + + + +static INLINE struct aaline_stage * +aaline_stage( struct draw_stage *stage ) +{ + return (struct aaline_stage *) stage; +} + + +static void +passthrough_point(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->point(stage->next, header); +} + + +static void +passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + +/** + * Draw a wide line by drawing a quad, using geometry which will + * fullfill GL's antialiased line requirements. + */ +static void +aaline_line(struct draw_stage *stage, struct prim_header *header) +{ + const struct aaline_stage *aaline = aaline_stage(stage); + const float half_width = aaline->half_line_width; + struct prim_header tri; + struct vertex_header *v[8]; + uint texPos = aaline->tex_slot; + float *pos, *tex; + float dx = header->v[1]->data[0][0] - header->v[0]->data[0][0]; + float dy = header->v[1]->data[0][1] - header->v[0]->data[0][1]; + double a = atan2(dy, dx); + float c_a = (float) cos(a), s_a = (float) sin(a); + uint i; + + /* XXX the ends of lines aren't quite perfect yet, but probably passable */ + dx = 0.5F * half_width; + dy = half_width; + + /* allocate/dup new verts */ + for (i = 0; i < 8; i++) { + v[i] = dup_vert(stage, header->v[i/4], i); + } + + /* + * Quad strip for line from v0 to v1 (*=endpoints): + * + * 1 3 5 7 + * +---+---------------------+---+ + * | | + * | *v0 v1* | + * | | + * +---+---------------------+---+ + * 0 2 4 6 + */ + + /* new verts */ + pos = v[0]->data[0]; + pos[0] += (-dx * c_a - dy * s_a); + pos[1] += (-dx * s_a + dy * c_a); + + pos = v[1]->data[0]; + pos[0] += (-dx * c_a - -dy * s_a); + pos[1] += (-dx * s_a + -dy * c_a); + + pos = v[2]->data[0]; + pos[0] += ( dx * c_a - dy * s_a); + pos[1] += ( dx * s_a + dy * c_a); + + pos = v[3]->data[0]; + pos[0] += ( dx * c_a - -dy * s_a); + pos[1] += ( dx * s_a + -dy * c_a); + + pos = v[4]->data[0]; + pos[0] += (-dx * c_a - dy * s_a); + pos[1] += (-dx * s_a + dy * c_a); + + pos = v[5]->data[0]; + pos[0] += (-dx * c_a - -dy * s_a); + pos[1] += (-dx * s_a + -dy * c_a); + + pos = v[6]->data[0]; + pos[0] += ( dx * c_a - dy * s_a); + pos[1] += ( dx * s_a + dy * c_a); + + pos = v[7]->data[0]; + pos[0] += ( dx * c_a - -dy * s_a); + pos[1] += ( dx * s_a + -dy * c_a); + + /* new texcoords */ + tex = v[0]->data[texPos]; + ASSIGN_4V(tex, 0, 0, 0, 1); + + tex = v[1]->data[texPos]; + ASSIGN_4V(tex, 0, 1, 0, 1); + + tex = v[2]->data[texPos]; + ASSIGN_4V(tex, .5, 0, 0, 1); + + tex = v[3]->data[texPos]; + ASSIGN_4V(tex, .5, 1, 0, 1); + + tex = v[4]->data[texPos]; + ASSIGN_4V(tex, .5, 0, 0, 1); + + tex = v[5]->data[texPos]; + ASSIGN_4V(tex, .5, 1, 0, 1); + + tex = v[6]->data[texPos]; + ASSIGN_4V(tex, 1, 0, 0, 1); + + tex = v[7]->data[texPos]; + ASSIGN_4V(tex, 1, 1, 0, 1); + + /* emit 6 tris for the quad strip */ + tri.v[0] = v[2]; tri.v[1] = v[1]; tri.v[2] = v[0]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[3]; tri.v[1] = v[1]; tri.v[2] = v[2]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[4]; tri.v[1] = v[3]; tri.v[2] = v[2]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[5]; tri.v[1] = v[3]; tri.v[2] = v[4]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[6]; tri.v[1] = v[5]; tri.v[2] = v[4]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[7]; tri.v[1] = v[5]; tri.v[2] = v[6]; + stage->next->tri( stage->next, &tri ); +} + + +static void +aaline_first_line(struct draw_stage *stage, struct prim_header *header) +{ + auto struct aaline_stage *aaline = aaline_stage(stage); + struct draw_context *draw = stage->draw; + struct pipe_context *pipe = aaline->pipe; + uint num_samplers; + + assert(draw->rasterizer->line_smooth); + + if (draw->rasterizer->line_width <= 3.0) + aaline->half_line_width = 1.5f; + else + aaline->half_line_width = 0.5f * draw->rasterizer->line_width; + + /* + * Bind (generate) our fragprog, sampler and texture + */ + bind_aaline_fragment_shader(aaline); + + /* update vertex attrib info */ + aaline->tex_slot = draw->num_vs_outputs; + assert(aaline->tex_slot > 0); /* output[0] is vertex pos */ + + /* advertise the extra post-transformed vertex attribute */ + draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_vp_outputs.semantic_index = aaline->fs->generic_attrib; + draw->extra_vp_outputs.slot = aaline->tex_slot; + + /* how many samplers? */ + /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ + num_samplers = MAX2(aaline->num_textures, aaline->num_samplers); + num_samplers = MAX2(num_samplers, aaline->fs->sampler_unit + 1); + + aaline->state.sampler[aaline->fs->sampler_unit] = aaline->sampler_cso; + pipe_texture_reference(&aaline->state.texture[aaline->fs->sampler_unit], + aaline->texture); + + aaline->driver_bind_sampler_states(pipe, num_samplers, aaline->state.sampler); + aaline->driver_set_sampler_textures(pipe, num_samplers, aaline->state.texture); + + /* now really draw first line */ + stage->line = aaline_line; + stage->line(stage, header); +} + + +static void +aaline_flush(struct draw_stage *stage, unsigned flags) +{ + struct draw_context *draw = stage->draw; + struct aaline_stage *aaline = aaline_stage(stage); + struct pipe_context *pipe = aaline->pipe; + + stage->line = aaline_first_line; + stage->next->flush( stage->next, flags ); + + /* restore original frag shader */ + aaline->driver_bind_fs_state(pipe, aaline->fs->driver_fs); + + /* XXX restore original texture, sampler state */ + aaline->driver_bind_sampler_states(pipe, aaline->num_samplers, + aaline->state.sampler); + aaline->driver_set_sampler_textures(pipe, aaline->num_textures, + aaline->state.texture); + + draw->extra_vp_outputs.slot = 0; +} + + +static void +aaline_reset_stipple_counter(struct draw_stage *stage) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +aaline_destroy(struct draw_stage *stage) +{ + struct aaline_stage *aaline = aaline_stage(stage); + + aaline->pipe->delete_sampler_state(aaline->pipe, aaline->sampler_cso); + + pipe_texture_release(&aaline->texture); + + draw_free_temp_verts( stage ); + + FREE( stage ); +} + + +static struct aaline_stage * +draw_aaline_stage(struct draw_context *draw) +{ + struct aaline_stage *aaline = CALLOC_STRUCT(aaline_stage); + + draw_alloc_temp_verts( &aaline->stage, 8 ); + + aaline->stage.draw = draw; + aaline->stage.next = NULL; + aaline->stage.point = passthrough_point; + aaline->stage.line = aaline_first_line; + aaline->stage.tri = passthrough_tri; + aaline->stage.flush = aaline_flush; + aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter; + aaline->stage.destroy = aaline_destroy; + + return aaline; +} + + +static struct aaline_stage * +aaline_stage_from_pipe(struct pipe_context *pipe) +{ + struct draw_context *draw = (struct draw_context *) pipe->draw; + return aaline_stage(draw->pipeline.aaline); +} + + +/** + * This function overrides the driver's create_fs_state() function and + * will typically be called by the state tracker. + */ +static void * +aaline_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + struct aaline_fragment_shader *aafs = CALLOC_STRUCT(aaline_fragment_shader); + + if (aafs) { + aafs->state = *fs; + + /* pass-through */ + aafs->driver_fs = aaline->driver_create_fs_state(aaline->pipe, fs); + } + + return aafs; +} + + +static void +aaline_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs; + /* save current */ + aaline->fs = aafs; + /* pass-through */ + aaline->driver_bind_fs_state(aaline->pipe, + (aafs ? aafs->driver_fs : NULL)); +} + + +static void +aaline_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs; + /* pass-through */ + aaline->driver_delete_fs_state(aaline->pipe, aafs->driver_fs); + FREE(aafs); +} + + +static void +aaline_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + /* save current */ + memcpy(aaline->state.sampler, sampler, num * sizeof(void *)); + aaline->num_samplers = num; + /* pass-through */ + aaline->driver_bind_sampler_states(aaline->pipe, num, sampler); +} + + +static void +aaline_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) +{ + struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + uint i; + + /* save current */ + for (i = 0; i < num; i++) { + pipe_texture_reference(&aaline->state.texture[i], texture[i]); + } + aaline->num_textures = num; + + /* pass-through */ + aaline->driver_set_sampler_textures(aaline->pipe, num, texture); +} + + +/** + * Called by drivers that want to install this AA line prim stage + * into the draw module's pipeline. This will not be used if the + * hardware has native support for AA lines. + */ +void +draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) +{ + struct aaline_stage *aaline; + + pipe->draw = (void *) draw; + + /* + * Create / install AA line drawing / prim stage + */ + aaline = draw_aaline_stage( draw ); + assert(aaline); + draw->pipeline.aaline = &aaline->stage; + + aaline->pipe = pipe; + + /* create special texture, sampler state */ + aaline_create_texture(aaline); + aaline_create_sampler(aaline); + + /* save original driver functions */ + aaline->driver_create_fs_state = pipe->create_fs_state; + aaline->driver_bind_fs_state = pipe->bind_fs_state; + aaline->driver_delete_fs_state = pipe->delete_fs_state; + + aaline->driver_bind_sampler_states = pipe->bind_sampler_states; + aaline->driver_set_sampler_textures = pipe->set_sampler_textures; + + /* override the driver's functions */ + pipe->create_fs_state = aaline_create_fs_state; + pipe->bind_fs_state = aaline_bind_fs_state; + pipe->delete_fs_state = aaline_delete_fs_state; + + pipe->bind_sampler_states = aaline_bind_sampler_states; + pipe->set_sampler_textures = aaline_set_sampler_textures; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c new file mode 100644 index 0000000000..e84d380e50 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -0,0 +1,846 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * AA point stage: AA points are converted to quads and rendered with a + * special fragment shader. Another approach would be to use a texture + * map image of a point, but experiments indicate the quality isn't nearly + * as good as this approach. + * + * Note: this looks a lot like draw_aaline.c but there's actually little + * if any code that can be shared. + * + * Authors: Brian Paul + */ + + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/util/tgsi_transform.h" +#include "tgsi/util/tgsi_dump.h" + +#include "draw_context.h" +#include "draw_vs.h" + + +/* + * Enabling NORMALIZE might give _slightly_ better results. + * Basically, it controls whether we compute distance as d=sqrt(x*x+y*y) or + * d=x*x+y*y. Since we're working with a unit circle, the later seems + * close enough and saves some costly instructions. + */ +#define NORMALIZE 0 + + +/** + * Subclass of pipe_shader_state to carry extra fragment shader info. + */ +struct aapoint_fragment_shader +{ + struct pipe_shader_state state; + void *driver_fs; /**< the regular shader */ + void *aapoint_fs; /**< the aa point-augmented shader */ + int generic_attrib; /**< The generic input attrib/texcoord we'll use */ +}; + + +/** + * Subclass of draw_stage + */ +struct aapoint_stage +{ + struct draw_stage stage; + + int psize_slot; + float radius; + + /** this is the vertex attrib slot for the new texcoords */ + uint tex_slot; + + /* + * Currently bound state + */ + struct aapoint_fragment_shader *fs; + + /* + * Driver interface/override functions + */ + void * (*driver_create_fs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*driver_bind_fs_state)(struct pipe_context *, void *); + void (*driver_delete_fs_state)(struct pipe_context *, void *); + + struct pipe_context *pipe; +}; + + + +/** + * Subclass of tgsi_transform_context, used for transforming the + * user's fragment shader to add the special AA instructions. + */ +struct aa_transform_context { + struct tgsi_transform_context base; + uint tempsUsed; /**< bitmask */ + int colorOutput; /**< which output is the primary color */ + int maxInput, maxGeneric; /**< max input index found */ + int tmp0, colorTemp; /**< temp registers */ + boolean firstInstruction; +}; + + +/** + * TGSI declaration transform callback. + * Look for two free temp regs and available input reg for new texcoords. + */ +static void +aa_transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + + if (decl->Declaration.File == TGSI_FILE_OUTPUT && + decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && + decl->Semantic.SemanticIndex == 0) { + aactx->colorOutput = decl->u.DeclarationRange.First; + } + else if (decl->Declaration.File == TGSI_FILE_INPUT) { + if ((int) decl->u.DeclarationRange.Last > aactx->maxInput) + aactx->maxInput = decl->u.DeclarationRange.Last; + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && + (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { + aactx->maxGeneric = decl->Semantic.SemanticIndex; + } + } + else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + uint i; + for (i = decl->u.DeclarationRange.First; + i <= decl->u.DeclarationRange.Last; i++) { + aactx->tempsUsed |= (1 << i); + } + } + + ctx->emit_declaration(ctx, decl); +} + + +/** + * TGSI instruction transform callback. + * Replace writes to result.color w/ a temp reg. + * Upon END instruction, insert texture sampling code for antialiasing. + */ +static void +aa_transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct aa_transform_context *aactx = (struct aa_transform_context *) ctx; + struct tgsi_full_instruction newInst; + + if (aactx->firstInstruction) { + /* emit our new declarations before the first instruction */ + + struct tgsi_full_declaration decl; + const int texInput = aactx->maxInput + 1; + int tmp0; + uint i; + + /* find two free temp regs */ + for (i = 0; i < 32; i++) { + if ((aactx->tempsUsed & (1 << i)) == 0) { + /* found a free temp */ + if (aactx->tmp0 < 0) + aactx->tmp0 = i; + else if (aactx->colorTemp < 0) + aactx->colorTemp = i; + else + break; + } + } + + assert(aactx->colorTemp != aactx->tmp0); + + tmp0 = aactx->tmp0; + + /* declare new generic input/texcoord */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; + decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; + decl.Declaration.Interpolate = 1; + /* XXX this could be linear... */ + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = texInput; + ctx->emit_declaration(ctx, &decl); + + /* declare new temp regs */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = tmp0; + ctx->emit_declaration(ctx, &decl); + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = aactx->colorTemp; + ctx->emit_declaration(ctx, &decl); + + aactx->firstInstruction = FALSE; + + + /* + * Emit code to compute fragment coverage, kill if outside point radius + * + * Temp reg0 usage: + * t0.x = distance of fragment from center point + * t0.y = boolean, is t0.x > 1.0, also misc temp usage + * t0.z = temporary for computing 1/(1-k) value + * t0.w = final coverage value + */ + + /* MUL t0.xy, tex, tex; # compute x^2, y^2 */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XY; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + ctx->emit_instruction(ctx, &newInst); + + /* ADD t0.x, t0.x, t0.y; # x^2 + y^2 */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_ADD; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + ctx->emit_instruction(ctx, &newInst); + +#if NORMALIZE /* OPTIONAL normalization of length */ + /* RSQ t0.x, t0.x; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_RSQ; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + ctx->emit_instruction(ctx, &newInst); + + /* RCP t0.x, t0.x; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_RCP; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_X; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + ctx->emit_instruction(ctx, &newInst); +#endif + + /* SGT t0.y, t0.xxxx, t0.wwww; # bool b = d > 1 (NOTE t0.w == 1) */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SGT; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + ctx->emit_instruction(ctx, &newInst); + + /* KILP -t0.yyyy; # if b, KILL */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_KILP; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + ctx->emit_instruction(ctx, &newInst); + + + /* compute coverage factor = (1-d)/(1-k) */ + + /* SUB t0.z, tex.w, tex.z; # m = 1 - k */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SUB; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* RCP t0.z, t0.z; # t0.z = 1 / m */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_RCP; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Z; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* SUB t0.y, 1, t0.x; # d = 1 - d */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SUB; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + ctx->emit_instruction(ctx, &newInst); + + /* MUL t0.w, t0.y, t0.z; # coverage = d * m */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* SLE t0.y, t0.x, tex.z; # bool b = distance <= k */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_SLE; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_Y; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_X; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_Z; + ctx->emit_instruction(ctx, &newInst); + + /* CMP t0.w, -t0.y, tex.w, t0.w; + * # if -t0.y < 0 then + * t0.w = 1 + * else + * t0.w = t0.w + */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_CMP; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = tmp0; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 3; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleX = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleY = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleZ = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.SwizzleW = TGSI_SWIZZLE_Y; + newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[1].SrcRegister.Index = texInput; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[1].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[2].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[2].SrcRegister.Index = tmp0; + newInst.FullSrcRegisters[2].SrcRegister.SwizzleX = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[2].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[2].SrcRegister.SwizzleZ = TGSI_SWIZZLE_W; + newInst.FullSrcRegisters[2].SrcRegister.SwizzleW = TGSI_SWIZZLE_W; + ctx->emit_instruction(ctx, &newInst); + + } + + if (inst->Instruction.Opcode == TGSI_OPCODE_END) { + /* add alpha modulation code at tail of program */ + + /* MOV result.color.xyz, colorTemp; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MOV; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_XYZ; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + ctx->emit_instruction(ctx, &newInst); + + /* MUL result.color.w, colorTemp, tmp0.w; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT; + newInst.FullDstRegisters[0].DstRegister.Index = aactx->colorOutput; + newInst.FullDstRegisters[0].DstRegister.WriteMask = TGSI_WRITEMASK_W; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = aactx->colorTemp; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[1].SrcRegister.Index = aactx->tmp0; + ctx->emit_instruction(ctx, &newInst); + } + else { + /* Not an END instruction. + * Look for writes to result.color and replace with colorTemp reg. + */ + uint i; + + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + if (dst->DstRegister.File == TGSI_FILE_OUTPUT && + dst->DstRegister.Index == aactx->colorOutput) { + dst->DstRegister.File = TGSI_FILE_TEMPORARY; + dst->DstRegister.Index = aactx->colorTemp; + } + } + } + + ctx->emit_instruction(ctx, inst); +} + + +/** + * Generate the frag shader we'll use for drawing AA lines. + * This will be the user's shader plus some texture/modulate instructions. + */ +static void +generate_aapoint_fs(struct aapoint_stage *aapoint) +{ + const struct pipe_shader_state *orig_fs = &aapoint->fs->state; + struct pipe_shader_state aapoint_fs; + struct aa_transform_context transform; + +#define MAX 1000 + + aapoint_fs = *orig_fs; /* copy to init */ + aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + + memset(&transform, 0, sizeof(transform)); + transform.colorOutput = -1; + transform.maxInput = -1; + transform.maxGeneric = -1; + transform.colorTemp = -1; + transform.tmp0 = -1; + transform.firstInstruction = TRUE; + transform.base.transform_instruction = aa_transform_inst; + transform.base.transform_declaration = aa_transform_decl; + + tgsi_transform_shader(orig_fs->tokens, + (struct tgsi_token *) aapoint_fs.tokens, + MAX, &transform.base); + +#if 0 /* DEBUG */ + printf("draw_aapoint, orig shader:\n"); + tgsi_dump(orig_fs->tokens, 0); + printf("draw_aapoint, new shader:\n"); + tgsi_dump(aapoint_fs.tokens, 0); +#endif + + aapoint->fs->aapoint_fs + = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs); + + aapoint->fs->generic_attrib = transform.maxGeneric + 1; +} + + +/** + * When we're about to draw our first AA line in a batch, this function is + * called to tell the driver to bind our modified fragment shader. + */ +static void +bind_aapoint_fragment_shader(struct aapoint_stage *aapoint) +{ + if (!aapoint->fs->aapoint_fs) { + generate_aapoint_fs(aapoint); + } + aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs); +} + + + +static INLINE struct aapoint_stage * +aapoint_stage( struct draw_stage *stage ) +{ + return (struct aapoint_stage *) stage; +} + + +static void +passthrough_line(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->line(stage->next, header); +} + + +static void +passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + +/** + * Draw an AA point by drawing a quad. + */ +static void +aapoint_point(struct draw_stage *stage, struct prim_header *header) +{ + const struct aapoint_stage *aapoint = aapoint_stage(stage); + struct prim_header tri; + struct vertex_header *v[4]; + uint texPos = aapoint->tex_slot; + float radius, *pos, *tex; + uint i; + float k; + + if (aapoint->psize_slot >= 0) { + radius = 0.5f * header->v[0]->data[aapoint->psize_slot][0]; + } + else { + radius = aapoint->radius; + } + + /* + * Note: the texcoords (generic attrib, really) we use are special: + * The S and T components simply vary from -1 to +1. + * The R component is k, below. + * The Q component is 1.0 and will used as a handy constant in the + * fragment shader. + */ + + /* + * k is the threshold distance from the point's center at which + * we begin alpha attenuation (the coverage value). + * Operating within a unit circle, we'll compute the fragment's + * distance 'd' from the center point using the texcoords. + * IF d > 1.0 THEN + * KILL fragment + * ELSE IF d > k THEN + * compute coverage in [0,1] proportional to d in [k, 1]. + * ELSE + * coverage = 1.0; // full coverage + * ENDIF + * + * Note: the ELSEIF and ELSE clauses are actually implemented with CMP to + * avoid using IF/ELSE/ENDIF TGSI opcodes. + */ + +#if !NORMALIZE + k = 1.0f / radius; + k = 1.0f - 2.0f * k + k * k; +#else + k = 1.0f - 1.0f / radius; +#endif + + /* allocate/dup new verts */ + for (i = 0; i < 4; i++) { + v[i] = dup_vert(stage, header->v[0], i); + } + + /* new verts */ + pos = v[0]->data[0]; + pos[0] -= radius; + pos[1] -= radius; + + pos = v[1]->data[0]; + pos[0] += radius; + pos[1] -= radius; + + pos = v[2]->data[0]; + pos[0] += radius; + pos[1] += radius; + + pos = v[3]->data[0]; + pos[0] -= radius; + pos[1] += radius; + + /* new texcoords */ + tex = v[0]->data[texPos]; + ASSIGN_4V(tex, -1, -1, k, 1); + + tex = v[1]->data[texPos]; + ASSIGN_4V(tex, 1, -1, k, 1); + + tex = v[2]->data[texPos]; + ASSIGN_4V(tex, 1, 1, k, 1); + + tex = v[3]->data[texPos]; + ASSIGN_4V(tex, -1, 1, k, 1); + + /* emit 2 tris for the quad strip */ + tri.v[0] = v[0]; + tri.v[1] = v[1]; + tri.v[2] = v[2]; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v[0]; + tri.v[1] = v[2]; + tri.v[2] = v[3]; + stage->next->tri( stage->next, &tri ); +} + + +static void +aapoint_first_point(struct draw_stage *stage, struct prim_header *header) +{ + auto struct aapoint_stage *aapoint = aapoint_stage(stage); + struct draw_context *draw = stage->draw; + + assert(draw->rasterizer->point_smooth); + + if (draw->rasterizer->point_size <= 2.0) + aapoint->radius = 1.0; + else + aapoint->radius = 0.5f * draw->rasterizer->point_size; + + /* + * Bind (generate) our fragprog. + */ + bind_aapoint_fragment_shader(aapoint); + + /* update vertex attrib info */ + aapoint->tex_slot = draw->num_vs_outputs; + assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */ + + draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; + draw->extra_vp_outputs.semantic_index = aapoint->fs->generic_attrib; + draw->extra_vp_outputs.slot = aapoint->tex_slot; + + /* find psize slot in post-transform vertex */ + aapoint->psize_slot = -1; + if (draw->rasterizer->point_size_per_vertex) { + /* find PSIZ vertex output */ + const struct draw_vertex_shader *vs = draw->vertex_shader; + uint i; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { + aapoint->psize_slot = i; + break; + } + } + } + + /* now really draw first line */ + stage->point = aapoint_point; + stage->point(stage, header); +} + + +static void +aapoint_flush(struct draw_stage *stage, unsigned flags) +{ + struct draw_context *draw = stage->draw; + struct aapoint_stage *aapoint = aapoint_stage(stage); + struct pipe_context *pipe = aapoint->pipe; + + stage->point = aapoint_first_point; + stage->next->flush( stage->next, flags ); + + /* restore original frag shader */ + aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs); + + draw->extra_vp_outputs.slot = 0; +} + + +static void +aapoint_reset_stipple_counter(struct draw_stage *stage) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +aapoint_destroy(struct draw_stage *stage) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +static struct aapoint_stage * +draw_aapoint_stage(struct draw_context *draw) +{ + struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage); + + draw_alloc_temp_verts( &aapoint->stage, 4 ); + + aapoint->stage.draw = draw; + aapoint->stage.next = NULL; + aapoint->stage.point = aapoint_first_point; + aapoint->stage.line = passthrough_line; + aapoint->stage.tri = passthrough_tri; + aapoint->stage.flush = aapoint_flush; + aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter; + aapoint->stage.destroy = aapoint_destroy; + + return aapoint; +} + + +static struct aapoint_stage * +aapoint_stage_from_pipe(struct pipe_context *pipe) +{ + struct draw_context *draw = (struct draw_context *) pipe->draw; + return aapoint_stage(draw->pipeline.aapoint); +} + + +/** + * This function overrides the driver's create_fs_state() function and + * will typically be called by the state tracker. + */ +static void * +aapoint_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); + struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader); + + if (aafs) { + aafs->state = *fs; + + /* pass-through */ + aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs); + } + + return aafs; +} + + +static void +aapoint_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); + struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs; + /* save current */ + aapoint->fs = aafs; + /* pass-through */ + aapoint->driver_bind_fs_state(aapoint->pipe, + (aafs ? aafs->driver_fs : NULL)); +} + + +static void +aapoint_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); + struct aapoint_fragment_shader *aafs = (struct aapoint_fragment_shader *) fs; + /* pass-through */ + aapoint->driver_delete_fs_state(aapoint->pipe, aafs->driver_fs); + FREE(aafs); +} + + +/** + * Called by drivers that want to install this AA point prim stage + * into the draw module's pipeline. This will not be used if the + * hardware has native support for AA points. + */ +void +draw_install_aapoint_stage(struct draw_context *draw, + struct pipe_context *pipe) +{ + struct aapoint_stage *aapoint; + + pipe->draw = (void *) draw; + + /* + * Create / install AA point drawing / prim stage + */ + aapoint = draw_aapoint_stage( draw ); + assert(aapoint); + draw->pipeline.aapoint = &aapoint->stage; + + aapoint->pipe = pipe; + + /* save original driver functions */ + aapoint->driver_create_fs_state = pipe->create_fs_state; + aapoint->driver_bind_fs_state = pipe->bind_fs_state; + aapoint->driver_delete_fs_state = pipe->delete_fs_state; + + /* override the driver's functions */ + pipe->create_fs_state = aapoint_create_fs_state; + pipe->bind_fs_state = aapoint_bind_fs_state; + pipe->delete_fs_state = aapoint_delete_fs_state; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c new file mode 100644 index 0000000000..0ac3a240e5 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -0,0 +1,503 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Clipping stage + * + * \author Keith Whitwell + */ + + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" + +#include "draw_context.h" +#include "draw_vs.h" + + +#ifndef IS_NEGATIVE +#define IS_NEGATIVE(X) ((X) < 0.0) +#endif + +#ifndef DIFFERENT_SIGNS +#define DIFFERENT_SIGNS(x, y) ((x) * (y) <= 0.0F && (x) - (y) != 0.0F) +#endif + +#ifndef MAX_CLIPPED_VERTICES +#define MAX_CLIPPED_VERTICES ((2 * (6 + PIPE_MAX_CLIP_PLANES))+1) +#endif + + + +struct clipper { + struct draw_stage stage; /**< base class */ + + /* Basically duplicate some of the flatshading logic here: + */ + boolean flat; + uint num_color_attribs; + uint color_attribs[4]; /* front/back primary/secondary colors */ + + float (*plane)[4]; +}; + + +/* This is a bit confusing: + */ +static INLINE struct clipper *clipper_stage( struct draw_stage *stage ) +{ + return (struct clipper *)stage; +} + + +#define LINTERP(T, OUT, IN) ((OUT) + (T) * ((IN) - (OUT))) + + +/* All attributes are float[4], so this is easy: + */ +static void interp_attr( float *fdst, + float t, + const float *fin, + const float *fout ) +{ + fdst[0] = LINTERP( t, fout[0], fin[0] ); + fdst[1] = LINTERP( t, fout[1], fin[1] ); + fdst[2] = LINTERP( t, fout[2], fin[2] ); + fdst[3] = LINTERP( t, fout[3], fin[3] ); +} + +static void copy_colors( struct draw_stage *stage, + struct vertex_header *dst, + const struct vertex_header *src ) +{ + const struct clipper *clipper = clipper_stage(stage); + uint i; + for (i = 0; i < clipper->num_color_attribs; i++) { + const uint attr = clipper->color_attribs[i]; + COPY_4FV(dst->data[attr], src->data[attr]); + } +} + + + +/* Interpolate between two vertices to produce a third. + */ +static void interp( const struct clipper *clip, + struct vertex_header *dst, + float t, + const struct vertex_header *out, + const struct vertex_header *in ) +{ + const unsigned nr_attrs = clip->stage.draw->num_vs_outputs; + unsigned j; + + /* Vertex header. + */ + { + dst->clipmask = 0; + dst->edgeflag = 0; + dst->pad = 0; + dst->vertex_id = UNDEFINED_VERTEX_ID; + } + + /* Clip coordinates: interpolate normally + */ + { + interp_attr(dst->clip, t, in->clip, out->clip); + } + + /* Do the projective divide and insert window coordinates: + */ + { + const float *pos = dst->clip; + const float *scale = clip->stage.draw->viewport.scale; + const float *trans = clip->stage.draw->viewport.translate; + const float oow = 1.0f / pos[3]; + + dst->data[0][0] = pos[0] * oow * scale[0] + trans[0]; + dst->data[0][1] = pos[1] * oow * scale[1] + trans[1]; + dst->data[0][2] = pos[2] * oow * scale[2] + trans[2]; + dst->data[0][3] = oow; + } + + /* Other attributes + * Note: start at 1 to skip winpos (data[0]) since we just computed + * it above. + */ + for (j = 1; j < nr_attrs; j++) { + interp_attr(dst->data[j], t, in->data[j], out->data[j]); + } +} + + +static void emit_poly( struct draw_stage *stage, + struct vertex_header **inlist, + unsigned n, + const struct prim_header *origPrim) +{ + struct prim_header header; + unsigned i; + + /* later stages may need the determinant, but only the sign matters */ + header.det = origPrim->det; + + for (i = 2; i < n; i++) { + header.v[0] = inlist[i-1]; + header.v[1] = inlist[i]; + header.v[2] = inlist[0]; /* keep in v[2] for flatshading */ + + { + unsigned tmp1 = header.v[1]->edgeflag; + unsigned tmp2 = header.v[2]->edgeflag; + + if (i != n-1) header.v[1]->edgeflag = 0; + if (i != 2) header.v[2]->edgeflag = 0; + + header.edgeflags = ((header.v[0]->edgeflag << 0) | + (header.v[1]->edgeflag << 1) | + (header.v[2]->edgeflag << 2)); + + if (0) { + const struct draw_vertex_shader *vs = stage->draw->vertex_shader; + uint j, k; + debug_printf("Clipped tri:\n"); + for (j = 0; j < 3; j++) { + for (k = 0; k < vs->info.num_outputs; k++) { + debug_printf(" Vert %d: Attr %d: %f %f %f %f\n", j, k, + header.v[j]->data[k][0], + header.v[j]->data[k][1], + header.v[j]->data[k][2], + header.v[j]->data[k][3]); + } + } + } + + stage->next->tri( stage->next, &header ); + + header.v[1]->edgeflag = tmp1; + header.v[2]->edgeflag = tmp2; + } + } +} + + + + +/* Clip a triangle against the viewport and user clip planes. + */ +static void +do_clip_tri( struct draw_stage *stage, + struct prim_header *header, + unsigned clipmask ) +{ + struct clipper *clipper = clipper_stage( stage ); + struct vertex_header *a[MAX_CLIPPED_VERTICES]; + struct vertex_header *b[MAX_CLIPPED_VERTICES]; + struct vertex_header **inlist = a; + struct vertex_header **outlist = b; + unsigned tmpnr = 0; + unsigned n = 3; + unsigned i; + + inlist[0] = header->v[0]; + inlist[1] = header->v[1]; + inlist[2] = header->v[2]; + + while (clipmask && n >= 3) { + const unsigned plane_idx = ffs(clipmask)-1; + const float *plane = clipper->plane[plane_idx]; + struct vertex_header *vert_prev = inlist[0]; + float dp_prev = dot4( vert_prev->clip, plane ); + unsigned outcount = 0; + + clipmask &= ~(1<clip, plane ); + + if (!IS_NEGATIVE(dp_prev)) { + outlist[outcount++] = vert_prev; + } + + if (DIFFERENT_SIGNS(dp, dp_prev)) { + struct vertex_header *new_vert = clipper->stage.tmp[tmpnr++]; + outlist[outcount++] = new_vert; + + if (IS_NEGATIVE(dp)) { + /* Going out of bounds. Avoid division by zero as we + * know dp != dp_prev from DIFFERENT_SIGNS, above. + */ + float t = dp / (dp - dp_prev); + interp( clipper, new_vert, t, vert, vert_prev ); + + /* Force edgeflag true in this case: + */ + new_vert->edgeflag = 1; + } else { + /* Coming back in. + */ + float t = dp_prev / (dp_prev - dp); + interp( clipper, new_vert, t, vert_prev, vert ); + + /* Copy starting vert's edgeflag: + */ + new_vert->edgeflag = vert_prev->edgeflag; + } + } + + vert_prev = vert; + dp_prev = dp; + } + + { + struct vertex_header **tmp = inlist; + inlist = outlist; + outlist = tmp; + n = outcount; + } + } + + /* If flat-shading, copy color to new provoking vertex. + */ + if (clipper->flat && inlist[0] != header->v[2]) { + if (1) { + inlist[0] = dup_vert(stage, inlist[0], tmpnr++); + } + + copy_colors(stage, inlist[0], header->v[2]); + } + + + + /* Emit the polygon as triangles to the setup stage: + */ + if (n >= 3) + emit_poly( stage, inlist, n, header ); +} + + +/* Clip a line against the viewport and user clip planes. + */ +static void +do_clip_line( struct draw_stage *stage, + struct prim_header *header, + unsigned clipmask ) +{ + const struct clipper *clipper = clipper_stage( stage ); + struct vertex_header *v0 = header->v[0]; + struct vertex_header *v1 = header->v[1]; + const float *pos0 = v0->clip; + const float *pos1 = v1->clip; + float t0 = 0.0F; + float t1 = 0.0F; + struct prim_header newprim; + + while (clipmask) { + const unsigned plane_idx = ffs(clipmask)-1; + const float *plane = clipper->plane[plane_idx]; + const float dp0 = dot4( pos0, plane ); + const float dp1 = dot4( pos1, plane ); + + if (dp1 < 0.0F) { + float t = dp1 / (dp1 - dp0); + t1 = MAX2(t1, t); + } + + if (dp0 < 0.0F) { + float t = dp0 / (dp0 - dp1); + t0 = MAX2(t0, t); + } + + if (t0 + t1 >= 1.0F) + return; /* discard */ + + clipmask &= ~(1 << plane_idx); /* turn off this plane's bit */ + } + + if (v0->clipmask) { + interp( clipper, stage->tmp[0], t0, v0, v1 ); + + if (clipper->flat) + copy_colors(stage, stage->tmp[0], v0); + + newprim.v[0] = stage->tmp[0]; + } + else { + newprim.v[0] = v0; + } + + if (v1->clipmask) { + interp( clipper, stage->tmp[1], t1, v1, v0 ); + newprim.v[1] = stage->tmp[1]; + } + else { + newprim.v[1] = v1; + } + + stage->next->line( stage->next, &newprim ); +} + + +static void +clip_point( struct draw_stage *stage, + struct prim_header *header ) +{ + if (header->v[0]->clipmask == 0) + stage->next->point( stage->next, header ); +} + + +static void +clip_line( struct draw_stage *stage, + struct prim_header *header ) +{ + unsigned clipmask = (header->v[0]->clipmask | + header->v[1]->clipmask); + + if (clipmask == 0) { + /* no clipping needed */ + stage->next->line( stage->next, header ); + } + else if ((header->v[0]->clipmask & + header->v[1]->clipmask) == 0) { + do_clip_line(stage, header, clipmask); + } + /* else, totally clipped */ +} + + +static void +clip_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + unsigned clipmask = (header->v[0]->clipmask | + header->v[1]->clipmask | + header->v[2]->clipmask); + + if (clipmask == 0) { + /* no clipping needed */ + stage->next->tri( stage->next, header ); + } + else if ((header->v[0]->clipmask & + header->v[1]->clipmask & + header->v[2]->clipmask) == 0) { + do_clip_tri(stage, header, clipmask); + } +} + +/* Update state. Could further delay this until we hit the first + * primitive that really requires clipping. + */ +static void +clip_init_state( struct draw_stage *stage ) +{ + struct clipper *clipper = clipper_stage( stage ); + + clipper->flat = stage->draw->rasterizer->flatshade ? TRUE : FALSE; + + if (clipper->flat) { + const struct draw_vertex_shader *vs = stage->draw->vertex_shader; + uint i; + + clipper->num_color_attribs = 0; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR || + vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { + clipper->color_attribs[clipper->num_color_attribs++] = i; + } + } + } + + stage->tri = clip_tri; + stage->line = clip_line; +} + + + +static void clip_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + clip_init_state( stage ); + stage->tri( stage, header ); +} + +static void clip_first_line( struct draw_stage *stage, + struct prim_header *header ) +{ + clip_init_state( stage ); + stage->line( stage, header ); +} + + +static void clip_flush( struct draw_stage *stage, + unsigned flags ) +{ + stage->tri = clip_first_tri; + stage->line = clip_first_line; + stage->next->flush( stage->next, flags ); +} + + +static void clip_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void clip_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Allocate a new clipper stage. + * \return pointer to new stage object + */ +struct draw_stage *draw_clip_stage( struct draw_context *draw ) +{ + struct clipper *clipper = CALLOC_STRUCT(clipper); + + draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 ); + + clipper->stage.draw = draw; + clipper->stage.point = clip_point; + clipper->stage.line = clip_first_line; + clipper->stage.tri = clip_first_tri; + clipper->stage.flush = clip_flush; + clipper->stage.reset_stipple_counter = clip_reset_stipple_counter; + clipper->stage.destroy = clip_destroy; + + clipper->plane = draw->plane; + + return &clipper->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c new file mode 100644 index 0000000000..8177b0ac86 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -0,0 +1,150 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Drawing stage for polygon culling + */ + +/* Authors: Keith Whitwell + */ + + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "draw_private.h" + + +struct cull_stage { + struct draw_stage stage; + unsigned winding; /**< which winding(s) to cull (one of PIPE_WINDING_x) */ +}; + + +static INLINE struct cull_stage *cull_stage( struct draw_stage *stage ) +{ + return (struct cull_stage *)stage; +} + + + + +static void cull_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + /* Window coords: */ + const float *v0 = header->v[0]->data[0]; + const float *v1 = header->v[1]->data[0]; + const float *v2 = header->v[2]->data[0]; + + /* edge vectors e = v0 - v2, f = v1 - v2 */ + const float ex = v0[0] - v2[0]; + const float ey = v0[1] - v2[1]; + const float fx = v1[0] - v2[0]; + const float fy = v1[1] - v2[1]; + + /* det = cross(e,f).z */ + header->det = ex * fy - ey * fx; + + if (header->det != 0) { + /* if (det < 0 then Z points toward camera and triangle is + * counter-clockwise winding. + */ + unsigned winding = (header->det < 0) ? PIPE_WINDING_CCW : PIPE_WINDING_CW; + + if ((winding & cull_stage(stage)->winding) == 0) { + /* triangle is not culled, pass to next stage */ + stage->next->tri( stage->next, header ); + } + } +} + +static void cull_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct cull_stage *cull = cull_stage(stage); + + cull->winding = stage->draw->rasterizer->cull_mode; + + stage->tri = cull_tri; + stage->tri( stage, header ); +} + + + +static void cull_line( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->line( stage->next, header ); +} + + +static void cull_point( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->point( stage->next, header ); +} + + +static void cull_flush( struct draw_stage *stage, unsigned flags ) +{ + stage->tri = cull_first_tri; + stage->next->flush( stage->next, flags ); +} + +static void cull_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void cull_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Create a new polygon culling stage. + */ +struct draw_stage *draw_cull_stage( struct draw_context *draw ) +{ + struct cull_stage *cull = CALLOC_STRUCT(cull_stage); + + draw_alloc_temp_verts( &cull->stage, 0 ); + + cull->stage.draw = draw; + cull->stage.next = NULL; + cull->stage.point = cull_point; + cull->stage.line = cull_line; + cull->stage.tri = cull_first_tri; + cull->stage.flush = cull_flush; + cull->stage.reset_stipple_counter = cull_reset_stipple_counter; + cull->stage.destroy = cull_destroy; + + return &cull->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c new file mode 100644 index 0000000000..54baa1fbc9 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c @@ -0,0 +1,248 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "draw_vs.h" + + +/** subclass of draw_stage */ +struct flat_stage +{ + struct draw_stage stage; + + uint num_color_attribs; + uint color_attribs[4]; /* front/back primary/secondary colors */ +}; + + +static INLINE struct flat_stage * +flat_stage(struct draw_stage *stage) +{ + return (struct flat_stage *) stage; +} + + +/** Copy all the color attributes from 'src' vertex to 'dst' vertex */ +static INLINE void copy_colors( struct draw_stage *stage, + struct vertex_header *dst, + const struct vertex_header *src ) +{ + const struct flat_stage *flat = flat_stage(stage); + uint i; + for (i = 0; i < flat->num_color_attribs; i++) { + const uint attr = flat->color_attribs[i]; + COPY_4FV(dst->data[attr], src->data[attr]); + } +} + + +/** Copy all the color attributes from src vertex to dst0 & dst1 vertices */ +static INLINE void copy_colors2( struct draw_stage *stage, + struct vertex_header *dst0, + struct vertex_header *dst1, + const struct vertex_header *src ) +{ + const struct flat_stage *flat = flat_stage(stage); + uint i; + for (i = 0; i < flat->num_color_attribs; i++) { + const uint attr = flat->color_attribs[i]; + COPY_4FV(dst0->data[attr], src->data[attr]); + COPY_4FV(dst1->data[attr], src->data[attr]); + } +} + + +/** + * Flatshade tri. Required for clipping and when unfilled tris are + * active, otherwise handled by hardware. + */ +static void flatshade_tri_0( struct draw_stage *stage, + struct prim_header *header ) +{ + struct prim_header tmp; + + tmp.det = header->det; + tmp.edgeflags = header->edgeflags; + tmp.v[0] = header->v[0]; + tmp.v[1] = dup_vert(stage, header->v[1], 0); + tmp.v[2] = dup_vert(stage, header->v[2], 1); + + copy_colors2(stage, tmp.v[1], tmp.v[2], tmp.v[0]); + + stage->next->tri( stage->next, &tmp ); +} + + +static void flatshade_tri_2( struct draw_stage *stage, + struct prim_header *header ) +{ + struct prim_header tmp; + + tmp.det = header->det; + tmp.edgeflags = header->edgeflags; + tmp.v[0] = dup_vert(stage, header->v[0], 0); + tmp.v[1] = dup_vert(stage, header->v[1], 1); + tmp.v[2] = header->v[2]; + + copy_colors2(stage, tmp.v[0], tmp.v[1], tmp.v[2]); + + stage->next->tri( stage->next, &tmp ); +} + + + + + +/** + * Flatshade line. Required for clipping. + */ +static void flatshade_line_0( struct draw_stage *stage, + struct prim_header *header ) +{ + struct prim_header tmp; + + tmp.v[0] = header->v[0]; + tmp.v[1] = dup_vert(stage, header->v[1], 0); + + copy_colors(stage, tmp.v[1], tmp.v[0]); + + stage->next->line( stage->next, &tmp ); +} + +static void flatshade_line_1( struct draw_stage *stage, + struct prim_header *header ) +{ + struct prim_header tmp; + + tmp.v[0] = dup_vert(stage, header->v[0], 0); + tmp.v[1] = header->v[1]; + + copy_colors(stage, tmp.v[0], tmp.v[1]); + + stage->next->line( stage->next, &tmp ); +} + + +/* Flatshade point -- passthrough. + */ +static void flatshade_point( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->point( stage->next, header ); +} + + +static void flatshade_init_state( struct draw_stage *stage ) +{ + struct flat_stage *flat = flat_stage(stage); + const struct draw_vertex_shader *vs = stage->draw->vertex_shader; + uint i; + + /* Find which vertex shader outputs are colors, make a list */ + flat->num_color_attribs = 0; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR || + vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { + flat->color_attribs[flat->num_color_attribs++] = i; + } + } + + /* Choose flatshade routine according to provoking vertex: + */ + if (stage->draw->rasterizer->flatshade_first) { + stage->line = flatshade_line_0; + stage->tri = flatshade_tri_0; + } + else { + stage->line = flatshade_line_1; + stage->tri = flatshade_tri_2; + } +} + +static void flatshade_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + flatshade_init_state( stage ); + stage->tri( stage, header ); +} + +static void flatshade_first_line( struct draw_stage *stage, + struct prim_header *header ) +{ + flatshade_init_state( stage ); + stage->line( stage, header ); +} + + +static void flatshade_flush( struct draw_stage *stage, + unsigned flags ) +{ + stage->tri = flatshade_first_tri; + stage->line = flatshade_first_line; + stage->next->flush( stage->next, flags ); +} + + +static void flatshade_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void flatshade_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Create flatshading drawing stage. + */ +struct draw_stage *draw_flatshade_stage( struct draw_context *draw ) +{ + struct flat_stage *flatshade = CALLOC_STRUCT(flat_stage); + + draw_alloc_temp_verts( &flatshade->stage, 2 ); + + flatshade->stage.draw = draw; + flatshade->stage.next = NULL; + flatshade->stage.point = flatshade_point; + flatshade->stage.line = flatshade_first_line; + flatshade->stage.tri = flatshade_first_tri; + flatshade->stage.flush = flatshade_flush; + flatshade->stage.reset_stipple_counter = flatshade_reset_stipple_counter; + flatshade->stage.destroy = flatshade_destroy; + + return &flatshade->stage; +} + + diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c new file mode 100644 index 0000000000..dbc676deae --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -0,0 +1,186 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief polygon offset state + * + * \author Keith Whitwell + * \author Brian Paul + */ + +#include "pipe/p_util.h" +#include "draw_private.h" + + + +struct offset_stage { + struct draw_stage stage; + + float scale; + float units; +}; + + + +static INLINE struct offset_stage *offset_stage( struct draw_stage *stage ) +{ + return (struct offset_stage *) stage; +} + + + + + +/** + * Offset tri Z. Some hardware can handle this, but not usually when + * doing unfilled rendering. + */ +static void do_offset_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct offset_stage *offset = offset_stage(stage); + float inv_det = 1.0f / header->det; + + /* Window coords: + */ + float *v0 = header->v[0]->data[0]; + float *v1 = header->v[1]->data[0]; + float *v2 = header->v[2]->data[0]; + + /* edge vectors e = v0 - v2, f = v1 - v2 */ + float ex = v0[0] - v2[0]; + float ey = v0[1] - v2[1]; + float ez = v0[2] - v2[2]; + float fx = v1[0] - v2[0]; + float fy = v1[1] - v2[1]; + float fz = v1[2] - v2[2]; + + /* (a,b) = cross(e,f).xy */ + float a = ey*fz - ez*fy; + float b = ez*fx - ex*fz; + + float dzdx = FABSF(a * inv_det); + float dzdy = FABSF(b * inv_det); + + float zoffset = offset->units + MAX2(dzdx, dzdy) * offset->scale; + + /* + * Note: we're applying the offset and clamping per-vertex. + * Ideally, the offset is applied per-fragment prior to fragment shading. + */ + v0[2] = CLAMP(v0[2] + zoffset, 0.0f, 1.0f); + v1[2] = CLAMP(v1[2] + zoffset, 0.0f, 1.0f); + v2[2] = CLAMP(v2[2] + zoffset, 0.0f, 1.0f); + + stage->next->tri( stage->next, header ); +} + + +static void offset_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct prim_header tmp; + + tmp.det = header->det; + tmp.edgeflags = header->edgeflags; + tmp.v[0] = dup_vert(stage, header->v[0], 0); + tmp.v[1] = dup_vert(stage, header->v[1], 1); + tmp.v[2] = dup_vert(stage, header->v[2], 2); + + do_offset_tri( stage, &tmp ); +} + + +static void offset_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct offset_stage *offset = offset_stage(stage); + float mrd = 1.0f / 65535.0f; /* XXX this depends on depthbuffer bits! */ + + offset->units = stage->draw->rasterizer->offset_units * mrd; + offset->scale = stage->draw->rasterizer->offset_scale; + + stage->tri = offset_tri; + stage->tri( stage, header ); +} + + +static void offset_line( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->line( stage->next, header ); +} + + +static void offset_point( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->point( stage->next, header ); +} + + +static void offset_flush( struct draw_stage *stage, + unsigned flags ) +{ + stage->tri = offset_first_tri; + stage->next->flush( stage->next, flags ); +} + + +static void offset_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void offset_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Create polygon offset drawing stage. + */ +struct draw_stage *draw_offset_stage( struct draw_context *draw ) +{ + struct offset_stage *offset = CALLOC_STRUCT(offset_stage); + + draw_alloc_temp_verts( &offset->stage, 3 ); + + offset->stage.draw = draw; + offset->stage.next = NULL; + offset->stage.point = offset_point; + offset->stage.line = offset_line; + offset->stage.tri = offset_first_tri; + offset->stage.flush = offset_flush; + offset->stage.reset_stipple_counter = offset_reset_stipple_counter; + offset->stage.destroy = offset_destroy; + + return &offset->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c new file mode 100644 index 0000000000..4dddb72906 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -0,0 +1,746 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Polygon stipple stage: implement polygon stipple with texture map and + * fragment program. The fragment program samples the texture and does + * a fragment kill for the stipple-failing fragments. + * + * Authors: Brian Paul + */ + + +#include "pipe/p_util.h" +#include "pipe/p_inlines.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" + +#include "tgsi/util/tgsi_transform.h" +#include "tgsi/util/tgsi_dump.h" + +#include "draw_context.h" +#include "draw_private.h" + + + +/** + * Subclass of pipe_shader_state to carry extra fragment shader info. + */ +struct pstip_fragment_shader +{ + struct pipe_shader_state state; + void *driver_fs; + void *pstip_fs; + uint sampler_unit; +}; + + +/** + * Subclass of draw_stage + */ +struct pstip_stage +{ + struct draw_stage stage; + + void *sampler_cso; + struct pipe_texture *texture; + uint num_samplers; + uint num_textures; + + /* + * Currently bound state + */ + struct pstip_fragment_shader *fs; + struct { + void *samplers[PIPE_MAX_SAMPLERS]; + struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; + const struct pipe_poly_stipple *stipple; + } state; + + /* + * Driver interface/override functions + */ + void * (*driver_create_fs_state)(struct pipe_context *, + const struct pipe_shader_state *); + void (*driver_bind_fs_state)(struct pipe_context *, void *); + void (*driver_delete_fs_state)(struct pipe_context *, void *); + + void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, void **); + + void (*driver_set_sampler_textures)(struct pipe_context *, unsigned, + struct pipe_texture **); + + void (*driver_set_polygon_stipple)(struct pipe_context *, + const struct pipe_poly_stipple *); + + struct pipe_context *pipe; +}; + + + +/** + * Subclass of tgsi_transform_context, used for transforming the + * user's fragment shader to add the special AA instructions. + */ +struct pstip_transform_context { + struct tgsi_transform_context base; + uint tempsUsed; /**< bitmask */ + int wincoordInput; + int maxInput; + uint samplersUsed; /**< bitfield of samplers used */ + int freeSampler; /** an available sampler for the pstipple */ + int texTemp; /**< temp registers */ + int numImmed; + boolean firstInstruction; +}; + + +/** + * TGSI declaration transform callback. + * Look for a free sampler, a free input attrib, and two free temp regs. + */ +static void +pstip_transform_decl(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + + if (decl->Declaration.File == TGSI_FILE_SAMPLER) { + uint i; + for (i = decl->u.DeclarationRange.First; + i <= decl->u.DeclarationRange.Last; i++) { + pctx->samplersUsed |= 1 << i; + } + } + else if (decl->Declaration.File == TGSI_FILE_INPUT) { + pctx->maxInput = MAX2(pctx->maxInput, (int) decl->u.DeclarationRange.Last); + if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) + pctx->wincoordInput = (int) decl->u.DeclarationRange.First; + } + else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { + uint i; + for (i = decl->u.DeclarationRange.First; + i <= decl->u.DeclarationRange.Last; i++) { + pctx->tempsUsed |= (1 << i); + } + } + + ctx->emit_declaration(ctx, decl); +} + + +static void +pstip_transform_immed(struct tgsi_transform_context *ctx, + struct tgsi_full_immediate *immed) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + pctx->numImmed++; +} + + +/** + * Find the lowest zero bit in the given word, or -1 if bitfield is all ones. + */ +static int +free_bit(uint bitfield) +{ + int i; + for (i = 0; i < 32; i++) { + if ((bitfield & (1 << i)) == 0) + return i; + } + return -1; +} + + +/** + * TGSI instruction transform callback. + * Replace writes to result.color w/ a temp reg. + * Upon END instruction, insert texture sampling code for antialiasing. + */ +static void +pstip_transform_inst(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst) +{ + struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; + + if (pctx->firstInstruction) { + /* emit our new declarations before the first instruction */ + + struct tgsi_full_declaration decl; + struct tgsi_full_instruction newInst; + uint i; + int wincoordInput; + + /* find free sampler */ + pctx->freeSampler = free_bit(pctx->samplersUsed); + if (pctx->freeSampler >= PIPE_MAX_SAMPLERS) + pctx->freeSampler = PIPE_MAX_SAMPLERS - 1; + + if (pctx->wincoordInput < 0) + wincoordInput = pctx->maxInput + 1; + else + wincoordInput = pctx->wincoordInput; + + /* find one free temp reg */ + for (i = 0; i < 32; i++) { + if ((pctx->tempsUsed & (1 << i)) == 0) { + /* found a free temp */ + if (pctx->texTemp < 0) + pctx->texTemp = i; + else + break; + } + } + assert(pctx->texTemp >= 0); + + if (pctx->wincoordInput < 0) { + /* declare new position input reg */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION; + decl.Semantic.SemanticIndex = 0; + decl.Declaration.Interpolate = 1; + decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? */ + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = wincoordInput; + ctx->emit_declaration(ctx, &decl); + } + + /* declare new sampler */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_SAMPLER; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = pctx->freeSampler; + ctx->emit_declaration(ctx, &decl); + + /* declare new temp regs */ + decl = tgsi_default_full_declaration(); + decl.Declaration.File = TGSI_FILE_TEMPORARY; + decl.u.DeclarationRange.First = + decl.u.DeclarationRange.Last = pctx->texTemp; + ctx->emit_declaration(ctx, &decl); + + /* emit immediate = {1/32, 1/32, 1, 1} + * The index/position of this immediate will be pctx->numImmed + */ + { + static const float value[4] = { 1.0/32, 1.0/32, 1.0, 1.0 }; + struct tgsi_full_immediate immed; + uint size = 4; + immed = tgsi_default_full_immediate(); + immed.Immediate.Size = 1 + size; /* one for the token itself */ + immed.u.ImmediateFloat32 = (struct tgsi_immediate_float32 *) value; + ctx->emit_immediate(ctx, &immed); + } + + pctx->firstInstruction = FALSE; + + + /* + * Insert new MUL/TEX/KILP instructions at start of program + * Take gl_FragCoord, divide by 32 (stipple size), sample the + * texture and kill fragment if needed. + * + * We'd like to use non-normalized texcoords to index into a RECT + * texture, but we can only use GL_REPEAT wrap mode with normalized + * texcoords. Darn. + */ + + /* MUL texTemp, INPUT[wincoord], 1/32; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_MUL; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; + newInst.FullSrcRegisters[0].SrcRegister.Index = wincoordInput; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_IMMEDIATE; + newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->numImmed; + ctx->emit_instruction(ctx, &newInst); + + /* TEX texTemp, texTemp, sampler; */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_TEX; + newInst.Instruction.NumDstRegs = 1; + newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; + newInst.Instruction.NumSrcRegs = 2; + newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; + newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; + newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->freeSampler; + ctx->emit_instruction(ctx, &newInst); + + /* KILP texTemp; # if texTemp < 0, KILL fragment */ + newInst = tgsi_default_full_instruction(); + newInst.Instruction.Opcode = TGSI_OPCODE_KILP; + newInst.Instruction.NumDstRegs = 0; + newInst.Instruction.NumSrcRegs = 1; + newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; + newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; + newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; + ctx->emit_instruction(ctx, &newInst); + } + + /* emit this instruction */ + ctx->emit_instruction(ctx, inst); +} + + +/** + * Generate the frag shader we'll use for doing polygon stipple. + * This will be the user's shader prefixed with a TEX and KIL instruction. + */ +static void +generate_pstip_fs(struct pstip_stage *pstip) +{ + const struct pipe_shader_state *orig_fs = &pstip->fs->state; + /*struct draw_context *draw = pstip->stage.draw;*/ + struct pipe_shader_state pstip_fs; + struct pstip_transform_context transform; + +#define MAX 1000 + + pstip_fs = *orig_fs; /* copy to init */ + pstip_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + + memset(&transform, 0, sizeof(transform)); + transform.wincoordInput = -1; + transform.maxInput = -1; + transform.texTemp = -1; + transform.firstInstruction = TRUE; + transform.base.transform_instruction = pstip_transform_inst; + transform.base.transform_declaration = pstip_transform_decl; + transform.base.transform_immediate = pstip_transform_immed; + + tgsi_transform_shader(orig_fs->tokens, + (struct tgsi_token *) pstip_fs.tokens, + MAX, &transform.base); + +#if 0 /* DEBUG */ + tgsi_dump(orig_fs->tokens, 0); + tgsi_dump(pstip_fs.tokens, 0); +#endif + + pstip->fs->sampler_unit = transform.freeSampler; + assert(pstip->fs->sampler_unit < PIPE_MAX_SAMPLERS); + + pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs); +} + + +/** + * Load texture image with current stipple pattern. + */ +static void +pstip_update_texture(struct pstip_stage *pstip) +{ + static const uint bit31 = 1 << 31; + struct pipe_context *pipe = pstip->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_surface *surface; + const uint *stipple = pstip->state.stipple->stipple; + uint i, j; + ubyte *data; + + surface = screen->get_tex_surface(screen, pstip->texture, 0, 0, 0); + data = pipe_surface_map(surface); + + /* + * Load alpha texture. + * Note: 0 means keep the fragment, 255 means kill it. + * We'll negate the texel value and use KILP which kills if value + * is negative. + */ + for (i = 0; i < 32; i++) { + for (j = 0; j < 32; j++) { + if (stipple[i] & (bit31 >> j)) { + /* fragment "on" */ + data[i * surface->pitch + j] = 0; + } + else { + /* fragment "off" */ + data[i * surface->pitch + j] = 255; + } + } + } + + /* unmap */ + pipe_surface_unmap(surface); + pipe_surface_reference(&surface, NULL); + pipe->texture_update(pipe, pstip->texture, 0, 0x1); +} + + +/** + * Create the texture map we'll use for stippling. + */ +static void +pstip_create_texture(struct pstip_stage *pstip) +{ + struct pipe_context *pipe = pstip->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_texture texTemp; + + memset(&texTemp, 0, sizeof(texTemp)); + texTemp.target = PIPE_TEXTURE_2D; + texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */ + texTemp.last_level = 0; + texTemp.width[0] = 32; + texTemp.height[0] = 32; + texTemp.depth[0] = 1; + texTemp.cpp = 1; + + pstip->texture = screen->texture_create(screen, &texTemp); + assert(pstip->texture->refcount == 1); +} + + +/** + * Create the sampler CSO that'll be used for antialiasing. + * By using a mipmapped texture, we don't have to generate a different + * texture image for each line size. + */ +static void +pstip_create_sampler(struct pstip_stage *pstip) +{ + struct pipe_sampler_state sampler; + struct pipe_context *pipe = pstip->pipe; + + memset(&sampler, 0, sizeof(sampler)); + sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; + sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; + sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; + sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; + sampler.normalized_coords = 1; + sampler.min_lod = 0.0f; + sampler.max_lod = 0.0f; + + pstip->sampler_cso = pipe->create_sampler_state(pipe, &sampler); +} + + +/** + * When we're about to draw our first AA line in a batch, this function is + * called to tell the driver to bind our modified fragment shader. + */ +static void +bind_pstip_fragment_shader(struct pstip_stage *pstip) +{ + if (!pstip->fs->pstip_fs) { + generate_pstip_fs(pstip); + } + pstip->driver_bind_fs_state(pstip->pipe, pstip->fs->pstip_fs); +} + + + +static INLINE struct pstip_stage * +pstip_stage( struct draw_stage *stage ) +{ + return (struct pstip_stage *) stage; +} + + +static void +passthrough_point(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->point(stage->next, header); +} + + +static void +passthrough_line(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->line(stage->next, header); +} + + +static void +passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + + +static void +pstip_first_tri(struct draw_stage *stage, struct prim_header *header) +{ + struct pstip_stage *pstip = pstip_stage(stage); + struct pipe_context *pipe = pstip->pipe; + uint num_samplers; + + assert(stage->draw->rasterizer->poly_stipple_enable); + + /* bind our fragprog */ + bind_pstip_fragment_shader(pstip); + + /* how many samplers? */ + /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ + num_samplers = MAX2(pstip->num_textures, pstip->num_samplers); + num_samplers = MAX2(num_samplers, pstip->fs->sampler_unit + 1); + + /* plug in our sampler, texture */ + pstip->state.samplers[pstip->fs->sampler_unit] = pstip->sampler_cso; + pipe_texture_reference(&pstip->state.textures[pstip->fs->sampler_unit], + pstip->texture); + + assert(num_samplers <= PIPE_MAX_SAMPLERS); + + pstip->driver_bind_sampler_states(pipe, num_samplers, pstip->state.samplers); + pstip->driver_set_sampler_textures(pipe, num_samplers, pstip->state.textures); + + /* now really draw first line */ + stage->tri = passthrough_tri; + stage->tri(stage, header); +} + + +static void +pstip_flush(struct draw_stage *stage, unsigned flags) +{ + /*struct draw_context *draw = stage->draw;*/ + struct pstip_stage *pstip = pstip_stage(stage); + struct pipe_context *pipe = pstip->pipe; + + stage->tri = pstip_first_tri; + stage->next->flush( stage->next, flags ); + + /* restore original frag shader */ + pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs); + + /* XXX restore original texture, sampler state */ + pstip->driver_bind_sampler_states(pipe, pstip->num_samplers, + pstip->state.samplers); + pstip->driver_set_sampler_textures(pipe, pstip->num_textures, + pstip->state.textures); +} + + +static void +pstip_reset_stipple_counter(struct draw_stage *stage) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +pstip_destroy(struct draw_stage *stage) +{ + struct pstip_stage *pstip = pstip_stage(stage); + + pstip->pipe->delete_sampler_state(pstip->pipe, pstip->sampler_cso); + + pipe_texture_release(&pstip->texture); + + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +static struct pstip_stage * +draw_pstip_stage(struct draw_context *draw) +{ + struct pstip_stage *pstip = CALLOC_STRUCT(pstip_stage); + + draw_alloc_temp_verts( &pstip->stage, 8 ); + + pstip->stage.draw = draw; + pstip->stage.next = NULL; + pstip->stage.point = passthrough_point; + pstip->stage.line = passthrough_line; + pstip->stage.tri = pstip_first_tri; + pstip->stage.flush = pstip_flush; + pstip->stage.reset_stipple_counter = pstip_reset_stipple_counter; + pstip->stage.destroy = pstip_destroy; + + return pstip; +} + + +static struct pstip_stage * +pstip_stage_from_pipe(struct pipe_context *pipe) +{ + struct draw_context *draw = (struct draw_context *) pipe->draw; + return pstip_stage(draw->pipeline.pstipple); +} + + +/** + * This function overrides the driver's create_fs_state() function and + * will typically be called by the state tracker. + */ +static void * +pstip_create_fs_state(struct pipe_context *pipe, + const struct pipe_shader_state *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = CALLOC_STRUCT(pstip_fragment_shader); + + if (aafs) { + aafs->state = *fs; + + /* pass-through */ + aafs->driver_fs = pstip->driver_create_fs_state(pstip->pipe, fs); + } + + return aafs; +} + + +static void +pstip_bind_fs_state(struct pipe_context *pipe, void *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs; + /* save current */ + pstip->fs = aafs; + /* pass-through */ + pstip->driver_bind_fs_state(pstip->pipe, + (aafs ? aafs->driver_fs : NULL)); +} + + +static void +pstip_delete_fs_state(struct pipe_context *pipe, void *fs) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs; + /* pass-through */ + pstip->driver_delete_fs_state(pstip->pipe, aafs->driver_fs); + FREE(aafs); +} + + +static void +pstip_bind_sampler_states(struct pipe_context *pipe, + unsigned num, void **sampler) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + uint i; + + /* save current */ + memcpy(pstip->state.samplers, sampler, num * sizeof(void *)); + for (i = num; i < PIPE_MAX_SAMPLERS; i++) { + pstip->state.samplers[i] = NULL; + } + + pstip->num_samplers = num; + /* pass-through */ + pstip->driver_bind_sampler_states(pstip->pipe, num, sampler); +} + + +static void +pstip_set_sampler_textures(struct pipe_context *pipe, + unsigned num, struct pipe_texture **texture) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + uint i; + + /* save current */ + for (i = 0; i < num; i++) { + pipe_texture_reference(&pstip->state.textures[i], texture[i]); + } + for (; i < PIPE_MAX_SAMPLERS; i++) { + pipe_texture_reference(&pstip->state.textures[i], NULL); + } + + pstip->num_textures = num; + + /* pass-through */ + pstip->driver_set_sampler_textures(pstip->pipe, num, texture); +} + + +static void +pstip_set_polygon_stipple(struct pipe_context *pipe, + const struct pipe_poly_stipple *stipple) +{ + struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + /* save current */ + pstip->state.stipple = stipple; + /* pass-through */ + pstip->driver_set_polygon_stipple(pstip->pipe, stipple); + + pstip_update_texture(pstip); +} + + + +/** + * Called by drivers that want to install this AA line prim stage + * into the draw module's pipeline. This will not be used if the + * hardware has native support for AA lines. + */ +void +draw_install_pstipple_stage(struct draw_context *draw, + struct pipe_context *pipe) +{ + struct pstip_stage *pstip; + + pipe->draw = (void *) draw; + + /* + * Create / install AA line drawing / prim stage + */ + pstip = draw_pstip_stage( draw ); + assert(pstip); + draw->pipeline.pstipple = &pstip->stage; + + pstip->pipe = pipe; + + /* create special texture, sampler state */ + pstip_create_texture(pstip); + pstip_create_sampler(pstip); + + /* save original driver functions */ + pstip->driver_create_fs_state = pipe->create_fs_state; + pstip->driver_bind_fs_state = pipe->bind_fs_state; + pstip->driver_delete_fs_state = pipe->delete_fs_state; + + pstip->driver_bind_sampler_states = pipe->bind_sampler_states; + pstip->driver_set_sampler_textures = pipe->set_sampler_textures; + pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple; + + /* override the driver's functions */ + pipe->create_fs_state = pstip_create_fs_state; + pipe->bind_fs_state = pstip_bind_fs_state; + pipe->delete_fs_state = pstip_delete_fs_state; + + pipe->bind_sampler_states = pstip_bind_sampler_states; + pipe->set_sampler_textures = pstip_set_sampler_textures; + pipe->set_polygon_stipple = pstip_set_polygon_stipple; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c new file mode 100644 index 0000000000..506f33512c --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -0,0 +1,239 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +/* Implement line stipple by cutting lines up into smaller lines. + * There are hundreds of ways to implement line stipple, this is one + * choice that should work in all situations, requires no state + * manipulations, but with a penalty in terms of large amounts of + * generated geometry. + */ + + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "draw_private.h" + + +/** Subclass of draw_stage */ +struct stipple_stage { + struct draw_stage stage; + float counter; + uint pattern; + uint factor; +}; + + +static INLINE struct stipple_stage * +stipple_stage(struct draw_stage *stage) +{ + return (struct stipple_stage *) stage; +} + + +/** + * Compute interpolated vertex attributes for 'dst' at position 't' + * between 'v0' and 'v1'. + * XXX using linear interpolation for all attribs at this time. + */ +static void +screen_interp( struct draw_context *draw, + struct vertex_header *dst, + float t, + const struct vertex_header *v0, + const struct vertex_header *v1 ) +{ + uint attr; + for (attr = 0; attr < draw->num_vs_outputs; attr++) { + const float *val0 = v0->data[attr]; + const float *val1 = v1->data[attr]; + float *newv = dst->data[attr]; + uint i; + for (i = 0; i < 4; i++) { + newv[i] = val0[i] + t * (val1[i] - val0[i]); + } + } +} + + +static void +emit_segment(struct draw_stage *stage, struct prim_header *header, + float t0, float t1) +{ + struct vertex_header *v0new = dup_vert(stage, header->v[0], 0); + struct vertex_header *v1new = dup_vert(stage, header->v[1], 1); + struct prim_header newprim = *header; + + if (t0 > 0.0) { + screen_interp( stage->draw, v0new, t0, header->v[0], header->v[1] ); + newprim.v[0] = v0new; + } + + if (t1 < 1.0) { + screen_interp( stage->draw, v1new, t1, header->v[0], header->v[1] ); + newprim.v[1] = v1new; + } + + stage->next->line( stage->next, &newprim ); +} + + +static INLINE unsigned +stipple_test(int counter, ushort pattern, int factor) +{ + int b = (counter / factor) & 0xf; + return (1 << b) & pattern; +} + + +static void +stipple_line(struct draw_stage *stage, struct prim_header *header) +{ + struct stipple_stage *stipple = stipple_stage(stage); + struct vertex_header *v0 = header->v[0]; + struct vertex_header *v1 = header->v[1]; + const float *pos0 = v0->data[0]; + const float *pos1 = v1->data[0]; + float start = 0; + int state = 0; + + float x0 = pos0[0]; + float x1 = pos1[0]; + float y0 = pos0[1]; + float y1 = pos1[1]; + + float dx = x0 > x1 ? x0 - x1 : x1 - x0; + float dy = y0 > y1 ? y0 - y1 : y1 - y0; + + float length = MAX2(dx, dy); + int i; + + /* XXX ToDo: intead of iterating pixel-by-pixel, use a look-up table. + */ + for (i = 0; i < length; i++) { + int result = stipple_test( (int) stipple->counter+i, + (ushort) stipple->pattern, stipple->factor ); + if (result != state) { + /* changing from "off" to "on" or vice versa */ + if (state) { + if (start != i) { + /* finishing an "on" segment */ + emit_segment( stage, header, start / length, i / length ); + } + } + else { + /* starting an "on" segment */ + start = (float) i; + } + state = result; + } + } + + if (state && start < length) + emit_segment( stage, header, start / length, 1.0 ); + + stipple->counter += length; +} + + +static void +reset_stipple_counter(struct draw_stage *stage) +{ + struct stipple_stage *stipple = stipple_stage(stage); + stipple->counter = 0; + stage->next->reset_stipple_counter( stage->next ); +} + + +static void +stipple_first_line(struct draw_stage *stage, + struct prim_header *header) +{ + struct stipple_stage *stipple = stipple_stage(stage); + struct draw_context *draw = stage->draw; + + stipple->pattern = draw->rasterizer->line_stipple_pattern; + stipple->factor = draw->rasterizer->line_stipple_factor + 1; + + stage->line = stipple_line; + stage->line( stage, header ); +} + + +static void +stipple_flush(struct draw_stage *stage, unsigned flags) +{ + stage->line = stipple_first_line; + stage->next->flush( stage->next, flags ); +} + + +static void +passthrough_point(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->point( stage->next, header ); +} + + +static void +passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + +static void +stipple_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Create line stippler stage + */ +struct draw_stage *draw_stipple_stage( struct draw_context *draw ) +{ + struct stipple_stage *stipple = CALLOC_STRUCT(stipple_stage); + + draw_alloc_temp_verts( &stipple->stage, 2 ); + + stipple->stage.draw = draw; + stipple->stage.next = NULL; + stipple->stage.point = passthrough_point; + stipple->stage.line = stipple_first_line; + stipple->stage.tri = passthrough_tri; + stipple->stage.reset_stipple_counter = reset_stipple_counter; + stipple->stage.flush = stipple_flush; + stipple->stage.destroy = stipple_destroy; + + return &stipple->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c new file mode 100644 index 0000000000..01d905c153 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c @@ -0,0 +1,203 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "draw_vs.h" + + +struct twoside_stage { + struct draw_stage stage; + float sign; /**< +1 or -1 */ + uint attrib_front0, attrib_back0; + uint attrib_front1, attrib_back1; +}; + + +static INLINE struct twoside_stage *twoside_stage( struct draw_stage *stage ) +{ + return (struct twoside_stage *)stage; +} + + + + +/** + * Copy back color(s) to front color(s). + */ +static INLINE struct vertex_header * +copy_bfc( struct twoside_stage *twoside, + const struct vertex_header *v, + unsigned idx ) +{ + struct vertex_header *tmp = dup_vert( &twoside->stage, v, idx ); + + if (twoside->attrib_back0) { + COPY_4FV(tmp->data[twoside->attrib_front0], + tmp->data[twoside->attrib_back0]); + } + if (twoside->attrib_back1) { + COPY_4FV(tmp->data[twoside->attrib_front1], + tmp->data[twoside->attrib_back1]); + } + + return tmp; +} + + +/* Twoside tri: + */ +static void twoside_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct twoside_stage *twoside = twoside_stage(stage); + + if (header->det * twoside->sign < 0.0) { + /* this is a back-facing triangle */ + struct prim_header tmp; + + tmp.det = header->det; + tmp.edgeflags = header->edgeflags; + /* copy back attribs to front attribs */ + tmp.v[0] = copy_bfc(twoside, header->v[0], 0); + tmp.v[1] = copy_bfc(twoside, header->v[1], 1); + tmp.v[2] = copy_bfc(twoside, header->v[2], 2); + + stage->next->tri( stage->next, &tmp ); + } + else { + stage->next->tri( stage->next, header ); + } +} + + +static void twoside_line( struct draw_stage *stage, + struct prim_header *header ) +{ + /* pass-through */ + stage->next->line( stage->next, header ); +} + + +static void twoside_point( struct draw_stage *stage, + struct prim_header *header ) +{ + /* pass-through */ + stage->next->point( stage->next, header ); +} + + +static void twoside_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct twoside_stage *twoside = twoside_stage(stage); + const struct draw_vertex_shader *vs = stage->draw->vertex_shader; + uint i; + + twoside->attrib_front0 = 0; + twoside->attrib_front1 = 0; + twoside->attrib_back0 = 0; + twoside->attrib_back1 = 0; + + /* Find which vertex shader outputs are front/back colors */ + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR) { + if (vs->info.output_semantic_index[i] == 0) + twoside->attrib_front0 = i; + else + twoside->attrib_front1 = i; + } + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { + if (vs->info.output_semantic_index[i] == 0) + twoside->attrib_back0 = i; + else + twoside->attrib_back1 = i; + } + } + + if (!twoside->attrib_back0) + twoside->attrib_front0 = 0; + + if (!twoside->attrib_back1) + twoside->attrib_front1 = 0; + + /* + * We'll multiply the primitive's determinant by this sign to determine + * if the triangle is back-facing (negative). + * sign = -1 for CCW, +1 for CW + */ + twoside->sign = (stage->draw->rasterizer->front_winding == PIPE_WINDING_CCW) ? -1.0f : 1.0f; + + stage->tri = twoside_tri; + stage->tri( stage, header ); +} + + +static void twoside_flush( struct draw_stage *stage, unsigned flags ) +{ + stage->tri = twoside_first_tri; + stage->next->flush( stage->next, flags ); +} + + +static void twoside_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void twoside_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Create twoside pipeline stage. + */ +struct draw_stage *draw_twoside_stage( struct draw_context *draw ) +{ + struct twoside_stage *twoside = CALLOC_STRUCT(twoside_stage); + + draw_alloc_temp_verts( &twoside->stage, 3 ); + + twoside->stage.draw = draw; + twoside->stage.next = NULL; + twoside->stage.point = twoside_point; + twoside->stage.line = twoside_line; + twoside->stage.tri = twoside_first_tri; + twoside->stage.flush = twoside_flush; + twoside->stage.reset_stipple_counter = twoside_reset_stipple_counter; + twoside->stage.destroy = twoside_destroy; + + return &twoside->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c new file mode 100644 index 0000000000..b07860cd9e --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c @@ -0,0 +1,206 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \brief Drawing stage for handling glPolygonMode(line/point). + * Convert triangles to points or lines as needed. + */ + +/* Authors: Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "draw_private.h" + + +struct unfilled_stage { + struct draw_stage stage; + + /** [0] = front face, [1] = back face. + * legal values: PIPE_POLYGON_MODE_FILL, PIPE_POLYGON_MODE_LINE, + * and PIPE_POLYGON_MODE_POINT, + */ + unsigned mode[2]; +}; + + +static INLINE struct unfilled_stage *unfilled_stage( struct draw_stage *stage ) +{ + return (struct unfilled_stage *)stage; +} + + + +static void point( struct draw_stage *stage, + struct vertex_header *v0 ) +{ + struct prim_header tmp; + tmp.v[0] = v0; + stage->next->point( stage->next, &tmp ); +} + +static void line( struct draw_stage *stage, + struct vertex_header *v0, + struct vertex_header *v1 ) +{ + struct prim_header tmp; + tmp.v[0] = v0; + tmp.v[1] = v1; + stage->next->line( stage->next, &tmp ); +} + + +static void points( struct draw_stage *stage, + struct prim_header *header ) +{ + struct vertex_header *v0 = header->v[0]; + struct vertex_header *v1 = header->v[1]; + struct vertex_header *v2 = header->v[2]; + + if (header->edgeflags & 0x1) point( stage, v0 ); + if (header->edgeflags & 0x2) point( stage, v1 ); + if (header->edgeflags & 0x4) point( stage, v2 ); +} + + +static void lines( struct draw_stage *stage, + struct prim_header *header ) +{ + struct vertex_header *v0 = header->v[0]; + struct vertex_header *v1 = header->v[1]; + struct vertex_header *v2 = header->v[2]; + +#if 0 + assert(((header->edgeflags & 0x1) >> 0) == header->v[0]->edgeflag); + assert(((header->edgeflags & 0x2) >> 1) == header->v[1]->edgeflag); + assert(((header->edgeflags & 0x4) >> 2) == header->v[2]->edgeflag); +#endif + + if (header->edgeflags & 0x4) line( stage, v2, v0 ); + if (header->edgeflags & 0x1) line( stage, v0, v1 ); + if (header->edgeflags & 0x2) line( stage, v1, v2 ); +} + + +/* Unfilled tri: + * + * Note edgeflags in the vertex struct is not sufficient as we will + * need to manipulate them when decomposing primitives??? + */ +static void unfilled_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct unfilled_stage *unfilled = unfilled_stage(stage); + unsigned mode = unfilled->mode[header->det >= 0.0]; + + switch (mode) { + case PIPE_POLYGON_MODE_FILL: + stage->next->tri( stage->next, header ); + break; + case PIPE_POLYGON_MODE_LINE: + lines( stage, header ); + break; + case PIPE_POLYGON_MODE_POINT: + points( stage, header ); + break; + default: + assert(0); + } +} + + +static void unfilled_first_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct unfilled_stage *unfilled = unfilled_stage(stage); + + unfilled->mode[0] = stage->draw->rasterizer->fill_ccw; /* front */ + unfilled->mode[1] = stage->draw->rasterizer->fill_cw; /* back */ + + stage->tri = unfilled_tri; + stage->tri( stage, header ); +} + + +static void unfilled_line( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->line( stage->next, header ); +} + + +static void unfilled_point( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->point( stage->next, header ); +} + + +static void unfilled_flush( struct draw_stage *stage, + unsigned flags ) +{ + stage->next->flush( stage->next, flags ); + + stage->tri = unfilled_first_tri; +} + + +static void unfilled_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void unfilled_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +/** + * Create unfilled triangle stage. + */ +struct draw_stage *draw_unfilled_stage( struct draw_context *draw ) +{ + struct unfilled_stage *unfilled = CALLOC_STRUCT(unfilled_stage); + + draw_alloc_temp_verts( &unfilled->stage, 0 ); + + unfilled->stage.draw = draw; + unfilled->stage.next = NULL; + unfilled->stage.tmp = NULL; + unfilled->stage.point = unfilled_point; + unfilled->stage.line = unfilled_line; + unfilled->stage.tri = unfilled_first_tri; + unfilled->stage.flush = unfilled_flush; + unfilled->stage.reset_stipple_counter = unfilled_reset_stipple_counter; + unfilled->stage.destroy = unfilled_destroy; + + return &unfilled->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c new file mode 100644 index 0000000000..e163e078f0 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c @@ -0,0 +1,312 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "draw_private.h" + +static boolean points( unsigned prim ) +{ + return (prim == PIPE_PRIM_POINTS); +} + +static boolean lines( unsigned prim ) +{ + return (prim == PIPE_PRIM_LINES || + prim == PIPE_PRIM_LINE_STRIP || + prim == PIPE_PRIM_LINE_LOOP); +} + +static boolean triangles( unsigned prim ) +{ + return prim >= PIPE_PRIM_TRIANGLES; +} + +/** + * Check if we need any special pipeline stages, or whether + * prims/verts can go through untouched. Don't test for bypass + * clipping or vs modes, this function is just about the primitive + * pipeline stages. + */ +boolean +draw_need_pipeline(const struct draw_context *draw, + unsigned int prim ) +{ + /* Don't have to worry about triangles turning into lines/points + * and triggering the pipeline, because we have to trigger the + * pipeline *anyway* if unfilled mode is active. + */ + if (lines(prim)) + { + /* line stipple */ + if (draw->rasterizer->line_stipple_enable && draw->line_stipple) + return TRUE; + + /* wide lines */ + if (draw->rasterizer->line_width > draw->wide_line_threshold) + return TRUE; + + /* AA lines */ + if (draw->rasterizer->line_smooth && draw->pipeline.aaline) + return TRUE; + } + + if (points(prim)) + { + /* large points */ + if (draw->rasterizer->point_size > draw->wide_point_threshold) + return TRUE; + + /* AA points */ + if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) + return TRUE; + + /* point sprites */ + if (draw->rasterizer->point_sprite && draw->point_sprite) + return TRUE; + } + + + if (triangles(prim)) + { + /* polygon stipple */ + if (draw->rasterizer->poly_stipple_enable && draw->pipeline.pstipple) + return TRUE; + + /* unfilled polygons */ + if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || + draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) + return TRUE; + + /* polygon offset */ + if (draw->rasterizer->offset_cw || draw->rasterizer->offset_ccw) + return TRUE; + + /* two-side lighting */ + if (draw->rasterizer->light_twoside) + return TRUE; + } + + /* polygon cull - this is difficult - hardware can cull just fine + * most of the time (though sometimes CULL_NEITHER is unsupported. + * + * Generally this isn't a reason to require the pipeline, though. + * + if (draw->rasterizer->cull_mode) + return TRUE; + */ + + return FALSE; +} + + + +/** + * Rebuild the rendering pipeline. + */ +static struct draw_stage *validate_pipeline( struct draw_stage *stage ) +{ + struct draw_context *draw = stage->draw; + struct draw_stage *next = draw->pipeline.rasterize; + int need_det = 0; + int precalc_flat = 0; + boolean wide_lines, wide_points; + + /* Set the validate's next stage to the rasterize stage, so that it + * can be found later if needed for flushing. + */ + stage->next = next; + + /* drawing wide lines? */ + wide_lines = (draw->rasterizer->line_width > draw->wide_line_threshold + && !draw->rasterizer->line_smooth); + + /* drawing large points? */ + if (draw->rasterizer->point_sprite && draw->point_sprite) + wide_points = TRUE; + else if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) + wide_points = FALSE; + else if (draw->rasterizer->point_size > draw->wide_point_threshold) + wide_points = TRUE; + else + wide_points = FALSE; + + /* + * NOTE: we build up the pipeline in end-to-start order. + * + * TODO: make the current primitive part of the state and build + * shorter pipelines for lines & points. + */ + + if (draw->rasterizer->line_smooth && draw->pipeline.aaline) { + draw->pipeline.aaline->next = next; + next = draw->pipeline.aaline; + } + + if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) { + draw->pipeline.aapoint->next = next; + next = draw->pipeline.aapoint; + } + + if (wide_lines) { + draw->pipeline.wide_line->next = next; + next = draw->pipeline.wide_line; + precalc_flat = 1; + } + + if (wide_points || draw->rasterizer->point_sprite) { + draw->pipeline.wide_point->next = next; + next = draw->pipeline.wide_point; + } + + if (draw->rasterizer->line_stipple_enable && draw->line_stipple) { + draw->pipeline.stipple->next = next; + next = draw->pipeline.stipple; + precalc_flat = 1; /* only needed for lines really */ + } + + if (draw->rasterizer->poly_stipple_enable + && draw->pipeline.pstipple) { + draw->pipeline.pstipple->next = next; + next = draw->pipeline.pstipple; + } + + if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || + draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) { + draw->pipeline.unfilled->next = next; + next = draw->pipeline.unfilled; + precalc_flat = 1; /* only needed for triangles really */ + need_det = 1; + } + + if (draw->rasterizer->flatshade && precalc_flat) { + draw->pipeline.flatshade->next = next; + next = draw->pipeline.flatshade; + } + + if (draw->rasterizer->offset_cw || + draw->rasterizer->offset_ccw) { + draw->pipeline.offset->next = next; + next = draw->pipeline.offset; + need_det = 1; + } + + if (draw->rasterizer->light_twoside) { + draw->pipeline.twoside->next = next; + next = draw->pipeline.twoside; + need_det = 1; + } + + /* Always run the cull stage as we calculate determinant there + * also. + * + * This can actually be a win as culling out the triangles can lead + * to less work emitting vertices, smaller vertex buffers, etc. + * It's difficult to say whether this will be true in general. + */ + if (need_det || draw->rasterizer->cull_mode) { + draw->pipeline.cull->next = next; + next = draw->pipeline.cull; + } + + /* Clip stage + */ + if (!draw->rasterizer->bypass_clipping) + { + draw->pipeline.clip->next = next; + next = draw->pipeline.clip; + } + + + draw->pipeline.first = next; + return next; +} + +static void validate_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + struct draw_stage *pipeline = validate_pipeline( stage ); + pipeline->tri( pipeline, header ); +} + +static void validate_line( struct draw_stage *stage, + struct prim_header *header ) +{ + struct draw_stage *pipeline = validate_pipeline( stage ); + pipeline->line( pipeline, header ); +} + +static void validate_point( struct draw_stage *stage, + struct prim_header *header ) +{ + struct draw_stage *pipeline = validate_pipeline( stage ); + pipeline->point( pipeline, header ); +} + +static void validate_reset_stipple_counter( struct draw_stage *stage ) +{ + struct draw_stage *pipeline = validate_pipeline( stage ); + pipeline->reset_stipple_counter( pipeline ); +} + +static void validate_flush( struct draw_stage *stage, + unsigned flags ) +{ + /* May need to pass a backend flush on to the rasterize stage. + */ + if (stage->next) + stage->next->flush( stage->next, flags ); +} + + +static void validate_destroy( struct draw_stage *stage ) +{ + FREE( stage ); +} + + +/** + * Create validate pipeline stage. + */ +struct draw_stage *draw_validate_stage( struct draw_context *draw ) +{ + struct draw_stage *stage = CALLOC_STRUCT(draw_stage); + + stage->draw = draw; + stage->next = NULL; + stage->point = validate_point; + stage->line = validate_line; + stage->tri = validate_tri; + stage->flush = validate_flush; + stage->reset_stipple_counter = validate_reset_stipple_counter; + stage->destroy = validate_destroy; + + return stage; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c new file mode 100644 index 0000000000..30dceeb43d --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -0,0 +1,529 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Vertex buffer drawing stage. + * + * \author José Fonseca + * \author Keith Whitwell + */ + + +#include "pipe/p_debug.h" +#include "pipe/p_util.h" + +#include "draw_vbuf.h" +#include "draw_private.h" +#include "draw_vertex.h" +#include "translate/translate.h" + + +/** + * Vertex buffer emit stage. + */ +struct vbuf_stage { + struct draw_stage stage; /**< This must be first (base class) */ + + struct vbuf_render *render; + + const struct vertex_info *vinfo; + + /** Vertex size in bytes */ + unsigned vertex_size; + + struct translate *translate; + + /* FIXME: we have no guarantee that 'unsigned' is 32bit */ + + /** Vertices in hardware format */ + unsigned *vertices; + unsigned *vertex_ptr; + unsigned max_vertices; + unsigned nr_vertices; + + /** Indices */ + ushort *indices; + unsigned max_indices; + unsigned nr_indices; + + /* Cache point size somewhere it's address won't change: + */ + float point_size; +}; + + +/** + * Basically a cast wrapper. + */ +static INLINE struct vbuf_stage * +vbuf_stage( struct draw_stage *stage ) +{ + assert(stage); + return (struct vbuf_stage *)stage; +} + + +static void vbuf_flush_indices( struct vbuf_stage *vbuf ); +static void vbuf_flush_vertices( struct vbuf_stage *vbuf ); +static void vbuf_alloc_vertices( struct vbuf_stage *vbuf ); + + +static INLINE boolean +overflow( void *map, void *ptr, unsigned bytes, unsigned bufsz ) +{ + unsigned long used = (unsigned long) ((char *)ptr - (char *)map); + return (used + bytes) > bufsz; +} + + +static INLINE void +check_space( struct vbuf_stage *vbuf, unsigned nr ) +{ + if (vbuf->nr_vertices + nr > vbuf->max_vertices ) { + vbuf_flush_vertices(vbuf); + vbuf_alloc_vertices(vbuf); + } + + if (vbuf->nr_indices + nr > vbuf->max_indices ) + vbuf_flush_indices(vbuf); +} + + +static INLINE void +dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) +{ +// assert(vinfo == vbuf->render->get_vertex_info(vbuf->render)); + unsigned i, j; + + for (i = 0; i < vinfo->num_attribs; i++) { + j = vinfo->src_index[i]; + switch (vinfo->emit[i]) { + case EMIT_OMIT: + debug_printf("EMIT_OMIT:"); + break; + case EMIT_1F: + debug_printf("EMIT_1F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_1F_PSIZE: + debug_printf("EMIT_1F_PSIZE:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_2F: + debug_printf("EMIT_2F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_3F: + debug_printf("EMIT_3F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + data += sizeof(float); + break; + case EMIT_4F: + debug_printf("EMIT_4F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_4UB: + debug_printf("EMIT_4UB:\t"); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + break; + default: + assert(0); + } + debug_printf("\n"); + } + debug_printf("\n"); +} + + +/** + * Extract the needed fields from post-transformed vertex and emit + * a hardware(driver) vertex. + * Recall that the vertices are constructed by the 'draw' module and + * have a couple of slots at the beginning (1-dword header, 4-dword + * clip pos) that we ignore here. We only use the vertex->data[] fields. + */ +static INLINE ushort +emit_vertex( struct vbuf_stage *vbuf, + struct vertex_header *vertex ) +{ + if(vertex->vertex_id == UNDEFINED_VERTEX_ID) { + /* Hmm - vertices are emitted one at a time - better make sure + * set_buffer is efficient. Consider a special one-shot mode for + * translate. + */ + vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0); + vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr); + + if (0) dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); + + vbuf->vertex_ptr += vbuf->vertex_size/4; + vertex->vertex_id = vbuf->nr_vertices++; + } + + return vertex->vertex_id; +} + + +static void +vbuf_tri( struct draw_stage *stage, + struct prim_header *prim ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + unsigned i; + + check_space( vbuf, 3 ); + + for (i = 0; i < 3; i++) { + vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] ); + } +} + + +static void +vbuf_line( struct draw_stage *stage, + struct prim_header *prim ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + unsigned i; + + check_space( vbuf, 2 ); + + for (i = 0; i < 2; i++) { + vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] ); + } +} + + +static void +vbuf_point( struct draw_stage *stage, + struct prim_header *prim ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + + check_space( vbuf, 1 ); + + vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[0] ); +} + + + + +/** + * Set the prim type for subsequent vertices. + * This may result in a new vertex size. The existing vbuffer (if any) + * will be flushed if needed and a new one allocated. + */ +static void +vbuf_set_prim( struct vbuf_stage *vbuf, uint prim ) +{ + struct translate_key hw_key; + unsigned dst_offset; + unsigned i; + + vbuf->render->set_primitive(vbuf->render, prim); + + /* Must do this after set_primitive() above: + * + * XXX: need some state managment to track when this needs to be + * recalculated. The driver should tell us whether there was a + * state change. + */ + vbuf->vinfo = vbuf->render->get_vertex_info(vbuf->render); + + if (vbuf->vertex_size != vbuf->vinfo->size * sizeof(float)) { + vbuf_flush_vertices(vbuf); + vbuf->vertex_size = vbuf->vinfo->size * sizeof(float); + } + + /* Translate from pipeline vertices to hw vertices. + */ + dst_offset = 0; + memset(&hw_key, 0, sizeof(hw_key)); + + for (i = 0; i < vbuf->vinfo->num_attribs; i++) { + unsigned emit_sz = 0; + unsigned src_buffer = 0; + unsigned output_format; + unsigned src_offset = (vbuf->vinfo->src_index[i] * 4 * sizeof(float) ); + + switch (vbuf->vinfo->emit[i]) { + case EMIT_4F: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); + break; + case EMIT_3F: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); + break; + case EMIT_2F: + output_format = PIPE_FORMAT_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); + break; + case EMIT_1F: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + break; + case EMIT_1F_PSIZE: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + src_buffer = 1; + src_offset = 0; + break; + case EMIT_4UB: + output_format = PIPE_FORMAT_B8G8R8A8_UNORM; + emit_sz = 4 * sizeof(ubyte); + default: + assert(0); + output_format = PIPE_FORMAT_NONE; + emit_sz = 0; + break; + } + + hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + hw_key.element[i].input_buffer = src_buffer; + hw_key.element[i].input_offset = src_offset; + hw_key.element[i].output_format = output_format; + hw_key.element[i].output_offset = dst_offset; + + dst_offset += emit_sz; + } + + hw_key.nr_elements = vbuf->vinfo->num_attribs; + hw_key.output_stride = vbuf->vinfo->size * 4; + + /* Don't bother with caching at this stage: + */ + if (!vbuf->translate || + memcmp(&vbuf->translate->key, &hw_key, sizeof(hw_key)) != 0) + { + if (vbuf->translate) + vbuf->translate->release(vbuf->translate); + + vbuf->translate = translate_create( &hw_key ); + + vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0); + } + + vbuf->point_size = vbuf->stage.draw->rasterizer->point_size; + + /* Allocate new buffer? + */ + if (!vbuf->vertices) + vbuf_alloc_vertices(vbuf); +} + + +static void +vbuf_first_tri( struct draw_stage *stage, + struct prim_header *prim ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + + vbuf_flush_indices( vbuf ); + stage->tri = vbuf_tri; + vbuf_set_prim(vbuf, PIPE_PRIM_TRIANGLES); + stage->tri( stage, prim ); +} + + +static void +vbuf_first_line( struct draw_stage *stage, + struct prim_header *prim ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + + vbuf_flush_indices( vbuf ); + stage->line = vbuf_line; + vbuf_set_prim(vbuf, PIPE_PRIM_LINES); + stage->line( stage, prim ); +} + + +static void +vbuf_first_point( struct draw_stage *stage, + struct prim_header *prim ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + + vbuf_flush_indices( vbuf ); + stage->point = vbuf_point; + vbuf_set_prim(vbuf, PIPE_PRIM_POINTS); + stage->point( stage, prim ); +} + + +static void +vbuf_flush_indices( struct vbuf_stage *vbuf ) +{ + if(!vbuf->nr_indices) + return; + + assert((uint) (vbuf->vertex_ptr - vbuf->vertices) == + vbuf->nr_vertices * vbuf->vertex_size / sizeof(unsigned)); + + vbuf->render->draw(vbuf->render, vbuf->indices, vbuf->nr_indices); + + vbuf->nr_indices = 0; +} + + +/** + * Flush existing vertex buffer and allocate a new one. + * + * XXX: We separate flush-on-index-full and flush-on-vb-full, but may + * raise issues uploading vertices if the hardware wants to flush when + * we flush. + */ +static void +vbuf_flush_vertices( struct vbuf_stage *vbuf ) +{ + if(vbuf->vertices) { + vbuf_flush_indices(vbuf); + + /* Reset temporary vertices ids */ + if(vbuf->nr_vertices) + draw_reset_vertex_ids( vbuf->stage.draw ); + + /* Free the vertex buffer */ + vbuf->render->release_vertices(vbuf->render, + vbuf->vertices, + vbuf->vertex_size, + vbuf->nr_vertices); + vbuf->max_vertices = vbuf->nr_vertices = 0; + vbuf->vertex_ptr = vbuf->vertices = NULL; + + } +} + + +static void +vbuf_alloc_vertices( struct vbuf_stage *vbuf ) +{ + assert(!vbuf->nr_indices); + assert(!vbuf->vertices); + + /* Allocate a new vertex buffer */ + vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size; + vbuf->vertices = (uint *) vbuf->render->allocate_vertices(vbuf->render, + (ushort) vbuf->vertex_size, + (ushort) vbuf->max_vertices); + vbuf->vertex_ptr = vbuf->vertices; +} + + + +static void +vbuf_flush( struct draw_stage *stage, unsigned flags ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + + vbuf_flush_indices( vbuf ); + + stage->point = vbuf_first_point; + stage->line = vbuf_first_line; + stage->tri = vbuf_first_tri; + + if (flags & DRAW_FLUSH_BACKEND) + vbuf_flush_vertices( vbuf ); +} + + +static void +vbuf_reset_stipple_counter( struct draw_stage *stage ) +{ + /* XXX: Need to do something here for hardware with linestipple. + */ + (void) stage; +} + + +static void vbuf_destroy( struct draw_stage *stage ) +{ + struct vbuf_stage *vbuf = vbuf_stage( stage ); + + if(vbuf->indices) + align_free( vbuf->indices ); + + if(vbuf->translate) + vbuf->translate->release( vbuf->translate ); + + if (vbuf->render) + vbuf->render->destroy( vbuf->render ); + + FREE( stage ); +} + + +/** + * Create a new primitive vbuf/render stage. + */ +struct draw_stage *draw_vbuf_stage( struct draw_context *draw, + struct vbuf_render *render ) +{ + struct vbuf_stage *vbuf = CALLOC_STRUCT(vbuf_stage); + + if(!vbuf) + goto fail; + + vbuf->stage.draw = draw; + vbuf->stage.point = vbuf_first_point; + vbuf->stage.line = vbuf_first_line; + vbuf->stage.tri = vbuf_first_tri; + vbuf->stage.flush = vbuf_flush; + vbuf->stage.reset_stipple_counter = vbuf_reset_stipple_counter; + vbuf->stage.destroy = vbuf_destroy; + + vbuf->render = render; + vbuf->max_indices = MAX2(render->max_indices, UNDEFINED_VERTEX_ID-1); + + vbuf->indices = (ushort *) align_malloc( vbuf->max_indices * + sizeof(vbuf->indices[0]), + 16 ); + if(!vbuf->indices) + goto fail; + + vbuf->vertices = NULL; + vbuf->vertex_ptr = vbuf->vertices; + + return &vbuf->stage; + + fail: + if (vbuf) + vbuf_destroy(&vbuf->stage); + + return NULL; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c new file mode 100644 index 0000000000..9a168ce8bd --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -0,0 +1,190 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "draw_private.h" + + +struct wideline_stage { + struct draw_stage stage; + + float half_line_width; +}; + + + +static INLINE struct wideline_stage *wideline_stage( struct draw_stage *stage ) +{ + return (struct wideline_stage *)stage; +} + + +static void wideline_point( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->point( stage->next, header ); +} + + +static void wideline_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->tri(stage->next, header); +} + + +/** + * Draw a wide line by drawing a quad (two triangles). + * XXX need to disable polygon stipple. + */ +static void wideline_line( struct draw_stage *stage, + struct prim_header *header ) +{ + /*const struct wideline_stage *wide = wideline_stage(stage);*/ + const float half_width = 0.5f * stage->draw->rasterizer->line_width; + + struct prim_header tri; + + struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); + struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); + struct vertex_header *v2 = dup_vert(stage, header->v[1], 2); + struct vertex_header *v3 = dup_vert(stage, header->v[1], 3); + + float *pos0 = v0->data[0]; + float *pos1 = v1->data[0]; + float *pos2 = v2->data[0]; + float *pos3 = v3->data[0]; + + const float dx = FABSF(pos0[0] - pos2[0]); + const float dy = FABSF(pos0[1] - pos2[1]); + + /* small tweak to meet GL specification */ + const float bias = 0.125f; + + /* + * Draw wide line as a quad (two tris) by "stretching" the line along + * X or Y. + * We need to tweak coords in several ways to be conformant here. + */ + + if (dx > dy) { + /* x-major line */ + pos0[1] = pos0[1] - half_width - bias; + pos1[1] = pos1[1] + half_width - bias; + pos2[1] = pos2[1] - half_width - bias; + pos3[1] = pos3[1] + half_width - bias; + if (pos0[0] < pos2[0]) { + /* left to right line */ + pos0[0] -= 0.5f; + pos1[0] -= 0.5f; + pos2[0] -= 0.5f; + pos3[0] -= 0.5f; + } + else { + /* right to left line */ + pos0[0] += 0.5f; + pos1[0] += 0.5f; + pos2[0] += 0.5f; + pos3[0] += 0.5f; + } + } + else { + /* y-major line */ + pos0[0] = pos0[0] - half_width + bias; + pos1[0] = pos1[0] + half_width + bias; + pos2[0] = pos2[0] - half_width + bias; + pos3[0] = pos3[0] + half_width + bias; + if (pos0[1] < pos2[1]) { + /* top to bottom line */ + pos0[1] -= 0.5f; + pos1[1] -= 0.5f; + pos2[1] -= 0.5f; + pos3[1] -= 0.5f; + } + else { + /* bottom to top line */ + pos0[1] += 0.5f; + pos1[1] += 0.5f; + pos2[1] += 0.5f; + pos3[1] += 0.5f; + } + } + + tri.det = header->det; /* only the sign matters */ + tri.v[0] = v0; + tri.v[1] = v2; + tri.v[2] = v3; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v0; + tri.v[1] = v3; + tri.v[2] = v1; + stage->next->tri( stage->next, &tri ); +} + + +static void wideline_flush( struct draw_stage *stage, unsigned flags ) +{ + stage->next->flush( stage->next, flags ); +} + + +static void wideline_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void wideline_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +struct draw_stage *draw_wide_line_stage( struct draw_context *draw ) +{ + struct wideline_stage *wide = CALLOC_STRUCT(wideline_stage); + + draw_alloc_temp_verts( &wide->stage, 4 ); + + wide->stage.draw = draw; + wide->stage.next = NULL; + wide->stage.point = wideline_point; + wide->stage.line = wideline_line; + wide->stage.tri = wideline_tri; + wide->stage.flush = wideline_flush; + wide->stage.reset_stipple_counter = wideline_reset_stipple_counter; + wide->stage.destroy = wideline_destroy; + + return &wide->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c new file mode 100644 index 0000000000..3d0add0c1a --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -0,0 +1,281 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "draw_vs.h" + + +struct widepoint_stage { + struct draw_stage stage; + + float half_point_size; + float point_size_min; + float point_size_max; + + float xbias; + float ybias; + + uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS]; + uint texcoord_mode[PIPE_MAX_SHADER_OUTPUTS]; + uint num_texcoords; + + int psize_slot; +}; + + + +static INLINE struct widepoint_stage * +widepoint_stage( struct draw_stage *stage ) +{ + return (struct widepoint_stage *)stage; +} + + +static void passthrough_point( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->point( stage->next, header ); +} + +static void widepoint_line( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->line(stage->next, header); +} + +static void widepoint_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->tri(stage->next, header); +} + + +/** + * Set the vertex texcoords for sprite mode. + * Coords may be left untouched or set to a right-side-up or upside-down + * orientation. + */ +static void set_texcoords(const struct widepoint_stage *wide, + struct vertex_header *v, const float tc[4]) +{ + uint i; + for (i = 0; i < wide->num_texcoords; i++) { + if (wide->texcoord_mode[i] != PIPE_SPRITE_COORD_NONE) { + uint j = wide->texcoord_slot[i]; + v->data[j][0] = tc[0]; + if (wide->texcoord_mode[i] == PIPE_SPRITE_COORD_LOWER_LEFT) + v->data[j][1] = 1.0f - tc[1]; + else + v->data[j][1] = tc[1]; + v->data[j][2] = tc[2]; + v->data[j][3] = tc[3]; + } + } +} + + +/* If there are lots of sprite points (and why wouldn't there be?) it + * would probably be more sensible to change hardware setup to + * optimize this rather than doing the whole thing in software like + * this. + */ +static void widepoint_point( struct draw_stage *stage, + struct prim_header *header ) +{ + const struct widepoint_stage *wide = widepoint_stage(stage); + const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite; + float half_size; + float left_adj, right_adj, bot_adj, top_adj; + + struct prim_header tri; + + /* four dups of original vertex */ + struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); + struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); + struct vertex_header *v2 = dup_vert(stage, header->v[0], 2); + struct vertex_header *v3 = dup_vert(stage, header->v[0], 3); + + float *pos0 = v0->data[0]; + float *pos1 = v1->data[0]; + float *pos2 = v2->data[0]; + float *pos3 = v3->data[0]; + + /* point size is either per-vertex or fixed size */ + if (wide->psize_slot >= 0) { + half_size = header->v[0]->data[wide->psize_slot][0]; + + /* XXX: temporary -- do this in the vertex shader?? + */ + half_size = CLAMP(half_size, + wide->point_size_min, + wide->point_size_max); + + half_size *= 0.5f; + } + else { + half_size = wide->half_point_size; + } + + left_adj = -half_size + wide->xbias; + right_adj = half_size + wide->xbias; + bot_adj = half_size + wide->ybias; + top_adj = -half_size + wide->ybias; + + pos0[0] += left_adj; + pos0[1] += top_adj; + + pos1[0] += left_adj; + pos1[1] += bot_adj; + + pos2[0] += right_adj; + pos2[1] += top_adj; + + pos3[0] += right_adj; + pos3[1] += bot_adj; + + if (sprite) { + static const float tex00[4] = { 0, 0, 0, 1 }; + static const float tex01[4] = { 0, 1, 0, 1 }; + static const float tex11[4] = { 1, 1, 0, 1 }; + static const float tex10[4] = { 1, 0, 0, 1 }; + set_texcoords( wide, v0, tex00 ); + set_texcoords( wide, v1, tex01 ); + set_texcoords( wide, v2, tex10 ); + set_texcoords( wide, v3, tex11 ); + } + + tri.det = header->det; /* only the sign matters */ + tri.v[0] = v0; + tri.v[1] = v2; + tri.v[2] = v3; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v0; + tri.v[1] = v3; + tri.v[2] = v1; + stage->next->tri( stage->next, &tri ); +} + + +static void widepoint_first_point( struct draw_stage *stage, + struct prim_header *header ) +{ + struct widepoint_stage *wide = widepoint_stage(stage); + struct draw_context *draw = stage->draw; + + wide->half_point_size = 0.5f * draw->rasterizer->point_size; + wide->point_size_min = draw->rasterizer->point_size_min; + wide->point_size_max = draw->rasterizer->point_size_max; + wide->xbias = 0.0; + wide->ybias = 0.0; + + if (draw->rasterizer->gl_rasterization_rules) { + wide->xbias = 0.125; + } + + /* XXX we won't know the real size if it's computed by the vertex shader! */ + if ((draw->rasterizer->point_size > draw->wide_point_threshold) || + (draw->rasterizer->point_sprite && draw->point_sprite)) { + stage->point = widepoint_point; + } + else { + stage->point = passthrough_point; + } + + if (draw->rasterizer->point_sprite) { + /* find vertex shader texcoord outputs */ + const struct draw_vertex_shader *vs = draw->vertex_shader; + uint i, j = 0; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { + wide->texcoord_slot[j] = i; + wide->texcoord_mode[j] = draw->rasterizer->sprite_coord_mode[j]; + j++; + } + } + wide->num_texcoords = j; + } + + wide->psize_slot = -1; + if (draw->rasterizer->point_size_per_vertex) { + /* find PSIZ vertex output */ + const struct draw_vertex_shader *vs = draw->vertex_shader; + uint i; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { + wide->psize_slot = i; + break; + } + } + } + + stage->point( stage, header ); +} + + +static void widepoint_flush( struct draw_stage *stage, unsigned flags ) +{ + stage->point = widepoint_first_point; + stage->next->flush( stage->next, flags ); +} + + +static void widepoint_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void widepoint_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +struct draw_stage *draw_wide_point_stage( struct draw_context *draw ) +{ + struct widepoint_stage *wide = CALLOC_STRUCT(widepoint_stage); + + draw_alloc_temp_verts( &wide->stage, 4 ); + + wide->stage.draw = draw; + wide->stage.next = NULL; + wide->stage.point = widepoint_first_point; + wide->stage.line = widepoint_line; + wide->stage.tri = widepoint_tri; + wide->stage.flush = widepoint_flush; + wide->stage.reset_stipple_counter = widepoint_reset_stipple_counter; + wide->stage.destroy = widepoint_destroy; + + return &wide->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_prims.c b/src/gallium/auxiliary/draw/draw_pipe_wide_prims.c new file mode 100644 index 0000000000..d6bff110b4 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_prims.c @@ -0,0 +1,366 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "draw_private.h" + + +struct wide_stage { + struct draw_stage stage; + + float half_line_width; + float half_point_size; + + uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS]; + uint texcoord_mode[PIPE_MAX_SHADER_OUTPUTS]; + uint num_texcoords; + + int psize_slot; +}; + + + +static INLINE struct wide_stage *wide_stage( struct draw_stage *stage ) +{ + return (struct wide_stage *)stage; +} + + +static void passthrough_point( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->point( stage->next, header ); +} + +static void passthrough_line( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->line(stage->next, header); +} + +static void passthrough_tri( struct draw_stage *stage, + struct prim_header *header ) +{ + stage->next->tri(stage->next, header); +} + + +/** + * Draw a wide line by drawing a quad (two triangles). + * XXX need to disable polygon stipple. + */ +static void wide_line( struct draw_stage *stage, + struct prim_header *header ) +{ + const struct wide_stage *wide = wide_stage(stage); + const float half_width = wide->half_line_width; + + struct prim_header tri; + + struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); + struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); + struct vertex_header *v2 = dup_vert(stage, header->v[1], 2); + struct vertex_header *v3 = dup_vert(stage, header->v[1], 3); + + float *pos0 = v0->data[0]; + float *pos1 = v1->data[0]; + float *pos2 = v2->data[0]; + float *pos3 = v3->data[0]; + + const float dx = FABSF(pos0[0] - pos2[0]); + const float dy = FABSF(pos0[1] - pos2[1]); + + /* + * Draw wide line as a quad (two tris) by "stretching" the line along + * X or Y. + * We need to tweak coords in several ways to be conformant here. + */ + + if (dx > dy) { + /* x-major line */ + pos0[1] = pos0[1] - half_width - 0.25f; + pos1[1] = pos1[1] + half_width - 0.25f; + pos2[1] = pos2[1] - half_width - 0.25f; + pos3[1] = pos3[1] + half_width - 0.25f; + if (pos0[0] < pos2[0]) { + /* left to right line */ + pos0[0] -= 0.5f; + pos1[0] -= 0.5f; + pos2[0] -= 0.5f; + pos3[0] -= 0.5f; + } + else { + /* right to left line */ + pos0[0] += 0.5f; + pos1[0] += 0.5f; + pos2[0] += 0.5f; + pos3[0] += 0.5f; + } + } + else { + /* y-major line */ + pos0[0] = pos0[0] - half_width + 0.25f; + pos1[0] = pos1[0] + half_width + 0.25f; + pos2[0] = pos2[0] - half_width + 0.25f; + pos3[0] = pos3[0] + half_width + 0.25f; + if (pos0[1] < pos2[1]) { + /* top to bottom line */ + pos0[1] -= 0.5f; + pos1[1] -= 0.5f; + pos2[1] -= 0.5f; + pos3[1] -= 0.5f; + } + else { + /* bottom to top line */ + pos0[1] += 0.5f; + pos1[1] += 0.5f; + pos2[1] += 0.5f; + pos3[1] += 0.5f; + } + } + + tri.det = header->det; /* only the sign matters */ + tri.v[0] = v0; + tri.v[1] = v2; + tri.v[2] = v3; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v0; + tri.v[1] = v3; + tri.v[2] = v1; + stage->next->tri( stage->next, &tri ); +} + + +/** + * Set the vertex texcoords for sprite mode. + * Coords may be left untouched or set to a right-side-up or upside-down + * orientation. + */ +static void set_texcoords(const struct wide_stage *wide, + struct vertex_header *v, const float tc[4]) +{ + uint i; + for (i = 0; i < wide->num_texcoords; i++) { + if (wide->texcoord_mode[i] != PIPE_SPRITE_COORD_NONE) { + uint j = wide->texcoord_slot[i]; + v->data[j][0] = tc[0]; + if (wide->texcoord_mode[i] == PIPE_SPRITE_COORD_LOWER_LEFT) + v->data[j][1] = 1.0f - tc[1]; + else + v->data[j][1] = tc[1]; + v->data[j][2] = tc[2]; + v->data[j][3] = tc[3]; + } + } +} + + +/* If there are lots of sprite points (and why wouldn't there be?) it + * would probably be more sensible to change hardware setup to + * optimize this rather than doing the whole thing in software like + * this. + */ +static void wide_point( struct draw_stage *stage, + struct prim_header *header ) +{ + const struct wide_stage *wide = wide_stage(stage); + const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite; + float half_size; + float left_adj, right_adj; + + struct prim_header tri; + + /* four dups of original vertex */ + struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); + struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); + struct vertex_header *v2 = dup_vert(stage, header->v[0], 2); + struct vertex_header *v3 = dup_vert(stage, header->v[0], 3); + + float *pos0 = v0->data[0]; + float *pos1 = v1->data[0]; + float *pos2 = v2->data[0]; + float *pos3 = v3->data[0]; + + /* point size is either per-vertex or fixed size */ + if (wide->psize_slot >= 0) { + half_size = 0.5f * header->v[0]->data[wide->psize_slot][0]; + } + else { + half_size = wide->half_point_size; + } + + left_adj = -half_size; /* + 0.25f;*/ + right_adj = half_size; /* + 0.25f;*/ + + pos0[0] += left_adj; + pos0[1] -= half_size; + + pos1[0] += left_adj; + pos1[1] += half_size; + + pos2[0] += right_adj; + pos2[1] -= half_size; + + pos3[0] += right_adj; + pos3[1] += half_size; + + if (sprite) { + static const float tex00[4] = { 0, 0, 0, 1 }; + static const float tex01[4] = { 0, 1, 0, 1 }; + static const float tex11[4] = { 1, 1, 0, 1 }; + static const float tex10[4] = { 1, 0, 0, 1 }; + set_texcoords( wide, v0, tex00 ); + set_texcoords( wide, v1, tex01 ); + set_texcoords( wide, v2, tex10 ); + set_texcoords( wide, v3, tex11 ); + } + + tri.det = header->det; /* only the sign matters */ + tri.v[0] = v0; + tri.v[1] = v2; + tri.v[2] = v3; + stage->next->tri( stage->next, &tri ); + + tri.v[0] = v0; + tri.v[1] = v3; + tri.v[2] = v1; + stage->next->tri( stage->next, &tri ); +} + + +static void wide_first_point( struct draw_stage *stage, + struct prim_header *header ) +{ + struct wide_stage *wide = wide_stage(stage); + struct draw_context *draw = stage->draw; + + wide->half_point_size = 0.5f * draw->rasterizer->point_size; + + /* XXX we won't know the real size if it's computed by the vertex shader! */ + if (draw->rasterizer->point_size > draw->wide_point_threshold) { + stage->point = wide_point; + } + else { + stage->point = passthrough_point; + } + + if (draw->rasterizer->point_sprite) { + /* find vertex shader texcoord outputs */ + const struct draw_vertex_shader *vs = draw->vertex_shader; + uint i, j = 0; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { + wide->texcoord_slot[j] = i; + wide->texcoord_mode[j] = draw->rasterizer->sprite_coord_mode[j]; + j++; + } + } + wide->num_texcoords = j; + } + + wide->psize_slot = -1; + + if (draw->rasterizer->point_size_per_vertex) { + /* find PSIZ vertex output */ + const struct draw_vertex_shader *vs = draw->vertex_shader; + uint i; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { + wide->psize_slot = i; + break; + } + } + } + + stage->point( stage, header ); +} + + + +static void wide_first_line( struct draw_stage *stage, + struct prim_header *header ) +{ + struct wide_stage *wide = wide_stage(stage); + struct draw_context *draw = stage->draw; + + wide->half_line_width = 0.5f * draw->rasterizer->line_width; + + if (draw->rasterizer->line_width != 1.0) { + wide->stage.line = wide_line; + } + else { + wide->stage.line = passthrough_line; + } + + stage->line( stage, header ); +} + + +static void wide_flush( struct draw_stage *stage, unsigned flags ) +{ + stage->line = wide_first_line; + stage->point = wide_first_point; + stage->next->flush( stage->next, flags ); +} + + +static void wide_reset_stipple_counter( struct draw_stage *stage ) +{ + stage->next->reset_stipple_counter( stage->next ); +} + + +static void wide_destroy( struct draw_stage *stage ) +{ + draw_free_temp_verts( stage ); + FREE( stage ); +} + + +struct draw_stage *draw_wide_stage( struct draw_context *draw ) +{ + struct wide_stage *wide = CALLOC_STRUCT(wide_stage); + + draw_alloc_temp_verts( &wide->stage, 4 ); + + wide->stage.draw = draw; + wide->stage.next = NULL; + wide->stage.point = wide_first_point; + wide->stage.line = wide_first_line; + wide->stage.tri = passthrough_tri; + wide->stage.flush = wide_flush; + wide->stage.reset_stipple_counter = wide_reset_stipple_counter; + wide->stage.destroy = wide_destroy; + + return &wide->stage; +} diff --git a/src/gallium/auxiliary/draw/draw_pstipple.c b/src/gallium/auxiliary/draw/draw_pstipple.c deleted file mode 100644 index 4dddb72906..0000000000 --- a/src/gallium/auxiliary/draw/draw_pstipple.c +++ /dev/null @@ -1,746 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Polygon stipple stage: implement polygon stipple with texture map and - * fragment program. The fragment program samples the texture and does - * a fragment kill for the stipple-failing fragments. - * - * Authors: Brian Paul - */ - - -#include "pipe/p_util.h" -#include "pipe/p_inlines.h" -#include "pipe/p_context.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" - -#include "tgsi/util/tgsi_transform.h" -#include "tgsi/util/tgsi_dump.h" - -#include "draw_context.h" -#include "draw_private.h" - - - -/** - * Subclass of pipe_shader_state to carry extra fragment shader info. - */ -struct pstip_fragment_shader -{ - struct pipe_shader_state state; - void *driver_fs; - void *pstip_fs; - uint sampler_unit; -}; - - -/** - * Subclass of draw_stage - */ -struct pstip_stage -{ - struct draw_stage stage; - - void *sampler_cso; - struct pipe_texture *texture; - uint num_samplers; - uint num_textures; - - /* - * Currently bound state - */ - struct pstip_fragment_shader *fs; - struct { - void *samplers[PIPE_MAX_SAMPLERS]; - struct pipe_texture *textures[PIPE_MAX_SAMPLERS]; - const struct pipe_poly_stipple *stipple; - } state; - - /* - * Driver interface/override functions - */ - void * (*driver_create_fs_state)(struct pipe_context *, - const struct pipe_shader_state *); - void (*driver_bind_fs_state)(struct pipe_context *, void *); - void (*driver_delete_fs_state)(struct pipe_context *, void *); - - void (*driver_bind_sampler_states)(struct pipe_context *, unsigned, void **); - - void (*driver_set_sampler_textures)(struct pipe_context *, unsigned, - struct pipe_texture **); - - void (*driver_set_polygon_stipple)(struct pipe_context *, - const struct pipe_poly_stipple *); - - struct pipe_context *pipe; -}; - - - -/** - * Subclass of tgsi_transform_context, used for transforming the - * user's fragment shader to add the special AA instructions. - */ -struct pstip_transform_context { - struct tgsi_transform_context base; - uint tempsUsed; /**< bitmask */ - int wincoordInput; - int maxInput; - uint samplersUsed; /**< bitfield of samplers used */ - int freeSampler; /** an available sampler for the pstipple */ - int texTemp; /**< temp registers */ - int numImmed; - boolean firstInstruction; -}; - - -/** - * TGSI declaration transform callback. - * Look for a free sampler, a free input attrib, and two free temp regs. - */ -static void -pstip_transform_decl(struct tgsi_transform_context *ctx, - struct tgsi_full_declaration *decl) -{ - struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; - - if (decl->Declaration.File == TGSI_FILE_SAMPLER) { - uint i; - for (i = decl->u.DeclarationRange.First; - i <= decl->u.DeclarationRange.Last; i++) { - pctx->samplersUsed |= 1 << i; - } - } - else if (decl->Declaration.File == TGSI_FILE_INPUT) { - pctx->maxInput = MAX2(pctx->maxInput, (int) decl->u.DeclarationRange.Last); - if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) - pctx->wincoordInput = (int) decl->u.DeclarationRange.First; - } - else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { - uint i; - for (i = decl->u.DeclarationRange.First; - i <= decl->u.DeclarationRange.Last; i++) { - pctx->tempsUsed |= (1 << i); - } - } - - ctx->emit_declaration(ctx, decl); -} - - -static void -pstip_transform_immed(struct tgsi_transform_context *ctx, - struct tgsi_full_immediate *immed) -{ - struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; - pctx->numImmed++; -} - - -/** - * Find the lowest zero bit in the given word, or -1 if bitfield is all ones. - */ -static int -free_bit(uint bitfield) -{ - int i; - for (i = 0; i < 32; i++) { - if ((bitfield & (1 << i)) == 0) - return i; - } - return -1; -} - - -/** - * TGSI instruction transform callback. - * Replace writes to result.color w/ a temp reg. - * Upon END instruction, insert texture sampling code for antialiasing. - */ -static void -pstip_transform_inst(struct tgsi_transform_context *ctx, - struct tgsi_full_instruction *inst) -{ - struct pstip_transform_context *pctx = (struct pstip_transform_context *) ctx; - - if (pctx->firstInstruction) { - /* emit our new declarations before the first instruction */ - - struct tgsi_full_declaration decl; - struct tgsi_full_instruction newInst; - uint i; - int wincoordInput; - - /* find free sampler */ - pctx->freeSampler = free_bit(pctx->samplersUsed); - if (pctx->freeSampler >= PIPE_MAX_SAMPLERS) - pctx->freeSampler = PIPE_MAX_SAMPLERS - 1; - - if (pctx->wincoordInput < 0) - wincoordInput = pctx->maxInput + 1; - else - wincoordInput = pctx->wincoordInput; - - /* find one free temp reg */ - for (i = 0; i < 32; i++) { - if ((pctx->tempsUsed & (1 << i)) == 0) { - /* found a free temp */ - if (pctx->texTemp < 0) - pctx->texTemp = i; - else - break; - } - } - assert(pctx->texTemp >= 0); - - if (pctx->wincoordInput < 0) { - /* declare new position input reg */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_INPUT; - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION; - decl.Semantic.SemanticIndex = 0; - decl.Declaration.Interpolate = 1; - decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? */ - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = wincoordInput; - ctx->emit_declaration(ctx, &decl); - } - - /* declare new sampler */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = pctx->freeSampler; - ctx->emit_declaration(ctx, &decl); - - /* declare new temp regs */ - decl = tgsi_default_full_declaration(); - decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = pctx->texTemp; - ctx->emit_declaration(ctx, &decl); - - /* emit immediate = {1/32, 1/32, 1, 1} - * The index/position of this immediate will be pctx->numImmed - */ - { - static const float value[4] = { 1.0/32, 1.0/32, 1.0, 1.0 }; - struct tgsi_full_immediate immed; - uint size = 4; - immed = tgsi_default_full_immediate(); - immed.Immediate.Size = 1 + size; /* one for the token itself */ - immed.u.ImmediateFloat32 = (struct tgsi_immediate_float32 *) value; - ctx->emit_immediate(ctx, &immed); - } - - pctx->firstInstruction = FALSE; - - - /* - * Insert new MUL/TEX/KILP instructions at start of program - * Take gl_FragCoord, divide by 32 (stipple size), sample the - * texture and kill fragment if needed. - * - * We'd like to use non-normalized texcoords to index into a RECT - * texture, but we can only use GL_REPEAT wrap mode with normalized - * texcoords. Darn. - */ - - /* MUL texTemp, INPUT[wincoord], 1/32; */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_MUL; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; - newInst.Instruction.NumSrcRegs = 2; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT; - newInst.FullSrcRegisters[0].SrcRegister.Index = wincoordInput; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_IMMEDIATE; - newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->numImmed; - ctx->emit_instruction(ctx, &newInst); - - /* TEX texTemp, texTemp, sampler; */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_TEX; - newInst.Instruction.NumDstRegs = 1; - newInst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullDstRegisters[0].DstRegister.Index = pctx->texTemp; - newInst.Instruction.NumSrcRegs = 2; - newInst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; - newInst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER; - newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->freeSampler; - ctx->emit_instruction(ctx, &newInst); - - /* KILP texTemp; # if texTemp < 0, KILL fragment */ - newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_KILP; - newInst.Instruction.NumDstRegs = 0; - newInst.Instruction.NumSrcRegs = 1; - newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; - newInst.FullSrcRegisters[0].SrcRegister.Index = pctx->texTemp; - newInst.FullSrcRegisters[0].SrcRegister.Negate = 1; - ctx->emit_instruction(ctx, &newInst); - } - - /* emit this instruction */ - ctx->emit_instruction(ctx, inst); -} - - -/** - * Generate the frag shader we'll use for doing polygon stipple. - * This will be the user's shader prefixed with a TEX and KIL instruction. - */ -static void -generate_pstip_fs(struct pstip_stage *pstip) -{ - const struct pipe_shader_state *orig_fs = &pstip->fs->state; - /*struct draw_context *draw = pstip->stage.draw;*/ - struct pipe_shader_state pstip_fs; - struct pstip_transform_context transform; - -#define MAX 1000 - - pstip_fs = *orig_fs; /* copy to init */ - pstip_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); - - memset(&transform, 0, sizeof(transform)); - transform.wincoordInput = -1; - transform.maxInput = -1; - transform.texTemp = -1; - transform.firstInstruction = TRUE; - transform.base.transform_instruction = pstip_transform_inst; - transform.base.transform_declaration = pstip_transform_decl; - transform.base.transform_immediate = pstip_transform_immed; - - tgsi_transform_shader(orig_fs->tokens, - (struct tgsi_token *) pstip_fs.tokens, - MAX, &transform.base); - -#if 0 /* DEBUG */ - tgsi_dump(orig_fs->tokens, 0); - tgsi_dump(pstip_fs.tokens, 0); -#endif - - pstip->fs->sampler_unit = transform.freeSampler; - assert(pstip->fs->sampler_unit < PIPE_MAX_SAMPLERS); - - pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs); -} - - -/** - * Load texture image with current stipple pattern. - */ -static void -pstip_update_texture(struct pstip_stage *pstip) -{ - static const uint bit31 = 1 << 31; - struct pipe_context *pipe = pstip->pipe; - struct pipe_screen *screen = pipe->screen; - struct pipe_surface *surface; - const uint *stipple = pstip->state.stipple->stipple; - uint i, j; - ubyte *data; - - surface = screen->get_tex_surface(screen, pstip->texture, 0, 0, 0); - data = pipe_surface_map(surface); - - /* - * Load alpha texture. - * Note: 0 means keep the fragment, 255 means kill it. - * We'll negate the texel value and use KILP which kills if value - * is negative. - */ - for (i = 0; i < 32; i++) { - for (j = 0; j < 32; j++) { - if (stipple[i] & (bit31 >> j)) { - /* fragment "on" */ - data[i * surface->pitch + j] = 0; - } - else { - /* fragment "off" */ - data[i * surface->pitch + j] = 255; - } - } - } - - /* unmap */ - pipe_surface_unmap(surface); - pipe_surface_reference(&surface, NULL); - pipe->texture_update(pipe, pstip->texture, 0, 0x1); -} - - -/** - * Create the texture map we'll use for stippling. - */ -static void -pstip_create_texture(struct pstip_stage *pstip) -{ - struct pipe_context *pipe = pstip->pipe; - struct pipe_screen *screen = pipe->screen; - struct pipe_texture texTemp; - - memset(&texTemp, 0, sizeof(texTemp)); - texTemp.target = PIPE_TEXTURE_2D; - texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */ - texTemp.last_level = 0; - texTemp.width[0] = 32; - texTemp.height[0] = 32; - texTemp.depth[0] = 1; - texTemp.cpp = 1; - - pstip->texture = screen->texture_create(screen, &texTemp); - assert(pstip->texture->refcount == 1); -} - - -/** - * Create the sampler CSO that'll be used for antialiasing. - * By using a mipmapped texture, we don't have to generate a different - * texture image for each line size. - */ -static void -pstip_create_sampler(struct pstip_stage *pstip) -{ - struct pipe_sampler_state sampler; - struct pipe_context *pipe = pstip->pipe; - - memset(&sampler, 0, sizeof(sampler)); - sampler.wrap_s = PIPE_TEX_WRAP_REPEAT; - sampler.wrap_t = PIPE_TEX_WRAP_REPEAT; - sampler.wrap_r = PIPE_TEX_WRAP_REPEAT; - sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; - sampler.min_img_filter = PIPE_TEX_FILTER_NEAREST; - sampler.mag_img_filter = PIPE_TEX_FILTER_NEAREST; - sampler.normalized_coords = 1; - sampler.min_lod = 0.0f; - sampler.max_lod = 0.0f; - - pstip->sampler_cso = pipe->create_sampler_state(pipe, &sampler); -} - - -/** - * When we're about to draw our first AA line in a batch, this function is - * called to tell the driver to bind our modified fragment shader. - */ -static void -bind_pstip_fragment_shader(struct pstip_stage *pstip) -{ - if (!pstip->fs->pstip_fs) { - generate_pstip_fs(pstip); - } - pstip->driver_bind_fs_state(pstip->pipe, pstip->fs->pstip_fs); -} - - - -static INLINE struct pstip_stage * -pstip_stage( struct draw_stage *stage ) -{ - return (struct pstip_stage *) stage; -} - - -static void -passthrough_point(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->point(stage->next, header); -} - - -static void -passthrough_line(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->line(stage->next, header); -} - - -static void -passthrough_tri(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->tri(stage->next, header); -} - - - -static void -pstip_first_tri(struct draw_stage *stage, struct prim_header *header) -{ - struct pstip_stage *pstip = pstip_stage(stage); - struct pipe_context *pipe = pstip->pipe; - uint num_samplers; - - assert(stage->draw->rasterizer->poly_stipple_enable); - - /* bind our fragprog */ - bind_pstip_fragment_shader(pstip); - - /* how many samplers? */ - /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ - num_samplers = MAX2(pstip->num_textures, pstip->num_samplers); - num_samplers = MAX2(num_samplers, pstip->fs->sampler_unit + 1); - - /* plug in our sampler, texture */ - pstip->state.samplers[pstip->fs->sampler_unit] = pstip->sampler_cso; - pipe_texture_reference(&pstip->state.textures[pstip->fs->sampler_unit], - pstip->texture); - - assert(num_samplers <= PIPE_MAX_SAMPLERS); - - pstip->driver_bind_sampler_states(pipe, num_samplers, pstip->state.samplers); - pstip->driver_set_sampler_textures(pipe, num_samplers, pstip->state.textures); - - /* now really draw first line */ - stage->tri = passthrough_tri; - stage->tri(stage, header); -} - - -static void -pstip_flush(struct draw_stage *stage, unsigned flags) -{ - /*struct draw_context *draw = stage->draw;*/ - struct pstip_stage *pstip = pstip_stage(stage); - struct pipe_context *pipe = pstip->pipe; - - stage->tri = pstip_first_tri; - stage->next->flush( stage->next, flags ); - - /* restore original frag shader */ - pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs); - - /* XXX restore original texture, sampler state */ - pstip->driver_bind_sampler_states(pipe, pstip->num_samplers, - pstip->state.samplers); - pstip->driver_set_sampler_textures(pipe, pstip->num_textures, - pstip->state.textures); -} - - -static void -pstip_reset_stipple_counter(struct draw_stage *stage) -{ - stage->next->reset_stipple_counter( stage->next ); -} - - -static void -pstip_destroy(struct draw_stage *stage) -{ - struct pstip_stage *pstip = pstip_stage(stage); - - pstip->pipe->delete_sampler_state(pstip->pipe, pstip->sampler_cso); - - pipe_texture_release(&pstip->texture); - - draw_free_temp_verts( stage ); - FREE( stage ); -} - - -static struct pstip_stage * -draw_pstip_stage(struct draw_context *draw) -{ - struct pstip_stage *pstip = CALLOC_STRUCT(pstip_stage); - - draw_alloc_temp_verts( &pstip->stage, 8 ); - - pstip->stage.draw = draw; - pstip->stage.next = NULL; - pstip->stage.point = passthrough_point; - pstip->stage.line = passthrough_line; - pstip->stage.tri = pstip_first_tri; - pstip->stage.flush = pstip_flush; - pstip->stage.reset_stipple_counter = pstip_reset_stipple_counter; - pstip->stage.destroy = pstip_destroy; - - return pstip; -} - - -static struct pstip_stage * -pstip_stage_from_pipe(struct pipe_context *pipe) -{ - struct draw_context *draw = (struct draw_context *) pipe->draw; - return pstip_stage(draw->pipeline.pstipple); -} - - -/** - * This function overrides the driver's create_fs_state() function and - * will typically be called by the state tracker. - */ -static void * -pstip_create_fs_state(struct pipe_context *pipe, - const struct pipe_shader_state *fs) -{ - struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); - struct pstip_fragment_shader *aafs = CALLOC_STRUCT(pstip_fragment_shader); - - if (aafs) { - aafs->state = *fs; - - /* pass-through */ - aafs->driver_fs = pstip->driver_create_fs_state(pstip->pipe, fs); - } - - return aafs; -} - - -static void -pstip_bind_fs_state(struct pipe_context *pipe, void *fs) -{ - struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); - struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs; - /* save current */ - pstip->fs = aafs; - /* pass-through */ - pstip->driver_bind_fs_state(pstip->pipe, - (aafs ? aafs->driver_fs : NULL)); -} - - -static void -pstip_delete_fs_state(struct pipe_context *pipe, void *fs) -{ - struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); - struct pstip_fragment_shader *aafs = (struct pstip_fragment_shader *) fs; - /* pass-through */ - pstip->driver_delete_fs_state(pstip->pipe, aafs->driver_fs); - FREE(aafs); -} - - -static void -pstip_bind_sampler_states(struct pipe_context *pipe, - unsigned num, void **sampler) -{ - struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); - uint i; - - /* save current */ - memcpy(pstip->state.samplers, sampler, num * sizeof(void *)); - for (i = num; i < PIPE_MAX_SAMPLERS; i++) { - pstip->state.samplers[i] = NULL; - } - - pstip->num_samplers = num; - /* pass-through */ - pstip->driver_bind_sampler_states(pstip->pipe, num, sampler); -} - - -static void -pstip_set_sampler_textures(struct pipe_context *pipe, - unsigned num, struct pipe_texture **texture) -{ - struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); - uint i; - - /* save current */ - for (i = 0; i < num; i++) { - pipe_texture_reference(&pstip->state.textures[i], texture[i]); - } - for (; i < PIPE_MAX_SAMPLERS; i++) { - pipe_texture_reference(&pstip->state.textures[i], NULL); - } - - pstip->num_textures = num; - - /* pass-through */ - pstip->driver_set_sampler_textures(pstip->pipe, num, texture); -} - - -static void -pstip_set_polygon_stipple(struct pipe_context *pipe, - const struct pipe_poly_stipple *stipple) -{ - struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); - /* save current */ - pstip->state.stipple = stipple; - /* pass-through */ - pstip->driver_set_polygon_stipple(pstip->pipe, stipple); - - pstip_update_texture(pstip); -} - - - -/** - * Called by drivers that want to install this AA line prim stage - * into the draw module's pipeline. This will not be used if the - * hardware has native support for AA lines. - */ -void -draw_install_pstipple_stage(struct draw_context *draw, - struct pipe_context *pipe) -{ - struct pstip_stage *pstip; - - pipe->draw = (void *) draw; - - /* - * Create / install AA line drawing / prim stage - */ - pstip = draw_pstip_stage( draw ); - assert(pstip); - draw->pipeline.pstipple = &pstip->stage; - - pstip->pipe = pipe; - - /* create special texture, sampler state */ - pstip_create_texture(pstip); - pstip_create_sampler(pstip); - - /* save original driver functions */ - pstip->driver_create_fs_state = pipe->create_fs_state; - pstip->driver_bind_fs_state = pipe->bind_fs_state; - pstip->driver_delete_fs_state = pipe->delete_fs_state; - - pstip->driver_bind_sampler_states = pipe->bind_sampler_states; - pstip->driver_set_sampler_textures = pipe->set_sampler_textures; - pstip->driver_set_polygon_stipple = pipe->set_polygon_stipple; - - /* override the driver's functions */ - pipe->create_fs_state = pstip_create_fs_state; - pipe->bind_fs_state = pstip_bind_fs_state; - pipe->delete_fs_state = pstip_delete_fs_state; - - pipe->bind_sampler_states = pstip_bind_sampler_states; - pipe->set_sampler_textures = pstip_set_sampler_textures; - pipe->set_polygon_stipple = pstip_set_polygon_stipple; -} diff --git a/src/gallium/auxiliary/draw/draw_stipple.c b/src/gallium/auxiliary/draw/draw_stipple.c deleted file mode 100644 index 506f33512c..0000000000 --- a/src/gallium/auxiliary/draw/draw_stipple.c +++ /dev/null @@ -1,239 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell - */ - -/* Implement line stipple by cutting lines up into smaller lines. - * There are hundreds of ways to implement line stipple, this is one - * choice that should work in all situations, requires no state - * manipulations, but with a penalty in terms of large amounts of - * generated geometry. - */ - - -#include "pipe/p_util.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" -#include "draw_private.h" - - -/** Subclass of draw_stage */ -struct stipple_stage { - struct draw_stage stage; - float counter; - uint pattern; - uint factor; -}; - - -static INLINE struct stipple_stage * -stipple_stage(struct draw_stage *stage) -{ - return (struct stipple_stage *) stage; -} - - -/** - * Compute interpolated vertex attributes for 'dst' at position 't' - * between 'v0' and 'v1'. - * XXX using linear interpolation for all attribs at this time. - */ -static void -screen_interp( struct draw_context *draw, - struct vertex_header *dst, - float t, - const struct vertex_header *v0, - const struct vertex_header *v1 ) -{ - uint attr; - for (attr = 0; attr < draw->num_vs_outputs; attr++) { - const float *val0 = v0->data[attr]; - const float *val1 = v1->data[attr]; - float *newv = dst->data[attr]; - uint i; - for (i = 0; i < 4; i++) { - newv[i] = val0[i] + t * (val1[i] - val0[i]); - } - } -} - - -static void -emit_segment(struct draw_stage *stage, struct prim_header *header, - float t0, float t1) -{ - struct vertex_header *v0new = dup_vert(stage, header->v[0], 0); - struct vertex_header *v1new = dup_vert(stage, header->v[1], 1); - struct prim_header newprim = *header; - - if (t0 > 0.0) { - screen_interp( stage->draw, v0new, t0, header->v[0], header->v[1] ); - newprim.v[0] = v0new; - } - - if (t1 < 1.0) { - screen_interp( stage->draw, v1new, t1, header->v[0], header->v[1] ); - newprim.v[1] = v1new; - } - - stage->next->line( stage->next, &newprim ); -} - - -static INLINE unsigned -stipple_test(int counter, ushort pattern, int factor) -{ - int b = (counter / factor) & 0xf; - return (1 << b) & pattern; -} - - -static void -stipple_line(struct draw_stage *stage, struct prim_header *header) -{ - struct stipple_stage *stipple = stipple_stage(stage); - struct vertex_header *v0 = header->v[0]; - struct vertex_header *v1 = header->v[1]; - const float *pos0 = v0->data[0]; - const float *pos1 = v1->data[0]; - float start = 0; - int state = 0; - - float x0 = pos0[0]; - float x1 = pos1[0]; - float y0 = pos0[1]; - float y1 = pos1[1]; - - float dx = x0 > x1 ? x0 - x1 : x1 - x0; - float dy = y0 > y1 ? y0 - y1 : y1 - y0; - - float length = MAX2(dx, dy); - int i; - - /* XXX ToDo: intead of iterating pixel-by-pixel, use a look-up table. - */ - for (i = 0; i < length; i++) { - int result = stipple_test( (int) stipple->counter+i, - (ushort) stipple->pattern, stipple->factor ); - if (result != state) { - /* changing from "off" to "on" or vice versa */ - if (state) { - if (start != i) { - /* finishing an "on" segment */ - emit_segment( stage, header, start / length, i / length ); - } - } - else { - /* starting an "on" segment */ - start = (float) i; - } - state = result; - } - } - - if (state && start < length) - emit_segment( stage, header, start / length, 1.0 ); - - stipple->counter += length; -} - - -static void -reset_stipple_counter(struct draw_stage *stage) -{ - struct stipple_stage *stipple = stipple_stage(stage); - stipple->counter = 0; - stage->next->reset_stipple_counter( stage->next ); -} - - -static void -stipple_first_line(struct draw_stage *stage, - struct prim_header *header) -{ - struct stipple_stage *stipple = stipple_stage(stage); - struct draw_context *draw = stage->draw; - - stipple->pattern = draw->rasterizer->line_stipple_pattern; - stipple->factor = draw->rasterizer->line_stipple_factor + 1; - - stage->line = stipple_line; - stage->line( stage, header ); -} - - -static void -stipple_flush(struct draw_stage *stage, unsigned flags) -{ - stage->line = stipple_first_line; - stage->next->flush( stage->next, flags ); -} - - -static void -passthrough_point(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->point( stage->next, header ); -} - - -static void -passthrough_tri(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->tri(stage->next, header); -} - - -static void -stipple_destroy( struct draw_stage *stage ) -{ - draw_free_temp_verts( stage ); - FREE( stage ); -} - - -/** - * Create line stippler stage - */ -struct draw_stage *draw_stipple_stage( struct draw_context *draw ) -{ - struct stipple_stage *stipple = CALLOC_STRUCT(stipple_stage); - - draw_alloc_temp_verts( &stipple->stage, 2 ); - - stipple->stage.draw = draw; - stipple->stage.next = NULL; - stipple->stage.point = passthrough_point; - stipple->stage.line = stipple_first_line; - stipple->stage.tri = passthrough_tri; - stipple->stage.reset_stipple_counter = reset_stipple_counter; - stipple->stage.flush = stipple_flush; - stipple->stage.destroy = stipple_destroy; - - return &stipple->stage; -} diff --git a/src/gallium/auxiliary/draw/draw_twoside.c b/src/gallium/auxiliary/draw/draw_twoside.c deleted file mode 100644 index 01d905c153..0000000000 --- a/src/gallium/auxiliary/draw/draw_twoside.c +++ /dev/null @@ -1,203 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell - */ - -#include "pipe/p_util.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" -#include "draw_vs.h" - - -struct twoside_stage { - struct draw_stage stage; - float sign; /**< +1 or -1 */ - uint attrib_front0, attrib_back0; - uint attrib_front1, attrib_back1; -}; - - -static INLINE struct twoside_stage *twoside_stage( struct draw_stage *stage ) -{ - return (struct twoside_stage *)stage; -} - - - - -/** - * Copy back color(s) to front color(s). - */ -static INLINE struct vertex_header * -copy_bfc( struct twoside_stage *twoside, - const struct vertex_header *v, - unsigned idx ) -{ - struct vertex_header *tmp = dup_vert( &twoside->stage, v, idx ); - - if (twoside->attrib_back0) { - COPY_4FV(tmp->data[twoside->attrib_front0], - tmp->data[twoside->attrib_back0]); - } - if (twoside->attrib_back1) { - COPY_4FV(tmp->data[twoside->attrib_front1], - tmp->data[twoside->attrib_back1]); - } - - return tmp; -} - - -/* Twoside tri: - */ -static void twoside_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - struct twoside_stage *twoside = twoside_stage(stage); - - if (header->det * twoside->sign < 0.0) { - /* this is a back-facing triangle */ - struct prim_header tmp; - - tmp.det = header->det; - tmp.edgeflags = header->edgeflags; - /* copy back attribs to front attribs */ - tmp.v[0] = copy_bfc(twoside, header->v[0], 0); - tmp.v[1] = copy_bfc(twoside, header->v[1], 1); - tmp.v[2] = copy_bfc(twoside, header->v[2], 2); - - stage->next->tri( stage->next, &tmp ); - } - else { - stage->next->tri( stage->next, header ); - } -} - - -static void twoside_line( struct draw_stage *stage, - struct prim_header *header ) -{ - /* pass-through */ - stage->next->line( stage->next, header ); -} - - -static void twoside_point( struct draw_stage *stage, - struct prim_header *header ) -{ - /* pass-through */ - stage->next->point( stage->next, header ); -} - - -static void twoside_first_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - struct twoside_stage *twoside = twoside_stage(stage); - const struct draw_vertex_shader *vs = stage->draw->vertex_shader; - uint i; - - twoside->attrib_front0 = 0; - twoside->attrib_front1 = 0; - twoside->attrib_back0 = 0; - twoside->attrib_back1 = 0; - - /* Find which vertex shader outputs are front/back colors */ - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR) { - if (vs->info.output_semantic_index[i] == 0) - twoside->attrib_front0 = i; - else - twoside->attrib_front1 = i; - } - if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { - if (vs->info.output_semantic_index[i] == 0) - twoside->attrib_back0 = i; - else - twoside->attrib_back1 = i; - } - } - - if (!twoside->attrib_back0) - twoside->attrib_front0 = 0; - - if (!twoside->attrib_back1) - twoside->attrib_front1 = 0; - - /* - * We'll multiply the primitive's determinant by this sign to determine - * if the triangle is back-facing (negative). - * sign = -1 for CCW, +1 for CW - */ - twoside->sign = (stage->draw->rasterizer->front_winding == PIPE_WINDING_CCW) ? -1.0f : 1.0f; - - stage->tri = twoside_tri; - stage->tri( stage, header ); -} - - -static void twoside_flush( struct draw_stage *stage, unsigned flags ) -{ - stage->tri = twoside_first_tri; - stage->next->flush( stage->next, flags ); -} - - -static void twoside_reset_stipple_counter( struct draw_stage *stage ) -{ - stage->next->reset_stipple_counter( stage->next ); -} - - -static void twoside_destroy( struct draw_stage *stage ) -{ - draw_free_temp_verts( stage ); - FREE( stage ); -} - - -/** - * Create twoside pipeline stage. - */ -struct draw_stage *draw_twoside_stage( struct draw_context *draw ) -{ - struct twoside_stage *twoside = CALLOC_STRUCT(twoside_stage); - - draw_alloc_temp_verts( &twoside->stage, 3 ); - - twoside->stage.draw = draw; - twoside->stage.next = NULL; - twoside->stage.point = twoside_point; - twoside->stage.line = twoside_line; - twoside->stage.tri = twoside_first_tri; - twoside->stage.flush = twoside_flush; - twoside->stage.reset_stipple_counter = twoside_reset_stipple_counter; - twoside->stage.destroy = twoside_destroy; - - return &twoside->stage; -} diff --git a/src/gallium/auxiliary/draw/draw_unfilled.c b/src/gallium/auxiliary/draw/draw_unfilled.c deleted file mode 100644 index b07860cd9e..0000000000 --- a/src/gallium/auxiliary/draw/draw_unfilled.c +++ /dev/null @@ -1,206 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \brief Drawing stage for handling glPolygonMode(line/point). - * Convert triangles to points or lines as needed. - */ - -/* Authors: Keith Whitwell - */ - -#include "pipe/p_util.h" -#include "pipe/p_defines.h" -#include "draw_private.h" - - -struct unfilled_stage { - struct draw_stage stage; - - /** [0] = front face, [1] = back face. - * legal values: PIPE_POLYGON_MODE_FILL, PIPE_POLYGON_MODE_LINE, - * and PIPE_POLYGON_MODE_POINT, - */ - unsigned mode[2]; -}; - - -static INLINE struct unfilled_stage *unfilled_stage( struct draw_stage *stage ) -{ - return (struct unfilled_stage *)stage; -} - - - -static void point( struct draw_stage *stage, - struct vertex_header *v0 ) -{ - struct prim_header tmp; - tmp.v[0] = v0; - stage->next->point( stage->next, &tmp ); -} - -static void line( struct draw_stage *stage, - struct vertex_header *v0, - struct vertex_header *v1 ) -{ - struct prim_header tmp; - tmp.v[0] = v0; - tmp.v[1] = v1; - stage->next->line( stage->next, &tmp ); -} - - -static void points( struct draw_stage *stage, - struct prim_header *header ) -{ - struct vertex_header *v0 = header->v[0]; - struct vertex_header *v1 = header->v[1]; - struct vertex_header *v2 = header->v[2]; - - if (header->edgeflags & 0x1) point( stage, v0 ); - if (header->edgeflags & 0x2) point( stage, v1 ); - if (header->edgeflags & 0x4) point( stage, v2 ); -} - - -static void lines( struct draw_stage *stage, - struct prim_header *header ) -{ - struct vertex_header *v0 = header->v[0]; - struct vertex_header *v1 = header->v[1]; - struct vertex_header *v2 = header->v[2]; - -#if 0 - assert(((header->edgeflags & 0x1) >> 0) == header->v[0]->edgeflag); - assert(((header->edgeflags & 0x2) >> 1) == header->v[1]->edgeflag); - assert(((header->edgeflags & 0x4) >> 2) == header->v[2]->edgeflag); -#endif - - if (header->edgeflags & 0x4) line( stage, v2, v0 ); - if (header->edgeflags & 0x1) line( stage, v0, v1 ); - if (header->edgeflags & 0x2) line( stage, v1, v2 ); -} - - -/* Unfilled tri: - * - * Note edgeflags in the vertex struct is not sufficient as we will - * need to manipulate them when decomposing primitives??? - */ -static void unfilled_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - struct unfilled_stage *unfilled = unfilled_stage(stage); - unsigned mode = unfilled->mode[header->det >= 0.0]; - - switch (mode) { - case PIPE_POLYGON_MODE_FILL: - stage->next->tri( stage->next, header ); - break; - case PIPE_POLYGON_MODE_LINE: - lines( stage, header ); - break; - case PIPE_POLYGON_MODE_POINT: - points( stage, header ); - break; - default: - assert(0); - } -} - - -static void unfilled_first_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - struct unfilled_stage *unfilled = unfilled_stage(stage); - - unfilled->mode[0] = stage->draw->rasterizer->fill_ccw; /* front */ - unfilled->mode[1] = stage->draw->rasterizer->fill_cw; /* back */ - - stage->tri = unfilled_tri; - stage->tri( stage, header ); -} - - -static void unfilled_line( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->line( stage->next, header ); -} - - -static void unfilled_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - - -static void unfilled_flush( struct draw_stage *stage, - unsigned flags ) -{ - stage->next->flush( stage->next, flags ); - - stage->tri = unfilled_first_tri; -} - - -static void unfilled_reset_stipple_counter( struct draw_stage *stage ) -{ - stage->next->reset_stipple_counter( stage->next ); -} - - -static void unfilled_destroy( struct draw_stage *stage ) -{ - draw_free_temp_verts( stage ); - FREE( stage ); -} - - -/** - * Create unfilled triangle stage. - */ -struct draw_stage *draw_unfilled_stage( struct draw_context *draw ) -{ - struct unfilled_stage *unfilled = CALLOC_STRUCT(unfilled_stage); - - draw_alloc_temp_verts( &unfilled->stage, 0 ); - - unfilled->stage.draw = draw; - unfilled->stage.next = NULL; - unfilled->stage.tmp = NULL; - unfilled->stage.point = unfilled_point; - unfilled->stage.line = unfilled_line; - unfilled->stage.tri = unfilled_first_tri; - unfilled->stage.flush = unfilled_flush; - unfilled->stage.reset_stipple_counter = unfilled_reset_stipple_counter; - unfilled->stage.destroy = unfilled_destroy; - - return &unfilled->stage; -} diff --git a/src/gallium/auxiliary/draw/draw_validate.c b/src/gallium/auxiliary/draw/draw_validate.c deleted file mode 100644 index e163e078f0..0000000000 --- a/src/gallium/auxiliary/draw/draw_validate.c +++ /dev/null @@ -1,312 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell - */ - -#include "pipe/p_util.h" -#include "pipe/p_defines.h" -#include "draw_private.h" - -static boolean points( unsigned prim ) -{ - return (prim == PIPE_PRIM_POINTS); -} - -static boolean lines( unsigned prim ) -{ - return (prim == PIPE_PRIM_LINES || - prim == PIPE_PRIM_LINE_STRIP || - prim == PIPE_PRIM_LINE_LOOP); -} - -static boolean triangles( unsigned prim ) -{ - return prim >= PIPE_PRIM_TRIANGLES; -} - -/** - * Check if we need any special pipeline stages, or whether - * prims/verts can go through untouched. Don't test for bypass - * clipping or vs modes, this function is just about the primitive - * pipeline stages. - */ -boolean -draw_need_pipeline(const struct draw_context *draw, - unsigned int prim ) -{ - /* Don't have to worry about triangles turning into lines/points - * and triggering the pipeline, because we have to trigger the - * pipeline *anyway* if unfilled mode is active. - */ - if (lines(prim)) - { - /* line stipple */ - if (draw->rasterizer->line_stipple_enable && draw->line_stipple) - return TRUE; - - /* wide lines */ - if (draw->rasterizer->line_width > draw->wide_line_threshold) - return TRUE; - - /* AA lines */ - if (draw->rasterizer->line_smooth && draw->pipeline.aaline) - return TRUE; - } - - if (points(prim)) - { - /* large points */ - if (draw->rasterizer->point_size > draw->wide_point_threshold) - return TRUE; - - /* AA points */ - if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) - return TRUE; - - /* point sprites */ - if (draw->rasterizer->point_sprite && draw->point_sprite) - return TRUE; - } - - - if (triangles(prim)) - { - /* polygon stipple */ - if (draw->rasterizer->poly_stipple_enable && draw->pipeline.pstipple) - return TRUE; - - /* unfilled polygons */ - if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || - draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) - return TRUE; - - /* polygon offset */ - if (draw->rasterizer->offset_cw || draw->rasterizer->offset_ccw) - return TRUE; - - /* two-side lighting */ - if (draw->rasterizer->light_twoside) - return TRUE; - } - - /* polygon cull - this is difficult - hardware can cull just fine - * most of the time (though sometimes CULL_NEITHER is unsupported. - * - * Generally this isn't a reason to require the pipeline, though. - * - if (draw->rasterizer->cull_mode) - return TRUE; - */ - - return FALSE; -} - - - -/** - * Rebuild the rendering pipeline. - */ -static struct draw_stage *validate_pipeline( struct draw_stage *stage ) -{ - struct draw_context *draw = stage->draw; - struct draw_stage *next = draw->pipeline.rasterize; - int need_det = 0; - int precalc_flat = 0; - boolean wide_lines, wide_points; - - /* Set the validate's next stage to the rasterize stage, so that it - * can be found later if needed for flushing. - */ - stage->next = next; - - /* drawing wide lines? */ - wide_lines = (draw->rasterizer->line_width > draw->wide_line_threshold - && !draw->rasterizer->line_smooth); - - /* drawing large points? */ - if (draw->rasterizer->point_sprite && draw->point_sprite) - wide_points = TRUE; - else if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) - wide_points = FALSE; - else if (draw->rasterizer->point_size > draw->wide_point_threshold) - wide_points = TRUE; - else - wide_points = FALSE; - - /* - * NOTE: we build up the pipeline in end-to-start order. - * - * TODO: make the current primitive part of the state and build - * shorter pipelines for lines & points. - */ - - if (draw->rasterizer->line_smooth && draw->pipeline.aaline) { - draw->pipeline.aaline->next = next; - next = draw->pipeline.aaline; - } - - if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) { - draw->pipeline.aapoint->next = next; - next = draw->pipeline.aapoint; - } - - if (wide_lines) { - draw->pipeline.wide_line->next = next; - next = draw->pipeline.wide_line; - precalc_flat = 1; - } - - if (wide_points || draw->rasterizer->point_sprite) { - draw->pipeline.wide_point->next = next; - next = draw->pipeline.wide_point; - } - - if (draw->rasterizer->line_stipple_enable && draw->line_stipple) { - draw->pipeline.stipple->next = next; - next = draw->pipeline.stipple; - precalc_flat = 1; /* only needed for lines really */ - } - - if (draw->rasterizer->poly_stipple_enable - && draw->pipeline.pstipple) { - draw->pipeline.pstipple->next = next; - next = draw->pipeline.pstipple; - } - - if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || - draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) { - draw->pipeline.unfilled->next = next; - next = draw->pipeline.unfilled; - precalc_flat = 1; /* only needed for triangles really */ - need_det = 1; - } - - if (draw->rasterizer->flatshade && precalc_flat) { - draw->pipeline.flatshade->next = next; - next = draw->pipeline.flatshade; - } - - if (draw->rasterizer->offset_cw || - draw->rasterizer->offset_ccw) { - draw->pipeline.offset->next = next; - next = draw->pipeline.offset; - need_det = 1; - } - - if (draw->rasterizer->light_twoside) { - draw->pipeline.twoside->next = next; - next = draw->pipeline.twoside; - need_det = 1; - } - - /* Always run the cull stage as we calculate determinant there - * also. - * - * This can actually be a win as culling out the triangles can lead - * to less work emitting vertices, smaller vertex buffers, etc. - * It's difficult to say whether this will be true in general. - */ - if (need_det || draw->rasterizer->cull_mode) { - draw->pipeline.cull->next = next; - next = draw->pipeline.cull; - } - - /* Clip stage - */ - if (!draw->rasterizer->bypass_clipping) - { - draw->pipeline.clip->next = next; - next = draw->pipeline.clip; - } - - - draw->pipeline.first = next; - return next; -} - -static void validate_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - struct draw_stage *pipeline = validate_pipeline( stage ); - pipeline->tri( pipeline, header ); -} - -static void validate_line( struct draw_stage *stage, - struct prim_header *header ) -{ - struct draw_stage *pipeline = validate_pipeline( stage ); - pipeline->line( pipeline, header ); -} - -static void validate_point( struct draw_stage *stage, - struct prim_header *header ) -{ - struct draw_stage *pipeline = validate_pipeline( stage ); - pipeline->point( pipeline, header ); -} - -static void validate_reset_stipple_counter( struct draw_stage *stage ) -{ - struct draw_stage *pipeline = validate_pipeline( stage ); - pipeline->reset_stipple_counter( pipeline ); -} - -static void validate_flush( struct draw_stage *stage, - unsigned flags ) -{ - /* May need to pass a backend flush on to the rasterize stage. - */ - if (stage->next) - stage->next->flush( stage->next, flags ); -} - - -static void validate_destroy( struct draw_stage *stage ) -{ - FREE( stage ); -} - - -/** - * Create validate pipeline stage. - */ -struct draw_stage *draw_validate_stage( struct draw_context *draw ) -{ - struct draw_stage *stage = CALLOC_STRUCT(draw_stage); - - stage->draw = draw; - stage->next = NULL; - stage->point = validate_point; - stage->line = validate_line; - stage->tri = validate_tri; - stage->flush = validate_flush; - stage->reset_stipple_counter = validate_reset_stipple_counter; - stage->destroy = validate_destroy; - - return stage; -} diff --git a/src/gallium/auxiliary/draw/draw_vbuf.c b/src/gallium/auxiliary/draw/draw_vbuf.c deleted file mode 100644 index 30dceeb43d..0000000000 --- a/src/gallium/auxiliary/draw/draw_vbuf.c +++ /dev/null @@ -1,529 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \file - * Vertex buffer drawing stage. - * - * \author José Fonseca - * \author Keith Whitwell - */ - - -#include "pipe/p_debug.h" -#include "pipe/p_util.h" - -#include "draw_vbuf.h" -#include "draw_private.h" -#include "draw_vertex.h" -#include "translate/translate.h" - - -/** - * Vertex buffer emit stage. - */ -struct vbuf_stage { - struct draw_stage stage; /**< This must be first (base class) */ - - struct vbuf_render *render; - - const struct vertex_info *vinfo; - - /** Vertex size in bytes */ - unsigned vertex_size; - - struct translate *translate; - - /* FIXME: we have no guarantee that 'unsigned' is 32bit */ - - /** Vertices in hardware format */ - unsigned *vertices; - unsigned *vertex_ptr; - unsigned max_vertices; - unsigned nr_vertices; - - /** Indices */ - ushort *indices; - unsigned max_indices; - unsigned nr_indices; - - /* Cache point size somewhere it's address won't change: - */ - float point_size; -}; - - -/** - * Basically a cast wrapper. - */ -static INLINE struct vbuf_stage * -vbuf_stage( struct draw_stage *stage ) -{ - assert(stage); - return (struct vbuf_stage *)stage; -} - - -static void vbuf_flush_indices( struct vbuf_stage *vbuf ); -static void vbuf_flush_vertices( struct vbuf_stage *vbuf ); -static void vbuf_alloc_vertices( struct vbuf_stage *vbuf ); - - -static INLINE boolean -overflow( void *map, void *ptr, unsigned bytes, unsigned bufsz ) -{ - unsigned long used = (unsigned long) ((char *)ptr - (char *)map); - return (used + bytes) > bufsz; -} - - -static INLINE void -check_space( struct vbuf_stage *vbuf, unsigned nr ) -{ - if (vbuf->nr_vertices + nr > vbuf->max_vertices ) { - vbuf_flush_vertices(vbuf); - vbuf_alloc_vertices(vbuf); - } - - if (vbuf->nr_indices + nr > vbuf->max_indices ) - vbuf_flush_indices(vbuf); -} - - -static INLINE void -dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) -{ -// assert(vinfo == vbuf->render->get_vertex_info(vbuf->render)); - unsigned i, j; - - for (i = 0; i < vinfo->num_attribs; i++) { - j = vinfo->src_index[i]; - switch (vinfo->emit[i]) { - case EMIT_OMIT: - debug_printf("EMIT_OMIT:"); - break; - case EMIT_1F: - debug_printf("EMIT_1F:\t"); - debug_printf("%f ", *(float *)data); data += sizeof(float); - break; - case EMIT_1F_PSIZE: - debug_printf("EMIT_1F_PSIZE:\t"); - debug_printf("%f ", *(float *)data); data += sizeof(float); - break; - case EMIT_2F: - debug_printf("EMIT_2F:\t"); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - break; - case EMIT_3F: - debug_printf("EMIT_3F:\t"); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - data += sizeof(float); - break; - case EMIT_4F: - debug_printf("EMIT_4F:\t"); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - break; - case EMIT_4UB: - debug_printf("EMIT_4UB:\t"); - debug_printf("%u ", *data++); - debug_printf("%u ", *data++); - debug_printf("%u ", *data++); - debug_printf("%u ", *data++); - break; - default: - assert(0); - } - debug_printf("\n"); - } - debug_printf("\n"); -} - - -/** - * Extract the needed fields from post-transformed vertex and emit - * a hardware(driver) vertex. - * Recall that the vertices are constructed by the 'draw' module and - * have a couple of slots at the beginning (1-dword header, 4-dword - * clip pos) that we ignore here. We only use the vertex->data[] fields. - */ -static INLINE ushort -emit_vertex( struct vbuf_stage *vbuf, - struct vertex_header *vertex ) -{ - if(vertex->vertex_id == UNDEFINED_VERTEX_ID) { - /* Hmm - vertices are emitted one at a time - better make sure - * set_buffer is efficient. Consider a special one-shot mode for - * translate. - */ - vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0); - vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr); - - if (0) dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); - - vbuf->vertex_ptr += vbuf->vertex_size/4; - vertex->vertex_id = vbuf->nr_vertices++; - } - - return vertex->vertex_id; -} - - -static void -vbuf_tri( struct draw_stage *stage, - struct prim_header *prim ) -{ - struct vbuf_stage *vbuf = vbuf_stage( stage ); - unsigned i; - - check_space( vbuf, 3 ); - - for (i = 0; i < 3; i++) { - vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] ); - } -} - - -static void -vbuf_line( struct draw_stage *stage, - struct prim_header *prim ) -{ - struct vbuf_stage *vbuf = vbuf_stage( stage ); - unsigned i; - - check_space( vbuf, 2 ); - - for (i = 0; i < 2; i++) { - vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] ); - } -} - - -static void -vbuf_point( struct draw_stage *stage, - struct prim_header *prim ) -{ - struct vbuf_stage *vbuf = vbuf_stage( stage ); - - check_space( vbuf, 1 ); - - vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[0] ); -} - - - - -/** - * Set the prim type for subsequent vertices. - * This may result in a new vertex size. The existing vbuffer (if any) - * will be flushed if needed and a new one allocated. - */ -static void -vbuf_set_prim( struct vbuf_stage *vbuf, uint prim ) -{ - struct translate_key hw_key; - unsigned dst_offset; - unsigned i; - - vbuf->render->set_primitive(vbuf->render, prim); - - /* Must do this after set_primitive() above: - * - * XXX: need some state managment to track when this needs to be - * recalculated. The driver should tell us whether there was a - * state change. - */ - vbuf->vinfo = vbuf->render->get_vertex_info(vbuf->render); - - if (vbuf->vertex_size != vbuf->vinfo->size * sizeof(float)) { - vbuf_flush_vertices(vbuf); - vbuf->vertex_size = vbuf->vinfo->size * sizeof(float); - } - - /* Translate from pipeline vertices to hw vertices. - */ - dst_offset = 0; - memset(&hw_key, 0, sizeof(hw_key)); - - for (i = 0; i < vbuf->vinfo->num_attribs; i++) { - unsigned emit_sz = 0; - unsigned src_buffer = 0; - unsigned output_format; - unsigned src_offset = (vbuf->vinfo->src_index[i] * 4 * sizeof(float) ); - - switch (vbuf->vinfo->emit[i]) { - case EMIT_4F: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - emit_sz = 4 * sizeof(float); - break; - case EMIT_3F: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - emit_sz = 3 * sizeof(float); - break; - case EMIT_2F: - output_format = PIPE_FORMAT_R32G32_FLOAT; - emit_sz = 2 * sizeof(float); - break; - case EMIT_1F: - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); - break; - case EMIT_1F_PSIZE: - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); - src_buffer = 1; - src_offset = 0; - break; - case EMIT_4UB: - output_format = PIPE_FORMAT_B8G8R8A8_UNORM; - emit_sz = 4 * sizeof(ubyte); - default: - assert(0); - output_format = PIPE_FORMAT_NONE; - emit_sz = 0; - break; - } - - hw_key.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - hw_key.element[i].input_buffer = src_buffer; - hw_key.element[i].input_offset = src_offset; - hw_key.element[i].output_format = output_format; - hw_key.element[i].output_offset = dst_offset; - - dst_offset += emit_sz; - } - - hw_key.nr_elements = vbuf->vinfo->num_attribs; - hw_key.output_stride = vbuf->vinfo->size * 4; - - /* Don't bother with caching at this stage: - */ - if (!vbuf->translate || - memcmp(&vbuf->translate->key, &hw_key, sizeof(hw_key)) != 0) - { - if (vbuf->translate) - vbuf->translate->release(vbuf->translate); - - vbuf->translate = translate_create( &hw_key ); - - vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0); - } - - vbuf->point_size = vbuf->stage.draw->rasterizer->point_size; - - /* Allocate new buffer? - */ - if (!vbuf->vertices) - vbuf_alloc_vertices(vbuf); -} - - -static void -vbuf_first_tri( struct draw_stage *stage, - struct prim_header *prim ) -{ - struct vbuf_stage *vbuf = vbuf_stage( stage ); - - vbuf_flush_indices( vbuf ); - stage->tri = vbuf_tri; - vbuf_set_prim(vbuf, PIPE_PRIM_TRIANGLES); - stage->tri( stage, prim ); -} - - -static void -vbuf_first_line( struct draw_stage *stage, - struct prim_header *prim ) -{ - struct vbuf_stage *vbuf = vbuf_stage( stage ); - - vbuf_flush_indices( vbuf ); - stage->line = vbuf_line; - vbuf_set_prim(vbuf, PIPE_PRIM_LINES); - stage->line( stage, prim ); -} - - -static void -vbuf_first_point( struct draw_stage *stage, - struct prim_header *prim ) -{ - struct vbuf_stage *vbuf = vbuf_stage( stage ); - - vbuf_flush_indices( vbuf ); - stage->point = vbuf_point; - vbuf_set_prim(vbuf, PIPE_PRIM_POINTS); - stage->point( stage, prim ); -} - - -static void -vbuf_flush_indices( struct vbuf_stage *vbuf ) -{ - if(!vbuf->nr_indices) - return; - - assert((uint) (vbuf->vertex_ptr - vbuf->vertices) == - vbuf->nr_vertices * vbuf->vertex_size / sizeof(unsigned)); - - vbuf->render->draw(vbuf->render, vbuf->indices, vbuf->nr_indices); - - vbuf->nr_indices = 0; -} - - -/** - * Flush existing vertex buffer and allocate a new one. - * - * XXX: We separate flush-on-index-full and flush-on-vb-full, but may - * raise issues uploading vertices if the hardware wants to flush when - * we flush. - */ -static void -vbuf_flush_vertices( struct vbuf_stage *vbuf ) -{ - if(vbuf->vertices) { - vbuf_flush_indices(vbuf); - - /* Reset temporary vertices ids */ - if(vbuf->nr_vertices) - draw_reset_vertex_ids( vbuf->stage.draw ); - - /* Free the vertex buffer */ - vbuf->render->release_vertices(vbuf->render, - vbuf->vertices, - vbuf->vertex_size, - vbuf->nr_vertices); - vbuf->max_vertices = vbuf->nr_vertices = 0; - vbuf->vertex_ptr = vbuf->vertices = NULL; - - } -} - - -static void -vbuf_alloc_vertices( struct vbuf_stage *vbuf ) -{ - assert(!vbuf->nr_indices); - assert(!vbuf->vertices); - - /* Allocate a new vertex buffer */ - vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size; - vbuf->vertices = (uint *) vbuf->render->allocate_vertices(vbuf->render, - (ushort) vbuf->vertex_size, - (ushort) vbuf->max_vertices); - vbuf->vertex_ptr = vbuf->vertices; -} - - - -static void -vbuf_flush( struct draw_stage *stage, unsigned flags ) -{ - struct vbuf_stage *vbuf = vbuf_stage( stage ); - - vbuf_flush_indices( vbuf ); - - stage->point = vbuf_first_point; - stage->line = vbuf_first_line; - stage->tri = vbuf_first_tri; - - if (flags & DRAW_FLUSH_BACKEND) - vbuf_flush_vertices( vbuf ); -} - - -static void -vbuf_reset_stipple_counter( struct draw_stage *stage ) -{ - /* XXX: Need to do something here for hardware with linestipple. - */ - (void) stage; -} - - -static void vbuf_destroy( struct draw_stage *stage ) -{ - struct vbuf_stage *vbuf = vbuf_stage( stage ); - - if(vbuf->indices) - align_free( vbuf->indices ); - - if(vbuf->translate) - vbuf->translate->release( vbuf->translate ); - - if (vbuf->render) - vbuf->render->destroy( vbuf->render ); - - FREE( stage ); -} - - -/** - * Create a new primitive vbuf/render stage. - */ -struct draw_stage *draw_vbuf_stage( struct draw_context *draw, - struct vbuf_render *render ) -{ - struct vbuf_stage *vbuf = CALLOC_STRUCT(vbuf_stage); - - if(!vbuf) - goto fail; - - vbuf->stage.draw = draw; - vbuf->stage.point = vbuf_first_point; - vbuf->stage.line = vbuf_first_line; - vbuf->stage.tri = vbuf_first_tri; - vbuf->stage.flush = vbuf_flush; - vbuf->stage.reset_stipple_counter = vbuf_reset_stipple_counter; - vbuf->stage.destroy = vbuf_destroy; - - vbuf->render = render; - vbuf->max_indices = MAX2(render->max_indices, UNDEFINED_VERTEX_ID-1); - - vbuf->indices = (ushort *) align_malloc( vbuf->max_indices * - sizeof(vbuf->indices[0]), - 16 ); - if(!vbuf->indices) - goto fail; - - vbuf->vertices = NULL; - vbuf->vertex_ptr = vbuf->vertices; - - return &vbuf->stage; - - fail: - if (vbuf) - vbuf_destroy(&vbuf->stage); - - return NULL; -} diff --git a/src/gallium/auxiliary/draw/draw_wide_line.c b/src/gallium/auxiliary/draw/draw_wide_line.c deleted file mode 100644 index 9a168ce8bd..0000000000 --- a/src/gallium/auxiliary/draw/draw_wide_line.c +++ /dev/null @@ -1,190 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell - */ - -#include "pipe/p_util.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" -#include "draw_private.h" - - -struct wideline_stage { - struct draw_stage stage; - - float half_line_width; -}; - - - -static INLINE struct wideline_stage *wideline_stage( struct draw_stage *stage ) -{ - return (struct wideline_stage *)stage; -} - - -static void wideline_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - - -static void wideline_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->tri(stage->next, header); -} - - -/** - * Draw a wide line by drawing a quad (two triangles). - * XXX need to disable polygon stipple. - */ -static void wideline_line( struct draw_stage *stage, - struct prim_header *header ) -{ - /*const struct wideline_stage *wide = wideline_stage(stage);*/ - const float half_width = 0.5f * stage->draw->rasterizer->line_width; - - struct prim_header tri; - - struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); - struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); - struct vertex_header *v2 = dup_vert(stage, header->v[1], 2); - struct vertex_header *v3 = dup_vert(stage, header->v[1], 3); - - float *pos0 = v0->data[0]; - float *pos1 = v1->data[0]; - float *pos2 = v2->data[0]; - float *pos3 = v3->data[0]; - - const float dx = FABSF(pos0[0] - pos2[0]); - const float dy = FABSF(pos0[1] - pos2[1]); - - /* small tweak to meet GL specification */ - const float bias = 0.125f; - - /* - * Draw wide line as a quad (two tris) by "stretching" the line along - * X or Y. - * We need to tweak coords in several ways to be conformant here. - */ - - if (dx > dy) { - /* x-major line */ - pos0[1] = pos0[1] - half_width - bias; - pos1[1] = pos1[1] + half_width - bias; - pos2[1] = pos2[1] - half_width - bias; - pos3[1] = pos3[1] + half_width - bias; - if (pos0[0] < pos2[0]) { - /* left to right line */ - pos0[0] -= 0.5f; - pos1[0] -= 0.5f; - pos2[0] -= 0.5f; - pos3[0] -= 0.5f; - } - else { - /* right to left line */ - pos0[0] += 0.5f; - pos1[0] += 0.5f; - pos2[0] += 0.5f; - pos3[0] += 0.5f; - } - } - else { - /* y-major line */ - pos0[0] = pos0[0] - half_width + bias; - pos1[0] = pos1[0] + half_width + bias; - pos2[0] = pos2[0] - half_width + bias; - pos3[0] = pos3[0] + half_width + bias; - if (pos0[1] < pos2[1]) { - /* top to bottom line */ - pos0[1] -= 0.5f; - pos1[1] -= 0.5f; - pos2[1] -= 0.5f; - pos3[1] -= 0.5f; - } - else { - /* bottom to top line */ - pos0[1] += 0.5f; - pos1[1] += 0.5f; - pos2[1] += 0.5f; - pos3[1] += 0.5f; - } - } - - tri.det = header->det; /* only the sign matters */ - tri.v[0] = v0; - tri.v[1] = v2; - tri.v[2] = v3; - stage->next->tri( stage->next, &tri ); - - tri.v[0] = v0; - tri.v[1] = v3; - tri.v[2] = v1; - stage->next->tri( stage->next, &tri ); -} - - -static void wideline_flush( struct draw_stage *stage, unsigned flags ) -{ - stage->next->flush( stage->next, flags ); -} - - -static void wideline_reset_stipple_counter( struct draw_stage *stage ) -{ - stage->next->reset_stipple_counter( stage->next ); -} - - -static void wideline_destroy( struct draw_stage *stage ) -{ - draw_free_temp_verts( stage ); - FREE( stage ); -} - - -struct draw_stage *draw_wide_line_stage( struct draw_context *draw ) -{ - struct wideline_stage *wide = CALLOC_STRUCT(wideline_stage); - - draw_alloc_temp_verts( &wide->stage, 4 ); - - wide->stage.draw = draw; - wide->stage.next = NULL; - wide->stage.point = wideline_point; - wide->stage.line = wideline_line; - wide->stage.tri = wideline_tri; - wide->stage.flush = wideline_flush; - wide->stage.reset_stipple_counter = wideline_reset_stipple_counter; - wide->stage.destroy = wideline_destroy; - - return &wide->stage; -} diff --git a/src/gallium/auxiliary/draw/draw_wide_point.c b/src/gallium/auxiliary/draw/draw_wide_point.c deleted file mode 100644 index 3d0add0c1a..0000000000 --- a/src/gallium/auxiliary/draw/draw_wide_point.c +++ /dev/null @@ -1,281 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell - */ - -#include "pipe/p_util.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" -#include "draw_vs.h" - - -struct widepoint_stage { - struct draw_stage stage; - - float half_point_size; - float point_size_min; - float point_size_max; - - float xbias; - float ybias; - - uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS]; - uint texcoord_mode[PIPE_MAX_SHADER_OUTPUTS]; - uint num_texcoords; - - int psize_slot; -}; - - - -static INLINE struct widepoint_stage * -widepoint_stage( struct draw_stage *stage ) -{ - return (struct widepoint_stage *)stage; -} - - -static void passthrough_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - -static void widepoint_line( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->line(stage->next, header); -} - -static void widepoint_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->tri(stage->next, header); -} - - -/** - * Set the vertex texcoords for sprite mode. - * Coords may be left untouched or set to a right-side-up or upside-down - * orientation. - */ -static void set_texcoords(const struct widepoint_stage *wide, - struct vertex_header *v, const float tc[4]) -{ - uint i; - for (i = 0; i < wide->num_texcoords; i++) { - if (wide->texcoord_mode[i] != PIPE_SPRITE_COORD_NONE) { - uint j = wide->texcoord_slot[i]; - v->data[j][0] = tc[0]; - if (wide->texcoord_mode[i] == PIPE_SPRITE_COORD_LOWER_LEFT) - v->data[j][1] = 1.0f - tc[1]; - else - v->data[j][1] = tc[1]; - v->data[j][2] = tc[2]; - v->data[j][3] = tc[3]; - } - } -} - - -/* If there are lots of sprite points (and why wouldn't there be?) it - * would probably be more sensible to change hardware setup to - * optimize this rather than doing the whole thing in software like - * this. - */ -static void widepoint_point( struct draw_stage *stage, - struct prim_header *header ) -{ - const struct widepoint_stage *wide = widepoint_stage(stage); - const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite; - float half_size; - float left_adj, right_adj, bot_adj, top_adj; - - struct prim_header tri; - - /* four dups of original vertex */ - struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); - struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); - struct vertex_header *v2 = dup_vert(stage, header->v[0], 2); - struct vertex_header *v3 = dup_vert(stage, header->v[0], 3); - - float *pos0 = v0->data[0]; - float *pos1 = v1->data[0]; - float *pos2 = v2->data[0]; - float *pos3 = v3->data[0]; - - /* point size is either per-vertex or fixed size */ - if (wide->psize_slot >= 0) { - half_size = header->v[0]->data[wide->psize_slot][0]; - - /* XXX: temporary -- do this in the vertex shader?? - */ - half_size = CLAMP(half_size, - wide->point_size_min, - wide->point_size_max); - - half_size *= 0.5f; - } - else { - half_size = wide->half_point_size; - } - - left_adj = -half_size + wide->xbias; - right_adj = half_size + wide->xbias; - bot_adj = half_size + wide->ybias; - top_adj = -half_size + wide->ybias; - - pos0[0] += left_adj; - pos0[1] += top_adj; - - pos1[0] += left_adj; - pos1[1] += bot_adj; - - pos2[0] += right_adj; - pos2[1] += top_adj; - - pos3[0] += right_adj; - pos3[1] += bot_adj; - - if (sprite) { - static const float tex00[4] = { 0, 0, 0, 1 }; - static const float tex01[4] = { 0, 1, 0, 1 }; - static const float tex11[4] = { 1, 1, 0, 1 }; - static const float tex10[4] = { 1, 0, 0, 1 }; - set_texcoords( wide, v0, tex00 ); - set_texcoords( wide, v1, tex01 ); - set_texcoords( wide, v2, tex10 ); - set_texcoords( wide, v3, tex11 ); - } - - tri.det = header->det; /* only the sign matters */ - tri.v[0] = v0; - tri.v[1] = v2; - tri.v[2] = v3; - stage->next->tri( stage->next, &tri ); - - tri.v[0] = v0; - tri.v[1] = v3; - tri.v[2] = v1; - stage->next->tri( stage->next, &tri ); -} - - -static void widepoint_first_point( struct draw_stage *stage, - struct prim_header *header ) -{ - struct widepoint_stage *wide = widepoint_stage(stage); - struct draw_context *draw = stage->draw; - - wide->half_point_size = 0.5f * draw->rasterizer->point_size; - wide->point_size_min = draw->rasterizer->point_size_min; - wide->point_size_max = draw->rasterizer->point_size_max; - wide->xbias = 0.0; - wide->ybias = 0.0; - - if (draw->rasterizer->gl_rasterization_rules) { - wide->xbias = 0.125; - } - - /* XXX we won't know the real size if it's computed by the vertex shader! */ - if ((draw->rasterizer->point_size > draw->wide_point_threshold) || - (draw->rasterizer->point_sprite && draw->point_sprite)) { - stage->point = widepoint_point; - } - else { - stage->point = passthrough_point; - } - - if (draw->rasterizer->point_sprite) { - /* find vertex shader texcoord outputs */ - const struct draw_vertex_shader *vs = draw->vertex_shader; - uint i, j = 0; - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { - wide->texcoord_slot[j] = i; - wide->texcoord_mode[j] = draw->rasterizer->sprite_coord_mode[j]; - j++; - } - } - wide->num_texcoords = j; - } - - wide->psize_slot = -1; - if (draw->rasterizer->point_size_per_vertex) { - /* find PSIZ vertex output */ - const struct draw_vertex_shader *vs = draw->vertex_shader; - uint i; - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { - wide->psize_slot = i; - break; - } - } - } - - stage->point( stage, header ); -} - - -static void widepoint_flush( struct draw_stage *stage, unsigned flags ) -{ - stage->point = widepoint_first_point; - stage->next->flush( stage->next, flags ); -} - - -static void widepoint_reset_stipple_counter( struct draw_stage *stage ) -{ - stage->next->reset_stipple_counter( stage->next ); -} - - -static void widepoint_destroy( struct draw_stage *stage ) -{ - draw_free_temp_verts( stage ); - FREE( stage ); -} - - -struct draw_stage *draw_wide_point_stage( struct draw_context *draw ) -{ - struct widepoint_stage *wide = CALLOC_STRUCT(widepoint_stage); - - draw_alloc_temp_verts( &wide->stage, 4 ); - - wide->stage.draw = draw; - wide->stage.next = NULL; - wide->stage.point = widepoint_first_point; - wide->stage.line = widepoint_line; - wide->stage.tri = widepoint_tri; - wide->stage.flush = widepoint_flush; - wide->stage.reset_stipple_counter = widepoint_reset_stipple_counter; - wide->stage.destroy = widepoint_destroy; - - return &wide->stage; -} diff --git a/src/gallium/auxiliary/draw/draw_wide_prims.c b/src/gallium/auxiliary/draw/draw_wide_prims.c deleted file mode 100644 index d6bff110b4..0000000000 --- a/src/gallium/auxiliary/draw/draw_wide_prims.c +++ /dev/null @@ -1,366 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell - */ - -#include "pipe/p_util.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" -#include "draw_private.h" - - -struct wide_stage { - struct draw_stage stage; - - float half_line_width; - float half_point_size; - - uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS]; - uint texcoord_mode[PIPE_MAX_SHADER_OUTPUTS]; - uint num_texcoords; - - int psize_slot; -}; - - - -static INLINE struct wide_stage *wide_stage( struct draw_stage *stage ) -{ - return (struct wide_stage *)stage; -} - - -static void passthrough_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - -static void passthrough_line( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->line(stage->next, header); -} - -static void passthrough_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->tri(stage->next, header); -} - - -/** - * Draw a wide line by drawing a quad (two triangles). - * XXX need to disable polygon stipple. - */ -static void wide_line( struct draw_stage *stage, - struct prim_header *header ) -{ - const struct wide_stage *wide = wide_stage(stage); - const float half_width = wide->half_line_width; - - struct prim_header tri; - - struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); - struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); - struct vertex_header *v2 = dup_vert(stage, header->v[1], 2); - struct vertex_header *v3 = dup_vert(stage, header->v[1], 3); - - float *pos0 = v0->data[0]; - float *pos1 = v1->data[0]; - float *pos2 = v2->data[0]; - float *pos3 = v3->data[0]; - - const float dx = FABSF(pos0[0] - pos2[0]); - const float dy = FABSF(pos0[1] - pos2[1]); - - /* - * Draw wide line as a quad (two tris) by "stretching" the line along - * X or Y. - * We need to tweak coords in several ways to be conformant here. - */ - - if (dx > dy) { - /* x-major line */ - pos0[1] = pos0[1] - half_width - 0.25f; - pos1[1] = pos1[1] + half_width - 0.25f; - pos2[1] = pos2[1] - half_width - 0.25f; - pos3[1] = pos3[1] + half_width - 0.25f; - if (pos0[0] < pos2[0]) { - /* left to right line */ - pos0[0] -= 0.5f; - pos1[0] -= 0.5f; - pos2[0] -= 0.5f; - pos3[0] -= 0.5f; - } - else { - /* right to left line */ - pos0[0] += 0.5f; - pos1[0] += 0.5f; - pos2[0] += 0.5f; - pos3[0] += 0.5f; - } - } - else { - /* y-major line */ - pos0[0] = pos0[0] - half_width + 0.25f; - pos1[0] = pos1[0] + half_width + 0.25f; - pos2[0] = pos2[0] - half_width + 0.25f; - pos3[0] = pos3[0] + half_width + 0.25f; - if (pos0[1] < pos2[1]) { - /* top to bottom line */ - pos0[1] -= 0.5f; - pos1[1] -= 0.5f; - pos2[1] -= 0.5f; - pos3[1] -= 0.5f; - } - else { - /* bottom to top line */ - pos0[1] += 0.5f; - pos1[1] += 0.5f; - pos2[1] += 0.5f; - pos3[1] += 0.5f; - } - } - - tri.det = header->det; /* only the sign matters */ - tri.v[0] = v0; - tri.v[1] = v2; - tri.v[2] = v3; - stage->next->tri( stage->next, &tri ); - - tri.v[0] = v0; - tri.v[1] = v3; - tri.v[2] = v1; - stage->next->tri( stage->next, &tri ); -} - - -/** - * Set the vertex texcoords for sprite mode. - * Coords may be left untouched or set to a right-side-up or upside-down - * orientation. - */ -static void set_texcoords(const struct wide_stage *wide, - struct vertex_header *v, const float tc[4]) -{ - uint i; - for (i = 0; i < wide->num_texcoords; i++) { - if (wide->texcoord_mode[i] != PIPE_SPRITE_COORD_NONE) { - uint j = wide->texcoord_slot[i]; - v->data[j][0] = tc[0]; - if (wide->texcoord_mode[i] == PIPE_SPRITE_COORD_LOWER_LEFT) - v->data[j][1] = 1.0f - tc[1]; - else - v->data[j][1] = tc[1]; - v->data[j][2] = tc[2]; - v->data[j][3] = tc[3]; - } - } -} - - -/* If there are lots of sprite points (and why wouldn't there be?) it - * would probably be more sensible to change hardware setup to - * optimize this rather than doing the whole thing in software like - * this. - */ -static void wide_point( struct draw_stage *stage, - struct prim_header *header ) -{ - const struct wide_stage *wide = wide_stage(stage); - const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite; - float half_size; - float left_adj, right_adj; - - struct prim_header tri; - - /* four dups of original vertex */ - struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); - struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); - struct vertex_header *v2 = dup_vert(stage, header->v[0], 2); - struct vertex_header *v3 = dup_vert(stage, header->v[0], 3); - - float *pos0 = v0->data[0]; - float *pos1 = v1->data[0]; - float *pos2 = v2->data[0]; - float *pos3 = v3->data[0]; - - /* point size is either per-vertex or fixed size */ - if (wide->psize_slot >= 0) { - half_size = 0.5f * header->v[0]->data[wide->psize_slot][0]; - } - else { - half_size = wide->half_point_size; - } - - left_adj = -half_size; /* + 0.25f;*/ - right_adj = half_size; /* + 0.25f;*/ - - pos0[0] += left_adj; - pos0[1] -= half_size; - - pos1[0] += left_adj; - pos1[1] += half_size; - - pos2[0] += right_adj; - pos2[1] -= half_size; - - pos3[0] += right_adj; - pos3[1] += half_size; - - if (sprite) { - static const float tex00[4] = { 0, 0, 0, 1 }; - static const float tex01[4] = { 0, 1, 0, 1 }; - static const float tex11[4] = { 1, 1, 0, 1 }; - static const float tex10[4] = { 1, 0, 0, 1 }; - set_texcoords( wide, v0, tex00 ); - set_texcoords( wide, v1, tex01 ); - set_texcoords( wide, v2, tex10 ); - set_texcoords( wide, v3, tex11 ); - } - - tri.det = header->det; /* only the sign matters */ - tri.v[0] = v0; - tri.v[1] = v2; - tri.v[2] = v3; - stage->next->tri( stage->next, &tri ); - - tri.v[0] = v0; - tri.v[1] = v3; - tri.v[2] = v1; - stage->next->tri( stage->next, &tri ); -} - - -static void wide_first_point( struct draw_stage *stage, - struct prim_header *header ) -{ - struct wide_stage *wide = wide_stage(stage); - struct draw_context *draw = stage->draw; - - wide->half_point_size = 0.5f * draw->rasterizer->point_size; - - /* XXX we won't know the real size if it's computed by the vertex shader! */ - if (draw->rasterizer->point_size > draw->wide_point_threshold) { - stage->point = wide_point; - } - else { - stage->point = passthrough_point; - } - - if (draw->rasterizer->point_sprite) { - /* find vertex shader texcoord outputs */ - const struct draw_vertex_shader *vs = draw->vertex_shader; - uint i, j = 0; - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { - wide->texcoord_slot[j] = i; - wide->texcoord_mode[j] = draw->rasterizer->sprite_coord_mode[j]; - j++; - } - } - wide->num_texcoords = j; - } - - wide->psize_slot = -1; - - if (draw->rasterizer->point_size_per_vertex) { - /* find PSIZ vertex output */ - const struct draw_vertex_shader *vs = draw->vertex_shader; - uint i; - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { - wide->psize_slot = i; - break; - } - } - } - - stage->point( stage, header ); -} - - - -static void wide_first_line( struct draw_stage *stage, - struct prim_header *header ) -{ - struct wide_stage *wide = wide_stage(stage); - struct draw_context *draw = stage->draw; - - wide->half_line_width = 0.5f * draw->rasterizer->line_width; - - if (draw->rasterizer->line_width != 1.0) { - wide->stage.line = wide_line; - } - else { - wide->stage.line = passthrough_line; - } - - stage->line( stage, header ); -} - - -static void wide_flush( struct draw_stage *stage, unsigned flags ) -{ - stage->line = wide_first_line; - stage->point = wide_first_point; - stage->next->flush( stage->next, flags ); -} - - -static void wide_reset_stipple_counter( struct draw_stage *stage ) -{ - stage->next->reset_stipple_counter( stage->next ); -} - - -static void wide_destroy( struct draw_stage *stage ) -{ - draw_free_temp_verts( stage ); - FREE( stage ); -} - - -struct draw_stage *draw_wide_stage( struct draw_context *draw ) -{ - struct wide_stage *wide = CALLOC_STRUCT(wide_stage); - - draw_alloc_temp_verts( &wide->stage, 4 ); - - wide->stage.draw = draw; - wide->stage.next = NULL; - wide->stage.point = wide_first_point; - wide->stage.line = wide_first_line; - wide->stage.tri = passthrough_tri; - wide->stage.flush = wide_flush; - wide->stage.reset_stipple_counter = wide_reset_stipple_counter; - wide->stage.destroy = wide_destroy; - - return &wide->stage; -} -- cgit v1.2.3 From 882e5d84dcb19c24b7b56cfe6049810023f3a17e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 13:12:07 +0100 Subject: draw: remove dead code --- src/gallium/auxiliary/draw/draw_vs.h | 27 --------------------------- 1 file changed, 27 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index b6f3bbdaea..f9772b83b8 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -81,33 +81,6 @@ draw_create_vs_llvm(struct draw_context *draw, const struct pipe_shader_state *templ); -/* Should be part of the generated shader: - */ -static INLINE unsigned -compute_clipmask(const float *clip, /*const*/ float plane[][4], unsigned nr) -{ - unsigned mask = 0x0; - unsigned i; - - /* Do the hardwired planes first: - */ - if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; - if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; - if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; - if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; - if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; - if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; - - /* Followed by any remaining ones: - */ - for (i = 6; i < nr; i++) { - if (dot4(clip, plane[i]) < 0) - mask |= (1< Date: Sat, 19 Apr 2008 13:20:26 +0100 Subject: draw: remove named clipmask flags, tidy up pt middle ends --- src/gallium/auxiliary/draw/draw_context.h | 37 +++------------------------- src/gallium/auxiliary/draw/draw_private.h | 9 ++----- src/gallium/auxiliary/draw/draw_pt.c | 36 +++++++++++++-------------- src/gallium/auxiliary/draw/draw_pt.h | 6 +++++ src/gallium/auxiliary/draw/draw_pt_post_vs.c | 12 ++++----- 5 files changed, 35 insertions(+), 65 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index c7ac32b452..a0ac980c89 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -42,37 +42,11 @@ struct pipe_context; -struct vertex_buffer; -struct vertex_info; struct draw_context; struct draw_stage; struct draw_vertex_shader; -/** - * Clipmask flags - */ -/*@{*/ -#define CLIP_RIGHT_BIT 0x01 -#define CLIP_LEFT_BIT 0x02 -#define CLIP_TOP_BIT 0x04 -#define CLIP_BOTTOM_BIT 0x08 -#define CLIP_NEAR_BIT 0x10 -#define CLIP_FAR_BIT 0x20 -/*@}*/ - -/** - * Bitshift for each clip flag - */ -/*@{*/ -#define CLIP_RIGHT_SHIFT 0 -#define CLIP_LEFT_SHIFT 1 -#define CLIP_TOP_SHIFT 2 -#define CLIP_BOTTOM_SHIFT 3 -#define CLIP_NEAR_SHIFT 4 -#define CLIP_FAR_SHIFT 5 -/*@}*/ - struct draw_context *draw_create( void ); @@ -168,15 +142,10 @@ void draw_arrays(struct draw_context *draw, unsigned prim, void draw_flush(struct draw_context *draw); -/*********************************************************************** - * draw_debug.c - */ -boolean draw_validate_prim( unsigned prim, unsigned length ); -unsigned draw_trim_prim( unsigned mode, unsigned count ); - - - +/******************************************************************************* + * Driver backend interface + */ struct vbuf_render; void draw_set_render( struct draw_context *draw, struct vbuf_render *render ); diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 476b1184d4..37ffdbf902 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -44,7 +44,6 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "rtasm/rtasm_x86sse.h" #include "tgsi/exec/tgsi_exec.h" #include "tgsi/util/tgsi_scan.h" @@ -171,7 +170,8 @@ struct draw_context */ struct { struct { - struct draw_pt_middle_end *opt[PT_MAX_MIDDLE]; + struct draw_pt_middle_end *fetch_emit; + struct draw_pt_middle_end *general; } middle; struct { @@ -289,11 +289,6 @@ extern boolean draw_need_pipeline(const struct draw_context *draw, */ boolean draw_pt_init( struct draw_context *draw ); void draw_pt_destroy( struct draw_context *draw ); -boolean draw_pt_arrays( struct draw_context *draw, - unsigned prim, - unsigned start, - unsigned count ); - void draw_pt_reset_vertex_ids( struct draw_context *draw ); #define DRAW_FLUSH_STATE_CHANGE 0x8 diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index ecaed84070..f153a3ee2c 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -44,7 +44,7 @@ * - pipeline -- the prim pipeline: clipping, wide lines, etc * - backend -- the vbuf_render provided by the driver. */ -boolean +static boolean draw_pt_arrays(struct draw_context *draw, unsigned prim, unsigned start, @@ -70,19 +70,16 @@ draw_pt_arrays(struct draw_context *draw, opt |= PT_SHADE; } + if (opt) + middle = draw->pt.middle.general; + else + middle = draw->pt.middle.fetch_emit; - middle = draw->pt.middle.opt[opt]; - if (middle == NULL) { - middle = draw->pt.middle.opt[PT_PIPELINE | PT_CLIPTEST | PT_SHADE]; - } - - assert(middle); /* May create a short-circuited version of this for small primitives: */ frontend = draw->pt.front.vcache; - frontend->prepare( frontend, prim, middle, opt ); frontend->run( frontend, @@ -102,11 +99,12 @@ boolean draw_pt_init( struct draw_context *draw ) if (!draw->pt.front.vcache) return FALSE; - draw->pt.middle.opt[0] = draw_pt_fetch_emit( draw ); - draw->pt.middle.opt[PT_SHADE | PT_CLIPTEST | PT_PIPELINE] = - draw_pt_fetch_pipeline_or_emit( draw ); + draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw ); + if (!draw->pt.middle.fetch_emit) + return FALSE; - if (!draw->pt.middle.opt[PT_SHADE | PT_CLIPTEST | PT_PIPELINE]) + draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); + if (!draw->pt.middle.general) return FALSE; return TRUE; @@ -115,13 +113,15 @@ boolean draw_pt_init( struct draw_context *draw ) void draw_pt_destroy( struct draw_context *draw ) { - int i; + if (draw->pt.middle.general) { + draw->pt.middle.general->destroy( draw->pt.middle.general ); + draw->pt.middle.general = NULL; + } - for (i = 0; i < PT_MAX_MIDDLE; i++) - if (draw->pt.middle.opt[i]) { - draw->pt.middle.opt[i]->destroy( draw->pt.middle.opt[i] ); - draw->pt.middle.opt[i] = NULL; - } + if (draw->pt.middle.fetch_emit) { + draw->pt.middle.fetch_emit->destroy( draw->pt.middle.fetch_emit ); + draw->pt.middle.fetch_emit = NULL; + } if (draw->pt.front.vcache) { draw->pt.front.vcache->destroy( draw->pt.front.vcache ); diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index eb23ee1c1e..1119e9c6b8 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -50,6 +50,12 @@ struct draw_context; #define DRAW_PT_FLAG_MASK (3<<30) +#define PT_SHADE 0x1 +#define PT_CLIPTEST 0x2 +#define PT_PIPELINE 0x4 +#define PT_MAX_MIDDLE 0x8 + + /* The "front end" - prepare sets of fetch, draw elements for the * middle end. * diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index 315b02f4ee..b3ecec6ece 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -52,12 +52,12 @@ compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr) /* Do the hardwired planes first: */ - if (-clip[0] + clip[3] < 0) mask |= CLIP_RIGHT_BIT; - if ( clip[0] + clip[3] < 0) mask |= CLIP_LEFT_BIT; - if (-clip[1] + clip[3] < 0) mask |= CLIP_TOP_BIT; - if ( clip[1] + clip[3] < 0) mask |= CLIP_BOTTOM_BIT; - if (-clip[2] + clip[3] < 0) mask |= CLIP_FAR_BIT; - if ( clip[2] + clip[3] < 0) mask |= CLIP_NEAR_BIT; + if (-clip[0] + clip[3] < 0) mask |= (1<<0); + if ( clip[0] + clip[3] < 0) mask |= (1<<1); + if (-clip[1] + clip[3] < 0) mask |= (1<<2); + if ( clip[1] + clip[3] < 0) mask |= (1<<3); + if ( clip[2] + clip[3] < 0) mask |= (1<<4); /* match mesa clipplane numbering - for now */ + if (-clip[2] + clip[3] < 0) mask |= (1<<5); /* match mesa clipplane numbering - for now */ /* Followed by any remaining ones: */ -- cgit v1.2.3 From 0959f909ba585968f0408e78961e1c0ffc69a9f8 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 13:30:19 +0100 Subject: draw: remove dead file pipe_wide_prims.c --- src/gallium/auxiliary/draw/draw_pipe_wide_prims.c | 366 ---------------------- 1 file changed, 366 deletions(-) delete mode 100644 src/gallium/auxiliary/draw/draw_pipe_wide_prims.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_prims.c b/src/gallium/auxiliary/draw/draw_pipe_wide_prims.c deleted file mode 100644 index d6bff110b4..0000000000 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_prims.c +++ /dev/null @@ -1,366 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/* Authors: Keith Whitwell - */ - -#include "pipe/p_util.h" -#include "pipe/p_defines.h" -#include "pipe/p_shader_tokens.h" -#include "draw_private.h" - - -struct wide_stage { - struct draw_stage stage; - - float half_line_width; - float half_point_size; - - uint texcoord_slot[PIPE_MAX_SHADER_OUTPUTS]; - uint texcoord_mode[PIPE_MAX_SHADER_OUTPUTS]; - uint num_texcoords; - - int psize_slot; -}; - - - -static INLINE struct wide_stage *wide_stage( struct draw_stage *stage ) -{ - return (struct wide_stage *)stage; -} - - -static void passthrough_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - -static void passthrough_line( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->line(stage->next, header); -} - -static void passthrough_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->tri(stage->next, header); -} - - -/** - * Draw a wide line by drawing a quad (two triangles). - * XXX need to disable polygon stipple. - */ -static void wide_line( struct draw_stage *stage, - struct prim_header *header ) -{ - const struct wide_stage *wide = wide_stage(stage); - const float half_width = wide->half_line_width; - - struct prim_header tri; - - struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); - struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); - struct vertex_header *v2 = dup_vert(stage, header->v[1], 2); - struct vertex_header *v3 = dup_vert(stage, header->v[1], 3); - - float *pos0 = v0->data[0]; - float *pos1 = v1->data[0]; - float *pos2 = v2->data[0]; - float *pos3 = v3->data[0]; - - const float dx = FABSF(pos0[0] - pos2[0]); - const float dy = FABSF(pos0[1] - pos2[1]); - - /* - * Draw wide line as a quad (two tris) by "stretching" the line along - * X or Y. - * We need to tweak coords in several ways to be conformant here. - */ - - if (dx > dy) { - /* x-major line */ - pos0[1] = pos0[1] - half_width - 0.25f; - pos1[1] = pos1[1] + half_width - 0.25f; - pos2[1] = pos2[1] - half_width - 0.25f; - pos3[1] = pos3[1] + half_width - 0.25f; - if (pos0[0] < pos2[0]) { - /* left to right line */ - pos0[0] -= 0.5f; - pos1[0] -= 0.5f; - pos2[0] -= 0.5f; - pos3[0] -= 0.5f; - } - else { - /* right to left line */ - pos0[0] += 0.5f; - pos1[0] += 0.5f; - pos2[0] += 0.5f; - pos3[0] += 0.5f; - } - } - else { - /* y-major line */ - pos0[0] = pos0[0] - half_width + 0.25f; - pos1[0] = pos1[0] + half_width + 0.25f; - pos2[0] = pos2[0] - half_width + 0.25f; - pos3[0] = pos3[0] + half_width + 0.25f; - if (pos0[1] < pos2[1]) { - /* top to bottom line */ - pos0[1] -= 0.5f; - pos1[1] -= 0.5f; - pos2[1] -= 0.5f; - pos3[1] -= 0.5f; - } - else { - /* bottom to top line */ - pos0[1] += 0.5f; - pos1[1] += 0.5f; - pos2[1] += 0.5f; - pos3[1] += 0.5f; - } - } - - tri.det = header->det; /* only the sign matters */ - tri.v[0] = v0; - tri.v[1] = v2; - tri.v[2] = v3; - stage->next->tri( stage->next, &tri ); - - tri.v[0] = v0; - tri.v[1] = v3; - tri.v[2] = v1; - stage->next->tri( stage->next, &tri ); -} - - -/** - * Set the vertex texcoords for sprite mode. - * Coords may be left untouched or set to a right-side-up or upside-down - * orientation. - */ -static void set_texcoords(const struct wide_stage *wide, - struct vertex_header *v, const float tc[4]) -{ - uint i; - for (i = 0; i < wide->num_texcoords; i++) { - if (wide->texcoord_mode[i] != PIPE_SPRITE_COORD_NONE) { - uint j = wide->texcoord_slot[i]; - v->data[j][0] = tc[0]; - if (wide->texcoord_mode[i] == PIPE_SPRITE_COORD_LOWER_LEFT) - v->data[j][1] = 1.0f - tc[1]; - else - v->data[j][1] = tc[1]; - v->data[j][2] = tc[2]; - v->data[j][3] = tc[3]; - } - } -} - - -/* If there are lots of sprite points (and why wouldn't there be?) it - * would probably be more sensible to change hardware setup to - * optimize this rather than doing the whole thing in software like - * this. - */ -static void wide_point( struct draw_stage *stage, - struct prim_header *header ) -{ - const struct wide_stage *wide = wide_stage(stage); - const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite; - float half_size; - float left_adj, right_adj; - - struct prim_header tri; - - /* four dups of original vertex */ - struct vertex_header *v0 = dup_vert(stage, header->v[0], 0); - struct vertex_header *v1 = dup_vert(stage, header->v[0], 1); - struct vertex_header *v2 = dup_vert(stage, header->v[0], 2); - struct vertex_header *v3 = dup_vert(stage, header->v[0], 3); - - float *pos0 = v0->data[0]; - float *pos1 = v1->data[0]; - float *pos2 = v2->data[0]; - float *pos3 = v3->data[0]; - - /* point size is either per-vertex or fixed size */ - if (wide->psize_slot >= 0) { - half_size = 0.5f * header->v[0]->data[wide->psize_slot][0]; - } - else { - half_size = wide->half_point_size; - } - - left_adj = -half_size; /* + 0.25f;*/ - right_adj = half_size; /* + 0.25f;*/ - - pos0[0] += left_adj; - pos0[1] -= half_size; - - pos1[0] += left_adj; - pos1[1] += half_size; - - pos2[0] += right_adj; - pos2[1] -= half_size; - - pos3[0] += right_adj; - pos3[1] += half_size; - - if (sprite) { - static const float tex00[4] = { 0, 0, 0, 1 }; - static const float tex01[4] = { 0, 1, 0, 1 }; - static const float tex11[4] = { 1, 1, 0, 1 }; - static const float tex10[4] = { 1, 0, 0, 1 }; - set_texcoords( wide, v0, tex00 ); - set_texcoords( wide, v1, tex01 ); - set_texcoords( wide, v2, tex10 ); - set_texcoords( wide, v3, tex11 ); - } - - tri.det = header->det; /* only the sign matters */ - tri.v[0] = v0; - tri.v[1] = v2; - tri.v[2] = v3; - stage->next->tri( stage->next, &tri ); - - tri.v[0] = v0; - tri.v[1] = v3; - tri.v[2] = v1; - stage->next->tri( stage->next, &tri ); -} - - -static void wide_first_point( struct draw_stage *stage, - struct prim_header *header ) -{ - struct wide_stage *wide = wide_stage(stage); - struct draw_context *draw = stage->draw; - - wide->half_point_size = 0.5f * draw->rasterizer->point_size; - - /* XXX we won't know the real size if it's computed by the vertex shader! */ - if (draw->rasterizer->point_size > draw->wide_point_threshold) { - stage->point = wide_point; - } - else { - stage->point = passthrough_point; - } - - if (draw->rasterizer->point_sprite) { - /* find vertex shader texcoord outputs */ - const struct draw_vertex_shader *vs = draw->vertex_shader; - uint i, j = 0; - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { - wide->texcoord_slot[j] = i; - wide->texcoord_mode[j] = draw->rasterizer->sprite_coord_mode[j]; - j++; - } - } - wide->num_texcoords = j; - } - - wide->psize_slot = -1; - - if (draw->rasterizer->point_size_per_vertex) { - /* find PSIZ vertex output */ - const struct draw_vertex_shader *vs = draw->vertex_shader; - uint i; - for (i = 0; i < vs->info.num_outputs; i++) { - if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { - wide->psize_slot = i; - break; - } - } - } - - stage->point( stage, header ); -} - - - -static void wide_first_line( struct draw_stage *stage, - struct prim_header *header ) -{ - struct wide_stage *wide = wide_stage(stage); - struct draw_context *draw = stage->draw; - - wide->half_line_width = 0.5f * draw->rasterizer->line_width; - - if (draw->rasterizer->line_width != 1.0) { - wide->stage.line = wide_line; - } - else { - wide->stage.line = passthrough_line; - } - - stage->line( stage, header ); -} - - -static void wide_flush( struct draw_stage *stage, unsigned flags ) -{ - stage->line = wide_first_line; - stage->point = wide_first_point; - stage->next->flush( stage->next, flags ); -} - - -static void wide_reset_stipple_counter( struct draw_stage *stage ) -{ - stage->next->reset_stipple_counter( stage->next ); -} - - -static void wide_destroy( struct draw_stage *stage ) -{ - draw_free_temp_verts( stage ); - FREE( stage ); -} - - -struct draw_stage *draw_wide_stage( struct draw_context *draw ) -{ - struct wide_stage *wide = CALLOC_STRUCT(wide_stage); - - draw_alloc_temp_verts( &wide->stage, 4 ); - - wide->stage.draw = draw; - wide->stage.next = NULL; - wide->stage.point = wide_first_point; - wide->stage.line = wide_first_line; - wide->stage.tri = passthrough_tri; - wide->stage.flush = wide_flush; - wide->stage.reset_stipple_counter = wide_reset_stipple_counter; - wide->stage.destroy = wide_destroy; - - return &wide->stage; -} -- cgit v1.2.3 From 507fbe2d327efb8d608ce8e07436b97321560808 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 15:29:27 +0100 Subject: draw: move some pipeline-specific code & state to draw_pipe.[ch] --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/draw_context.c | 141 +++++-------------- src/gallium/auxiliary/draw/draw_pipe.c | 161 ++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_pipe.h | 110 +++++++++++++++ src/gallium/auxiliary/draw/draw_pipe_aaline.c | 1 + src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 1 + src/gallium/auxiliary/draw/draw_pipe_clip.c | 11 +- src/gallium/auxiliary/draw/draw_pipe_cull.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_flatshade.c | 1 + src/gallium/auxiliary/draw/draw_pipe_offset.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_stipple.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_twoside.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_unfilled.c | 1 + src/gallium/auxiliary/draw/draw_pipe_validate.c | 17 +-- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 1 + src/gallium/auxiliary/draw/draw_pipe_wide_line.c | 1 + src/gallium/auxiliary/draw/draw_pipe_wide_point.c | 5 +- src/gallium/auxiliary/draw/draw_private.h | 132 ++++-------------- src/gallium/auxiliary/draw/draw_pt.c | 18 --- src/gallium/auxiliary/draw/draw_pt_pipeline.c | 1 + src/gallium/auxiliary/draw/draw_pt_post_vs.c | 11 ++ src/gallium/auxiliary/draw/draw_vertex.c | 2 - src/gallium/drivers/i915simple/i915_context.c | 2 +- src/gallium/drivers/i915simple/i915_prim_emit.c | 5 +- src/gallium/drivers/softpipe/sp_prim_setup.c | 2 +- src/mesa/state_tracker/st_cb_feedback.c | 2 +- src/mesa/state_tracker/st_cb_rasterpos.c | 2 +- 28 files changed, 377 insertions(+), 262 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pipe.c create mode 100644 src/gallium/auxiliary/draw/draw_pipe.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 62f46c6db1..4fffd11b77 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -5,6 +5,7 @@ LIBNAME = draw C_SOURCES = \ draw_context.c \ + draw_pipe.c \ draw_pipe_aaline.c \ draw_pipe_aapoint.c \ draw_pipe_clip.c \ diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 6012bc155e..fa6791fa0b 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -35,6 +35,8 @@ #include "draw_context.h" #include "draw_vbuf.h" #include "draw_vs.h" +#include "draw_pt.h" +#include "draw_pipe.h" struct draw_context *draw_create( void ) @@ -49,32 +51,6 @@ struct draw_context *draw_create( void ) draw->use_sse = FALSE; #endif - /* create pipeline stages */ - draw->pipeline.wide_line = draw_wide_line_stage( draw ); - draw->pipeline.wide_point = draw_wide_point_stage( draw ); - draw->pipeline.stipple = draw_stipple_stage( draw ); - draw->pipeline.unfilled = draw_unfilled_stage( draw ); - draw->pipeline.twoside = draw_twoside_stage( draw ); - draw->pipeline.offset = draw_offset_stage( draw ); - draw->pipeline.clip = draw_clip_stage( draw ); - draw->pipeline.flatshade = draw_flatshade_stage( draw ); - draw->pipeline.cull = draw_cull_stage( draw ); - draw->pipeline.validate = draw_validate_stage( draw ); - draw->pipeline.first = draw->pipeline.validate; - - if (!draw->pipeline.wide_line || - !draw->pipeline.wide_point || - !draw->pipeline.stipple || - !draw->pipeline.unfilled || - !draw->pipeline.twoside || - !draw->pipeline.offset || - !draw->pipeline.clip || - !draw->pipeline.flatshade || - !draw->pipeline.cull || - !draw->pipeline.validate) - goto fail; - - ASSIGN_4V( draw->plane[0], -1, 0, 0, 1 ); ASSIGN_4V( draw->plane[1], 1, 0, 0, 1 ); ASSIGN_4V( draw->plane[2], 0, -1, 0, 1 ); @@ -83,11 +59,6 @@ struct draw_context *draw_create( void ) ASSIGN_4V( draw->plane[5], 0, 0, -1, 1 ); /* mesa's a bit wonky */ draw->nr_planes = 6; - /* these defaults are oriented toward the needs of softpipe */ - draw->wide_point_threshold = 1000000.0; /* infinity */ - draw->wide_line_threshold = 1.0; - draw->line_stipple = TRUE; - draw->point_sprite = TRUE; draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ @@ -100,6 +71,8 @@ struct draw_context *draw_create( void ) draw->machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); draw->machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); + if (!draw_pipeline_init( draw )) + goto fail; if (!draw_pt_init( draw )) goto fail; @@ -117,39 +90,13 @@ void draw_destroy( struct draw_context *draw ) if (!draw) return; - if (draw->pipeline.wide_line) - draw->pipeline.wide_line->destroy( draw->pipeline.wide_line ); - if (draw->pipeline.wide_point) - draw->pipeline.wide_point->destroy( draw->pipeline.wide_point ); - if (draw->pipeline.stipple) - draw->pipeline.stipple->destroy( draw->pipeline.stipple ); - if (draw->pipeline.unfilled) - draw->pipeline.unfilled->destroy( draw->pipeline.unfilled ); - if (draw->pipeline.twoside) - draw->pipeline.twoside->destroy( draw->pipeline.twoside ); - if (draw->pipeline.offset) - draw->pipeline.offset->destroy( draw->pipeline.offset ); - if (draw->pipeline.clip) - draw->pipeline.clip->destroy( draw->pipeline.clip ); - if (draw->pipeline.flatshade) - draw->pipeline.flatshade->destroy( draw->pipeline.flatshade ); - if (draw->pipeline.cull) - draw->pipeline.cull->destroy( draw->pipeline.cull ); - if (draw->pipeline.validate) - draw->pipeline.validate->destroy( draw->pipeline.validate ); - if (draw->pipeline.aaline) - draw->pipeline.aaline->destroy( draw->pipeline.aaline ); - if (draw->pipeline.aapoint) - draw->pipeline.aapoint->destroy( draw->pipeline.aapoint ); - if (draw->pipeline.pstipple) - draw->pipeline.pstipple->destroy( draw->pipeline.pstipple ); - if (draw->pipeline.rasterize) - draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); if (draw->machine.Inputs) align_free(draw->machine.Inputs); + if (draw->machine.Outputs) align_free(draw->machine.Outputs); + tgsi_exec_machine_free_data(&draw->machine); /* Not so fast -- we're just borrowing this at the moment. @@ -158,6 +105,7 @@ void draw_destroy( struct draw_context *draw ) draw->render->destroy( draw->render ); */ + draw_pipeline_destroy( draw ); draw_pt_destroy( draw ); FREE( draw ); @@ -284,7 +232,7 @@ void draw_wide_point_threshold(struct draw_context *draw, float threshold) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->wide_point_threshold = threshold; + draw->pipeline.wide_point_threshold = threshold; } @@ -296,7 +244,7 @@ void draw_wide_line_threshold(struct draw_context *draw, float threshold) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->wide_line_threshold = threshold; + draw->pipeline.wide_line_threshold = threshold; } @@ -307,7 +255,7 @@ void draw_enable_line_stipple(struct draw_context *draw, boolean enable) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->line_stipple = enable; + draw->pipeline.line_stipple = enable; } @@ -318,7 +266,7 @@ void draw_enable_point_sprites(struct draw_context *draw, boolean enable) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->point_sprite = enable; + draw->pipeline.point_sprite = enable; } @@ -373,36 +321,6 @@ draw_num_vs_outputs(struct draw_context *draw) } -/** - * Allocate space for temporary post-transform vertices, such as for clipping. - */ -void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ) -{ - assert(!stage->tmp); - - stage->nr_tmps = nr; - - if (nr) { - ubyte *store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr ); - unsigned i; - - stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr ); - - for (i = 0; i < nr; i++) - stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE); - } -} - - -void draw_free_temp_verts( struct draw_stage *stage ) -{ - if (stage->tmp) { - FREE( stage->tmp[0] ); - FREE( stage->tmp ); - stage->tmp = NULL; - } -} - boolean draw_use_sse(struct draw_context *draw) { @@ -410,23 +328,6 @@ boolean draw_use_sse(struct draw_context *draw) } -void draw_reset_vertex_ids(struct draw_context *draw) -{ - struct draw_stage *stage = draw->pipeline.first; - - while (stage) { - unsigned i; - - for (i = 0; i < stage->nr_tmps; i++) - stage->tmp[i]->vertex_id = UNDEFINED_VERTEX_ID; - - stage = stage->next; - } - - draw_pt_reset_vertex_ids(draw); -} - - void draw_set_render( struct draw_context *draw, struct vbuf_render *render ) { @@ -467,3 +368,23 @@ draw_set_mapped_element_buffer( struct draw_context *draw, draw->user.elts = elements; draw->user.eltSize = eltSize; } + + + +/* Revamp me please: + */ +void draw_do_flush( struct draw_context *draw, unsigned flags ) +{ + if (!draw->flushing) + { + draw->flushing = TRUE; + + if (flags >= DRAW_FLUSH_STATE_CHANGE) { + draw->pipeline.first->flush( draw->pipeline.first, flags ); + draw->pipeline.first = draw->pipeline.validate; + draw->reduced_prim = ~0; /* is reduced_prim needed any more? */ + } + + draw->flushing = FALSE; + } +} diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c new file mode 100644 index 0000000000..9d62cb2c65 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -0,0 +1,161 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "draw/draw_private.h" +#include "draw/draw_pipe.h" + + +boolean draw_pipeline_init( struct draw_context *draw ) +{ + /* create pipeline stages */ + draw->pipeline.wide_line = draw_wide_line_stage( draw ); + draw->pipeline.wide_point = draw_wide_point_stage( draw ); + draw->pipeline.stipple = draw_stipple_stage( draw ); + draw->pipeline.unfilled = draw_unfilled_stage( draw ); + draw->pipeline.twoside = draw_twoside_stage( draw ); + draw->pipeline.offset = draw_offset_stage( draw ); + draw->pipeline.clip = draw_clip_stage( draw ); + draw->pipeline.flatshade = draw_flatshade_stage( draw ); + draw->pipeline.cull = draw_cull_stage( draw ); + draw->pipeline.validate = draw_validate_stage( draw ); + draw->pipeline.first = draw->pipeline.validate; + + if (!draw->pipeline.wide_line || + !draw->pipeline.wide_point || + !draw->pipeline.stipple || + !draw->pipeline.unfilled || + !draw->pipeline.twoside || + !draw->pipeline.offset || + !draw->pipeline.clip || + !draw->pipeline.flatshade || + !draw->pipeline.cull || + !draw->pipeline.validate) + return FALSE; + + /* these defaults are oriented toward the needs of softpipe */ + draw->pipeline.wide_point_threshold = 1000000.0; /* infinity */ + draw->pipeline.wide_line_threshold = 1.0; + draw->pipeline.line_stipple = TRUE; + draw->pipeline.point_sprite = TRUE; + + return TRUE; +} + + +void draw_pipeline_destroy( struct draw_context *draw ) +{ + if (draw->pipeline.wide_line) + draw->pipeline.wide_line->destroy( draw->pipeline.wide_line ); + if (draw->pipeline.wide_point) + draw->pipeline.wide_point->destroy( draw->pipeline.wide_point ); + if (draw->pipeline.stipple) + draw->pipeline.stipple->destroy( draw->pipeline.stipple ); + if (draw->pipeline.unfilled) + draw->pipeline.unfilled->destroy( draw->pipeline.unfilled ); + if (draw->pipeline.twoside) + draw->pipeline.twoside->destroy( draw->pipeline.twoside ); + if (draw->pipeline.offset) + draw->pipeline.offset->destroy( draw->pipeline.offset ); + if (draw->pipeline.clip) + draw->pipeline.clip->destroy( draw->pipeline.clip ); + if (draw->pipeline.flatshade) + draw->pipeline.flatshade->destroy( draw->pipeline.flatshade ); + if (draw->pipeline.cull) + draw->pipeline.cull->destroy( draw->pipeline.cull ); + if (draw->pipeline.validate) + draw->pipeline.validate->destroy( draw->pipeline.validate ); + if (draw->pipeline.aaline) + draw->pipeline.aaline->destroy( draw->pipeline.aaline ); + if (draw->pipeline.aapoint) + draw->pipeline.aapoint->destroy( draw->pipeline.aapoint ); + if (draw->pipeline.pstipple) + draw->pipeline.pstipple->destroy( draw->pipeline.pstipple ); + if (draw->pipeline.rasterize) + draw->pipeline.rasterize->destroy( draw->pipeline.rasterize ); +} + + + + +/* This is only used for temporary verts. + */ +#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) + + +/** + * Allocate space for temporary post-transform vertices, such as for clipping. + */ +void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ) +{ + assert(!stage->tmp); + + stage->nr_tmps = nr; + + if (nr) { + ubyte *store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr ); + unsigned i; + + stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr ); + + for (i = 0; i < nr; i++) + stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE); + } +} + + +void draw_free_temp_verts( struct draw_stage *stage ) +{ + if (stage->tmp) { + FREE( stage->tmp[0] ); + FREE( stage->tmp ); + stage->tmp = NULL; + } +} + +void draw_reset_vertex_ids(struct draw_context *draw) +{ + struct draw_stage *stage = draw->pipeline.first; + + while (stage) { + unsigned i; + + for (i = 0; i < stage->nr_tmps; i++) + stage->tmp[i]->vertex_id = UNDEFINED_VERTEX_ID; + + stage = stage->next; + } + + draw_pt_reset_vertex_ids(draw); +} + + diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h new file mode 100644 index 0000000000..5b97ca5c8c --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe.h @@ -0,0 +1,110 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#ifndef DRAW_PIPE_H +#define DRAW_PIPE_H + +#include "pipe/p_compiler.h" +#include "draw_private.h" /* for sizeof(vertex_header) */ + + + +/** + * Base class for all primitive drawing stages. + */ +struct draw_stage +{ + struct draw_context *draw; /**< parent context */ + + struct draw_stage *next; /**< next stage in pipeline */ + + struct vertex_header **tmp; /**< temp vert storage, such as for clipping */ + unsigned nr_tmps; + + void (*point)( struct draw_stage *, + struct prim_header * ); + + void (*line)( struct draw_stage *, + struct prim_header * ); + + void (*tri)( struct draw_stage *, + struct prim_header * ); + + void (*flush)( struct draw_stage *, + unsigned flags ); + + void (*reset_stipple_counter)( struct draw_stage * ); + + void (*destroy)( struct draw_stage * ); +}; + + +extern struct draw_stage *draw_unfilled_stage( struct draw_context *context ); +extern struct draw_stage *draw_twoside_stage( struct draw_context *context ); +extern struct draw_stage *draw_offset_stage( struct draw_context *context ); +extern struct draw_stage *draw_clip_stage( struct draw_context *context ); +extern struct draw_stage *draw_flatshade_stage( struct draw_context *context ); +extern struct draw_stage *draw_cull_stage( struct draw_context *context ); +extern struct draw_stage *draw_stipple_stage( struct draw_context *context ); +extern struct draw_stage *draw_wide_line_stage( struct draw_context *context ); +extern struct draw_stage *draw_wide_point_stage( struct draw_context *context ); +extern struct draw_stage *draw_validate_stage( struct draw_context *context ); + + +extern void draw_free_temp_verts( struct draw_stage *stage ); + +extern void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ); + + + + +/** + * Get a writeable copy of a vertex. + * \param stage drawing stage info + * \param vert the vertex to copy (source) + * \param idx index into stage's tmp[] array to put the copy (dest) + * \return pointer to the copied vertex + */ +static INLINE struct vertex_header * +dup_vert( struct draw_stage *stage, + const struct vertex_header *vert, + unsigned idx ) +{ + struct vertex_header *tmp = stage->tmp[idx]; + const uint vsize = sizeof(struct vertex_header) + + stage->draw->num_vs_outputs * 4 * sizeof(float); + memcpy(tmp, vert, vsize); + tmp->vertex_id = UNDEFINED_VERTEX_ID; + return tmp; +} + +#endif diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index e8d2a45102..24bc87d4f8 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -43,6 +43,7 @@ #include "draw_context.h" #include "draw_private.h" +#include "draw_pipe.h" /** diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index e84d380e50..9f878f6c02 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -49,6 +49,7 @@ #include "draw_context.h" #include "draw_vs.h" +#include "draw_pipe.h" /* diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 0ac3a240e5..6780f275d9 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -35,8 +35,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "draw_context.h" #include "draw_vs.h" +#include "draw_pipe.h" #ifndef IS_NEGATIVE @@ -204,7 +204,14 @@ static void emit_poly( struct draw_stage *stage, } } - +static INLINE float +dot4(const float *a, const float *b) +{ + return (a[0]*b[0] + + a[1]*b[1] + + a[2]*b[2] + + a[3]*b[3]); +} /* Clip a triangle against the viewport and user clip planes. diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c index 8177b0ac86..c406f89d05 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_cull.c +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -35,7 +35,7 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" -#include "draw_private.h" +#include "draw_pipe.h" struct cull_stage { diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c index 54baa1fbc9..bdb8b49dc4 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c @@ -31,6 +31,7 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "draw_vs.h" +#include "draw_pipe.h" /** subclass of draw_stage */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index dbc676deae..dbdece45bb 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -33,7 +33,7 @@ */ #include "pipe/p_util.h" -#include "draw_private.h" +#include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 4dddb72906..4903ba2133 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -44,7 +44,7 @@ #include "tgsi/util/tgsi_dump.h" #include "draw_context.h" -#include "draw_private.h" +#include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index 506f33512c..49429ee9e1 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -39,7 +39,7 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "draw_private.h" +#include "draw_pipe.h" /** Subclass of draw_stage */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c index 01d905c153..09a9d23d57 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c +++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c @@ -32,7 +32,7 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "draw_vs.h" - +#include "draw_pipe.h" struct twoside_stage { struct draw_stage stage; diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c index b07860cd9e..31e24f6a14 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c @@ -36,6 +36,7 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" #include "draw_private.h" +#include "draw_pipe.h" struct unfilled_stage { diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c index e163e078f0..ffddc2f62c 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_validate.c +++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c @@ -31,6 +31,7 @@ #include "pipe/p_util.h" #include "pipe/p_defines.h" #include "draw_private.h" +#include "draw_pipe.h" static boolean points( unsigned prim ) { @@ -66,11 +67,11 @@ draw_need_pipeline(const struct draw_context *draw, if (lines(prim)) { /* line stipple */ - if (draw->rasterizer->line_stipple_enable && draw->line_stipple) + if (draw->rasterizer->line_stipple_enable && draw->pipeline.line_stipple) return TRUE; /* wide lines */ - if (draw->rasterizer->line_width > draw->wide_line_threshold) + if (draw->rasterizer->line_width > draw->pipeline.wide_line_threshold) return TRUE; /* AA lines */ @@ -81,7 +82,7 @@ draw_need_pipeline(const struct draw_context *draw, if (points(prim)) { /* large points */ - if (draw->rasterizer->point_size > draw->wide_point_threshold) + if (draw->rasterizer->point_size > draw->pipeline.wide_point_threshold) return TRUE; /* AA points */ @@ -89,7 +90,7 @@ draw_need_pipeline(const struct draw_context *draw, return TRUE; /* point sprites */ - if (draw->rasterizer->point_sprite && draw->point_sprite) + if (draw->rasterizer->point_sprite && draw->pipeline.point_sprite) return TRUE; } @@ -145,15 +146,15 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) stage->next = next; /* drawing wide lines? */ - wide_lines = (draw->rasterizer->line_width > draw->wide_line_threshold + wide_lines = (draw->rasterizer->line_width > draw->pipeline.wide_line_threshold && !draw->rasterizer->line_smooth); /* drawing large points? */ - if (draw->rasterizer->point_sprite && draw->point_sprite) + if (draw->rasterizer->point_sprite && draw->pipeline.point_sprite) wide_points = TRUE; else if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) wide_points = FALSE; - else if (draw->rasterizer->point_size > draw->wide_point_threshold) + else if (draw->rasterizer->point_size > draw->pipeline.wide_point_threshold) wide_points = TRUE; else wide_points = FALSE; @@ -186,7 +187,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) next = draw->pipeline.wide_point; } - if (draw->rasterizer->line_stipple_enable && draw->line_stipple) { + if (draw->rasterizer->line_stipple_enable && draw->pipeline.line_stipple) { draw->pipeline.stipple->next = next; next = draw->pipeline.stipple; precalc_flat = 1; /* only needed for lines really */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 30dceeb43d..c835727e32 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -40,6 +40,7 @@ #include "draw_vbuf.h" #include "draw_private.h" #include "draw_vertex.h" +#include "draw_pipe.h" #include "translate/translate.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c index 9a168ce8bd..329b5d0fb0 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -32,6 +32,7 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "draw_private.h" +#include "draw_pipe.h" struct wideline_stage { diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index 3d0add0c1a..7a439178a0 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -32,6 +32,7 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "draw_vs.h" +#include "draw_pipe.h" struct widepoint_stage { @@ -203,8 +204,8 @@ static void widepoint_first_point( struct draw_stage *stage, } /* XXX we won't know the real size if it's computed by the vertex shader! */ - if ((draw->rasterizer->point_size > draw->wide_point_threshold) || - (draw->rasterizer->point_sprite && draw->point_sprite)) { + if ((draw->rasterizer->point_size > draw->pipeline.wide_point_threshold) || + (draw->rasterizer->point_sprite && draw->pipeline.point_sprite)) { stage->point = widepoint_point; } else { diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 37ffdbf902..b2b2f82b8f 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -51,12 +51,11 @@ struct pipe_context; struct gallivm_prog; struct gallivm_cpu_engine; - -struct draw_pt_middle_end; -struct draw_pt_front_end; struct draw_vertex_shader; +struct draw_context; +struct draw_stage; +struct vbuf_render; -#define MAX_SHADER_VERTICES 128 /** * Basic vertex info. @@ -70,17 +69,14 @@ struct vertex_header { float clip[4]; - float data[][4]; /* Note variable size */ + /* This will probably become float (*data)[4] soon: + */ + float data[][4]; }; /* NOTE: It should match vertex_id size above */ #define UNDEFINED_VERTEX_ID 0xffff -/* XXX This is too large */ -#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) -#define MAX_VERTEX_ALLOCATION ((MAX_VERTEX_SIZE + 0x0f) & ~0x0f) - - /** * Basic info for a point/line/triangle primitive. @@ -95,41 +91,6 @@ struct prim_header { -struct draw_context; - -/** - * Base class for all primitive drawing stages. - */ -struct draw_stage -{ - struct draw_context *draw; /**< parent context */ - - struct draw_stage *next; /**< next stage in pipeline */ - - struct vertex_header **tmp; /**< temp vert storage, such as for clipping */ - unsigned nr_tmps; - - void (*point)( struct draw_stage *, - struct prim_header * ); - - void (*line)( struct draw_stage *, - struct prim_header * ); - - void (*tri)( struct draw_stage *, - struct prim_header * ); - - void (*flush)( struct draw_stage *, - unsigned flags ); - - void (*reset_stipple_counter)( struct draw_stage * ); - - void (*destroy)( struct draw_stage * ); -}; - - - -struct vbuf_render; - #define PT_SHADE 0x1 #define PT_CLIPTEST 0x2 @@ -161,6 +122,12 @@ struct draw_context struct draw_stage *wide_line; struct draw_stage *wide_point; struct draw_stage *rasterize; + + float wide_point_threshold; /**< convert pnts to tris if larger than this */ + float wide_line_threshold; /**< convert lines to tris if wider than this */ + boolean line_stipple; /**< do line stipple? */ + boolean point_sprite; /**< convert points to quads for sprites? */ + } pipeline; @@ -225,10 +192,6 @@ struct draw_context float plane[12][4]; unsigned nr_planes; - float wide_point_threshold; /**< convert pnts to tris if larger than this */ - float wide_line_threshold; /**< convert lines to tris if wider than this */ - boolean line_stipple; /**< do line stipple? */ - boolean point_sprite; /**< convert points to quads for sprites? */ boolean use_sse; /* If a prim stage introduces new vertex attributes, they'll be stored here @@ -252,44 +215,29 @@ struct draw_context -extern struct draw_stage *draw_unfilled_stage( struct draw_context *context ); -extern struct draw_stage *draw_twoside_stage( struct draw_context *context ); -extern struct draw_stage *draw_offset_stage( struct draw_context *context ); -extern struct draw_stage *draw_clip_stage( struct draw_context *context ); -extern struct draw_stage *draw_flatshade_stage( struct draw_context *context ); -extern struct draw_stage *draw_cull_stage( struct draw_context *context ); -extern struct draw_stage *draw_stipple_stage( struct draw_context *context ); -extern struct draw_stage *draw_wide_line_stage( struct draw_context *context ); -extern struct draw_stage *draw_wide_point_stage( struct draw_context *context ); -extern struct draw_stage *draw_validate_stage( struct draw_context *context ); - - -extern void draw_free_temp_verts( struct draw_stage *stage ); - -extern void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ); extern void draw_reset_vertex_ids( struct draw_context *draw ); -extern int draw_vertex_cache_check_space( struct draw_context *draw, - unsigned nr_verts ); -extern void draw_vertex_cache_invalidate( struct draw_context *draw ); -extern void draw_vertex_cache_unreference( struct draw_context *draw ); -extern void draw_vertex_cache_reset_vertex_ids( struct draw_context *draw ); +/******************************************************************************* + * Vertex processing (was passthrough) code: + */ +boolean draw_pt_init( struct draw_context *draw ); +void draw_pt_destroy( struct draw_context *draw ); +void draw_pt_reset_vertex_ids( struct draw_context *draw ); -extern void draw_update_vertex_fetch( struct draw_context *draw ); +/******************************************************************************* + * Primitive processing (pipelnie) code: + */ -extern boolean draw_need_pipeline(const struct draw_context *draw, - unsigned prim ); +boolean draw_pipeline_init( struct draw_context *draw ); +void draw_pipeline_destroy( struct draw_context *draw ); +boolean draw_need_pipeline(const struct draw_context *draw, + unsigned prim ); -/* Passthrough mode (second attempt): - */ -boolean draw_pt_init( struct draw_context *draw ); -void draw_pt_destroy( struct draw_context *draw ); -void draw_pt_reset_vertex_ids( struct draw_context *draw ); #define DRAW_FLUSH_STATE_CHANGE 0x8 #define DRAW_FLUSH_BACKEND 0x10 @@ -301,36 +249,6 @@ boolean draw_get_edgeflag( struct draw_context *draw, unsigned idx ); -/** - * Get a writeable copy of a vertex. - * \param stage drawing stage info - * \param vert the vertex to copy (source) - * \param idx index into stage's tmp[] array to put the copy (dest) - * \return pointer to the copied vertex - */ -static INLINE struct vertex_header * -dup_vert( struct draw_stage *stage, - const struct vertex_header *vert, - unsigned idx ) -{ - struct vertex_header *tmp = stage->tmp[idx]; - const uint vsize = sizeof(struct vertex_header) - + stage->draw->num_vs_outputs * 4 * sizeof(float); - memcpy(tmp, vert, vsize); - tmp->vertex_id = UNDEFINED_VERTEX_ID; - return tmp; -} - -static INLINE float -dot4(const float *a, const float *b) -{ - float result = (a[0]*b[0] + - a[1]*b[1] + - a[2]*b[2] + - a[3]*b[3]); - - return result; -} #endif /* DRAW_PRIVATE_H */ diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index f153a3ee2c..965269251f 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -165,21 +165,3 @@ draw_arrays(struct draw_context *draw, unsigned prim, draw_pt_arrays(draw, prim, start, count); } - -/* Revamp me please: - */ -void draw_do_flush( struct draw_context *draw, unsigned flags ) -{ - if (!draw->flushing) - { - draw->flushing = TRUE; - - if (flags >= DRAW_FLUSH_STATE_CHANGE) { - draw->pipeline.first->flush( draw->pipeline.first, flags ); - draw->pipeline.first = draw->pipeline.validate; - draw->reduced_prim = ~0; - } - - draw->flushing = FALSE; - } -} diff --git a/src/gallium/auxiliary/draw/draw_pt_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_pipeline.c index 1a9a3adb03..922344e448 100644 --- a/src/gallium/auxiliary/draw/draw_pt_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_pipeline.c @@ -35,6 +35,7 @@ #include "draw/draw_private.h" #include "draw/draw_vertex.h" #include "draw/draw_pt.h" +#include "draw/draw_pipe.h" static void do_point( struct draw_context *draw, const char *v0 ) diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index b3ecec6ece..581f044dae 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -44,6 +44,17 @@ struct pt_post_vs { +static INLINE float +dot4(const float *a, const float *b) +{ + return (a[0]*b[0] + + a[1]*b[1] + + a[2]*b[2] + + a[3]*b[3]); +} + + + static INLINE unsigned compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr) { diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c index 168036eee8..a42adaafb1 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.c +++ b/src/gallium/auxiliary/draw/draw_vertex.c @@ -72,6 +72,4 @@ draw_compute_vertex_size(struct vertex_info *vinfo) assert(0); } } - - assert(vinfo->size * 4 <= MAX_VERTEX_SIZE); } diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 58a5854f0d..4bef21619c 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -142,7 +142,7 @@ struct pipe_context *i915_create_context( struct pipe_screen *screen, */ i915->draw = draw_create(); assert(i915->draw); - if (GETENV("I915_VBUF")) { + if (!GETENV("I915_NO_VBUF")) { draw_set_rasterize_stage(i915->draw, i915_draw_vbuf_stage(i915)); } else { diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c index b6fb0a6d88..9ffa460138 100644 --- a/src/gallium/drivers/i915simple/i915_prim_emit.c +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "draw/draw_private.h" +#include "draw/draw_pipe.h" #include "pipe/p_util.h" #include "i915_context.h" @@ -78,9 +78,6 @@ emit_hw_vertex( struct i915_context *i915, const uint j = vinfo->src_index[i]; const float *attrib = vertex->data[j]; switch (vinfo->emit[i]) { - case EMIT_OMIT: - /* no-op */ - break; case EMIT_1F: OUT_BATCH( fui(attrib[0]) ); count++; diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index 0ddb06764a..feb35d492a 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -39,7 +39,7 @@ #include "sp_setup.h" #include "sp_state.h" #include "sp_prim_setup.h" -#include "draw/draw_private.h" +#include "draw/draw_pipe.h" #include "draw/draw_vertex.h" #include "pipe/p_util.h" diff --git a/src/mesa/state_tracker/st_cb_feedback.c b/src/mesa/state_tracker/st_cb_feedback.c index 605bfee743..1b50792bd1 100644 --- a/src/mesa/state_tracker/st_cb_feedback.c +++ b/src/mesa/state_tracker/st_cb_feedback.c @@ -56,7 +56,7 @@ #include "cso_cache/cso_cache.h" #include "draw/draw_context.h" -#include "draw/draw_private.h" +#include "draw/draw_pipe.h" /** diff --git a/src/mesa/state_tracker/st_cb_rasterpos.c b/src/mesa/state_tracker/st_cb_rasterpos.c index 2ed228778e..3cb7b68bea 100644 --- a/src/mesa/state_tracker/st_cb_rasterpos.c +++ b/src/mesa/state_tracker/st_cb_rasterpos.c @@ -46,7 +46,7 @@ #include "st_cb_rasterpos.h" #include "st_draw.h" #include "draw/draw_context.h" -#include "draw/draw_private.h" +#include "draw/draw_pipe.h" #include "shader/prog_instruction.h" #include "vbo/vbo.h" -- cgit v1.2.3 From c898eae27221bd23b11327553c215a94369eeb99 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 15:35:22 +0100 Subject: draw: always emit header in draw_pt_fetch.c --- src/gallium/auxiliary/draw/draw_pt.h | 1 - src/gallium/auxiliary/draw/draw_pt_fetch.c | 8 ++------ src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 1 - 3 files changed, 2 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 1119e9c6b8..eaf8e0374a 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -176,7 +176,6 @@ struct pt_emit *draw_pt_emit_create( struct draw_context *draw ); struct pt_fetch; void draw_pt_fetch_prepare( struct pt_fetch *fetch, - boolean emit_header, unsigned vertex_size ); void draw_pt_fetch_run( struct pt_fetch *fetch, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 1aed251c85..037e3765da 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -54,7 +54,6 @@ struct pt_fetch { * */ void draw_pt_fetch_prepare( struct pt_fetch *fetch, - boolean emit_header, unsigned vertex_size ) { struct draw_context *draw = fetch->draw; @@ -66,16 +65,13 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, memset(&key, 0, sizeof(key)); - /* If PT_SHADE is not set, then we are creating post-shader - * vertices, meaning that we need to emit/leave space for a vertex - * header. + /* Always emit/leave space for a vertex header. * * It's worth considering whether the vertex headers should contain * a pointer to the 'data', rather than having it inline. * Something to look at after we've fully switched over to the pt * paths. */ - if (emit_header) { /* Need to set header->vertex_id = 0xffff somehow. */ @@ -121,7 +117,7 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, fetch->translate = translate_create( &key ); - if (emit_header) { + { static struct vertex_header vh = { 0, 0, 0, 0xffff }; fetch->translate->set_buffer(fetch->translate, draw->nr_vertex_buffers, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 0b9e8d15ba..560f1bcd92 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -71,7 +71,6 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, draw_pt_fetch_prepare( fpme->fetch, - (opt & (PT_CLIPTEST | PT_PIPELINE)) != 0, fpme->vertex_size ); /* XXX: it's not really gl rasterization rules we care about here, -- cgit v1.2.3 From bee1d31641674c67676de86fbb4b35ca5bf7f33f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 16:39:24 +0100 Subject: draw: move pt_pipeline code to draw_pipe.c This is now the drawing interface to the pipeline. No more calling into pipeline.first->tri(), etc. --- src/gallium/auxiliary/draw/Makefile | 1 - src/gallium/auxiliary/draw/SConscript | 1 - src/gallium/auxiliary/draw/draw_pipe.c | 117 ++++++++++++++- src/gallium/auxiliary/draw/draw_private.h | 24 +++- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 14 +- src/gallium/auxiliary/draw/draw_pt_pipeline.c | 158 --------------------- 6 files changed, 140 insertions(+), 175 deletions(-) delete mode 100644 src/gallium/auxiliary/draw/draw_pt_pipeline.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 4fffd11b77..5289f2660a 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -26,7 +26,6 @@ C_SOURCES = \ draw_pt_fetch.c \ draw_pt_fetch_emit.c \ draw_pt_fetch_shade_pipeline.c \ - draw_pt_pipeline.c \ draw_pt_post_vs.c \ draw_pt_vcache.c \ draw_vertex.c \ diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 5fa35d3005..6f3ca4fa49 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -25,7 +25,6 @@ draw = env.ConvenienceLibrary( 'draw_pt_fetch.c', 'draw_pt_fetch_emit.c', 'draw_pt_fetch_shade_pipeline.c', - 'draw_pt_pipeline.c', 'draw_pt_post_vs.c', 'draw_pt_vcache.c', 'draw_vs.c', diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 9d62cb2c65..b18b747d9c 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -35,6 +35,7 @@ #include "draw/draw_pipe.h" + boolean draw_pipeline_init( struct draw_context *draw ) { /* create pipeline stages */ @@ -142,6 +143,60 @@ void draw_free_temp_verts( struct draw_stage *stage ) } } + + +static void do_point( struct draw_context *draw, + const char *v0 ) +{ + struct prim_header prim; + + prim.reset_line_stipple = 0; + prim.edgeflags = 1; + prim.pad = 0; + prim.v[0] = (struct vertex_header *)v0; + + draw->pipeline.first->point( draw->pipeline.first, &prim ); +} + + +static void do_line( struct draw_context *draw, + const char *v0, + const char *v1 ) +{ + struct prim_header prim; + + prim.reset_line_stipple = 1; /* fixme */ + prim.edgeflags = 1; + prim.pad = 0; + prim.v[0] = (struct vertex_header *)v0; + prim.v[1] = (struct vertex_header *)v1; + + draw->pipeline.first->line( draw->pipeline.first, &prim ); +} + + +static void do_triangle( struct draw_context *draw, + char *v0, + char *v1, + char *v2 ) +{ + struct prim_header prim; + + prim.v[0] = (struct vertex_header *)v0; + prim.v[1] = (struct vertex_header *)v1; + prim.v[2] = (struct vertex_header *)v2; + prim.reset_line_stipple = 1; + prim.edgeflags = ((prim.v[0]->edgeflag) | + (prim.v[1]->edgeflag << 1) | + (prim.v[2]->edgeflag << 2)); + prim.pad = 0; + + draw->pipeline.first->tri( draw->pipeline.first, &prim ); +} + + +/* Reset vertex ids. This is basically a type of flush. + */ void draw_reset_vertex_ids(struct draw_context *draw) { struct draw_stage *stage = draw->pipeline.first; @@ -155,7 +210,67 @@ void draw_reset_vertex_ids(struct draw_context *draw) stage = stage->next; } - draw_pt_reset_vertex_ids(draw); + if (draw->pipeline.verts) + { + unsigned i; + char *verts = draw->pipeline.verts; + unsigned stride = draw->pipeline.vertex_stride; + + for (i = 0; i < draw->pipeline.vertex_count; i++) { + ((struct vertex_header *)verts)->vertex_id = UNDEFINED_VERTEX_ID; + verts += stride; + } + } } +/* Code to run the pipeline on a fairly arbitary collection of vertices. + * + * Vertex headers must be pre-initialized with the + * UNDEFINED_VERTEX_ID, this code will cause that id to become + * overwritten, so it may have to be reset if there is the intention + * to reuse the vertices. + * + * This code provides a callback to reset the vertex id's which the + * draw_vbuf.c code uses when it has to perform a flush. + */ +void draw_pipeline_run( struct draw_context *draw, + unsigned prim, + struct vertex_header *vertices, + unsigned vertex_count, + unsigned stride, + const ushort *elts, + unsigned count ) +{ + char *verts = (char *)vertices; + unsigned i; + + draw->pipeline.verts = verts; + draw->pipeline.vertex_stride = stride; + draw->pipeline.vertex_count = vertex_count; + + switch (prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i++) + do_point( draw, + verts + stride * elts[i] ); + break; + case PIPE_PRIM_LINES: + for (i = 0; i+1 < count; i += 2) + do_line( draw, + verts + stride * elts[i+0], + verts + stride * elts[i+1]); + break; + case PIPE_PRIM_TRIANGLES: + for (i = 0; i+2 < count; i += 3) + do_triangle( draw, + verts + stride * elts[i+0], + verts + stride * elts[i+1], + verts + stride * elts[i+2]); + break; + } + + draw->pipeline.verts = NULL; + draw->pipeline.vertex_count = 0; +} + diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index b2b2f82b8f..4d123cf8d9 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -128,6 +128,11 @@ struct draw_context boolean line_stipple; /**< do line stipple? */ boolean point_sprite; /**< convert points to quads for sprites? */ + /* Temporary storage while the pipeline is being run: + */ + char *verts; + unsigned vertex_stride; + unsigned vertex_count; } pipeline; @@ -144,13 +149,6 @@ struct draw_context struct { struct draw_pt_front_end *vcache; } front; - - struct { - char *verts; - unsigned vertex_stride; - unsigned vertex_count; - } pipeline; - } pt; boolean flushing; @@ -235,10 +233,22 @@ void draw_pt_reset_vertex_ids( struct draw_context *draw ); boolean draw_pipeline_init( struct draw_context *draw ); void draw_pipeline_destroy( struct draw_context *draw ); +void draw_pipeline_run( struct draw_context *draw, + unsigned prim, + struct vertex_header *vertices, + unsigned vertex_count, + unsigned stride, + const ushort *elts, + unsigned count ); + boolean draw_need_pipeline(const struct draw_context *draw, unsigned prim ); +/******************************************************************************* + * Flushing + */ + #define DRAW_FLUSH_STATE_CHANGE 0x8 #define DRAW_FLUSH_BACKEND 0x10 diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 560f1bcd92..a47693ba20 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -148,13 +148,13 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, /* Do we need to run the pipeline? */ if (opt & PT_PIPELINE) { - draw_pt_run_pipeline( fpme->draw, - fpme->prim, - pipeline_verts, - fetch_count, - fpme->vertex_size, - draw_elts, - draw_count ); + draw_pipeline_run( fpme->draw, + fpme->prim, + pipeline_verts, + fetch_count, + fpme->vertex_size, + draw_elts, + draw_count ); } else { draw_pt_emit( fpme->emit, diff --git a/src/gallium/auxiliary/draw/draw_pt_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_pipeline.c deleted file mode 100644 index 922344e448..0000000000 --- a/src/gallium/auxiliary/draw/draw_pt_pipeline.c +++ /dev/null @@ -1,158 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell - */ - -#include "pipe/p_util.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" -#include "draw/draw_vertex.h" -#include "draw/draw_pt.h" -#include "draw/draw_pipe.h" - -static void do_point( struct draw_context *draw, - const char *v0 ) -{ - struct prim_header prim; - - prim.reset_line_stipple = 0; - prim.edgeflags = 1; - prim.pad = 0; - prim.v[0] = (struct vertex_header *)v0; - - draw->pipeline.first->point( draw->pipeline.first, &prim ); -} - - -static void do_line( struct draw_context *draw, - const char *v0, - const char *v1 ) -{ - struct prim_header prim; - - prim.reset_line_stipple = 1; /* fixme */ - prim.edgeflags = 1; - prim.pad = 0; - prim.v[0] = (struct vertex_header *)v0; - prim.v[1] = (struct vertex_header *)v1; - - draw->pipeline.first->line( draw->pipeline.first, &prim ); -} - - -static void do_triangle( struct draw_context *draw, - char *v0, - char *v1, - char *v2 ) -{ - struct prim_header prim; - - prim.v[0] = (struct vertex_header *)v0; - prim.v[1] = (struct vertex_header *)v1; - prim.v[2] = (struct vertex_header *)v2; - prim.reset_line_stipple = 1; - prim.edgeflags = ((prim.v[0]->edgeflag) | - (prim.v[1]->edgeflag << 1) | - (prim.v[2]->edgeflag << 2)); - prim.pad = 0; - - if (0) debug_printf("tri ef: %d %d %d\n", - prim.v[0]->edgeflag, - prim.v[1]->edgeflag, - prim.v[2]->edgeflag); - - draw->pipeline.first->tri( draw->pipeline.first, &prim ); -} - - - -void draw_pt_reset_vertex_ids( struct draw_context *draw ) -{ - unsigned i; - char *verts = draw->pt.pipeline.verts; - unsigned stride = draw->pt.pipeline.vertex_stride; - - for (i = 0; i < draw->pt.pipeline.vertex_count; i++) { - ((struct vertex_header *)verts)->vertex_id = UNDEFINED_VERTEX_ID; - verts += stride; - } -} - - -/* Code to run the pipeline on a fairly arbitary collection of vertices. - * - * Vertex headers must be pre-initialized with the - * UNDEFINED_VERTEX_ID, this code will cause that id to become - * overwritten, so it may have to be reset if there is the intention - * to reuse the vertices. - * - * This code provides a callback to reset the vertex id's which the - * draw_vbuf.c code uses when it has to perform a flush. - */ -void draw_pt_run_pipeline( struct draw_context *draw, - unsigned prim, - struct vertex_header *pipeline_verts, - unsigned vertex_count, - unsigned stride, - const ushort *elts, - unsigned count ) -{ - char *verts = (char *)pipeline_verts; - unsigned i; - - draw->pt.pipeline.verts = verts; - draw->pt.pipeline.vertex_stride = stride; - draw->pt.pipeline.vertex_count = vertex_count; - - switch (prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i++) - do_point( draw, - verts + stride * elts[i] ); - break; - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) - do_line( draw, - verts + stride * elts[i+0], - verts + stride * elts[i+1]); - break; - case PIPE_PRIM_TRIANGLES: - for (i = 0; i+2 < count; i += 3) - do_triangle( draw, - verts + stride * elts[i+0], - verts + stride * elts[i+1], - verts + stride * elts[i+2]); - break; - } - - draw->pt.pipeline.verts = NULL; - draw->pt.pipeline.vertex_count = 0; -} - -- cgit v1.2.3 From e7bac4276634ea1ee81ac71f6f6869f87e689872 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 16:43:15 +0100 Subject: draw: put pipeline flushing behind a new interface --- src/gallium/auxiliary/draw/draw_context.c | 8 +++----- src/gallium/auxiliary/draw/draw_pipe.c | 8 ++++++++ src/gallium/auxiliary/draw/draw_private.h | 5 ++++- 3 files changed, 15 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index fa6791fa0b..3e69867d11 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -379,11 +379,9 @@ void draw_do_flush( struct draw_context *draw, unsigned flags ) { draw->flushing = TRUE; - if (flags >= DRAW_FLUSH_STATE_CHANGE) { - draw->pipeline.first->flush( draw->pipeline.first, flags ); - draw->pipeline.first = draw->pipeline.validate; - draw->reduced_prim = ~0; /* is reduced_prim needed any more? */ - } + draw_pipeline_flush( draw, flags ); + + draw->reduced_prim = ~0; /* is reduced_prim needed any more? */ draw->flushing = FALSE; } diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index b18b747d9c..3fae908514 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -274,3 +274,11 @@ void draw_pipeline_run( struct draw_context *draw, draw->pipeline.vertex_count = 0; } + + +void draw_pipeline_flush( struct draw_context *draw, + unsigned flags ) +{ + draw->pipeline.first->flush( draw->pipeline.first, flags ); + draw->pipeline.first = draw->pipeline.validate; +} diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 4d123cf8d9..59635d643b 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -227,7 +227,7 @@ void draw_pt_reset_vertex_ids( struct draw_context *draw ); /******************************************************************************* - * Primitive processing (pipelnie) code: + * Primitive processing (pipeline) code: */ boolean draw_pipeline_init( struct draw_context *draw ); @@ -241,6 +241,9 @@ void draw_pipeline_run( struct draw_context *draw, const ushort *elts, unsigned count ); +void draw_pipeline_flush( struct draw_context *draw, + unsigned flags ); + boolean draw_need_pipeline(const struct draw_context *draw, unsigned prim ); -- cgit v1.2.3 From 2dae208fb19e79c7446a29ee5dee53e50283b57c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 17:16:23 +0100 Subject: draw: make room for extra_vs_outputs --- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index a47693ba20..e1df594035 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -56,8 +56,12 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; struct draw_vertex_shader *vs = draw->vertex_shader; + + /* Add one to num_outputs because the pipeline occasionally tags on + * an additional texcoord, eg for AA lines. + */ unsigned nr = MAX2( vs->info.num_inputs, - vs->info.num_outputs ); + vs->info.num_outputs + 1 ); fpme->prim = prim; fpme->opt = opt; -- cgit v1.2.3 From dcf6f776ce32b89b7ff784bb38030bd29698e005 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 17:16:41 +0100 Subject: draw: make draw_reset_vertex_ids private to the draw_pipe_* code --- src/gallium/auxiliary/draw/draw_pipe.c | 2 ++ src/gallium/auxiliary/draw/draw_pipe.h | 2 +- src/gallium/auxiliary/draw/draw_private.h | 1 - 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 3fae908514..0c0840af0c 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -196,6 +196,8 @@ static void do_triangle( struct draw_context *draw, /* Reset vertex ids. This is basically a type of flush. + * + * Called only from draw_pipe_vbuf.c */ void draw_reset_vertex_ids(struct draw_context *draw) { diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h index 5b97ca5c8c..f2749b34bb 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.h +++ b/src/gallium/auxiliary/draw/draw_pipe.h @@ -81,9 +81,9 @@ extern struct draw_stage *draw_validate_stage( struct draw_context *context ); extern void draw_free_temp_verts( struct draw_stage *stage ); - extern void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ); +extern void draw_reset_vertex_ids( struct draw_context *draw ); diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 59635d643b..6cf3e54e28 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -214,7 +214,6 @@ struct draw_context -extern void draw_reset_vertex_ids( struct draw_context *draw ); -- cgit v1.2.3 From 7d72607e142c0412b88183b849fd701e698b8f79 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 17:27:52 +0100 Subject: draw: move incoming vertex state into draw->pt This state is effectively private to the vertex processing part of the draw module. --- src/gallium/auxiliary/draw/draw_context.c | 28 +++++-------- src/gallium/auxiliary/draw/draw_private.h | 47 +++++++++++----------- src/gallium/auxiliary/draw/draw_pt.c | 8 ++++ src/gallium/auxiliary/draw/draw_pt.h | 9 +---- src/gallium/auxiliary/draw/draw_pt_elts.c | 6 +-- src/gallium/auxiliary/draw/draw_pt_fetch.c | 20 ++++----- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 6 +-- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 2 +- src/gallium/auxiliary/draw/draw_pt_vcache.c | 2 +- src/mesa/state_tracker/st_draw.c | 6 +-- 10 files changed, 63 insertions(+), 71 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 3e69867d11..4988d67faa 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -62,8 +62,6 @@ struct draw_context *draw_create( void ) draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ - draw_set_mapped_element_buffer( draw, 0, NULL ); - tgsi_exec_machine_init(&draw->machine); /* FIXME: give this machine thing a proper constructor: @@ -188,8 +186,8 @@ draw_set_vertex_buffers(struct draw_context *draw, { assert(count <= PIPE_MAX_ATTRIBS); - memcpy(draw->vertex_buffer, buffers, count * sizeof(buffers[0])); - draw->nr_vertex_buffers = count; + memcpy(draw->pt.vertex_buffer, buffers, count * sizeof(buffers[0])); + draw->pt.nr_vertex_buffers = count; } @@ -200,8 +198,8 @@ draw_set_vertex_elements(struct draw_context *draw, { assert(count <= PIPE_MAX_ATTRIBS); - memcpy(draw->vertex_element, elements, count * sizeof(elements[0])); - draw->nr_vertex_elements = count; + memcpy(draw->pt.vertex_element, elements, count * sizeof(elements[0])); + draw->pt.nr_vertex_elements = count; } @@ -212,7 +210,7 @@ void draw_set_mapped_vertex_buffer(struct draw_context *draw, unsigned attr, const void *buffer) { - draw->user.vbuffer[attr] = buffer; + draw->pt.user.vbuffer[attr] = buffer; } @@ -220,7 +218,7 @@ void draw_set_mapped_constant_buffer(struct draw_context *draw, const void *buffer) { - draw->user.constants = buffer; + draw->pt.user.constants = buffer; } @@ -337,18 +335,10 @@ void draw_set_render( struct draw_context *draw, void draw_set_edgeflags( struct draw_context *draw, const unsigned *edgeflag ) { - draw->user.edgeflag = edgeflag; + draw->pt.user.edgeflag = edgeflag; } -boolean draw_get_edgeflag( struct draw_context *draw, - unsigned idx ) -{ - if (draw->user.edgeflag) - return (draw->user.edgeflag[idx/32] & (1 << (idx%32))) != 0; - else - return 1; -} /** @@ -365,8 +355,8 @@ void draw_set_mapped_element_buffer( struct draw_context *draw, unsigned eltSize, void *elements ) { - draw->user.elts = elements; - draw->user.eltSize = eltSize; + draw->pt.user.elts = elements; + draw->pt.user.eltSize = eltSize; } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 6cf3e54e28..27f61c2f40 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -149,6 +149,29 @@ struct draw_context struct { struct draw_pt_front_end *vcache; } front; + + struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_buffers; + + struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; + unsigned nr_vertex_elements; + + /* user-space vertex data, buffers */ + struct { + const unsigned *edgeflag; + + /** vertex element/index buffer (ex: glDrawElements) */ + const void *elts; + /** bytes per index (0, 1, 2 or 4) */ + unsigned eltSize; + + /** vertex arrays */ + const void *vbuffer[PIPE_MAX_ATTRIBS]; + + /** constant buffer (for vertex shader) */ + const void *constants; + } user; + } pt; boolean flushing; @@ -157,33 +180,12 @@ struct draw_context const struct pipe_rasterizer_state *rasterizer; struct pipe_viewport_state viewport; - struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; - unsigned nr_vertex_buffers; - - struct pipe_vertex_element vertex_element[PIPE_MAX_ATTRIBS]; - unsigned nr_vertex_elements; - struct draw_vertex_shader *vertex_shader; boolean identity_viewport; uint num_vs_outputs; /**< convenience, from vertex_shader */ - /* user-space vertex data, buffers */ - struct { - const unsigned *edgeflag; - - /** vertex element/index buffer (ex: glDrawElements) */ - const void *elts; - /** bytes per index (0, 1, 2 or 4) */ - unsigned eltSize; - - /** vertex arrays */ - const void *vbuffer[PIPE_MAX_ATTRIBS]; - - /** constant buffer (for vertex shader) */ - const void *constants; - } user; /* Clip derived state: */ @@ -257,9 +259,6 @@ boolean draw_need_pipeline(const struct draw_context *draw, void draw_do_flush( struct draw_context *draw, unsigned flags ); -boolean draw_get_edgeflag( struct draw_context *draw, - unsigned idx ); - diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 965269251f..9f8e8d3d62 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -165,3 +165,11 @@ draw_arrays(struct draw_context *draw, unsigned prim, draw_pt_arrays(draw, prim, start, count); } +boolean draw_pt_get_edgeflag( struct draw_context *draw, + unsigned idx ) +{ + if (draw->pt.user.edgeflag) + return (draw->pt.user.edgeflag[idx/32] & (1 << (idx%32))) != 0; + else + return 1; +} diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index eaf8e0374a..fd0d158fcf 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -141,13 +141,8 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *d /* More helpers: */ -void draw_pt_run_pipeline( struct draw_context *draw, - unsigned prim, - struct vertex_header *verts, - unsigned vertex_count, - unsigned vertex_stride, - const ushort *elts, - unsigned count ); +boolean draw_pt_get_edgeflag( struct draw_context *draw, + unsigned idx ); /******************************************************************************* diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c index d49770e7b2..2094c081ed 100644 --- a/src/gallium/auxiliary/draw/draw_pt_elts.c +++ b/src/gallium/auxiliary/draw/draw_pt_elts.c @@ -59,7 +59,7 @@ static unsigned elt_vert( const void *elts, unsigned idx ) pt_elt_func draw_pt_elt_func( struct draw_context *draw ) { - switch (draw->user.eltSize) { + switch (draw->pt.user.eltSize) { case 0: return elt_vert; case 1: return elt_ubyte; case 2: return elt_ushort; @@ -71,9 +71,9 @@ pt_elt_func draw_pt_elt_func( struct draw_context *draw ) const void *draw_pt_elt_ptr( struct draw_context *draw, unsigned start ) { - const char *elts = draw->user.elts; + const char *elts = draw->pt.user.elts; - switch (draw->user.eltSize) { + switch (draw->pt.user.eltSize) { case 0: return (const void *)(((const ubyte *)NULL) + start); case 1: diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 037e3765da..c588710b75 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -76,7 +76,7 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, /* Need to set header->vertex_id = 0xffff somehow. */ key.element[nr].input_format = PIPE_FORMAT_R32_FLOAT; - key.element[nr].input_buffer = draw->nr_vertex_buffers; + key.element[nr].input_buffer = draw->pt.nr_vertex_buffers; key.element[nr].input_offset = 0; key.element[nr].output_format = PIPE_FORMAT_R32_FLOAT; key.element[nr].output_offset = dst_offset; @@ -90,10 +90,10 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, } - for (i = 0; i < draw->nr_vertex_elements; i++) { - key.element[nr].input_format = draw->vertex_element[i].src_format; - key.element[nr].input_buffer = draw->vertex_element[i].vertex_buffer_index; - key.element[nr].input_offset = draw->vertex_element[i].src_offset; + for (i = 0; i < draw->pt.nr_vertex_elements; i++) { + key.element[nr].input_format = draw->pt.vertex_element[i].src_format; + key.element[nr].input_buffer = draw->pt.vertex_element[i].vertex_buffer_index; + key.element[nr].input_offset = draw->pt.vertex_element[i].src_offset; key.element[nr].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; key.element[nr].output_offset = dst_offset; @@ -120,7 +120,7 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, { static struct vertex_header vh = { 0, 0, 0, 0xffff }; fetch->translate->set_buffer(fetch->translate, - draw->nr_vertex_buffers, + draw->pt.nr_vertex_buffers, &vh, 0); } @@ -139,12 +139,12 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, struct translate *translate = fetch->translate; unsigned i; - for (i = 0; i < draw->nr_vertex_buffers; i++) { + for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { translate->set_buffer(translate, i, - ((char *)draw->user.vbuffer[i] + - draw->vertex_buffer[i].buffer_offset), - draw->vertex_buffer[i].pitch ); + ((char *)draw->pt.user.vbuffer[i] + + draw->pt.vertex_buffer[i].buffer_offset), + draw->pt.vertex_buffer[i].pitch ); } translate->run_elts( translate, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 6e4fea460b..1b9b3bfaa6 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -117,7 +117,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, memset(&key, 0, sizeof(key)); for (i = 0; i < vinfo->num_attribs; i++) { - const struct pipe_vertex_element *src = &draw->vertex_element[vinfo->src_index[i]]; + const struct pipe_vertex_element *src = &draw->pt.vertex_element[vinfo->src_index[i]]; unsigned emit_sz = 0; unsigned input_format = src->src_format; @@ -144,7 +144,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, break; case EMIT_1F_PSIZE: input_format = PIPE_FORMAT_R32_FLOAT; - input_buffer = draw->nr_vertex_buffers; + input_buffer = draw->pt.nr_vertex_buffers; input_offset = 0; output_format = PIPE_FORMAT_R32_FLOAT; emit_sz = 1 * sizeof(float); @@ -179,7 +179,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, feme->translate = translate_create( &key ); feme->translate->set_buffer(feme->translate, - draw->nr_vertex_buffers, + draw->pt.nr_vertex_buffers, &feme->point_size, 0); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index e1df594035..881e47d59d 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -135,7 +135,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, shader->run_linear(shader, (const float (*)[4])pipeline_verts->data, ( float (*)[4])pipeline_verts->data, - (const float (*)[4])draw->user.constants, + (const float (*)[4])draw->pt.user.constants, fetch_count, fpme->vertex_size, fpme->vertex_size); diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 5561f2b6fb..b61bb50664 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -106,7 +106,7 @@ static unsigned add_edgeflag( struct vcache_frontend *vcache, unsigned idx, unsigned mask ) { - if (mask && draw_get_edgeflag(vcache->draw, idx)) + if (mask && draw_pt_get_edgeflag(vcache->draw, idx)) return idx | DRAW_PT_EDGEFLAG; else return idx; diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index f0f62246dd..befcb96bd8 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -611,10 +611,10 @@ st_feedback_draw_vbo(GLcontext *ctx, * unmap vertex/index buffers */ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { - if (draw->vertex_buffer[i].buffer) { + if (draw->pt.vertex_buffer[i].buffer) { pipe->winsys->buffer_unmap(pipe->winsys, - draw->vertex_buffer[i].buffer); - pipe_buffer_reference(winsys, &draw->vertex_buffer[i].buffer, NULL); + draw->pt.vertex_buffer[i].buffer); + pipe_buffer_reference(winsys, &draw->pt.vertex_buffer[i].buffer, NULL); draw_set_mapped_vertex_buffer(draw, i, NULL); } } -- cgit v1.2.3 From d3cb62b8b3ea03bfb9800bf4b738d9814ef3c516 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 18:02:42 +0100 Subject: draw: fix scons build --- src/gallium/auxiliary/draw/SConscript | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 6f3ca4fa49..91e6a35c5e 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -4,6 +4,7 @@ draw = env.ConvenienceLibrary( target = 'draw', source = [ 'draw_context.c', + 'draw_pipe.c', 'draw_pipe_aaline.c', 'draw_pipe_aapoint.c', 'draw_pipe_clip.c', @@ -16,7 +17,6 @@ draw = env.ConvenienceLibrary( 'draw_pipe_unfilled.c', 'draw_pipe_validate.c', 'draw_pipe_vbuf.c', - 'draw_pipe_vertex.c', 'draw_pipe_wide_line.c', 'draw_pipe_wide_point.c', 'draw_pt.c', @@ -27,6 +27,7 @@ draw = env.ConvenienceLibrary( 'draw_pt_fetch_shade_pipeline.c', 'draw_pt_post_vs.c', 'draw_pt_vcache.c', + 'draw_vertex.c', 'draw_vs.c', 'draw_vs_exec.c', 'draw_vs_llvm.c', -- cgit v1.2.3 From af523a5bd7828fd554669cf83f18992af967a075 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 18:25:33 +0100 Subject: rtasm: include yet another i386 define varient --- src/gallium/auxiliary/rtasm/rtasm_cpu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c index eb3359750b..d577ff5b42 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c +++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c @@ -32,7 +32,7 @@ int rtasm_cpu_has_sse(void) { /* FIXME: actually detect this at run-time */ -#if defined(__i386__) || defined(__386__) +#if defined(__i386__) || defined(__386__) || defined(i386) return 1; #else return 0; @@ -42,7 +42,7 @@ int rtasm_cpu_has_sse(void) int rtasm_cpu_has_sse2(void) { /* FIXME: actually detect this at run-time */ -#if defined(__i386__) || defined(__386__) +#if defined(__i386__) || defined(__386__) || defined(i386) return 1; #else return 0; -- cgit v1.2.3 From b1158a5e0031aa33a71baa7bc14ca2c0fe0dabc4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 18:26:39 +0100 Subject: translate: don't crash on failure to create sse version --- src/gallium/auxiliary/translate/translate_generic.c | 11 +++++++++++ src/gallium/auxiliary/translate/translate_sse.c | 13 ++++++------- 2 files changed, 17 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index e7fb1dcb2d..9de007c767 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -567,8 +567,14 @@ static void generic_run_elts( struct translate *translate, tg->attrib[attr].output_offset); tg->attrib[attr].fetch( src, data ); + + debug_printf("vert %d/%d attr %d: %f %f %f %f\n", + i, elt, attr, data[0], data[1], data[2], data[3]); + + tg->attrib[attr].emit( data, dst ); } + debug_printf("\n"); vert += tg->translate.key.output_stride; } @@ -602,8 +608,13 @@ static void generic_run( struct translate *translate, tg->attrib[attr].output_offset); tg->attrib[attr].fetch( src, data ); + + debug_printf("vert %d attr %d: %f %f %f %f\n", + i, attr, data[0], data[1], data[2], data[3]); + tg->attrib[attr].emit( data, dst ); } + debug_printf("\n"); vert += tg->translate.key.output_stride; } diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index cb8815c173..575852d222 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -145,7 +145,7 @@ static struct x86_reg get_inv_255( struct translate_sse *p ) p->inv_255[0] = p->inv_255[1] = p->inv_255[2] = - p->inv_255[3] = 1.0 / 255.0f; + p->inv_255[3] = 1.0f / 255.0f; sse_movups(p->func, reg, x86_make_disp(translateESI, @@ -575,22 +575,21 @@ struct translate *translate_sse2_create( const struct translate_key *key ) if (p == NULL) goto fail; - if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2()) - goto fail; - - - p->translate.key = *key; p->translate.release = translate_sse_release; p->translate.set_buffer = translate_sse_set_buffer; p->translate.run_elts = translate_sse_run_elts; p->translate.run = translate_sse_run; + if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2()) + goto fail; + if (!build_vertex_emit(p, &p->linear_func, TRUE)) goto fail; if (!build_vertex_emit(p, &p->elt_func, FALSE)) goto fail; + p->translate.key = *key; p->gen_run = (run_func)x86_get_func(&p->linear_func); p->gen_run_elts = (run_elts_func)x86_get_func(&p->elt_func); @@ -598,7 +597,7 @@ struct translate *translate_sse2_create( const struct translate_key *key ) fail: if (p) - p->translate.release( &p->translate ); + translate_sse_release( &p->translate ); return NULL; } -- cgit v1.2.3 From bfd179776f5ded75c2134a54f0a57a1579118cd0 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 18:41:53 +0100 Subject: draw: add missing translate->set_buffer for fetch emit path --- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 1b9b3bfaa6..70d136dce6 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -185,6 +185,14 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, } feme->point_size = draw->rasterizer->point_size; + + for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { + feme->translate->set_buffer(feme->translate, + i, + ((char *)draw->pt.user.vbuffer[i] + + draw->pt.vertex_buffer[i].buffer_offset), + draw->pt.vertex_buffer[i].pitch ); + } } -- cgit v1.2.3 From d0a4bf08b1a80d62f81301c5b37723dfca436b62 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 20:18:40 +0100 Subject: translate: fix several bugs - specify cdecl calling convention on WIN32 - fix load bgra8 function - fix previous don't crash fix. --- .../auxiliary/translate/translate_generic.c | 11 +++--- src/gallium/auxiliary/translate/translate_sse.c | 42 ++++++++++++---------- 2 files changed, 28 insertions(+), 25 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 9de007c767..402780ee53 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -568,13 +568,11 @@ static void generic_run_elts( struct translate *translate, tg->attrib[attr].fetch( src, data ); - debug_printf("vert %d/%d attr %d: %f %f %f %f\n", - i, elt, attr, data[0], data[1], data[2], data[3]); - + if (0) debug_printf("vert %d/%d attr %d: %f %f %f %f\n", + i, elt, attr, data[0], data[1], data[2], data[3]); tg->attrib[attr].emit( data, dst ); } - debug_printf("\n"); vert += tg->translate.key.output_stride; } @@ -609,12 +607,11 @@ static void generic_run( struct translate *translate, tg->attrib[attr].fetch( src, data ); - debug_printf("vert %d attr %d: %f %f %f %f\n", - i, attr, data[0], data[1], data[2], data[3]); + if (0) debug_printf("vert %d attr %d: %f %f %f %f\n", + i, attr, data[0], data[1], data[2], data[3]); tg->attrib[attr].emit( data, dst ); } - debug_printf("\n"); vert += tg->translate.key.output_stride; } diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index 575852d222..e587e5afec 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -45,15 +45,21 @@ #define W 3 -typedef void (*run_func)( struct translate *translate, - unsigned start, - unsigned count, - void *output_buffer ); +#ifdef WIN32 +#define RTASM __cdecl +#else +#define RTASM +#endif + +typedef void (RTASM *run_func)( struct translate *translate, + unsigned start, + unsigned count, + void *output_buffer ); -typedef void (*run_elts_func)( struct translate *translate, - const unsigned *elts, - unsigned count, - void *output_buffer ); +typedef void (RTASM *run_elts_func)( struct translate *translate, + const unsigned *elts, + unsigned count, + void *output_buffer ); @@ -212,17 +218,17 @@ static void emit_load_R8G8B8A8_UNORM( struct translate_sse *p, /* Load and unpack twice: */ sse_movss(p->func, data, src); - sse2_punpcklbw(p->func, src, get_identity(p)); - sse2_punpcklbw(p->func, src, get_identity(p)); + sse2_punpcklbw(p->func, data, get_identity(p)); + sse2_punpcklbw(p->func, data, get_identity(p)); /* Convert to float: */ - sse2_cvtdq2ps(p->func, src, src); + sse2_cvtdq2ps(p->func, data, data); /* Scale by 1/255.0 */ - sse_mulps(p->func, src, get_inv_255(p)); + sse_mulps(p->func, data, get_inv_255(p)); } @@ -551,7 +557,6 @@ static void translate_sse_run_elts( struct translate *translate, elts, count, output_buffer ); - } static void translate_sse_run( struct translate *translate, @@ -570,26 +575,27 @@ static void translate_sse_run( struct translate *translate, struct translate *translate_sse2_create( const struct translate_key *key ) { - struct translate_sse *p = CALLOC_STRUCT( translate_sse ); + struct translate_sse *p = NULL; + + if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2()) + goto fail; + p = CALLOC_STRUCT( translate_sse ); if (p == NULL) goto fail; + p->translate.key = *key; p->translate.release = translate_sse_release; p->translate.set_buffer = translate_sse_set_buffer; p->translate.run_elts = translate_sse_run_elts; p->translate.run = translate_sse_run; - if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2()) - goto fail; - if (!build_vertex_emit(p, &p->linear_func, TRUE)) goto fail; if (!build_vertex_emit(p, &p->elt_func, FALSE)) goto fail; - p->translate.key = *key; p->gen_run = (run_func)x86_get_func(&p->linear_func); p->gen_run_elts = (run_elts_func)x86_get_func(&p->elt_func); -- cgit v1.2.3 From 68a7cb21fa14eac9e38bf398623739a892cc0d52 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 19 Apr 2008 20:20:40 +0100 Subject: draw: rearrange debug code --- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 55 +------------------------ src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 16 +++++-- src/gallium/auxiliary/draw/draw_vertex.c | 54 ++++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_vertex.h | 2 + 4 files changed, 70 insertions(+), 57 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index c835727e32..c5810febe0 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -115,59 +115,6 @@ check_space( struct vbuf_stage *vbuf, unsigned nr ) } -static INLINE void -dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) -{ -// assert(vinfo == vbuf->render->get_vertex_info(vbuf->render)); - unsigned i, j; - - for (i = 0; i < vinfo->num_attribs; i++) { - j = vinfo->src_index[i]; - switch (vinfo->emit[i]) { - case EMIT_OMIT: - debug_printf("EMIT_OMIT:"); - break; - case EMIT_1F: - debug_printf("EMIT_1F:\t"); - debug_printf("%f ", *(float *)data); data += sizeof(float); - break; - case EMIT_1F_PSIZE: - debug_printf("EMIT_1F_PSIZE:\t"); - debug_printf("%f ", *(float *)data); data += sizeof(float); - break; - case EMIT_2F: - debug_printf("EMIT_2F:\t"); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - break; - case EMIT_3F: - debug_printf("EMIT_3F:\t"); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - data += sizeof(float); - break; - case EMIT_4F: - debug_printf("EMIT_4F:\t"); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - debug_printf("%f ", *(float *)data); data += sizeof(float); - break; - case EMIT_4UB: - debug_printf("EMIT_4UB:\t"); - debug_printf("%u ", *data++); - debug_printf("%u ", *data++); - debug_printf("%u ", *data++); - debug_printf("%u ", *data++); - break; - default: - assert(0); - } - debug_printf("\n"); - } - debug_printf("\n"); -} /** @@ -189,7 +136,7 @@ emit_vertex( struct vbuf_stage *vbuf, vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0); vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr); - if (0) dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); + if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); vbuf->vertex_ptr += vbuf->vertex_size/4; vertex->vertex_id = vbuf->nr_vertices++; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 70d136dce6..002ced2186 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -75,6 +75,7 @@ struct fetch_emit_middle_end { struct draw_context *draw; struct translate *translate; + const struct vertex_info *vinfo; /* Cache point size somewhere it's address won't change: */ @@ -106,9 +107,9 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, /* Must do this after set_primitive() above: */ - vinfo = draw->render->get_vertex_info(draw->render); - - + vinfo = feme->vinfo = draw->render->get_vertex_info(draw->render); + + /* Transform from API vertices to HW vertices, skipping the * pipeline_vertex intermediate step. @@ -229,6 +230,15 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, fetch_count, hw_verts ); + if (0) { + unsigned i; + for (i = 0; i < fetch_count; i++) { + debug_printf("\n\nvertex %d:\n", i); + draw_dump_emitted_vertex( feme->vinfo, + (const uint8_t *)hw_verts + feme->vinfo->size * 4 * i ); + } + } + /* XXX: Draw arrays path to avoid re-emitting index list again and * again. */ diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c index a42adaafb1..1446f785c5 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.c +++ b/src/gallium/auxiliary/draw/draw_vertex.c @@ -73,3 +73,57 @@ draw_compute_vertex_size(struct vertex_info *vinfo) } } } + + +void +draw_dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) +{ + unsigned i, j; + + for (i = 0; i < vinfo->num_attribs; i++) { + j = vinfo->src_index[i]; + switch (vinfo->emit[i]) { + case EMIT_OMIT: + debug_printf("EMIT_OMIT:"); + break; + case EMIT_1F: + debug_printf("EMIT_1F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_1F_PSIZE: + debug_printf("EMIT_1F_PSIZE:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_2F: + debug_printf("EMIT_2F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_3F: + debug_printf("EMIT_3F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + data += sizeof(float); + break; + case EMIT_4F: + debug_printf("EMIT_4F:\t"); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + debug_printf("%f ", *(float *)data); data += sizeof(float); + break; + case EMIT_4UB: + debug_printf("EMIT_4UB:\t"); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + debug_printf("%u ", *data++); + break; + default: + assert(0); + } + debug_printf("\n"); + } + debug_printf("\n"); +} diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index 65818463ca..6d8bac5138 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -106,5 +106,7 @@ draw_emit_vertex_attr(struct vertex_info *vinfo, extern void draw_compute_vertex_size(struct vertex_info *vinfo); +void draw_dump_emitted_vertex(const struct vertex_info *vinfo, + const uint8_t *data); #endif /* DRAW_VERTEX_H */ -- cgit v1.2.3 From 29858e1b553cee1fd7e3380ea62c69d2a6b91b95 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 20 Apr 2008 14:41:02 +0900 Subject: gallium: Refcount textures. Pipe driver does refcount textures. If cso_context does not, dangling pointers appear. --- src/gallium/auxiliary/cso_cache/cso_context.c | 37 ++++++++++++++++++++------- 1 file changed, 28 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 4a1a6cb79c..746b176185 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -25,16 +25,19 @@ * **************************************************************************/ - /* Wrap the cso cache & hash mechanisms in a simplified + /** + * @file + * + * Wrap the cso cache & hash mechanisms in a simplified * pipe-driver-specific interface. * - * Authors: - * Zack Rusin - * Keith Whitwell + * @author Zack Rusin + * @author Keith Whitwell */ #include "pipe/p_state.h" #include "pipe/p_util.h" +#include "pipe/p_inlines.h" #include "cso_cache/cso_context.h" #include "cso_cache/cso_cache.h" @@ -277,23 +280,39 @@ void cso_set_sampler_textures( struct cso_context *ctx, ctx->nr_textures = count; for (i = 0; i < count; i++) - ctx->textures[i] = textures[i]; + pipe_texture_reference(&ctx->textures[i], textures[i]); for ( ; i < PIPE_MAX_SAMPLERS; i++) - ctx->textures[i] = NULL; + pipe_texture_reference(&ctx->textures[i], NULL); ctx->pipe->set_sampler_textures(ctx->pipe, count, textures); } void cso_save_sampler_textures( struct cso_context *ctx ) { + uint i; + ctx->nr_textures_saved = ctx->nr_textures; - memcpy(ctx->textures_saved, ctx->textures, sizeof(ctx->textures)); + for (i = 0; i < ctx->nr_textures; i++) { + assert(!ctx->textures_saved[i]); + pipe_texture_reference(&ctx->textures_saved[i], ctx->textures[i]); + } } void cso_restore_sampler_textures( struct cso_context *ctx ) { - cso_set_sampler_textures(ctx, ctx->nr_textures_saved, ctx->textures_saved); - ctx->nr_textures_saved = 0; + uint i; + + ctx->nr_textures = ctx->nr_textures_saved; + + for (i = 0; i < ctx->nr_textures; i++) { + pipe_texture_reference(&ctx->textures[i], NULL); + ctx->textures[i] = ctx->textures_saved[i]; + ctx->textures_saved[i] = NULL; + } + for ( ; i < PIPE_MAX_SAMPLERS; i++) + pipe_texture_reference(&ctx->textures[i], NULL); + + ctx->pipe->set_sampler_textures(ctx->pipe, ctx->nr_textures, ctx->textures); } -- cgit v1.2.3 From 40e0439db448a7d93ddb18faac7f14b47b1343c0 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 21 Apr 2008 13:02:59 +0900 Subject: gallium: Centralize SSE usage logic. --- src/gallium/auxiliary/draw/draw_context.c | 12 ------------ src/gallium/auxiliary/draw/draw_context.h | 2 -- src/gallium/auxiliary/draw/draw_private.h | 2 -- src/gallium/auxiliary/draw/draw_vs_sse.c | 3 ++- src/gallium/auxiliary/rtasm/rtasm_cpu.c | 10 ++++++++-- 5 files changed, 10 insertions(+), 19 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 4988d67faa..b4dbdccd61 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -45,12 +45,6 @@ struct draw_context *draw_create( void ) if (draw == NULL) goto fail; -#if defined(__i386__) || defined(__386__) - draw->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL; -#else - draw->use_sse = FALSE; -#endif - ASSIGN_4V( draw->plane[0], -1, 0, 0, 1 ); ASSIGN_4V( draw->plane[1], 1, 0, 0, 1 ); ASSIGN_4V( draw->plane[2], 0, -1, 0, 1 ); @@ -320,12 +314,6 @@ draw_num_vs_outputs(struct draw_context *draw) -boolean draw_use_sse(struct draw_context *draw) -{ - return (boolean) draw->use_sse; -} - - void draw_set_render( struct draw_context *draw, struct vbuf_render *render ) { diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index a0ac980c89..68e2efb865 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -73,8 +73,6 @@ void draw_enable_line_stipple(struct draw_context *draw, boolean enable); void draw_enable_point_sprites(struct draw_context *draw, boolean enable); -boolean draw_use_sse(struct draw_context *draw); - void draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe); diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 27f61c2f40..da973e868b 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -192,8 +192,6 @@ struct draw_context float plane[12][4]; unsigned nr_planes; - boolean use_sse; - /* If a prim stage introduces new vertex attributes, they'll be stored here */ struct { diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 8e2d381f14..b1e9f67114 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -41,6 +41,7 @@ #include "draw_private.h" #include "draw_context.h" +#include "rtasm/rtasm_cpu.h" #include "rtasm/rtasm_x86sse.h" #include "tgsi/exec/tgsi_sse2.h" #include "tgsi/util/tgsi_parse.h" @@ -155,7 +156,7 @@ draw_create_vs_sse(struct draw_context *draw, struct draw_sse_vertex_shader *vs; uint nt = tgsi_num_tokens(templ->tokens); - if (!draw->use_sse) + if (!rtasm_cpu_has_sse2()) return NULL; vs = CALLOC_STRUCT( draw_sse_vertex_shader ); diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c index d577ff5b42..175245a9f6 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c +++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c @@ -26,14 +26,20 @@ **************************************************************************/ +#include "pipe/p_debug.h" #include "rtasm_cpu.h" +static boolean rtasm_sse_enabled(void) +{ + return !debug_get_bool_option("GALLIUM_NOSSE", FALSE); +} + int rtasm_cpu_has_sse(void) { /* FIXME: actually detect this at run-time */ #if defined(__i386__) || defined(__386__) || defined(i386) - return 1; + return rtasm_sse_enabled(); #else return 0; #endif @@ -43,7 +49,7 @@ int rtasm_cpu_has_sse2(void) { /* FIXME: actually detect this at run-time */ #if defined(__i386__) || defined(__386__) || defined(i386) - return 1; + return rtasm_sse_enabled(); #else return 0; #endif -- cgit v1.2.3 From 201ac414d4df00745e487a6ffbc9979a2e70f0c6 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 21 Apr 2008 00:10:39 -0400 Subject: make llvm draw paths compile with the latest changes switch the method of distribution of builtins (to get rid of the llvm2cpp dependency) --- src/gallium/auxiliary/draw/draw_vs_llvm.c | 13 +- src/gallium/auxiliary/gallivm/Makefile | 8 +- src/gallium/auxiliary/gallivm/gallivm_builtins.cpp | 708 ++++----------------- src/gallium/auxiliary/gallivm/instructions.cpp | 11 +- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 37 +- src/gallium/auxiliary/gallivm/llvm_builtins.c | 2 +- src/gallium/auxiliary/gallivm/soabuiltins.c | 2 +- 7 files changed, 199 insertions(+), 582 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index c219a91156..2ebf05526a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -73,16 +73,17 @@ vs_llvm_run_linear( struct draw_vertex_shader *base, (struct draw_llvm_vertex_shader *)base; struct tgsi_exec_machine *machine = shader->machine; - unsigned int j; + unsigned int i, j; + unsigned slot; for (i = 0; i < count; i += MAX_TGSI_VERTICES) { unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); - /* Swizzle inputs. + /* Swizzle inputs. */ for (j = 0; j < max_vertices; j++) { - for (slot = 0; slot < draw->num_vs_inputs; slot++) { + for (slot = 0; slot < base->info.num_inputs; slot++) { machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; @@ -102,7 +103,7 @@ vs_llvm_run_linear( struct draw_vertex_shader *base, /* Unswizzle all output results */ - for (slot = 1; slot < draw->num_vs_outputs; slot++) { + for (slot = 1; slot < base->info.num_inputs; slot++) { output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; @@ -145,7 +146,7 @@ draw_create_vs_llvm(struct draw_context *draw, /* we make a private copy of the tokens */ vs->base.state.tokens = mem_dup(templ->tokens, nt * sizeof(templ->tokens[0])); - tgsi_scan_shader(shader->tokens, &vs->base.info); + tgsi_scan_shader(vs->base.state.tokens, &vs->base.info); vs->base.prepare = vs_llvm_prepare; vs->base.run_linear = vs_llvm_run_linear; @@ -156,7 +157,7 @@ draw_create_vs_llvm(struct draw_context *draw, struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS); gallivm_ir_set_layout(ir, GALLIVM_SOA); gallivm_ir_set_components(ir, 4); - gallivm_ir_fill_from_tgsi(ir, vs->base.state->tokens); + gallivm_ir_fill_from_tgsi(ir, vs->base.state.tokens); vs->llvm_prog = gallivm_ir_compile(ir); gallivm_ir_delete(ir); } diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile index c24e19e062..6bfd187fb3 100644 --- a/src/gallium/auxiliary/gallivm/Makefile +++ b/src/gallium/auxiliary/gallivm/Makefile @@ -65,10 +65,14 @@ depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES) gallivm_builtins.cpp: llvm_builtins.c - clang --emit-llvm < $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=$@ -f -for=shader -funcname=createGallivmBuiltins + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp.bin + (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@ + rm temp.bin gallivmsoabuiltins.cpp: soabuiltins.c - clang --emit-llvm < $< |llvm-as|opt -std-compile-opts|llvm2cpp -gen-module -o=$@ -f -for=shader -funcname=createSoaBuiltins + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp.bin + (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@ + rm temp.bin # Emacs tags tags: diff --git a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp index 1796f0a177..a6f8cd043b 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp @@ -1,567 +1,141 @@ -// Generated by llvm2cpp - DO NOT MODIFY! - - -Module* createGallivmBuiltins(Module *mod) { - -mod->setModuleIdentifier("shader"); - -// Type Definitions -ArrayType* ArrayTy_0 = ArrayType::get(IntegerType::get(8), 25); - -PointerType* PointerTy_1 = PointerType::get(ArrayTy_0, 0); - -std::vectorFuncTy_2_args; -FuncTy_2_args.push_back(Type::FloatTy); -FuncTy_2_args.push_back(Type::FloatTy); -FunctionType* FuncTy_2 = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/FuncTy_2_args, - /*isVarArg=*/false); - -PointerType* PointerTy_3 = PointerType::get(FuncTy_2, 0); - -VectorType* VectorTy_4 = VectorType::get(Type::FloatTy, 4); - -std::vectorFuncTy_5_args; -FuncTy_5_args.push_back(VectorTy_4); -FunctionType* FuncTy_5 = FunctionType::get( - /*Result=*/VectorTy_4, - /*Params=*/FuncTy_5_args, - /*isVarArg=*/false); - -std::vectorFuncTy_6_args; -FuncTy_6_args.push_back(VectorTy_4); -FuncTy_6_args.push_back(VectorTy_4); -FuncTy_6_args.push_back(VectorTy_4); -FunctionType* FuncTy_6 = FunctionType::get( - /*Result=*/VectorTy_4, - /*Params=*/FuncTy_6_args, - /*isVarArg=*/false); - -VectorType* VectorTy_7 = VectorType::get(IntegerType::get(32), 4); - -std::vectorFuncTy_9_args; -FunctionType* FuncTy_9 = FunctionType::get( - /*Result=*/IntegerType::get(32), - /*Params=*/FuncTy_9_args, - /*isVarArg=*/true); - -PointerType* PointerTy_8 = PointerType::get(FuncTy_9, 0); - -PointerType* PointerTy_10 = PointerType::get(IntegerType::get(8), 0); - -std::vectorFuncTy_12_args; -FuncTy_12_args.push_back(Type::FloatTy); -FunctionType* FuncTy_12 = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/FuncTy_12_args, - /*isVarArg=*/false); - -PointerType* PointerTy_11 = PointerType::get(FuncTy_12, 0); - -std::vectorFuncTy_13_args; -FuncTy_13_args.push_back(VectorTy_4); -FunctionType* FuncTy_13 = FunctionType::get( - /*Result=*/IntegerType::get(32), - /*Params=*/FuncTy_13_args, - /*isVarArg=*/false); - - -// Function Declarations - -Function* func_approx = new Function( - /*Type=*/FuncTy_2, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"approx", mod); -func_approx->setCallingConv(CallingConv::C); -const ParamAttrsList *func_approx_PAL = 0; -func_approx->setParamAttrs(func_approx_PAL); - -Function* func_powf = new Function( - /*Type=*/FuncTy_2, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"powf", mod); // (external, no body) -func_powf->setCallingConv(CallingConv::C); -const ParamAttrsList *func_powf_PAL = 0; -func_powf->setParamAttrs(func_powf_PAL); - -Function* func_lit = new Function( - /*Type=*/FuncTy_5, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"lit", mod); -func_lit->setCallingConv(CallingConv::C); -const ParamAttrsList *func_lit_PAL = 0; -func_lit->setParamAttrs(func_lit_PAL); - -Function* func_cmp = new Function( - /*Type=*/FuncTy_6, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"cmp", mod); -func_cmp->setCallingConv(CallingConv::C); -const ParamAttrsList *func_cmp_PAL = 0; -{ - ParamAttrsVector Attrs; - ParamAttrsWithIndex PAWI; - PAWI.index = 0; PAWI.attrs = 0 | ParamAttr::NoUnwind; - Attrs.push_back(PAWI); - func_cmp_PAL = ParamAttrsList::get(Attrs); - -} -func_cmp->setParamAttrs(func_cmp_PAL); - -Function* func_vcos = new Function( - /*Type=*/FuncTy_5, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"vcos", mod); -func_vcos->setCallingConv(CallingConv::C); -const ParamAttrsList *func_vcos_PAL = 0; -func_vcos->setParamAttrs(func_vcos_PAL); - -Function* func_printf = new Function( - /*Type=*/FuncTy_9, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"printf", mod); // (external, no body) -func_printf->setCallingConv(CallingConv::C); -const ParamAttrsList *func_printf_PAL = 0; -func_printf->setParamAttrs(func_printf_PAL); - -Function* func_cosf = new Function( - /*Type=*/FuncTy_12, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"cosf", mod); // (external, no body) -func_cosf->setCallingConv(CallingConv::C); -const ParamAttrsList *func_cosf_PAL = 0; -func_cosf->setParamAttrs(func_cosf_PAL); - -Function* func_scs = new Function( - /*Type=*/FuncTy_5, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"scs", mod); -func_scs->setCallingConv(CallingConv::C); -const ParamAttrsList *func_scs_PAL = 0; -func_scs->setParamAttrs(func_scs_PAL); - -Function* func_sinf = new Function( - /*Type=*/FuncTy_12, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"sinf", mod); // (external, no body) -func_sinf->setCallingConv(CallingConv::C); -const ParamAttrsList *func_sinf_PAL = 0; -func_sinf->setParamAttrs(func_sinf_PAL); - -Function* func_vsin = new Function( - /*Type=*/FuncTy_5, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"vsin", mod); -func_vsin->setCallingConv(CallingConv::C); -const ParamAttrsList *func_vsin_PAL = 0; -func_vsin->setParamAttrs(func_vsin_PAL); - -Function* func_kilp = new Function( - /*Type=*/FuncTy_13, - /*Linkage=*/GlobalValue::WeakLinkage, - /*Name=*/"kilp", mod); -func_kilp->setCallingConv(CallingConv::C); -const ParamAttrsList *func_kilp_PAL = 0; -{ - ParamAttrsVector Attrs; - ParamAttrsWithIndex PAWI; - PAWI.index = 0; PAWI.attrs = 0 | ParamAttr::NoUnwind; - Attrs.push_back(PAWI); - func_kilp_PAL = ParamAttrsList::get(Attrs); - -} -func_kilp->setParamAttrs(func_kilp_PAL); - -// Global Variable Declarations - - -GlobalVariable* gvar_array__str = new GlobalVariable( -/*Type=*/ArrayTy_0, -/*isConstant=*/true, -/*Linkage=*/GlobalValue::InternalLinkage, -/*Initializer=*/0, // has initializer, specified below -/*Name=*/".str", -mod); - -GlobalVariable* gvar_array__str1 = new GlobalVariable( -/*Type=*/ArrayTy_0, -/*isConstant=*/true, -/*Linkage=*/GlobalValue::InternalLinkage, -/*Initializer=*/0, // has initializer, specified below -/*Name=*/".str1", -mod); - -// Constant Definitions -Constant* const_array_14 = ConstantArray::get("VEC IN is %f %f %f %f\x0A", true); -Constant* const_array_15 = ConstantArray::get("VEC OUT is %f %f %f %f\x0A", true); -ConstantFP* const_float_16 = ConstantFP::get(Type::FloatTy, APFloat(-1.280000e+02f)); -ConstantFP* const_float_17 = ConstantFP::get(Type::FloatTy, APFloat(1.280000e+02f)); -Constant* const_float_18 = Constant::getNullValue(Type::FloatTy); -Constant* const_int32_19 = Constant::getNullValue(IntegerType::get(32)); -std::vector const_packed_20_elems; -ConstantFP* const_float_21 = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f)); -const_packed_20_elems.push_back(const_float_21); -UndefValue* const_float_22 = UndefValue::get(Type::FloatTy); -const_packed_20_elems.push_back(const_float_22); -const_packed_20_elems.push_back(const_float_22); -const_packed_20_elems.push_back(const_float_21); -Constant* const_packed_20 = ConstantVector::get(VectorTy_4, const_packed_20_elems); -ConstantInt* const_int32_23 = ConstantInt::get(APInt(32, "1", 10)); -ConstantInt* const_int32_24 = ConstantInt::get(APInt(32, "3", 10)); -ConstantInt* const_int32_25 = ConstantInt::get(APInt(32, "2", 10)); -std::vector const_packed_26_elems; -const_packed_26_elems.push_back(const_float_21); -const_packed_26_elems.push_back(const_float_18); -const_packed_26_elems.push_back(const_float_18); -const_packed_26_elems.push_back(const_float_21); -Constant* const_packed_26 = ConstantVector::get(VectorTy_4, const_packed_26_elems); -Constant* const_double_27 = Constant::getNullValue(Type::DoubleTy); -std::vector const_packed_28_elems; -const_packed_28_elems.push_back(const_int32_19); -ConstantInt* const_int32_29 = ConstantInt::get(APInt(32, "5", 10)); -const_packed_28_elems.push_back(const_int32_29); -const_packed_28_elems.push_back(const_int32_25); -const_packed_28_elems.push_back(const_int32_24); -Constant* const_packed_28 = ConstantVector::get(VectorTy_7, const_packed_28_elems); -std::vector const_packed_30_elems; -const_packed_30_elems.push_back(const_int32_19); -const_packed_30_elems.push_back(const_int32_23); -ConstantInt* const_int32_31 = ConstantInt::get(APInt(32, "6", 10)); -const_packed_30_elems.push_back(const_int32_31); -const_packed_30_elems.push_back(const_int32_24); -Constant* const_packed_30 = ConstantVector::get(VectorTy_7, const_packed_30_elems); -std::vector const_packed_32_elems; -const_packed_32_elems.push_back(const_int32_19); -const_packed_32_elems.push_back(const_int32_23); -const_packed_32_elems.push_back(const_int32_25); -ConstantInt* const_int32_33 = ConstantInt::get(APInt(32, "7", 10)); -const_packed_32_elems.push_back(const_int32_33); -Constant* const_packed_32 = ConstantVector::get(VectorTy_7, const_packed_32_elems); -std::vector const_ptr_34_indices; -const_ptr_34_indices.push_back(const_int32_19); -const_ptr_34_indices.push_back(const_int32_19); -Constant* const_ptr_34 = ConstantExpr::getGetElementPtr(gvar_array__str, &const_ptr_34_indices[0], const_ptr_34_indices.size() ); -UndefValue* const_packed_35 = UndefValue::get(VectorTy_4); -std::vector const_ptr_36_indices; -const_ptr_36_indices.push_back(const_int32_19); -const_ptr_36_indices.push_back(const_int32_19); -Constant* const_ptr_36 = ConstantExpr::getGetElementPtr(gvar_array__str1, &const_ptr_36_indices[0], const_ptr_36_indices.size() ); - -// Global Variable Definitions -gvar_array__str->setInitializer(const_array_14); -gvar_array__str1->setInitializer(const_array_15); - -// Function Definitions - -// Function: approx (func_approx) -{ - Function::arg_iterator args = func_approx->arg_begin(); - Value* float_a = args++; - float_a->setName("a"); - Value* float_b = args++; - float_b->setName("b"); - - BasicBlock* label_entry = new BasicBlock("entry",func_approx,0); - - // Block entry (label_entry) - FCmpInst* int1_cmp = new FCmpInst(FCmpInst::FCMP_OLT, float_b, const_float_16, "cmp", label_entry); - SelectInst* float_b_addr_0 = new SelectInst(int1_cmp, const_float_16, float_b, "b.addr.0", label_entry); - FCmpInst* int1_cmp3 = new FCmpInst(FCmpInst::FCMP_OGT, float_b_addr_0, const_float_17, "cmp3", label_entry); - SelectInst* float_b_addr_1 = new SelectInst(int1_cmp3, const_float_17, float_b_addr_0, "b.addr.1", label_entry); - FCmpInst* int1_cmp7 = new FCmpInst(FCmpInst::FCMP_OLT, float_a, const_float_18, "cmp7", label_entry); - SelectInst* float_a_addr_0 = new SelectInst(int1_cmp7, const_float_18, float_a, "a.addr.0", label_entry); - std::vector float_call_params; - float_call_params.push_back(float_a_addr_0); - float_call_params.push_back(float_b_addr_1); - CallInst* float_call = new CallInst(func_powf, float_call_params.begin(), float_call_params.end(), "call", label_entry); - float_call->setCallingConv(CallingConv::C); - float_call->setTailCall(true);const ParamAttrsList *float_call_PAL = 0; - float_call->setParamAttrs(float_call_PAL); - - new ReturnInst(float_call, label_entry); - -} - -// Function: lit (func_lit) -{ - Function::arg_iterator args = func_lit->arg_begin(); - Value* packed_tmp = args++; - packed_tmp->setName("tmp"); - - BasicBlock* label_entry_38 = new BasicBlock("entry",func_lit,0); - BasicBlock* label_ifthen = new BasicBlock("ifthen",func_lit,0); - BasicBlock* label_UnifiedReturnBlock = new BasicBlock("UnifiedReturnBlock",func_lit,0); - - // Block entry (label_entry_38) - ExtractElementInst* float_tmp6 = new ExtractElementInst(packed_tmp, const_int32_19, "tmp6", label_entry_38); - FCmpInst* int1_cmp_39 = new FCmpInst(FCmpInst::FCMP_OGT, float_tmp6, const_float_18, "cmp", label_entry_38); - new BranchInst(label_ifthen, label_UnifiedReturnBlock, int1_cmp_39, label_entry_38); - - // Block ifthen (label_ifthen) - InsertElementInst* packed_tmp10 = new InsertElementInst(const_packed_20, float_tmp6, const_int32_23, "tmp10", label_ifthen); - ExtractElementInst* float_tmp12 = new ExtractElementInst(packed_tmp, const_int32_23, "tmp12", label_ifthen); - ExtractElementInst* float_tmp14 = new ExtractElementInst(packed_tmp, const_int32_24, "tmp14", label_ifthen); - std::vector float_call_41_params; - float_call_41_params.push_back(float_tmp12); - float_call_41_params.push_back(float_tmp14); - CallInst* float_call_41 = new CallInst(func_approx, float_call_41_params.begin(), float_call_41_params.end(), "call", label_ifthen); - float_call_41->setCallingConv(CallingConv::C); - float_call_41->setTailCall(true);const ParamAttrsList *float_call_41_PAL = 0; - float_call_41->setParamAttrs(float_call_41_PAL); - - InsertElementInst* packed_tmp16 = new InsertElementInst(packed_tmp10, float_call_41, const_int32_25, "tmp16", label_ifthen); - new ReturnInst(packed_tmp16, label_ifthen); - - // Block UnifiedReturnBlock (label_UnifiedReturnBlock) - new ReturnInst(const_packed_26, label_UnifiedReturnBlock); - -} - -// Function: cmp (func_cmp) -{ - Function::arg_iterator args = func_cmp->arg_begin(); - Value* packed_tmp0 = args++; - packed_tmp0->setName("tmp0"); - Value* packed_tmp1 = args++; - packed_tmp1->setName("tmp1"); - Value* packed_tmp2 = args++; - packed_tmp2->setName("tmp2"); - - BasicBlock* label_entry_44 = new BasicBlock("entry",func_cmp,0); - BasicBlock* label_cond__14 = new BasicBlock("cond.?14",func_cmp,0); - BasicBlock* label_cond_cont20 = new BasicBlock("cond.cont20",func_cmp,0); - BasicBlock* label_cond__28 = new BasicBlock("cond.?28",func_cmp,0); - BasicBlock* label_cond_cont34 = new BasicBlock("cond.cont34",func_cmp,0); - BasicBlock* label_cond__42 = new BasicBlock("cond.?42",func_cmp,0); - BasicBlock* label_cond_cont48 = new BasicBlock("cond.cont48",func_cmp,0); - - // Block entry (label_entry_44) - ExtractElementInst* float_tmp3 = new ExtractElementInst(packed_tmp0, const_int32_19, "tmp3", label_entry_44); - CastInst* double_conv = new FPExtInst(float_tmp3, Type::DoubleTy, "conv", label_entry_44); - FCmpInst* int1_cmp_45 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv, const_double_27, "cmp", label_entry_44); - ExtractElementInst* float_tmp11 = new ExtractElementInst(packed_tmp0, const_int32_23, "tmp11", label_entry_44); - CastInst* double_conv12 = new FPExtInst(float_tmp11, Type::DoubleTy, "conv12", label_entry_44); - FCmpInst* int1_cmp13 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv12, const_double_27, "cmp13", label_entry_44); - SelectInst* packed_tmp1_tmp2 = new SelectInst(int1_cmp_45, packed_tmp1, packed_tmp2, "tmp1.tmp2", label_entry_44); - new BranchInst(label_cond__14, label_cond_cont20, int1_cmp13, label_entry_44); - - // Block cond.?14 (label_cond__14) - ShuffleVectorInst* packed_tmp233 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp1, const_packed_28, "tmp233", label_cond__14); - ExtractElementInst* float_tmp254 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp254", label_cond__14); - CastInst* double_conv265 = new FPExtInst(float_tmp254, Type::DoubleTy, "conv265", label_cond__14); - FCmpInst* int1_cmp276 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv265, const_double_27, "cmp276", label_cond__14); - new BranchInst(label_cond__28, label_cond_cont34, int1_cmp276, label_cond__14); - - // Block cond.cont20 (label_cond_cont20) - ShuffleVectorInst* packed_tmp23 = new ShuffleVectorInst(packed_tmp1_tmp2, packed_tmp2, const_packed_28, "tmp23", label_cond_cont20); - ExtractElementInst* float_tmp25 = new ExtractElementInst(packed_tmp0, const_int32_25, "tmp25", label_cond_cont20); - CastInst* double_conv26 = new FPExtInst(float_tmp25, Type::DoubleTy, "conv26", label_cond_cont20); - FCmpInst* int1_cmp27 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv26, const_double_27, "cmp27", label_cond_cont20); - new BranchInst(label_cond__28, label_cond_cont34, int1_cmp27, label_cond_cont20); - - // Block cond.?28 (label_cond__28) - PHINode* packed_tmp23_reg2mem_0 = new PHINode(VectorTy_4, "tmp23.reg2mem.0", label_cond__28); - packed_tmp23_reg2mem_0->reserveOperandSpace(2); - packed_tmp23_reg2mem_0->addIncoming(packed_tmp233, label_cond__14); - packed_tmp23_reg2mem_0->addIncoming(packed_tmp23, label_cond_cont20); - - ShuffleVectorInst* packed_tmp378 = new ShuffleVectorInst(packed_tmp23_reg2mem_0, packed_tmp1, const_packed_30, "tmp378", label_cond__28); - ExtractElementInst* float_tmp399 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp399", label_cond__28); - CastInst* double_conv4010 = new FPExtInst(float_tmp399, Type::DoubleTy, "conv4010", label_cond__28); - FCmpInst* int1_cmp4111 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv4010, const_double_27, "cmp4111", label_cond__28); - new BranchInst(label_cond__42, label_cond_cont48, int1_cmp4111, label_cond__28); - - // Block cond.cont34 (label_cond_cont34) - PHINode* packed_tmp23_reg2mem_1 = new PHINode(VectorTy_4, "tmp23.reg2mem.1", label_cond_cont34); - packed_tmp23_reg2mem_1->reserveOperandSpace(2); - packed_tmp23_reg2mem_1->addIncoming(packed_tmp233, label_cond__14); - packed_tmp23_reg2mem_1->addIncoming(packed_tmp23, label_cond_cont20); - - ShuffleVectorInst* packed_tmp37 = new ShuffleVectorInst(packed_tmp23_reg2mem_1, packed_tmp2, const_packed_30, "tmp37", label_cond_cont34); - ExtractElementInst* float_tmp39 = new ExtractElementInst(packed_tmp0, const_int32_24, "tmp39", label_cond_cont34); - CastInst* double_conv40 = new FPExtInst(float_tmp39, Type::DoubleTy, "conv40", label_cond_cont34); - FCmpInst* int1_cmp41 = new FCmpInst(FCmpInst::FCMP_OLT, double_conv40, const_double_27, "cmp41", label_cond_cont34); - new BranchInst(label_cond__42, label_cond_cont48, int1_cmp41, label_cond_cont34); - - // Block cond.?42 (label_cond__42) - PHINode* packed_tmp37_reg2mem_0 = new PHINode(VectorTy_4, "tmp37.reg2mem.0", label_cond__42); - packed_tmp37_reg2mem_0->reserveOperandSpace(2); - packed_tmp37_reg2mem_0->addIncoming(packed_tmp378, label_cond__28); - packed_tmp37_reg2mem_0->addIncoming(packed_tmp37, label_cond_cont34); - - ShuffleVectorInst* packed_tmp5113 = new ShuffleVectorInst(packed_tmp37_reg2mem_0, packed_tmp1, const_packed_32, "tmp5113", label_cond__42); - new ReturnInst(packed_tmp5113, label_cond__42); - - // Block cond.cont48 (label_cond_cont48) - PHINode* packed_tmp37_reg2mem_1 = new PHINode(VectorTy_4, "tmp37.reg2mem.1", label_cond_cont48); - packed_tmp37_reg2mem_1->reserveOperandSpace(2); - packed_tmp37_reg2mem_1->addIncoming(packed_tmp378, label_cond__28); - packed_tmp37_reg2mem_1->addIncoming(packed_tmp37, label_cond_cont34); - - ShuffleVectorInst* packed_tmp51 = new ShuffleVectorInst(packed_tmp37_reg2mem_1, packed_tmp2, const_packed_32, "tmp51", label_cond_cont48); - new ReturnInst(packed_tmp51, label_cond_cont48); - -} - -// Function: vcos (func_vcos) -{ - Function::arg_iterator args = func_vcos->arg_begin(); - Value* packed_val = args++; - packed_val->setName("val"); - - BasicBlock* label_entry_53 = new BasicBlock("entry",func_vcos,0); - - // Block entry (label_entry_53) - ExtractElementInst* float_tmp1 = new ExtractElementInst(packed_val, const_int32_19, "tmp1", label_entry_53); - CastInst* double_conv_54 = new FPExtInst(float_tmp1, Type::DoubleTy, "conv", label_entry_53); - ExtractElementInst* float_tmp3_55 = new ExtractElementInst(packed_val, const_int32_23, "tmp3", label_entry_53); - CastInst* double_conv4 = new FPExtInst(float_tmp3_55, Type::DoubleTy, "conv4", label_entry_53); - ExtractElementInst* float_tmp6_56 = new ExtractElementInst(packed_val, const_int32_25, "tmp6", label_entry_53); - CastInst* double_conv7 = new FPExtInst(float_tmp6_56, Type::DoubleTy, "conv7", label_entry_53); - ExtractElementInst* float_tmp9 = new ExtractElementInst(packed_val, const_int32_24, "tmp9", label_entry_53); - CastInst* double_conv10 = new FPExtInst(float_tmp9, Type::DoubleTy, "conv10", label_entry_53); - std::vector int32_call_params; - int32_call_params.push_back(const_ptr_34); - int32_call_params.push_back(double_conv_54); - int32_call_params.push_back(double_conv4); - int32_call_params.push_back(double_conv7); - int32_call_params.push_back(double_conv10); - CallInst* int32_call = new CallInst(func_printf, int32_call_params.begin(), int32_call_params.end(), "call", label_entry_53); - int32_call->setCallingConv(CallingConv::C); - int32_call->setTailCall(true);const ParamAttrsList *int32_call_PAL = 0; - int32_call->setParamAttrs(int32_call_PAL); - - CallInst* float_call13 = new CallInst(func_cosf, float_tmp1, "call13", label_entry_53); - float_call13->setCallingConv(CallingConv::C); - float_call13->setTailCall(true);const ParamAttrsList *float_call13_PAL = 0; - float_call13->setParamAttrs(float_call13_PAL); - - InsertElementInst* packed_tmp15 = new InsertElementInst(const_packed_35, float_call13, const_int32_19, "tmp15", label_entry_53); - CallInst* float_call18 = new CallInst(func_cosf, float_tmp1, "call18", label_entry_53); - float_call18->setCallingConv(CallingConv::C); - float_call18->setTailCall(true);const ParamAttrsList *float_call18_PAL = 0; - float_call18->setParamAttrs(float_call18_PAL); - - InsertElementInst* packed_tmp20 = new InsertElementInst(packed_tmp15, float_call18, const_int32_23, "tmp20", label_entry_53); - CallInst* float_call23 = new CallInst(func_cosf, float_tmp1, "call23", label_entry_53); - float_call23->setCallingConv(CallingConv::C); - float_call23->setTailCall(true);const ParamAttrsList *float_call23_PAL = 0; - float_call23->setParamAttrs(float_call23_PAL); - - InsertElementInst* packed_tmp25 = new InsertElementInst(packed_tmp20, float_call23, const_int32_25, "tmp25", label_entry_53); - CallInst* float_call28 = new CallInst(func_cosf, float_tmp1, "call28", label_entry_53); - float_call28->setCallingConv(CallingConv::C); - float_call28->setTailCall(true);const ParamAttrsList *float_call28_PAL = 0; - float_call28->setParamAttrs(float_call28_PAL); - - InsertElementInst* packed_tmp30 = new InsertElementInst(packed_tmp25, float_call28, const_int32_24, "tmp30", label_entry_53); - CastInst* double_conv33 = new FPExtInst(float_call13, Type::DoubleTy, "conv33", label_entry_53); - CastInst* double_conv36 = new FPExtInst(float_call18, Type::DoubleTy, "conv36", label_entry_53); - CastInst* double_conv39 = new FPExtInst(float_call23, Type::DoubleTy, "conv39", label_entry_53); - CastInst* double_conv42 = new FPExtInst(float_call28, Type::DoubleTy, "conv42", label_entry_53); - std::vector int32_call43_params; - int32_call43_params.push_back(const_ptr_36); - int32_call43_params.push_back(double_conv33); - int32_call43_params.push_back(double_conv36); - int32_call43_params.push_back(double_conv39); - int32_call43_params.push_back(double_conv42); - CallInst* int32_call43 = new CallInst(func_printf, int32_call43_params.begin(), int32_call43_params.end(), "call43", label_entry_53); - int32_call43->setCallingConv(CallingConv::C); - int32_call43->setTailCall(true);const ParamAttrsList *int32_call43_PAL = 0; - int32_call43->setParamAttrs(int32_call43_PAL); - - new ReturnInst(packed_tmp30, label_entry_53); - -} - -// Function: scs (func_scs) -{ - Function::arg_iterator args = func_scs->arg_begin(); - Value* packed_val_58 = args++; - packed_val_58->setName("val"); - - BasicBlock* label_entry_59 = new BasicBlock("entry",func_scs,0); - - // Block entry (label_entry_59) - ExtractElementInst* float_tmp2 = new ExtractElementInst(packed_val_58, const_int32_19, "tmp2", label_entry_59); - CallInst* float_call_60 = new CallInst(func_cosf, float_tmp2, "call", label_entry_59); - float_call_60->setCallingConv(CallingConv::C); - float_call_60->setTailCall(true);const ParamAttrsList *float_call_60_PAL = 0; - float_call_60->setParamAttrs(float_call_60_PAL); - - InsertElementInst* packed_tmp5 = new InsertElementInst(const_packed_35, float_call_60, const_int32_19, "tmp5", label_entry_59); - CallInst* float_call7 = new CallInst(func_sinf, float_tmp2, "call7", label_entry_59); - float_call7->setCallingConv(CallingConv::C); - float_call7->setTailCall(true);const ParamAttrsList *float_call7_PAL = 0; - float_call7->setParamAttrs(float_call7_PAL); - - InsertElementInst* packed_tmp9 = new InsertElementInst(packed_tmp5, float_call7, const_int32_23, "tmp9", label_entry_59); - new ReturnInst(packed_tmp9, label_entry_59); - -} - -// Function: vsin (func_vsin) -{ - Function::arg_iterator args = func_vsin->arg_begin(); - Value* packed_val_62 = args++; - packed_val_62->setName("val"); - - BasicBlock* label_entry_63 = new BasicBlock("entry",func_vsin,0); - - // Block entry (label_entry_63) - ExtractElementInst* float_tmp2_64 = new ExtractElementInst(packed_val_62, const_int32_19, "tmp2", label_entry_63); - CallInst* float_call_65 = new CallInst(func_sinf, float_tmp2_64, "call", label_entry_63); - float_call_65->setCallingConv(CallingConv::C); - float_call_65->setTailCall(true);const ParamAttrsList *float_call_65_PAL = 0; - float_call_65->setParamAttrs(float_call_65_PAL); - - InsertElementInst* packed_tmp6 = new InsertElementInst(const_packed_35, float_call_65, const_int32_19, "tmp6", label_entry_63); - InsertElementInst* packed_tmp9_66 = new InsertElementInst(packed_tmp6, float_call_65, const_int32_23, "tmp9", label_entry_63); - InsertElementInst* packed_tmp12 = new InsertElementInst(packed_tmp9_66, float_call_65, const_int32_25, "tmp12", label_entry_63); - InsertElementInst* packed_tmp15_67 = new InsertElementInst(packed_tmp12, float_call_65, const_int32_24, "tmp15", label_entry_63); - new ReturnInst(packed_tmp15_67, label_entry_63); - -} - -// Function: kilp (func_kilp) -{ - Function::arg_iterator args = func_kilp->arg_begin(); - Value* packed_val_69 = args++; - packed_val_69->setName("val"); - - BasicBlock* label_entry_70 = new BasicBlock("entry",func_kilp,0); - BasicBlock* label_lor_rhs = new BasicBlock("lor_rhs",func_kilp,0); - BasicBlock* label_lor_rhs5 = new BasicBlock("lor_rhs5",func_kilp,0); - BasicBlock* label_lor_rhs11 = new BasicBlock("lor_rhs11",func_kilp,0); - BasicBlock* label_UnifiedReturnBlock_71 = new BasicBlock("UnifiedReturnBlock",func_kilp,0); - - // Block entry (label_entry_70) - ExtractElementInst* float_tmp1_72 = new ExtractElementInst(packed_val_69, const_int32_19, "tmp1", label_entry_70); - FCmpInst* int1_cmp_73 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp1_72, const_float_18, "cmp", label_entry_70); - new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs, int1_cmp_73, label_entry_70); - - // Block lor_rhs (label_lor_rhs) - ExtractElementInst* float_tmp3_75 = new ExtractElementInst(packed_val_69, const_int32_23, "tmp3", label_lor_rhs); - FCmpInst* int1_cmp4 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp3_75, const_float_18, "cmp4", label_lor_rhs); - new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs5, int1_cmp4, label_lor_rhs); - - // Block lor_rhs5 (label_lor_rhs5) - ExtractElementInst* float_tmp7 = new ExtractElementInst(packed_val_69, const_int32_25, "tmp7", label_lor_rhs5); - FCmpInst* int1_cmp8 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp7, const_float_18, "cmp8", label_lor_rhs5); - new BranchInst(label_UnifiedReturnBlock_71, label_lor_rhs11, int1_cmp8, label_lor_rhs5); - - // Block lor_rhs11 (label_lor_rhs11) - ExtractElementInst* float_tmp13 = new ExtractElementInst(packed_val_69, const_int32_24, "tmp13", label_lor_rhs11); - FCmpInst* int1_cmp14 = new FCmpInst(FCmpInst::FCMP_OLT, float_tmp13, const_float_18, "cmp14", label_lor_rhs11); - CastInst* int32_retval = new ZExtInst(int1_cmp14, IntegerType::get(32), "retval", label_lor_rhs11); - new ReturnInst(int32_retval, label_lor_rhs11); - - // Block UnifiedReturnBlock (label_UnifiedReturnBlock_71) - new ReturnInst(const_int32_23, label_UnifiedReturnBlock_71); - -} - -return mod; - -} +static const unsigned char llvm_builtins_data[] = { +0x42,0x43,0xc0,0xde,0x21,0x0c,0x00,0x00,0x2b,0x02,0x00,0x00,0x01,0x10,0x00,0x00, +0x10,0x00,0x00,0x00,0x07,0x81,0x23,0x91,0x41,0xc8,0x04,0x49,0x06,0x10,0x32,0x39, +0x92,0x01,0x84,0x0c,0x25,0x05,0x08,0x19,0x1e,0x04,0x8b,0x62,0x80,0x14,0x45,0x02, +0x42,0x92,0x0b,0x42,0xa4,0x10,0x32,0x14,0x38,0x08,0x18,0x49,0x0a,0x32,0x44,0x24, +0x48,0x0a,0x90,0x21,0x23,0x44,0x72,0x80,0x8c,0x14,0x21,0x86,0x0a,0x8a,0x0a,0x64, +0x0c,0x1f,0x00,0x00,0x49,0x18,0x00,0x00,0x02,0x00,0x00,0x00,0x0b,0x04,0x00,0x0c, +0x00,0x00,0x00,0x00,0x51,0x20,0x00,0x00,0x13,0x00,0x00,0x00,0x32,0x22,0x48,0x09, +0x20,0x65,0x82,0x84,0x00,0x26,0x45,0x48,0x05,0x09,0x26,0x45,0xc6,0x05,0x42,0x52, +0x26,0x08,0xb0,0x19,0x80,0x61,0x04,0x02,0x98,0x23,0x00,0x83,0x21,0x80,0x39,0x82, +0x60,0x0a,0x80,0x2e,0xd5,0x61,0x04,0x42,0x20,0x49,0x90,0x22,0x4d,0xa2,0x73,0x04, +0x08,0xb9,0x32,0x00,0x00,0x8a,0x10,0xc2,0x65,0xb8,0x42,0x84,0x10,0x42,0x0d,0x44, +0x11,0x00,0x18,0x01,0x28,0x82,0x08,0x00,0x13,0xa2,0x74,0xb0,0x03,0x3c,0xb0,0x83, +0x36,0x80,0x87,0x71,0x68,0x03,0x76,0x48,0x07,0x77,0xa8,0x07,0x7c,0x68,0x83,0x73, +0x70,0x87,0x7a,0xd8,0x70,0x0f,0xe5,0xd0,0x06,0xf0,0xa0,0x07,0x73,0x20,0x07,0x7a, +0x30,0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x6d,0x90,0x0e,0x71,0xa0,0x07,0x78,0xa0, +0x07,0x78,0xd0,0x06,0xe9,0x80,0x07,0x7a,0x80,0x07,0x7a,0x80,0x07,0x6d,0x90,0x0e, +0x71,0x60,0x07,0x7a,0x10,0x07,0x76,0xa0,0x07,0x71,0x60,0x07,0x6d,0x90,0x0e,0x73, +0x20,0x07,0x7a,0x30,0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x6d,0x90,0x0e,0x76,0x40, +0x07,0x7a,0x30,0x07,0x72,0xa0,0x07,0x76,0x40,0x07,0x6d,0x60,0x0e,0x73,0x20,0x07, +0x7a,0x30,0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x6d,0x60,0x0e,0x76,0x40,0x07,0x7a, +0x30,0x07,0x72,0xa0,0x07,0x76,0x40,0x07,0x6d,0x60,0x0f,0x76,0x40,0x07,0x7a,0x60, +0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x6d,0x60,0x0f,0x71,0x20,0x07,0x78,0xa0,0x07, +0x71,0x20,0x07,0x78,0xa0,0x07,0x71,0x20,0x07,0x78,0xd0,0x06,0xe1,0x00,0x07,0x7a, +0x00,0x07,0x7a,0x60,0x07,0x74,0xd0,0x06,0xe6,0x80,0x07,0x70,0xa0,0x07,0x71,0x20, +0x07,0x78,0xa0,0x07,0x71,0x20,0x07,0x78,0xa0,0xf3,0x40,0x88,0x04,0x32,0x32,0x02, +0x04,0x60,0x76,0xc6,0xfc,0x6c,0x48,0xa2,0x00,0x40,0x00,0x00,0x00,0x00,0x0c,0x49, +0x14,0x20,0x00,0x00,0x00,0x00,0x80,0x21,0xc9,0x02,0x00,0x01,0x00,0x00,0x00,0x30, +0x24,0x61,0x00,0x20,0x08,0x00,0x00,0x00,0x86,0x24,0x0b,0x00,0x04,0x00,0x00,0x00, +0xc0,0x90,0xa4,0x01,0x02,0x00,0x00,0x00,0x00,0x18,0x92,0x1c,0x40,0x00,0x00,0x00, +0x00,0x00,0x43,0x92,0x05,0x00,0x02,0x00,0x00,0x00,0x60,0x48,0x72,0x00,0x01,0x00, +0x00,0x00,0x00,0x0c,0x49,0x16,0x00,0x08,0x00,0x00,0x00,0x80,0x21,0x89,0x01,0x00, +0x41,0x00,0x00,0x00,0x90,0x05,0x02,0x00,0x10,0x00,0x00,0x00,0x32,0x1e,0x98,0x10, +0x19,0x11,0x4c,0x90,0x8c,0x09,0x26,0x47,0xc6,0x04,0x43,0x92,0x8a,0x59,0x8b,0x43, +0x50,0xd2,0x09,0x02,0x81,0xd2,0x73,0x50,0xc9,0x0c,0x2a,0x99,0x41,0x25,0x33,0xa8, +0x64,0x56,0x28,0x66,0x2d,0x0e,0x41,0xcf,0x2a,0x15,0x04,0x4a,0xcf,0x41,0x25,0x33, +0xa8,0x64,0x06,0x95,0xcc,0xa0,0x92,0x59,0x01,0x00,0x00,0x00,0x53,0x82,0x26,0x0c, +0x04,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00, +0x05,0x00,0x00,0x00,0x04,0xc6,0x08,0x40,0x10,0x04,0xe1,0x70,0x18,0x23,0x00,0x41, +0x10,0x84,0xc3,0x60,0x04,0x00,0x00,0x00,0x93,0x0c,0xce,0x43,0x4c,0x31,0x3c,0x8e, +0x34,0xc9,0x30,0x41,0xc2,0x14,0x03,0x34,0x51,0x93,0x0c,0x4d,0x44,0x4c,0x31,0x44, +0x8d,0x35,0x56,0x01,0x04,0xc3,0x55,0x21,0x16,0x0e,0x04,0x00,0x0f,0x00,0x00,0x00, +0x46,0x41,0x08,0xcc,0x73,0x9b,0x05,0x21,0x30,0xcf,0x6e,0x18,0x84,0x00,0x2c,0x8b, +0x35,0x04,0x80,0x39,0x04,0x81,0x5d,0x20,0x80,0x0f,0x0c,0x43,0xe4,0xd3,0x36,0x81, +0x04,0x3e,0x30,0x0c,0x91,0x4f,0x5b,0x05,0x12,0xf8,0xc0,0x30,0x44,0x7e,0x7d,0x00, +0x05,0xd1,0x4c,0x11,0x66,0x12,0x83,0xc0,0x3c,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x61,0x20,0x00,0x00,0x2a,0x00,0x00,0x00,0x13,0x04,0x43,0x2c,0x10,0x00,0x00,0x00, +0x08,0x00,0x00,0x00,0x24,0x8a,0xa0,0x0c,0x46,0x00,0x4a,0x80,0xc2,0x1c,0x84,0x55, +0x55,0xd6,0x1c,0x84,0x45,0x51,0x16,0x81,0x19,0x80,0x11,0x80,0x31,0x02,0x10,0x04, +0x41,0xfc,0x03,0x00,0x63,0x08,0x0d,0x34,0xc9,0x70,0x55,0xc2,0x2c,0x43,0x20,0x60, +0x73,0x0c,0xd3,0x15,0x8d,0x21,0x34,0xd1,0x18,0x42,0xf3,0x8c,0x55,0x00,0x81,0xa0, +0x6d,0x73,0x0c,0x19,0xe7,0x60,0x87,0x52,0x38,0x10,0x00,0x00,0x13,0x00,0x00,0x00, +0x17,0x60,0x20,0xc5,0x74,0x10,0x8d,0x65,0x14,0x13,0xf3,0xd4,0xb4,0x6d,0x14,0x13, +0xf3,0xd4,0xb8,0x69,0x14,0x13,0xf3,0xd4,0xb6,0x75,0x14,0x13,0xf3,0xd4,0xba,0x35, +0x0c,0x13,0xf3,0x9c,0x80,0xe4,0x36,0x48,0x81,0x10,0xc3,0x4a,0x4c,0x54,0xd4,0x6c, +0x8b,0x23,0x28,0x76,0x41,0x4c,0xcc,0xa3,0x1b,0x07,0x21,0x00,0xcb,0x72,0x00,0x05, +0xd1,0x4c,0x11,0x66,0x18,0x83,0xc0,0x3c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, +0x61,0x20,0x00,0x00,0x82,0x00,0x00,0x00,0x13,0x04,0x47,0x2c,0x10,0x00,0x00,0x00, +0x08,0x00,0x00,0x00,0x24,0x46,0x00,0x8a,0xa0,0x0c,0x4a,0xa0,0x14,0x8a,0xa1,0x1c, +0x68,0x8c,0x00,0x10,0x9a,0x83,0x80,0xa8,0x48,0x9a,0x83,0x80,0xa6,0x4a,0x9a,0x83, +0x80,0xa6,0xc8,0x02,0x63,0x08,0x0d,0x64,0xdb,0xc0,0x49,0x06,0xee,0x22,0xc6,0x10, +0x9a,0xc9,0xbc,0x81,0x93,0x0c,0xdf,0x45,0x4c,0x31,0x38,0x4f,0x37,0xcb,0x10,0x08, +0x60,0x30,0xc8,0x10,0x06,0x0e,0x36,0x86,0xd0,0x44,0x36,0x06,0x03,0x27,0x19,0xc8, +0xe0,0x22,0x66,0x19,0x06,0xa2,0x0c,0x06,0x19,0xc2,0xe0,0xc1,0xc6,0x10,0x9a,0xc8, +0xce,0x60,0xe0,0x24,0x03,0x1a,0x5c,0xc4,0x2c,0xc3,0x40,0xa4,0xc1,0x40,0x45,0x20, +0x06,0x81,0x19,0x08,0x83,0x0c,0x6a,0xe0,0x64,0x63,0x08,0x8d,0x64,0x6c,0x30,0x70, +0x92,0xa1,0x0d,0x2e,0x62,0x96,0xa1,0x30,0xdc,0x60,0xa0,0x22,0x10,0x83,0xc0,0x0c, +0x84,0x41,0x86,0x37,0x78,0xb2,0x31,0x84,0x46,0xb2,0x38,0x18,0x38,0xc9,0x20,0x07, +0x17,0x31,0xcb,0x50,0x18,0x73,0x30,0x50,0x11,0xac,0xc1,0x00,0x07,0xc4,0x20,0x03, +0x1d,0x38,0x1a,0xd6,0xc1,0x40,0x45,0xb0,0x06,0x03,0x1c,0x10,0x83,0x0c,0x76,0xf0, +0x68,0x78,0x07,0xe1,0x40,0x00,0x00,0x00,0x4c,0x00,0x00,0x00,0x56,0x62,0x08,0xcc, +0x63,0xef,0x3a,0xa9,0x00,0x19,0x7b,0x73,0x23,0x73,0xf9,0xa1,0x91,0x31,0x98,0x62, +0x62,0x9e,0x7b,0xb7,0x06,0x62,0x62,0x1e,0xda,0x1c,0x88,0x89,0x79,0x6a,0x7b,0x20, +0x26,0xe6,0xb1,0x6d,0x83,0x98,0x98,0xe7,0x36,0x92,0x43,0x70,0x9a,0xca,0xd6,0x73, +0xa3,0x79,0x26,0xe6,0xb9,0x77,0x3f,0x22,0x0c,0x9b,0x21,0x18,0x9f,0xb6,0x90,0x64, +0x62,0x9e,0xda,0x9f,0x98,0xc7,0x36,0x9b,0x67,0x62,0x9e,0x7b,0xf7,0x23,0xc2,0xb0, +0x19,0x82,0xf1,0x6b,0x53,0x79,0x26,0xe6,0xb1,0x6f,0x3f,0x22,0x0c,0x9b,0x21,0x18, +0x9f,0xb6,0x98,0x62,0x62,0x9e,0xbb,0xb7,0x97,0x67,0x62,0x1e,0xfb,0xf6,0x23,0xc2, +0xb0,0x19,0x82,0xf1,0x6b,0x43,0x31,0x04,0xa7,0xa9,0x6c,0xdd,0x66,0x0a,0x81,0x79, +0xf0,0xfa,0x08,0x16,0xc1,0x69,0x06,0x5f,0x70,0x9a,0xe9,0xc6,0x49,0x01,0xc8,0xd8, +0x9b,0x1b,0x99,0xcb,0x4f,0x0c,0x8d,0xad,0x18,0x13,0xf3,0xdc,0x3b,0x6f,0x35,0xc7, +0xc4,0x3c,0x79,0x5d,0xdf,0x06,0x52,0x08,0xcc,0x53,0xdf,0x26,0x62,0x4c,0xcc,0x63, +0xdf,0xb7,0xb9,0x1c,0x02,0xf3,0xe0,0x75,0x5d,0x5b,0xc7,0x20,0x30,0x8f,0x79,0x14, +0x13,0xf3,0xd4,0xf5,0x19,0x2c,0x82,0xd3,0x0c,0xbe,0xe0,0x34,0x13,0xce,0x5b,0x0b, +0x22,0x38,0x4d,0x85,0xd3,0x35,0x6d,0x37,0xc5,0xc4,0x3c,0x79,0x4d,0x1a,0x40,0xc6, +0xde,0xdc,0xc8,0x5c,0x7e,0x64,0x70,0x8c,0x83,0x10,0x9c,0xa6,0xb2,0x94,0x42,0x60, +0x1e,0x7b,0x37,0x19,0x43,0x70,0x9a,0x0a,0xa7,0xcd,0xa4,0x98,0x98,0xc7,0xbe,0x8d, +0xc5,0x98,0x98,0xe7,0xee,0x7b,0x3b,0x29,0x26,0xe6,0xb1,0xf3,0x13,0x58,0x04,0xa7, +0x19,0x7c,0xc1,0x69,0x26,0x9b,0xb6,0x0f,0x43,0x70,0x9a,0xaa,0xb6,0x6d,0xc4,0x98, +0x98,0xc7,0xce,0xf1,0x03,0x28,0x88,0x66,0x8a,0x30,0x00,0x00,0x00,0x00,0x00,0x00, +0x61,0x20,0x00,0x00,0x4a,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00, +0x07,0x00,0x00,0x00,0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0xbd, +0x61,0x8c,0x04,0x10,0x1e,0xe1,0x19,0xc6,0x48,0x02,0xe1,0x11,0x1e,0x00,0x00,0x00, +0x63,0x08,0xcd,0x63,0xd5,0xc0,0x31,0x84,0x06,0xb2,0x6b,0xe0,0x18,0x42,0x13,0x59, +0x36,0x70,0x0c,0xa1,0x71,0x6c,0x1b,0x38,0x16,0x02,0x04,0xc7,0x64,0x61,0x1a,0x37, +0x16,0x01,0x04,0x48,0x35,0xc7,0x20,0x79,0xcf,0x58,0x04,0x10,0x20,0xd5,0x1c,0xc3, +0x07,0x06,0xd0,0x58,0x04,0x10,0x20,0xd5,0x1c,0x43,0x18,0x88,0x41,0x34,0x16,0x01, +0x04,0x48,0x35,0xc7,0x30,0x06,0x64,0xe0,0x98,0x37,0xd0,0xc0,0x60,0xa0,0x89,0xc1, +0x40,0x23,0x83,0x81,0x63,0x21,0x40,0x70,0x50,0x66,0x70,0x06,0x68,0x90,0x06,0x58, +0x06,0xe1,0x40,0x00,0x25,0x00,0x00,0x00,0x56,0x52,0x4c,0xcc,0x73,0xd3,0x56,0x41, +0x4c,0xcc,0x53,0xdb,0x05,0x31,0x31,0xcf,0x6d,0x19,0xc4,0xc4,0x3c,0xba,0x6d,0x10, +0x13,0xf3,0xf4,0xd6,0x41,0x08,0xc0,0xb2,0x18,0x46,0x21,0x38,0x4d,0x85,0x9b,0x46, +0x21,0x38,0x4d,0xb5,0x9b,0x8a,0x21,0x00,0xcb,0x82,0xdf,0x66,0x62,0x08,0x4e,0x53, +0xdd,0xb7,0x9d,0x18,0x82,0xd3,0x54,0xb7,0x6e,0x28,0x86,0xe0,0x34,0xd5,0xdd,0xdb, +0x47,0x31,0x31,0x4f,0x9d,0x9b,0x87,0x21,0x00,0xcb,0x52,0xdf,0x06,0x62,0x08,0xc0, +0xb2,0xd4,0xbc,0x59,0x10,0x82,0xd3,0x54,0x96,0x62,0x08,0x4e,0x53,0xe1,0xb6,0x85, +0x14,0x13,0xf3,0xd8,0xb4,0x8d,0x14,0x13,0xf3,0xd8,0xb9,0x89,0x18,0x02,0xb0,0x2c, +0xf6,0x6d,0x24,0x86,0x00,0x2c,0x8b,0xcd,0x1b,0x87,0x21,0x38,0x4d,0x55,0xd3,0xd6, +0x30,0x54,0xc0,0x72,0x00,0x05,0xd1,0x4c,0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00, +0x61,0x20,0x00,0x00,0x19,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00, +0x03,0x00,0x00,0x00,0x24,0x4a,0x60,0x04,0x80,0xc2,0x0c,0x00,0x00,0x00,0x00,0x00, +0x63,0x08,0xcd,0x33,0x16,0x01,0x04,0x48,0x34,0xc7,0x00,0x49,0xcf,0x58,0x04,0x10, +0x28,0xd1,0x1c,0xc3,0x44,0x39,0x58,0x85,0x03,0x01,0x00,0x00,0x0a,0x00,0x00,0x00, +0x16,0x41,0x4c,0xcc,0x63,0xdb,0x04,0x31,0x31,0x4f,0x6e,0x0d,0x43,0x05,0x2c,0x07, +0x50,0x10,0xcd,0x14,0x61,0x56,0x41,0x4c,0xcc,0xd3,0x1b,0x45,0x21,0x00,0xcb,0xb2, +0x9b,0x04,0x21,0x00,0xcb,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x1b,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00, +0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x00,0x63,0x08,0xcd,0x33, +0x16,0x01,0x04,0xca,0x34,0xc7,0x20,0x51,0xcf,0x1c,0x43,0x45,0x41,0x73,0x0c,0x16, +0x15,0xcd,0x31,0x5c,0x94,0x83,0x58,0x38,0x10,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, +0x86,0x51,0x4c,0xcc,0x53,0xe7,0x76,0x51,0x4c,0xcc,0x53,0xdb,0x36,0x41,0x4c,0xcc, +0x63,0x5b,0x05,0x31,0x31,0x8f,0x6e,0x0d,0x43,0x05,0x2c,0x66,0x41,0x4c,0xcc,0xd3, +0x1f,0x40,0x41,0x34,0x53,0x84,0x19,0x05,0x21,0x00,0xcb,0x02,0x00,0x00,0x00,0x00, +0x61,0x20,0x00,0x00,0x2f,0x00,0x00,0x00,0x13,0x04,0x45,0x2c,0x10,0x00,0x00,0x00, +0x03,0x00,0x00,0x00,0x24,0xca,0xa0,0x04,0x46,0x00,0x8a,0x80,0xc0,0x08,0x00,0x00, +0x63,0x08,0x0d,0x34,0xc9,0x30,0x49,0xc4,0x2c,0x03,0x11,0x50,0x63,0x08,0xcd,0x33, +0xc9,0x50,0x49,0xc4,0x2c,0x03,0x21,0x58,0x63,0x08,0x4d,0x34,0xc9,0x70,0x49,0xc4, +0x2c,0x03,0x31,0x60,0x63,0x08,0x8d,0x33,0xc9,0x90,0x49,0x84,0x69,0x22,0x70,0xc3, +0x27,0x1c,0x08,0x00,0x1a,0x00,0x00,0x00,0x96,0x51,0x4c,0xcc,0x53,0xdf,0x66,0x41, +0x08,0xcc,0x83,0xdb,0x04,0x31,0x31,0x4f,0x6d,0x15,0xc4,0xc4,0x3c,0xb7,0x61,0x10, +0x02,0xf3,0xf0,0x47,0x20,0xb9,0x0d,0x52,0x20,0xc4,0xb0,0x12,0x13,0x15,0x35,0xdb, +0xe2,0x08,0x8a,0x5d,0x10,0x13,0xf3,0xec,0x37,0x90,0x2c,0x4e,0xf4,0x47,0x87,0x54, +0xd7,0x17,0x70,0x2c,0x4e,0xf4,0x47,0x87,0x74,0x02,0xc8,0xe2,0x44,0x7f,0x74,0x48, +0xb9,0x69,0x14,0x02,0xf3,0xd4,0xb8,0x6d,0x18,0x11,0x31,0x55,0xc0,0x62,0x0d,0x43, +0x05,0x2c,0x07,0x50,0x10,0xcd,0x14,0x61,0x46,0x31,0x08,0xcc,0x03,0x00,0x00,0x00, +0x00,0x00,0x00,0x00,0x71,0x20,0x00,0x00,0x12,0x00,0x00,0x00,0x66,0x40,0x54,0x82, +0x23,0x59,0xc2,0x20,0x09,0x92,0x1d,0x18,0x4f,0x84,0x34,0x53,0x61,0x03,0xc4,0xe3, +0x58,0x85,0x05,0x14,0xbe,0x34,0x45,0xb5,0x21,0x10,0x82,0x23,0x15,0x46,0x30,0x2c, +0xc8,0x64,0x02,0x06,0xf0,0x3c,0x91,0x73,0x19,0x00,0xe1,0x4b,0x53,0x64,0x0a,0x84, +0x84,0x34,0x85,0x31,0x10,0x0a,0xb2,0x3c,0x56,0x30,0x08,0xcc,0x63,0x0b,0x44,0x25, +0x21,0x0d,0x00,0x00,0x00,0x00,0x00,0x00}; diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index 8919491792..b35d6790f7 100644 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -35,6 +35,8 @@ #include "storage.h" +#include "pipe/p_util.h" + #include #include #include @@ -42,7 +44,8 @@ #include #include #include -#include +#include +#include #include #include @@ -53,7 +56,6 @@ using namespace llvm; #include "gallivm_builtins.cpp" #if 0 - llvm::Value *arrayFromChannels(std::vector &vals) { VectorType *vectorType = VectorType::get(Type::FloatTy, 4); @@ -84,7 +86,10 @@ Instructions::Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicB m_llvmLit = 0; m_fmtPtr = 0; - createGallivmBuiltins(m_mod); + MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( + (const char*)&llvm_builtins_data[0], + (const char*)&llvm_builtins_data[Elements(llvm_builtins_data)-1]); + m_mod = ParseBitcodeFile(buffer); } llvm::Value * Instructions::add(llvm::Value *in1, llvm::Value *in2) diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index 6f83b56a72..3c16a3692d 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -1,8 +1,35 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ #include "instructionssoa.h" #include "storagesoa.h" #include "pipe/p_shader_tokens.h" +#include "pipe/p_util.h" #include #include @@ -10,7 +37,10 @@ #include #include #include -#include +#include +#include +#include + #include @@ -183,7 +213,10 @@ llvm::Module * InstructionsSoa::currentModule() const void InstructionsSoa::createBuiltins() { - m_builtins = createSoaBuiltins(); + MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( + (const char*)&soabuiltins_data[0], + (const char*)&soabuiltins_data[Elements(soabuiltins_data)-1]); + m_builtins = ParseBitcodeFile(buffer); createDependencies(); } diff --git a/src/gallium/auxiliary/gallivm/llvm_builtins.c b/src/gallium/auxiliary/gallivm/llvm_builtins.c index 4f98d754ba..64b5d499a8 100644 --- a/src/gallium/auxiliary/gallivm/llvm_builtins.c +++ b/src/gallium/auxiliary/gallivm/llvm_builtins.c @@ -30,7 +30,7 @@ * Authors: * Zack Rusin zack@tungstengraphics.com */ -typedef __attribute__(( ocu_vector_type(4) )) float float4; +typedef __attribute__(( ext_vector_type(4) )) float float4; extern float powf(float a, float b); diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c index 4d658be520..40addebd8c 100644 --- a/src/gallium/auxiliary/gallivm/soabuiltins.c +++ b/src/gallium/auxiliary/gallivm/soabuiltins.c @@ -31,7 +31,7 @@ * Authors: * Zack Rusin zack@tungstengraphics.com */ -typedef __attribute__(( ocu_vector_type(4) )) float float4; +typedef __attribute__(( ext_vector_type(4) )) float float4; void dp3(float4 *res, float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, -- cgit v1.2.3 From b6c9d2ef2cfadbbe3e7aa94f21fd0da36d089952 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 12:37:41 +0100 Subject: rtasm: add dump facility for x86 (from tgsi_sse2.c) --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 267 ++++++++++++++++++++++++++--- 1 file changed, 243 insertions(+), 24 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 7f8cc23d15..f2c08c96a6 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -34,6 +34,116 @@ #define X86_TWOB 0x0f +#define DUMP_SSE 0 + +#if DUMP_SSE + +static void +_print_reg( + struct x86_reg reg ) +{ + if (reg.mod != mod_REG) + debug_printf( "[" ); + + switch( reg.file ) { + case file_REG32: + switch( reg.idx ) { + case reg_AX: debug_printf( "EAX" ); break; + case reg_CX: debug_printf( "ECX" ); break; + case reg_DX: debug_printf( "EDX" ); break; + case reg_BX: debug_printf( "EBX" ); break; + case reg_SP: debug_printf( "ESP" ); break; + case reg_BP: debug_printf( "EBP" ); break; + case reg_SI: debug_printf( "ESI" ); break; + case reg_DI: debug_printf( "EDI" ); break; + } + break; + case file_MMX: + debug_printf( "MMX%u", reg.idx ); + break; + case file_XMM: + debug_printf( "XMM%u", reg.idx ); + break; + case file_x87: + debug_printf( "fp%u", reg.idx ); + break; + } + + if (reg.mod == mod_DISP8 || + reg.mod == mod_DISP32) + debug_printf("+%d", reg.disp); + + if (reg.mod != mod_REG) + debug_printf( "]" ); +} + +static void +_fill( + const char *op ) +{ + unsigned count = 10 - strlen( op ); + + while( count-- ) { + debug_printf( " " ); + } +} + +#define DUMP_START() debug_printf( "\nsse-dump start ----------------" ) +#define DUMP_END() debug_printf( "\nsse-dump end ----------------\n" ) +#define DUMP( OP ) debug_printf( "\n%s", OP ) + +#define DUMP_I( OP, I ) do { \ + debug_printf( "\n%s", OP ); \ + _fill( OP ); \ + debug_printf( "%u", I ); \ +} while( 0 ) + +#define DUMP_R( OP, R0 ) do { \ + debug_printf( "\n%s", OP ); \ + _fill( OP ); \ + _print_reg( R0 ); \ +} while( 0 ) + +#define DUMP_RR( OP, R0, R1 ) do { \ + debug_printf( "\n%s", OP ); \ + _fill( OP ); \ + _print_reg( R0 ); \ + debug_printf( ", " ); \ + _print_reg( R1 ); \ +} while( 0 ) + +#define DUMP_RI( OP, R0, I ) do { \ + debug_printf( "\n%s", OP ); \ + _fill( OP ); \ + _print_reg( R0 ); \ + debug_printf( ", " ); \ + debug_printf( "%u", I ); \ +} while( 0 ) + +#define DUMP_RRI( OP, R0, R1, I ) do { \ + debug_printf( "\n%s", OP ); \ + _fill( OP ); \ + _print_reg( R0 ); \ + debug_printf( ", " ); \ + _print_reg( R1 ); \ + debug_printf( ", " ); \ + debug_printf( "%u", I ); \ +} while( 0 ) + +#else + +#define DUMP_START() +#define DUMP_END() +#define DUMP( OP ) +#define DUMP_I( OP, I ) +#define DUMP_R( OP, R0 ) +#define DUMP_RR( OP, R0, R1 ) +#define DUMP_RI( OP, R0, I ) +#define DUMP_RRI( OP, R0, R1, I ) + +#endif + + static void do_realloc( struct x86_function *p ) { if (p->size == 0) { @@ -272,6 +382,7 @@ unsigned char *x86_jcc_forward( struct x86_function *p, unsigned char *x86_jmp_forward( struct x86_function *p) { + DUMP( __FUNCTION__ ); emit_1ub(p, 0xe9); emit_1i(p, 0); return x86_get_label(p); @@ -279,6 +390,8 @@ unsigned char *x86_jmp_forward( struct x86_function *p) unsigned char *x86_call_forward( struct x86_function *p) { + DUMP( __FUNCTION__ ); + emit_1ub(p, 0xe8); emit_1i(p, 0); return x86_get_label(p); @@ -294,6 +407,7 @@ void x86_fixup_fwd_jump( struct x86_function *p, void x86_jmp( struct x86_function *p, unsigned char *label) { + DUMP_I( __FUNCTION__, label ); emit_1ub(p, 0xe9); emit_1i(p, pointer_to_intptr( label ) - pointer_to_intptr( x86_get_label(p) ) - 4); } @@ -310,12 +424,14 @@ static unsigned char *cptr( void (*label)() ) */ void x86_call( struct x86_function *p, void (*label)()) { + DUMP_I( __FUNCTION__, label ); emit_1ub(p, 0xe8); emit_1i(p, cptr(label) - x86_get_label(p) - 4); } #else void x86_call( struct x86_function *p, struct x86_reg reg) { + DUMP_R( __FUNCTION__, reg ); emit_1ub(p, 0xff); emit_modrm_noreg(p, 2, reg); } @@ -328,6 +444,7 @@ void x86_call( struct x86_function *p, struct x86_reg reg) */ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) { + DUMP_RI( __FUNCTION__, dst, imm ); assert(dst.mod == mod_REG); emit_1ub(p, 0xb8 + dst.idx); emit_1i(p, imm); @@ -336,6 +453,7 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) void x86_push( struct x86_function *p, struct x86_reg reg ) { + DUMP_R( __FUNCTION__, reg ); assert(reg.mod == mod_REG); emit_1ub(p, 0x50 + reg.idx); p->stack_offset += 4; @@ -344,6 +462,7 @@ void x86_push( struct x86_function *p, void x86_pop( struct x86_function *p, struct x86_reg reg ) { + DUMP_R( __FUNCTION__, reg ); assert(reg.mod == mod_REG); emit_1ub(p, 0x58 + reg.idx); p->stack_offset -= 4; @@ -352,6 +471,7 @@ void x86_pop( struct x86_function *p, void x86_inc( struct x86_function *p, struct x86_reg reg ) { + DUMP_R( __FUNCTION__, reg ); assert(reg.mod == mod_REG); emit_1ub(p, 0x40 + reg.idx); } @@ -359,17 +479,20 @@ void x86_inc( struct x86_function *p, void x86_dec( struct x86_function *p, struct x86_reg reg ) { + DUMP_R( __FUNCTION__, reg ); assert(reg.mod == mod_REG); emit_1ub(p, 0x48 + reg.idx); } void x86_ret( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_1ub(p, 0xc3); } void x86_sahf( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_1ub(p, 0x9e); } @@ -377,6 +500,7 @@ void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_op_modrm( p, 0x8b, 0x89, dst, src ); } @@ -384,6 +508,7 @@ void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_op_modrm( p, 0x33, 0x31, dst, src ); } @@ -391,6 +516,7 @@ void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_op_modrm( p, 0x3b, 0x39, dst, src ); } @@ -398,6 +524,7 @@ void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_1ub(p, 0x8d); emit_modrm( p, dst, src ); } @@ -406,6 +533,7 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_1ub(p, 0x85); emit_modrm( p, dst, src ); } @@ -414,6 +542,7 @@ void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_op_modrm(p, 0x03, 0x01, dst, src ); } @@ -422,6 +551,7 @@ void x86_add( struct x86_function *p, void x86_mul( struct x86_function *p, struct x86_reg src ) { + DUMP_R( __FUNCTION__, src ); emit_1ub(p, 0xf7); emit_modrm_noreg(p, 4, src ); } @@ -431,6 +561,7 @@ void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0xAF); emit_modrm(p, dst, src); } @@ -440,6 +571,7 @@ void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_op_modrm(p, 0x2b, 0x29, dst, src ); } @@ -447,6 +579,7 @@ void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_op_modrm( p, 0x0b, 0x09, dst, src ); } @@ -454,6 +587,7 @@ void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_op_modrm( p, 0x23, 0x21, dst, src ); } @@ -468,6 +602,7 @@ void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, 0xF3, X86_TWOB); emit_op_modrm( p, 0x10, 0x11, dst, src ); } @@ -476,6 +611,7 @@ void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x28, 0x29, dst, src ); } @@ -484,6 +620,7 @@ void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x10, 0x11, dst, src ); } @@ -492,6 +629,7 @@ void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); assert(dst.mod != mod_REG || src.mod != mod_REG); emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ @@ -501,6 +639,7 @@ void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); assert(dst.mod != mod_REG || src.mod != mod_REG); emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ @@ -510,6 +649,7 @@ void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x5F); emit_modrm( p, dst, src ); } @@ -518,6 +658,7 @@ void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x5F); emit_modrm( p, dst, src ); } @@ -526,6 +667,7 @@ void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x5E); emit_modrm( p, dst, src ); } @@ -534,6 +676,7 @@ void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x5D); emit_modrm( p, dst, src ); } @@ -542,6 +685,7 @@ void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x5C); emit_modrm( p, dst, src ); } @@ -550,6 +694,7 @@ void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x59); emit_modrm( p, dst, src ); } @@ -558,6 +703,7 @@ void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x59); emit_modrm( p, dst, src ); } @@ -566,6 +712,7 @@ void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x58); emit_modrm( p, dst, src ); } @@ -574,6 +721,7 @@ void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x58); emit_modrm( p, dst, src ); } @@ -582,6 +730,7 @@ void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x55); emit_modrm( p, dst, src ); } @@ -590,6 +739,7 @@ void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x54); emit_modrm( p, dst, src ); } @@ -598,6 +748,7 @@ void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x52); emit_modrm( p, dst, src ); } @@ -606,6 +757,7 @@ void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x52); emit_modrm( p, dst, src ); @@ -615,6 +767,7 @@ void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); assert(dst.mod == mod_REG && src.mod == mod_REG); emit_2ub(p, X86_TWOB, 0x12); emit_modrm( p, dst, src ); @@ -624,6 +777,7 @@ void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); assert(dst.mod == mod_REG && src.mod == mod_REG); emit_2ub(p, X86_TWOB, 0x16); emit_modrm( p, dst, src ); @@ -633,6 +787,7 @@ void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x56); emit_modrm( p, dst, src ); } @@ -641,6 +796,7 @@ void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x57); emit_modrm( p, dst, src ); } @@ -649,6 +805,7 @@ void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); assert(dst.file == file_MMX && (src.file == file_XMM || src.mod != mod_REG)); @@ -662,6 +819,7 @@ void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x5b); emit_modrm( p, dst, src ); } @@ -671,31 +829,34 @@ void sse2_cvtdq2ps( struct x86_function *p, * arg0. */ void sse_shufps( struct x86_function *p, - struct x86_reg dest, - struct x86_reg arg0, + struct x86_reg dst, + struct x86_reg src, unsigned char shuf) { + DUMP_RRI( __FUNCTION__, dst, src, shuf ); emit_2ub(p, X86_TWOB, 0xC6); - emit_modrm(p, dest, arg0); + emit_modrm(p, dst, src); emit_1ub(p, shuf); } void sse_cmpps( struct x86_function *p, - struct x86_reg dest, - struct x86_reg arg0, + struct x86_reg dst, + struct x86_reg src, unsigned char cc) { + DUMP_RRI( "CMPPS", dst, src, cc ); emit_2ub(p, X86_TWOB, 0xC2); - emit_modrm(p, dest, arg0); + emit_modrm(p, dst, src); emit_1ub(p, cc); } void sse_pmovmskb( struct x86_function *p, - struct x86_reg dest, + struct x86_reg dst, struct x86_reg src) { - emit_3ub(p, 0x66, X86_TWOB, 0xD7); - emit_modrm(p, dest, src); + DUMP_RR( __FUNCTION__, dst, src ); + emit_3ub(p, 0x66, X86_TWOB, 0xD7); + emit_modrm(p, dst, src); } /*********************************************************************** @@ -706,12 +867,13 @@ void sse_pmovmskb( struct x86_function *p, * Perform a reduced swizzle: */ void sse2_pshufd( struct x86_function *p, - struct x86_reg dest, - struct x86_reg arg0, + struct x86_reg dst, + struct x86_reg src, unsigned char shuf) { + DUMP_RRI( __FUNCTION__, dst, src, shuf ); emit_3ub(p, 0x66, X86_TWOB, 0x70); - emit_modrm(p, dest, arg0); + emit_modrm(p, dst, src); emit_1ub(p, shuf); } @@ -719,6 +881,7 @@ void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); emit_modrm( p, dst, src ); } @@ -727,6 +890,7 @@ void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x5B); emit_modrm( p, dst, src ); } @@ -735,6 +899,7 @@ void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x6B); emit_modrm( p, dst, src ); } @@ -743,6 +908,7 @@ void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x63); emit_modrm( p, dst, src ); } @@ -751,6 +917,7 @@ void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x67); emit_modrm( p, dst, src ); } @@ -759,6 +926,7 @@ void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x60); emit_modrm( p, dst, src ); } @@ -768,6 +936,7 @@ void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, X86_TWOB, 0x53); emit_modrm( p, dst, src ); } @@ -776,6 +945,7 @@ void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x53); emit_modrm( p, dst, src ); } @@ -784,6 +954,7 @@ void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); emit_2ub(p, 0x66, X86_TWOB); emit_op_modrm( p, 0x6e, 0x7e, dst, src ); } @@ -796,30 +967,35 @@ void sse2_movd( struct x86_function *p, */ void x87_fist( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); emit_1ub(p, 0xdb); emit_modrm_noreg(p, 2, dst); } void x87_fistp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); emit_1ub(p, 0xdb); emit_modrm_noreg(p, 3, dst); } void x87_fild( struct x86_function *p, struct x86_reg arg ) { + DUMP_R( __FUNCTION__, arg ); emit_1ub(p, 0xdf); emit_modrm_noreg(p, 0, arg); } void x87_fldz( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xee); } void x87_fldcw( struct x86_function *p, struct x86_reg arg ) { + DUMP_R( __FUNCTION__, arg ); assert(arg.file == file_REG32); assert(arg.mod != mod_REG); emit_1ub(p, 0xd9); @@ -828,26 +1004,31 @@ void x87_fldcw( struct x86_function *p, struct x86_reg arg ) void x87_fld1( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xe8); } void x87_fldl2e( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xea); } void x87_fldln2( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xed); } void x87_fwait( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_1ub(p, 0x9b); } void x87_fnclex( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xdb, 0xe2); } @@ -884,49 +1065,55 @@ static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86 assert(0); } -void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - x87_arith_op(p, dst, arg, + DUMP_RR( __FUNCTION__, dst, src ); + x87_arith_op(p, dst, src, 0xd8, 0xc8, 0xdc, 0xc8, 4); } -void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - x87_arith_op(p, dst, arg, + DUMP_RR( __FUNCTION__, dst, src ); + x87_arith_op(p, dst, src, 0xd8, 0xe0, 0xdc, 0xe8, 4); } -void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - x87_arith_op(p, dst, arg, + DUMP_RR( __FUNCTION__, dst, src ); + x87_arith_op(p, dst, src, 0xd8, 0xe8, 0xdc, 0xe0, 5); } -void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - x87_arith_op(p, dst, arg, + DUMP_RR( __FUNCTION__, dst, src ); + x87_arith_op(p, dst, src, 0xd8, 0xc0, 0xdc, 0xc0, 0); } -void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - x87_arith_op(p, dst, arg, + DUMP_RR( __FUNCTION__, dst, src ); + x87_arith_op(p, dst, src, 0xd8, 0xf0, 0xdc, 0xf8, 6); } -void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) +void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - x87_arith_op(p, dst, arg, + DUMP_RR( __FUNCTION__, dst, src ); + x87_arith_op(p, dst, src, 0xd8, 0xf8, 0xdc, 0xf0, 7); @@ -934,6 +1121,7 @@ void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ) void x87_fmulp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xc8+dst.idx); @@ -941,6 +1129,7 @@ void x87_fmulp( struct x86_function *p, struct x86_reg dst ) void x87_fsubp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xe8+dst.idx); @@ -948,6 +1137,7 @@ void x87_fsubp( struct x86_function *p, struct x86_reg dst ) void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xe0+dst.idx); @@ -955,6 +1145,7 @@ void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) void x87_faddp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xc0+dst.idx); @@ -962,6 +1153,7 @@ void x87_faddp( struct x86_function *p, struct x86_reg dst ) void x87_fdivp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xf8+dst.idx); @@ -969,6 +1161,7 @@ void x87_fdivp( struct x86_function *p, struct x86_reg dst ) void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xf0+dst.idx); @@ -976,70 +1169,83 @@ void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) void x87_fucom( struct x86_function *p, struct x86_reg arg ) { + DUMP_R( __FUNCTION__, arg ); assert(arg.file == file_x87); emit_2ub(p, 0xdd, 0xe0+arg.idx); } void x87_fucomp( struct x86_function *p, struct x86_reg arg ) { + DUMP_R( __FUNCTION__, arg ); assert(arg.file == file_x87); emit_2ub(p, 0xdd, 0xe8+arg.idx); } void x87_fucompp( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xda, 0xe9); } void x87_fxch( struct x86_function *p, struct x86_reg arg ) { + DUMP_R( __FUNCTION__, arg ); assert(arg.file == file_x87); emit_2ub(p, 0xd9, 0xc8+arg.idx); } void x87_fabs( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xe1); } void x87_fchs( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xe0); } void x87_fcos( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xff); } void x87_fprndint( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xfc); } void x87_fscale( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xfd); } void x87_fsin( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xfe); } void x87_fsincos( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xfb); } void x87_fsqrt( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xfa); } void x87_fxtract( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xf4); } @@ -1049,6 +1255,7 @@ void x87_fxtract( struct x86_function *p ) */ void x87_f2xm1( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xf0); } @@ -1057,6 +1264,7 @@ void x87_f2xm1( struct x86_function *p ) */ void x87_fyl2x( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xf1); } @@ -1067,12 +1275,14 @@ void x87_fyl2x( struct x86_function *p ) */ void x87_fyl2xp1( struct x86_function *p ) { + DUMP( __FUNCTION__ ); emit_2ub(p, 0xd9, 0xf9); } void x87_fld( struct x86_function *p, struct x86_reg arg ) { + DUMP_R( __FUNCTION__, arg ); if (arg.file == file_x87) emit_2ub(p, 0xd9, 0xc0 + arg.idx); else { @@ -1083,6 +1293,7 @@ void x87_fld( struct x86_function *p, struct x86_reg arg ) void x87_fst( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); if (dst.file == file_x87) emit_2ub(p, 0xdd, 0xd0 + dst.idx); else { @@ -1093,6 +1304,7 @@ void x87_fst( struct x86_function *p, struct x86_reg dst ) void x87_fstp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); if (dst.file == file_x87) emit_2ub(p, 0xdd, 0xd8 + dst.idx); else { @@ -1103,6 +1315,7 @@ void x87_fstp( struct x86_function *p, struct x86_reg dst ) void x87_fcom( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); if (dst.file == file_x87) emit_2ub(p, 0xd8, 0xd0 + dst.idx); else { @@ -1113,6 +1326,7 @@ void x87_fcom( struct x86_function *p, struct x86_reg dst ) void x87_fcomp( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); if (dst.file == file_x87) emit_2ub(p, 0xd8, 0xd8 + dst.idx); else { @@ -1124,6 +1338,7 @@ void x87_fcomp( struct x86_function *p, struct x86_reg dst ) void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) { + DUMP_R( __FUNCTION__, dst ); assert(dst.file == file_REG32); if (dst.idx == reg_AX && @@ -1153,6 +1368,7 @@ void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); assert(dst.file == file_MMX && (src.file == file_MMX || src.mod != mod_REG)); @@ -1166,6 +1382,7 @@ void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); assert(dst.file == file_MMX && (src.file == file_MMX || src.mod != mod_REG)); @@ -1179,6 +1396,7 @@ void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); p->need_emms = 1; emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x6e, 0x7e, dst, src ); @@ -1188,6 +1406,7 @@ void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { + DUMP_RR( __FUNCTION__, dst, src ); p->need_emms = 1; emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x6f, 0x7f, dst, src ); -- cgit v1.2.3 From 615cdd3a535bb71754baa8b37e79b85af01854dd Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 12:39:59 +0100 Subject: tgsi: use new float math funcs, drop local disassembly code --- src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 88 ++--- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 591 +++++++--------------------- 2 files changed, 178 insertions(+), 501 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index 78e7dec569..29e104bbd1 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -287,10 +287,10 @@ micro_abs( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { - dst->f[0] = (float) fabs( (double) src->f[0] ); - dst->f[1] = (float) fabs( (double) src->f[1] ); - dst->f[2] = (float) fabs( (double) src->f[2] ); - dst->f[3] = (float) fabs( (double) src->f[3] ); + dst->f[0] = fabsf( src->f[0] ); + dst->f[1] = fabsf( src->f[1] ); + dst->f[2] = fabsf( src->f[2] ); + dst->f[3] = fabsf( src->f[3] ); } static void @@ -334,10 +334,10 @@ micro_ceil( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { - dst->f[0] = (float) ceil( (double) src->f[0] ); - dst->f[1] = (float) ceil( (double) src->f[1] ); - dst->f[2] = (float) ceil( (double) src->f[2] ); - dst->f[3] = (float) ceil( (double) src->f[3] ); + dst->f[0] = ceilf( src->f[0] ); + dst->f[1] = ceilf( src->f[1] ); + dst->f[2] = ceilf( src->f[2] ); + dst->f[3] = ceilf( src->f[3] ); } static void @@ -345,10 +345,10 @@ micro_cos( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { - dst->f[0] = (float) cos( (double) src->f[0] ); - dst->f[1] = (float) cos( (double) src->f[1] ); - dst->f[2] = (float) cos( (double) src->f[2] ); - dst->f[3] = (float) cos( (double) src->f[3] ); + dst->f[0] = cosf( src->f[0] ); + dst->f[1] = cosf( src->f[1] ); + dst->f[2] = cosf( src->f[2] ); + dst->f[3] = cosf( src->f[3] ); } static void @@ -430,10 +430,10 @@ micro_exp2( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { - dst->f[0] = (float) pow( 2.0, (double) src->f[0] ); - dst->f[1] = (float) pow( 2.0, (double) src->f[1] ); - dst->f[2] = (float) pow( 2.0, (double) src->f[2] ); - dst->f[3] = (float) pow( 2.0, (double) src->f[3] ); + dst->f[0] = powf( 2.0f, src->f[0] ); + dst->f[1] = powf( 2.0f, src->f[1] ); + dst->f[2] = powf( 2.0f, src->f[2] ); + dst->f[3] = powf( 2.0f, src->f[3] ); } static void @@ -463,10 +463,10 @@ micro_flr( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { - dst->f[0] = (float) floor( (double) src->f[0] ); - dst->f[1] = (float) floor( (double) src->f[1] ); - dst->f[2] = (float) floor( (double) src->f[2] ); - dst->f[3] = (float) floor( (double) src->f[3] ); + dst->f[0] = floorf( src->f[0] ); + dst->f[1] = floorf( src->f[1] ); + dst->f[2] = floorf( src->f[2] ); + dst->f[3] = floorf( src->f[3] ); } static void @@ -474,10 +474,10 @@ micro_frc( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { - dst->f[0] = src->f[0] - (float) floor( (double) src->f[0] ); - dst->f[1] = src->f[1] - (float) floor( (double) src->f[1] ); - dst->f[2] = src->f[2] - (float) floor( (double) src->f[2] ); - dst->f[3] = src->f[3] - (float) floor( (double) src->f[3] ); + dst->f[0] = src->f[0] - floorf( src->f[0] ); + dst->f[1] = src->f[1] - floorf( src->f[1] ); + dst->f[2] = src->f[2] - floorf( src->f[2] ); + dst->f[3] = src->f[3] - floorf( src->f[3] ); } static void @@ -510,10 +510,10 @@ micro_lg2( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { - dst->f[0] = (float) log( (double) src->f[0] ) * 1.442695f; - dst->f[1] = (float) log( (double) src->f[1] ) * 1.442695f; - dst->f[2] = (float) log( (double) src->f[2] ) * 1.442695f; - dst->f[3] = (float) log( (double) src->f[3] ) * 1.442695f; + dst->f[0] = logf( src->f[0] ) * 1.442695f; + dst->f[1] = logf( src->f[1] ) * 1.442695f; + dst->f[2] = logf( src->f[2] ) * 1.442695f; + dst->f[3] = logf( src->f[3] ) * 1.442695f; } static void @@ -764,10 +764,10 @@ micro_pow( const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { - dst->f[0] = (float) pow( (double) src0->f[0], (double) src1->f[0] ); - dst->f[1] = (float) pow( (double) src0->f[1], (double) src1->f[1] ); - dst->f[2] = (float) pow( (double) src0->f[2], (double) src1->f[2] ); - dst->f[3] = (float) pow( (double) src0->f[3], (double) src1->f[3] ); + dst->f[0] = powf( src0->f[0], src1->f[0] ); + dst->f[1] = powf( src0->f[1], src1->f[1] ); + dst->f[2] = powf( src0->f[2], src1->f[2] ); + dst->f[3] = powf( src0->f[3], src1->f[3] ); } static void @@ -775,10 +775,10 @@ micro_rnd( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { - dst->f[0] = (float) floor( (double) (src->f[0] + 0.5f) ); - dst->f[1] = (float) floor( (double) (src->f[1] + 0.5f) ); - dst->f[2] = (float) floor( (double) (src->f[2] + 0.5f) ); - dst->f[3] = (float) floor( (double) (src->f[3] + 0.5f) ); + dst->f[0] = floorf( src->f[0] + 0.5f ); + dst->f[1] = floorf( src->f[1] + 0.5f ); + dst->f[2] = floorf( src->f[2] + 0.5f ); + dst->f[3] = floorf( src->f[3] + 0.5f ); } static void @@ -833,20 +833,20 @@ micro_sin( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { - dst->f[0] = (float) sin( (double) src->f[0] ); - dst->f[1] = (float) sin( (double) src->f[1] ); - dst->f[2] = (float) sin( (double) src->f[2] ); - dst->f[3] = (float) sin( (double) src->f[3] ); + dst->f[0] = sinf( src->f[0] ); + dst->f[1] = sinf( src->f[1] ); + dst->f[2] = sinf( src->f[2] ); + dst->f[3] = sinf( src->f[3] ); } static void micro_sqrt( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { - dst->f[0] = (float) sqrt( (double) src->f[0] ); - dst->f[1] = (float) sqrt( (double) src->f[1] ); - dst->f[2] = (float) sqrt( (double) src->f[2] ); - dst->f[3] = (float) sqrt( (double) src->f[3] ); + dst->f[0] = sqrtf( src->f[0] ); + dst->f[1] = sqrtf( src->f[1] ); + dst->f[2] = sqrtf( src->f[2] ); + dst->f[3] = sqrtf( src->f[3] ); } static void diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index d47935e982..c3295a27ff 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -38,113 +38,6 @@ #define HIGH_PRECISION 1 /* for 1/sqrt() */ -#define DUMP_SSE 0 - -#if DUMP_SSE - -static void -_print_reg( - struct x86_reg reg ) -{ - if (reg.mod != mod_REG) - debug_printf( "[" ); - - switch( reg.file ) { - case file_REG32: - switch( reg.idx ) { - case reg_AX: - debug_printf( "EAX" ); - break; - case reg_CX: - debug_printf( "ECX" ); - break; - case reg_DX: - debug_printf( "EDX" ); - break; - case reg_BX: - debug_printf( "EBX" ); - break; - case reg_SP: - debug_printf( "ESP" ); - break; - case reg_BP: - debug_printf( "EBP" ); - break; - case reg_SI: - debug_printf( "ESI" ); - break; - case reg_DI: - debug_printf( "EDI" ); - break; - } - break; - case file_MMX: - assert( 0 ); - break; - case file_XMM: - debug_printf( "XMM%u", reg.idx ); - break; - case file_x87: - assert( 0 ); - break; - } - - if (reg.mod == mod_DISP8 || - reg.mod == mod_DISP32) - debug_printf("+%d", reg.disp); - - if (reg.mod != mod_REG) - debug_printf( "]" ); -} - -static void -_fill( - const char *op ) -{ - unsigned count = 10 - strlen( op ); - - while( count-- ) { - debug_printf( " " ); - } -} - -#define DUMP_START() debug_printf( "\nsse-dump start ----------------" ) -#define DUMP_END() debug_printf( "\nsse-dump end ----------------\n" ) -#define DUMP( OP ) debug_printf( "\n%s", OP ) -#define DUMP_I( OP, I ) do {\ - debug_printf( "\n%s", OP );\ - _fill( OP );\ - debug_printf( "%u", I ); } while( 0 ) -#define DUMP_R( OP, R0 ) do {\ - debug_printf( "\n%s", OP );\ - _fill( OP );\ - _print_reg( R0 ); } while( 0 ) -#define DUMP_RR( OP, R0, R1 ) do {\ - debug_printf( "\n%s", OP );\ - _fill( OP );\ - _print_reg( R0 );\ - debug_printf( ", " );\ - _print_reg( R1 ); } while( 0 ) -#define DUMP_RRI( OP, R0, R1, I ) do {\ - debug_printf( "\n%s", OP );\ - _fill( OP );\ - _print_reg( R0 );\ - debug_printf( ", " );\ - _print_reg( R1 );\ - debug_printf( ", " );\ - debug_printf( "%u", I ); } while( 0 ) - -#else - -#define DUMP_START() -#define DUMP_END() -#define DUMP( OP ) -#define DUMP_I( OP, I ) -#define DUMP_R( OP, R0 ) -#define DUMP_RR( OP, R0, R1 ) -#define DUMP_RRI( OP, R0, R1, I ) - -#endif #define FOR_EACH_CHANNEL( CHAN )\ for( CHAN = 0; CHAN < 4; CHAN++ ) @@ -310,200 +203,6 @@ get_coef( ((vec * 3 + member) * 4 + chan) * 4 ); } -/** - * X86 rtasm wrappers. - */ - -static void -emit_addps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "ADDPS", dst, src ); - sse_addps( func, dst, src ); -} - -static void -emit_andnps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "ANDNPS", dst, src ); - sse_andnps( func, dst, src ); -} - -static void -emit_andps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "ANDPS", dst, src ); - sse_andps( func, dst, src ); -} - -static void -emit_call( - struct x86_function *func, - void (* addr)() ) -{ - struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); - - DUMP_I( "CALL", addr ); - x86_mov_reg_imm( func, ecx, (unsigned long) addr ); - x86_call( func, ecx ); -} - -static void -emit_cmpps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src, - enum sse_cc cc ) -{ - DUMP_RRI( "CMPPS", dst, src, cc ); - sse_cmpps( func, dst, src, cc ); -} - -static void -emit_cvttps2dq( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "CVTTPS2DQ", dst, src ); - sse2_cvttps2dq( func, dst, src ); -} - -static void -emit_maxps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "MAXPS", dst, src ); - sse_maxps( func, dst, src ); -} - -static void -emit_minps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "MINPS", dst, src ); - sse_minps( func, dst, src ); -} - -static void -emit_mov( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "MOV", dst, src ); - x86_mov( func, dst, src ); -} - -static void -emit_movaps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "MOVAPS", dst, src ); - sse_movaps( func, dst, src ); -} - -static void -emit_movss( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "MOVSS", dst, src ); - sse_movss( func, dst, src ); -} - -static void -emit_movups( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "MOVUPS", dst, src ); - sse_movups( func, dst, src ); -} - -static void -emit_mulps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "MULPS", dst, src ); - sse_mulps( func, dst, src ); -} - -static void -emit_or( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "OR", dst, src ); - x86_or( func, dst, src ); -} - -static void -emit_orps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "ORPS", dst, src ); - sse_orps( func, dst, src ); -} - -static void -emit_pmovmskb( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "PMOVMSKB", dst, src ); - sse_pmovmskb( func, dst, src ); -} - -static void -emit_pop( - struct x86_function *func, - struct x86_reg dst ) -{ - DUMP_R( "POP", dst ); - x86_pop( func, dst ); -} - -static void -emit_push( - struct x86_function *func, - struct x86_reg dst ) -{ - DUMP_R( "PUSH", dst ); - x86_push( func, dst ); -} - -static void -emit_rcpps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "RCPPS", dst, src ); - sse2_rcpps( func, dst, src ); -} #ifdef WIN32 static void @@ -511,7 +210,6 @@ emit_retw( struct x86_function *func, unsigned size ) { - DUMP_I( "RET", size ); x86_retw( func, size ); } #else @@ -519,51 +217,10 @@ static void emit_ret( struct x86_function *func ) { - DUMP( "RET" ); x86_ret( func ); } #endif -static void -emit_rsqrtps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "RSQRTPS", dst, src ); - sse_rsqrtps( func, dst, src ); -} - -static void -emit_shufps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src, - unsigned char shuf ) -{ - DUMP_RRI( "SHUFPS", dst, src, shuf ); - sse_shufps( func, dst, src, shuf ); -} - -static void -emit_subps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "SUBPS", dst, src ); - sse_subps( func, dst, src ); -} - -static void -emit_xorps( - struct x86_function *func, - struct x86_reg dst, - struct x86_reg src ) -{ - DUMP_RR( "XORPS", dst, src ); - sse_xorps( func, dst, src ); -} /** * Data fetch helpers. @@ -582,11 +239,11 @@ emit_const( unsigned vec, unsigned chan ) { - emit_movss( + sse_movss( func, make_xmm( xmm ), get_const( vec, chan ) ); - emit_shufps( + sse_shufps( func, make_xmm( xmm ), make_xmm( xmm ), @@ -600,11 +257,11 @@ emit_immediate( unsigned vec, unsigned chan ) { - emit_movss( + sse_movss( func, make_xmm( xmm ), get_immediate( vec, chan ) ); - emit_shufps( + sse_shufps( func, make_xmm( xmm ), make_xmm( xmm ), @@ -625,7 +282,7 @@ emit_inputf( unsigned vec, unsigned chan ) { - emit_movups( + sse_movups( func, make_xmm( xmm ), get_input( vec, chan ) ); @@ -644,7 +301,7 @@ emit_output( unsigned vec, unsigned chan ) { - emit_movups( + sse_movups( func, get_output( vec, chan ), make_xmm( xmm ) ); @@ -663,7 +320,7 @@ emit_tempf( unsigned vec, unsigned chan ) { - emit_movaps( + sse_movaps( func, make_xmm( xmm ), get_temp( vec, chan ) ); @@ -684,11 +341,11 @@ emit_coef( unsigned chan, unsigned member ) { - emit_movss( + sse_movss( func, make_xmm( xmm ), get_coef( vec, chan, member ) ); - emit_shufps( + sse_shufps( func, make_xmm( xmm ), make_xmm( xmm ), @@ -706,7 +363,7 @@ emit_inputs( unsigned vec, unsigned chan ) { - emit_movups( + sse_movups( func, get_input( vec, chan ), make_xmm( xmm ) ); @@ -719,7 +376,7 @@ emit_temps( unsigned vec, unsigned chan ) { - emit_movaps( + sse_movaps( func, get_temp( vec, chan ), make_xmm( xmm ) ); @@ -796,39 +453,39 @@ static void emit_push_gp( struct x86_function *func ) { - emit_push( + x86_push( func, get_const_base() ); - emit_push( + x86_push( func, get_input_base() ); - emit_push( + x86_push( func, get_output_base() ); /* It is important on non-win32 platforms that temp base is pushed last. */ - emit_push( + x86_push( func, get_temp_base() ); } static void -emit_pop_gp( +x86_pop_gp( struct x86_function *func ) { /* Restore GP registers in a reverse order. */ - emit_pop( + x86_pop( func, get_temp_base() ); - emit_pop( + x86_pop( func, get_output_base() ); - emit_pop( + x86_pop( func, get_input_base() ); - emit_pop( + x86_pop( func, get_const_base() ); } @@ -839,7 +496,7 @@ emit_func_call_dst( unsigned xmm_dst, void (*code)() ) { - emit_movaps( + sse_movaps( func, get_temp( TEMP_R0, 0 ), make_xmm( xmm_dst ) ); @@ -848,19 +505,22 @@ emit_func_call_dst( func ); #ifdef WIN32 - emit_push( + x86_push( func, get_temp( TEMP_R0, 0 ) ); #endif - emit_call( - func, - code ); + { + struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); + + x86_mov_reg_imm( func, ecx, (unsigned long) code ); + x86_call( func, ecx ); + } - emit_pop_gp( + x86_pop_gp( func ); - emit_movaps( + sse_movaps( func, make_xmm( xmm_dst ), get_temp( TEMP_R0, 0 ) ); @@ -873,7 +533,7 @@ emit_func_call_dst_src( unsigned xmm_src, void (*code)() ) { - emit_movaps( + sse_movaps( func, get_temp( TEMP_R0, 1 ), make_xmm( xmm_src ) ); @@ -893,7 +553,7 @@ emit_abs( struct x86_function *func, unsigned xmm ) { - emit_andps( + sse_andps( func, make_xmm( xmm ), get_temp( @@ -907,7 +567,7 @@ emit_add( unsigned xmm_dst, unsigned xmm_src ) { - emit_addps( + sse_addps( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) ); @@ -918,17 +578,15 @@ cos4f( float *store ) { #ifdef WIN32 - store[0] = (float) cos( (double) store[0] ); - store[1] = (float) cos( (double) store[1] ); - store[2] = (float) cos( (double) store[2] ); - store[3] = (float) cos( (double) store[3] ); + const unsigned X = 0; #else const unsigned X = TEMP_R0 * 16; +#endif + store[X + 0] = cosf( store[X + 0] ); store[X + 1] = cosf( store[X + 1] ); store[X + 2] = cosf( store[X + 2] ); store[X + 3] = cosf( store[X + 3] ); -#endif } static void @@ -947,17 +605,14 @@ ex24f( float *store ) { #ifdef WIN32 - store[0] = (float) pow( 2.0, (double) store[0] ); - store[1] = (float) pow( 2.0, (double) store[1] ); - store[2] = (float) pow( 2.0, (double) store[2] ); - store[3] = (float) pow( 2.0, (double) store[3] ); + const unsigned X = 0; #else const unsigned X = TEMP_R0 * 16; +#endif store[X + 0] = powf( 2.0f, store[X + 0] ); store[X + 1] = powf( 2.0f, store[X + 1] ); store[X + 2] = powf( 2.0f, store[X + 2] ); store[X + 3] = powf( 2.0f, store[X + 3] ); -#endif } static void @@ -976,7 +631,7 @@ emit_f2it( struct x86_function *func, unsigned xmm ) { - emit_cvttps2dq( + sse2_cvttps2dq( func, make_xmm( xmm ), make_xmm( xmm ) ); @@ -991,10 +646,10 @@ flr4f( #else const unsigned X = TEMP_R0 * 16; #endif - store[X + 0] = (float) floor( (double) store[X + 0] ); - store[X + 1] = (float) floor( (double) store[X + 1] ); - store[X + 2] = (float) floor( (double) store[X + 2] ); - store[X + 3] = (float) floor( (double) store[X + 3] ); + store[X + 0] = floorf( store[X + 0] ); + store[X + 1] = floorf( store[X + 1] ); + store[X + 2] = floorf( store[X + 2] ); + store[X + 3] = floorf( store[X + 3] ); } static void @@ -1017,10 +672,10 @@ frc4f( #else const unsigned X = TEMP_R0 * 16; #endif - store[X + 0] -= (float) floor( (double) store[X + 0] ); - store[X + 1] -= (float) floor( (double) store[X + 1] ); - store[X + 2] -= (float) floor( (double) store[X + 2] ); - store[X + 3] -= (float) floor( (double) store[X + 3] ); + store[X + 0] -= floorf( store[X + 0] ); + store[X + 1] -= floorf( store[X + 1] ); + store[X + 2] -= floorf( store[X + 2] ); + store[X + 3] -= floorf( store[X + 3] ); } static void @@ -1066,7 +721,7 @@ emit_MOV( unsigned xmm_dst, unsigned xmm_src ) { - emit_movups( + sse_movups( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) ); @@ -1077,7 +732,7 @@ emit_mul (struct x86_function *func, unsigned xmm_dst, unsigned xmm_src) { - emit_mulps( + sse_mulps( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) ); @@ -1088,7 +743,7 @@ emit_neg( struct x86_function *func, unsigned xmm ) { - emit_xorps( + sse_xorps( func, make_xmm( xmm ), get_temp( @@ -1101,17 +756,14 @@ pow4f( float *store ) { #ifdef WIN32 - store[0] = (float) pow( (double) store[0], (double) store[4] ); - store[1] = (float) pow( (double) store[1], (double) store[5] ); - store[2] = (float) pow( (double) store[2], (double) store[6] ); - store[3] = (float) pow( (double) store[3], (double) store[7] ); + const unsigned X = 0; #else const unsigned X = TEMP_R0 * 16; +#endif store[X + 0] = powf( store[X + 0], store[X + 4] ); store[X + 1] = powf( store[X + 1], store[X + 5] ); store[X + 2] = powf( store[X + 2], store[X + 6] ); store[X + 3] = powf( store[X + 3], store[X + 7] ); -#endif } static void @@ -1133,7 +785,11 @@ emit_rcp ( unsigned xmm_dst, unsigned xmm_src ) { - emit_rcpps( + /* On Intel CPUs at least, this is only accurate to 12 bits -- not + * good enough. Need to either emit a proper divide or use the + * iterative technique described below in emit_rsqrt(). + */ + sse2_rcpps( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) ); @@ -1145,17 +801,14 @@ rsqrt4f( float *store ) { #ifdef WIN32 - store[0] = 1.0F / (float) sqrt( (double) store[0] ); - store[1] = 1.0F / (float) sqrt( (double) store[1] ); - store[2] = 1.0F / (float) sqrt( (double) store[2] ); - store[3] = 1.0F / (float) sqrt( (double) store[3] ); + const unsigned X = 0; #else const unsigned X = TEMP_R0 * 16; - store[X + 0] = 1.0F / sqrt( store[X + 0] ); - store[X + 1] = 1.0F / sqrt( store[X + 1] ); - store[X + 2] = 1.0F / sqrt( store[X + 2] ); - store[X + 3] = 1.0F / sqrt( store[X + 3] ); #endif + store[X + 0] = 1.0F / sqrtf( store[X + 0] ); + store[X + 1] = 1.0F / sqrtf( store[X + 1] ); + store[X + 2] = 1.0F / sqrtf( store[X + 2] ); + store[X + 3] = 1.0F / sqrtf( store[X + 3] ); } #endif @@ -1166,12 +819,41 @@ emit_rsqrt( unsigned xmm_src ) { #if HIGH_PRECISION +#if 1 emit_func_call_dst_src( func, xmm_dst, xmm_src, rsqrt4f ); #else + /* Although rsqrtps() and rcpps() are low precision on some/all SSE + * implementations, it is possible to improve its precision at + * fairly low cost, using a newton/raphson step, as below: + * + * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a) + * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)] + * + * See: http://softwarecommunity.intel.com/articles/eng/1818.htm + */ + /* This is some code that woudl do the above for a scalar 'a'. We + * obviously are interested in a vector version: + * + * movss xmm3, a; + * movss xmm1, half; + * movss xmm2, three; + * rsqrtss xmm0, xmm3; + * mulss xmm3, xmm0; + * mulss xmm1, xmm0; + * mulss xmm3, xmm0; + * subss xmm2, xmm3; + * mulss xmm1, xmm2; + * movss x, xmm1; + */ +#endif +#else + /* On Intel CPUs at least, this is only accurate to 12 bits -- not + * good enough. + */ emit_rsqrtps( func, make_xmm( xmm_dst ), @@ -1184,7 +866,7 @@ emit_setsign( struct x86_function *func, unsigned xmm ) { - emit_orps( + sse_orps( func, make_xmm( xmm ), get_temp( @@ -1197,17 +879,14 @@ sin4f( float *store ) { #ifdef WIN32 - store[0] = (float) sin( (double) store[0] ); - store[1] = (float) sin( (double) store[1] ); - store[2] = (float) sin( (double) store[2] ); - store[3] = (float) sin( (double) store[3] ); + const unsigned X = 0; #else const unsigned X = TEMP_R0 * 16; +#endif store[X + 0] = sinf( store[X + 0] ); store[X + 1] = sinf( store[X + 1] ); store[X + 2] = sinf( store[X + 2] ); store[X + 3] = sinf( store[X + 3] ); -#endif } static void @@ -1226,7 +905,7 @@ emit_sub( unsigned xmm_dst, unsigned xmm_src ) { - emit_subps( + sse_subps( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) ); @@ -1435,16 +1114,16 @@ emit_kil( } } - emit_push( + x86_push( func, x86_make_reg( file_REG32, reg_AX ) ); - emit_push( + x86_push( func, x86_make_reg( file_REG32, reg_DX ) ); FOR_EACH_CHANNEL( chan_index ) { if( uniquemask & (1 << chan_index) ) { - emit_cmpps( + sse_cmpps( func, make_xmm( registers[chan_index] ), get_temp( @@ -1453,17 +1132,17 @@ emit_kil( cc_LessThan ); if( chan_index == firstchan ) { - emit_pmovmskb( + sse_pmovmskb( func, x86_make_reg( file_REG32, reg_AX ), make_xmm( registers[chan_index] ) ); } else { - emit_pmovmskb( + sse_pmovmskb( func, x86_make_reg( file_REG32, reg_DX ), make_xmm( registers[chan_index] ) ); - emit_or( + x86_or( func, x86_make_reg( file_REG32, reg_AX ), x86_make_reg( file_REG32, reg_DX ) ); @@ -1471,17 +1150,17 @@ emit_kil( } } - emit_or( + x86_or( func, get_temp( TGSI_EXEC_TEMP_KILMASK_I, TGSI_EXEC_TEMP_KILMASK_C ), x86_make_reg( file_REG32, reg_AX ) ); - emit_pop( + x86_pop( func, x86_make_reg( file_REG32, reg_DX ) ); - emit_pop( + x86_pop( func, x86_make_reg( file_REG32, reg_AX ) ); } @@ -1497,12 +1176,12 @@ emit_setcc( FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); FETCH( func, *inst, 1, 1, chan_index ); - emit_cmpps( + sse_cmpps( func, make_xmm( 0 ), make_xmm( 1 ), cc ); - emit_andps( + sse_andps( func, make_xmm( 0 ), get_temp( @@ -1523,22 +1202,22 @@ emit_cmp( FETCH( func, *inst, 0, 0, chan_index ); FETCH( func, *inst, 1, 1, chan_index ); FETCH( func, *inst, 2, 2, chan_index ); - emit_cmpps( + sse_cmpps( func, make_xmm( 0 ), get_temp( TGSI_EXEC_TEMP_00000000_I, TGSI_EXEC_TEMP_00000000_C ), cc_LessThan ); - emit_andps( + sse_andps( func, make_xmm( 1 ), make_xmm( 0 ) ); - emit_andnps( + sse_andnps( func, make_xmm( 0 ), make_xmm( 2 ) ); - emit_orps( + sse_orps( func, make_xmm( 0 ), make_xmm( 1 ) ); @@ -1589,7 +1268,7 @@ emit_instruction( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { FETCH( func, *inst, 0, 0, CHAN_X ); - emit_maxps( + sse_maxps( func, make_xmm( 0 ), get_temp( @@ -1601,7 +1280,7 @@ emit_instruction( /* XMM[1] = SrcReg[0].yyyy */ FETCH( func, *inst, 1, 0, CHAN_Y ); /* XMM[1] = max(XMM[1], 0) */ - emit_maxps( + sse_maxps( func, make_xmm( 1 ), get_temp( @@ -1610,14 +1289,14 @@ emit_instruction( /* XMM[2] = SrcReg[0].wwww */ FETCH( func, *inst, 2, 0, CHAN_W ); /* XMM[2] = min(XMM[2], 128.0) */ - emit_minps( + sse_minps( func, make_xmm( 2 ), get_temp( TGSI_EXEC_TEMP_128_I, TGSI_EXEC_TEMP_128_C ) ); /* XMM[2] = max(XMM[2], -128.0) */ - emit_maxps( + sse_maxps( func, make_xmm( 2 ), get_temp( @@ -1625,16 +1304,16 @@ emit_instruction( TGSI_EXEC_TEMP_MINUS_128_C ) ); emit_pow( func, 1, 2 ); FETCH( func, *inst, 0, 0, CHAN_X ); - emit_xorps( + sse_xorps( func, make_xmm( 2 ), make_xmm( 2 ) ); - emit_cmpps( + sse_cmpps( func, make_xmm( 2 ), make_xmm( 0 ), cc_LessThanEqual ); - emit_andps( + sse_andps( func, make_xmm( 2 ), make_xmm( 1 ) ); @@ -1756,7 +1435,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); FETCH( func, *inst, 1, 1, chan_index ); - emit_minps( + sse_minps( func, make_xmm( 0 ), make_xmm( 1 ) ); @@ -1768,7 +1447,7 @@ emit_instruction( FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); FETCH( func, *inst, 1, 1, chan_index ); - emit_maxps( + sse_maxps( func, make_xmm( 0 ), make_xmm( 1 ) ); @@ -2376,8 +2055,6 @@ tgsi_emit_sse2( unsigned ok = 1; uint num_immediates = 0; - DUMP_START(); - func->csr = func->store; tgsi_parse_init( &parse, tokens ); @@ -2387,24 +2064,24 @@ tgsi_emit_sse2( */ if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { /* DECLARATION phase, do not load output argument. */ - emit_mov( + x86_mov( func, get_input_base(), get_argument( 0 ) ); /* skipping outputs argument here */ - emit_mov( + x86_mov( func, get_const_base(), get_argument( 2 ) ); - emit_mov( + x86_mov( func, get_temp_base(), get_argument( 3 ) ); - emit_mov( + x86_mov( func, get_coef_base(), get_argument( 4 ) ); - emit_mov( + x86_mov( func, get_immediate_base(), get_argument( 5 ) ); @@ -2412,23 +2089,23 @@ tgsi_emit_sse2( else { assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX); - emit_mov( + x86_mov( func, get_input_base(), get_argument( 0 ) ); - emit_mov( + x86_mov( func, get_output_base(), get_argument( 1 ) ); - emit_mov( + x86_mov( func, get_const_base(), get_argument( 2 ) ); - emit_mov( + x86_mov( func, get_temp_base(), get_argument( 3 ) ); - emit_mov( + x86_mov( func, get_immediate_base(), get_argument( 4 ) ); @@ -2451,7 +2128,7 @@ tgsi_emit_sse2( if( !instruction_phase ) { /* INSTRUCTION phase, overwrite coeff with output. */ instruction_phase = TRUE; - emit_mov( + x86_mov( func, get_output_base(), get_argument( 1 ) ); @@ -2463,8 +2140,10 @@ tgsi_emit_sse2( &parse.FullToken.FullInstruction ); if (!ok) { - debug_printf("failed to translate tgsi opcode %d to SSE\n", - parse.FullToken.FullInstruction.Instruction.Opcode ); + debug_printf("failed to translate tgsi opcode %d to SSE (%s)\n", + parse.FullToken.FullInstruction.Instruction.Opcode, + parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? + "vertex shader" : "fragment shader"); } break; @@ -2499,8 +2178,6 @@ tgsi_emit_sse2( tgsi_parse_free( &parse ); - DUMP_END(); - return ok; } -- cgit v1.2.3 From 08717d94619802f7816420be155c0c92fa727109 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 21 Apr 2008 20:44:45 +0900 Subject: gallium: Do not mistake pipe state objects for state tracker state objects. --- src/gallium/auxiliary/cso_cache/cso_context.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 746b176185..4541be8a5c 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -266,8 +266,9 @@ void cso_save_samplers(struct cso_context *ctx) void cso_restore_samplers(struct cso_context *ctx) { - cso_set_samplers(ctx, ctx->nr_samplers_saved, - (const struct pipe_sampler_state **) ctx->samplers_saved); + ctx->nr_samplers = ctx->nr_samplers_saved; + memcpy(ctx->samplers, ctx->samplers_saved, sizeof(ctx->samplers)); + cso_single_sampler_done( ctx ); } @@ -313,6 +314,8 @@ void cso_restore_sampler_textures( struct cso_context *ctx ) pipe_texture_reference(&ctx->textures[i], NULL); ctx->pipe->set_sampler_textures(ctx->pipe, ctx->nr_textures, ctx->textures); + + ctx->nr_textures_saved = 0; } -- cgit v1.2.3 From 13d8b1b211a803f44ffe325e7eed887cce4abaca Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 21 Apr 2008 22:26:33 +0900 Subject: gallium: Set all state via cso_context in blit/gen_mipmap utils. cso_restore_* functions are implemented on top of cso_set_*, therefore they require full knowledge of the current pipe state to work correctly. Directly calling pipe's set_*_state functions will lead to undefined state. Also save and restore shaders. --- src/gallium/auxiliary/util/u_blit.c | 10 +++++++--- src/gallium/auxiliary/util/u_gen_mipmap.c | 12 ++++++++---- 2 files changed, 15 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index eec5e600c9..be5e83e834 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -300,6 +300,8 @@ util_blit_pixels(struct blit_state *ctx, cso_save_samplers(ctx->cso); cso_save_sampler_textures(ctx->cso); cso_save_framebuffer(ctx->cso); + cso_save_fragment_shader(ctx->cso); + cso_save_vertex_shader(ctx->cso); /* set misc state we care about */ cso_set_blend(ctx->cso, &ctx->blend); @@ -313,11 +315,11 @@ util_blit_pixels(struct blit_state *ctx, cso_single_sampler_done(ctx->cso); /* texture */ - pipe->set_sampler_textures(pipe, 1, &tex); + cso_set_sampler_textures(ctx->cso, 1, &tex); /* shaders */ - pipe->bind_fs_state(pipe, ctx->fs); - pipe->bind_vs_state(pipe, ctx->vs); + cso_set_fragment_shader(ctx->cso, ctx->fs); + cso_set_vertex_shader(ctx->cso, ctx->vs); /* drawing dest */ memset(&fb, 0, sizeof(fb)); @@ -344,6 +346,8 @@ util_blit_pixels(struct blit_state *ctx, cso_restore_samplers(ctx->cso); cso_restore_sampler_textures(ctx->cso); cso_restore_framebuffer(ctx->cso); + cso_restore_fragment_shader(ctx->cso); + cso_restore_vertex_shader(ctx->cso); /* free the texture */ pipe_surface_reference(&texSurf, NULL); diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 2fd214d22e..f0c4063b28 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -880,16 +880,18 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, cso_save_samplers(ctx->cso); cso_save_sampler_textures(ctx->cso); cso_save_framebuffer(ctx->cso); + cso_save_fragment_shader(ctx->cso); + cso_save_vertex_shader(ctx->cso); /* bind our state */ cso_set_blend(ctx->cso, &ctx->blend); cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); cso_set_rasterizer(ctx->cso, &ctx->rasterizer); - pipe->bind_vs_state(pipe, ctx->vs); - pipe->bind_fs_state(pipe, ctx->fs); + cso_set_fragment_shader(ctx->cso, ctx->fs); + cso_set_vertex_shader(ctx->cso, ctx->vs); #if 0 - pipe->set_viewport_state(pipe, &ctx->viewport); + cso_set_viewport(ctx->cso, &ctx->viewport); #endif /* init framebuffer state */ @@ -930,7 +932,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, simple_viewport(pipe, pt->width[dstLevel], pt->height[dstLevel]); #endif - pipe->set_sampler_textures(pipe, 1, &pt); + cso_set_sampler_textures(ctx->cso, 1, &pt); /* quad coords in window coords (bypassing clipping, viewport mapping) */ set_vertex_data(ctx, @@ -954,4 +956,6 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, cso_restore_samplers(ctx->cso); cso_restore_sampler_textures(ctx->cso); cso_restore_framebuffer(ctx->cso); + cso_restore_fragment_shader(ctx->cso); + cso_restore_vertex_shader(ctx->cso); } -- cgit v1.2.3 From e29583afcb238cf7a70089cfdf50a69ca277c53a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 22 Apr 2008 00:14:08 +0900 Subject: gallium: Include dependent header. --- src/gallium/auxiliary/tgsi/util/tgsi_parse.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h index a98e88e343..da0121c482 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h @@ -1,6 +1,8 @@ #if !defined TGSI_PARSE_H #define TGSI_PARSE_H +#include "pipe/p_shader_tokens.h" + #if defined __cplusplus extern "C" { #endif -- cgit v1.2.3 From d3045ebb0642b09b4d353be6d4a258e6766061e6 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 22 Apr 2008 00:16:04 +0900 Subject: gallium: Hash the fragment shader tokens, instead of pipe_shader_state. PS: pipe_shader_state should probably go away now that it is reduced to a single pointer. --- src/gallium/auxiliary/cso_cache/cso_context.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 4541be8a5c..87995c80c3 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -38,6 +38,7 @@ #include "pipe/p_state.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "tgsi/util/tgsi_parse.h" #include "cso_cache/cso_context.h" #include "cso_cache/cso_cache.h" @@ -148,6 +149,7 @@ void cso_set_blend(struct cso_context *ctx, void *handle; if (cso_hash_iter_is_null(iter)) { + /* FIXME: handle OOM */ struct cso_blend *cso = MALLOC(sizeof(struct cso_blend)); cso->state = *templ; @@ -198,6 +200,7 @@ void cso_single_sampler(struct cso_context *ctx, (void*)templ); if (cso_hash_iter_is_null(iter)) { + /* FIXME: handle OOM */ struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); cso->state = *templ; @@ -333,6 +336,7 @@ void cso_set_depth_stencil_alpha(struct cso_context *ctx, void *handle; if (cso_hash_iter_is_null(iter)) { + /* FIXME: handle OOM */ struct cso_depth_stencil_alpha *cso = MALLOC(sizeof(struct cso_depth_stencil_alpha)); cso->state = *templ; @@ -381,6 +385,7 @@ void cso_set_rasterizer(struct cso_context *ctx, void *handle = NULL; if (cso_hash_iter_is_null(iter)) { + /* FIXME: handle OOM */ struct cso_rasterizer *cso = MALLOC(sizeof(struct cso_rasterizer)); cso->state = *templ; @@ -420,17 +425,23 @@ void cso_restore_rasterizer(struct cso_context *ctx) void cso_set_fragment_shader(struct cso_context *ctx, const struct pipe_shader_state *templ) { - unsigned hash_key = cso_construct_key((void*)templ, - sizeof(struct pipe_shader_state)); + const struct tgsi_token *tokens = templ->tokens; + unsigned num_tokens = tgsi_num_tokens(tokens); + size_t tokens_size = num_tokens*sizeof(struct tgsi_token); + unsigned hash_key = cso_construct_key((void*)tokens, tokens_size); struct cso_hash_iter iter = cso_find_state_template(ctx->cache, - hash_key, CSO_FRAGMENT_SHADER, - (void*)templ); + hash_key, + CSO_FRAGMENT_SHADER, + (void*)tokens); void *handle = NULL; if (cso_hash_iter_is_null(iter)) { - struct cso_fragment_shader *cso = MALLOC(sizeof(struct cso_fragment_shader)); + /* FIXME: handle OOM */ + struct cso_fragment_shader *cso = MALLOC(sizeof(struct cso_fragment_shader) + tokens_size); + struct tgsi_token *cso_tokens = (struct tgsi_token *)((char *)cso + sizeof(*cso)); - cso->state = *templ; + memcpy(cso_tokens, tokens, tokens_size); + cso->state.tokens = cso_tokens; cso->data = ctx->pipe->create_fs_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_fs_state; cso->context = ctx->pipe; @@ -477,6 +488,7 @@ void cso_set_vertex_shader(struct cso_context *ctx, void *handle = NULL; if (cso_hash_iter_is_null(iter)) { + /* FIXME: handle OOM */ struct cso_vertex_shader *cso = MALLOC(sizeof(struct cso_vertex_shader)); cso->state = *templ; -- cgit v1.2.3 From a918a9c744f656c8bf2e3fd2841732e01a5ccefc Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 16:10:13 +0100 Subject: draw: consolidate all the passthrough line/tri/point funcs --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/SConscript | 1 + src/gallium/auxiliary/draw/draw_pipe.c | 63 ---------- src/gallium/auxiliary/draw/draw_pipe.h | 6 +- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 38 +++---- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 16 +-- src/gallium/auxiliary/draw/draw_pipe_cull.c | 18 +-- src/gallium/auxiliary/draw/draw_pipe_flatshade.c | 9 +- src/gallium/auxiliary/draw/draw_pipe_offset.c | 16 +-- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 25 +--- src/gallium/auxiliary/draw/draw_pipe_stipple.c | 16 +-- src/gallium/auxiliary/draw/draw_pipe_twoside.c | 19 +--- src/gallium/auxiliary/draw/draw_pipe_unfilled.c | 17 +-- src/gallium/auxiliary/draw/draw_pipe_util.c | 133 ++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_pipe_wide_line.c | 17 +-- src/gallium/auxiliary/draw/draw_pipe_wide_point.c | 23 +--- 16 files changed, 177 insertions(+), 241 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pipe_util.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 5289f2660a..bc6acfe458 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -16,6 +16,7 @@ C_SOURCES = \ draw_pipe_stipple.c \ draw_pipe_twoside.c \ draw_pipe_unfilled.c \ + draw_pipe_util.c \ draw_pipe_validate.c \ draw_pipe_vbuf.c \ draw_pipe_wide_line.c \ diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 91e6a35c5e..0b9852f633 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -15,6 +15,7 @@ draw = env.ConvenienceLibrary( 'draw_pipe_stipple.c', 'draw_pipe_twoside.c', 'draw_pipe_unfilled.c', + 'draw_pipe_util.c', 'draw_pipe_validate.c', 'draw_pipe_vbuf.c', 'draw_pipe_wide_line.c', diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 0c0840af0c..d0890203a5 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -108,40 +108,6 @@ void draw_pipeline_destroy( struct draw_context *draw ) -/* This is only used for temporary verts. - */ -#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) - - -/** - * Allocate space for temporary post-transform vertices, such as for clipping. - */ -void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ) -{ - assert(!stage->tmp); - - stage->nr_tmps = nr; - - if (nr) { - ubyte *store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr ); - unsigned i; - - stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr ); - - for (i = 0; i < nr; i++) - stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE); - } -} - - -void draw_free_temp_verts( struct draw_stage *stage ) -{ - if (stage->tmp) { - FREE( stage->tmp[0] ); - FREE( stage->tmp ); - stage->tmp = NULL; - } -} @@ -195,35 +161,6 @@ static void do_triangle( struct draw_context *draw, } -/* Reset vertex ids. This is basically a type of flush. - * - * Called only from draw_pipe_vbuf.c - */ -void draw_reset_vertex_ids(struct draw_context *draw) -{ - struct draw_stage *stage = draw->pipeline.first; - - while (stage) { - unsigned i; - - for (i = 0; i < stage->nr_tmps; i++) - stage->tmp[i]->vertex_id = UNDEFINED_VERTEX_ID; - - stage = stage->next; - } - - if (draw->pipeline.verts) - { - unsigned i; - char *verts = draw->pipeline.verts; - unsigned stride = draw->pipeline.vertex_stride; - - for (i = 0; i < draw->pipeline.vertex_count; i++) { - ((struct vertex_header *)verts)->vertex_id = UNDEFINED_VERTEX_ID; - verts += stride; - } - } -} /* Code to run the pipeline on a fairly arbitary collection of vertices. diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h index f2749b34bb..2476abb2b2 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.h +++ b/src/gallium/auxiliary/draw/draw_pipe.h @@ -81,10 +81,14 @@ extern struct draw_stage *draw_validate_stage( struct draw_context *context ); extern void draw_free_temp_verts( struct draw_stage *stage ); -extern void draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ); +extern boolean draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ); extern void draw_reset_vertex_ids( struct draw_context *draw ); +void draw_pipe_passthrough_tri(struct draw_stage *stage, struct prim_header *header); +void draw_pipe_passthrough_line(struct draw_stage *stage, struct prim_header *header); +void draw_pipe_passthrough_point(struct draw_stage *stage, struct prim_header *header); + /** diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 24bc87d4f8..b27360170a 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -334,7 +334,7 @@ aa_transform_inst(struct tgsi_transform_context *ctx, * Generate the frag shader we'll use for drawing AA lines. * This will be the user's shader plus some texture/modulate instructions. */ -static void +static boolean generate_aaline_fs(struct aaline_stage *aaline) { const struct pipe_shader_state *orig_fs = &aaline->fs->state; @@ -346,6 +346,8 @@ generate_aaline_fs(struct aaline_stage *aaline) aaline_fs = *orig_fs; /* copy to init */ aaline_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + if (aaline_fs.tokens == NULL) + return FALSE; memset(&transform, 0, sizeof(transform)); transform.colorOutput = -1; @@ -372,6 +374,7 @@ generate_aaline_fs(struct aaline_stage *aaline) = aaline->driver_create_fs_state(aaline->pipe, &aaline_fs); aaline->fs->generic_attrib = transform.maxGeneric + 1; + return TRUE; } @@ -469,13 +472,15 @@ aaline_create_sampler(struct aaline_stage *aaline) * When we're about to draw our first AA line in a batch, this function is * called to tell the driver to bind our modified fragment shader. */ -static void +static boolean bind_aaline_fragment_shader(struct aaline_stage *aaline) { - if (!aaline->fs->aaline_fs) { - generate_aaline_fs(aaline); - } + if (!aaline->fs->aaline_fs && + !generate_aaline_fs(aaline)) + return FALSE; + aaline->driver_bind_fs_state(aaline->pipe, aaline->fs->aaline_fs); + return TRUE; } @@ -487,20 +492,6 @@ aaline_stage( struct draw_stage *stage ) } -static void -passthrough_point(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->point(stage->next, header); -} - - -static void -passthrough_tri(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->tri(stage->next, header); -} - - /** * Draw a wide line by drawing a quad, using geometry which will * fullfill GL's antialiased line requirements. @@ -638,7 +629,10 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) /* * Bind (generate) our fragprog, sampler and texture */ - bind_aaline_fragment_shader(aaline); + if (!bind_aaline_fragment_shader(aaline)) { + stage->line = draw_pipe_passthrough_line; + return; + } /* update vertex attrib info */ aaline->tex_slot = draw->num_vs_outputs; @@ -721,9 +715,9 @@ draw_aaline_stage(struct draw_context *draw) aaline->stage.draw = draw; aaline->stage.next = NULL; - aaline->stage.point = passthrough_point; + aaline->stage.point = draw_pipe_passthrough_point; aaline->stage.line = aaline_first_line; - aaline->stage.tri = passthrough_tri; + aaline->stage.tri = draw_pipe_passthrough_tri; aaline->stage.flush = aaline_flush; aaline->stage.reset_stipple_counter = aaline_reset_stipple_counter; aaline->stage.destroy = aaline_destroy; diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 9f878f6c02..6d22d7ac48 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -546,18 +546,6 @@ aapoint_stage( struct draw_stage *stage ) } -static void -passthrough_line(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->line(stage->next, header); -} - - -static void -passthrough_tri(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->tri(stage->next, header); -} /** @@ -749,8 +737,8 @@ draw_aapoint_stage(struct draw_context *draw) aapoint->stage.draw = draw; aapoint->stage.next = NULL; aapoint->stage.point = aapoint_first_point; - aapoint->stage.line = passthrough_line; - aapoint->stage.tri = passthrough_tri; + aapoint->stage.line = draw_pipe_passthrough_line; + aapoint->stage.tri = draw_pipe_passthrough_tri; aapoint->stage.flush = aapoint_flush; aapoint->stage.reset_stipple_counter = aapoint_reset_stipple_counter; aapoint->stage.destroy = aapoint_destroy; diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c index c406f89d05..8c13f40b55 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_cull.c +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -95,20 +95,6 @@ static void cull_first_tri( struct draw_stage *stage, -static void cull_line( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->line( stage->next, header ); -} - - -static void cull_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - - static void cull_flush( struct draw_stage *stage, unsigned flags ) { stage->tri = cull_first_tri; @@ -139,8 +125,8 @@ struct draw_stage *draw_cull_stage( struct draw_context *draw ) cull->stage.draw = draw; cull->stage.next = NULL; - cull->stage.point = cull_point; - cull->stage.line = cull_line; + cull->stage.point = draw_pipe_passthrough_point; + cull->stage.line = draw_pipe_passthrough_line; cull->stage.tri = cull_first_tri; cull->stage.flush = cull_flush; cull->stage.reset_stipple_counter = cull_reset_stipple_counter; diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c index bdb8b49dc4..2aeb309554 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c @@ -152,13 +152,6 @@ static void flatshade_line_1( struct draw_stage *stage, } -/* Flatshade point -- passthrough. - */ -static void flatshade_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} static void flatshade_init_state( struct draw_stage *stage ) @@ -236,7 +229,7 @@ struct draw_stage *draw_flatshade_stage( struct draw_context *draw ) flatshade->stage.draw = draw; flatshade->stage.next = NULL; - flatshade->stage.point = flatshade_point; + flatshade->stage.point = draw_pipe_passthrough_point; flatshade->stage.line = flatshade_first_line; flatshade->stage.tri = flatshade_first_tri; flatshade->stage.flush = flatshade_flush; diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index dbdece45bb..c1dc21cd32 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -129,18 +129,6 @@ static void offset_first_tri( struct draw_stage *stage, } -static void offset_line( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->line( stage->next, header ); -} - - -static void offset_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} static void offset_flush( struct draw_stage *stage, @@ -175,8 +163,8 @@ struct draw_stage *draw_offset_stage( struct draw_context *draw ) offset->stage.draw = draw; offset->stage.next = NULL; - offset->stage.point = offset_point; - offset->stage.line = offset_line; + offset->stage.point = draw_pipe_passthrough_point; + offset->stage.line = draw_pipe_passthrough_line; offset->stage.tri = offset_first_tri; offset->stage.flush = offset_flush; offset->stage.reset_stipple_counter = offset_reset_stipple_counter; diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 4903ba2133..5686729cd3 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -473,25 +473,6 @@ pstip_stage( struct draw_stage *stage ) } -static void -passthrough_point(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->point(stage->next, header); -} - - -static void -passthrough_line(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->line(stage->next, header); -} - - -static void -passthrough_tri(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->tri(stage->next, header); -} @@ -523,7 +504,7 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) pstip->driver_set_sampler_textures(pipe, num_samplers, pstip->state.textures); /* now really draw first line */ - stage->tri = passthrough_tri; + stage->tri = draw_pipe_passthrough_tri; stage->tri(stage, header); } @@ -579,8 +560,8 @@ draw_pstip_stage(struct draw_context *draw) pstip->stage.draw = draw; pstip->stage.next = NULL; - pstip->stage.point = passthrough_point; - pstip->stage.line = passthrough_line; + pstip->stage.point = draw_pipe_passthrough_point; + pstip->stage.line = draw_pipe_passthrough_line; pstip->stage.tri = pstip_first_tri; pstip->stage.flush = pstip_flush; pstip->stage.reset_stipple_counter = pstip_reset_stipple_counter; diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index 49429ee9e1..9cf5840cce 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -195,18 +195,6 @@ stipple_flush(struct draw_stage *stage, unsigned flags) } -static void -passthrough_point(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->point( stage->next, header ); -} - - -static void -passthrough_tri(struct draw_stage *stage, struct prim_header *header) -{ - stage->next->tri(stage->next, header); -} static void @@ -228,9 +216,9 @@ struct draw_stage *draw_stipple_stage( struct draw_context *draw ) stipple->stage.draw = draw; stipple->stage.next = NULL; - stipple->stage.point = passthrough_point; + stipple->stage.point = draw_pipe_passthrough_point; stipple->stage.line = stipple_first_line; - stipple->stage.tri = passthrough_tri; + stipple->stage.tri = draw_pipe_passthrough_tri; stipple->stage.reset_stipple_counter = reset_stipple_counter; stipple->stage.flush = stipple_flush; stipple->stage.destroy = stipple_destroy; diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c index 09a9d23d57..453fd3ac71 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c +++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c @@ -99,21 +99,6 @@ static void twoside_tri( struct draw_stage *stage, } -static void twoside_line( struct draw_stage *stage, - struct prim_header *header ) -{ - /* pass-through */ - stage->next->line( stage->next, header ); -} - - -static void twoside_point( struct draw_stage *stage, - struct prim_header *header ) -{ - /* pass-through */ - stage->next->point( stage->next, header ); -} - static void twoside_first_tri( struct draw_stage *stage, struct prim_header *header ) @@ -192,8 +177,8 @@ struct draw_stage *draw_twoside_stage( struct draw_context *draw ) twoside->stage.draw = draw; twoside->stage.next = NULL; - twoside->stage.point = twoside_point; - twoside->stage.line = twoside_line; + twoside->stage.point = draw_pipe_passthrough_point; + twoside->stage.line = draw_pipe_passthrough_line; twoside->stage.tri = twoside_first_tri; twoside->stage.flush = twoside_flush; twoside->stage.reset_stipple_counter = twoside_reset_stipple_counter; diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c index 31e24f6a14..d4ddfec1b3 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c @@ -148,19 +148,6 @@ static void unfilled_first_tri( struct draw_stage *stage, } -static void unfilled_line( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->line( stage->next, header ); -} - - -static void unfilled_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - static void unfilled_flush( struct draw_stage *stage, unsigned flags ) @@ -196,8 +183,8 @@ struct draw_stage *draw_unfilled_stage( struct draw_context *draw ) unfilled->stage.draw = draw; unfilled->stage.next = NULL; unfilled->stage.tmp = NULL; - unfilled->stage.point = unfilled_point; - unfilled->stage.line = unfilled_line; + unfilled->stage.point = draw_pipe_passthrough_point; + unfilled->stage.line = draw_pipe_passthrough_line; unfilled->stage.tri = unfilled_first_tri; unfilled->stage.flush = unfilled_flush; unfilled->stage.reset_stipple_counter = unfilled_reset_stipple_counter; diff --git a/src/gallium/auxiliary/draw/draw_pipe_util.c b/src/gallium/auxiliary/draw/draw_pipe_util.c new file mode 100644 index 0000000000..e9821de976 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pipe_util.c @@ -0,0 +1,133 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "draw/draw_private.h" +#include "draw/draw_pipe.h" + + + +void +draw_pipe_passthrough_point(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->point(stage->next, header); +} + +void +draw_pipe_passthrough_line(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->line(stage->next, header); +} + +void +draw_pipe_passthrough_tri(struct draw_stage *stage, struct prim_header *header) +{ + stage->next->tri(stage->next, header); +} + + + + + +/* This is only used for temporary verts. + */ +#define MAX_VERTEX_SIZE ((2 + PIPE_MAX_SHADER_OUTPUTS) * 4 * sizeof(float)) + + +/** + * Allocate space for temporary post-transform vertices, such as for clipping. + */ +boolean draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ) +{ + unsigned i; + ubyte *store; + + assert(!stage->tmp); + + stage->tmp = NULL; + stage->nr_tmps = nr; + if (nr == 0) + return FALSE; + + store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr ); + if (store == NULL) + return FALSE; + + stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr ); + + for (i = 0; i < nr; i++) + stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE); + + return TRUE; +} + + +void draw_free_temp_verts( struct draw_stage *stage ) +{ + if (stage->tmp) { + FREE( stage->tmp[0] ); + FREE( stage->tmp ); + stage->tmp = NULL; + } +} + + +/* Reset vertex ids. This is basically a type of flush. + * + * Called only from draw_pipe_vbuf.c + */ +void draw_reset_vertex_ids(struct draw_context *draw) +{ + struct draw_stage *stage = draw->pipeline.first; + + while (stage) { + unsigned i; + + for (i = 0; i < stage->nr_tmps; i++) + stage->tmp[i]->vertex_id = UNDEFINED_VERTEX_ID; + + stage = stage->next; + } + + if (draw->pipeline.verts) + { + unsigned i; + char *verts = draw->pipeline.verts; + unsigned stride = draw->pipeline.vertex_stride; + + for (i = 0; i < draw->pipeline.vertex_count; i++) { + ((struct vertex_header *)verts)->vertex_id = UNDEFINED_VERTEX_ID; + verts += stride; + } + } +} + diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c index 329b5d0fb0..452732e662 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -49,19 +49,6 @@ static INLINE struct wideline_stage *wideline_stage( struct draw_stage *stage ) } -static void wideline_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - - -static void wideline_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->tri(stage->next, header); -} - /** * Draw a wide line by drawing a quad (two triangles). @@ -180,9 +167,9 @@ struct draw_stage *draw_wide_line_stage( struct draw_context *draw ) wide->stage.draw = draw; wide->stage.next = NULL; - wide->stage.point = wideline_point; + wide->stage.point = draw_pipe_passthrough_point; wide->stage.line = wideline_line; - wide->stage.tri = wideline_tri; + wide->stage.tri = draw_pipe_passthrough_point; wide->stage.flush = wideline_flush; wide->stage.reset_stipple_counter = wideline_reset_stipple_counter; wide->stage.destroy = wideline_destroy; diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index 7a439178a0..8101340680 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -61,23 +61,6 @@ widepoint_stage( struct draw_stage *stage ) } -static void passthrough_point( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->point( stage->next, header ); -} - -static void widepoint_line( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->line(stage->next, header); -} - -static void widepoint_tri( struct draw_stage *stage, - struct prim_header *header ) -{ - stage->next->tri(stage->next, header); -} /** @@ -209,7 +192,7 @@ static void widepoint_first_point( struct draw_stage *stage, stage->point = widepoint_point; } else { - stage->point = passthrough_point; + stage->point = draw_pipe_passthrough_point; } if (draw->rasterizer->point_sprite) { @@ -272,8 +255,8 @@ struct draw_stage *draw_wide_point_stage( struct draw_context *draw ) wide->stage.draw = draw; wide->stage.next = NULL; wide->stage.point = widepoint_first_point; - wide->stage.line = widepoint_line; - wide->stage.tri = widepoint_tri; + wide->stage.line = draw_pipe_passthrough_line; + wide->stage.tri = draw_pipe_passthrough_tri; wide->stage.flush = widepoint_flush; wide->stage.reset_stipple_counter = widepoint_reset_stipple_counter; wide->stage.destroy = widepoint_destroy; -- cgit v1.2.3 From 69ecc2a577dc45451d56cee3e41cb6e7e542b097 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 16:38:09 +0100 Subject: draw: propogate errors out of aaline stage --- src/gallium/auxiliary/draw/draw_context.h | 2 +- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 58 ++++++++++++++++++++++----- 2 files changed, 48 insertions(+), 12 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 68e2efb865..6171c75cb4 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -73,7 +73,7 @@ void draw_enable_line_stipple(struct draw_context *draw, boolean enable); void draw_enable_point_sprites(struct draw_context *draw, boolean enable); -void +boolean draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe); void diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index b27360170a..ae1abf91c2 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -338,7 +338,6 @@ static boolean generate_aaline_fs(struct aaline_stage *aaline) { const struct pipe_shader_state *orig_fs = &aaline->fs->state; - //struct draw_context *draw = aaline->stage.draw; struct pipe_shader_state aaline_fs; struct aa_transform_context transform; @@ -372,6 +371,8 @@ generate_aaline_fs(struct aaline_stage *aaline) aaline->fs->aaline_fs = aaline->driver_create_fs_state(aaline->pipe, &aaline_fs); + if (aaline->fs->aaline_fs == NULL) + return FALSE; aaline->fs->generic_attrib = transform.maxGeneric + 1; return TRUE; @@ -381,7 +382,7 @@ generate_aaline_fs(struct aaline_stage *aaline) /** * Create the texture map we'll use for antialiasing the lines. */ -static void +static boolean aaline_create_texture(struct aaline_stage *aaline) { struct pipe_context *pipe = aaline->pipe; @@ -399,6 +400,8 @@ aaline_create_texture(struct aaline_stage *aaline) texTemp.cpp = 1; aaline->texture = screen->texture_create(screen, &texTemp); + if (!aaline->texture) + return FALSE; /* Fill in mipmap images. * Basically each level is solid opaque, except for the outermost @@ -414,6 +417,8 @@ aaline_create_texture(struct aaline_stage *aaline) surface = screen->get_tex_surface(screen, aaline->texture, 0, level, 0); data = pipe_surface_map(surface); + if (data == NULL) + return FALSE; for (i = 0; i < size; i++) { for (j = 0; j < size; j++) { @@ -439,6 +444,7 @@ aaline_create_texture(struct aaline_stage *aaline) pipe_surface_reference(&surface, NULL); pipe->texture_update(pipe, aaline->texture, 0, (1 << level)); } + return TRUE; } @@ -447,7 +453,7 @@ aaline_create_texture(struct aaline_stage *aaline) * By using a mipmapped texture, we don't have to generate a different * texture image for each line size. */ -static void +static boolean aaline_create_sampler(struct aaline_stage *aaline) { struct pipe_sampler_state sampler; @@ -465,6 +471,10 @@ aaline_create_sampler(struct aaline_stage *aaline) sampler.max_lod = MAX_TEXTURE_LEVEL; aaline->sampler_cso = pipe->create_sampler_state(pipe, &sampler); + if (aaline->sampler_cso == NULL) + return FALSE; + + return TRUE; } @@ -696,9 +706,11 @@ aaline_destroy(struct draw_stage *stage) { struct aaline_stage *aaline = aaline_stage(stage); - aaline->pipe->delete_sampler_state(aaline->pipe, aaline->sampler_cso); + if (aaline->sampler_cso) + aaline->pipe->delete_sampler_state(aaline->pipe, aaline->sampler_cso); - pipe_texture_release(&aaline->texture); + if (aaline->texture) + pipe_texture_release(&aaline->texture); draw_free_temp_verts( stage ); @@ -710,8 +722,11 @@ static struct aaline_stage * draw_aaline_stage(struct draw_context *draw) { struct aaline_stage *aaline = CALLOC_STRUCT(aaline_stage); + if (aaline == NULL) + return NULL; - draw_alloc_temp_verts( &aaline->stage, 8 ); + if (draw_alloc_temp_verts( &aaline->stage, 8 )) + goto fail; aaline->stage.draw = draw; aaline->stage.next = NULL; @@ -723,6 +738,12 @@ draw_aaline_stage(struct draw_context *draw) aaline->stage.destroy = aaline_destroy; return aaline; + + fail: + if (aaline) + aaline_destroy(&aaline->stage); + + return NULL; } @@ -816,7 +837,7 @@ aaline_set_sampler_textures(struct pipe_context *pipe, * into the draw module's pipeline. This will not be used if the * hardware has native support for AA lines. */ -void +boolean draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) { struct aaline_stage *aaline; @@ -827,14 +848,17 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) * Create / install AA line drawing / prim stage */ aaline = draw_aaline_stage( draw ); - assert(aaline); - draw->pipeline.aaline = &aaline->stage; + if (!aaline) + goto fail; aaline->pipe = pipe; /* create special texture, sampler state */ - aaline_create_texture(aaline); - aaline_create_sampler(aaline); + if (!aaline_create_texture(aaline)) + goto fail; + + if (!aaline_create_sampler(aaline)) + goto fail; /* save original driver functions */ aaline->driver_create_fs_state = pipe->create_fs_state; @@ -851,4 +875,16 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe) pipe->bind_sampler_states = aaline_bind_sampler_states; pipe->set_sampler_textures = aaline_set_sampler_textures; + + /* Install once everything is known to be OK: + */ + draw->pipeline.aaline = &aaline->stage; + + return TRUE; + + fail: + if (aaline) + aaline->stage.destroy( &aaline->stage ); + + return FALSE; } -- cgit v1.2.3 From 0cd90a917d289363a3edb5cfa40c391eb07aa97c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 16:45:41 +0100 Subject: draw: propogate errors out of aapoint stage --- src/gallium/auxiliary/draw/draw_context.h | 2 +- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 46 ++++++++++++++++++++------ 3 files changed, 38 insertions(+), 12 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 6171c75cb4..197235b2d8 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -76,7 +76,7 @@ void draw_enable_point_sprites(struct draw_context *draw, boolean enable); boolean draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe); -void +boolean draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe); void diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index ae1abf91c2..bc514d40f0 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -725,7 +725,7 @@ draw_aaline_stage(struct draw_context *draw) if (aaline == NULL) return NULL; - if (draw_alloc_temp_verts( &aaline->stage, 8 )) + if (!draw_alloc_temp_verts( &aaline->stage, 8 )) goto fail; aaline->stage.draw = draw; diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 6d22d7ac48..6d689c36de 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -484,7 +484,7 @@ aa_transform_inst(struct tgsi_transform_context *ctx, * Generate the frag shader we'll use for drawing AA lines. * This will be the user's shader plus some texture/modulate instructions. */ -static void +static boolean generate_aapoint_fs(struct aapoint_stage *aapoint) { const struct pipe_shader_state *orig_fs = &aapoint->fs->state; @@ -495,6 +495,8 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) aapoint_fs = *orig_fs; /* copy to init */ aapoint_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + if (aapoint_fs.tokens == NULL) + return FALSE; memset(&transform, 0, sizeof(transform)); transform.colorOutput = -1; @@ -519,8 +521,12 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) aapoint->fs->aapoint_fs = aapoint->driver_create_fs_state(aapoint->pipe, &aapoint_fs); + if (aapoint->fs->aapoint_fs == NULL) + return FALSE; aapoint->fs->generic_attrib = transform.maxGeneric + 1; + + return TRUE; } @@ -528,13 +534,15 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) * When we're about to draw our first AA line in a batch, this function is * called to tell the driver to bind our modified fragment shader. */ -static void +static boolean bind_aapoint_fragment_shader(struct aapoint_stage *aapoint) { - if (!aapoint->fs->aapoint_fs) { - generate_aapoint_fs(aapoint); - } + if (!aapoint->fs->aapoint_fs && + !generate_aapoint_fs(aapoint)) + return FALSE; + aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs); + return TRUE; } @@ -731,8 +739,11 @@ static struct aapoint_stage * draw_aapoint_stage(struct draw_context *draw) { struct aapoint_stage *aapoint = CALLOC_STRUCT(aapoint_stage); + if (aapoint == NULL) + goto fail; - draw_alloc_temp_verts( &aapoint->stage, 4 ); + if (!draw_alloc_temp_verts( &aapoint->stage, 4 )) + goto fail; aapoint->stage.draw = draw; aapoint->stage.next = NULL; @@ -744,6 +755,13 @@ draw_aapoint_stage(struct draw_context *draw) aapoint->stage.destroy = aapoint_destroy; return aapoint; + + fail: + if (aapoint) + aapoint_destroy(&aapoint->stage); + + return NULL; + } @@ -806,20 +824,19 @@ aapoint_delete_fs_state(struct pipe_context *pipe, void *fs) * into the draw module's pipeline. This will not be used if the * hardware has native support for AA points. */ -void +boolean draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe) { struct aapoint_stage *aapoint; - pipe->draw = (void *) draw; /* * Create / install AA point drawing / prim stage */ aapoint = draw_aapoint_stage( draw ); - assert(aapoint); - draw->pipeline.aapoint = &aapoint->stage; + if (aapoint == NULL) + goto fail; aapoint->pipe = pipe; @@ -832,4 +849,13 @@ draw_install_aapoint_stage(struct draw_context *draw, pipe->create_fs_state = aapoint_create_fs_state; pipe->bind_fs_state = aapoint_bind_fs_state; pipe->delete_fs_state = aapoint_delete_fs_state; + + pipe->draw = (void *) draw; + draw->pipeline.aapoint = &aapoint->stage; + + fail: + if (aapoint) + aapoint->stage.destroy( &aapoint->stage ); + + return FALSE; } -- cgit v1.2.3 From 0d4ece4c5a243dc4b684331bad49f220311e5520 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 17:03:37 +0100 Subject: draw: propogate lots of errors --- src/gallium/auxiliary/draw/draw_context.h | 2 +- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 10 ++++---- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 14 ++++++----- src/gallium/auxiliary/draw/draw_pipe_clip.c | 11 ++++++++- src/gallium/auxiliary/draw/draw_pipe_cull.c | 11 ++++++++- src/gallium/auxiliary/draw/draw_pipe_flatshade.c | 11 ++++++++- src/gallium/auxiliary/draw/draw_pipe_offset.c | 8 +++++++ src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 14 +++++++++-- src/gallium/auxiliary/draw/draw_pipe_twoside.c | 11 ++++++++- src/gallium/auxiliary/draw/draw_pipe_unfilled.c | 11 ++++++++- src/gallium/auxiliary/draw/draw_pipe_util.c | 28 ++++++++++++---------- src/gallium/auxiliary/draw/draw_pipe_validate.c | 2 ++ src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 5 ++-- src/gallium/auxiliary/draw/draw_pipe_wide_point.c | 11 ++++++++- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 2 ++ .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 2 ++ src/gallium/auxiliary/draw/draw_pt_vcache.c | 2 ++ 17 files changed, 120 insertions(+), 35 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 197235b2d8..2742001698 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -79,7 +79,7 @@ draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe); boolean draw_install_aapoint_stage(struct draw_context *draw, struct pipe_context *pipe); -void +boolean draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe); diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index bc514d40f0..fbb5b45b40 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -765,13 +765,13 @@ aaline_create_fs_state(struct pipe_context *pipe, { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); struct aaline_fragment_shader *aafs = CALLOC_STRUCT(aaline_fragment_shader); + if (aafs == NULL) + return NULL; - if (aafs) { - aafs->state = *fs; + aafs->state = *fs; - /* pass-through */ - aafs->driver_fs = aaline->driver_create_fs_state(aaline->pipe, fs); - } + /* pass-through */ + aafs->driver_fs = aaline->driver_create_fs_state(aaline->pipe, fs); return aafs; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 6d689c36de..ac0aa4cd7c 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -783,13 +783,13 @@ aapoint_create_fs_state(struct pipe_context *pipe, { struct aapoint_stage *aapoint = aapoint_stage_from_pipe(pipe); struct aapoint_fragment_shader *aafs = CALLOC_STRUCT(aapoint_fragment_shader); + if (aafs == NULL) + return NULL; - if (aafs) { - aafs->state = *fs; + aafs->state = *fs; - /* pass-through */ - aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs); - } + /* pass-through */ + aafs->driver_fs = aapoint->driver_create_fs_state(aapoint->pipe, fs); return aafs; } @@ -830,6 +830,7 @@ draw_install_aapoint_stage(struct draw_context *draw, { struct aapoint_stage *aapoint; + pipe->draw = (void *) draw; /* * Create / install AA point drawing / prim stage @@ -850,9 +851,10 @@ draw_install_aapoint_stage(struct draw_context *draw, pipe->bind_fs_state = aapoint_bind_fs_state; pipe->delete_fs_state = aapoint_delete_fs_state; - pipe->draw = (void *) draw; draw->pipeline.aapoint = &aapoint->stage; + return TRUE; + fail: if (aapoint) aapoint->stage.destroy( &aapoint->stage ); diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 6780f275d9..21216addea 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -493,8 +493,11 @@ static void clip_destroy( struct draw_stage *stage ) struct draw_stage *draw_clip_stage( struct draw_context *draw ) { struct clipper *clipper = CALLOC_STRUCT(clipper); + if (clipper == NULL) + goto fail; - draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 ); + if (!draw_alloc_temp_verts( &clipper->stage, MAX_CLIPPED_VERTICES+1 )) + goto fail; clipper->stage.draw = draw; clipper->stage.point = clip_point; @@ -507,4 +510,10 @@ struct draw_stage *draw_clip_stage( struct draw_context *draw ) clipper->plane = draw->plane; return &clipper->stage; + + fail: + if (clipper) + clipper->stage.destroy( &clipper->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c index 8c13f40b55..87aaf1f85b 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_cull.c +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -120,8 +120,11 @@ static void cull_destroy( struct draw_stage *stage ) struct draw_stage *draw_cull_stage( struct draw_context *draw ) { struct cull_stage *cull = CALLOC_STRUCT(cull_stage); + if (cull == NULL) + goto fail; - draw_alloc_temp_verts( &cull->stage, 0 ); + if (!draw_alloc_temp_verts( &cull->stage, 0 )) + goto fail; cull->stage.draw = draw; cull->stage.next = NULL; @@ -133,4 +136,10 @@ struct draw_stage *draw_cull_stage( struct draw_context *draw ) cull->stage.destroy = cull_destroy; return &cull->stage; + + fail: + if (cull) + cull->stage.destroy( &cull->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c index 2aeb309554..205000cbea 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c @@ -224,8 +224,11 @@ static void flatshade_destroy( struct draw_stage *stage ) struct draw_stage *draw_flatshade_stage( struct draw_context *draw ) { struct flat_stage *flatshade = CALLOC_STRUCT(flat_stage); + if (flatshade == NULL) + goto fail; - draw_alloc_temp_verts( &flatshade->stage, 2 ); + if (!draw_alloc_temp_verts( &flatshade->stage, 2 )) + goto fail; flatshade->stage.draw = draw; flatshade->stage.next = NULL; @@ -237,6 +240,12 @@ struct draw_stage *draw_flatshade_stage( struct draw_context *draw ) flatshade->stage.destroy = flatshade_destroy; return &flatshade->stage; + + fail: + if (flatshade) + flatshade->stage.destroy( &flatshade->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index c1dc21cd32..ffec85ccdd 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -158,6 +158,8 @@ static void offset_destroy( struct draw_stage *stage ) struct draw_stage *draw_offset_stage( struct draw_context *draw ) { struct offset_stage *offset = CALLOC_STRUCT(offset_stage); + if (offset == NULL) + goto fail; draw_alloc_temp_verts( &offset->stage, 3 ); @@ -171,4 +173,10 @@ struct draw_stage *draw_offset_stage( struct draw_context *draw ) offset->stage.destroy = offset_destroy; return &offset->stage; + + fail: + if (offset) + offset->stage.destroy( &offset->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 5686729cd3..ac08aa10ce 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -686,7 +686,7 @@ pstip_set_polygon_stipple(struct pipe_context *pipe, * into the draw module's pipeline. This will not be used if the * hardware has native support for AA lines. */ -void +boolean draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe) { @@ -698,7 +698,9 @@ draw_install_pstipple_stage(struct draw_context *draw, * Create / install AA line drawing / prim stage */ pstip = draw_pstip_stage( draw ); - assert(pstip); + if (pstip == NULL) + goto fail; + draw->pipeline.pstipple = &pstip->stage; pstip->pipe = pipe; @@ -724,4 +726,12 @@ draw_install_pstipple_stage(struct draw_context *draw, pipe->bind_sampler_states = pstip_bind_sampler_states; pipe->set_sampler_textures = pstip_set_sampler_textures; pipe->set_polygon_stipple = pstip_set_polygon_stipple; + + return TRUE; + + fail: + if (pstip) + pstip->stage.destroy( &pstip->stage ); + + return FALSE; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c index 453fd3ac71..5910dccc43 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c +++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c @@ -172,8 +172,11 @@ static void twoside_destroy( struct draw_stage *stage ) struct draw_stage *draw_twoside_stage( struct draw_context *draw ) { struct twoside_stage *twoside = CALLOC_STRUCT(twoside_stage); + if (twoside == NULL) + goto fail; - draw_alloc_temp_verts( &twoside->stage, 3 ); + if (!draw_alloc_temp_verts( &twoside->stage, 3 )) + goto fail; twoside->stage.draw = draw; twoside->stage.next = NULL; @@ -185,4 +188,10 @@ struct draw_stage *draw_twoside_stage( struct draw_context *draw ) twoside->stage.destroy = twoside_destroy; return &twoside->stage; + + fail: + if (twoside) + twoside->stage.destroy( &twoside->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c index d4ddfec1b3..eeb2bc43f9 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c @@ -177,8 +177,11 @@ static void unfilled_destroy( struct draw_stage *stage ) struct draw_stage *draw_unfilled_stage( struct draw_context *draw ) { struct unfilled_stage *unfilled = CALLOC_STRUCT(unfilled_stage); + if (unfilled == NULL) + goto fail; - draw_alloc_temp_verts( &unfilled->stage, 0 ); + if (!draw_alloc_temp_verts( &unfilled->stage, 0 )) + goto fail; unfilled->stage.draw = draw; unfilled->stage.next = NULL; @@ -191,4 +194,10 @@ struct draw_stage *draw_unfilled_stage( struct draw_context *draw ) unfilled->stage.destroy = unfilled_destroy; return &unfilled->stage; + + fail: + if (unfilled) + unfilled->stage.destroy( &unfilled->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_util.c b/src/gallium/auxiliary/draw/draw_pipe_util.c index e9821de976..04438f4dd0 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_util.c +++ b/src/gallium/auxiliary/draw/draw_pipe_util.c @@ -68,24 +68,28 @@ draw_pipe_passthrough_tri(struct draw_stage *stage, struct prim_header *header) */ boolean draw_alloc_temp_verts( struct draw_stage *stage, unsigned nr ) { - unsigned i; - ubyte *store; - assert(!stage->tmp); stage->tmp = NULL; stage->nr_tmps = nr; - if (nr == 0) - return FALSE; - store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr ); - if (store == NULL) - return FALSE; + if (nr != 0) + { + unsigned i; + ubyte *store = (ubyte *) MALLOC( MAX_VERTEX_SIZE * nr ); + + if (store == NULL) + return FALSE; - stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr ); - - for (i = 0; i < nr; i++) - stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE); + stage->tmp = (struct vertex_header **) MALLOC( sizeof(struct vertex_header *) * nr ); + if (stage->tmp == NULL) { + FREE(store); + return FALSE; + } + + for (i = 0; i < nr; i++) + stage->tmp[i] = (struct vertex_header *)(store + i * MAX_VERTEX_SIZE); + } return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c index ffddc2f62c..a2e0812c89 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_validate.c +++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c @@ -299,6 +299,8 @@ static void validate_destroy( struct draw_stage *stage ) struct draw_stage *draw_validate_stage( struct draw_context *draw ) { struct draw_stage *stage = CALLOC_STRUCT(draw_stage); + if (stage == NULL) + return NULL; stage->draw = draw; stage->next = NULL; diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index c5810febe0..afd5f5544d 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -443,8 +443,7 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw, struct vbuf_render *render ) { struct vbuf_stage *vbuf = CALLOC_STRUCT(vbuf_stage); - - if(!vbuf) + if (vbuf == NULL) goto fail; vbuf->stage.draw = draw; @@ -461,7 +460,7 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw, vbuf->indices = (ushort *) align_malloc( vbuf->max_indices * sizeof(vbuf->indices[0]), 16 ); - if(!vbuf->indices) + if (!vbuf->indices) goto fail; vbuf->vertices = NULL; diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index 8101340680..ed08573382 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -249,8 +249,11 @@ static void widepoint_destroy( struct draw_stage *stage ) struct draw_stage *draw_wide_point_stage( struct draw_context *draw ) { struct widepoint_stage *wide = CALLOC_STRUCT(widepoint_stage); + if (wide == NULL) + goto fail; - draw_alloc_temp_verts( &wide->stage, 4 ); + if (!draw_alloc_temp_verts( &wide->stage, 4 )) + goto fail; wide->stage.draw = draw; wide->stage.next = NULL; @@ -262,4 +265,10 @@ struct draw_stage *draw_wide_point_stage( struct draw_context *draw ) wide->stage.destroy = widepoint_destroy; return &wide->stage; + + fail: + if (wide) + wide->stage.destroy( &wide->stage ); + + return NULL; } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 002ced2186..7735173042 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -271,6 +271,8 @@ static void fetch_emit_destroy( struct draw_pt_middle_end *middle ) struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ) { struct fetch_emit_middle_end *fetch_emit = CALLOC_STRUCT( fetch_emit_middle_end ); + if (fetch_emit == NULL) + return NULL; fetch_emit->base.prepare = fetch_emit_prepare; fetch_emit->base.run = fetch_emit_run; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 881e47d59d..98a2cb45e4 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -114,6 +114,8 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, (struct vertex_header *)MALLOC(fpme->vertex_size * fetch_count); if (!pipeline_verts) { + /* Not much we can do here - just skip the rendering. + */ assert(0); return; } diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index b61bb50664..3cc2941f96 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -487,6 +487,8 @@ static void vcache_destroy( struct draw_pt_front_end *frontend ) struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ) { struct vcache_frontend *vcache = CALLOC_STRUCT( vcache_frontend ); + if (vcache == NULL) + return NULL; vcache->base.prepare = vcache_prepare; vcache->base.run = NULL; -- cgit v1.2.3 From 30b4dc29091347252bc61d3be9370db0a45c16c3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 17:17:27 +0100 Subject: draw: more propogation -- pstipple stage. --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 1 + src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 43 ++++++++++++++++++------- 2 files changed, 33 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index fbb5b45b40..7e5f8bd281 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -641,6 +641,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) */ if (!bind_aaline_fragment_shader(aaline)) { stage->line = draw_pipe_passthrough_line; + stage->line(stage, header); return; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index ac08aa10ce..61bdd29aa0 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -320,7 +320,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, * Generate the frag shader we'll use for doing polygon stipple. * This will be the user's shader prefixed with a TEX and KIL instruction. */ -static void +static boolean generate_pstip_fs(struct pstip_stage *pstip) { const struct pipe_shader_state *orig_fs = &pstip->fs->state; @@ -332,6 +332,8 @@ generate_pstip_fs(struct pstip_stage *pstip) pstip_fs = *orig_fs; /* copy to init */ pstip_fs.tokens = MALLOC(sizeof(struct tgsi_token) * MAX); + if (pstip_fs.tokens == NULL) + return FALSE; memset(&transform, 0, sizeof(transform)); transform.wincoordInput = -1; @@ -355,6 +357,8 @@ generate_pstip_fs(struct pstip_stage *pstip) assert(pstip->fs->sampler_unit < PIPE_MAX_SAMPLERS); pstip->fs->pstip_fs = pstip->driver_create_fs_state(pstip->pipe, &pstip_fs); + + return TRUE; } @@ -404,7 +408,7 @@ pstip_update_texture(struct pstip_stage *pstip) /** * Create the texture map we'll use for stippling. */ -static void +static boolean pstip_create_texture(struct pstip_stage *pstip) { struct pipe_context *pipe = pstip->pipe; @@ -421,7 +425,10 @@ pstip_create_texture(struct pstip_stage *pstip) texTemp.cpp = 1; pstip->texture = screen->texture_create(screen, &texTemp); - assert(pstip->texture->refcount == 1); + if (pstip->texture == NULL) + return FALSE; + + return TRUE; } @@ -430,7 +437,7 @@ pstip_create_texture(struct pstip_stage *pstip) * By using a mipmapped texture, we don't have to generate a different * texture image for each line size. */ -static void +static boolean pstip_create_sampler(struct pstip_stage *pstip) { struct pipe_sampler_state sampler; @@ -448,6 +455,10 @@ pstip_create_sampler(struct pstip_stage *pstip) sampler.max_lod = 0.0f; pstip->sampler_cso = pipe->create_sampler_state(pipe, &sampler); + if (pstip->sampler_cso == NULL) + return FALSE; + + return TRUE; } @@ -455,13 +466,15 @@ pstip_create_sampler(struct pstip_stage *pstip) * When we're about to draw our first AA line in a batch, this function is * called to tell the driver to bind our modified fragment shader. */ -static void +static boolean bind_pstip_fragment_shader(struct pstip_stage *pstip) { - if (!pstip->fs->pstip_fs) { - generate_pstip_fs(pstip); - } + if (!pstip->fs->pstip_fs || + !generate_pstip_fs(pstip)) + return FALSE; + pstip->driver_bind_fs_state(pstip->pipe, pstip->fs->pstip_fs); + return TRUE; } @@ -486,7 +499,12 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) assert(stage->draw->rasterizer->poly_stipple_enable); /* bind our fragprog */ - bind_pstip_fragment_shader(pstip); + if (!bind_pstip_fragment_shader(pstip)) { + stage->tri = draw_pipe_passthrough_tri; + stage->tri(stage, header); + return; + } + /* how many samplers? */ /* we'll use sampler/texture[pstip->sampler_unit] for the stipple */ @@ -706,8 +724,11 @@ draw_install_pstipple_stage(struct draw_context *draw, pstip->pipe = pipe; /* create special texture, sampler state */ - pstip_create_texture(pstip); - pstip_create_sampler(pstip); + if (!pstip_create_texture(pstip)) + goto fail; + + if (!pstip_create_sampler(pstip)) + goto fail; /* save original driver functions */ pstip->driver_create_fs_state = pipe->create_fs_state; -- cgit v1.2.3 From 0824fb1d6afc651c0ab814e96f08326c706de216 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 21 Apr 2008 12:42:37 -0400 Subject: actually write the results --- src/gallium/auxiliary/draw/draw_vs_llvm.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 2ebf05526a..dcada66514 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -103,15 +103,16 @@ vs_llvm_run_linear( struct draw_vertex_shader *base, /* Unswizzle all output results */ - for (slot = 1; slot < base->info.num_inputs; slot++) { - output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; - - output = (float (*)[4])((char *)output + output_stride); + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < base->info.num_outputs; slot++) { + output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } + output = (float (*)[4])((char *)output + output_stride); } - } + } } -- cgit v1.2.3 From b17e123a8f20239e8e1fc6816ccf115d9ec57471 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 19:09:38 +0100 Subject: rtasm: propogate errors in x86 emit --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 34 +++++++++++++++++++++++++----- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 1 + 2 files changed, 30 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index f2c08c96a6..c2fe0e40f5 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -146,7 +146,10 @@ _fill( static void do_realloc( struct x86_function *p ) { - if (p->size == 0) { + if (p->store == p->error_overflow) { + p->csr = p->store; + } + else if (p->size == 0) { p->size = 1024; p->store = rtasm_exec_malloc(p->size); p->csr = p->store; @@ -156,10 +159,22 @@ static void do_realloc( struct x86_function *p ) unsigned char *tmp = p->store; p->size *= 2; p->store = rtasm_exec_malloc(p->size); - memcpy(p->store, tmp, used); - p->csr = p->store + used; + + if (p->store) { + memcpy(p->store, tmp, used); + p->csr = p->store + used; + } + else { + p->csr = p->store; + } + rtasm_exec_free(tmp); } + + if (p->store == NULL) { + p->store = p->csr = p->error_overflow; + p->size = 4; + } } /* Emit bytes to the instruction stream: @@ -1440,12 +1455,17 @@ void x86_init_func_size( struct x86_function *p, unsigned code_size ) { p->size = code_size; p->store = rtasm_exec_malloc(code_size); + if (p->store == NULL) { + p->store = p->error_overflow; + } p->csr = p->store; } void x86_release_func( struct x86_function *p ) { - rtasm_exec_free(p->store); + if (p->store && p->store != p->error_overflow) + rtasm_exec_free(p->store); + p->store = NULL; p->csr = NULL; p->size = 0; @@ -1456,7 +1476,11 @@ void (*x86_get_func( struct x86_function *p ))(void) { if (DISASSEM && p->store) debug_printf("disassemble %p %p\n", p->store, p->csr); - return (void (*)(void)) p->store; + + if (p->store == p->error_overflow) + return (void (*)(void)) NULL; + else + return (void (*)(void)) p->store; } #else diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 5e99ceea70..695a1cef4e 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -43,6 +43,7 @@ struct x86_function { unsigned char *csr; unsigned stack_offset; int need_emms; + unsigned char error_overflow[4]; const char *fn; }; -- cgit v1.2.3 From d3db46eb8257c1b0cf823f1805ca00457be9aff3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 19:09:54 +0100 Subject: translate: fail on x86 rtasm fail --- src/gallium/auxiliary/translate/translate_sse.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index e587e5afec..f590d48b78 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -597,7 +597,12 @@ struct translate *translate_sse2_create( const struct translate_key *key ) goto fail; p->gen_run = (run_func)x86_get_func(&p->linear_func); + if (p->gen_run == NULL) + goto fail; + p->gen_run_elts = (run_elts_func)x86_get_func(&p->elt_func); + if (p->gen_run_elts == NULL) + goto fail; return &p->translate; -- cgit v1.2.3 From 785831fc6fc56815d9637c7dd2acbcee6dfbbb0a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 19:11:58 +0100 Subject: cso: propogate one easy error - many more --- src/gallium/auxiliary/cso_cache/cso_cache.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index 18acab0967..f607528fdc 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -290,6 +290,8 @@ void * cso_take_state(struct cso_cache *sc, struct cso_cache *cso_cache_create(void) { struct cso_cache *sc = MALLOC_STRUCT(cso_cache); + if (sc == NULL) + return NULL; sc->max_size = 4096; sc->blend_hash = cso_hash_create(); -- cgit v1.2.3 From 73c2711bb186692b866720058a09f5eb05950213 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 19:43:53 +0100 Subject: rtasm: clean up debug dumping a little --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 286 ++++++++++++++--------------- 1 file changed, 140 insertions(+), 146 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index c2fe0e40f5..10796c540d 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -77,69 +77,60 @@ _print_reg( debug_printf( "]" ); } -static void -_fill( - const char *op ) -{ - unsigned count = 10 - strlen( op ); - while( count-- ) { - debug_printf( " " ); - } -} +#define DUMP_START() debug_printf( "\n" ) +#define DUMP_END() debug_printf( "\n" ) -#define DUMP_START() debug_printf( "\nsse-dump start ----------------" ) -#define DUMP_END() debug_printf( "\nsse-dump end ----------------\n" ) -#define DUMP( OP ) debug_printf( "\n%s", OP ) +#define DUMP() do { \ + const char *foo = __FUNCTION__; \ + while (*foo && *foo != '_') \ + foo++; \ + if (*foo) \ + foo++; \ + debug_printf( "\n% 15s ", foo ); \ +} while (0) -#define DUMP_I( OP, I ) do { \ - debug_printf( "\n%s", OP ); \ - _fill( OP ); \ +#define DUMP_I( I ) do { \ + DUMP(); \ debug_printf( "%u", I ); \ } while( 0 ) -#define DUMP_R( OP, R0 ) do { \ - debug_printf( "\n%s", OP ); \ - _fill( OP ); \ +#define DUMP_R( R0 ) do { \ + DUMP(); \ _print_reg( R0 ); \ } while( 0 ) -#define DUMP_RR( OP, R0, R1 ) do { \ - debug_printf( "\n%s", OP ); \ - _fill( OP ); \ +#define DUMP_RR( R0, R1 ) do { \ + DUMP(); \ _print_reg( R0 ); \ debug_printf( ", " ); \ _print_reg( R1 ); \ } while( 0 ) -#define DUMP_RI( OP, R0, I ) do { \ - debug_printf( "\n%s", OP ); \ - _fill( OP ); \ +#define DUMP_RI( R0, I ) do { \ + DUMP(); \ _print_reg( R0 ); \ - debug_printf( ", " ); \ - debug_printf( "%u", I ); \ + debug_printf( ", %u", I ); \ } while( 0 ) -#define DUMP_RRI( OP, R0, R1, I ) do { \ - debug_printf( "\n%s", OP ); \ - _fill( OP ); \ +#define DUMP_RRI( R0, R1, I ) do { \ + DUMP(); \ _print_reg( R0 ); \ debug_printf( ", " ); \ _print_reg( R1 ); \ - debug_printf( ", " ); \ - debug_printf( "%u", I ); \ + debug_printf( ", %u", I ); \ } while( 0 ) #else #define DUMP_START() #define DUMP_END() -#define DUMP( OP ) -#define DUMP_I( OP, I ) -#define DUMP_R( OP, R0 ) -#define DUMP_RR( OP, R0, R1 ) -#define DUMP_RI( OP, R0, I ) -#define DUMP_RRI( OP, R0, R1, I ) +#define DUMP( ) +#define DUMP_I( I ) +#define DUMP_R( R0 ) +#define DUMP_RR( R0, R1 ) +#define DUMP_RI( R0, I ) +#define DUMP_RRI( R0, R1, I ) #endif @@ -173,7 +164,7 @@ static void do_realloc( struct x86_function *p ) if (p->store == NULL) { p->store = p->csr = p->error_overflow; - p->size = 4; + p->size = sizeof(p->error_overflow); } } @@ -397,7 +388,7 @@ unsigned char *x86_jcc_forward( struct x86_function *p, unsigned char *x86_jmp_forward( struct x86_function *p) { - DUMP( __FUNCTION__ ); + DUMP(); emit_1ub(p, 0xe9); emit_1i(p, 0); return x86_get_label(p); @@ -405,7 +396,7 @@ unsigned char *x86_jmp_forward( struct x86_function *p) unsigned char *x86_call_forward( struct x86_function *p) { - DUMP( __FUNCTION__ ); + DUMP(); emit_1ub(p, 0xe8); emit_1i(p, 0); @@ -422,7 +413,7 @@ void x86_fixup_fwd_jump( struct x86_function *p, void x86_jmp( struct x86_function *p, unsigned char *label) { - DUMP_I( __FUNCTION__, label ); + DUMP_I( label ); emit_1ub(p, 0xe9); emit_1i(p, pointer_to_intptr( label ) - pointer_to_intptr( x86_get_label(p) ) - 4); } @@ -439,14 +430,14 @@ static unsigned char *cptr( void (*label)() ) */ void x86_call( struct x86_function *p, void (*label)()) { - DUMP_I( __FUNCTION__, label ); + DUMP_I( label ); emit_1ub(p, 0xe8); emit_1i(p, cptr(label) - x86_get_label(p) - 4); } #else void x86_call( struct x86_function *p, struct x86_reg reg) { - DUMP_R( __FUNCTION__, reg ); + DUMP_R( reg ); emit_1ub(p, 0xff); emit_modrm_noreg(p, 2, reg); } @@ -459,7 +450,7 @@ void x86_call( struct x86_function *p, struct x86_reg reg) */ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) { - DUMP_RI( __FUNCTION__, dst, imm ); + DUMP_RI( dst, imm ); assert(dst.mod == mod_REG); emit_1ub(p, 0xb8 + dst.idx); emit_1i(p, imm); @@ -468,7 +459,7 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) void x86_push( struct x86_function *p, struct x86_reg reg ) { - DUMP_R( __FUNCTION__, reg ); + DUMP_R( reg ); assert(reg.mod == mod_REG); emit_1ub(p, 0x50 + reg.idx); p->stack_offset += 4; @@ -477,7 +468,7 @@ void x86_push( struct x86_function *p, void x86_pop( struct x86_function *p, struct x86_reg reg ) { - DUMP_R( __FUNCTION__, reg ); + DUMP_R( reg ); assert(reg.mod == mod_REG); emit_1ub(p, 0x58 + reg.idx); p->stack_offset -= 4; @@ -486,7 +477,7 @@ void x86_pop( struct x86_function *p, void x86_inc( struct x86_function *p, struct x86_reg reg ) { - DUMP_R( __FUNCTION__, reg ); + DUMP_R( reg ); assert(reg.mod == mod_REG); emit_1ub(p, 0x40 + reg.idx); } @@ -494,20 +485,20 @@ void x86_inc( struct x86_function *p, void x86_dec( struct x86_function *p, struct x86_reg reg ) { - DUMP_R( __FUNCTION__, reg ); + DUMP_R( reg ); assert(reg.mod == mod_REG); emit_1ub(p, 0x48 + reg.idx); } void x86_ret( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_1ub(p, 0xc3); } void x86_sahf( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_1ub(p, 0x9e); } @@ -515,7 +506,7 @@ void x86_mov( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_op_modrm( p, 0x8b, 0x89, dst, src ); } @@ -523,7 +514,7 @@ void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_op_modrm( p, 0x33, 0x31, dst, src ); } @@ -531,7 +522,7 @@ void x86_cmp( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_op_modrm( p, 0x3b, 0x39, dst, src ); } @@ -539,7 +530,7 @@ void x86_lea( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_1ub(p, 0x8d); emit_modrm( p, dst, src ); } @@ -548,7 +539,7 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_1ub(p, 0x85); emit_modrm( p, dst, src ); } @@ -557,7 +548,7 @@ void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_op_modrm(p, 0x03, 0x01, dst, src ); } @@ -566,7 +557,7 @@ void x86_add( struct x86_function *p, void x86_mul( struct x86_function *p, struct x86_reg src ) { - DUMP_R( __FUNCTION__, src ); + DUMP_R( src ); emit_1ub(p, 0xf7); emit_modrm_noreg(p, 4, src ); } @@ -576,7 +567,7 @@ void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0xAF); emit_modrm(p, dst, src); } @@ -586,7 +577,7 @@ void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_op_modrm(p, 0x2b, 0x29, dst, src ); } @@ -594,7 +585,7 @@ void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_op_modrm( p, 0x0b, 0x09, dst, src ); } @@ -602,7 +593,7 @@ void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_op_modrm( p, 0x23, 0x21, dst, src ); } @@ -617,7 +608,7 @@ void sse_movss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, 0xF3, X86_TWOB); emit_op_modrm( p, 0x10, 0x11, dst, src ); } @@ -626,7 +617,7 @@ void sse_movaps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x28, 0x29, dst, src ); } @@ -635,7 +626,7 @@ void sse_movups( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x10, 0x11, dst, src ); } @@ -644,7 +635,7 @@ void sse_movhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); assert(dst.mod != mod_REG || src.mod != mod_REG); emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x16, 0x17, dst, src ); /* cf movlhps */ @@ -654,7 +645,7 @@ void sse_movlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); assert(dst.mod != mod_REG || src.mod != mod_REG); emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x12, 0x13, dst, src ); /* cf movhlps */ @@ -664,7 +655,7 @@ void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x5F); emit_modrm( p, dst, src ); } @@ -673,7 +664,7 @@ void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x5F); emit_modrm( p, dst, src ); } @@ -682,7 +673,7 @@ void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x5E); emit_modrm( p, dst, src ); } @@ -691,7 +682,7 @@ void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x5D); emit_modrm( p, dst, src ); } @@ -700,7 +691,7 @@ void sse_subps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x5C); emit_modrm( p, dst, src ); } @@ -709,7 +700,7 @@ void sse_mulps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x59); emit_modrm( p, dst, src ); } @@ -718,7 +709,7 @@ void sse_mulss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x59); emit_modrm( p, dst, src ); } @@ -727,7 +718,7 @@ void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x58); emit_modrm( p, dst, src ); } @@ -736,7 +727,7 @@ void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x58); emit_modrm( p, dst, src ); } @@ -745,7 +736,7 @@ void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x55); emit_modrm( p, dst, src ); } @@ -754,7 +745,7 @@ void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x54); emit_modrm( p, dst, src ); } @@ -763,7 +754,7 @@ void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x52); emit_modrm( p, dst, src ); } @@ -772,7 +763,7 @@ void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x52); emit_modrm( p, dst, src ); @@ -782,7 +773,7 @@ void sse_movhlps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); assert(dst.mod == mod_REG && src.mod == mod_REG); emit_2ub(p, X86_TWOB, 0x12); emit_modrm( p, dst, src ); @@ -792,7 +783,7 @@ void sse_movlhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); assert(dst.mod == mod_REG && src.mod == mod_REG); emit_2ub(p, X86_TWOB, 0x16); emit_modrm( p, dst, src ); @@ -802,7 +793,7 @@ void sse_orps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x56); emit_modrm( p, dst, src ); } @@ -811,7 +802,7 @@ void sse_xorps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x57); emit_modrm( p, dst, src ); } @@ -820,7 +811,7 @@ void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); assert(dst.file == file_MMX && (src.file == file_XMM || src.mod != mod_REG)); @@ -834,7 +825,7 @@ void sse2_cvtdq2ps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x5b); emit_modrm( p, dst, src ); } @@ -848,7 +839,7 @@ void sse_shufps( struct x86_function *p, struct x86_reg src, unsigned char shuf) { - DUMP_RRI( __FUNCTION__, dst, src, shuf ); + DUMP_RRI( dst, src, shuf ); emit_2ub(p, X86_TWOB, 0xC6); emit_modrm(p, dst, src); emit_1ub(p, shuf); @@ -859,7 +850,7 @@ void sse_cmpps( struct x86_function *p, struct x86_reg src, unsigned char cc) { - DUMP_RRI( "CMPPS", dst, src, cc ); + DUMP_RRI( dst, src, cc ); emit_2ub(p, X86_TWOB, 0xC2); emit_modrm(p, dst, src); emit_1ub(p, cc); @@ -869,7 +860,7 @@ void sse_pmovmskb( struct x86_function *p, struct x86_reg dst, struct x86_reg src) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0xD7); emit_modrm(p, dst, src); } @@ -886,7 +877,7 @@ void sse2_pshufd( struct x86_function *p, struct x86_reg src, unsigned char shuf) { - DUMP_RRI( __FUNCTION__, dst, src, shuf ); + DUMP_RRI( dst, src, shuf ); emit_3ub(p, 0x66, X86_TWOB, 0x70); emit_modrm(p, dst, src); emit_1ub(p, shuf); @@ -896,7 +887,7 @@ void sse2_cvttps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub( p, 0xF3, X86_TWOB, 0x5B ); emit_modrm( p, dst, src ); } @@ -905,7 +896,7 @@ void sse2_cvtps2dq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x5B); emit_modrm( p, dst, src ); } @@ -914,7 +905,7 @@ void sse2_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x6B); emit_modrm( p, dst, src ); } @@ -923,7 +914,7 @@ void sse2_packsswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x63); emit_modrm( p, dst, src ); } @@ -932,7 +923,7 @@ void sse2_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x67); emit_modrm( p, dst, src ); } @@ -941,7 +932,7 @@ void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0x66, X86_TWOB, 0x60); emit_modrm( p, dst, src ); } @@ -951,7 +942,7 @@ void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, X86_TWOB, 0x53); emit_modrm( p, dst, src ); } @@ -960,7 +951,7 @@ void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_3ub(p, 0xF3, X86_TWOB, 0x53); emit_modrm( p, dst, src ); } @@ -969,7 +960,7 @@ void sse2_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); emit_2ub(p, 0x66, X86_TWOB); emit_op_modrm( p, 0x6e, 0x7e, dst, src ); } @@ -982,35 +973,35 @@ void sse2_movd( struct x86_function *p, */ void x87_fist( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); emit_1ub(p, 0xdb); emit_modrm_noreg(p, 2, dst); } void x87_fistp( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); emit_1ub(p, 0xdb); emit_modrm_noreg(p, 3, dst); } void x87_fild( struct x86_function *p, struct x86_reg arg ) { - DUMP_R( __FUNCTION__, arg ); + DUMP_R( arg ); emit_1ub(p, 0xdf); emit_modrm_noreg(p, 0, arg); } void x87_fldz( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xee); } void x87_fldcw( struct x86_function *p, struct x86_reg arg ) { - DUMP_R( __FUNCTION__, arg ); + DUMP_R( arg ); assert(arg.file == file_REG32); assert(arg.mod != mod_REG); emit_1ub(p, 0xd9); @@ -1019,31 +1010,31 @@ void x87_fldcw( struct x86_function *p, struct x86_reg arg ) void x87_fld1( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xe8); } void x87_fldl2e( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xea); } void x87_fldln2( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xed); } void x87_fwait( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_1ub(p, 0x9b); } void x87_fnclex( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xdb, 0xe2); } @@ -1082,7 +1073,7 @@ static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86 void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); x87_arith_op(p, dst, src, 0xd8, 0xc8, 0xdc, 0xc8, @@ -1091,7 +1082,7 @@ void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); x87_arith_op(p, dst, src, 0xd8, 0xe0, 0xdc, 0xe8, @@ -1100,7 +1091,7 @@ void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); x87_arith_op(p, dst, src, 0xd8, 0xe8, 0xdc, 0xe0, @@ -1109,7 +1100,7 @@ void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); x87_arith_op(p, dst, src, 0xd8, 0xc0, 0xdc, 0xc0, @@ -1118,7 +1109,7 @@ void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); x87_arith_op(p, dst, src, 0xd8, 0xf0, 0xdc, 0xf8, @@ -1127,7 +1118,7 @@ void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); x87_arith_op(p, dst, src, 0xd8, 0xf8, 0xdc, 0xf0, @@ -1136,7 +1127,7 @@ void x87_fdivr( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) void x87_fmulp( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xc8+dst.idx); @@ -1144,7 +1135,7 @@ void x87_fmulp( struct x86_function *p, struct x86_reg dst ) void x87_fsubp( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xe8+dst.idx); @@ -1152,7 +1143,7 @@ void x87_fsubp( struct x86_function *p, struct x86_reg dst ) void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xe0+dst.idx); @@ -1160,7 +1151,7 @@ void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) void x87_faddp( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xc0+dst.idx); @@ -1168,7 +1159,7 @@ void x87_faddp( struct x86_function *p, struct x86_reg dst ) void x87_fdivp( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xf8+dst.idx); @@ -1176,7 +1167,7 @@ void x87_fdivp( struct x86_function *p, struct x86_reg dst ) void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xf0+dst.idx); @@ -1184,83 +1175,83 @@ void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) void x87_fucom( struct x86_function *p, struct x86_reg arg ) { - DUMP_R( __FUNCTION__, arg ); + DUMP_R( arg ); assert(arg.file == file_x87); emit_2ub(p, 0xdd, 0xe0+arg.idx); } void x87_fucomp( struct x86_function *p, struct x86_reg arg ) { - DUMP_R( __FUNCTION__, arg ); + DUMP_R( arg ); assert(arg.file == file_x87); emit_2ub(p, 0xdd, 0xe8+arg.idx); } void x87_fucompp( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xda, 0xe9); } void x87_fxch( struct x86_function *p, struct x86_reg arg ) { - DUMP_R( __FUNCTION__, arg ); + DUMP_R( arg ); assert(arg.file == file_x87); emit_2ub(p, 0xd9, 0xc8+arg.idx); } void x87_fabs( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xe1); } void x87_fchs( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xe0); } void x87_fcos( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xff); } void x87_fprndint( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xfc); } void x87_fscale( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xfd); } void x87_fsin( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xfe); } void x87_fsincos( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xfb); } void x87_fsqrt( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xfa); } void x87_fxtract( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xf4); } @@ -1270,7 +1261,7 @@ void x87_fxtract( struct x86_function *p ) */ void x87_f2xm1( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xf0); } @@ -1279,7 +1270,7 @@ void x87_f2xm1( struct x86_function *p ) */ void x87_fyl2x( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xf1); } @@ -1290,14 +1281,14 @@ void x87_fyl2x( struct x86_function *p ) */ void x87_fyl2xp1( struct x86_function *p ) { - DUMP( __FUNCTION__ ); + DUMP(); emit_2ub(p, 0xd9, 0xf9); } void x87_fld( struct x86_function *p, struct x86_reg arg ) { - DUMP_R( __FUNCTION__, arg ); + DUMP_R( arg ); if (arg.file == file_x87) emit_2ub(p, 0xd9, 0xc0 + arg.idx); else { @@ -1308,7 +1299,7 @@ void x87_fld( struct x86_function *p, struct x86_reg arg ) void x87_fst( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); if (dst.file == file_x87) emit_2ub(p, 0xdd, 0xd0 + dst.idx); else { @@ -1319,7 +1310,7 @@ void x87_fst( struct x86_function *p, struct x86_reg dst ) void x87_fstp( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); if (dst.file == file_x87) emit_2ub(p, 0xdd, 0xd8 + dst.idx); else { @@ -1330,7 +1321,7 @@ void x87_fstp( struct x86_function *p, struct x86_reg dst ) void x87_fcom( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); if (dst.file == file_x87) emit_2ub(p, 0xd8, 0xd0 + dst.idx); else { @@ -1341,7 +1332,7 @@ void x87_fcom( struct x86_function *p, struct x86_reg dst ) void x87_fcomp( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); if (dst.file == file_x87) emit_2ub(p, 0xd8, 0xd8 + dst.idx); else { @@ -1353,7 +1344,7 @@ void x87_fcomp( struct x86_function *p, struct x86_reg dst ) void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) { - DUMP_R( __FUNCTION__, dst ); + DUMP_R( dst ); assert(dst.file == file_REG32); if (dst.idx == reg_AX && @@ -1383,7 +1374,7 @@ void mmx_packssdw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); assert(dst.file == file_MMX && (src.file == file_MMX || src.mod != mod_REG)); @@ -1397,7 +1388,7 @@ void mmx_packuswb( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); assert(dst.file == file_MMX && (src.file == file_MMX || src.mod != mod_REG)); @@ -1411,7 +1402,7 @@ void mmx_movd( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); p->need_emms = 1; emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x6e, 0x7e, dst, src ); @@ -1421,7 +1412,7 @@ void mmx_movq( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) { - DUMP_RR( __FUNCTION__, dst, src ); + DUMP_RR( dst, src ); p->need_emms = 1; emit_1ub(p, X86_TWOB); emit_op_modrm( p, 0x6f, 0x7f, dst, src ); @@ -1449,6 +1440,7 @@ void x86_init_func( struct x86_function *p ) p->size = 0; p->store = NULL; p->csr = p->store; + DUMP_START(); } void x86_init_func_size( struct x86_function *p, unsigned code_size ) @@ -1459,6 +1451,7 @@ void x86_init_func_size( struct x86_function *p, unsigned code_size ) p->store = p->error_overflow; } p->csr = p->store; + DUMP_START(); } void x86_release_func( struct x86_function *p ) @@ -1474,6 +1467,7 @@ void x86_release_func( struct x86_function *p ) void (*x86_get_func( struct x86_function *p ))(void) { + DUMP_END(); if (DISASSEM && p->store) debug_printf("disassemble %p %p\n", p->store, p->csr); -- cgit v1.2.3 From 73706deef59c35472d2410411403f30c9603f22f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 19:48:08 +0100 Subject: rtasm: quieten sse_enabled debug --- src/gallium/auxiliary/rtasm/rtasm_cpu.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c index 175245a9f6..f01e12faa0 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c +++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c @@ -32,7 +32,16 @@ static boolean rtasm_sse_enabled(void) { - return !debug_get_bool_option("GALLIUM_NOSSE", FALSE); + static boolean firsttime = 1; + static boolean enabled; + + /* This gets called quite often at the moment: + */ + if (firsttime) { + enabled = !debug_get_bool_option("GALLIUM_NOSSE", FALSE); + firsttime = FALSE; + } + return enabled; } int rtasm_cpu_has_sse(void) -- cgit v1.2.3 From a945420ae6f96f0d7024f97e37ffd31329865a84 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 19:48:21 +0100 Subject: rtasm: debug some missing funcs --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 10796c540d..3cd45d7dd9 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -364,6 +364,7 @@ void x86_jcc( struct x86_function *p, unsigned char *label ) { intptr_t offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 2); + DUMP_I(cc); if (offset <= 127 && offset >= -128) { emit_1ub(p, 0x70 + cc); @@ -381,6 +382,7 @@ void x86_jcc( struct x86_function *p, unsigned char *x86_jcc_forward( struct x86_function *p, enum x86_cc cc ) { + DUMP_I(cc); emit_2ub(p, 0x0f, 0x80 + cc); emit_1i(p, 0); return x86_get_label(p); @@ -1365,6 +1367,7 @@ void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) void mmx_emms( struct x86_function *p ) { + DUMP(); assert(p->need_emms); emit_2ub(p, 0x0f, 0x77); p->need_emms = 0; -- cgit v1.2.3 From fb1c09305ea8cb727514c53a0346d90e78eeb05f Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 21 Apr 2008 15:15:31 -0400 Subject: Use llvm 2.3 (2.2 won't work because of a lot of problems, e.g. lack of constant vectors handling in execution engine) --- src/gallium/auxiliary/gallivm/Makefile | 12 +++--- src/gallium/auxiliary/gallivm/instructions.cpp | 42 +++++++++---------- src/gallium/auxiliary/gallivm/instructions.h | 4 +- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 49 ++++++++++++----------- src/gallium/auxiliary/gallivm/instructionssoa.h | 4 +- src/gallium/auxiliary/gallivm/storage.cpp | 32 +++++++-------- src/gallium/auxiliary/gallivm/storagesoa.cpp | 10 ++--- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 4 +- 8 files changed, 79 insertions(+), 78 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile index 6bfd187fb3..c3f7bfba93 100644 --- a/src/gallium/auxiliary/gallivm/Makefile +++ b/src/gallium/auxiliary/gallivm/Makefile @@ -65,14 +65,14 @@ depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES) gallivm_builtins.cpp: llvm_builtins.c - clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp.bin - (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@ - rm temp.bin + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp1.bin + (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@ + rm temp1.bin gallivmsoabuiltins.cpp: soabuiltins.c - clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp.bin - (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@ - rm temp.bin + clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin + (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@ + rm temp2.bin # Emacs tags tags: diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index b35d6790f7..95a670edaf 100644 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -139,12 +139,12 @@ llvm::Value *Instructions::callFSqrt(llvm::Value *val) // predeclare the intrinsic std::vector fsqrtArgs; fsqrtArgs.push_back(Type::FloatTy); - ParamAttrsList *fsqrtPal = 0; + PAListPtr fsqrtPal; FunctionType* fsqrtType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/fsqrtArgs, /*isVarArg=*/false); - m_llvmFSqrt = new Function( + m_llvmFSqrt = Function::Create( /*Type=*/fsqrtType, /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"llvm.sqrt.f32", m_mod); @@ -196,12 +196,12 @@ llvm::Value *Instructions::callFAbs(llvm::Value *val) // predeclare the intrinsic std::vector fabsArgs; fabsArgs.push_back(Type::FloatTy); - ParamAttrsList *fabsPal = 0; + PAListPtr fabsPal; FunctionType* fabsType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/fabsArgs, /*isVarArg=*/false); - m_llvmFAbs = new Function( + m_llvmFAbs = Function::Create( /*Type=*/fabsType, /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"fabs", m_mod); @@ -239,12 +239,12 @@ llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2) std::vector powArgs; powArgs.push_back(Type::FloatTy); powArgs.push_back(Type::FloatTy); - ParamAttrsList *powPal = 0; + PAListPtr powPal; FunctionType* powType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/powArgs, /*isVarArg=*/false); - m_llvmPow = new Function( + m_llvmPow = Function::Create( /*Type=*/powType, /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"llvm.pow.f32", m_mod); @@ -338,12 +338,12 @@ llvm::Value * Instructions::callFloor(llvm::Value *val) // predeclare the intrinsic std::vector floorArgs; floorArgs.push_back(Type::FloatTy); - ParamAttrsList *floorPal = 0; + PAListPtr floorPal; FunctionType* floorType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/floorArgs, /*isVarArg=*/false); - m_llvmFloor = new Function( + m_llvmFloor = Function::Create( /*Type=*/floorType, /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"floorf", m_mod); @@ -381,12 +381,12 @@ llvm::Value * Instructions::callFLog(llvm::Value *val) // predeclare the intrinsic std::vector flogArgs; flogArgs.push_back(Type::FloatTy); - ParamAttrsList *flogPal = 0; + PAListPtr flogPal; FunctionType* flogType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/flogArgs, /*isVarArg=*/false); - m_llvmFlog = new Function( + m_llvmFlog = Function::Create( /*Type=*/flogType, /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"logf", m_mod); @@ -509,12 +509,12 @@ void Instructions::printVector(llvm::Value *val) llvm::Function * Instructions::declarePrintf() { std::vector args; - ParamAttrsList *params = 0; + PAListPtr params; FunctionType* funcTy = FunctionType::get( /*Result=*/IntegerType::get(32), /*Params=*/args, /*isVarArg=*/true); - Function* func_printf = new Function( + Function* func_printf = Function::Create( /*Type=*/funcTy, /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"printf", m_mod); @@ -638,8 +638,8 @@ llvm::Value * Instructions::abs(llvm::Value *in) void Instructions::ifop(llvm::Value *in) { - BasicBlock *ifthen = new BasicBlock(name("ifthen"), m_func,0); - BasicBlock *ifend = new BasicBlock(name("ifthenend"), m_func,0); + BasicBlock *ifthen = BasicBlock::Create(name("ifthen"), m_func,0); + BasicBlock *ifend = BasicBlock::Create(name("ifthenend"), m_func,0); //BasicBlock *yblock = new BasicBlock(name("yblock"), m_func,0); //BasicBlock *zblock = new BasicBlock(name("zblock"), m_func,0); @@ -665,7 +665,7 @@ llvm::BasicBlock * Instructions::currentBlock() const void Instructions::elseop() { assert(!m_ifStack.empty()); - BasicBlock *ifend = new BasicBlock(name("ifend"), m_func,0); + BasicBlock *ifend = BasicBlock::Create(name("ifend"), m_func,0); m_builder.CreateBr(ifend); m_builder.SetInsertPoint(m_ifStack.top()); currentBlock()->setName(name("ifelse")); @@ -692,8 +692,8 @@ llvm::Value * Instructions::lerp(llvm::Value *in1, llvm::Value *in2, void Instructions::beginLoop() { - BasicBlock *begin = new BasicBlock(name("loop"), m_func,0); - BasicBlock *end = new BasicBlock(name("endloop"), m_func,0); + BasicBlock *begin = BasicBlock::Create(name("loop"), m_func,0); + BasicBlock *end = BasicBlock::Create(name("endloop"), m_func,0); m_builder.CreateBr(begin); Loop loop; @@ -716,7 +716,7 @@ void Instructions::endLoop() void Instructions::brk() { assert(!m_loopStack.empty()); - BasicBlock *unr = new BasicBlock(name("unreachable"), m_func,0); + BasicBlock *unr = BasicBlock::Create(name("unreachable"), m_func,0); m_builder.CreateBr(m_loopStack.top().end); m_builder.SetInsertPoint(unr); } @@ -765,13 +765,13 @@ llvm::Function * Instructions::declareFunc(int label) args.push_back(vecPtr); args.push_back(vecPtr); args.push_back(vecPtr); - ParamAttrsList *params = 0; + PAListPtr params; FunctionType *funcType = FunctionType::get( /*Result=*/Type::VoidTy, /*Params=*/args, /*isVarArg=*/false); std::string name = createFuncName(label); - Function *func = new Function( + Function *func = Function::Create( /*Type=*/funcType, /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/name.c_str(), m_mod); @@ -789,7 +789,7 @@ void Instructions::bgnSub(unsigned label) ptr_INPUT->setName("INPUT"); m_storage->pushArguments(ptr_INPUT); - llvm::BasicBlock *entry = new BasicBlock("entry", func, 0); + llvm::BasicBlock *entry = BasicBlock::Create("entry", func, 0); m_func = func; m_builder.SetInsertPoint(entry); diff --git a/src/gallium/auxiliary/gallivm/instructions.h b/src/gallium/auxiliary/gallivm/instructions.h index 9ebc17dd8e..19ca84ddc6 100644 --- a/src/gallium/auxiliary/gallivm/instructions.h +++ b/src/gallium/auxiliary/gallivm/instructions.h @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include @@ -125,7 +125,7 @@ private: llvm::Module *m_mod; llvm::Function *m_func; char m_name[32]; - llvm::LLVMFoldingBuilder m_builder; + llvm::IRBuilder m_builder; int m_idx; llvm::VectorType *m_floatVecType; diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index 3c16a3692d..f0122802db 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -38,6 +38,7 @@ #include #include #include +//#include #include #include @@ -237,32 +238,32 @@ llvm::Value * InstructionsSoa::allocaTemp() std::vector indices; indices.push_back(m_storage->constantInt(0)); indices.push_back(m_storage->constantInt(0)); - GetElementPtrInst *getElem = new GetElementPtrInst(alloca, - indices.begin(), - indices.end(), - name("allocaPtr"), - m_builder.GetInsertBlock()); + GetElementPtrInst *getElem = GetElementPtrInst::Create(alloca, + indices.begin(), + indices.end(), + name("allocaPtr"), + m_builder.GetInsertBlock()); return getElem; } std::vector InstructionsSoa::allocaToResult(llvm::Value *allocaPtr) { - GetElementPtrInst *xElemPtr = new GetElementPtrInst(allocaPtr, - m_storage->constantInt(0), - name("xPtr"), - m_builder.GetInsertBlock()); - GetElementPtrInst *yElemPtr = new GetElementPtrInst(allocaPtr, - m_storage->constantInt(1), - name("yPtr"), - m_builder.GetInsertBlock()); - GetElementPtrInst *zElemPtr = new GetElementPtrInst(allocaPtr, - m_storage->constantInt(2), - name("zPtr"), - m_builder.GetInsertBlock()); - GetElementPtrInst *wElemPtr = new GetElementPtrInst(allocaPtr, - m_storage->constantInt(3), - name("wPtr"), - m_builder.GetInsertBlock()); + GetElementPtrInst *xElemPtr = GetElementPtrInst::Create(allocaPtr, + m_storage->constantInt(0), + name("xPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *yElemPtr = GetElementPtrInst::Create(allocaPtr, + m_storage->constantInt(1), + name("yPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *zElemPtr = GetElementPtrInst::Create(allocaPtr, + m_storage->constantInt(2), + name("zPtr"), + m_builder.GetInsertBlock()); + GetElementPtrInst *wElemPtr = GetElementPtrInst::Create(allocaPtr, + m_storage->constantInt(3), + name("wPtr"), + m_builder.GetInsertBlock()); std::vector res(4); res[0] = new LoadInst(xElemPtr, name("xRes"), false, m_builder.GetInsertBlock()); @@ -388,10 +389,10 @@ void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op) llvm::Function *func = 0; if (originalFunc->isDeclaration()) { std::cout << "function decleration" <getFunctionType(), GlobalValue::ExternalLinkage, - originalFunc->getName(), currentModule()); + func = Function::Create(originalFunc->getFunctionType(), GlobalValue::ExternalLinkage, + originalFunc->getName(), currentModule()); func->setCallingConv(CallingConv::C); - const ParamAttrsList *pal = 0; + const PAListPtr pal; func->setParamAttrs(pal); currentModule()->dump(); } else { diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h index b9104ea286..060ee72f2e 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.h +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h @@ -29,7 +29,7 @@ #define INSTRUCTIONSSOA_H #include -#include +#include #include #include @@ -87,7 +87,7 @@ private: const std::vector in3); void injectFunction(llvm::Function *originalFunc, int op = TGSI_OPCODE_LAST); private: - llvm::LLVMFoldingBuilder m_builder; + llvm::IRBuilder m_builder; StorageSoa *m_storage; std::map m_functionsMap; diff --git a/src/gallium/auxiliary/gallivm/storage.cpp b/src/gallium/auxiliary/gallivm/storage.cpp index c4326de8c5..9d9fd12360 100644 --- a/src/gallium/auxiliary/gallivm/storage.cpp +++ b/src/gallium/auxiliary/gallivm/storage.cpp @@ -186,26 +186,26 @@ llvm::Value *Storage::maskWrite(llvm::Value *src, int mask, llvm::Value *templ) if ((mask & TGSI_WRITEMASK_X)) { llvm::Value *x = new ExtractElementInst(src, unsigned(0), name("x"), m_block); - dst = new InsertElementInst(dst, x, unsigned(0), - name("dstx"), m_block); + dst = InsertElementInst::Create(dst, x, unsigned(0), + name("dstx"), m_block); } if ((mask & TGSI_WRITEMASK_Y)) { llvm::Value *y = new ExtractElementInst(src, unsigned(1), name("y"), m_block); - dst = new InsertElementInst(dst, y, unsigned(1), - name("dsty"), m_block); + dst = InsertElementInst::Create(dst, y, unsigned(1), + name("dsty"), m_block); } if ((mask & TGSI_WRITEMASK_Z)) { llvm::Value *z = new ExtractElementInst(src, unsigned(2), name("z"), m_block); - dst = new InsertElementInst(dst, z, unsigned(2), - name("dstz"), m_block); + dst = InsertElementInst::Create(dst, z, unsigned(2), + name("dstz"), m_block); } if ((mask & TGSI_WRITEMASK_W)) { llvm::Value *w = new ExtractElementInst(src, unsigned(3), name("w"), m_block); - dst = new InsertElementInst(dst, w, unsigned(3), - name("dstw"), m_block); + dst = InsertElementInst::Create(dst, w, unsigned(3), + name("dstw"), m_block); } return dst; } @@ -308,11 +308,11 @@ llvm::Value * Storage::elemPtr(Args arg) std::vector indices; indices.push_back(constantInt(0)); indices.push_back(constantInt(static_cast(arg))); - GetElementPtrInst *getElem = new GetElementPtrInst(m_INPUT, - indices.begin(), - indices.end(), - name("input_ptr"), - m_block); + GetElementPtrInst *getElem = GetElementPtrInst::Create(m_INPUT, + indices.begin(), + indices.end(), + name("input_ptr"), + m_block); return new LoadInst(getElem, name("input_field"), false, m_block); } @@ -322,7 +322,7 @@ llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx, GetElementPtrInst *getElem = 0; if (indIdx) { - getElem = new GetElementPtrInst(ptr, + getElem = GetElementPtrInst::Create(ptr, BinaryOperator::create(Instruction::Add, indIdx, constantInt(idx), @@ -331,7 +331,7 @@ llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx, name("field"), m_block); } else { - getElem = new GetElementPtrInst(ptr, + getElem = GetElementPtrInst::Create(ptr, constantInt(idx), name("field"), m_block); @@ -350,7 +350,7 @@ void Storage::setKilElement(llvm::Value *val) std::vector indices; indices.push_back(constantInt(0)); indices.push_back(constantInt(static_cast(KilArg))); - GetElementPtrInst *elem = new GetElementPtrInst(m_INPUT, + GetElementPtrInst *elem = GetElementPtrInst::Create(m_INPUT, indices.begin(), indices.end(), name("kil_ptr"), diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp index bb6fe3d7e1..0e6e68c9d7 100644 --- a/src/gallium/auxiliary/gallivm/storagesoa.cpp +++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp @@ -207,11 +207,11 @@ llvm::Value * StorageSoa::elementPointer(llvm::Value *ptr, llvm::Value *index, indices.push_back(index); indices.push_back(constantInt(channel)); - GetElementPtrInst *getElem = new GetElementPtrInst(ptr, - indices.begin(), - indices.end(), - name("ptr"), - m_block); + GetElementPtrInst *getElem = GetElementPtrInst::Create(ptr, + indices.begin(), + indices.end(), + name("ptr"), + m_block); return getElem; } diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index ab9e7a06fb..ab8c851f14 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -1014,7 +1014,7 @@ tgsi_to_llvm(struct gallivm_ir *ir, const struct tgsi_token *tokens) Value *ptr_INPUT = args++; ptr_INPUT->setName("input"); - BasicBlock *label_entry = new BasicBlock("entry", shader, 0); + BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0); tgsi_parse_init(&parse, tokens); @@ -1085,7 +1085,7 @@ llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, Value *temps = args++; temps->setName("temps"); - BasicBlock *label_entry = new BasicBlock("entry", shader, 0); + BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0); tgsi_parse_init(&parse, tokens); -- cgit v1.2.3 From 1dc5e56f3e48b629daa18c2d8631c96bda638eb6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 20:25:47 +0100 Subject: cso: provide functions to bind fs/vs handles directly --- src/gallium/auxiliary/cso_cache/cso_context.c | 32 ++++++++++++++++++++------- src/gallium/auxiliary/cso_cache/cso_context.h | 4 ++++ 2 files changed, 28 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 87995c80c3..e6366e7b14 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -422,6 +422,16 @@ void cso_restore_rasterizer(struct cso_context *ctx) } +void cso_set_fragment_shader_handle(struct cso_context *ctx, + void *handle ) +{ + if (ctx->fragment_shader != handle) { + ctx->fragment_shader = handle; + ctx->pipe->bind_fs_state(ctx->pipe, handle); + } +} + + void cso_set_fragment_shader(struct cso_context *ctx, const struct pipe_shader_state *templ) { @@ -453,10 +463,7 @@ void cso_set_fragment_shader(struct cso_context *ctx, handle = ((struct cso_fragment_shader *)cso_hash_iter_data(iter))->data; } - if (ctx->fragment_shader != handle) { - ctx->fragment_shader = handle; - ctx->pipe->bind_fs_state(ctx->pipe, handle); - } + cso_set_fragment_shader_handle( ctx, handle ); } void cso_save_fragment_shader(struct cso_context *ctx) @@ -476,6 +483,16 @@ void cso_restore_fragment_shader(struct cso_context *ctx) } +void cso_set_vertex_shader_handle(struct cso_context *ctx, + void *handle ) +{ + if (ctx->vertex_shader != handle) { + ctx->vertex_shader = handle; + ctx->pipe->bind_vs_state(ctx->pipe, handle); + } +} + + void cso_set_vertex_shader(struct cso_context *ctx, const struct pipe_shader_state *templ) @@ -503,12 +520,11 @@ void cso_set_vertex_shader(struct cso_context *ctx, handle = ((struct cso_vertex_shader *)cso_hash_iter_data(iter))->data; } - if (ctx->vertex_shader != handle) { - ctx->vertex_shader = handle; - ctx->pipe->bind_vs_state(ctx->pipe, handle); - } + cso_set_vertex_shader_handle( ctx, handle ); } + + void cso_save_vertex_shader(struct cso_context *ctx) { assert(!ctx->vertex_shader_saved); diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index 665e8d9911..945f4881a8 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -96,6 +96,8 @@ void cso_restore_sampler_textures( struct cso_context *cso ); * (eg mesa's internall-generated texenv programs), it will be up to * the state tracker to implement their own specialized caching. */ +void cso_set_fragment_shader_handle(struct cso_context *ctx, + void *handle ); void cso_set_fragment_shader( struct cso_context *cso, const struct pipe_shader_state *shader ); void cso_save_fragment_shader(struct cso_context *cso); @@ -103,6 +105,8 @@ void cso_restore_fragment_shader(struct cso_context *cso); +void cso_set_vertex_shader_handle(struct cso_context *ctx, + void *handle ); void cso_set_vertex_shader( struct cso_context *cso, const struct pipe_shader_state *shader ); void cso_save_vertex_shader(struct cso_context *cso); -- cgit v1.2.3 From 01dfa6cde157321f565bab949f23f367ed20fa0e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 20:26:19 +0100 Subject: use cso fs/vs handle functions --- src/gallium/auxiliary/util/u_blit.c | 4 ++-- src/gallium/auxiliary/util/u_gen_mipmap.c | 4 ++-- src/mesa/state_tracker/st_atom_shader.c | 4 ++-- src/mesa/state_tracker/st_cb_bitmap.c | 4 ++-- src/mesa/state_tracker/st_cb_clear.c | 4 ++-- src/mesa/state_tracker/st_cb_drawpixels.c | 4 ++-- src/mesa/state_tracker/st_program.h | 4 ++-- 7 files changed, 14 insertions(+), 14 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index be5e83e834..0938b03820 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -318,8 +318,8 @@ util_blit_pixels(struct blit_state *ctx, cso_set_sampler_textures(ctx->cso, 1, &tex); /* shaders */ - cso_set_fragment_shader(ctx->cso, ctx->fs); - cso_set_vertex_shader(ctx->cso, ctx->vs); + cso_set_fragment_shader_handle(ctx->cso, ctx->fs); + cso_set_vertex_shader_handle(ctx->cso, ctx->vs); /* drawing dest */ memset(&fb, 0, sizeof(fb)); diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index f0c4063b28..9822a25ca6 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -888,8 +888,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); cso_set_rasterizer(ctx->cso, &ctx->rasterizer); - cso_set_fragment_shader(ctx->cso, ctx->fs); - cso_set_vertex_shader(ctx->cso, ctx->vs); + cso_set_fragment_shader_handle(ctx->cso, ctx->fs); + cso_set_vertex_shader_handle(ctx->cso, ctx->vs); #if 0 cso_set_viewport(ctx->cso, &ctx->viewport); #endif diff --git a/src/mesa/state_tracker/st_atom_shader.c b/src/mesa/state_tracker/st_atom_shader.c index 4a641a4a73..3f5ec71112 100644 --- a/src/mesa/state_tracker/st_atom_shader.c +++ b/src/mesa/state_tracker/st_atom_shader.c @@ -273,8 +273,8 @@ update_linkage( struct st_context *st ) st->vp = stvp; st->fp = stfp; - cso_set_vertex_shader(st->cso_context, stvp->driver_shader); - cso_set_fragment_shader(st->cso_context, stfp->driver_shader); + cso_set_vertex_shader_handle(st->cso_context, stvp->driver_shader); + cso_set_fragment_shader_handle(st->cso_context, stfp->driver_shader); st->vertex_result_to_slot = xvp->output_to_slot; } diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 1b863143e0..71a848ea54 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -446,10 +446,10 @@ draw_bitmap_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, cso_set_rasterizer(cso, &st->bitmap.rasterizer); /* fragment shader state: TEX lookup program */ - cso_set_fragment_shader(cso, stfp->driver_shader); + cso_set_fragment_shader_handle(cso, stfp->driver_shader); /* vertex shader state: position + texcoord pass-through */ - cso_set_vertex_shader(cso, st->bitmap.vs); + cso_set_vertex_shader_handle(cso, st->bitmap.vs); /* sampler / texture state */ cso_single_sampler(cso, 0, &st->bitmap.sampler); diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index bb27faad21..dac346a06c 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -274,8 +274,8 @@ clear_with_quad(GLcontext *ctx, cso_set_rasterizer(st->cso_context, &st->clear.raster); cso_set_viewport(st->cso_context, &st->clear.viewport); - cso_set_fragment_shader(st->cso_context, st->clear.fs); - cso_set_vertex_shader(st->cso_context, st->clear.vs); + cso_set_fragment_shader_handle(st->cso_context, st->clear.fs); + cso_set_vertex_shader_handle(st->cso_context, st->clear.vs); /* draw quad matching scissor rect (XXX verify coord round-off) */ draw_quad(ctx, x0, y0, x1, y1, ctx->Depth.Clear, ctx->Color.ClearColor); diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 75261c3350..3921500659 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -545,10 +545,10 @@ draw_textured_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, } /* fragment shader state: TEX lookup program */ - cso_set_fragment_shader(cso, stfp->driver_shader); + cso_set_fragment_shader_handle(cso, stfp->driver_shader); /* vertex shader state: position + texcoord pass-through */ - cso_set_vertex_shader(cso, stvp->driver_shader); + cso_set_vertex_shader_handle(cso, stvp->driver_shader); /* texture sampling state: */ diff --git a/src/mesa/state_tracker/st_program.h b/src/mesa/state_tracker/st_program.h index 63d6590540..dced31e88e 100644 --- a/src/mesa/state_tracker/st_program.h +++ b/src/mesa/state_tracker/st_program.h @@ -61,7 +61,7 @@ struct st_fragment_program GLuint input_map[PIPE_MAX_SHADER_INPUTS]; struct pipe_shader_state state; - struct pipe_shader_state *driver_shader; + void *driver_shader; GLuint param_state; @@ -88,7 +88,7 @@ struct st_vertex_program GLuint num_inputs; struct pipe_shader_state state; - struct pipe_shader_state *driver_shader; + void *driver_shader; /** For using our private draw module (glRasterPos) */ struct draw_vertex_shader *draw_shader; -- cgit v1.2.3 From e406ad5912985920a0d796f1ada58b40316590ed Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 21:01:38 +0100 Subject: draw: squash a couple of memory leaks --- src/gallium/auxiliary/draw/draw_pt_fetch.c | 3 +++ src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 5 +++++ 2 files changed, 8 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index c588710b75..f98bce6eac 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -166,6 +166,9 @@ struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ) void draw_pt_fetch_destroy( struct pt_fetch *fetch ) { + if (fetch->translate) + fetch->translate->release( fetch->translate ); + FREE(fetch); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 7735173042..68b2c5b1e3 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -264,6 +264,11 @@ static void fetch_emit_finish( struct draw_pt_middle_end *middle ) static void fetch_emit_destroy( struct draw_pt_middle_end *middle ) { + struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; + + if (feme->translate) + feme->translate->release( feme->translate ); + FREE(middle); } -- cgit v1.2.3 From 9a0e6860d3b4602515f39593ed9af00cee7bb106 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 21 Apr 2008 21:05:11 +0100 Subject: cso: disable not-really-working cso_set_*_shader() funcs --- src/gallium/auxiliary/cso_cache/cso_context.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index e6366e7b14..3840dd2239 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -432,6 +432,9 @@ void cso_set_fragment_shader_handle(struct cso_context *ctx, } +/* Not really working: + */ +#if 0 void cso_set_fragment_shader(struct cso_context *ctx, const struct pipe_shader_state *templ) { @@ -465,6 +468,7 @@ void cso_set_fragment_shader(struct cso_context *ctx, cso_set_fragment_shader_handle( ctx, handle ); } +#endif void cso_save_fragment_shader(struct cso_context *ctx) { @@ -493,7 +497,9 @@ void cso_set_vertex_shader_handle(struct cso_context *ctx, } - +/* Not really working: + */ +#if 0 void cso_set_vertex_shader(struct cso_context *ctx, const struct pipe_shader_state *templ) { @@ -522,6 +528,7 @@ void cso_set_vertex_shader(struct cso_context *ctx, cso_set_vertex_shader_handle( ctx, handle ); } +#endif -- cgit v1.2.3 From 9fe63929011cd9c4d86ab6525555a3e53423c854 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 21 Apr 2008 15:44:51 -0400 Subject: initial stab at error handling in cso --- src/gallium/auxiliary/cso_cache/cso_context.c | 98 +++++++++++++++++---------- src/gallium/auxiliary/cso_cache/cso_context.h | 34 +++++----- 2 files changed, 80 insertions(+), 52 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 3840dd2239..6040522a46 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -139,8 +139,8 @@ void cso_destroy_context( struct cso_context *ctx ) * the data member of the cso to be the template itself. */ -void cso_set_blend(struct cso_context *ctx, - const struct pipe_blend_state *templ) +enum pipe_error cso_set_blend(struct cso_context *ctx, + const struct pipe_blend_state *templ) { unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_blend_state)); struct cso_hash_iter iter = cso_find_state_template(ctx->cache, @@ -149,8 +149,9 @@ void cso_set_blend(struct cso_context *ctx, void *handle; if (cso_hash_iter_is_null(iter)) { - /* FIXME: handle OOM */ struct cso_blend *cso = MALLOC(sizeof(struct cso_blend)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; cso->state = *templ; cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state); @@ -168,6 +169,7 @@ void cso_set_blend(struct cso_context *ctx, ctx->blend = handle; ctx->pipe->bind_blend_state(ctx->pipe, handle); } + return PIPE_OK; } void cso_save_blend(struct cso_context *ctx) @@ -187,12 +189,12 @@ void cso_restore_blend(struct cso_context *ctx) -void cso_single_sampler(struct cso_context *ctx, - unsigned idx, - const struct pipe_sampler_state *templ) +enum pipe_error cso_single_sampler(struct cso_context *ctx, + unsigned idx, + const struct pipe_sampler_state *templ) { void *handle = NULL; - + if (templ != NULL) { unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_sampler_state)); struct cso_hash_iter iter = cso_find_state_template(ctx->cache, @@ -200,9 +202,10 @@ void cso_single_sampler(struct cso_context *ctx, (void*)templ); if (cso_hash_iter_is_null(iter)) { - /* FIXME: handle OOM */ struct cso_sampler *cso = MALLOC(sizeof(struct cso_sampler)); - + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + cso->state = *templ; cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state; @@ -217,11 +220,12 @@ void cso_single_sampler(struct cso_context *ctx, } ctx->samplers[idx] = handle; + return PIPE_OK; } void cso_single_sampler_done( struct cso_context *ctx ) { - unsigned i; + unsigned i; /* find highest non-null sampler */ for (i = PIPE_MAX_SAMPLERS; i > 0; i--) { @@ -232,8 +236,8 @@ void cso_single_sampler_done( struct cso_context *ctx ) ctx->nr_samplers = i; if (ctx->hw.nr_samplers != ctx->nr_samplers || - memcmp(ctx->hw.samplers, - ctx->samplers, + memcmp(ctx->hw.samplers, + ctx->samplers, ctx->nr_samplers * sizeof(void *)) != 0) { memcpy(ctx->hw.samplers, ctx->samplers, ctx->nr_samplers * sizeof(void *)); @@ -243,22 +247,36 @@ void cso_single_sampler_done( struct cso_context *ctx ) } } -void cso_set_samplers( struct cso_context *ctx, - unsigned nr, - const struct pipe_sampler_state **templates ) +/* + * If the function encouters any errors it will return the + * last one. Done to always try to set as many samplers + * as possible. + */ +enum pipe_error cso_set_samplers( struct cso_context *ctx, + unsigned nr, + const struct pipe_sampler_state **templates ) { unsigned i; - + enum pipe_error temp, error = PIPE_OK; + /* TODO: fastpath */ - for (i = 0; i < nr; i++) - cso_single_sampler( ctx, i, templates[i] ); + for (i = 0; i < nr; i++) { + temp = cso_single_sampler( ctx, i, templates[i] ); + if (temp != PIPE_OK) + error = temp; + } + + for ( ; i < ctx->nr_samplers; i++) { + temp = cso_single_sampler( ctx, i, NULL ); + if (temp != PIPE_OK) + error = temp; + } - for ( ; i < ctx->nr_samplers; i++) - cso_single_sampler( ctx, i, NULL ); - cso_single_sampler_done( ctx ); + + return error; } void cso_save_samplers(struct cso_context *ctx) @@ -294,7 +312,7 @@ void cso_set_sampler_textures( struct cso_context *ctx, void cso_save_sampler_textures( struct cso_context *ctx ) { uint i; - + ctx->nr_textures_saved = ctx->nr_textures; for (i = 0; i < ctx->nr_textures; i++) { assert(!ctx->textures_saved[i]); @@ -323,9 +341,8 @@ void cso_restore_sampler_textures( struct cso_context *ctx ) - -void cso_set_depth_stencil_alpha(struct cso_context *ctx, - const struct pipe_depth_stencil_alpha_state *templ) +enum pipe_error cso_set_depth_stencil_alpha(struct cso_context *ctx, + const struct pipe_depth_stencil_alpha_state *templ) { unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_depth_stencil_alpha_state)); @@ -336,8 +353,9 @@ void cso_set_depth_stencil_alpha(struct cso_context *ctx, void *handle; if (cso_hash_iter_is_null(iter)) { - /* FIXME: handle OOM */ struct cso_depth_stencil_alpha *cso = MALLOC(sizeof(struct cso_depth_stencil_alpha)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; cso->state = *templ; cso->data = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &cso->state); @@ -355,6 +373,7 @@ void cso_set_depth_stencil_alpha(struct cso_context *ctx, ctx->depth_stencil = handle; ctx->pipe->bind_depth_stencil_alpha_state(ctx->pipe, handle); } + return PIPE_OK; } void cso_save_depth_stencil_alpha(struct cso_context *ctx) @@ -374,8 +393,8 @@ void cso_restore_depth_stencil_alpha(struct cso_context *ctx) -void cso_set_rasterizer(struct cso_context *ctx, - const struct pipe_rasterizer_state *templ) +enum pipe_error cso_set_rasterizer(struct cso_context *ctx, + const struct pipe_rasterizer_state *templ) { unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_rasterizer_state)); @@ -385,8 +404,9 @@ void cso_set_rasterizer(struct cso_context *ctx, void *handle = NULL; if (cso_hash_iter_is_null(iter)) { - /* FIXME: handle OOM */ struct cso_rasterizer *cso = MALLOC(sizeof(struct cso_rasterizer)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; cso->state = *templ; cso->data = ctx->pipe->create_rasterizer_state(ctx->pipe, &cso->state); @@ -404,6 +424,7 @@ void cso_set_rasterizer(struct cso_context *ctx, ctx->rasterizer = handle; ctx->pipe->bind_rasterizer_state(ctx->pipe, handle); } + return PIPE_OK; } void cso_save_rasterizer(struct cso_context *ctx) @@ -422,6 +443,7 @@ void cso_restore_rasterizer(struct cso_context *ctx) } +<<<<<<< HEAD:src/gallium/auxiliary/cso_cache/cso_context.c void cso_set_fragment_shader_handle(struct cso_context *ctx, void *handle ) { @@ -435,8 +457,8 @@ void cso_set_fragment_shader_handle(struct cso_context *ctx, /* Not really working: */ #if 0 -void cso_set_fragment_shader(struct cso_context *ctx, - const struct pipe_shader_state *templ) +enum pipe_error cso_set_fragment_shader(struct cso_context *ctx, + const struct pipe_shader_state *templ) { const struct tgsi_token *tokens = templ->tokens; unsigned num_tokens = tgsi_num_tokens(tokens); @@ -449,10 +471,12 @@ void cso_set_fragment_shader(struct cso_context *ctx, void *handle = NULL; if (cso_hash_iter_is_null(iter)) { - /* FIXME: handle OOM */ struct cso_fragment_shader *cso = MALLOC(sizeof(struct cso_fragment_shader) + tokens_size); struct tgsi_token *cso_tokens = (struct tgsi_token *)((char *)cso + sizeof(*cso)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + memcpy(cso_tokens, tokens, tokens_size); cso->state.tokens = cso_tokens; cso->data = ctx->pipe->create_fs_state(ctx->pipe, &cso->state); @@ -467,6 +491,7 @@ void cso_set_fragment_shader(struct cso_context *ctx, } cso_set_fragment_shader_handle( ctx, handle ); + return PIPE_OK; } #endif @@ -500,8 +525,8 @@ void cso_set_vertex_shader_handle(struct cso_context *ctx, /* Not really working: */ #if 0 -void cso_set_vertex_shader(struct cso_context *ctx, - const struct pipe_shader_state *templ) +enum pipe_error cso_set_vertex_shader(struct cso_context *ctx, + const struct pipe_shader_state *templ) { unsigned hash_key = cso_construct_key((void*)templ, sizeof(struct pipe_shader_state)); @@ -511,9 +536,11 @@ void cso_set_vertex_shader(struct cso_context *ctx, void *handle = NULL; if (cso_hash_iter_is_null(iter)) { - /* FIXME: handle OOM */ struct cso_vertex_shader *cso = MALLOC(sizeof(struct cso_vertex_shader)); + if (!cso) + return PIPE_ERROR_OUT_OF_MEMORY; + cso->state = *templ; cso->data = ctx->pipe->create_vs_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_vs_state; @@ -527,6 +554,7 @@ void cso_set_vertex_shader(struct cso_context *ctx, } cso_set_vertex_shader_handle( ctx, handle ); + return PIPE_OK; } #endif diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index 945f4881a8..ab46b93d5a 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -31,6 +31,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" +#include "pipe/p_error.h" #ifdef __cplusplus @@ -45,39 +46,39 @@ void cso_destroy_context( struct cso_context *cso ); -void cso_set_blend( struct cso_context *cso, - const struct pipe_blend_state *blend ); +enum pipe_error cso_set_blend( struct cso_context *cso, + const struct pipe_blend_state *blend ); void cso_save_blend(struct cso_context *cso); void cso_restore_blend(struct cso_context *cso); -void cso_set_depth_stencil_alpha( struct cso_context *cso, - const struct pipe_depth_stencil_alpha_state *dsa ); +enum pipe_error cso_set_depth_stencil_alpha( struct cso_context *cso, + const struct pipe_depth_stencil_alpha_state *dsa ); void cso_save_depth_stencil_alpha(struct cso_context *cso); void cso_restore_depth_stencil_alpha(struct cso_context *cso); -void cso_set_rasterizer( struct cso_context *cso, - const struct pipe_rasterizer_state *rasterizer ); +enum pipe_error cso_set_rasterizer( struct cso_context *cso, + const struct pipe_rasterizer_state *rasterizer ); void cso_save_rasterizer(struct cso_context *cso); void cso_restore_rasterizer(struct cso_context *cso); -void cso_set_samplers( struct cso_context *cso, - unsigned count, - const struct pipe_sampler_state **states ); +enum pipe_error cso_set_samplers( struct cso_context *cso, + unsigned count, + const struct pipe_sampler_state **states ); void cso_save_samplers(struct cso_context *cso); void cso_restore_samplers(struct cso_context *cso); /* Alternate interface to support state trackers that like to modify * samplers one at a time: */ -void cso_single_sampler( struct cso_context *cso, - unsigned nr, - const struct pipe_sampler_state *states ); +enum pipe_error cso_single_sampler( struct cso_context *cso, + unsigned nr, + const struct pipe_sampler_state *states ); void cso_single_sampler_done( struct cso_context *cso ); @@ -98,17 +99,16 @@ void cso_restore_sampler_textures( struct cso_context *cso ); */ void cso_set_fragment_shader_handle(struct cso_context *ctx, void *handle ); -void cso_set_fragment_shader( struct cso_context *cso, - const struct pipe_shader_state *shader ); +enum pipe_error cso_set_fragment_shader( struct cso_context *cso, + const struct pipe_shader_state *shader ); void cso_save_fragment_shader(struct cso_context *cso); void cso_restore_fragment_shader(struct cso_context *cso); - void cso_set_vertex_shader_handle(struct cso_context *ctx, void *handle ); -void cso_set_vertex_shader( struct cso_context *cso, - const struct pipe_shader_state *shader ); +enum pipe_error cso_set_vertex_shader( struct cso_context *cso, + const struct pipe_shader_state *shader ); void cso_save_vertex_shader(struct cso_context *cso); void cso_restore_vertex_shader(struct cso_context *cso); -- cgit v1.2.3 From 0879237725eca893318137b795d4234300a37e9a Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 21 Apr 2008 16:04:27 -0400 Subject: handle some of the possible allocation failures within the hash itself --- src/gallium/auxiliary/cso_cache/cso_context.c | 36 ++++++++++++++++++++++++--- src/gallium/auxiliary/cso_cache/cso_hash.c | 17 +++++++++++++ 2 files changed, 49 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 6040522a46..364a413580 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -159,6 +159,11 @@ enum pipe_error cso_set_blend(struct cso_context *ctx, cso->context = ctx->pipe; iter = cso_insert_state(ctx->cache, hash_key, CSO_BLEND, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + handle = cso->data; } else { @@ -212,6 +217,11 @@ enum pipe_error cso_single_sampler(struct cso_context *ctx, cso->context = ctx->pipe; iter = cso_insert_state(ctx->cache, hash_key, CSO_SAMPLER, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + handle = cso->data; } else { @@ -362,7 +372,12 @@ enum pipe_error cso_set_depth_stencil_alpha(struct cso_context *ctx, cso->delete_state = (cso_state_callback)ctx->pipe->delete_depth_stencil_alpha_state; cso->context = ctx->pipe; - cso_insert_state(ctx->cache, hash_key, CSO_DEPTH_STENCIL_ALPHA, cso); + iter = cso_insert_state(ctx->cache, hash_key, CSO_DEPTH_STENCIL_ALPHA, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + handle = cso->data; } else { @@ -413,7 +428,12 @@ enum pipe_error cso_set_rasterizer(struct cso_context *ctx, cso->delete_state = (cso_state_callback)ctx->pipe->delete_rasterizer_state; cso->context = ctx->pipe; - cso_insert_state(ctx->cache, hash_key, CSO_RASTERIZER, cso); + iter = cso_insert_state(ctx->cache, hash_key, CSO_RASTERIZER, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + handle = cso->data; } else { @@ -442,8 +462,6 @@ void cso_restore_rasterizer(struct cso_context *ctx) ctx->rasterizer_saved = NULL; } - -<<<<<<< HEAD:src/gallium/auxiliary/cso_cache/cso_context.c void cso_set_fragment_shader_handle(struct cso_context *ctx, void *handle ) { @@ -484,6 +502,11 @@ enum pipe_error cso_set_fragment_shader(struct cso_context *ctx, cso->context = ctx->pipe; iter = cso_insert_state(ctx->cache, hash_key, CSO_FRAGMENT_SHADER, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + handle = cso->data; } else { @@ -547,6 +570,11 @@ enum pipe_error cso_set_vertex_shader(struct cso_context *ctx, cso->context = ctx->pipe; iter = cso_insert_state(ctx->cache, hash_key, CSO_VERTEX_SHADER, cso); + if (cso_hash_iter_is_null(iter)) { + FREE(cso); + return PIPE_ERROR_OUT_OF_MEMORY; + } + handle = cso->data; } else { diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c index ddce3822f7..8d867f86d2 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -110,6 +110,10 @@ cso_hash_create_node(struct cso_hash *hash, struct cso_node **anextNode) { struct cso_node *node = cso_data_allocate_node(hash->data.d); + + if (!node) + return NULL; + node->key = akey; node->value = avalue; @@ -219,6 +223,11 @@ struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, { struct cso_node **nextNode = cso_hash_find_node(hash, key); struct cso_node *node = cso_hash_create_node(hash, key, data, nextNode); + if (!node) { + struct cso_hash_iter null_iter = {hash, 0}; + return null_iter; + } + struct cso_hash_iter iter = {hash, node}; return iter; } @@ -227,7 +236,15 @@ struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, struct cso_hash * cso_hash_create(void) { struct cso_hash *hash = MALLOC_STRUCT(cso_hash); + if (!hash) + return NULL; + hash->data.d = MALLOC_STRUCT(cso_hash_data); + if (!hash->data.d) { + FREE(hash); + return NULL; + } + hash->data.d->fakeNext = 0; hash->data.d->buckets = 0; hash->data.d->size = 0; -- cgit v1.2.3 From ed187d39a6e0fd921b2a45a143d88ac4b66eee91 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 21 Apr 2008 16:40:27 -0400 Subject: make the api consistent (all set functions return pipe_error status) --- src/gallium/auxiliary/cso_cache/cso_context.c | 39 +++++++++++++++------------ src/gallium/auxiliary/cso_cache/cso_context.h | 26 +++++++++--------- 2 files changed, 35 insertions(+), 30 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 364a413580..2d98a34edf 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -303,9 +303,9 @@ void cso_restore_samplers(struct cso_context *ctx) } -void cso_set_sampler_textures( struct cso_context *ctx, - uint count, - struct pipe_texture **textures ) +enum pipe_error cso_set_sampler_textures( struct cso_context *ctx, + uint count, + struct pipe_texture **textures ) { uint i; @@ -317,6 +317,8 @@ void cso_set_sampler_textures( struct cso_context *ctx, pipe_texture_reference(&ctx->textures[i], NULL); ctx->pipe->set_sampler_textures(ctx->pipe, count, textures); + + return PIPE_OK; } void cso_save_sampler_textures( struct cso_context *ctx ) @@ -462,13 +464,14 @@ void cso_restore_rasterizer(struct cso_context *ctx) ctx->rasterizer_saved = NULL; } -void cso_set_fragment_shader_handle(struct cso_context *ctx, - void *handle ) +enum pipe_error cso_set_fragment_shader_handle(struct cso_context *ctx, + void *handle ) { if (ctx->fragment_shader != handle) { ctx->fragment_shader = handle; ctx->pipe->bind_fs_state(ctx->pipe, handle); } + return PIPE_OK; } @@ -513,8 +516,7 @@ enum pipe_error cso_set_fragment_shader(struct cso_context *ctx, handle = ((struct cso_fragment_shader *)cso_hash_iter_data(iter))->data; } - cso_set_fragment_shader_handle( ctx, handle ); - return PIPE_OK; + return cso_set_fragment_shader_handle( ctx, handle ); } #endif @@ -535,13 +537,14 @@ void cso_restore_fragment_shader(struct cso_context *ctx) } -void cso_set_vertex_shader_handle(struct cso_context *ctx, - void *handle ) +enum pipe_error cso_set_vertex_shader_handle(struct cso_context *ctx, + void *handle ) { if (ctx->vertex_shader != handle) { ctx->vertex_shader = handle; ctx->pipe->bind_vs_state(ctx->pipe, handle); } + return PIPE_OK; } @@ -581,8 +584,7 @@ enum pipe_error cso_set_vertex_shader(struct cso_context *ctx, handle = ((struct cso_vertex_shader *)cso_hash_iter_data(iter))->data; } - cso_set_vertex_shader_handle( ctx, handle ); - return PIPE_OK; + return cso_set_vertex_shader_handle( ctx, handle ); } #endif @@ -606,14 +608,15 @@ void cso_restore_vertex_shader(struct cso_context *ctx) -void cso_set_framebuffer(struct cso_context *ctx, - const struct pipe_framebuffer_state *fb) +enum pipe_error cso_set_framebuffer(struct cso_context *ctx, + const struct pipe_framebuffer_state *fb) { /* XXX this memcmp() fails to detect buffer size changes */ if (1/*memcmp(&ctx->fb, fb, sizeof(*fb))*/) { ctx->fb = *fb; ctx->pipe->set_framebuffer_state(ctx->pipe, fb); } + return PIPE_OK; } void cso_save_framebuffer(struct cso_context *ctx) @@ -630,13 +633,14 @@ void cso_restore_framebuffer(struct cso_context *ctx) } -void cso_set_viewport(struct cso_context *ctx, - const struct pipe_viewport_state *vp) +enum pipe_error cso_set_viewport(struct cso_context *ctx, + const struct pipe_viewport_state *vp) { if (memcmp(&ctx->vp, vp, sizeof(*vp))) { ctx->vp = *vp; ctx->pipe->set_viewport_state(ctx->pipe, vp); } + return PIPE_OK; } void cso_save_viewport(struct cso_context *ctx) @@ -656,11 +660,12 @@ void cso_restore_viewport(struct cso_context *ctx) -void cso_set_blend_color(struct cso_context *ctx, - const struct pipe_blend_color *bc) +enum pipe_error cso_set_blend_color(struct cso_context *ctx, + const struct pipe_blend_color *bc) { if (memcmp(&ctx->blend_color, bc, sizeof(ctx->blend_color))) { ctx->blend_color = *bc; ctx->pipe->set_blend_color(ctx->pipe, bc); } + return PIPE_OK; } diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index ab46b93d5a..0405944132 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -84,9 +84,9 @@ void cso_single_sampler_done( struct cso_context *cso ); -void cso_set_sampler_textures( struct cso_context *cso, - uint count, - struct pipe_texture **textures ); +enum pipe_error cso_set_sampler_textures( struct cso_context *cso, + uint count, + struct pipe_texture **textures ); void cso_save_sampler_textures( struct cso_context *cso ); void cso_restore_sampler_textures( struct cso_context *cso ); @@ -97,16 +97,16 @@ void cso_restore_sampler_textures( struct cso_context *cso ); * (eg mesa's internall-generated texenv programs), it will be up to * the state tracker to implement their own specialized caching. */ -void cso_set_fragment_shader_handle(struct cso_context *ctx, - void *handle ); +enum pipe_error cso_set_fragment_shader_handle(struct cso_context *ctx, + void *handle ); enum pipe_error cso_set_fragment_shader( struct cso_context *cso, const struct pipe_shader_state *shader ); void cso_save_fragment_shader(struct cso_context *cso); void cso_restore_fragment_shader(struct cso_context *cso); -void cso_set_vertex_shader_handle(struct cso_context *ctx, - void *handle ); +enum pipe_error cso_set_vertex_shader_handle(struct cso_context *ctx, + void *handle ); enum pipe_error cso_set_vertex_shader( struct cso_context *cso, const struct pipe_shader_state *shader ); void cso_save_vertex_shader(struct cso_context *cso); @@ -114,20 +114,20 @@ void cso_restore_vertex_shader(struct cso_context *cso); -void cso_set_framebuffer(struct cso_context *cso, - const struct pipe_framebuffer_state *fb); +enum pipe_error cso_set_framebuffer(struct cso_context *cso, + const struct pipe_framebuffer_state *fb); void cso_save_framebuffer(struct cso_context *cso); void cso_restore_framebuffer(struct cso_context *cso); -void cso_set_viewport(struct cso_context *cso, - const struct pipe_viewport_state *vp); +enum pipe_error cso_set_viewport(struct cso_context *cso, + const struct pipe_viewport_state *vp); void cso_save_viewport(struct cso_context *cso); void cso_restore_viewport(struct cso_context *cso); -void cso_set_blend_color(struct cso_context *cso, - const struct pipe_blend_color *bc); +enum pipe_error cso_set_blend_color(struct cso_context *cso, + const struct pipe_blend_color *bc); #ifdef __cplusplus -- cgit v1.2.3 From 83fec372b45eb0af9e2d83549b3d92afb17c38af Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 22 Apr 2008 11:26:26 +0200 Subject: cso: Fix build on Win32. --- src/gallium/auxiliary/cso_cache/cso_hash.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c index 8d867f86d2..0646efd952 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -228,8 +228,10 @@ struct cso_hash_iter cso_hash_insert(struct cso_hash *hash, return null_iter; } - struct cso_hash_iter iter = {hash, node}; - return iter; + { + struct cso_hash_iter iter = {hash, node}; + return iter; + } } } -- cgit v1.2.3 From 57987ea67320e79e4c2d7e66388806148ece09b5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 22 Apr 2008 11:26:07 +0100 Subject: draw: disable broken edgeflag code - didn't work & was killing performance --- src/gallium/auxiliary/draw/draw_pt_vcache.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 3cc2941f96..153055417d 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -106,7 +106,7 @@ static unsigned add_edgeflag( struct vcache_frontend *vcache, unsigned idx, unsigned mask ) { - if (mask && draw_pt_get_edgeflag(vcache->draw, idx)) + if (0 && mask && draw_pt_get_edgeflag(vcache->draw, idx)) return idx | DRAW_PT_EDGEFLAG; else return idx; @@ -116,7 +116,7 @@ static unsigned add_edgeflag( struct vcache_frontend *vcache, static unsigned add_reset_stipple( unsigned idx, unsigned reset ) { - if (reset) + if (0 && reset) return idx | DRAW_PT_RESET_STIPPLE; else return idx; @@ -128,9 +128,9 @@ static void vcache_triangle( struct vcache_frontend *vcache, unsigned i1, unsigned i2 ) { - vcache_elt(vcache, i0 | DRAW_PT_EDGEFLAG | DRAW_PT_RESET_STIPPLE); - vcache_elt(vcache, i1 | DRAW_PT_EDGEFLAG); - vcache_elt(vcache, i2 | DRAW_PT_EDGEFLAG); + vcache_elt(vcache, i0 /* | DRAW_PT_EDGEFLAG | DRAW_PT_RESET_STIPPLE */ ); + vcache_elt(vcache, i1 /* | DRAW_PT_EDGEFLAG */); + vcache_elt(vcache, i2 /* | DRAW_PT_EDGEFLAG */); vcache_check_flush(vcache); } @@ -142,11 +142,12 @@ static void vcache_ef_triangle( struct vcache_frontend *vcache, unsigned i1, unsigned i2 ) { +/* i0 = add_edgeflag( vcache, i0, (ef_mask >> 0) & 1 ); i1 = add_edgeflag( vcache, i1, (ef_mask >> 1) & 1 ); i2 = add_edgeflag( vcache, i2, (ef_mask >> 2) & 1 ); - i0 = add_reset_stipple( i0, reset_stipple ); +*/ vcache_elt(vcache, i0); vcache_elt(vcache, i1); -- cgit v1.2.3 From b4b3a73bdf68adc1d9fbadac913aa6cf60d648d5 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 22 Apr 2008 19:47:02 +0900 Subject: pipebuffer: Temporarily allow simultaneous CPU writes. Also, fast path for re-fencing the same buffer multiple times with the same fence. --- src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 65b6584003..27032b0c4c 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -215,15 +215,21 @@ fenced_buffer_serialize(struct fenced_buffer *fenced_buf, unsigned flags) struct fenced_buffer_list *fenced_list = fenced_buf->list; struct pipe_winsys *winsys = fenced_list->winsys; + /* Allow concurrent reads */ if(((fenced_buf->flags | flags) & PIPE_BUFFER_USAGE_WRITE) == 0) return PIPE_OK; + /* Wait for the CPU to finish */ if(fenced_buf->mapcount) { - /* FIXME */ + /* FIXME: Use thread conditions variables to signal when mapcount + * reaches zero */ debug_warning("attemp to write concurrently to buffer"); + /* XXX: we must not fail here in order to support texture mipmap generation return PIPE_ERROR_RETRY; + */ } + /* Wait for the GPU to finish */ if(fenced_buf->fence) { if(winsys->fence_finish(winsys, fenced_buf->fence, 0) != 0) return PIPE_ERROR_RETRY; @@ -353,6 +359,16 @@ buffer_fence(struct pb_buffer *buf, /* FIXME: receive this as a parameter */ unsigned flags = fence ? PIPE_BUFFER_USAGE_GPU_READ_WRITE : 0; + if(fence == fenced_buf->fence) { + /* Handle the same fence case specially, not only because it is a fast + * path, but mostly to avoid serializing two writes with the same fence, + * as that would bring the hardware down to synchronous operation without + * any benefit. + */ + fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE; + return; + } + if(fenced_buffer_serialize(fenced_buf, flags) != PIPE_OK) { /* FIXME: propagate error */ (void)0; -- cgit v1.2.3 From 88f8eed3c4430505b1436b6c5b0114d34c33f822 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 22 Apr 2008 15:26:38 +0100 Subject: cso: fix vs/fs confusion --- src/gallium/auxiliary/cso_cache/cso_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 2d98a34edf..23b1f5a032 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -600,7 +600,7 @@ void cso_restore_vertex_shader(struct cso_context *ctx) { assert(ctx->vertex_shader_saved); if (ctx->vertex_shader_saved != ctx->vertex_shader) { - ctx->pipe->bind_fs_state(ctx->pipe, ctx->vertex_shader_saved); + ctx->pipe->bind_vs_state(ctx->pipe, ctx->vertex_shader_saved); ctx->vertex_shader = ctx->vertex_shader_saved; } ctx->vertex_shader_saved = NULL; -- cgit v1.2.3 From 0588858702d1a5c9c08573ea6817e2e149473cf6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 22 Apr 2008 20:50:20 +0100 Subject: draw: allow drivers to query pipeline state more easily Also, provide a separate flag to say whether the driver can handle clipping/rhw tasks, in addition to the API flag which indicates they have already been done. --- src/gallium/auxiliary/draw/draw_context.c | 13 ++++++++++ src/gallium/auxiliary/draw/draw_context.h | 12 +++++++++ src/gallium/auxiliary/draw/draw_pipe_validate.c | 30 ++++++++++++---------- src/gallium/auxiliary/draw/draw_private.h | 7 +++-- src/gallium/auxiliary/draw/draw_pt.c | 6 +++-- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 2 +- src/gallium/auxiliary/draw/draw_pt_post_vs.c | 11 +++++--- 7 files changed, 58 insertions(+), 23 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index b4dbdccd61..b916b27877 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -122,6 +122,19 @@ void draw_set_rasterizer_state( struct draw_context *draw, draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); draw->rasterizer = raster; + draw->bypass_clipping = (draw->rasterizer->bypass_clipping || + draw->driver.bypass_clipping); +} + + +void draw_set_driver_clipping( struct draw_context *draw, + boolean bypass_clipping ) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + + draw->driver.bypass_clipping = bypass_clipping; + draw->bypass_clipping = (draw->rasterizer->bypass_clipping || + draw->driver.bypass_clipping); } diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 2742001698..c5c3d3b09e 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -148,4 +148,16 @@ struct vbuf_render; void draw_set_render( struct draw_context *draw, struct vbuf_render *render ); +void draw_set_driver_clipping( struct draw_context *draw, + boolean bypass_clipping ); + +/******************************************************************************* + * Draw pipeline + */ +boolean draw_need_pipeline(const struct draw_context *draw, + const struct pipe_rasterizer_state *rasterizer, + unsigned prim ); + + + #endif /* DRAW_CONTEXT_H */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c index a2e0812c89..6be1d369c3 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_validate.c +++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c @@ -32,6 +32,7 @@ #include "pipe/p_defines.h" #include "draw_private.h" #include "draw_pipe.h" +#include "draw_context.h" static boolean points( unsigned prim ) { @@ -57,7 +58,8 @@ static boolean triangles( unsigned prim ) * pipeline stages. */ boolean -draw_need_pipeline(const struct draw_context *draw, +draw_need_pipeline(const struct draw_context *draw, + const struct pipe_rasterizer_state *rasterizer, unsigned int prim ) { /* Don't have to worry about triangles turning into lines/points @@ -67,30 +69,30 @@ draw_need_pipeline(const struct draw_context *draw, if (lines(prim)) { /* line stipple */ - if (draw->rasterizer->line_stipple_enable && draw->pipeline.line_stipple) + if (rasterizer->line_stipple_enable && draw->pipeline.line_stipple) return TRUE; /* wide lines */ - if (draw->rasterizer->line_width > draw->pipeline.wide_line_threshold) + if (rasterizer->line_width > draw->pipeline.wide_line_threshold) return TRUE; /* AA lines */ - if (draw->rasterizer->line_smooth && draw->pipeline.aaline) + if (rasterizer->line_smooth && draw->pipeline.aaline) return TRUE; } if (points(prim)) { /* large points */ - if (draw->rasterizer->point_size > draw->pipeline.wide_point_threshold) + if (rasterizer->point_size > draw->pipeline.wide_point_threshold) return TRUE; /* AA points */ - if (draw->rasterizer->point_smooth && draw->pipeline.aapoint) + if (rasterizer->point_smooth && draw->pipeline.aapoint) return TRUE; /* point sprites */ - if (draw->rasterizer->point_sprite && draw->pipeline.point_sprite) + if (rasterizer->point_sprite && draw->pipeline.point_sprite) return TRUE; } @@ -98,20 +100,20 @@ draw_need_pipeline(const struct draw_context *draw, if (triangles(prim)) { /* polygon stipple */ - if (draw->rasterizer->poly_stipple_enable && draw->pipeline.pstipple) + if (rasterizer->poly_stipple_enable && draw->pipeline.pstipple) return TRUE; /* unfilled polygons */ - if (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || - draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) + if (rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || + rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) return TRUE; /* polygon offset */ - if (draw->rasterizer->offset_cw || draw->rasterizer->offset_ccw) + if (rasterizer->offset_cw || rasterizer->offset_ccw) return TRUE; /* two-side lighting */ - if (draw->rasterizer->light_twoside) + if (rasterizer->light_twoside) return TRUE; } @@ -120,7 +122,7 @@ draw_need_pipeline(const struct draw_context *draw, * * Generally this isn't a reason to require the pipeline, though. * - if (draw->rasterizer->cull_mode) + if (rasterizer->cull_mode) return TRUE; */ @@ -239,7 +241,7 @@ static struct draw_stage *validate_pipeline( struct draw_stage *stage ) /* Clip stage */ - if (!draw->rasterizer->bypass_clipping) + if (!draw->bypass_clipping) { draw->pipeline.clip->next = next; next = draw->pipeline.clip; diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index da973e868b..83b81f66a0 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -174,7 +174,12 @@ struct draw_context } pt; + struct { + boolean bypass_clipping; + } driver; + boolean flushing; + boolean bypass_clipping; /* set if either api or driver bypass_clipping true */ /* pipe state that we need: */ const struct pipe_rasterizer_state *rasterizer; @@ -243,8 +248,6 @@ void draw_pipeline_run( struct draw_context *draw, void draw_pipeline_flush( struct draw_context *draw, unsigned flags ); -boolean draw_need_pipeline(const struct draw_context *draw, - unsigned prim ); /******************************************************************************* diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 9f8e8d3d62..f5a3bf390e 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -58,11 +58,13 @@ draw_pt_arrays(struct draw_context *draw, opt |= PT_PIPELINE; } - if (draw_need_pipeline(draw, prim)) { + if (draw_need_pipeline(draw, + draw->rasterizer, + prim)) { opt |= PT_PIPELINE; } - if (!draw->rasterizer->bypass_clipping) { + if (!draw->bypass_clipping) { opt |= PT_CLIPTEST; } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 98a2cb45e4..f0763dad8d 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -81,7 +81,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, * but gl vs dx9 clip spaces. */ draw_pt_post_vs_prepare( fpme->post_vs, - draw->rasterizer->bypass_clipping, + draw->bypass_clipping, draw->identity_viewport, draw->rasterizer->gl_rasterization_rules ); diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index 581f044dae..e5e38fa264 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -94,8 +94,10 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, struct vertex_header *out = vertices; const float *scale = pvs->draw->viewport.scale; const float *trans = pvs->draw->viewport.translate; + boolean clipped = FALSE; unsigned j; - unsigned clipped = 0; + + if (0) debug_printf("%s\n"); for (j = 0; j < count; j++) { out->clip[0] = out->data[0][0]; @@ -108,10 +110,11 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, out->clipmask = compute_clipmask_gl(out->clip, pvs->draw->plane, pvs->draw->nr_planes); - clipped += out->clipmask; if (out->clipmask == 0) { + clipped = TRUE; + /* divide by w */ float w = 1.0f / out->data[0][3]; @@ -142,7 +145,7 @@ static boolean post_vs_viewport( struct pt_post_vs *pvs, const float *trans = pvs->draw->viewport.translate; unsigned j; - debug_printf("%s\n", __FUNCTION__); + if (0) debug_printf("%s\n", __FUNCTION__); for (j = 0; j < count; j++) { /* Viewport mapping only, no cliptest/rhw divide */ @@ -165,7 +168,7 @@ static boolean post_vs_none( struct pt_post_vs *pvs, unsigned count, unsigned stride ) { - debug_printf("%s\n", __FUNCTION__); + if (0) debug_printf("%s\n", __FUNCTION__); return FALSE; } -- cgit v1.2.3 From 8f4f89c04383b2100f6d856270cad62dfe8a6354 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 22 Apr 2008 14:42:36 -0600 Subject: gallium: remove assertions It's possible the current vs/fs is null when cso_save_vertex/fragment_shader() is called. --- src/gallium/auxiliary/cso_cache/cso_context.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 23b1f5a032..0523cb1949 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -528,7 +528,6 @@ void cso_save_fragment_shader(struct cso_context *ctx) void cso_restore_fragment_shader(struct cso_context *ctx) { - assert(ctx->fragment_shader_saved); if (ctx->fragment_shader_saved != ctx->fragment_shader) { ctx->pipe->bind_fs_state(ctx->pipe, ctx->fragment_shader_saved); ctx->fragment_shader = ctx->fragment_shader_saved; @@ -598,7 +597,6 @@ void cso_save_vertex_shader(struct cso_context *ctx) void cso_restore_vertex_shader(struct cso_context *ctx) { - assert(ctx->vertex_shader_saved); if (ctx->vertex_shader_saved != ctx->vertex_shader) { ctx->pipe->bind_vs_state(ctx->pipe, ctx->vertex_shader_saved); ctx->vertex_shader = ctx->vertex_shader_saved; -- cgit v1.2.3 From aaa43218f34fc9897b280d6cf9bc1a31bbb7dafc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 22 Apr 2008 16:22:21 -0600 Subject: gallium: setup an identity viewport --- src/gallium/auxiliary/util/u_gen_mipmap.c | 35 +++++-------------------------- 1 file changed, 5 insertions(+), 30 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 9822a25ca6..ff5881b979 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -61,6 +61,7 @@ struct gen_mipmap_state struct pipe_depth_stencil_alpha_state depthstencil; struct pipe_rasterizer_state rasterizer; struct pipe_sampler_state sampler; + struct pipe_viewport_state viewport; struct pipe_shader_state vert_shader; struct pipe_shader_state frag_shader; @@ -723,9 +724,7 @@ util_create_gen_mipmap(struct pipe_context *pipe, ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST; ctx->sampler.normalized_coords = 1; - -#if 0 - /* viewport */ + /* viewport state (identity, verts are in wincoords) */ ctx->viewport.scale[0] = 1.0; ctx->viewport.scale[1] = 1.0; ctx->viewport.scale[2] = 1.0; @@ -734,7 +733,6 @@ util_create_gen_mipmap(struct pipe_context *pipe, ctx->viewport.translate[1] = 0.0; ctx->viewport.translate[2] = 0.0; ctx->viewport.translate[3] = 0.0; -#endif /* vertex shader */ { @@ -825,26 +823,6 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx) } -#if 0 -static void -simple_viewport(struct pipe_context *pipe, uint width, uint height) -{ - struct pipe_viewport_state vp; - - vp.scale[0] = 0.5 * width; - vp.scale[1] = -0.5 * height; - vp.scale[2] = 1.0; - vp.scale[3] = 1.0; - vp.translate[0] = 0.5 * width; - vp.translate[1] = 0.5 * height; - vp.translate[2] = 0.0; - vp.translate[3] = 0.0; - - pipe->set_viewport_state(pipe, &vp); -} -#endif - - /** * Generate mipmap images. It's assumed all needed texture memory is * already allocated. @@ -882,17 +860,16 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, cso_save_framebuffer(ctx->cso); cso_save_fragment_shader(ctx->cso); cso_save_vertex_shader(ctx->cso); + cso_save_viewport(ctx->cso); /* bind our state */ cso_set_blend(ctx->cso, &ctx->blend); cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); cso_set_rasterizer(ctx->cso, &ctx->rasterizer); + cso_set_viewport(ctx->cso, &ctx->viewport); cso_set_fragment_shader_handle(ctx->cso, ctx->fs); cso_set_vertex_shader_handle(ctx->cso, ctx->vs); -#if 0 - cso_set_viewport(ctx->cso, &ctx->viewport); -#endif /* init framebuffer state */ memset(&fb, 0, sizeof(fb)); @@ -928,9 +905,6 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, ctx->sampler.lod_bias = (float) srcLevel; cso_single_sampler(ctx->cso, 0, &ctx->sampler); cso_single_sampler_done(ctx->cso); -#if 0 - simple_viewport(pipe, pt->width[dstLevel], pt->height[dstLevel]); -#endif cso_set_sampler_textures(ctx->cso, 1, &pt); @@ -958,4 +932,5 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, cso_restore_framebuffer(ctx->cso); cso_restore_fragment_shader(ctx->cso); cso_restore_vertex_shader(ctx->cso); + cso_restore_viewport(ctx->cso); } -- cgit v1.2.3 From 0939a986a8a7dbb8d30c505fcaad8d2718a6527d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 22 Apr 2008 16:25:55 -0600 Subject: gallium: update comment about bypass clipping/viewport --- src/gallium/auxiliary/util/u_gen_mipmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index ff5881b979..dfdb5f16fe 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -713,7 +713,7 @@ util_create_gen_mipmap(struct pipe_context *pipe, memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer)); ctx->rasterizer.front_winding = PIPE_WINDING_CW; ctx->rasterizer.cull_mode = PIPE_WINDING_NONE; - ctx->rasterizer.bypass_clipping = 1; /* bypasses viewport too */ + ctx->rasterizer.bypass_clipping = 1; /*ctx->rasterizer.bypass_vs = 1;*/ /* sampler state */ -- cgit v1.2.3 From 22cbf6a70437dfa12c10600e5a496ea5771cfc56 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 22 Apr 2008 16:29:48 -0600 Subject: gallium: setup an identity viewport This fixes broken blits. --- src/gallium/auxiliary/util/u_blit.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 0938b03820..1105066cb8 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -57,6 +57,7 @@ struct blit_state struct pipe_depth_stencil_alpha_state depthstencil; struct pipe_rasterizer_state rasterizer; struct pipe_sampler_state sampler; + struct pipe_viewport_state viewport; struct pipe_shader_state vert_shader; struct pipe_shader_state frag_shader; @@ -100,7 +101,7 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer)); ctx->rasterizer.front_winding = PIPE_WINDING_CW; ctx->rasterizer.cull_mode = PIPE_WINDING_NONE; - ctx->rasterizer.bypass_clipping = 1; /* bypasses viewport too */ + ctx->rasterizer.bypass_clipping = 1; /*ctx->rasterizer.bypass_vs = 1;*/ /* samplers */ @@ -113,8 +114,7 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) ctx->sampler.mag_img_filter = 0; /* set later */ ctx->sampler.normalized_coords = 1; -#if 0 - /* viewport */ + /* viewport (identity, we setup vertices in wincoords) */ ctx->viewport.scale[0] = 1.0; ctx->viewport.scale[1] = 1.0; ctx->viewport.scale[2] = 1.0; @@ -123,7 +123,6 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) ctx->viewport.translate[1] = 0.0; ctx->viewport.translate[2] = 0.0; ctx->viewport.translate[3] = 0.0; -#endif /* vertex shader */ { @@ -302,11 +301,13 @@ util_blit_pixels(struct blit_state *ctx, cso_save_framebuffer(ctx->cso); cso_save_fragment_shader(ctx->cso); cso_save_vertex_shader(ctx->cso); + cso_save_viewport(ctx->cso); /* set misc state we care about */ cso_set_blend(ctx->cso, &ctx->blend); cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); cso_set_rasterizer(ctx->cso, &ctx->rasterizer); + cso_set_viewport(ctx->cso, &ctx->viewport); /* sampler */ ctx->sampler.min_img_filter = filter; @@ -348,6 +349,7 @@ util_blit_pixels(struct blit_state *ctx, cso_restore_framebuffer(ctx->cso); cso_restore_fragment_shader(ctx->cso); cso_restore_vertex_shader(ctx->cso); + cso_restore_viewport(ctx->cso); /* free the texture */ pipe_surface_reference(&texSurf, NULL); -- cgit v1.2.3 From 500c41b8ba3ad025c69e63a2c74da90674a8037d Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 22 Apr 2008 18:11:58 -0400 Subject: Fix reporting of clipped vertices. --- src/gallium/auxiliary/draw/draw_pt_post_vs.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index e5e38fa264..f98e130ed6 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -94,7 +94,7 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, struct vertex_header *out = vertices; const float *scale = pvs->draw->viewport.scale; const float *trans = pvs->draw->viewport.translate; - boolean clipped = FALSE; + unsigned clipped = 0; unsigned j; if (0) debug_printf("%s\n"); @@ -110,11 +110,10 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, out->clipmask = compute_clipmask_gl(out->clip, pvs->draw->plane, pvs->draw->nr_planes); + clipped += out->clipmask; if (out->clipmask == 0) { - clipped = TRUE; - /* divide by w */ float w = 1.0f / out->data[0][3]; -- cgit v1.2.3 From f088b53769aacbee20135d912c33d688b6002011 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 22 Apr 2008 18:20:34 -0400 Subject: Cache translate's structs for emits and fetches. Results in a fair speed improvement. --- src/gallium/auxiliary/draw/draw_pt_emit.c | 63 ++++++++++++++++++++++++++--- src/gallium/auxiliary/draw/draw_pt_fetch.c | 65 ++++++++++++++++++++++++++---- 2 files changed, 114 insertions(+), 14 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 490da4cca3..d35329aba0 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -33,13 +33,66 @@ #include "draw/draw_pt.h" #include "translate/translate.h" +#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_hash.h" struct pt_emit { struct draw_context *draw; struct translate *translate; + + struct cso_hash *hash; }; +static INLINE unsigned translate_hash_key_size(struct translate_key *key) +{ + unsigned size = sizeof(struct translate_key) - + sizeof(struct translate_element) * (PIPE_MAX_ATTRIBS - key->nr_elements); + return size; +} + +static INLINE unsigned create_key(struct translate_key *key) +{ + unsigned hash_key; + unsigned size = translate_hash_key_size(key); + /*debug_printf("key size = %d, (els = %d)\n", + size, key->nr_elements);*/ + hash_key = cso_construct_key(key, size); + return hash_key; +} + +static struct translate *cached_translate(struct pt_emit *emit, + struct translate_key *key) +{ + unsigned hash_key = create_key(key); + struct cso_hash_iter iter = cso_hash_find(emit->hash, hash_key); + struct translate *translate = 0; + + if (cso_hash_iter_is_null(iter)) { + translate = translate_create(key); + cso_hash_insert(emit->hash, hash_key, translate); + /*debug_printf("\tCREATED with %d\n", hash_key);*/ + } else { + translate = cso_hash_iter_data(iter); + /*debug_printf("\tOK with %d\n", hash_key);*/ + } + + return translate; +} + + +static INLINE void delete_translates(struct pt_emit *emit) +{ + struct cso_hash *hash = emit->hash; + struct cso_hash_iter iter = cso_hash_first_node(hash); + while (!cso_hash_iter_is_null(iter)) { + struct translate *state = (struct translate*)cso_hash_iter_data(iter); + iter = cso_hash_iter_next(iter); + if (state) { + state->release(state); + } + } +} void draw_pt_emit_prepare( struct pt_emit *emit, unsigned prim ) @@ -123,10 +176,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit, if (!emit->translate || memcmp(&emit->translate->key, &hw_key, sizeof(hw_key)) != 0) { - if (emit->translate) - emit->translate->release(emit->translate); - - emit->translate = translate_create( &hw_key ); + emit->translate = cached_translate(emit, &hw_key); } } @@ -188,14 +238,15 @@ struct pt_emit *draw_pt_emit_create( struct draw_context *draw ) return NULL; emit->draw = draw; + emit->hash = cso_hash_create(); return emit; } void draw_pt_emit_destroy( struct pt_emit *emit ) { - if (emit->translate) - emit->translate->release( emit->translate ); + delete_translates(emit); + cso_hash_delete(emit->hash); FREE(emit); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index f98bce6eac..93da811ed8 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -33,16 +33,67 @@ #include "draw/draw_pt.h" #include "translate/translate.h" +#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_hash.h" struct pt_fetch { struct draw_context *draw; struct translate *translate; - + unsigned vertex_size; + + struct cso_hash *hash; }; +static INLINE unsigned translate_hash_key_size(struct translate_key *key) +{ + unsigned size = sizeof(struct translate_key) - + sizeof(struct translate_element) * (PIPE_MAX_ATTRIBS - key->nr_elements); + return size; +} + +static INLINE unsigned create_key(struct translate_key *key) +{ + unsigned hash_key; + unsigned size = translate_hash_key_size(key); + /*debug_printf("key size = %d, (els = %d)\n", + size, key->nr_elements);*/ + hash_key = cso_construct_key(key, size); + return hash_key; +} + +static struct translate *cached_translate(struct pt_fetch *fetch, + struct translate_key *key) +{ + unsigned hash_key = create_key(key); + struct cso_hash_iter iter = cso_hash_find(fetch->hash, hash_key); + struct translate *translate = 0; + + if (cso_hash_iter_is_null(iter)) { + translate = translate_create(key); + cso_hash_insert(fetch->hash, hash_key, translate); + /*debug_printf("\tCREATED with %d\n", hash_key);*/ + } else { + translate = cso_hash_iter_data(iter); + /*debug_printf("\tOK with %d\n", hash_key);*/ + } + + return translate; +} +static INLINE void delete_translates(struct pt_fetch *fetch) +{ + struct cso_hash *hash = fetch->hash; + struct cso_hash_iter iter = cso_hash_first_node(hash); + while (!cso_hash_iter_is_null(iter)) { + struct translate *state = (struct translate*)cso_hash_iter_data(iter); + iter = cso_hash_iter_next(iter); + if (state) { + state->release(state); + } + } +} /* Perform the fetch from API vertex elements & vertex buffers, to a * contiguous set of float[4] attributes as required for the @@ -112,10 +163,7 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, if (!fetch->translate || memcmp(&fetch->translate->key, &key, sizeof(key)) != 0) { - if (fetch->translate) - fetch->translate->release(fetch->translate); - - fetch->translate = translate_create( &key ); + fetch->translate = cached_translate(fetch, &key); { static struct vertex_header vh = { 0, 0, 0, 0xffff }; @@ -159,15 +207,16 @@ struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ) struct pt_fetch *fetch = CALLOC_STRUCT(pt_fetch); if (!fetch) return NULL; - + fetch->draw = draw; + fetch->hash = cso_hash_create(); return fetch; } void draw_pt_fetch_destroy( struct pt_fetch *fetch ) { - if (fetch->translate) - fetch->translate->release( fetch->translate ); + delete_translates(fetch); + cso_hash_delete(fetch->hash); FREE(fetch); } -- cgit v1.2.3 From d8f2e400cfe6e32e82d1656d3483905343124b97 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 22 Apr 2008 18:30:31 -0400 Subject: Fix a crash. Rasterizer can be null --- src/gallium/auxiliary/draw/draw_context.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index b916b27877..db92a53ed2 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -122,8 +122,9 @@ void draw_set_rasterizer_state( struct draw_context *draw, draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); draw->rasterizer = raster; - draw->bypass_clipping = (draw->rasterizer->bypass_clipping || - draw->driver.bypass_clipping); + draw->bypass_clipping = + ((draw->rasterizer && draw->rasterizer->bypass_clipping) || + draw->driver.bypass_clipping); } -- cgit v1.2.3 From 36feb5eacf16467d06d5cd9f63d19f17f933f1ef Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 22 Apr 2008 18:32:29 -0400 Subject: In case the 'func' is deleting the state move the iterator before calling it. --- src/gallium/auxiliary/cso_cache/cso_cache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index f607528fdc..63464e0705 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -334,10 +334,10 @@ void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type, iter = cso_hash_first_node(hash); while (!cso_hash_iter_is_null(iter)) { void *state = cso_hash_iter_data(iter); + iter = cso_hash_iter_next(iter); if (state) { func(state, user_data); } - iter = cso_hash_iter_next(iter); } } -- cgit v1.2.3 From 91e37b71404a83b5e4258e129a2753f7c8fd0706 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 22 Apr 2008 19:10:52 -0600 Subject: gallium: fix bad logic in bind_pstip_fragment_shader(): use &&, not || --- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 61bdd29aa0..aec485a6e7 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -469,7 +469,7 @@ pstip_create_sampler(struct pstip_stage *pstip) static boolean bind_pstip_fragment_shader(struct pstip_stage *pstip) { - if (!pstip->fs->pstip_fs || + if (!pstip->fs->pstip_fs && !generate_pstip_fs(pstip)) return FALSE; -- cgit v1.2.3 From 72fd5b9c5a78792ad8c1fe7c8713a3583008c50a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 22 Apr 2008 19:11:59 -0600 Subject: gallium: added a flushing_vcache flag, test in draw_do_flush() Fixes broken polygon stipple, aaline, aapoint stages --- src/gallium/auxiliary/draw/draw_context.c | 2 +- src/gallium/auxiliary/draw/draw_private.h | 1 + src/gallium/auxiliary/draw/draw_pt_vcache.c | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index db92a53ed2..f90187816b 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -367,7 +367,7 @@ draw_set_mapped_element_buffer( struct draw_context *draw, */ void draw_do_flush( struct draw_context *draw, unsigned flags ) { - if (!draw->flushing) + if (!draw->flushing && !draw->vcache_flushing) { draw->flushing = TRUE; diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 83b81f66a0..39aa81b43c 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -179,6 +179,7 @@ struct draw_context } driver; boolean flushing; + boolean vcache_flushing; boolean bypass_clipping; /* set if either api or driver bypass_clipping true */ /* pipe state that we need: */ diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 153055417d..afcff41043 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -63,6 +63,7 @@ struct vcache_frontend { static void vcache_flush( struct vcache_frontend *vcache ) { + vcache->draw->vcache_flushing = TRUE; if (vcache->draw_count) { vcache->middle->run( vcache->middle, vcache->fetch_elts, @@ -74,6 +75,7 @@ static void vcache_flush( struct vcache_frontend *vcache ) memset(vcache->in, ~0, sizeof(vcache->in)); vcache->fetch_count = 0; vcache->draw_count = 0; + vcache->draw->vcache_flushing = FALSE; } static void vcache_check_flush( struct vcache_frontend *vcache ) -- cgit v1.2.3 From 6fc530ccda2971a5d99a955ad90ae9762238040f Mon Sep 17 00:00:00 2001 From: Ben Skeggs Date: Wed, 23 Apr 2008 12:38:37 +1000 Subject: fix non-i386 builds --- src/gallium/auxiliary/translate/translate.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c index 86ebeae2a7..b04bc6eefd 100644 --- a/src/gallium/auxiliary/translate/translate.c +++ b/src/gallium/auxiliary/translate/translate.c @@ -38,9 +38,11 @@ struct translate *translate_create( const struct translate_key *key ) { struct translate *translate = NULL; +#if defined(__i386__) || defined(__386__) || defined(i386) translate = translate_sse2_create( key ); if (translate) return translate; +#endif return translate_generic_create( key ); } -- cgit v1.2.3 From d6a965972b48fec95a2bcb778cf05d0468ba2573 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 23 Apr 2008 13:51:50 +0100 Subject: draw: add missing break (Jakob) --- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index afd5f5544d..1eb7c4bc38 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -136,7 +136,7 @@ emit_vertex( struct vbuf_stage *vbuf, vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0); vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr); - if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); + if (1) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); vbuf->vertex_ptr += vbuf->vertex_size/4; vertex->vertex_id = vbuf->nr_vertices++; @@ -254,6 +254,7 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim ) case EMIT_4UB: output_format = PIPE_FORMAT_B8G8R8A8_UNORM; emit_sz = 4 * sizeof(ubyte); + break; default: assert(0); output_format = PIPE_FORMAT_NONE; -- cgit v1.2.3 From 43be7a4819ad342e1cb3f8e3fb966a8a78dc2c1b Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 23 Apr 2008 13:56:05 +0100 Subject: draw: remove stupid debug (Keith) --- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 1eb7c4bc38..2a19e6916a 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -136,7 +136,7 @@ emit_vertex( struct vbuf_stage *vbuf, vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0); vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr); - if (1) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); + if (0) draw_dump_emitted_vertex(vbuf->vinfo, (uint8_t *)vbuf->vertex_ptr); vbuf->vertex_ptr += vbuf->vertex_size/4; vertex->vertex_id = vbuf->nr_vertices++; -- cgit v1.2.3 From 333976c90aafa602816defef3e4cc4a418601a51 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Apr 2008 10:29:52 -0600 Subject: gallium: fix broken hashing for vertex translation It seems we get hash collisions fairly easily and the code as it was didn't deal with that properly. I think we need a simpler hashing interface... --- src/gallium/auxiliary/draw/draw_pt_fetch.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 93da811ed8..a8d4be5b64 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -70,13 +70,21 @@ static struct translate *cached_translate(struct pt_fetch *fetch, struct cso_hash_iter iter = cso_hash_find(fetch->hash, hash_key); struct translate *translate = 0; - if (cso_hash_iter_is_null(iter)) { + while (!cso_hash_iter_is_null(iter)) { + void *iter_data = cso_hash_iter_data(iter); + if (memcmp(iter_data, key, sizeof(*key)) == 0) { + /* found */ + translate = cso_hash_iter_data(iter); + break; + } + iter = cso_hash_iter_next(iter); + /*debug_printf("\tOK with %d\n", hash_key);*/ + } + + if (!translate) { + /* create/insert */ translate = translate_create(key); cso_hash_insert(fetch->hash, hash_key, translate); - /*debug_printf("\tCREATED with %d\n", hash_key);*/ - } else { - translate = cso_hash_iter_data(iter); - /*debug_printf("\tOK with %d\n", hash_key);*/ } return translate; -- cgit v1.2.3 From 53cf833af9776d47bd4c14906784aa8f2027dc4b Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 24 Apr 2008 00:10:51 +0900 Subject: gallium: Add extern "C" to the headers. --- src/gallium/auxiliary/util/u_blit.h | 8 ++++++++ src/gallium/auxiliary/util/u_gen_mipmap.h | 10 ++++++++++ 2 files changed, 18 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h index 61f1d9bb32..0ce9732e62 100644 --- a/src/gallium/auxiliary/util/u_blit.h +++ b/src/gallium/auxiliary/util/u_blit.h @@ -30,7 +30,11 @@ #define U_BLIT_H +#ifdef __cplusplus +extern "C" { +#endif + struct pipe_context; struct pipe_surface; struct cso_context; @@ -58,4 +62,8 @@ util_blit_pixels(struct blit_state *ctx, float z, uint filter); +#ifdef __cplusplus +} +#endif + #endif diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.h b/src/gallium/auxiliary/util/u_gen_mipmap.h index bd9af54fb7..3277024f07 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.h +++ b/src/gallium/auxiliary/util/u_gen_mipmap.h @@ -31,6 +31,11 @@ #include "pipe/p_state.h" +#ifdef __cplusplus +extern "C" { +#endif + + struct pipe_context; struct pipe_texture; struct cso_context; @@ -52,4 +57,9 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, struct pipe_texture *pt, uint face, uint baseLevel, uint lastLevel, uint filter); + +#ifdef __cplusplus +} +#endif + #endif -- cgit v1.2.3 From a75a3df851339c782e045e01c2b21ffadb1e09f5 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 24 Apr 2008 01:59:57 +0900 Subject: pipebuffer: New function to flush the buffer cache. --- src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 3 +++ src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 12 +++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 96f9af3825..8de286e3f9 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -144,6 +144,9 @@ struct pb_manager * pb_cache_manager_create(struct pb_manager *provider, unsigned usecs); +void +pb_cache_flush(struct pb_manager *mgr); + /** * Fenced buffer manager. diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 543fd51253..4bd3f94a6c 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -294,8 +294,8 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, } -static void -pb_cache_manager_destroy(struct pb_manager *_mgr) +void +pb_cache_flush(struct pb_manager *_mgr) { struct pb_cache_manager *mgr = pb_cache_manager(_mgr); struct list_head *curr, *next; @@ -311,7 +311,13 @@ pb_cache_manager_destroy(struct pb_manager *_mgr) next = curr->next; } _glthread_UNLOCK_MUTEX(mgr->mutex); - +} + + +static void +pb_cache_manager_destroy(struct pb_manager *mgr) +{ + pb_cache_flush(mgr); FREE(mgr); } -- cgit v1.2.3 From 4f93a3a680560940630c44be0066b72a86ff008a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 24 Apr 2008 02:13:31 +0900 Subject: gallium: Fix texture refcount leak. --- src/gallium/auxiliary/cso_cache/cso_context.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 0523cb1949..d246dff433 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -104,6 +104,8 @@ out: static void cso_release_all( struct cso_context *ctx ) { + unsigned i; + if (ctx->pipe) { ctx->pipe->bind_blend_state( ctx->pipe, NULL ); ctx->pipe->bind_rasterizer_state( ctx->pipe, NULL ); @@ -113,6 +115,11 @@ static void cso_release_all( struct cso_context *ctx ) ctx->pipe->bind_vs_state( ctx->pipe, NULL ); } + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + pipe_texture_reference(&ctx->textures[i], NULL); + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + pipe_texture_reference(&ctx->textures_saved[i], NULL); + if (ctx->cache) { cso_cache_delete( ctx->cache ); ctx->cache = NULL; -- cgit v1.2.3 From e1180c2d694851ed12e86027aa406ee20546e6d3 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 23 Apr 2008 13:32:41 -0400 Subject: fix the simple hash finding function and use it --- src/gallium/auxiliary/cso_cache/cso_cache.c | 4 ++-- src/gallium/auxiliary/draw/draw_pt_emit.c | 14 ++++++-------- src/gallium/auxiliary/draw/draw_pt_fetch.c | 17 ++++------------- 3 files changed, 12 insertions(+), 23 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index 63464e0705..096875807b 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -255,9 +255,9 @@ void *cso_hash_find_data_from_template( struct cso_hash *hash, while (!cso_hash_iter_is_null(iter)) { void *iter_data = cso_hash_iter_data(iter); if (!memcmp(iter_data, templ, size)) { - /* Return the payload: + /* We found a match */ - return (unsigned char *)iter_data + size; + return iter_data; } iter = cso_hash_iter_next(iter); } diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index d35329aba0..92ad8f7049 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -65,16 +65,14 @@ static struct translate *cached_translate(struct pt_emit *emit, struct translate_key *key) { unsigned hash_key = create_key(key); - struct cso_hash_iter iter = cso_hash_find(emit->hash, hash_key); - struct translate *translate = 0; - - if (cso_hash_iter_is_null(iter)) { + struct translate *translate = (struct translate*) + cso_hash_find_data_from_template(emit->hash, + hash_key, + key, sizeof(*key)); + if (!translate) { + /* create/insert */ translate = translate_create(key); cso_hash_insert(emit->hash, hash_key, translate); - /*debug_printf("\tCREATED with %d\n", hash_key);*/ - } else { - translate = cso_hash_iter_data(iter); - /*debug_printf("\tOK with %d\n", hash_key);*/ } return translate; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index a8d4be5b64..013d16e1bc 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -67,19 +67,10 @@ static struct translate *cached_translate(struct pt_fetch *fetch, struct translate_key *key) { unsigned hash_key = create_key(key); - struct cso_hash_iter iter = cso_hash_find(fetch->hash, hash_key); - struct translate *translate = 0; - - while (!cso_hash_iter_is_null(iter)) { - void *iter_data = cso_hash_iter_data(iter); - if (memcmp(iter_data, key, sizeof(*key)) == 0) { - /* found */ - translate = cso_hash_iter_data(iter); - break; - } - iter = cso_hash_iter_next(iter); - /*debug_printf("\tOK with %d\n", hash_key);*/ - } + struct translate *translate = (struct translate*) + cso_hash_find_data_from_template(fetch->hash, + hash_key, + key, sizeof(*key)); if (!translate) { /* create/insert */ -- cgit v1.2.3 From 5d873c87186fd3a59b46b4e1e0c987120aa961a7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Apr 2008 11:36:47 -0600 Subject: gallium: additional debug code --- src/gallium/auxiliary/draw/draw_pt_post_vs.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index f98e130ed6..65cb8ba701 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -61,6 +61,12 @@ compute_clipmask_gl(const float *clip, /*const*/ float plane[][4], unsigned nr) unsigned mask = 0x0; unsigned i; +#if 0 + debug_printf("compute clipmask %f %f %f %f\n", + clip[0], clip[1], clip[2], clip[3]); + assert(clip[3] != 0.0); +#endif + /* Do the hardwired planes first: */ if (-clip[0] + clip[3] < 0) mask |= (1<<0); @@ -122,6 +128,13 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, out->data[0][1] = out->data[0][1] * w * scale[1] + trans[1]; out->data[0][2] = out->data[0][2] * w * scale[2] + trans[2]; out->data[0][3] = w; +#if 0 + debug_printf("post viewport: %f %f %f %f\n", + out->data[0][0], + out->data[0][1], + out->data[0][2], + out->data[0][3]); +#endif } out = (struct vertex_header *)( (char *)out + stride ); -- cgit v1.2.3 From 5fcd84ab39318a371253b1a7285bc657fb82efed Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 23 Apr 2008 14:00:13 -0400 Subject: Create a sharable translate_cache and use it. --- src/gallium/auxiliary/cso_cache/cso_hash.h | 4 +- src/gallium/auxiliary/draw/draw_pt_emit.c | 65 ++------------ src/gallium/auxiliary/draw/draw_pt_fetch.c | 68 ++------------- src/gallium/auxiliary/translate/Makefile | 3 +- src/gallium/auxiliary/translate/SConscript | 1 + src/gallium/auxiliary/translate/translate.h | 1 - src/gallium/auxiliary/translate/translate_cache.c | 100 ++++++++++++++++++++++ src/gallium/auxiliary/translate/translate_cache.h | 54 ++++++++++++ 8 files changed, 173 insertions(+), 123 deletions(-) create mode 100644 src/gallium/auxiliary/translate/translate_cache.c create mode 100644 src/gallium/auxiliary/translate/translate_cache.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h index 73c4742006..85f3e276c6 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.h +++ b/src/gallium/auxiliary/cso_cache/cso_hash.h @@ -106,12 +106,12 @@ struct cso_hash_iter cso_hash_iter_prev(struct cso_hash_iter iter); /** - * Convenience routine to iterate over the collision list while doing a memory + * Convenience routine to iterate over the collision list while doing a memory * comparison to see which entry in the list is a direct copy of our template * and returns that entry. */ void *cso_hash_find_data_from_template( struct cso_hash *hash, - unsigned hash_key, + unsigned hash_key, void *templ, int size ); diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 92ad8f7049..c6d9537530 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -32,66 +32,16 @@ #include "draw/draw_vertex.h" #include "draw/draw_pt.h" #include "translate/translate.h" - -#include "cso_cache/cso_cache.h" -#include "cso_cache/cso_hash.h" +#include "translate/translate_cache.h" struct pt_emit { struct draw_context *draw; struct translate *translate; - struct cso_hash *hash; + struct translate_cache *cache; }; -static INLINE unsigned translate_hash_key_size(struct translate_key *key) -{ - unsigned size = sizeof(struct translate_key) - - sizeof(struct translate_element) * (PIPE_MAX_ATTRIBS - key->nr_elements); - return size; -} - -static INLINE unsigned create_key(struct translate_key *key) -{ - unsigned hash_key; - unsigned size = translate_hash_key_size(key); - /*debug_printf("key size = %d, (els = %d)\n", - size, key->nr_elements);*/ - hash_key = cso_construct_key(key, size); - return hash_key; -} - -static struct translate *cached_translate(struct pt_emit *emit, - struct translate_key *key) -{ - unsigned hash_key = create_key(key); - struct translate *translate = (struct translate*) - cso_hash_find_data_from_template(emit->hash, - hash_key, - key, sizeof(*key)); - if (!translate) { - /* create/insert */ - translate = translate_create(key); - cso_hash_insert(emit->hash, hash_key, translate); - } - - return translate; -} - - -static INLINE void delete_translates(struct pt_emit *emit) -{ - struct cso_hash *hash = emit->hash; - struct cso_hash_iter iter = cso_hash_first_node(hash); - while (!cso_hash_iter_is_null(iter)) { - struct translate *state = (struct translate*)cso_hash_iter_data(iter); - iter = cso_hash_iter_next(iter); - if (state) { - state->release(state); - } - } -} - void draw_pt_emit_prepare( struct pt_emit *emit, unsigned prim ) { @@ -169,12 +119,10 @@ void draw_pt_emit_prepare( struct pt_emit *emit, hw_key.nr_elements = vinfo->num_attribs; hw_key.output_stride = vinfo->size * 4; - /* Don't bother with caching at this stage: - */ if (!emit->translate || - memcmp(&emit->translate->key, &hw_key, sizeof(hw_key)) != 0) + memcmp(&emit->translate->key, &hw_key, sizeof(hw_key)) != 0) { - emit->translate = cached_translate(emit, &hw_key); + emit->translate = translate_cache_find(emit->cache, &hw_key); } } @@ -236,15 +184,14 @@ struct pt_emit *draw_pt_emit_create( struct draw_context *draw ) return NULL; emit->draw = draw; - emit->hash = cso_hash_create(); + emit->cache = translate_cache_create(); return emit; } void draw_pt_emit_destroy( struct pt_emit *emit ) { - delete_translates(emit); - cso_hash_delete(emit->hash); + translate_cache_destroy(emit->cache); FREE(emit); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 013d16e1bc..8183c51676 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -32,9 +32,8 @@ #include "draw/draw_vertex.h" #include "draw/draw_pt.h" #include "translate/translate.h" +#include "translate/translate_cache.h" -#include "cso_cache/cso_cache.h" -#include "cso_cache/cso_hash.h" struct pt_fetch { struct draw_context *draw; @@ -43,57 +42,9 @@ struct pt_fetch { unsigned vertex_size; - struct cso_hash *hash; + struct translate_cache *cache; }; -static INLINE unsigned translate_hash_key_size(struct translate_key *key) -{ - unsigned size = sizeof(struct translate_key) - - sizeof(struct translate_element) * (PIPE_MAX_ATTRIBS - key->nr_elements); - return size; -} - -static INLINE unsigned create_key(struct translate_key *key) -{ - unsigned hash_key; - unsigned size = translate_hash_key_size(key); - /*debug_printf("key size = %d, (els = %d)\n", - size, key->nr_elements);*/ - hash_key = cso_construct_key(key, size); - return hash_key; -} - -static struct translate *cached_translate(struct pt_fetch *fetch, - struct translate_key *key) -{ - unsigned hash_key = create_key(key); - struct translate *translate = (struct translate*) - cso_hash_find_data_from_template(fetch->hash, - hash_key, - key, sizeof(*key)); - - if (!translate) { - /* create/insert */ - translate = translate_create(key); - cso_hash_insert(fetch->hash, hash_key, translate); - } - - return translate; -} - -static INLINE void delete_translates(struct pt_fetch *fetch) -{ - struct cso_hash *hash = fetch->hash; - struct cso_hash_iter iter = cso_hash_first_node(hash); - while (!cso_hash_iter_is_null(iter)) { - struct translate *state = (struct translate*)cso_hash_iter_data(iter); - iter = cso_hash_iter_next(iter); - if (state) { - state->release(state); - } - } -} - /* Perform the fetch from API vertex elements & vertex buffers, to a * contiguous set of float[4] attributes as required for the * vertex_shader->run_linear() method. @@ -157,17 +108,15 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, key.output_stride = vertex_size; - /* Don't bother with caching at this stage: - */ if (!fetch->translate || - memcmp(&fetch->translate->key, &key, sizeof(key)) != 0) + memcmp(&fetch->translate->key, &key, sizeof(key)) != 0) { - fetch->translate = cached_translate(fetch, &key); + fetch->translate = translate_cache_find(fetch->cache, &key); { static struct vertex_header vh = { 0, 0, 0, 0xffff }; - fetch->translate->set_buffer(fetch->translate, - draw->pt.nr_vertex_buffers, + fetch->translate->set_buffer(fetch->translate, + draw->pt.nr_vertex_buffers, &vh, 0); } @@ -208,14 +157,13 @@ struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ) return NULL; fetch->draw = draw; - fetch->hash = cso_hash_create(); + fetch->cache = translate_cache_create(); return fetch; } void draw_pt_fetch_destroy( struct pt_fetch *fetch ) { - delete_translates(fetch); - cso_hash_delete(fetch->hash); + translate_cache_destroy(fetch->cache); FREE(fetch); } diff --git a/src/gallium/auxiliary/translate/Makefile b/src/gallium/auxiliary/translate/Makefile index 39dfb0de30..ad2a5b705e 100644 --- a/src/gallium/auxiliary/translate/Makefile +++ b/src/gallium/auxiliary/translate/Makefile @@ -6,7 +6,8 @@ LIBNAME = translate C_SOURCES = \ translate_generic.c \ translate_sse.c \ - translate.c + translate.c \ + translate_cache.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/translate/SConscript b/src/gallium/auxiliary/translate/SConscript index 7608908915..9553a67537 100644 --- a/src/gallium/auxiliary/translate/SConscript +++ b/src/gallium/auxiliary/translate/SConscript @@ -6,6 +6,7 @@ translate = env.ConvenienceLibrary( 'translate_generic.c', 'translate_sse.c', 'translate.c', + 'translate_cache.c', ]) auxiliaries.insert(0, translate) diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index d95d1ac4f3..6c15d7e4dc 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -96,7 +96,6 @@ struct translate *translate_lookup_or_create( struct translate_context *tctx, struct translate *translate_create( const struct translate_key *key ); - /******************************************************************************* * Private: */ diff --git a/src/gallium/auxiliary/translate/translate_cache.c b/src/gallium/auxiliary/translate/translate_cache.c new file mode 100644 index 0000000000..c14f37c42f --- /dev/null +++ b/src/gallium/auxiliary/translate/translate_cache.c @@ -0,0 +1,100 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "pipe/p_state.h" +#include "translate.h" + +#include "cso_cache/cso_cache.h" +#include "cso_cache/cso_hash.h" + +struct translate_cache { + struct cso_hash *hash; +}; + +struct translate_cache * translate_cache_create() +{ + struct translate_cache *cache = MALLOC_STRUCT(translate_cache); + cache->hash = cso_hash_create(); +} + + +static INLINE void delete_translates(struct translate_cache *cache) +{ + struct cso_hash *hash = cache->hash; + struct cso_hash_iter iter = cso_hash_first_node(hash); + while (!cso_hash_iter_is_null(iter)) { + struct translate *state = (struct translate*)cso_hash_iter_data(iter); + iter = cso_hash_iter_next(iter); + if (state) { + state->release(state); + } + } +} + +void translate_cache_destroy(struct translate_cache *cache) +{ + delete_translates(cache); + cso_hash_delete(cache->hash); + FREE(cache); +} + + +static INLINE unsigned translate_hash_key_size(struct translate_key *key) +{ + unsigned size = sizeof(struct translate_key) - + sizeof(struct translate_element) * (PIPE_MAX_ATTRIBS - key->nr_elements); + return size; +} + +static INLINE unsigned create_key(struct translate_key *key) +{ + unsigned hash_key; + unsigned size = translate_hash_key_size(key); + /*debug_printf("key size = %d, (els = %d)\n", + size, key->nr_elements);*/ + hash_key = cso_construct_key(key, size); + return hash_key; +} + +struct translate * translate_cache_find(struct translate_cache *cache, + struct translate_key *key) +{ + unsigned hash_key = create_key(key); + struct translate *translate = (struct translate*) + cso_hash_find_data_from_template(cache->hash, + hash_key, + key, sizeof(*key)); + + if (!translate) { + /* create/insert */ + translate = translate_create(key); + cso_hash_insert(cache->hash, hash_key, translate); + } + + return translate; +} diff --git a/src/gallium/auxiliary/translate/translate_cache.h b/src/gallium/auxiliary/translate/translate_cache.h new file mode 100644 index 0000000000..63fc57b7ea --- /dev/null +++ b/src/gallium/auxiliary/translate/translate_cache.h @@ -0,0 +1,54 @@ +/* + * Copyright 2008 Tungsten Graphics, inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * TUNGSTEN GRAPHICS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ +#ifndef _TRANSLATE_CACHE_H +#define _TRANSLATE_CACHE_H + + +/******************************************************************************* + * Translate cache. + * Simply used to cache created translates. Avoids unecessary creation of + * translate's if one suitable for a given translate_key has already been + * created. + * + * Note: this functionality depends and requires the CSO module. + */ +struct translate_cache; + +struct translate_key; +struct translate; + +struct translate_cache *translate_cache_create(); +void translate_cache_destroy(struct translate_cache *cache); + +/** + * Will try to find a translate structure matched by the given key. + * If such a structure doesn't exist in the cache the function + * will automatically create it, insert it in the cache and + * return the created version. + * + */ +struct translate *translate_cache_find(struct translate_cache *cache, + struct translate_key *key); + +#endif -- cgit v1.2.3 From 95f8f8863a80ce1e584160d4d085213a9bbaef12 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 23 Apr 2008 14:07:53 -0400 Subject: Add translate cache to fetch_emit stage and add out of memory checks to code creating the cache. --- src/gallium/auxiliary/draw/draw_pt_emit.c | 4 ++++ src/gallium/auxiliary/draw/draw_pt_fetch.c | 5 +++++ src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 20 +++++++++++++------- 3 files changed, 22 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index c6d9537530..35707af8a8 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -185,6 +185,10 @@ struct pt_emit *draw_pt_emit_create( struct draw_context *draw ) emit->draw = draw; emit->cache = translate_cache_create(); + if (!emit->cache) { + FREE(emit); + return NULL; + } return emit; } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 8183c51676..cc61b113fd 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -158,6 +158,11 @@ struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ) fetch->draw = draw; fetch->cache = translate_cache_create(); + if (!fetch->cache) { + FREE(fetch); + return NULL; + } + return fetch; } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 68b2c5b1e3..a4de341df8 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -37,6 +37,7 @@ #include "draw/draw_vertex.h" #include "draw/draw_pt.h" #include "translate/translate.h" +#include "translate/translate_cache.h" /* The simplest 'middle end' in the new vertex code. * @@ -81,6 +82,7 @@ struct fetch_emit_middle_end { */ float point_size; + struct translate_cache *cache; }; @@ -174,10 +176,9 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, if (!feme->translate || memcmp(&feme->translate->key, &key, sizeof(key)) != 0) { - if (feme->translate) - feme->translate->release(feme->translate); + feme->translate = translate_cache_find(feme->cache, + &key); - feme->translate = translate_create( &key ); feme->translate->set_buffer(feme->translate, draw->pt.nr_vertex_buffers, @@ -266,9 +267,8 @@ static void fetch_emit_destroy( struct draw_pt_middle_end *middle ) { struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; - if (feme->translate) - feme->translate->release( feme->translate ); - + translate_cache_destroy(feme->cache); + FREE(middle); } @@ -278,7 +278,13 @@ struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ) struct fetch_emit_middle_end *fetch_emit = CALLOC_STRUCT( fetch_emit_middle_end ); if (fetch_emit == NULL) return NULL; - + + fetch_emit->cache = translate_cache_create(); + if (!fetch_emit->cache) { + FREE(fetch_emit); + return NULL; + } + fetch_emit->base.prepare = fetch_emit_prepare; fetch_emit->base.run = fetch_emit_run; fetch_emit->base.finish = fetch_emit_finish; -- cgit v1.2.3 From 3f6242d3e4d3ee61884a91a3eef4be8dfaadee3c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Apr 2008 12:11:00 -0600 Subject: gallium: passthrough tri, not point --- src/gallium/auxiliary/draw/draw_pipe_wide_line.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c index 452732e662..878c9c7169 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -169,7 +169,7 @@ struct draw_stage *draw_wide_line_stage( struct draw_context *draw ) wide->stage.next = NULL; wide->stage.point = draw_pipe_passthrough_point; wide->stage.line = wideline_line; - wide->stage.tri = draw_pipe_passthrough_point; + wide->stage.tri = draw_pipe_passthrough_tri; wide->stage.flush = wideline_flush; wide->stage.reset_stipple_counter = wideline_reset_stipple_counter; wide->stage.destroy = wideline_destroy; -- cgit v1.2.3 From bff371c431b962c62d74800c543e09d258e67551 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Apr 2008 17:40:39 -0600 Subject: gallium: fix comments, whitespace changes --- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index aec485a6e7..848d3be55d 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -433,9 +433,7 @@ pstip_create_texture(struct pstip_stage *pstip) /** - * Create the sampler CSO that'll be used for antialiasing. - * By using a mipmapped texture, we don't have to generate a different - * texture image for each line size. + * Create the sampler CSO that'll be used for stippling. */ static boolean pstip_create_sampler(struct pstip_stage *pstip) @@ -463,8 +461,8 @@ pstip_create_sampler(struct pstip_stage *pstip) /** - * When we're about to draw our first AA line in a batch, this function is - * called to tell the driver to bind our modified fragment shader. + * When we're about to draw our first stipple polygon in a batch, this function + * is called to tell the driver to bind our modified fragment shader. */ static boolean bind_pstip_fragment_shader(struct pstip_stage *pstip) @@ -478,7 +476,6 @@ bind_pstip_fragment_shader(struct pstip_stage *pstip) } - static INLINE struct pstip_stage * pstip_stage( struct draw_stage *stage ) { @@ -486,9 +483,6 @@ pstip_stage( struct draw_stage *stage ) } - - - static void pstip_first_tri(struct draw_stage *stage, struct prim_header *header) { @@ -521,7 +515,7 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) pstip->driver_bind_sampler_states(pipe, num_samplers, pstip->state.samplers); pstip->driver_set_sampler_textures(pipe, num_samplers, pstip->state.textures); - /* now really draw first line */ + /* now really draw first triangle */ stage->tri = draw_pipe_passthrough_tri; stage->tri(stage, header); } @@ -698,11 +692,10 @@ pstip_set_polygon_stipple(struct pipe_context *pipe, } - /** - * Called by drivers that want to install this AA line prim stage + * Called by drivers that want to install this polygon stipple stage * into the draw module's pipeline. This will not be used if the - * hardware has native support for AA lines. + * hardware has native support for polygon stipple. */ boolean draw_install_pstipple_stage(struct draw_context *draw, @@ -713,7 +706,7 @@ draw_install_pstipple_stage(struct draw_context *draw, pipe->draw = (void *) draw; /* - * Create / install AA line drawing / prim stage + * Create / install pgon stipple drawing / prim stage */ pstip = draw_pstip_stage( draw ); if (pstip == NULL) -- cgit v1.2.3 From 8437f5c763c1a1ac364d71426109c2b095bbcc72 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Apr 2008 17:41:30 -0600 Subject: gallium: fix comments --- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index ac0aa4cd7c..8c38c15d86 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -481,7 +481,7 @@ aa_transform_inst(struct tgsi_transform_context *ctx, /** - * Generate the frag shader we'll use for drawing AA lines. + * Generate the frag shader we'll use for drawing AA points. * This will be the user's shader plus some texture/modulate instructions. */ static boolean @@ -531,7 +531,7 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) /** - * When we're about to draw our first AA line in a batch, this function is + * When we're about to draw our first AA point in a batch, this function is * called to tell the driver to bind our modified fragment shader. */ static boolean @@ -697,7 +697,7 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header) } } - /* now really draw first line */ + /* now really draw first point */ stage->point = aapoint_point; stage->point(stage, header); } -- cgit v1.2.3 From 14d1ca8d867d6e44c756cb759f92421107118b2e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Apr 2008 18:08:20 -0600 Subject: gallium: fix issues in recursive flushing When flushing/rendering, some stages (like AA line/point) need to set pipe/driver state. Those driver functions often call draw_flush(). That leads to recursion. Use new draw->suspend_flush flag to explicitly prevent that in the key places. Remove the draw->vcache_flushing field. Reuse draw->flushing as a debug/assertion var. --- src/gallium/auxiliary/draw/draw_context.c | 4 +++- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 18 +++++++++++++++--- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 7 +++++++ src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 16 ++++++++++++++-- src/gallium/auxiliary/draw/draw_private.h | 6 +++--- src/gallium/auxiliary/draw/draw_pt_vcache.c | 2 -- 6 files changed, 42 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index f90187816b..98e23fa830 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -367,8 +367,10 @@ draw_set_mapped_element_buffer( struct draw_context *draw, */ void draw_do_flush( struct draw_context *draw, unsigned flags ) { - if (!draw->flushing && !draw->vcache_flushing) + if (!draw->suspend_flushing) { + assert(!draw->flushing); /* catch inadvertant recursion */ + draw->flushing = TRUE; draw_pipeline_flush( draw, flags ); diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 7e5f8bd281..0545dbc171 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -485,11 +485,16 @@ aaline_create_sampler(struct aaline_stage *aaline) static boolean bind_aaline_fragment_shader(struct aaline_stage *aaline) { + struct draw_context *draw = aaline->stage.draw; + if (!aaline->fs->aaline_fs && !generate_aaline_fs(aaline)) return FALSE; + draw->suspend_flushing = TRUE; aaline->driver_bind_fs_state(aaline->pipe, aaline->fs->aaline_fs); + draw->suspend_flushing = FALSE; + return TRUE; } @@ -663,8 +668,10 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) pipe_texture_reference(&aaline->state.texture[aaline->fs->sampler_unit], aaline->texture); + draw->suspend_flushing = TRUE; aaline->driver_bind_sampler_states(pipe, num_samplers, aaline->state.sampler); aaline->driver_set_sampler_textures(pipe, num_samplers, aaline->state.texture); + draw->suspend_flushing = FALSE; /* now really draw first line */ stage->line = aaline_line; @@ -682,14 +689,14 @@ aaline_flush(struct draw_stage *stage, unsigned flags) stage->line = aaline_first_line; stage->next->flush( stage->next, flags ); - /* restore original frag shader */ + /* restore original frag shader, texture, sampler state */ + draw->suspend_flushing = TRUE; aaline->driver_bind_fs_state(pipe, aaline->fs->driver_fs); - - /* XXX restore original texture, sampler state */ aaline->driver_bind_sampler_states(pipe, aaline->num_samplers, aaline->state.sampler); aaline->driver_set_sampler_textures(pipe, aaline->num_textures, aaline->state.texture); + draw->suspend_flushing = FALSE; draw->extra_vp_outputs.slot = 0; } @@ -783,6 +790,7 @@ aaline_bind_fs_state(struct pipe_context *pipe, void *fs) { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); struct aaline_fragment_shader *aafs = (struct aaline_fragment_shader *) fs; + /* save current */ aaline->fs = aafs; /* pass-through */ @@ -807,9 +815,12 @@ aaline_bind_sampler_states(struct pipe_context *pipe, unsigned num, void **sampler) { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + struct draw_context *draw = aaline->stage.draw; + /* save current */ memcpy(aaline->state.sampler, sampler, num * sizeof(void *)); aaline->num_samplers = num; + /* pass-through */ aaline->driver_bind_sampler_states(aaline->pipe, num, sampler); } @@ -820,6 +831,7 @@ aaline_set_sampler_textures(struct pipe_context *pipe, unsigned num, struct pipe_texture **texture) { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); + struct draw_context *draw = aaline->stage.draw; uint i; /* save current */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 8c38c15d86..122a48660a 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -537,11 +537,16 @@ generate_aapoint_fs(struct aapoint_stage *aapoint) static boolean bind_aapoint_fragment_shader(struct aapoint_stage *aapoint) { + struct draw_context *draw = aapoint->stage.draw; + if (!aapoint->fs->aapoint_fs && !generate_aapoint_fs(aapoint)) return FALSE; + draw->suspend_flushing = TRUE; aapoint->driver_bind_fs_state(aapoint->pipe, aapoint->fs->aapoint_fs); + draw->suspend_flushing = FALSE; + return TRUE; } @@ -714,7 +719,9 @@ aapoint_flush(struct draw_stage *stage, unsigned flags) stage->next->flush( stage->next, flags ); /* restore original frag shader */ + draw->suspend_flushing = TRUE; aapoint->driver_bind_fs_state(pipe, aapoint->fs->driver_fs); + draw->suspend_flushing = FALSE; draw->extra_vp_outputs.slot = 0; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 848d3be55d..f8156fe731 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -467,11 +467,14 @@ pstip_create_sampler(struct pstip_stage *pstip) static boolean bind_pstip_fragment_shader(struct pstip_stage *pstip) { + struct draw_context *draw = pstip->stage.draw; if (!pstip->fs->pstip_fs && !generate_pstip_fs(pstip)) return FALSE; + draw->suspend_flushing = TRUE; pstip->driver_bind_fs_state(pstip->pipe, pstip->fs->pstip_fs); + draw->suspend_flushing = FALSE; return TRUE; } @@ -488,6 +491,7 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) { struct pstip_stage *pstip = pstip_stage(stage); struct pipe_context *pipe = pstip->pipe; + struct draw_context *draw = stage->draw; uint num_samplers; assert(stage->draw->rasterizer->poly_stipple_enable); @@ -512,8 +516,10 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) assert(num_samplers <= PIPE_MAX_SAMPLERS); + draw->suspend_flushing = TRUE; pstip->driver_bind_sampler_states(pipe, num_samplers, pstip->state.samplers); pstip->driver_set_sampler_textures(pipe, num_samplers, pstip->state.textures); + draw->suspend_flushing = FALSE; /* now really draw first triangle */ stage->tri = draw_pipe_passthrough_tri; @@ -524,7 +530,7 @@ pstip_first_tri(struct draw_stage *stage, struct prim_header *header) static void pstip_flush(struct draw_stage *stage, unsigned flags) { - /*struct draw_context *draw = stage->draw;*/ + struct draw_context *draw = stage->draw; struct pstip_stage *pstip = pstip_stage(stage); struct pipe_context *pipe = pstip->pipe; @@ -534,11 +540,13 @@ pstip_flush(struct draw_stage *stage, unsigned flags) /* restore original frag shader */ pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs); - /* XXX restore original texture, sampler state */ + /* restore original texture, sampler state */ + draw->suspend_flushing = TRUE; pstip->driver_bind_sampler_states(pipe, pstip->num_samplers, pstip->state.samplers); pstip->driver_set_sampler_textures(pipe, pstip->num_textures, pstip->state.textures); + draw->suspend_flushing = FALSE; } @@ -661,6 +669,7 @@ pstip_set_sampler_textures(struct pipe_context *pipe, unsigned num, struct pipe_texture **texture) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct draw_context *draw = pstip->stage.draw; uint i; /* save current */ @@ -683,8 +692,11 @@ pstip_set_polygon_stipple(struct pipe_context *pipe, const struct pipe_poly_stipple *stipple) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); + struct draw_context *draw = (struct draw_context *) pipe->draw; + /* save current */ pstip->state.stipple = stipple; + /* pass-through */ pstip->driver_set_polygon_stipple(pstip->pipe, stipple); diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 39aa81b43c..ca5021c5c6 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -178,9 +178,9 @@ struct draw_context boolean bypass_clipping; } driver; - boolean flushing; - boolean vcache_flushing; - boolean bypass_clipping; /* set if either api or driver bypass_clipping true */ + boolean flushing; /**< debugging/sanity */ + boolean suspend_flushing; /**< internally set */ + boolean bypass_clipping; /**< set if either api or driver bypass_clipping true */ /* pipe state that we need: */ const struct pipe_rasterizer_state *rasterizer; diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index afcff41043..153055417d 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -63,7 +63,6 @@ struct vcache_frontend { static void vcache_flush( struct vcache_frontend *vcache ) { - vcache->draw->vcache_flushing = TRUE; if (vcache->draw_count) { vcache->middle->run( vcache->middle, vcache->fetch_elts, @@ -75,7 +74,6 @@ static void vcache_flush( struct vcache_frontend *vcache ) memset(vcache->in, ~0, sizeof(vcache->in)); vcache->fetch_count = 0; vcache->draw_count = 0; - vcache->draw->vcache_flushing = FALSE; } static void vcache_check_flush( struct vcache_frontend *vcache ) -- cgit v1.2.3 From bb4f8ae1f93d17c57fd8f62bea24b48131e02037 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Apr 2008 18:09:20 -0600 Subject: gallium: reorder code to fix a recursive flush --- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index f8156fe731..da303a13ad 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -537,11 +537,9 @@ pstip_flush(struct draw_stage *stage, unsigned flags) stage->tri = pstip_first_tri; stage->next->flush( stage->next, flags ); - /* restore original frag shader */ - pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs); - - /* restore original texture, sampler state */ + /* restore original frag shader, texture, sampler state */ draw->suspend_flushing = TRUE; + pstip->driver_bind_fs_state(pipe, pstip->fs->driver_fs); pstip->driver_bind_sampler_states(pipe, pstip->num_samplers, pstip->state.samplers); pstip->driver_set_sampler_textures(pipe, pstip->num_textures, -- cgit v1.2.3 From 7333578d2a5fa18f7f0101fc3fd3b03cf2f356bc Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Thu, 24 Apr 2008 10:12:07 +0100 Subject: gallium: Initial support for pixel formats with unused storage components. Also clarify that RGB formats with no (used) alpha component are treated as having alpha = 1.0. --- src/gallium/auxiliary/util/p_tile.c | 52 ++++++++++++++ src/gallium/auxiliary/util/u_gen_mipmap.c | 2 + src/gallium/auxiliary/util/u_pack_color.h | 38 +++++++++++ src/gallium/include/pipe/p_format.h | 108 +++++++++++++++++------------- src/mesa/state_tracker/st_cb_fbo.c | 4 ++ 5 files changed, 157 insertions(+), 47 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index 520da5cecd..91283a9ab0 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -169,6 +169,52 @@ a8r8g8b8_put_tile_rgba(unsigned *dst, } +/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/ + +static void +x8r8g8b8_get_tile_rgba(unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const unsigned pixel = *src++; + pRow[0] = UBYTE_TO_FLOAT((pixel >> 16) & 0xff); + pRow[1] = UBYTE_TO_FLOAT((pixel >> 8) & 0xff); + pRow[2] = UBYTE_TO_FLOAT((pixel >> 0) & 0xff); + pRow[3] = UBYTE_TO_FLOAT(0xff); + } + p += dst_stride; + } +} + + +static void +x8r8g8b8_put_tile_rgba(unsigned *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b; + UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]); + UNCLAMPED_FLOAT_TO_UBYTE(g, pRow[1]); + UNCLAMPED_FLOAT_TO_UBYTE(b, pRow[2]); + *dst++ = (0xff << 24) | (r << 16) | (g << 8) | b; + } + p += src_stride; + } +} + + /*** PIPE_FORMAT_B8G8R8A8_UNORM ***/ static void @@ -647,6 +693,9 @@ pipe_get_tile_rgba(struct pipe_context *pipe, case PIPE_FORMAT_A8R8G8B8_UNORM: a8r8g8b8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); break; + case PIPE_FORMAT_X8R8G8B8_UNORM: + x8r8g8b8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); + break; case PIPE_FORMAT_B8G8R8A8_UNORM: b8g8r8a8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); break; @@ -723,6 +772,9 @@ pipe_put_tile_rgba(struct pipe_context *pipe, case PIPE_FORMAT_A8R8G8B8_UNORM: a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); break; + case PIPE_FORMAT_X8R8G8B8_UNORM: + x8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); + break; case PIPE_FORMAT_B8G8R8A8_UNORM: b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); break; diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index dfdb5f16fe..b8dc6c66c0 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -475,7 +475,9 @@ format_to_type_comps(enum pipe_format pformat, { switch (pformat) { case PIPE_FORMAT_A8R8G8B8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: *datatype = UBYTE; *comps = 4; return; diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 1f6604c554..50ef44992b 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -53,18 +53,36 @@ util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a, *d = (r << 24) | (g << 16) | (b << 8) | a; } return; + case PIPE_FORMAT_R8G8B8X8_UNORM: + { + uint *d = (uint *) dest; + *d = (r << 24) | (g << 16) | (b << 8) | 0xff; + } + return; case PIPE_FORMAT_A8R8G8B8_UNORM: { uint *d = (uint *) dest; *d = (a << 24) | (r << 16) | (g << 8) | b; } return; + case PIPE_FORMAT_X8R8G8B8_UNORM: + { + uint *d = (uint *) dest; + *d = (0xff << 24) | (r << 16) | (g << 8) | b; + } + return; case PIPE_FORMAT_B8G8R8A8_UNORM: { uint *d = (uint *) dest; *d = (b << 24) | (g << 16) | (r << 8) | a; } return; + case PIPE_FORMAT_B8G8R8X8_UNORM: + { + uint *d = (uint *) dest; + *d = (b << 24) | (g << 16) | (r << 8) | 0xff; + } + return; case PIPE_FORMAT_R5G6B5_UNORM: { ushort *d = (ushort *) dest; @@ -101,18 +119,36 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest) *d = (r << 24) | (g << 16) | (b << 8) | a; } return; + case PIPE_FORMAT_R8G8B8X8_UNORM: + { + uint *d = (uint *) dest; + *d = (r << 24) | (g << 16) | (b << 8) | 0xff; + } + return; case PIPE_FORMAT_A8R8G8B8_UNORM: { uint *d = (uint *) dest; *d = (a << 24) | (r << 16) | (g << 8) | b; } return; + case PIPE_FORMAT_X8R8G8B8_UNORM: + { + uint *d = (uint *) dest; + *d = (0xff << 24) | (r << 16) | (g << 8) | b; + } + return; case PIPE_FORMAT_B8G8R8A8_UNORM: { uint *d = (uint *) dest; *d = (b << 24) | (g << 16) | (r << 8) | a; } return; + case PIPE_FORMAT_B8G8R8X8_UNORM: + { + uint *d = (uint *) dest; + *d = (b << 24) | (g << 16) | (r << 8) | 0xff; + } + return; case PIPE_FORMAT_R5G6B5_UNORM: { ushort *d = (ushort *) dest; @@ -159,8 +195,10 @@ util_pack_z(enum pipe_format format, double z) else return (uint) (z * 0xffffffff); case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: return (uint) (z * 0xffffff); case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: return ((uint) (z * 0xffffff)) << 8; default: debug_printf("gallium: unhandled fomrat in util_pack_z()"); diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index ef9e3a3d6c..2ee6f100bf 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -163,18 +163,21 @@ static INLINE uint pf_get(pipe_format_rgbazs_t f, uint shift, uint mask) /** * Shorthand macro for common format swizzles. */ -#define _PIPE_FORMAT_R000 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) -#define _PIPE_FORMAT_RG00 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) -#define _PIPE_FORMAT_RGB0 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_0 ) +#define _PIPE_FORMAT_R001 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_1 ) +#define _PIPE_FORMAT_RG01 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_1 ) +#define _PIPE_FORMAT_RGB1 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_1 ) #define _PIPE_FORMAT_RGBA _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_A ) #define _PIPE_FORMAT_ARGB _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_A, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_B ) #define _PIPE_FORMAT_BGRA _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_A ) +#define _PIPE_FORMAT_1RGB _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_1, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_B ) +#define _PIPE_FORMAT_BGR1 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_1 ) #define _PIPE_FORMAT_0000 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) #define _PIPE_FORMAT_000R _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_R ) #define _PIPE_FORMAT_RRR1 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_1 ) #define _PIPE_FORMAT_RRRR _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R ) #define _PIPE_FORMAT_RRRG _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G ) #define _PIPE_FORMAT_Z000 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_Z, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) +#define _PIPE_FORMAT_0Z00 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_Z, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) #define _PIPE_FORMAT_SZ00 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_S, PIPE_FORMAT_COMP_Z, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) #define _PIPE_FORMAT_ZS00 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_Z, PIPE_FORMAT_COMP_S, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) #define _PIPE_FORMAT_S000 _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_S, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) @@ -229,10 +232,12 @@ static INLINE uint pf_rev(pipe_format_ycbcr_t f) enum pipe_format { PIPE_FORMAT_NONE = _PIPE_FORMAT_RGBAZS_1 ( _PIPE_FORMAT_0000, 0, 0, 0, 0, PIPE_FORMAT_TYPE_UNKNOWN ), PIPE_FORMAT_A8R8G8B8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_ARGB, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_X8R8G8B8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_1RGB, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), PIPE_FORMAT_B8G8R8A8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_BGRA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_B8G8R8X8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_BGR1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), PIPE_FORMAT_A1R5G5B5_UNORM = _PIPE_FORMAT_RGBAZS_1 ( _PIPE_FORMAT_ARGB, 1, 5, 5, 5, PIPE_FORMAT_TYPE_UNORM ), PIPE_FORMAT_A4R4G4B4_UNORM = _PIPE_FORMAT_RGBAZS_1 ( _PIPE_FORMAT_ARGB, 4, 4, 4, 4, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R5G6B5_UNORM = _PIPE_FORMAT_RGBAZS_1 ( _PIPE_FORMAT_RGB0, 5, 6, 5, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R5G6B5_UNORM = _PIPE_FORMAT_RGBAZS_1 ( _PIPE_FORMAT_RGB1, 5, 6, 5, 0, PIPE_FORMAT_TYPE_UNORM ), PIPE_FORMAT_U_L8 = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRR1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), /**< ubyte luminance */ PIPE_FORMAT_U_A8 = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_000R, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), /**< ubyte alpha */ PIPE_FORMAT_U_I8 = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRRR, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), /**< ubyte intensity */ @@ -244,68 +249,75 @@ enum pipe_format { PIPE_FORMAT_Z32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_Z000, 4, 0, 0, 0, PIPE_FORMAT_TYPE_FLOAT ), PIPE_FORMAT_S8Z24_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_SZ00, 1, 3, 0, 0, PIPE_FORMAT_TYPE_UNORM ), PIPE_FORMAT_Z24S8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_ZS00, 3, 1, 0, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_X8Z24_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_0Z00, 1, 3, 0, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_Z24X8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_Z000, 3, 1, 0, 0, PIPE_FORMAT_TYPE_UNORM ), PIPE_FORMAT_S8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_S000, 1, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ), /**< ubyte stencil */ - PIPE_FORMAT_R64_FLOAT = _PIPE_FORMAT_RGBAZS_64( _PIPE_FORMAT_R000, 1, 0, 0, 0, PIPE_FORMAT_TYPE_FLOAT ), - PIPE_FORMAT_R64G64_FLOAT = _PIPE_FORMAT_RGBAZS_64( _PIPE_FORMAT_RG00, 1, 1, 0, 0, PIPE_FORMAT_TYPE_FLOAT ), - PIPE_FORMAT_R64G64B64_FLOAT = _PIPE_FORMAT_RGBAZS_64( _PIPE_FORMAT_RGB0, 1, 1, 1, 0, PIPE_FORMAT_TYPE_FLOAT ), + PIPE_FORMAT_R64_FLOAT = _PIPE_FORMAT_RGBAZS_64( _PIPE_FORMAT_R001, 1, 0, 0, 0, PIPE_FORMAT_TYPE_FLOAT ), + PIPE_FORMAT_R64G64_FLOAT = _PIPE_FORMAT_RGBAZS_64( _PIPE_FORMAT_RG01, 1, 1, 0, 0, PIPE_FORMAT_TYPE_FLOAT ), + PIPE_FORMAT_R64G64B64_FLOAT = _PIPE_FORMAT_RGBAZS_64( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_FLOAT ), PIPE_FORMAT_R64G64B64A64_FLOAT = _PIPE_FORMAT_RGBAZS_64( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_FLOAT ), - PIPE_FORMAT_R32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R000, 4, 0, 0, 0, PIPE_FORMAT_TYPE_FLOAT ), - PIPE_FORMAT_R32G32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG00, 4, 4, 0, 0, PIPE_FORMAT_TYPE_FLOAT ), - PIPE_FORMAT_R32G32B32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB0, 4, 4, 4, 0, PIPE_FORMAT_TYPE_FLOAT ), + PIPE_FORMAT_R32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_FLOAT ), + PIPE_FORMAT_R32G32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_FLOAT ), + PIPE_FORMAT_R32G32B32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_FLOAT ), PIPE_FORMAT_R32G32B32A32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_FLOAT ), - PIPE_FORMAT_R32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R000, 4, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R32G32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG00, 4, 4, 0, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R32G32B32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB0, 4, 4, 4, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R32G32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R32G32B32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_UNORM ), PIPE_FORMAT_R32G32B32A32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R32_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R000, 4, 0, 0, 0, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R32G32_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG00, 4, 4, 0, 0, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R32G32B32_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB0, 4, 4, 4, 0, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R32_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R32G32_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R32G32B32_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_USCALED ), PIPE_FORMAT_R32G32B32A32_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R32_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R000, 4, 0, 0, 0, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R32G32_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG00, 4, 4, 0, 0, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R32G32B32_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB0, 4, 4, 4, 0, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R32_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R32G32_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R32G32B32_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_SNORM ), PIPE_FORMAT_R32G32B32A32_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R32_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R000, 4, 0, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R32G32_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG00, 4, 4, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R32G32B32_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB0, 4, 4, 4, 0, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R32_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R32G32_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R32G32B32_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_SSCALED ), PIPE_FORMAT_R32G32B32A32_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R000, 2, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R16G16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG00, 2, 2, 0, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R16G16B16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB0, 2, 2, 2, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 2, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R16G16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 2, 2, 0, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R16G16B16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 2, 2, 2, 0, PIPE_FORMAT_TYPE_UNORM ), PIPE_FORMAT_R16G16B16A16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 2, 2, 2, 2, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R16_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R000, 2, 0, 0, 0, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R16G16_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG00, 2, 2, 0, 0, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R16G16B16_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB0, 2, 2, 2, 0, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R16_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 2, 0, 0, 0, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R16G16_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 2, 2, 0, 0, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R16G16B16_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 2, 2, 2, 0, PIPE_FORMAT_TYPE_USCALED ), PIPE_FORMAT_R16G16B16A16_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 2, 2, 2, 2, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R16_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R000, 2, 0, 0, 0, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R16G16_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG00, 2, 2, 0, 0, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R16G16B16_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB0, 2, 2, 2, 0, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R16_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 2, 0, 0, 0, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R16G16_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 2, 2, 0, 0, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R16G16B16_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 2, 2, 2, 0, PIPE_FORMAT_TYPE_SNORM ), PIPE_FORMAT_R16G16B16A16_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 2, 2, 2, 2, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R16_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R000, 2, 0, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R16G16_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG00, 2, 2, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R16G16B16_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB0, 2, 2, 2, 0, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R16_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 2, 0, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R16G16_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 2, 2, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R16G16B16_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 2, 2, 2, 0, PIPE_FORMAT_TYPE_SSCALED ), PIPE_FORMAT_R16G16B16A16_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 2, 2, 2, 2, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R000, 1, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R8G8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG00, 1, 1, 0, 0, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R8G8B8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB0, 1, 1, 1, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 1, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R8G8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 1, 1, 0, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R8G8B8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_UNORM ), PIPE_FORMAT_R8G8B8A8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), - PIPE_FORMAT_R8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R000, 1, 0, 0, 0, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R8G8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG00, 1, 1, 0, 0, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R8G8B8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB0, 1, 1, 1, 0, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R8G8B8X8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_R8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 1, 0, 0, 0, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R8G8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 1, 1, 0, 0, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R8G8B8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_USCALED ), PIPE_FORMAT_R8G8B8A8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_USCALED ), - PIPE_FORMAT_R8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R000, 1, 0, 0, 0, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R8G8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG00, 1, 1, 0, 0, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R8G8B8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB0, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R8G8B8X8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_USCALED ), + PIPE_FORMAT_R8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 1, 0, 0, 0, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R8G8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 1, 1, 0, 0, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R8G8B8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SNORM ), PIPE_FORMAT_R8G8B8A8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SNORM ), - PIPE_FORMAT_R8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R000, 1, 0, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R8G8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG00, 1, 1, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), - PIPE_FORMAT_R8G8B8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB0, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R8G8B8X8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SNORM ), + PIPE_FORMAT_R8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 1, 0, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R8G8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 1, 1, 0, 0, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R8G8B8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SSCALED ), PIPE_FORMAT_R8G8B8A8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R8G8B8X8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SSCALED ), /* sRGB formats */ PIPE_FORMAT_L8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRR1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), PIPE_FORMAT_A8_L8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRRG, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), - PIPE_FORMAT_R8G8B8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB0, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_R8G8B8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SRGB ), PIPE_FORMAT_R8G8B8A8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_R8G8B8X8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), /* compressed formats */ PIPE_FORMAT_DXT1_RGB = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 0 ), @@ -442,6 +454,8 @@ static INLINE uint pf_get_bits( enum pipe_format format ) switch (pf_layout(format)) { case PIPE_FORMAT_LAYOUT_RGBAZS: return + pf_get_component_bits( format, PIPE_FORMAT_COMP_0 ) + + pf_get_component_bits( format, PIPE_FORMAT_COMP_1 ) + pf_get_component_bits( format, PIPE_FORMAT_COMP_R ) + pf_get_component_bits( format, PIPE_FORMAT_COMP_G ) + pf_get_component_bits( format, PIPE_FORMAT_COMP_B ) + diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index b1a56f3ca6..b73867bb72 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -252,6 +252,8 @@ st_new_renderbuffer_fb(enum pipe_format format) switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_X8R8G8B8_UNORM: + case PIPE_FORMAT_B8G8R8X8_UNORM: case PIPE_FORMAT_A1R5G5B5_UNORM: case PIPE_FORMAT_A4R4G4B4_UNORM: case PIPE_FORMAT_R5G6B5_UNORM: @@ -268,6 +270,8 @@ st_new_renderbuffer_fb(enum pipe_format format) break; case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_Z24S8_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + case PIPE_FORMAT_Z24X8_UNORM: strb->Base.InternalFormat = GL_DEPTH24_STENCIL8_EXT; strb->Base._BaseFormat = GL_DEPTH_STENCIL_EXT; break; -- cgit v1.2.3 From f93332da5655a31b6c44a1079629a15360ff999b Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 24 Apr 2008 12:38:15 +0100 Subject: draw: handle edgeflags and reset-line-stipple again --- src/gallium/auxiliary/draw/draw_pipe.c | 19 +- src/gallium/auxiliary/draw/draw_pipe.h | 11 + src/gallium/auxiliary/draw/draw_pipe_clip.c | 56 ++-- src/gallium/auxiliary/draw/draw_pipe_flatshade.c | 6 +- src/gallium/auxiliary/draw/draw_pipe_offset.c | 3 +- src/gallium/auxiliary/draw/draw_pipe_stipple.c | 4 + src/gallium/auxiliary/draw/draw_pipe_twoside.c | 3 +- src/gallium/auxiliary/draw/draw_pipe_unfilled.c | 24 +- src/gallium/auxiliary/draw/draw_private.h | 40 +-- src/gallium/auxiliary/draw/draw_pt.h | 9 - src/gallium/auxiliary/draw/draw_pt_fetch.c | 17 ++ src/gallium/auxiliary/draw/draw_pt_vcache.c | 356 +++++------------------ src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h | 174 +++++++++++ 13 files changed, 348 insertions(+), 374 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index d0890203a5..1c9d8650e2 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -116,8 +116,7 @@ static void do_point( struct draw_context *draw, { struct prim_header prim; - prim.reset_line_stipple = 0; - prim.edgeflags = 1; + prim.flags = 0; prim.pad = 0; prim.v[0] = (struct vertex_header *)v0; @@ -126,13 +125,13 @@ static void do_point( struct draw_context *draw, static void do_line( struct draw_context *draw, + ushort flags, const char *v0, const char *v1 ) { struct prim_header prim; - prim.reset_line_stipple = 1; /* fixme */ - prim.edgeflags = 1; + prim.flags = flags; prim.pad = 0; prim.v[0] = (struct vertex_header *)v0; prim.v[1] = (struct vertex_header *)v1; @@ -142,6 +141,7 @@ static void do_line( struct draw_context *draw, static void do_triangle( struct draw_context *draw, + ushort flags, char *v0, char *v1, char *v2 ) @@ -151,10 +151,7 @@ static void do_triangle( struct draw_context *draw, prim.v[0] = (struct vertex_header *)v0; prim.v[1] = (struct vertex_header *)v1; prim.v[2] = (struct vertex_header *)v2; - prim.reset_line_stipple = 1; - prim.edgeflags = ((prim.v[0]->edgeflag) | - (prim.v[1]->edgeflag << 1) | - (prim.v[2]->edgeflag << 2)); + prim.flags = flags; prim.pad = 0; draw->pipeline.first->tri( draw->pipeline.first, &prim ); @@ -197,13 +194,15 @@ void draw_pipeline_run( struct draw_context *draw, case PIPE_PRIM_LINES: for (i = 0; i+1 < count; i += 2) do_line( draw, - verts + stride * elts[i+0], + elts[i+0], + verts + stride * (elts[i+0] & ~DRAW_PIPE_FLAG_MASK), verts + stride * elts[i+1]); break; case PIPE_PRIM_TRIANGLES: for (i = 0; i+2 < count; i += 3) do_triangle( draw, - verts + stride * elts[i+0], + elts[i+0], + verts + stride * (elts[i+0] & ~DRAW_PIPE_FLAG_MASK), verts + stride * elts[i+1], verts + stride * elts[i+2]); break; diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h index 2476abb2b2..f1cb0891ca 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.h +++ b/src/gallium/auxiliary/draw/draw_pipe.h @@ -37,6 +37,17 @@ #include "draw_private.h" /* for sizeof(vertex_header) */ +/** + * Basic info for a point/line/triangle primitive. + */ +struct prim_header { + float det; /**< front/back face determinant */ + ushort flags; + ushort pad; + struct vertex_header *v[3]; /**< 1 to 3 vertex pointers */ +}; + + /** * Base class for all primitive drawing stages. diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 21216addea..ce80c94163 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -119,7 +119,7 @@ static void interp( const struct clipper *clip, */ { dst->clipmask = 0; - dst->edgeflag = 0; + dst->edgeflag = 0; /* will get overwritten later */ dst->pad = 0; dst->vertex_id = UNDEFINED_VERTEX_ID; } @@ -162,45 +162,39 @@ static void emit_poly( struct draw_stage *stage, struct prim_header header; unsigned i; + const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2; + const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0; + const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1; + /* later stages may need the determinant, but only the sign matters */ header.det = origPrim->det; + header.flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; + header.pad = 0; - for (i = 2; i < n; i++) { + for (i = 2; i < n; i++, header.flags = 0) { header.v[0] = inlist[i-1]; header.v[1] = inlist[i]; header.v[2] = inlist[0]; /* keep in v[2] for flatshading */ - - { - unsigned tmp1 = header.v[1]->edgeflag; - unsigned tmp2 = header.v[2]->edgeflag; - - if (i != n-1) header.v[1]->edgeflag = 0; - if (i != 2) header.v[2]->edgeflag = 0; - - header.edgeflags = ((header.v[0]->edgeflag << 0) | - (header.v[1]->edgeflag << 1) | - (header.v[2]->edgeflag << 2)); - - if (0) { - const struct draw_vertex_shader *vs = stage->draw->vertex_shader; - uint j, k; - debug_printf("Clipped tri:\n"); - for (j = 0; j < 3; j++) { - for (k = 0; k < vs->info.num_outputs; k++) { - debug_printf(" Vert %d: Attr %d: %f %f %f %f\n", j, k, - header.v[j]->data[k][0], - header.v[j]->data[k][1], - header.v[j]->data[k][2], - header.v[j]->data[k][3]); - } + + if (i == n-1) + header.flags |= edge_last; + + if (0) { + const struct draw_vertex_shader *vs = stage->draw->vertex_shader; + uint j, k; + debug_printf("Clipped tri:\n"); + for (j = 0; j < 3; j++) { + for (k = 0; k < vs->info.num_outputs; k++) { + debug_printf(" Vert %d: Attr %d: %f %f %f %f\n", j, k, + header.v[j]->data[k][0], + header.v[j]->data[k][1], + header.v[j]->data[k][2], + header.v[j]->data[k][3]); } } - - stage->next->tri( stage->next, &header ); - - header.v[1]->edgeflag = tmp1; - header.v[2]->edgeflag = tmp2; } + + stage->next->tri( stage->next, &header ); } } diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c index 205000cbea..09b68c4559 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c @@ -91,7 +91,8 @@ static void flatshade_tri_0( struct draw_stage *stage, struct prim_header tmp; tmp.det = header->det; - tmp.edgeflags = header->edgeflags; + tmp.flags = header->flags; + tmp.pad = header->pad; tmp.v[0] = header->v[0]; tmp.v[1] = dup_vert(stage, header->v[1], 0); tmp.v[2] = dup_vert(stage, header->v[2], 1); @@ -108,7 +109,8 @@ static void flatshade_tri_2( struct draw_stage *stage, struct prim_header tmp; tmp.det = header->det; - tmp.edgeflags = header->edgeflags; + tmp.flags = header->flags; + tmp.pad = header->pad; tmp.v[0] = dup_vert(stage, header->v[0], 0); tmp.v[1] = dup_vert(stage, header->v[1], 1); tmp.v[2] = header->v[2]; diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index ffec85ccdd..ea6de8c571 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -106,7 +106,8 @@ static void offset_tri( struct draw_stage *stage, struct prim_header tmp; tmp.det = header->det; - tmp.edgeflags = header->edgeflags; + tmp.flags = header->flags; + tmp.pad = header->pad; tmp.v[0] = dup_vert(stage, header->v[0], 0); tmp.v[1] = dup_vert(stage, header->v[1], 1); tmp.v[2] = dup_vert(stage, header->v[2], 2); diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index 9cf5840cce..3cbced362e 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -135,6 +135,10 @@ stipple_line(struct draw_stage *stage, struct prim_header *header) float length = MAX2(dx, dy); int i; + if (header->flags & DRAW_PIPE_RESET_STIPPLE) + stipple->counter = 0; + + /* XXX ToDo: intead of iterating pixel-by-pixel, use a look-up table. */ for (i = 0; i < length; i++) { diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c index 5910dccc43..50872fdbe9 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c +++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c @@ -85,7 +85,8 @@ static void twoside_tri( struct draw_stage *stage, struct prim_header tmp; tmp.det = header->det; - tmp.edgeflags = header->edgeflags; + tmp.flags = header->flags; + tmp.pad = header->pad; /* copy back attribs to front attribs */ tmp.v[0] = copy_bfc(twoside, header->v[0], 0); tmp.v[1] = copy_bfc(twoside, header->v[1], 1); diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c index eeb2bc43f9..8f97fdedaa 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c @@ -83,9 +83,9 @@ static void points( struct draw_stage *stage, struct vertex_header *v1 = header->v[1]; struct vertex_header *v2 = header->v[2]; - if (header->edgeflags & 0x1) point( stage, v0 ); - if (header->edgeflags & 0x2) point( stage, v1 ); - if (header->edgeflags & 0x4) point( stage, v2 ); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) point( stage, v0 ); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) point( stage, v1 ); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) point( stage, v2 ); } @@ -96,22 +96,22 @@ static void lines( struct draw_stage *stage, struct vertex_header *v1 = header->v[1]; struct vertex_header *v2 = header->v[2]; -#if 0 - assert(((header->edgeflags & 0x1) >> 0) == header->v[0]->edgeflag); - assert(((header->edgeflags & 0x2) >> 1) == header->v[1]->edgeflag); - assert(((header->edgeflags & 0x4) >> 2) == header->v[2]->edgeflag); -#endif + if (header->flags & DRAW_PIPE_RESET_STIPPLE) + stage->next->reset_stipple_counter( stage->next ); - if (header->edgeflags & 0x4) line( stage, v2, v0 ); - if (header->edgeflags & 0x1) line( stage, v0, v1 ); - if (header->edgeflags & 0x2) line( stage, v1, v2 ); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_2) && v2->edgeflag) line( stage, v2, v0 ); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_0) && v0->edgeflag) line( stage, v0, v1 ); + if ((header->flags & DRAW_PIPE_EDGE_FLAG_1) && v1->edgeflag) line( stage, v1, v2 ); } /* Unfilled tri: * * Note edgeflags in the vertex struct is not sufficient as we will - * need to manipulate them when decomposing primitives??? + * need to manipulate them when decomposing primitives. + * + * We currently keep the vertex edgeflag and primitive edgeflag mask + * separate until the last possible moment. */ static void unfilled_tri( struct draw_stage *stage, struct prim_header *header ) diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index ca5021c5c6..49593c8fad 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -78,25 +78,6 @@ struct vertex_header { #define UNDEFINED_VERTEX_ID 0xffff -/** - * Basic info for a point/line/triangle primitive. - */ -struct prim_header { - float det; /**< front/back face determinant */ - unsigned reset_line_stipple:1; - unsigned edgeflags:3; - unsigned pad:28; - struct vertex_header *v[3]; /**< 1 to 3 vertex pointers */ -}; - - - - -#define PT_SHADE 0x1 -#define PT_CLIPTEST 0x2 -#define PT_PIPELINE 0x4 -#define PT_MAX_MIDDLE 0x8 - /** * Private context for the drawing module. */ @@ -238,6 +219,25 @@ void draw_pt_reset_vertex_ids( struct draw_context *draw ); boolean draw_pipeline_init( struct draw_context *draw ); void draw_pipeline_destroy( struct draw_context *draw ); + + + + +/* We use the top few bits in the elts[] parameter to convey a little + * API information. This limits the number of vertices we can address + * to only 4096 -- if that becomes a problem, we can switch to 32-bit + * draw indices. + * + * These flags expected at first vertex of lines & triangles when + * unfilled and/or line stipple modes are operational. + */ +#define DRAW_PIPE_EDGE_FLAG_0 (0x1<<12) +#define DRAW_PIPE_EDGE_FLAG_1 (0x2<<12) +#define DRAW_PIPE_EDGE_FLAG_2 (0x4<<12) +#define DRAW_PIPE_EDGE_FLAG_ALL (0x7<<12) +#define DRAW_PIPE_RESET_STIPPLE (0x8<<12) +#define DRAW_PIPE_FLAG_MASK (0xf<<12) + void draw_pipeline_run( struct draw_context *draw, unsigned prim, struct vertex_header *vertices, @@ -246,6 +246,8 @@ void draw_pipeline_run( struct draw_context *draw, const ushort *elts, unsigned count ); + + void draw_pipeline_flush( struct draw_context *draw, unsigned flags ); diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index fd0d158fcf..df5c29fc36 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -40,15 +40,6 @@ typedef unsigned (*pt_elt_func)( const void *elts, unsigned idx ); struct draw_pt_middle_end; struct draw_context; -/* We use the top couple of bits in the vertex fetch index to convey a - * little API information. This limits the number of vertices we can - * address to only 1 billion -- if that becomes a problem, these could - * be moved out & passed separately. - */ -#define DRAW_PT_EDGEFLAG (1<<30) -#define DRAW_PT_RESET_STIPPLE (1<<31) -#define DRAW_PT_FLAG_MASK (3<<30) - #define PT_SHADE 0x1 #define PT_CLIPTEST 0x2 diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index cc61b113fd..9f74806e11 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -41,6 +41,7 @@ struct pt_fetch { struct translate *translate; unsigned vertex_size; + boolean need_edgeflags; struct translate_cache *cache; }; @@ -121,6 +122,10 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, 0); } } + + fetch->need_edgeflags = ((draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || + draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL) && + draw->pt.user.edgeflag); } @@ -147,6 +152,18 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, elts, count, verts ); + + /* Edgeflags are hard to fit into a translate program, populate + * them separately if required. In the setup above they are + * defaulted to one, so only need this if there is reason to change + * that default: + */ + if (fetch->need_edgeflags) { + for (i = 0; i < count; i++) { + struct vertex_header *vh = (struct vertex_header *)(verts + i * fetch->vertex_size); + vh->edgeflag = draw_pt_get_edgeflag( draw, elts[i] ); + } + } } diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 153055417d..b3133359e0 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -33,8 +33,6 @@ #include "pipe/p_util.h" #include "draw/draw_context.h" #include "draw/draw_private.h" -//#include "draw/draw_vbuf.h" -//#include "draw/draw_vertex.h" #include "draw/draw_pt.h" @@ -86,8 +84,9 @@ static void vcache_check_flush( struct vcache_frontend *vcache ) } -static void vcache_elt( struct vcache_frontend *vcache, - unsigned felt ) +static INLINE void vcache_elt( struct vcache_frontend *vcache, + unsigned felt, + ushort flags ) { unsigned idx = felt % CACHE_MAX; @@ -99,28 +98,9 @@ static void vcache_elt( struct vcache_frontend *vcache, vcache->fetch_elts[vcache->fetch_count++] = felt; } - vcache->draw_elts[vcache->draw_count++] = vcache->out[idx]; + vcache->draw_elts[vcache->draw_count++] = vcache->out[idx] | flags; } -static unsigned add_edgeflag( struct vcache_frontend *vcache, - unsigned idx, - unsigned mask ) -{ - if (0 && mask && draw_pt_get_edgeflag(vcache->draw, idx)) - return idx | DRAW_PT_EDGEFLAG; - else - return idx; -} - - -static unsigned add_reset_stipple( unsigned idx, - unsigned reset ) -{ - if (0 && reset) - return idx | DRAW_PT_RESET_STIPPLE; - else - return idx; -} static void vcache_triangle( struct vcache_frontend *vcache, @@ -128,49 +108,42 @@ static void vcache_triangle( struct vcache_frontend *vcache, unsigned i1, unsigned i2 ) { - vcache_elt(vcache, i0 /* | DRAW_PT_EDGEFLAG | DRAW_PT_RESET_STIPPLE */ ); - vcache_elt(vcache, i1 /* | DRAW_PT_EDGEFLAG */); - vcache_elt(vcache, i2 /* | DRAW_PT_EDGEFLAG */); + vcache_elt(vcache, i0, 0); + vcache_elt(vcache, i1, 0); + vcache_elt(vcache, i2, 0); vcache_check_flush(vcache); } -static void vcache_ef_triangle( struct vcache_frontend *vcache, - boolean reset_stipple, - unsigned ef_mask, - unsigned i0, - unsigned i1, - unsigned i2 ) +static void vcache_triangle_flags( struct vcache_frontend *vcache, + ushort flags, + unsigned i0, + unsigned i1, + unsigned i2 ) { -/* - i0 = add_edgeflag( vcache, i0, (ef_mask >> 0) & 1 ); - i1 = add_edgeflag( vcache, i1, (ef_mask >> 1) & 1 ); - i2 = add_edgeflag( vcache, i2, (ef_mask >> 2) & 1 ); - i0 = add_reset_stipple( i0, reset_stipple ); -*/ - - vcache_elt(vcache, i0); - vcache_elt(vcache, i1); - vcache_elt(vcache, i2); + vcache_elt(vcache, i0, flags); + vcache_elt(vcache, i1, 0); + vcache_elt(vcache, i2, 0); vcache_check_flush(vcache); - - if (0) debug_printf("emit tri ef: %d %d %d\n", - !!(i0 & DRAW_PT_EDGEFLAG), - !!(i1 & DRAW_PT_EDGEFLAG), - !!(i2 & DRAW_PT_EDGEFLAG)); - } - static void vcache_line( struct vcache_frontend *vcache, - boolean reset_stipple, unsigned i0, unsigned i1 ) { - i0 = add_reset_stipple( i0, reset_stipple ); + vcache_elt(vcache, i0, 0); + vcache_elt(vcache, i1, 0); + vcache_check_flush(vcache); +} + - vcache_elt(vcache, i0); - vcache_elt(vcache, i1); +static void vcache_line_flags( struct vcache_frontend *vcache, + ushort flags, + unsigned i0, + unsigned i1 ) +{ + vcache_elt(vcache, i0, flags); + vcache_elt(vcache, i1, 0); vcache_check_flush(vcache); } @@ -178,7 +151,7 @@ static void vcache_line( struct vcache_frontend *vcache, static void vcache_point( struct vcache_frontend *vcache, unsigned i0 ) { - vcache_elt(vcache, i0); + vcache_elt(vcache, i0, 0); vcache_check_flush(vcache); } @@ -198,237 +171,36 @@ static void vcache_ef_quad( struct vcache_frontend *vcache, unsigned i2, unsigned i3 ) { - const unsigned omitEdge2 = ~(1 << 1); - const unsigned omitEdge3 = ~(1 << 2); - vcache_ef_triangle( vcache, 1, omitEdge2, i0, i1, i3 ); - vcache_ef_triangle( vcache, 0, omitEdge3, i1, i2, i3 ); -} + const unsigned omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2; + const unsigned omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1; + vcache_triangle_flags( vcache, + DRAW_PIPE_RESET_STIPPLE | omitEdge1, + i0, i1, i3 ); + vcache_triangle_flags( vcache, + omitEdge2, + i1, i2, i3 ); +} +/* At least for now, we're back to using a template include file for + * this. The two paths aren't too different though - it may be + * possible to reunify them. + */ +#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle_flags(vc,flags,i0,i1,i2) +#define QUAD(vc,i0,i1,i2,i3) vcache_ef_quad(vc,i0,i1,i2,i3) +#define LINE(vc,flags,i0,i1) vcache_line_flags(vc,flags,i0,i1) +#define POINT(vc,i0) vcache_point(vc,i0) +#define FUNC vcache_run_extras +#include "draw_pt_vcache_tmp.h" + +#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle(vc,i0,i1,i2) +#define QUAD(vc,i0,i1,i2,i3) vcache_quad(vc,i0,i1,i2,i3) +#define LINE(vc,flags,i0,i1) vcache_line(vc,i0,i1) +#define POINT(vc,i0) vcache_point(vc,i0) +#define FUNC vcache_run +#include "draw_pt_vcache_tmp.h" -static void vcache_run( struct draw_pt_front_end *frontend, - pt_elt_func get_elt, - const void *elts, - unsigned count ) -{ - struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; - struct draw_context *draw = vcache->draw; - - boolean unfilled = (draw->rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || - draw->rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL); - - boolean flatfirst = (draw->rasterizer->flatshade && - draw->rasterizer->flatshade_first); - unsigned i; - -// debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count ); - - switch (vcache->input_prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i ++) { - vcache_point( vcache, - get_elt(elts, i + 0) ); - } - break; - - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) { - vcache_line( vcache, - TRUE, - get_elt(elts, i + 0), - get_elt(elts, i + 1)); - } - break; - - case PIPE_PRIM_LINE_LOOP: - if (count >= 2) { - for (i = 1; i < count; i++) { - vcache_line( vcache, - i == 1, /* XXX: only if vb not split */ - get_elt(elts, i - 1), - get_elt(elts, i )); - } - - vcache_line( vcache, - 0, - get_elt(elts, count - 1), - get_elt(elts, 0 )); - } - break; - - case PIPE_PRIM_LINE_STRIP: - for (i = 1; i < count; i++) { - vcache_line( vcache, - i == 1, - get_elt(elts, i - 1), - get_elt(elts, i )); - } - break; - - case PIPE_PRIM_TRIANGLES: - if (unfilled) { - for (i = 0; i+2 < count; i += 3) { - vcache_ef_triangle( vcache, - 1, - ~0, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2 )); - } - } - else { - for (i = 0; i+2 < count; i += 3) { - vcache_triangle( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2 )); - } - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - if (i & 1) { - vcache_triangle( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 2), - get_elt(elts, i + 1 )); - } - else { - vcache_triangle( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2 )); - } - } - } - else { - for (i = 0; i+2 < count; i++) { - if (i & 1) { - vcache_triangle( vcache, - get_elt(elts, i + 1), - get_elt(elts, i + 0), - get_elt(elts, i + 2 )); - } - else { - vcache_triangle( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2 )); - } - } - } - break; - - case PIPE_PRIM_TRIANGLE_FAN: - if (count >= 3) { - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - vcache_triangle( vcache, - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, 0 )); - } - } - else { - for (i = 0; i+2 < count; i++) { - vcache_triangle( vcache, - get_elt(elts, 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2 )); - } - } - } - break; - - - case PIPE_PRIM_QUADS: - if (unfilled) { - for (i = 0; i+3 < count; i += 4) { - vcache_ef_quad( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, i + 3)); - } - } - else { - for (i = 0; i+3 < count; i += 4) { - vcache_quad( vcache, - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, i + 3)); - } - } - break; - - case PIPE_PRIM_QUAD_STRIP: - if (unfilled) { - for (i = 0; i+3 < count; i += 2) { - vcache_ef_quad( vcache, - get_elt(elts, i + 2), - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 3)); - } - } - else { - for (i = 0; i+3 < count; i += 2) { - vcache_quad( vcache, - get_elt(elts, i + 2), - get_elt(elts, i + 0), - get_elt(elts, i + 1), - get_elt(elts, i + 3)); - } - } - break; - - case PIPE_PRIM_POLYGON: - if (unfilled) { - /* These bitflags look a little odd because we submit the - * vertices as (1,2,0) to satisfy flatshade requirements. - */ - const unsigned edge_first = (1<<2); - const unsigned edge_middle = (1<<0); - const unsigned edge_last = (1<<1); - - for (i = 0; i+2 < count; i++) { - unsigned ef_mask = edge_middle; - - if (i == 0) - ef_mask |= edge_first; - - if (i + 3 == count) - ef_mask |= edge_last; - - vcache_ef_triangle( vcache, - i == 0, - ef_mask, - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, 0)); - } - } - else { - for (i = 0; i+2 < count; i++) { - vcache_triangle( vcache, - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, 0)); - } - } - break; - - default: - assert(0); - break; - } - - vcache_flush( vcache ); -} @@ -453,15 +225,21 @@ static void vcache_prepare( struct draw_pt_front_end *frontend, unsigned opt ) { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; + const struct pipe_rasterizer_state *rasterizer = vcache->draw->rasterizer; + -/* - if (vcache->draw->rasterizer->flatshade_first) - vcache->base.run = vcache_run_pv0; - else - vcache->base.run = vcache_run_pv2; -*/ + if (rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || + rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL || + rasterizer->line_stipple_enable) + { + assert(opt & PT_PIPELINE); + vcache->base.run = vcache_run_extras; + } + else + { + vcache->base.run = vcache_run; + } - vcache->base.run = vcache_run; vcache->input_prim = prim; vcache->output_prim = reduced_prim[prim]; diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h new file mode 100644 index 0000000000..cf9f394aa3 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h @@ -0,0 +1,174 @@ + + +static void FUNC( struct draw_pt_front_end *frontend, + pt_elt_func get_elt, + const void *elts, + unsigned count ) +{ + struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; + struct draw_context *draw = vcache->draw; + + boolean flatfirst = (draw->rasterizer->flatshade && + draw->rasterizer->flatshade_first); + unsigned i, flags; + + + switch (vcache->input_prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i ++) { + POINT( vcache, + get_elt(elts, i + 0) ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 0; i+1 < count; i += 2) { + LINE( vcache, + DRAW_PIPE_RESET_STIPPLE, + get_elt(elts, i + 0), + get_elt(elts, i + 1)); + } + break; + + case PIPE_PRIM_LINE_LOOP: + if (count >= 2) { + flags = DRAW_PIPE_RESET_STIPPLE; + + for (i = 1; i < count; i++, flags = 0) { + LINE( vcache, + flags, + get_elt(elts, i - 1), + get_elt(elts, i )); + } + + LINE( vcache, + flags, + get_elt(elts, i - 1), + get_elt(elts, 0 )); + } + break; + + case PIPE_PRIM_LINE_STRIP: + flags = DRAW_PIPE_RESET_STIPPLE; + for (i = 1; i < count; i++, flags = 0) { + LINE( vcache, + flags, + get_elt(elts, i - 1), + get_elt(elts, i )); + } + break; + + case PIPE_PRIM_TRIANGLES: + for (i = 0; i+2 < count; i += 3) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2 )); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, i + 0), + get_elt(elts, i + 1 + (i&1)), + get_elt(elts, i + 2 - (i&1))); + } + } + else { + for (i = 0; i+2 < count; i++) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, i + 0 + (i&1)), + get_elt(elts, i + 1 - (i&1)), + get_elt(elts, i + 2 )); + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (count >= 3) { + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, 0 )); + } + } + else { + for (i = 0; i+2 < count; i++) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2 )); + } + } + } + break; + + + case PIPE_PRIM_QUADS: + for (i = 0; i+3 < count; i += 4) { + QUAD( vcache, + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, i + 3)); + } + break; + + case PIPE_PRIM_QUAD_STRIP: + for (i = 0; i+3 < count; i += 2) { + QUAD( vcache, + get_elt(elts, i + 2), + get_elt(elts, i + 0), + get_elt(elts, i + 1), + get_elt(elts, i + 3)); + } + break; + + case PIPE_PRIM_POLYGON: + { + /* These bitflags look a little odd because we submit the + * vertices as (1,2,0) to satisfy flatshade requirements. + */ + const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2; + const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0; + const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1; + + flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; + + for (i = 0; i+2 < count; i++, flags = edge_middle) { + + if (i + 3 == count) + flags |= edge_last; + + TRIANGLE( vcache, + flags, + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, 0)); + } + } + break; + + default: + assert(0); + break; + } + + vcache_flush( vcache ); +} + + +#undef TRIANGLE +#undef QUAD +#undef POINT +#undef LINE +#undef FUNC -- cgit v1.2.3 From d712eea074303812b1be0e79b302d7b9f49a09a8 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Thu, 24 Apr 2008 14:05:39 +0200 Subject: translate: Actually return a value from translate_cache_create(). --- src/gallium/auxiliary/translate/translate_cache.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate_cache.c b/src/gallium/auxiliary/translate/translate_cache.c index c14f37c42f..e91d648eba 100644 --- a/src/gallium/auxiliary/translate/translate_cache.c +++ b/src/gallium/auxiliary/translate/translate_cache.c @@ -40,6 +40,7 @@ struct translate_cache * translate_cache_create() { struct translate_cache *cache = MALLOC_STRUCT(translate_cache); cache->hash = cso_hash_create(); + return cache; } -- cgit v1.2.3 From f2c31257167f85df276322be1b8523064e8b66a9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 24 Apr 2008 11:52:37 -0600 Subject: gallium: added cso_delete_vertex_fragment_shader() functions The state tracker now uses these functions to free shaders, rather than the pipe->delete_vs/fs-state() functions. Before, we could get in a situation where we free() a shader and happen to alloc() a new one at the same address. The cso_set_vertex/fragment_shader() function would no-op the state change since the pointers were the same. This led to problems elsewhere, of course. The new delete functions null-out the CSO's current shader pointers. --- src/gallium/auxiliary/cso_cache/cso_context.c | 17 +++++++++++++++++ src/gallium/auxiliary/cso_cache/cso_context.h | 6 ++++++ 2 files changed, 23 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index d246dff433..cfb91d31de 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -471,6 +471,8 @@ void cso_restore_rasterizer(struct cso_context *ctx) ctx->rasterizer_saved = NULL; } + + enum pipe_error cso_set_fragment_shader_handle(struct cso_context *ctx, void *handle ) { @@ -481,6 +483,13 @@ enum pipe_error cso_set_fragment_shader_handle(struct cso_context *ctx, return PIPE_OK; } +void cso_delete_fragment_shader(struct cso_context *ctx, void *handle ) +{ + if (handle == ctx->fragment_shader) + ctx->pipe->bind_fs_state(ctx->pipe, NULL); + ctx->pipe->delete_fs_state(ctx->pipe, handle); + ctx->fragment_shader = NULL; +} /* Not really working: */ @@ -553,6 +562,14 @@ enum pipe_error cso_set_vertex_shader_handle(struct cso_context *ctx, return PIPE_OK; } +void cso_delete_vertex_shader(struct cso_context *ctx, void *handle ) +{ + if (handle == ctx->vertex_shader) + ctx->pipe->bind_vs_state(ctx->pipe, NULL); + ctx->pipe->delete_vs_state(ctx->pipe, handle); + ctx->vertex_shader = NULL; +} + /* Not really working: */ diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index 0405944132..cb46f71d51 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -99,16 +99,22 @@ void cso_restore_sampler_textures( struct cso_context *cso ); */ enum pipe_error cso_set_fragment_shader_handle(struct cso_context *ctx, void *handle ); +void cso_delete_fragment_shader(struct cso_context *ctx, void *handle ); +/* enum pipe_error cso_set_fragment_shader( struct cso_context *cso, const struct pipe_shader_state *shader ); +*/ void cso_save_fragment_shader(struct cso_context *cso); void cso_restore_fragment_shader(struct cso_context *cso); enum pipe_error cso_set_vertex_shader_handle(struct cso_context *ctx, void *handle ); +void cso_delete_vertex_shader(struct cso_context *ctx, void *handle ); +/* enum pipe_error cso_set_vertex_shader( struct cso_context *cso, const struct pipe_shader_state *shader ); +*/ void cso_save_vertex_shader(struct cso_context *cso); void cso_restore_vertex_shader(struct cso_context *cso); -- cgit v1.2.3 From ac79532a15a7109bf0fbd0e40a1ba8e65ed8c435 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 24 Apr 2008 11:56:06 -0600 Subject: gallium: tweak the new shader delete funcs --- src/gallium/auxiliary/cso_cache/cso_context.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index cfb91d31de..febecbbbde 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -485,10 +485,12 @@ enum pipe_error cso_set_fragment_shader_handle(struct cso_context *ctx, void cso_delete_fragment_shader(struct cso_context *ctx, void *handle ) { - if (handle == ctx->fragment_shader) + if (handle == ctx->fragment_shader) { + /* unbind before deleting */ ctx->pipe->bind_fs_state(ctx->pipe, NULL); + ctx->fragment_shader = NULL; + } ctx->pipe->delete_fs_state(ctx->pipe, handle); - ctx->fragment_shader = NULL; } /* Not really working: @@ -564,10 +566,12 @@ enum pipe_error cso_set_vertex_shader_handle(struct cso_context *ctx, void cso_delete_vertex_shader(struct cso_context *ctx, void *handle ) { - if (handle == ctx->vertex_shader) + if (handle == ctx->vertex_shader) { + /* unbind before deleting */ ctx->pipe->bind_vs_state(ctx->pipe, NULL); + ctx->vertex_shader = NULL; + } ctx->pipe->delete_vs_state(ctx->pipe, handle); - ctx->vertex_shader = NULL; } -- cgit v1.2.3 From a41804909d5799cddfbf48a46524f78c736408d4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 24 Apr 2008 11:59:22 -0600 Subject: gallium: minor clean-ups, comments --- src/gallium/auxiliary/cso_cache/cso_context.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index febecbbbde..8cf2313b7f 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -102,6 +102,10 @@ out: return NULL; } + +/** + * Prior to context destruction, this function unbinds all state objects. + */ static void cso_release_all( struct cso_context *ctx ) { unsigned i; @@ -115,10 +119,10 @@ static void cso_release_all( struct cso_context *ctx ) ctx->pipe->bind_vs_state( ctx->pipe, NULL ); } - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { pipe_texture_reference(&ctx->textures[i], NULL); - for (i = 0; i < PIPE_MAX_SAMPLERS; i++) pipe_texture_reference(&ctx->textures_saved[i], NULL); + } if (ctx->cache) { cso_cache_delete( ctx->cache ); @@ -129,10 +133,10 @@ static void cso_release_all( struct cso_context *ctx ) void cso_destroy_context( struct cso_context *ctx ) { - if (ctx) + if (ctx) { cso_release_all( ctx ); - - FREE( ctx ); + FREE( ctx ); + } } -- cgit v1.2.3 From 386102c62a3315182ffbc6319351cb883234511a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 24 Apr 2008 12:10:14 -0600 Subject: gallium: make cso_release_all() public --- src/gallium/auxiliary/cso_cache/cso_context.c | 4 ++-- src/gallium/auxiliary/cso_cache/cso_context.h | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 8cf2313b7f..b4609e999b 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -106,7 +106,7 @@ out: /** * Prior to context destruction, this function unbinds all state objects. */ -static void cso_release_all( struct cso_context *ctx ) +void cso_release_all( struct cso_context *ctx ) { unsigned i; @@ -134,7 +134,7 @@ static void cso_release_all( struct cso_context *ctx ) void cso_destroy_context( struct cso_context *ctx ) { if (ctx) { - cso_release_all( ctx ); + //cso_release_all( ctx ); FREE( ctx ); } } diff --git a/src/gallium/auxiliary/cso_cache/cso_context.h b/src/gallium/auxiliary/cso_cache/cso_context.h index cb46f71d51..b04e98bfa1 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.h +++ b/src/gallium/auxiliary/cso_cache/cso_context.h @@ -42,6 +42,8 @@ struct cso_context; struct cso_context *cso_create_context( struct pipe_context *pipe ); +void cso_release_all( struct cso_context *ctx ); + void cso_destroy_context( struct cso_context *cso ); -- cgit v1.2.3 From da8312a1cf73d0777d51c63148ee090a9acace8b Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 24 Apr 2008 21:13:56 +0100 Subject: draw: default edgeflag should be one --- src/gallium/auxiliary/draw/draw_pt_fetch.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 9f74806e11..1f765b73ad 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -115,7 +115,7 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, fetch->translate = translate_cache_find(fetch->cache, &key); { - static struct vertex_header vh = { 0, 0, 0, 0xffff }; + static struct vertex_header vh = { 0, 1, 0, 0xffff }; fetch->translate->set_buffer(fetch->translate, draw->pt.nr_vertex_buffers, &vh, -- cgit v1.2.3 From 909894e34ca5e575ce21005e38dc0b5e98e4bcd6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 24 Apr 2008 15:58:46 -0600 Subject: gallium: comments --- src/gallium/auxiliary/draw/draw_pipe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 1c9d8650e2..46afb0f41f 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -194,14 +194,14 @@ void draw_pipeline_run( struct draw_context *draw, case PIPE_PRIM_LINES: for (i = 0; i+1 < count; i += 2) do_line( draw, - elts[i+0], + elts[i+0], /* flags */ verts + stride * (elts[i+0] & ~DRAW_PIPE_FLAG_MASK), verts + stride * elts[i+1]); break; case PIPE_PRIM_TRIANGLES: for (i = 0; i+2 < count; i += 3) do_triangle( draw, - elts[i+0], + elts[i+0], /* flags */ verts + stride * (elts[i+0] & ~DRAW_PIPE_FLAG_MASK), verts + stride * elts[i+1], verts + stride * elts[i+2]); -- cgit v1.2.3 From 2926e59e4ad604dedcb639b2961937841afcf005 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 24 Apr 2008 23:31:35 +0100 Subject: draw: remove old assignment of edgeflag value --- src/gallium/auxiliary/draw/draw_pt_post_vs.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index 65cb8ba701..c4a67c8289 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -112,7 +112,6 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, out->clip[3] = out->data[0][3]; out->vertex_id = 0xffff; - out->edgeflag = 1; out->clipmask = compute_clipmask_gl(out->clip, pvs->draw->plane, pvs->draw->nr_planes); -- cgit v1.2.3 From b06cd4debfc4fb4162b4b45e61ea91948de0a279 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 25 Apr 2008 18:19:51 +0900 Subject: gallium: Windows user mode portability fixes. --- .../auxiliary/pipebuffer/pb_buffer_fenced.c | 11 ++++--- src/gallium/auxiliary/util/p_debug.c | 14 ++++---- src/gallium/auxiliary/util/p_debug_mem.c | 6 ++-- src/gallium/auxiliary/util/u_time.c | 38 ++++++++++++++-------- src/gallium/auxiliary/util/u_time.h | 8 +++-- src/gallium/include/pipe/p_util.h | 9 ++--- 6 files changed, 53 insertions(+), 33 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 27032b0c4c..d3c1ec4fbe 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -34,6 +34,12 @@ */ +#include "pipe/p_config.h" + +#if defined(PIPE_OS_LINUX) +#include +#endif + #include "pipe/p_compiler.h" #include "pipe/p_error.h" #include "pipe/p_debug.h" @@ -45,9 +51,6 @@ #include "pb_buffer.h" #include "pb_buffer_fenced.h" -#ifndef WIN32 -#include -#endif /** @@ -425,7 +428,7 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) /* Wait on outstanding fences */ while (fenced_list->numDelayed) { _glthread_UNLOCK_MUTEX(fenced_list->mutex); -#ifndef WIN32 +#if defined(PIPE_OS_LINUX) sched_yield(); #endif _fenced_buffer_list_check_free(fenced_list, 1); diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 25b132b40c..cd612e23b3 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -26,9 +26,11 @@ **************************************************************************/ +#include "pipe/p_config.h" + #include -#ifdef WIN32 +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY #include #include #else @@ -42,7 +44,7 @@ #include "util/u_string.h" -#ifdef WIN32 +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY static INLINE void _EngDebugPrint(const char *format, ...) { @@ -56,7 +58,7 @@ _EngDebugPrint(const char *format, ...) void _debug_vprintf(const char *format, va_list ap) { -#ifdef WIN32 +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY #ifndef WINCE /* EngDebugPrint does not handle float point arguments, so we need to use * our own vsnprintf implementation. It is also very slow, so buffer until @@ -101,7 +103,7 @@ void _debug_break(void) __asm("int3"); #elif (defined(__i386__) || defined(__386__)) && defined(__MSC__) _asm {int 3}; -#elif defined(WIN32) && !defined(WINCE) +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && !defined(WINCE) EngDebugBreak(); #else abort(); @@ -109,7 +111,7 @@ void _debug_break(void) } -#ifdef WIN32 +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY static const char * find(const char *start, const char *end, char c) { @@ -150,7 +152,7 @@ const char * debug_get_option(const char *name, const char *dfault) { const char *result; -#ifdef WIN32 +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY ULONG_PTR iFile = 0; const void *pMap = NULL; const char *sol, *eol, *sep; diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/p_debug_mem.c index aba69c0294..9321cf71bb 100644 --- a/src/gallium/auxiliary/util/p_debug_mem.c +++ b/src/gallium/auxiliary/util/p_debug_mem.c @@ -32,7 +32,9 @@ * @author José Fonseca */ -#ifdef WIN32 +#include "pipe/p_config.h" + +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY #include #include #else @@ -47,7 +49,7 @@ #define DEBUG_MEMORY_MAGIC 0x6e34090aU -#if defined(WIN32) && !defined(WINCE) +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && !defined(WINCE) #define real_malloc(_size) EngAllocMem(0, _size, 'D3AG') #define real_free(_ptr) EngFreeMem(_ptr) #else diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c index 01112ebe5a..dd28ff4134 100644 --- a/src/gallium/auxiliary/util/u_time.c +++ b/src/gallium/auxiliary/util/u_time.c @@ -33,27 +33,35 @@ */ -#ifndef WIN32 +#include "util/u_time.h" + +#if defined(PIPE_OS_LINUX) #include -#else +#elif defined(PIPE_OS_WINDOWS) #include +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) #include #endif - -#include "util/u_time.h" +#else +#error Unsupported OS +#endif -#ifdef WIN32 +#if defined(PIPE_OS_WINDOWS) static LONGLONG frequency = 0; +#if !defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) +#define EngQueryPerformanceFrequency(p) QueryPerformanceFrequency((LARGE_INTEGER*)(p)) +#define EngQueryPerformanceCounter(p) QueryPerformanceCounter((LARGE_INTEGER*)(p)) +#endif #endif void util_time_get(struct util_time *t) { -#ifndef WIN32 +#if defined(PIPE_OS_LINUX) gettimeofday(&t->tv, NULL); -#else +#elif defined(PIPE_OS_WINDOWS) EngQueryPerformanceCounter(&t->counter); #endif } @@ -64,10 +72,10 @@ util_time_add(const struct util_time *t1, int64_t usecs, struct util_time *t2) { -#ifndef WIN32 +#if defined(PIPE_OS_LINUX) t2->tv.tv_sec = t1->tv.tv_sec + usecs / 1000000; t2->tv.tv_usec = t1->tv.tv_usec + usecs % 1000000; -#else +#elif defined(PIPE_OS_WINDOWS) if(!frequency) EngQueryPerformanceFrequency(&frequency); t2->counter = t1->counter + (usecs * frequency + 999999LL)/1000000LL; @@ -79,10 +87,12 @@ int64_t util_time_diff(const struct util_time *t1, const struct util_time *t2) { -#ifndef WIN32 +#if defined(PIPE_OS_LINUX) return (t2->tv.tv_usec - t1->tv.tv_usec) + (t2->tv.tv_sec - t1->tv.tv_sec)*1000000; -#else +#elif defined(PIPE_OS_WINDOWS) + if(!frequency) + EngQueryPerformanceFrequency(&frequency); return (t2->counter - t1->counter)*1000000LL/frequency; #endif } @@ -98,7 +108,7 @@ static INLINE int util_time_compare(const struct util_time *t1, const struct util_time *t2) { -#ifndef WIN32 +#if defined(PIPE_OS_LINUX) if (t1->tv.tv_sec < t2->tv.tv_sec) return -1; else if(t1->tv.tv_sec > t2->tv.tv_sec) @@ -109,7 +119,7 @@ util_time_compare(const struct util_time *t1, return 1; else return 0; -#else +#elif defined(PIPE_OS_WINDOWS) if (t1->counter < t2->counter) return -1; else if(t1->counter > t2->counter) @@ -132,7 +142,7 @@ util_time_timeout(const struct util_time *start, } -#ifdef WIN32 +#if defined(PIPE_OS_WINDOWS) void util_time_sleep(unsigned usecs) { LONGLONG start, curr, end; diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h index c8836c137f..48ec7a4a96 100644 --- a/src/gallium/auxiliary/util/u_time.h +++ b/src/gallium/auxiliary/util/u_time.h @@ -36,7 +36,9 @@ #define U_TIME_H_ -#ifndef WIN32 +#include "pipe/p_config.h" + +#if defined(PIPE_OS_LINUX) #include /* timeval */ #include /* usleep */ #endif @@ -56,7 +58,7 @@ extern "C" { */ struct util_time { -#ifndef WIN32 +#if defined(PIPE_OS_LINUX) struct timeval tv; #else long long counter; @@ -84,7 +86,7 @@ util_time_timeout(const struct util_time *start, const struct util_time *end, const struct util_time *curr); -#ifndef WIN32 +#if defined(PIPE_OS_LINUX) #define util_time_sleep usleep #else void diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index 43d94ec4ba..63301ae3aa 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -28,6 +28,7 @@ #ifndef P_UTIL_H #define P_UTIL_H +#include "p_config.h" #include "p_compiler.h" #include "p_debug.h" #include "p_pointer.h" @@ -40,7 +41,7 @@ extern "C" { #endif -#if defined(WIN32) && defined(DEBUG) /* memory debugging */ +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && defined(DEBUG) /* memory debugging */ #include "p_debug.h" @@ -55,7 +56,7 @@ extern "C" { #else -#ifdef WIN32 +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) void * __stdcall EngAllocMem( @@ -118,7 +119,7 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) return new_ptr; } -#else /* !WIN32 */ +#else /* !PIPE_SUBSYSTEM_WINDOWS_DISPLAY */ #define MALLOC( SIZE ) malloc( SIZE ) @@ -128,7 +129,7 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) #define REALLOC( OLDPTR, OLDSIZE, NEWSIZE ) realloc( OLDPTR, NEWSIZE ) -#endif /* !WIN32 */ +#endif /* !PIPE_SUBSYSTEM_WINDOWS_DISPLAY */ #endif /* !DEBUG */ #define MALLOC_STRUCT(T) (struct T *) MALLOC(sizeof(struct T)) -- cgit v1.2.3 From 992d0b997f8f7e965e56852b81e01c290f8c13de Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 24 Apr 2008 16:22:47 -0400 Subject: frontend for rendering without elts --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/draw_private.h | 1 + src/gallium/auxiliary/draw/draw_pt.c | 30 ++- src/gallium/auxiliary/draw/draw_pt.h | 1 + src/gallium/auxiliary/draw/draw_pt_varray.c | 251 ++++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 144 ++++++++++++++ 6 files changed, 421 insertions(+), 7 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pt_varray.c create mode 100644 src/gallium/auxiliary/draw/draw_pt_varray_tmp.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index bc6acfe458..da7eded21f 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -28,6 +28,7 @@ C_SOURCES = \ draw_pt_fetch_emit.c \ draw_pt_fetch_shade_pipeline.c \ draw_pt_post_vs.c \ + draw_pt_varray.c \ draw_pt_vcache.c \ draw_vertex.c \ draw_vs.c \ diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 49593c8fad..cee58bbf73 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -129,6 +129,7 @@ struct draw_context struct { struct draw_pt_front_end *vcache; + struct draw_pt_front_end *varray; } front; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index f5a3bf390e..d6585bbfd0 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -53,12 +53,14 @@ draw_pt_arrays(struct draw_context *draw, struct draw_pt_front_end *frontend = NULL; struct draw_pt_middle_end *middle = NULL; unsigned opt = 0; + pt_elt_func get_elt = 0; + void *elts = 0; if (!draw->render) { opt |= PT_PIPELINE; } - if (draw_need_pipeline(draw, + if (draw_need_pipeline(draw, draw->rasterizer, prim)) { opt |= PT_PIPELINE; @@ -78,16 +80,21 @@ draw_pt_arrays(struct draw_context *draw, middle = draw->pt.middle.fetch_emit; - /* May create a short-circuited version of this for small primitives: + /* Pick the right frontend */ - frontend = draw->pt.front.vcache; + if (draw->pt.user.elts || + count >= 256) { + frontend = draw->pt.front.vcache; + get_elt = draw_pt_elt_func(draw); + elts = draw_pt_elt_ptr(draw, start); + } else { + frontend = draw->pt.front.varray; + elts = start; + } frontend->prepare( frontend, prim, middle, opt ); - frontend->run( frontend, - draw_pt_elt_func( draw ), - draw_pt_elt_ptr( draw, start ), - count ); + frontend->run(frontend, get_elt, elts, count); frontend->finish( frontend ); @@ -101,6 +108,10 @@ boolean draw_pt_init( struct draw_context *draw ) if (!draw->pt.front.vcache) return FALSE; + draw->pt.front.varray = draw_pt_varray(draw); + if (!draw->pt.front.varray) + return FALSE; + draw->pt.middle.fetch_emit = draw_pt_fetch_emit( draw ); if (!draw->pt.middle.fetch_emit) return FALSE; @@ -129,6 +140,11 @@ void draw_pt_destroy( struct draw_context *draw ) draw->pt.front.vcache->destroy( draw->pt.front.vcache ); draw->pt.front.vcache = NULL; } + + if (draw->pt.front.varray) { + draw->pt.front.varray->destroy( draw->pt.front.varray ); + draw->pt.front.varray = NULL; + } } diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index df5c29fc36..2dec376cee 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -115,6 +115,7 @@ const void *draw_pt_elt_ptr( struct draw_context *draw, * a special case for tiny vertex buffers. */ struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ); +struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw); /* Middle-ends: * diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c new file mode 100644 index 0000000000..022098a314 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -0,0 +1,251 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_pt.h" + +#define FETCH_MAX 256 +#define DRAW_MAX (16*FETCH_MAX) + +struct varray_frontend { + struct draw_pt_front_end base; + struct draw_context *draw; + + ushort draw_elts[DRAW_MAX]; + unsigned fetch_elts[FETCH_MAX]; + + unsigned draw_count; + unsigned fetch_count; + + struct draw_pt_middle_end *middle; + + unsigned input_prim; + unsigned output_prim; +}; + +static void varray_flush(struct varray_frontend *varray) +{ + if (varray->draw_count) { +#if 0 + debug_printf("FLUSH fc = %d, dc = %d\n", + varray->fetch_count, + varray->draw_count); +#endif + varray->middle->run(varray->middle, + varray->fetch_elts, + varray->fetch_count, + varray->draw_elts, + varray->draw_count); + } + + varray->fetch_count = 0; + varray->draw_count = 0; +} + +#if 0 +static void varray_check_flush(struct varray_frontend *varray) +{ + if (varray->draw_count + 6 >= DRAW_MAX/* || + varray->fetch_count + 4 >= FETCH_MAX*/) { + varray_flush(varray); + } +} +#endif + +static INLINE void add_draw_el(struct varray_frontend *varray, + int idx, ushort flags) +{ + varray->draw_elts[varray->draw_count++] = idx | flags; +} + + +static INLINE void varray_triangle( struct varray_frontend *varray, + unsigned i0, + unsigned i1, + unsigned i2 ) +{ + add_draw_el(varray, i0, 0); + add_draw_el(varray, i1, 0); + add_draw_el(varray, i2, 0); +} + +static INLINE void varray_triangle_flags( struct varray_frontend *varray, + ushort flags, + unsigned i0, + unsigned i1, + unsigned i2 ) +{ + add_draw_el(varray, i0, flags); + add_draw_el(varray, i1, 0); + add_draw_el(varray, i2, 0); +} + +static INLINE void varray_line( struct varray_frontend *varray, + unsigned i0, + unsigned i1 ) +{ + add_draw_el(varray, i0, 0); + add_draw_el(varray, i1, 0); +} + + +static INLINE void varray_line_flags( struct varray_frontend *varray, + ushort flags, + unsigned i0, + unsigned i1 ) +{ + add_draw_el(varray, i0, flags); + add_draw_el(varray, i1, 0); +} + + +static INLINE void varray_point( struct varray_frontend *varray, + unsigned i0 ) +{ + add_draw_el(varray, i0, 0); +} + +static INLINE void varray_quad( struct varray_frontend *varray, + unsigned i0, + unsigned i1, + unsigned i2, + unsigned i3 ) +{ + varray_triangle( varray, i0, i1, i3 ); + varray_triangle( varray, i1, i2, i3 ); +} + +static INLINE void varray_ef_quad( struct varray_frontend *varray, + unsigned i0, + unsigned i1, + unsigned i2, + unsigned i3 ) +{ + const unsigned omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2; + const unsigned omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1; + + varray_triangle_flags( varray, + DRAW_PIPE_RESET_STIPPLE | omitEdge1, + i0, i1, i3 ); + + varray_triangle_flags( varray, + omitEdge2, + i1, i2, i3 ); +} + +/* At least for now, we're back to using a template include file for + * this. The two paths aren't too different though - it may be + * possible to reunify them. + */ +#define TRIANGLE(vc,flags,i0,i1,i2) varray_triangle_flags(vc,flags,i0,i1,i2) +#define QUAD(vc,i0,i1,i2,i3) varray_ef_quad(vc,i0,i1,i2,i3) +#define LINE(vc,flags,i0,i1) varray_line_flags(vc,flags,i0,i1) +#define POINT(vc,i0) varray_point(vc,i0) +#define FUNC varray_run_extras +#include "draw_pt_varray_tmp.h" + +#define TRIANGLE(vc,flags,i0,i1,i2) varray_triangle(vc,i0,i1,i2) +#define QUAD(vc,i0,i1,i2,i3) varray_quad(vc,i0,i1,i2,i3) +#define LINE(vc,flags,i0,i1) varray_line(vc,i0,i1) +#define POINT(vc,i0) varray_point(vc,i0) +#define FUNC varray_run +#include "draw_pt_varray_tmp.h" + + + +static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { + PIPE_PRIM_POINTS, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_LINES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES, + PIPE_PRIM_TRIANGLES +}; + + + +static void varray_prepare(struct draw_pt_front_end *frontend, + unsigned prim, + struct draw_pt_middle_end *middle, + unsigned opt) +{ + struct varray_frontend *varray = (struct varray_frontend *)frontend; + const struct pipe_rasterizer_state *rasterizer = varray->draw->rasterizer; + + if (rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || + rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL || + rasterizer->line_stipple_enable) + { + assert(opt & PT_PIPELINE); + varray->base.run = varray_run_extras; + } else { + varray->base.run = varray_run; + } + + varray->input_prim = prim; + varray->output_prim = reduced_prim[prim]; + + varray->middle = middle; + middle->prepare(middle, varray->output_prim, opt); +} + + + + +static void varray_finish(struct draw_pt_front_end *frontend) +{ + struct varray_frontend *varray = (struct varray_frontend *)frontend; + varray->middle->finish(varray->middle); + varray->middle = NULL; +} + +static void varray_destroy(struct draw_pt_front_end *frontend) +{ + FREE(frontend); +} + + +struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw) +{ + struct varray_frontend *varray = CALLOC_STRUCT(varray_frontend); + if (varray == NULL) + return NULL; + + varray->base.prepare = varray_prepare; + varray->base.run = NULL; + varray->base.finish = varray_finish; + varray->base.destroy = varray_destroy; + varray->draw = draw; + + return &varray->base; +} diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h new file mode 100644 index 0000000000..b9a319b253 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h @@ -0,0 +1,144 @@ + +static void FUNC(struct draw_pt_front_end *frontend, + pt_elt_func get_elt, + const void *elts, + unsigned count) +{ + struct varray_frontend *varray = (struct varray_frontend *)frontend; + struct draw_context *draw = varray->draw; + unsigned start = (unsigned)elts; + + boolean flatfirst = (draw->rasterizer->flatshade && + draw->rasterizer->flatshade_first); + unsigned i, flags; + +#if 0 + debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count); +#endif +#if 0 + debug_printf("INPUT PRIM = %d (start = %d, count = %d)\n", varray->input_prim, + start, count); +#endif + + for (i = 0; i < count; ++i) { + varray->fetch_elts[i] = start + i; + } + varray->fetch_count = count; + + switch (varray->input_prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i ++) { + POINT(varray, i + 0); + } + break; + + case PIPE_PRIM_LINES: + for (i = 0; i+1 < count; i += 2) { + LINE(varray, DRAW_PIPE_RESET_STIPPLE, + i + 0, i + 1); + } + break; + + case PIPE_PRIM_LINE_LOOP: + if (count >= 2) { + flags = DRAW_PIPE_RESET_STIPPLE; + + for (i = 1; i < count; i++, flags = 0) { + LINE(varray, flags, i - 1, i); + } + LINE(varray, flags, i - 1, 0); + } + break; + + case PIPE_PRIM_LINE_STRIP: + flags = DRAW_PIPE_RESET_STIPPLE; + for (i = 1; i < count; i++, flags = 0) { + LINE(varray, flags, i - 1, i); + } + break; + + case PIPE_PRIM_TRIANGLES: + for (i = 0; i+2 < count; i += 3) { + TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + i + 0, i + 1, i + 2); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + i + 0, i + 1 + (i&1), i + 2 - (i&1)); + } + } + else { + for (i = 0; i+2 < count; i++) { + TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + i + 0 + (i&1), i + 1 - (i&1), i + 2); + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (count >= 3) { + if (flatfirst) { + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + for (i = 0; i+2 < count; i++) { + TRIANGLE(varray, flags, i + 1, i + 2, 0); + } + } + else { + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + for (i = 0; i+2 < count; i++) { + TRIANGLE(varray, flags, 0, i + 1, i + 2); + } + } + } + break; + + case PIPE_PRIM_QUADS: + for (i = 0; i+3 < count; i += 4) { + QUAD(varray, i + 0, i + 1, i + 2, i + 3); + } + break; + + case PIPE_PRIM_QUAD_STRIP: + for (i = 0; i+3 < count; i += 2) { + QUAD(varray, i + 2, i + 0, i + 1, i + 3); + } + break; + + case PIPE_PRIM_POLYGON: + { + /* These bitflags look a little odd because we submit the + * vertices as (1,2,0) to satisfy flatshade requirements. + */ + const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2; + const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0; + const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1; + + flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; + + for (i = 0; i+2 < count; i++, flags = edge_middle) { + + if (i + 3 == count) + flags |= edge_last; + + TRIANGLE(varray, flags, i + 1, i + 2, 0); + } + } + break; + + default: + assert(0); + break; + } + + varray_flush(varray); +} + +#undef TRIANGLE +#undef QUAD +#undef POINT +#undef LINE +#undef FUNC -- cgit v1.2.3 From 4e46e6f52b88ca7df40a52cf994e6fe1e4b9870e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 25 Apr 2008 14:10:02 +0100 Subject: draw: remove unused vars --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 2 -- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 0545dbc171..f05641dee6 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -815,7 +815,6 @@ aaline_bind_sampler_states(struct pipe_context *pipe, unsigned num, void **sampler) { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); - struct draw_context *draw = aaline->stage.draw; /* save current */ memcpy(aaline->state.sampler, sampler, num * sizeof(void *)); @@ -831,7 +830,6 @@ aaline_set_sampler_textures(struct pipe_context *pipe, unsigned num, struct pipe_texture **texture) { struct aaline_stage *aaline = aaline_stage_from_pipe(pipe); - struct draw_context *draw = aaline->stage.draw; uint i; /* save current */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index da303a13ad..d1d63d73be 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -667,7 +667,6 @@ pstip_set_sampler_textures(struct pipe_context *pipe, unsigned num, struct pipe_texture **texture) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); - struct draw_context *draw = pstip->stage.draw; uint i; /* save current */ @@ -690,7 +689,6 @@ pstip_set_polygon_stipple(struct pipe_context *pipe, const struct pipe_poly_stipple *stipple) { struct pstip_stage *pstip = pstip_stage_from_pipe(pipe); - struct draw_context *draw = (struct draw_context *) pipe->draw; /* save current */ pstip->state.stipple = stipple; -- cgit v1.2.3 From 14cfcb18204233d3d4848ea8e579465983df3d75 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 25 Apr 2008 14:10:32 +0100 Subject: draw: no need to special-case elts/get_elts for varray frontend --- src/gallium/auxiliary/draw/draw_pt.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index d6585bbfd0..c9c5d18313 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -53,8 +53,6 @@ draw_pt_arrays(struct draw_context *draw, struct draw_pt_front_end *frontend = NULL; struct draw_pt_middle_end *middle = NULL; unsigned opt = 0; - pt_elt_func get_elt = 0; - void *elts = 0; if (!draw->render) { opt |= PT_PIPELINE; @@ -83,18 +81,18 @@ draw_pt_arrays(struct draw_context *draw, /* Pick the right frontend */ if (draw->pt.user.elts || - count >= 256) { + count >= 256) { frontend = draw->pt.front.vcache; - get_elt = draw_pt_elt_func(draw); - elts = draw_pt_elt_ptr(draw, start); } else { frontend = draw->pt.front.varray; - elts = start; } frontend->prepare( frontend, prim, middle, opt ); - frontend->run(frontend, get_elt, elts, count); + frontend->run(frontend, + draw_pt_elt_func(draw), + draw_pt_elt_ptr(draw, start), + count); frontend->finish( frontend ); -- cgit v1.2.3 From 2325d1959783aaf57178a86d8a0b28f168761e13 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 25 Apr 2008 14:20:02 +0100 Subject: tgsi: fix compile when HIGH_PRECISION not defined --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index c3295a27ff..559370e613 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -854,7 +854,7 @@ emit_rsqrt( /* On Intel CPUs at least, this is only accurate to 12 bits -- not * good enough. */ - emit_rsqrtps( + sse_rsqrtps( func, make_xmm( xmm_dst ), make_xmm( xmm_src ) ); -- cgit v1.2.3 From 96cfd804f6dcc0ec9f5e887ff1b402a55886fb0b Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Fri, 25 Apr 2008 14:57:50 +0100 Subject: gallium: Add draw_pt_varray.c to scons build. --- src/gallium/auxiliary/draw/SConscript | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 0b9852f633..3b5d5ed492 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -27,6 +27,7 @@ draw = env.ConvenienceLibrary( 'draw_pt_fetch_emit.c', 'draw_pt_fetch_shade_pipeline.c', 'draw_pt_post_vs.c', + 'draw_pt_varray.c', 'draw_pt_vcache.c', 'draw_vertex.c', 'draw_vs.c', -- cgit v1.2.3 From 1aedbf9efe4d1cf45be3c27fc3a0eb4a69a1b1b9 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 25 Apr 2008 16:16:06 +0100 Subject: draw: emit extra flags whenever pipeline is active The assert was in fact over-sensitive, but emitting the extra flags is pretty trivial & we may as well just do it whenever we know the pipeline is running. --- src/gallium/auxiliary/draw/draw_pt_varray.c | 9 ++++----- src/gallium/auxiliary/draw/draw_pt_vcache.c | 6 +----- 2 files changed, 5 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index 022098a314..c85d8ded50 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -202,13 +202,12 @@ static void varray_prepare(struct draw_pt_front_end *frontend, struct varray_frontend *varray = (struct varray_frontend *)frontend; const struct pipe_rasterizer_state *rasterizer = varray->draw->rasterizer; - if (rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || - rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL || - rasterizer->line_stipple_enable) + if (opt & PT_PIPELINE) { - assert(opt & PT_PIPELINE); varray->base.run = varray_run_extras; - } else { + } + else + { varray->base.run = varray_run; } diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index b3133359e0..2f9775814f 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -227,12 +227,8 @@ static void vcache_prepare( struct draw_pt_front_end *frontend, struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; const struct pipe_rasterizer_state *rasterizer = vcache->draw->rasterizer; - - if (rasterizer->fill_cw != PIPE_POLYGON_MODE_FILL || - rasterizer->fill_ccw != PIPE_POLYGON_MODE_FILL || - rasterizer->line_stipple_enable) + if (opt & PT_PIPELINE) { - assert(opt & PT_PIPELINE); vcache->base.run = vcache_run_extras; } else -- cgit v1.2.3 From a94aad297d9804688f888a5112326104a5d00e07 Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 25 Apr 2008 18:46:29 -0600 Subject: use PIPE_ARCH_X86 --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 559370e613..9061e00b63 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -34,7 +34,7 @@ #include "rtasm/rtasm_x86sse.h" -#if defined(__i386__) || defined(__386__) +#ifdef PIPE_ARCH_X86 #define HIGH_PRECISION 1 /* for 1/sqrt() */ @@ -2181,4 +2181,4 @@ tgsi_emit_sse2( return ok; } -#endif /* i386 */ +#endif /* PIPE_ARCH_X86 */ -- cgit v1.2.3 From a8e39b6f5a1fedf2f8719e1adb8802ebbfc09688 Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 25 Apr 2008 19:25:26 -0600 Subject: gallium: fix broken SGT, SLE --- src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index 29e104bbd1..d55f907c0d 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -516,6 +516,20 @@ micro_lg2( dst->f[3] = logf( src->f[3] ) * 1.442695f; } +static void +micro_le( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; + dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; + dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; + dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; +} + static void micro_lt( union tgsi_exec_channel *dst, @@ -1975,7 +1989,7 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); STORE( &r[0], 0, chan_index ); } break; @@ -1992,7 +2006,7 @@ exec_instruction( FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); FETCH( &r[1], 1, chan_index ); - micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); STORE( &r[0], 0, chan_index ); } break; -- cgit v1.2.3 From e3c415995706d2dda7c34a227e2e24d0745763ec Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 27 Apr 2008 21:09:45 +0900 Subject: rtasm: Implement x86_retw. --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 6 ++++++ src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 1 + 2 files changed, 7 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 3cd45d7dd9..e6cbe9967f 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -498,6 +498,12 @@ void x86_ret( struct x86_function *p ) emit_1ub(p, 0xc3); } +void x86_retw( struct x86_function *p, unsigned short imm ) +{ + DUMP(); + emit_3ub(p, 0xc2, imm & 0xff, (imm >> 8) & 0xff); +} + void x86_sahf( struct x86_function *p ) { DUMP(); diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 695a1cef4e..1962b07bc5 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -219,6 +219,7 @@ void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_pop( struct x86_function *p, struct x86_reg reg ); void x86_push( struct x86_function *p, struct x86_reg reg ); void x86_ret( struct x86_function *p ); +void x86_retw( struct x86_function *p, unsigned short imm ); void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -- cgit v1.2.3 From 083008d808c02226a3dfead6000a84efd5e6a9fa Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 27 Apr 2008 21:19:34 +0900 Subject: pipebuffer: Be extra cautious with the incoming buffers. A common mistake is trying to fence user or malloc buffers. So don't let the crash happen inside pipebuffer lib. --- src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index d3c1ec4fbe..a92daf91dd 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -356,11 +356,25 @@ void buffer_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence) { - struct fenced_buffer *fenced_buf = fenced_buffer(buf); - struct fenced_buffer_list *fenced_list = fenced_buf->list; - struct pipe_winsys *winsys = fenced_list->winsys; + struct fenced_buffer *fenced_buf; + struct fenced_buffer_list *fenced_list; + struct pipe_winsys *winsys; /* FIXME: receive this as a parameter */ unsigned flags = fence ? PIPE_BUFFER_USAGE_GPU_READ_WRITE : 0; + + /* This is a public function, so be extra cautious with the buffer passed, + * as happens frequently to receive null buffers, or pointer to buffers + * other than fenced buffers. */ + assert(buf); + if(!buf) + return; + assert(buf->vtbl == &fenced_buffer_vtbl); + if(buf->vtbl != &fenced_buffer_vtbl) + return; + + fenced_buf = fenced_buffer(buf); + fenced_list = fenced_buf->list; + winsys = fenced_list->winsys; if(fence == fenced_buf->fence) { /* Handle the same fence case specially, not only because it is a fast -- cgit v1.2.3 From 0d179ffe9774487f37838aa634c59ff8ebc1111d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 28 Apr 2008 09:33:20 -0600 Subject: gallium: add cases for PIPE_FORMAT_X8Z24_UNORM --- src/gallium/auxiliary/util/p_tile.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index 91283a9ab0..13175ca46e 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -730,6 +730,7 @@ pipe_get_tile_rgba(struct pipe_context *pipe, z32_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); break; case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: s8z24_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); break; case PIPE_FORMAT_Z24S8_UNORM: @@ -808,6 +809,7 @@ pipe_put_tile_rgba(struct pipe_context *pipe, /*z32_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ break; case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: /*s8z24_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ break; case PIPE_FORMAT_Z24S8_UNORM: @@ -852,6 +854,7 @@ pipe_get_tile_z(struct pipe_context *pipe, } break; case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: { const uint *pSrc = (const uint *) pipe_surface_map(ps) + (y * ps->pitch + x); @@ -912,6 +915,7 @@ pipe_put_tile_z(struct pipe_context *pipe, } break; case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: { uint *pDest = (uint *) pipe_surface_map(ps) + (y * ps->pitch + x); for (i = 0; i < h; i++) { -- cgit v1.2.3 From 58d3dff0d3115ddd5397b7f77b5bcf4f9ca616b6 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 28 Apr 2008 18:50:27 +0200 Subject: gallium: Generate SSE code to swizzle and unswizzle vs inputs and outputs. Change SSE_SWIZZLES #define to 0 to disable it. --- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 3 +- src/gallium/auxiliary/draw/draw_vs_sse.c | 52 ++++++-- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 14 ++ src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 2 + src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 142 ++++++++++++++++++++- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h | 4 +- src/gallium/drivers/softpipe/sp_fs_sse.c | 2 +- 7 files changed, 204 insertions(+), 15 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index f0763dad8d..4ec20493c4 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -109,9 +109,10 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vertex_shader; unsigned opt = fpme->opt; + unsigned alloc_count = align_int( fetch_count, 4 ); struct vertex_header *pipeline_verts = - (struct vertex_header *)MALLOC(fpme->vertex_size * fetch_count); + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); if (!pipeline_verts) { /* Not much we can do here - just skip the rendering. diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index b1e9f67114..07f85bc448 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -47,14 +47,29 @@ #include "tgsi/util/tgsi_parse.h" #define SSE_MAX_VERTICES 4 +#define SSE_SWIZZLES 1 +#if SSE_SWIZZLES +typedef void (XSTDCALL *codegen_function) ( + const struct tgsi_exec_vector *input, + struct tgsi_exec_vector *output, + float (*constant)[4], + struct tgsi_exec_vector *temporary, + float (*immediates)[4], + const float (*aos_input)[4], + uint num_inputs, + uint input_stride, + float (*aos_output)[4], + uint num_outputs, + uint output_stride ); +#else typedef void (XSTDCALL *codegen_function) ( const struct tgsi_exec_vector *input, struct tgsi_exec_vector *output, float (*constant)[4], struct tgsi_exec_vector *temporary, float (*immediates)[4] ); - +#endif struct draw_sse_vertex_shader { struct draw_vertex_shader base; @@ -91,12 +106,31 @@ vs_sse_run_linear( struct draw_vertex_shader *base, { struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base; struct tgsi_exec_machine *machine = shader->machine; - unsigned int i, j; - unsigned slot; + unsigned int i; for (i = 0; i < count; i += MAX_TGSI_VERTICES) { unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); +#if SSE_SWIZZLES + /* run compiled shader + */ + shader->func(machine->Inputs, + machine->Outputs, + (float (*)[4])constants, + machine->Temps, + shader->immediates, + input, + base->info.num_inputs, + input_stride, + output, + base->info.num_outputs, + output_stride ); + + input = (const float (*)[4])((const char *)input + input_stride * max_vertices); + output = (float (*)[4])((char *)output + output_stride * max_vertices); +#else + unsigned int j, slot; + /* Swizzle inputs. */ for (j = 0; j < max_vertices; j++) { @@ -105,10 +139,10 @@ vs_sse_run_linear( struct draw_vertex_shader *base, machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; - } + } input = (const float (*)[4])((const char *)input + input_stride); - } + } /* run compiled shader */ @@ -118,7 +152,6 @@ vs_sse_run_linear( struct draw_vertex_shader *base, machine->Temps, shader->immediates); - /* Unswizzle all output results. */ for (j = 0; j < max_vertices; j++) { @@ -127,10 +160,11 @@ vs_sse_run_linear( struct draw_vertex_shader *base, output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; - } + } output = (float (*)[4])((char *)output + output_stride); - } + } +#endif } } @@ -176,7 +210,7 @@ draw_create_vs_sse(struct draw_context *draw, x86_init_func( &vs->sse2_program ); if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens, - &vs->sse2_program, vs->immediates )) + &vs->sse2_program, vs->immediates, SSE_SWIZZLES )) goto fail; vs->func = (codegen_function) x86_get_func( &vs->sse2_program ); diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index e6cbe9967f..d7e2230557 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -853,6 +853,20 @@ void sse_shufps( struct x86_function *p, emit_1ub(p, shuf); } +void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub( p, X86_TWOB, 0x15 ); + emit_modrm( p, dst, src ); +} + +void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) +{ + DUMP_RR( dst, src ); + emit_2ub( p, X86_TWOB, 0x14 ); + emit_modrm( p, dst, src ); +} + void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 1962b07bc5..ad79b1facf 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -203,6 +203,8 @@ void sse_rsqrtps( struct x86_function *p, struct x86_reg dst, struct x86_reg src void sse_rsqrtss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_shufps( struct x86_function *p, struct x86_reg dest, struct x86_reg arg0, unsigned char shuf ); +void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); +void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src ); void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 9061e00b63..86ca16c246 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -1788,7 +1788,6 @@ emit_instruction( break; case TGSI_OPCODE_RET: - case TGSI_OPCODE_END: #ifdef WIN32 emit_retw( func, 16 ); #else @@ -1796,6 +1795,9 @@ emit_instruction( #endif break; + case TGSI_OPCODE_END: + break; + case TGSI_OPCODE_SSG: return 0; break; @@ -2027,6 +2029,127 @@ emit_declaration( } } +static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num, uint stride ) +{ + struct x86_reg soa_input; + struct x86_reg aos_input; + struct x86_reg num_inputs; + struct x86_reg temp; + unsigned char *inner_loop; + + soa_input = x86_make_reg( file_REG32, reg_AX ); + aos_input = x86_make_reg( file_REG32, reg_BX ); + num_inputs = x86_make_reg( file_REG32, reg_CX ); + temp = x86_make_reg( file_REG32, reg_DX ); + + /* Save EBX */ + x86_push( func, x86_make_reg( file_REG32, reg_BX ) ); + + x86_mov( func, soa_input, get_argument( soa + 1 ) ); + x86_mov( func, aos_input, get_argument( aos + 1 ) ); + x86_mov( func, num_inputs, get_argument( num + 1 ) ); + + inner_loop = x86_get_label( func ); + + x86_mov( func, temp, get_argument( stride + 1 ) ); + x86_push( func, aos_input ); + sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); + sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, temp ); + sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); + sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, temp ); + sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); + sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, temp ); + sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); + sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); + x86_pop( func, aos_input ); + + sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); + sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); + sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 ); + sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd ); + sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 ); + sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd ); + + sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) ); + sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) ); + sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) ); + sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) ); + + /* Advance to next input */ + x86_mov_reg_imm( func, temp, 16 ); + x86_add( func, aos_input, temp ); + x86_mov_reg_imm( func, temp, 64 ); + x86_add( func, soa_input, temp ); + x86_dec( func, num_inputs ); + x86_jcc( func, cc_NE, inner_loop ); + + /* Restore EBX */ + x86_pop( func, x86_make_reg( file_REG32, reg_BX ) ); +} + +static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride ) +{ + struct x86_reg soa_output; + struct x86_reg aos_output; + struct x86_reg num_outputs; + struct x86_reg temp; + unsigned char *inner_loop; + + soa_output = x86_make_reg( file_REG32, reg_AX ); + aos_output = x86_make_reg( file_REG32, reg_BX ); + num_outputs = x86_make_reg( file_REG32, reg_CX ); + temp = x86_make_reg( file_REG32, reg_DX ); + + /* Save EBX */ + x86_push( func, x86_make_reg( file_REG32, reg_BX ) ); + + x86_mov( func, soa_output, get_argument( soa + 1 ) ); + x86_mov( func, aos_output, get_argument( aos + 1 ) ); + x86_mov( func, num_outputs, get_argument( num + 1 ) ); + + inner_loop = x86_get_label( func ); + + sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) ); + sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) ); + sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) ); + sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) ); + + sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); + sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); + sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) ); + sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) ); + sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) ); + sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) ); + + x86_mov( func, temp, get_argument( stride + 1 ) ); + x86_push( func, aos_output ); + sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); + sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); + x86_add( func, aos_output, temp ); + sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); + sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); + x86_add( func, aos_output, temp ); + sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); + sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); + x86_add( func, aos_output, temp ); + sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); + sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); + x86_pop( func, aos_output ); + + /* Advance to next output */ + x86_mov_reg_imm( func, temp, 16 ); + x86_add( func, aos_output, temp ); + x86_mov_reg_imm( func, temp, 64 ); + x86_add( func, soa_output, temp ); + x86_dec( func, num_outputs ); + x86_jcc( func, cc_NE, inner_loop ); + + /* Restore EBX */ + x86_pop( func, x86_make_reg( file_REG32, reg_BX ) ); +} /** * Translate a TGSI vertex/fragment shader to SSE2 code. @@ -2048,7 +2171,8 @@ unsigned tgsi_emit_sse2( const struct tgsi_token *tokens, struct x86_function *func, - float (*immediates)[4]) + float (*immediates)[4], + boolean do_swizzles ) { struct tgsi_parse_context parse; boolean instruction_phase = FALSE; @@ -2089,6 +2213,9 @@ tgsi_emit_sse2( else { assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX); + if (do_swizzles) + aos_to_soa( func, 5, 0, 6, 7 ); + x86_mov( func, get_input_base(), @@ -2176,6 +2303,17 @@ tgsi_emit_sse2( } } + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) { + if (do_swizzles) + soa_to_aos( func, 8, 1, 9, 10 ); + } + +#ifdef WIN32 + emit_retw( func, 16 ); +#else + emit_ret( func ); +#endif + tgsi_parse_free( &parse ); return ok; diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h index 063287dc5e..e66d115283 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h @@ -12,8 +12,8 @@ unsigned tgsi_emit_sse2( const struct tgsi_token *tokens, struct x86_function *function, - float (*immediates)[4] - ); + float (*immediates)[4], + boolean do_swizzles ); #if defined __cplusplus } diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index f857d26143..4d569e1e22 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -133,7 +133,7 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe, x86_init_func( &shader->sse2_program ); if (!tgsi_emit_sse2( templ->tokens, &shader->sse2_program, - shader->immediates)) { + shader->immediates, FALSE )) { FREE(shader); return NULL; } -- cgit v1.2.3 From b6d9666a420bd7c31a6696f94ba1025e5204d458 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 28 Apr 2008 19:03:20 +0200 Subject: gallium: Set SSE_SWIZZLES to 0 by default. --- src/gallium/auxiliary/draw/draw_vs_sse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 07f85bc448..90926aec85 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -47,7 +47,7 @@ #include "tgsi/util/tgsi_parse.h" #define SSE_MAX_VERTICES 4 -#define SSE_SWIZZLES 1 +#define SSE_SWIZZLES 0 #if SSE_SWIZZLES typedef void (XSTDCALL *codegen_function) ( -- cgit v1.2.3 From a41b78d107264227f3338446e04dcfda32634f52 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 25 Apr 2008 15:27:33 +0100 Subject: pb: remove unused variable, squash warning --- src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index a92daf91dd..2fa0842971 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -134,8 +134,6 @@ _fenced_buffer_add(struct fenced_buffer *fenced_buf) static INLINE void _fenced_buffer_destroy(struct fenced_buffer *fenced_buf) { - struct fenced_buffer_list *fenced_list = fenced_buf->list; - assert(!fenced_buf->base.base.refcount); assert(!fenced_buf->fence); pb_reference(&fenced_buf->buffer, NULL); -- cgit v1.2.3 From 546ab045d8a18758ffc44da9dc76ad1335553cf6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 25 Apr 2008 15:28:54 +0100 Subject: translate: squash warnings --- src/gallium/auxiliary/translate/translate_cache.c | 3 ++- src/gallium/auxiliary/translate/translate_cache.h | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate_cache.c b/src/gallium/auxiliary/translate/translate_cache.c index e91d648eba..115dc9287e 100644 --- a/src/gallium/auxiliary/translate/translate_cache.c +++ b/src/gallium/auxiliary/translate/translate_cache.c @@ -28,6 +28,7 @@ #include "pipe/p_util.h" #include "pipe/p_state.h" #include "translate.h" +#include "translate_cache.h" #include "cso_cache/cso_cache.h" #include "cso_cache/cso_hash.h" @@ -36,7 +37,7 @@ struct translate_cache { struct cso_hash *hash; }; -struct translate_cache * translate_cache_create() +struct translate_cache * translate_cache_create( void ) { struct translate_cache *cache = MALLOC_STRUCT(translate_cache); cache->hash = cso_hash_create(); diff --git a/src/gallium/auxiliary/translate/translate_cache.h b/src/gallium/auxiliary/translate/translate_cache.h index 63fc57b7ea..7dba871e57 100644 --- a/src/gallium/auxiliary/translate/translate_cache.h +++ b/src/gallium/auxiliary/translate/translate_cache.h @@ -38,7 +38,7 @@ struct translate_cache; struct translate_key; struct translate; -struct translate_cache *translate_cache_create(); +struct translate_cache *translate_cache_create( void ); void translate_cache_destroy(struct translate_cache *cache); /** -- cgit v1.2.3 From 9fb444f191015b44498a5c83d762519ccc98ed55 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 28 Apr 2008 18:43:27 +0100 Subject: tsgi: add a makefile --- src/gallium/auxiliary/tgsi/exec/Makefile | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 src/gallium/auxiliary/tgsi/exec/Makefile (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/Makefile b/src/gallium/auxiliary/tgsi/exec/Makefile new file mode 100644 index 0000000000..451911a354 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/exec/Makefile @@ -0,0 +1,2 @@ +default: + cd .. ; make -- cgit v1.2.3 From c4917c62311522df902003d77b146fc677c09a4e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 28 Apr 2008 18:50:31 +0100 Subject: tgsi: make loop structure clearer, use x86_lea for increments --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 134 ++++++++++++++-------------- 1 file changed, 68 insertions(+), 66 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 86ca16c246..1138f59997 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -2049,40 +2049,41 @@ static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num, x86_mov( func, aos_input, get_argument( aos + 1 ) ); x86_mov( func, num_inputs, get_argument( num + 1 ) ); + /* do */ inner_loop = x86_get_label( func ); - - x86_mov( func, temp, get_argument( stride + 1 ) ); - x86_push( func, aos_input ); - sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); - sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); - x86_add( func, aos_input, temp ); - sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); - sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); - x86_add( func, aos_input, temp ); - sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); - sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); - x86_add( func, aos_input, temp ); - sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); - sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); - x86_pop( func, aos_input ); - - sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); - sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); - sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 ); - sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd ); - sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 ); - sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd ); - - sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) ); - sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) ); - sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) ); - sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) ); - - /* Advance to next input */ - x86_mov_reg_imm( func, temp, 16 ); - x86_add( func, aos_input, temp ); - x86_mov_reg_imm( func, temp, 64 ); - x86_add( func, soa_input, temp ); + { + x86_mov( func, temp, get_argument( stride + 1 ) ); + x86_push( func, aos_input ); + sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); + sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, temp ); + sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); + sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, temp ); + sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); + sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, temp ); + sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); + sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); + x86_pop( func, aos_input ); + + sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); + sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); + sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 ); + sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd ); + sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 ); + sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd ); + + sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) ); + sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) ); + sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) ); + sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) ); + + /* Advance to next input */ + x86_lea( func, aos_input, x86_make_disp(aos_input, 16) ); + x86_lea( func, soa_input, x86_make_disp(soa_input, 64) ); + } + /* while --num_inputs */ x86_dec( func, num_inputs ); x86_jcc( func, cc_NE, inner_loop ); @@ -2110,40 +2111,41 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, x86_mov( func, aos_output, get_argument( aos + 1 ) ); x86_mov( func, num_outputs, get_argument( num + 1 ) ); + /* do */ inner_loop = x86_get_label( func ); - - sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) ); - sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) ); - sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) ); - sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) ); - - sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); - sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); - sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) ); - sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) ); - sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) ); - sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) ); - - x86_mov( func, temp, get_argument( stride + 1 ) ); - x86_push( func, aos_output ); - sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); - sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); - x86_add( func, aos_output, temp ); - sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); - sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); - x86_add( func, aos_output, temp ); - sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); - sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); - x86_add( func, aos_output, temp ); - sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); - sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); - x86_pop( func, aos_output ); - - /* Advance to next output */ - x86_mov_reg_imm( func, temp, 16 ); - x86_add( func, aos_output, temp ); - x86_mov_reg_imm( func, temp, 64 ); - x86_add( func, soa_output, temp ); + { + sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) ); + sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) ); + sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) ); + sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) ); + + sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); + sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); + sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) ); + sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) ); + sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) ); + sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) ); + + x86_mov( func, temp, get_argument( stride + 1 ) ); + x86_push( func, aos_output ); + sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); + sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); + x86_add( func, aos_output, temp ); + sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); + sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); + x86_add( func, aos_output, temp ); + sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); + sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); + x86_add( func, aos_output, temp ); + sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); + sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); + x86_pop( func, aos_output ); + + /* Advance to next output */ + x86_lea( func, aos_output, x86_make_disp(aos_output, 16) ); + x86_lea( func, soa_output, x86_make_disp(soa_output, 64) ); + } + /* while --num_outputs */ x86_dec( func, num_outputs ); x86_jcc( func, cc_NE, inner_loop ); -- cgit v1.2.3 From 5e8d7899a8114918054f07c807aef07cbbfcb6b4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 28 Apr 2008 16:32:42 -0600 Subject: gallium: fix error msg typo --- src/gallium/auxiliary/util/u_pack_color.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 50ef44992b..89e3b5dc45 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -201,7 +201,7 @@ util_pack_z(enum pipe_format format, double z) case PIPE_FORMAT_Z24X8_UNORM: return ((uint) (z * 0xffffff)) << 8; default: - debug_printf("gallium: unhandled fomrat in util_pack_z()"); + debug_printf("gallium: unhandled format in util_pack_z()\n"); return 0; } } -- cgit v1.2.3 From 1cff4992b389ad884a663c93bdd7b7c6be6c79d2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 28 Apr 2008 16:33:15 -0600 Subject: gallium: add \n to error strings --- src/gallium/auxiliary/util/u_pack_color.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 89e3b5dc45..8296f1291c 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -91,7 +91,7 @@ util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a, return; /* XXX lots more cases to add */ default: - debug_printf("gallium: unhandled format in util_pack_color_ub()"); + debug_printf("gallium: unhandled format in util_pack_color_ub()\n"); } } @@ -174,7 +174,7 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest) return; /* XXX lots more cases to add */ default: - debug_printf("gallium: unhandled format in util_pack_color()"); + debug_printf("gallium: unhandled format in util_pack_color()\n"); } } -- cgit v1.2.3 From 733bc4df1a53bb1899933685e06c52109d096bee Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 29 Apr 2008 12:55:41 -0600 Subject: gallium: added some assertions to be sure the blit's surface formats are suitable --- src/gallium/auxiliary/util/u_blit.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 1105066cb8..9e9912c6e4 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -264,6 +264,9 @@ util_blit_pixels(struct blit_state *ctx, dstY1 = tmp; } + assert(screen->is_format_supported(screen, src->format, PIPE_TEXTURE)); + assert(screen->is_format_supported(screen, dst->format, PIPE_SURFACE)); + /* * XXX for now we're always creating a temporary texture. * Strictly speaking that's not always needed. -- cgit v1.2.3 From 9bfe1a3d505733489f7583fe603b7d192f38fa8c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 29 Apr 2008 20:33:37 +0100 Subject: gallium: add debug_print_format() make it easier to print format error messages --- src/gallium/auxiliary/util/p_debug.c | 97 +++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_pack_color.h | 9 ++- src/gallium/include/pipe/p_debug.h | 11 ++-- src/gallium/include/pipe/p_format.h | 83 +------------------------- 4 files changed, 111 insertions(+), 89 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index cd612e23b3..eaf2c9bd98 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -41,6 +41,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_util.h" #include "pipe/p_debug.h" +#include "pipe/p_format.h" #include "util/u_string.h" @@ -324,3 +325,99 @@ debug_dump_flags(const struct debug_named_value *names, } + + + + +char *pf_sprint_name( char *str, enum pipe_format format ) +{ + strcpy( str, "PIPE_FORMAT_" ); + switch (pf_layout( format )) { + case PIPE_FORMAT_LAYOUT_RGBAZS: + { + pipe_format_rgbazs_t rgbazs = (pipe_format_rgbazs_t) format; + uint i; + uint scale = 1 << (pf_exp8( rgbazs ) * 3); + + for (i = 0; i < 4; i++) { + uint size = pf_size_xyzw( rgbazs, i ); + + if (size == 0) { + break; + } + switch (pf_swizzle_xyzw( rgbazs, i )) { + case PIPE_FORMAT_COMP_R: + strcat( str, "R" ); + break; + case PIPE_FORMAT_COMP_G: + strcat( str, "G" ); + break; + case PIPE_FORMAT_COMP_B: + strcat( str, "B" ); + break; + case PIPE_FORMAT_COMP_A: + strcat( str, "A" ); + break; + case PIPE_FORMAT_COMP_0: + strcat( str, "0" ); + break; + case PIPE_FORMAT_COMP_1: + strcat( str, "1" ); + break; + case PIPE_FORMAT_COMP_Z: + strcat( str, "Z" ); + break; + case PIPE_FORMAT_COMP_S: + strcat( str, "S" ); + break; + } + util_snprintf( &str[strlen( str )], 32, "%u", size * scale ); + } + if (i != 0) { + strcat( str, "_" ); + } + switch (pf_type( rgbazs )) { + case PIPE_FORMAT_TYPE_UNKNOWN: + strcat( str, "NONE" ); + break; + case PIPE_FORMAT_TYPE_FLOAT: + strcat( str, "FLOAT" ); + break; + case PIPE_FORMAT_TYPE_UNORM: + strcat( str, "UNORM" ); + break; + case PIPE_FORMAT_TYPE_SNORM: + strcat( str, "SNORM" ); + break; + case PIPE_FORMAT_TYPE_USCALED: + strcat( str, "USCALED" ); + break; + case PIPE_FORMAT_TYPE_SSCALED: + strcat( str, "SSCALED" ); + break; + } + } + break; + case PIPE_FORMAT_LAYOUT_YCBCR: + { + pipe_format_ycbcr_t ycbcr = (pipe_format_ycbcr_t) format; + + strcat( str, "YCBCR" ); + if (pf_rev( ycbcr )) { + strcat( str, "_REV" ); + } + } + break; + } + return str; +} + + +void debug_print_format(const char *msg, enum pipe_format fmt ) +{ + char fmtstr[80]; + + pf_sprint_name(fmtstr, fmt); + + debug_printf("%s: %s\n", msg, fmtstr); +} diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 8296f1291c..7e26b1642a 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -91,7 +91,8 @@ util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a, return; /* XXX lots more cases to add */ default: - debug_printf("gallium: unhandled format in util_pack_color_ub()\n"); + debug_print_format("gallium: unhandled format in util_pack_color_ub()", format); + assert(0); } } @@ -174,7 +175,8 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest) return; /* XXX lots more cases to add */ default: - debug_printf("gallium: unhandled format in util_pack_color()\n"); + debug_print_format("gallium: unhandled format in util_pack_color()", format); + assert(0); } } @@ -201,7 +203,8 @@ util_pack_z(enum pipe_format format, double z) case PIPE_FORMAT_Z24X8_UNORM: return ((uint) (z * 0xffffff)) << 8; default: - debug_printf("gallium: unhandled format in util_pack_z()\n"); + debug_print_format("gallium: unhandled format in util_pack_z()", format); + assert(0); return 0; } } diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index 235c47abac..f87247994b 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -103,16 +103,19 @@ debug_printf(const char *format, ...) #endif +#ifdef DEBUG /** * Dump a blob in hex to the same place that debug_printf sends its * messages. */ -#ifdef DEBUG -void debug_print_blob( const char *name, - const void *blob, - unsigned size ); +void debug_print_blob( const char *name, const void *blob, unsigned size ); + +/* Print a message along with a prettified format string + */ +void debug_print_format(const char *msg, enum pipe_format fmt ); #else #define debug_print_blob(_name, _blob, _size) ((void)0) +#define debug_print_format(_msg, _fmt) ((void)0) #endif diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index 94c2eeb62b..bc23fe142e 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -337,88 +337,7 @@ enum pipe_format { /** * Builds pipe format name from format token. */ -static INLINE char *pf_sprint_name( char *str, enum pipe_format format ) -{ - strcpy( str, "PIPE_FORMAT_" ); - switch (pf_layout( format )) { - case PIPE_FORMAT_LAYOUT_RGBAZS: - { - pipe_format_rgbazs_t rgbazs = (pipe_format_rgbazs_t) format; - uint i; - uint scale = 1 << (pf_exp8( rgbazs ) * 3); - - for (i = 0; i < 4; i++) { - uint size = pf_size_xyzw( rgbazs, i ); - - if (size == 0) { - break; - } - switch (pf_swizzle_xyzw( rgbazs, i )) { - case PIPE_FORMAT_COMP_R: - strcat( str, "R" ); - break; - case PIPE_FORMAT_COMP_G: - strcat( str, "G" ); - break; - case PIPE_FORMAT_COMP_B: - strcat( str, "B" ); - break; - case PIPE_FORMAT_COMP_A: - strcat( str, "A" ); - break; - case PIPE_FORMAT_COMP_0: - strcat( str, "0" ); - break; - case PIPE_FORMAT_COMP_1: - strcat( str, "1" ); - break; - case PIPE_FORMAT_COMP_Z: - strcat( str, "Z" ); - break; - case PIPE_FORMAT_COMP_S: - strcat( str, "S" ); - break; - } - util_snprintf( &str[strlen( str )], 32, "%u", size * scale ); - } - if (i != 0) { - strcat( str, "_" ); - } - switch (pf_type( rgbazs )) { - case PIPE_FORMAT_TYPE_UNKNOWN: - strcat( str, "NONE" ); - break; - case PIPE_FORMAT_TYPE_FLOAT: - strcat( str, "FLOAT" ); - break; - case PIPE_FORMAT_TYPE_UNORM: - strcat( str, "UNORM" ); - break; - case PIPE_FORMAT_TYPE_SNORM: - strcat( str, "SNORM" ); - break; - case PIPE_FORMAT_TYPE_USCALED: - strcat( str, "USCALED" ); - break; - case PIPE_FORMAT_TYPE_SSCALED: - strcat( str, "SSCALED" ); - break; - } - } - break; - case PIPE_FORMAT_LAYOUT_YCBCR: - { - pipe_format_ycbcr_t ycbcr = (pipe_format_ycbcr_t) format; - - strcat( str, "YCBCR" ); - if (pf_rev( ycbcr )) { - strcat( str, "_REV" ); - } - } - break; - } - return str; -} +extern char *pf_sprint_name( char *str, enum pipe_format format ); /** * Return bits for a particular component. -- cgit v1.2.3 From 7441f2e3ea77404064d65b604e1e525cacc453e9 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 29 Apr 2008 20:42:35 +0100 Subject: gallium: add pack for A1R5B5G5 --- src/gallium/auxiliary/util/u_pack_color.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 7e26b1642a..7d4024e82c 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -89,6 +89,12 @@ util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a, *d = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); } return; + case PIPE_FORMAT_A1R5G5B5_UNORM: + { + ushort *d = (ushort *) dest; + *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 3) | (b >> 3); + } + return; /* XXX lots more cases to add */ default: debug_print_format("gallium: unhandled format in util_pack_color_ub()", format); @@ -156,6 +162,12 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest) *d = ((r & 0xf8) << 8) | ((g & 0xfc) << 3) | (b >> 3); } return; + case PIPE_FORMAT_A1R5G5B5_UNORM: + { + ushort *d = (ushort *) dest; + *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 3) | (b >> 3); + } + return; case PIPE_FORMAT_R32G32B32A32_FLOAT: { float *d = (float *) dest; -- cgit v1.2.3 From fbddc8097ce3a9d38a061105542875dbb9f909f7 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 29 Apr 2008 21:05:48 +0100 Subject: gallium: add pack for A4R4G4B4 --- src/gallium/auxiliary/util/u_pack_color.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 7d4024e82c..f641329d18 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -95,6 +95,12 @@ util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a, *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 3) | (b >> 3); } return; + case PIPE_FORMAT_A4R4G4B4_UNORM: + { + ushort *d = (ushort *) dest; + *d = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); + } + return; /* XXX lots more cases to add */ default: debug_print_format("gallium: unhandled format in util_pack_color_ub()", format); @@ -168,6 +174,12 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest) *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 3) | (b >> 3); } return; + case PIPE_FORMAT_A4R4G4B4_UNORM: + { + ushort *d = (ushort *) dest; + *d = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); + } + return; case PIPE_FORMAT_R32G32B32A32_FLOAT: { float *d = (float *) dest; -- cgit v1.2.3 From bbafa8aa2fc8009fb8e32f996d4972c56e6b46e6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 29 Apr 2008 21:30:04 +0100 Subject: gallium: fix pack for A1R5B5G5 --- src/gallium/auxiliary/util/u_pack_color.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index f641329d18..bb2c34e5f6 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -92,7 +92,7 @@ util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a, case PIPE_FORMAT_A1R5G5B5_UNORM: { ushort *d = (ushort *) dest; - *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 3) | (b >> 3); + *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); } return; case PIPE_FORMAT_A4R4G4B4_UNORM: @@ -171,7 +171,7 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest) case PIPE_FORMAT_A1R5G5B5_UNORM: { ushort *d = (ushort *) dest; - *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 3) | (b >> 3); + *d = ((a & 0x80) << 8) | ((r & 0xf8) << 7) | ((g & 0xf8) << 2) | (b >> 3); } return; case PIPE_FORMAT_A4R4G4B4_UNORM: -- cgit v1.2.3 From 1e97ab685689ef06181a5f22fae9a3a82c83142c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 29 Apr 2008 21:30:31 +0100 Subject: gallium: add pack for _FLOAT formats to pack_ub --- src/gallium/auxiliary/util/u_pack_color.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index bb2c34e5f6..0b917c005f 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -101,6 +101,24 @@ util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a, *d = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); } return; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + { + float *d = (float *) dest; + d[0] = (float)r / 255.0f; + d[1] = (float)g / 255.0f; + d[2] = (float)b / 255.0f; + d[3] = (float)a / 255.0f; + } + return; + case PIPE_FORMAT_R32G32B32_FLOAT: + { + float *d = (float *) dest; + d[0] = (float)r / 255.0f; + d[1] = (float)g / 255.0f; + d[2] = (float)b / 255.0f; + } + return; + /* XXX lots more cases to add */ default: debug_print_format("gallium: unhandled format in util_pack_color_ub()", format); -- cgit v1.2.3 From 0d80f407f128f1a324e9dc0db2d0910bf32ba736 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 29 Apr 2008 17:21:10 -0400 Subject: silence p_debug.h:63: warning: ISO C forbids forward references to ‘enum’ types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/gallium/auxiliary/util/p_debug.c | 4 ++-- src/gallium/include/pipe/p_debug.h | 5 +---- 2 files changed, 3 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index eaf2c9bd98..8ef2880191 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -413,11 +413,11 @@ char *pf_sprint_name( char *str, enum pipe_format format ) } -void debug_print_format(const char *msg, enum pipe_format fmt ) +void debug_print_format(const char *msg, unsigned fmt ) { char fmtstr[80]; - pf_sprint_name(fmtstr, fmt); + pf_sprint_name(fmtstr, (enum pipe_format)fmt); debug_printf("%s: %s\n", msg, fmtstr); } diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index ed47c0e14c..7a7312ea12 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -59,9 +59,6 @@ extern "C" { #endif #endif - -enum pipe_format; - void _debug_vprintf(const char *format, va_list ap); @@ -114,7 +111,7 @@ void debug_print_blob( const char *name, const void *blob, unsigned size ); /* Print a message along with a prettified format string */ -void debug_print_format(const char *msg, enum pipe_format fmt ); +void debug_print_format(const char *msg, unsigned fmt ); #else #define debug_print_blob(_name, _blob, _size) ((void)0) #define debug_print_format(_msg, _fmt) ((void)0) -- cgit v1.2.3 From 42fb48492e71016c5a2888cd3d2507a89dbd91f3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 29 Apr 2008 18:34:14 -0600 Subject: gallium: init hw_key with memset() to silence valgrind warnings We shouldn't be hashing with keys that have uninitialized memory. --- src/gallium/auxiliary/draw/draw_pt_emit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 35707af8a8..f9ac16786e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -58,6 +58,8 @@ void draw_pt_emit_prepare( struct pt_emit *emit, return; } + memset(&hw_key, 0, sizeof(hw_key)); + /* Must do this after set_primitive() above: */ vinfo = draw->render->get_vertex_info(draw->render); -- cgit v1.2.3 From cafb545721e6c9479c07d3a7a891236e006d3376 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 1 May 2008 00:59:12 +0900 Subject: d3d: Windows miniport driver portability fixes. --- src/gallium/auxiliary/util/p_debug.c | 2 +- src/gallium/auxiliary/util/p_debug_mem.c | 7 +++- src/gallium/auxiliary/util/u_time.c | 6 +-- src/gallium/include/pipe/p_config.h | 9 +++++ src/gallium/include/pipe/p_util.h | 64 +++++++++++++++++--------------- 5 files changed, 53 insertions(+), 35 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 8ef2880191..56eecaab62 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -104,7 +104,7 @@ void _debug_break(void) __asm("int3"); #elif (defined(__i386__) || defined(__386__)) && defined(__MSC__) _asm {int 3}; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && !defined(WINCE) +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) EngDebugBreak(); #else abort(); diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/p_debug_mem.c index 9321cf71bb..3b5e4fbaee 100644 --- a/src/gallium/auxiliary/util/p_debug_mem.c +++ b/src/gallium/auxiliary/util/p_debug_mem.c @@ -34,9 +34,11 @@ #include "pipe/p_config.h" -#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) #include #include +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) +#include #else #include #include @@ -52,6 +54,9 @@ #if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && !defined(WINCE) #define real_malloc(_size) EngAllocMem(0, _size, 'D3AG') #define real_free(_ptr) EngFreeMem(_ptr) +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) +#define real_malloc(_size) ExAllocatePool(0, _size) +#define real_free(_ptr) ExFreePool(_ptr) #else #define real_malloc(_size) malloc(_size) #define real_free(_ptr) free(_ptr) diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c index dd28ff4134..9b97050d51 100644 --- a/src/gallium/auxiliary/util/u_time.c +++ b/src/gallium/auxiliary/util/u_time.c @@ -37,11 +37,11 @@ #if defined(PIPE_OS_LINUX) #include -#elif defined(PIPE_OS_WINDOWS) +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) #include -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) #include -#endif +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) +#include #else #error Unsupported OS #endif diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h index e44fafeae9..5c030bdfff 100644 --- a/src/gallium/include/pipe/p_config.h +++ b/src/gallium/include/pipe/p_config.h @@ -106,7 +106,16 @@ #if defined(PIPE_SUBSYSTEM_KERNEL) #define PIPE_SUBSYSTEM_WINDOWS_DISPLAY #endif +#if 0 /* FIXME */ +#define PIPE_SUBSYSTEM_WINDOWS_MINIPORT +#endif +#if 0 /* FIXME */ +#define PIPE_SUBSYSTEM_WINDOWS_CE +#endif +#if defined(PIPE_SUBSYSTEM_USER) +#define PIPE_SUBSYSTEM_WINDOWS_USER #endif +#endif /* PIPE_OS_WINDOWS */ #endif /* P_CONFIG_H_ */ diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index 63301ae3aa..0e7e246666 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -41,7 +41,9 @@ extern "C" { #endif -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && defined(DEBUG) /* memory debugging */ +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && defined(DEBUG) + +/* memory debugging */ #include "p_debug.h" @@ -54,9 +56,7 @@ extern "C" { #define REALLOC( _ptr, _old_size, _size ) \ debug_realloc( __FILE__, __LINE__, __FUNCTION__, _ptr, _old_size, _size ) -#else - -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) void * __stdcall EngAllocMem( @@ -68,17 +68,33 @@ void __stdcall EngFreeMem( void *Mem ); -static INLINE void * -MALLOC( unsigned size ) -{ -#ifdef WINCE - /* TODO: Need to abstract this */ - return malloc( size ); +#define MALLOC( _size ) EngAllocMem( 0, _size, 'D3AG' ) +#define _FREE( _ptr ) EngFreeMem( _ptr ) + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + +void * +ExAllocatePool( + unsigned long PoolType, + size_t NumberOfBytes); + +void +ExFreePool(void *P); + +#define MALLOC(_size) ExAllocatePool(0, _size) +#define _FREE(_ptr) ExFreePool(_ptr) + #else - return EngAllocMem( 0, size, 'D3AG' ); + +#define MALLOC( SIZE ) malloc( SIZE ) +#define CALLOC( COUNT, SIZE ) calloc( COUNT, SIZE ) +#define FREE( PTR ) free( PTR ) +#define REALLOC( OLDPTR, OLDSIZE, NEWSIZE ) realloc( OLDPTR, NEWSIZE ) + #endif -} + +#ifndef CALLOC static INLINE void * CALLOC( unsigned count, unsigned size ) { @@ -88,20 +104,19 @@ CALLOC( unsigned count, unsigned size ) } return ptr; } +#endif /* !CALLOC */ +#ifndef FREE static INLINE void FREE( void *ptr ) { if( ptr ) { -#ifdef WINCE - /* TODO: Need to abstract this */ - free( ptr ); -#else - EngFreeMem( ptr ); -#endif + _FREE( ptr ); } } +#endif /* !FREE */ +#ifndef REALLOC static INLINE void * REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) { @@ -118,19 +133,8 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) FREE( old_ptr ); return new_ptr; } +#endif /* !REALLOC */ -#else /* !PIPE_SUBSYSTEM_WINDOWS_DISPLAY */ - -#define MALLOC( SIZE ) malloc( SIZE ) - -#define CALLOC( COUNT, SIZE ) calloc( COUNT, SIZE ) - -#define FREE( PTR ) free( PTR ) - -#define REALLOC( OLDPTR, OLDSIZE, NEWSIZE ) realloc( OLDPTR, NEWSIZE ) - -#endif /* !PIPE_SUBSYSTEM_WINDOWS_DISPLAY */ -#endif /* !DEBUG */ #define MALLOC_STRUCT(T) (struct T *) MALLOC(sizeof(struct T)) -- cgit v1.2.3 From dadb11f5d96354975f1c9f7c8134bb4727129249 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 1 May 2008 03:43:19 +0900 Subject: gallium: Fix release build. --- src/gallium/auxiliary/util/p_debug.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 56eecaab62..f1fb07bf5b 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -413,6 +413,7 @@ char *pf_sprint_name( char *str, enum pipe_format format ) } +#ifdef DEBUG void debug_print_format(const char *msg, unsigned fmt ) { char fmtstr[80]; @@ -421,3 +422,4 @@ void debug_print_format(const char *msg, unsigned fmt ) debug_printf("%s: %s\n", msg, fmtstr); } +#endif \ No newline at end of file -- cgit v1.2.3 From 026e31a068981724fb0c98f6d1fc87d086fd2da6 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 30 Apr 2008 02:04:28 -0400 Subject: try to fix the deleting of the bound cso during cleanup of the hash --- src/gallium/auxiliary/cso_cache/cso_cache.c | 40 ++++++-- src/gallium/auxiliary/cso_cache/cso_cache.h | 28 ++++-- src/gallium/auxiliary/cso_cache/cso_context.c | 128 ++++++++++++++++++++++++++ 3 files changed, 178 insertions(+), 18 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index 096875807b..36dc46ff80 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -43,6 +43,9 @@ struct cso_cache { struct cso_hash *rasterizer_hash; struct cso_hash *sampler_hash; int max_size; + + cso_sanitize_callback sanitize_cb; + void *sanitize_data; }; #if 1 @@ -205,8 +208,19 @@ static INLINE void delete_cso(void *state, enum cso_cache_type type) } } -static INLINE void sanitize_hash(struct cso_hash *hash, enum cso_cache_type type, + +static INLINE void sanitize_hash(struct cso_cache *sc, + struct cso_hash *hash, + enum cso_cache_type type, int max_size) +{ + if (sc->sanitize_cb) + sc->sanitize_cb(hash, type, max_size, sc->sanitize_data); +} + + +static INLINE void sanitize_cb(struct cso_hash *hash, enum cso_cache_type type, + int max_size, void *user_data) { /* if we're approach the maximum size, remove fourth of the entries * otherwise every subsequent call will go through the same */ @@ -231,7 +245,7 @@ cso_insert_state(struct cso_cache *sc, void *state) { struct cso_hash *hash = _cso_hash_for_type(sc, type); - sanitize_hash(hash, type, sc->max_size); + sanitize_hash(sc, hash, type, sc->max_size); return cso_hash_insert(hash, hash_key, state); } @@ -300,6 +314,8 @@ struct cso_cache *cso_cache_create(void) sc->rasterizer_hash = cso_hash_create(); sc->fs_hash = cso_hash_create(); sc->vs_hash = cso_hash_create(); + sc->sanitize_cb = sanitize_cb; + sc->sanitize_data = 0; return sc; } @@ -365,13 +381,13 @@ void cso_set_maximum_cache_size(struct cso_cache *sc, int number) { sc->max_size = number; - sanitize_hash(sc->blend_hash, CSO_BLEND, sc->max_size); - sanitize_hash(sc->depth_stencil_hash, CSO_DEPTH_STENCIL_ALPHA, + sanitize_hash(sc, sc->blend_hash, CSO_BLEND, sc->max_size); + sanitize_hash(sc, sc->depth_stencil_hash, CSO_DEPTH_STENCIL_ALPHA, sc->max_size); - sanitize_hash(sc->fs_hash, CSO_FRAGMENT_SHADER, sc->max_size); - sanitize_hash(sc->vs_hash, CSO_VERTEX_SHADER, sc->max_size); - sanitize_hash(sc->rasterizer_hash, CSO_RASTERIZER, sc->max_size); - sanitize_hash(sc->sampler_hash, CSO_SAMPLER, sc->max_size); + sanitize_hash(sc, sc->fs_hash, CSO_FRAGMENT_SHADER, sc->max_size); + sanitize_hash(sc, sc->vs_hash, CSO_VERTEX_SHADER, sc->max_size); + sanitize_hash(sc, sc->rasterizer_hash, CSO_RASTERIZER, sc->max_size); + sanitize_hash(sc, sc->sampler_hash, CSO_SAMPLER, sc->max_size); } int cso_maximum_cache_size(const struct cso_cache *sc) @@ -379,3 +395,11 @@ int cso_maximum_cache_size(const struct cso_cache *sc) return sc->max_size; } +void cso_cache_set_sanitize_callback(struct cso_cache *sc, + cso_sanitize_callback cb, + void *user_data) +{ + sc->sanitize_cb = cb; + sc->sanitize_data = user_data; +} + diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.h b/src/gallium/auxiliary/cso_cache/cso_cache.h index e5edbbb556..6b5c230e8f 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.h +++ b/src/gallium/auxiliary/cso_cache/cso_cache.h @@ -84,8 +84,22 @@ extern "C" { #endif +enum cso_cache_type { + CSO_BLEND, + CSO_SAMPLER, + CSO_DEPTH_STENCIL_ALPHA, + CSO_RASTERIZER, + CSO_FRAGMENT_SHADER, + CSO_VERTEX_SHADER +}; + typedef void (*cso_state_callback)(void *ctx, void *obj); +typedef void (*cso_sanitize_callback)(struct cso_hash *hash, + enum cso_cache_type type, + int max_size, + void *user_data); + struct cso_cache; struct cso_blend { @@ -130,21 +144,15 @@ struct cso_sampler { struct pipe_context *context; }; - -enum cso_cache_type { - CSO_BLEND, - CSO_SAMPLER, - CSO_DEPTH_STENCIL_ALPHA, - CSO_RASTERIZER, - CSO_FRAGMENT_SHADER, - CSO_VERTEX_SHADER -}; - unsigned cso_construct_key(void *item, int item_size); struct cso_cache *cso_cache_create(void); void cso_cache_delete(struct cso_cache *sc); +void cso_cache_set_sanitize_callback(struct cso_cache *sc, + cso_sanitize_callback cb, + void *user_data); + struct cso_hash_iter cso_insert_state(struct cso_cache *sc, unsigned hash_key, enum cso_cache_type type, void *state); diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index b4609e999b..eef898f486 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -80,6 +80,131 @@ struct cso_context { }; +static boolean delete_blend_state(struct cso_context *ctx, void *state) +{ + struct cso_blend *cso = (struct cso_blend *)state; + + if (ctx->blend == state) + return FALSE; + + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); + return TRUE; +} + +static boolean delete_depth_stencil_state(struct cso_context *ctx, void *state) +{ + struct cso_depth_stencil_alpha *cso = (struct cso_depth_stencil_alpha *)state; + + if (ctx->depth_stencil == cso->data) + return FALSE; + + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); + + return TRUE; +} + +static boolean delete_sampler_state(struct cso_context *ctx, void *state) +{ + struct cso_sampler *cso = (struct cso_sampler *)state; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); + return TRUE; +} + +static boolean delete_rasterizer_state(struct cso_context *ctx, void *state) +{ + struct cso_rasterizer *cso = (struct cso_rasterizer *)state; + + if (ctx->rasterizer == cso->data) + return FALSE; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); + return TRUE; +} + +static boolean delete_fs_state(struct cso_context *ctx, void *state) +{ + struct cso_fragment_shader *cso = (struct cso_fragment_shader *)state; + if (ctx->fragment_shader == cso->data) + return FALSE; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); + return TRUE; +} + +static boolean delete_vs_state(struct cso_context *ctx, void *state) +{ + struct cso_vertex_shader *cso = (struct cso_vertex_shader *)state; + if (ctx->vertex_shader == cso->data) + return TRUE; + if (cso->delete_state) + cso->delete_state(cso->context, cso->data); + FREE(state); + return FALSE; +} + + +static INLINE boolean delete_cso(struct cso_context *ctx, + void *state, enum cso_cache_type type) +{ + switch (type) { + case CSO_BLEND: + return delete_blend_state(ctx, state); + break; + case CSO_SAMPLER: + return delete_sampler_state(ctx, state); + break; + case CSO_DEPTH_STENCIL_ALPHA: + return delete_depth_stencil_state(ctx, state); + break; + case CSO_RASTERIZER: + return delete_rasterizer_state(ctx, state); + break; + case CSO_FRAGMENT_SHADER: + return delete_fs_state(ctx, state); + break; + case CSO_VERTEX_SHADER: + return delete_vs_state(ctx, state); + break; + default: + assert(0); + FREE(state); + } + return FALSE; +} + +static INLINE void sanitize_hash(struct cso_hash *hash, enum cso_cache_type type, + int max_size, void *user_data) +{ + struct cso_context *ctx = (struct cso_context *)user_data; + /* if we're approach the maximum size, remove fourth of the entries + * otherwise every subsequent call will go through the same */ + int hash_size = cso_hash_size(hash); + int max_entries = (max_size > hash_size) ? max_size : hash_size; + int to_remove = (max_size < max_entries) * max_entries/4; + struct cso_hash_iter iter = cso_hash_first_node(hash); + if (hash_size > max_size) + to_remove += hash_size - max_size; + while (to_remove) { + /*remove elements until we're good */ + /*fixme: currently we pick the nodes to remove at random*/ + void *cso = cso_hash_iter_data(iter); + if (delete_cso(ctx, cso, type)) { + iter = cso_hash_erase(hash, iter); + --to_remove; + } else + iter = cso_hash_iter_next(iter); + } +} + + struct cso_context *cso_create_context( struct pipe_context *pipe ) { struct cso_context *ctx = CALLOC_STRUCT(cso_context); @@ -89,6 +214,9 @@ struct cso_context *cso_create_context( struct pipe_context *pipe ) ctx->cache = cso_cache_create(); if (ctx->cache == NULL) goto out; + cso_cache_set_sanitize_callback(ctx->cache, + sanitize_hash, + ctx); ctx->pipe = pipe; -- cgit v1.2.3 From 54f94a790e4488445347abcff9a636a9c440d7f9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 30 Apr 2008 16:50:17 -0600 Subject: gallium: use the newer PIPE_FORMAT_x_UNORM format names --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 2 +- src/gallium/auxiliary/util/p_tile.c | 24 ++++++++-------- src/gallium/auxiliary/util/u_gen_mipmap.c | 8 +++--- src/gallium/drivers/i915simple/i915_screen.c | 8 +++--- .../drivers/i915simple/i915_state_sampler.c | 8 +++--- src/gallium/drivers/i965simple/brw_screen.c | 24 ++++++++-------- .../drivers/i965simple/brw_wm_surface_state.c | 8 +++--- src/mesa/state_tracker/st_cb_bitmap.c | 4 +-- src/mesa/state_tracker/st_format.c | 32 +++++++++++----------- 10 files changed, 60 insertions(+), 60 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index f05641dee6..f501b2aed4 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -392,7 +392,7 @@ aaline_create_texture(struct aaline_stage *aaline) memset(&texTemp, 0, sizeof(texTemp)); texTemp.target = PIPE_TEXTURE_2D; - texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */ + texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */ texTemp.last_level = MAX_TEXTURE_LEVEL; texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL; texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL; diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index d1d63d73be..c4de9d2698 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -417,7 +417,7 @@ pstip_create_texture(struct pstip_stage *pstip) memset(&texTemp, 0, sizeof(texTemp)); texTemp.target = PIPE_TEXTURE_2D; - texTemp.format = PIPE_FORMAT_U_A8; /* XXX verify supported by driver! */ + texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */ texTemp.last_level = 0; texTemp.width[0] = 32; texTemp.height[0] = 32; diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index 13175ca46e..63e1cc6013 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -385,7 +385,7 @@ z16_get_tile_rgba(ushort *src, -/*** PIPE_FORMAT_U_L8 ***/ +/*** PIPE_FORMAT_L8_UNORM ***/ static void l8_get_tile_rgba(ubyte *src, @@ -408,7 +408,7 @@ l8_get_tile_rgba(ubyte *src, } -/*** PIPE_FORMAT_U_A8 ***/ +/*** PIPE_FORMAT_A8_UNORM ***/ static void a8_get_tile_rgba(ubyte *src, @@ -476,7 +476,7 @@ r16g16b16a16_put_tile_rgba(short *dst, -/*** PIPE_FORMAT_U_I8 ***/ +/*** PIPE_FORMAT_I8_UNORM ***/ static void i8_get_tile_rgba(ubyte *src, @@ -499,7 +499,7 @@ i8_get_tile_rgba(ubyte *src, } -/*** PIPE_FORMAT_U_A8_L8 ***/ +/*** PIPE_FORMAT_A8L8_UNORM ***/ static void a8_l8_get_tile_rgba(ushort *src, @@ -708,16 +708,16 @@ pipe_get_tile_rgba(struct pipe_context *pipe, case PIPE_FORMAT_R5G6B5_UNORM: r5g6b5_get_tile_rgba((ushort *) packed, w, h, p, dst_stride); break; - case PIPE_FORMAT_U_L8: + case PIPE_FORMAT_L8_UNORM: l8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride); break; - case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_A8_UNORM: a8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride); break; - case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_I8_UNORM: i8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride); break; - case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_A8L8_UNORM: a8_l8_get_tile_rgba((ushort *) packed, w, h, p, dst_stride); break; case PIPE_FORMAT_R16G16B16A16_SNORM: @@ -787,16 +787,16 @@ pipe_put_tile_rgba(struct pipe_context *pipe, break; case PIPE_FORMAT_R8G8B8A8_UNORM: break; - case PIPE_FORMAT_U_L8: + case PIPE_FORMAT_L8_UNORM: /*l8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/ break; - case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_A8_UNORM: /*a8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/ break; - case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_I8_UNORM: /*i8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/ break; - case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_A8L8_UNORM: /*a8_l8_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/ break; case PIPE_FORMAT_R16G16B16A16_SNORM: diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index b8dc6c66c0..0348629ab8 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -493,13 +493,13 @@ format_to_type_comps(enum pipe_format pformat, *datatype = USHORT_5_6_5; *comps = 3; return; - case PIPE_FORMAT_U_L8: - case PIPE_FORMAT_U_A8: - case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_A8_UNORM: + case PIPE_FORMAT_I8_UNORM: *datatype = UBYTE; *comps = 1; return; - case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_A8L8_UNORM: *datatype = UBYTE; *comps = 2; return; diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 9ae594ce54..631642e1b6 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -154,10 +154,10 @@ i915_is_format_supported( struct pipe_screen *screen, PIPE_FORMAT_R8G8B8A8_UNORM, PIPE_FORMAT_A8R8G8B8_UNORM, PIPE_FORMAT_R5G6B5_UNORM, - PIPE_FORMAT_U_L8, - PIPE_FORMAT_U_A8, - PIPE_FORMAT_U_I8, - PIPE_FORMAT_U_A8_L8, + PIPE_FORMAT_L8_UNORM, + PIPE_FORMAT_A8_UNORM, + PIPE_FORMAT_I8_UNORM, + PIPE_FORMAT_A8L8_UNORM, PIPE_FORMAT_YCBCR, PIPE_FORMAT_YCBCR_REV, PIPE_FORMAT_S8Z24_UNORM, diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c index 84f6529a3a..982eec4a1b 100644 --- a/src/gallium/drivers/i915simple/i915_state_sampler.c +++ b/src/gallium/drivers/i915simple/i915_state_sampler.c @@ -131,13 +131,13 @@ static uint translate_texture_format(enum pipe_format pipeFormat) { switch (pipeFormat) { - case PIPE_FORMAT_U_L8: + case PIPE_FORMAT_L8_UNORM: return MAPSURF_8BIT | MT_8BIT_L8; - case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_I8_UNORM: return MAPSURF_8BIT | MT_8BIT_I8; - case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_A8_UNORM: return MAPSURF_8BIT | MT_8BIT_A8; - case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_A8L8_UNORM: return MAPSURF_16BIT | MT_16BIT_AY88; case PIPE_FORMAT_R5G6B5_UNORM: return MAPSURF_16BIT | MT_16BIT_RGB565; diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c index 6845c7abde..b700f7e4f5 100644 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -141,13 +141,13 @@ brw_is_format_supported( struct pipe_screen *screen, #if 0 /* XXX: This is broken -- rewrite if still needed. */ static const unsigned tex_supported[] = { - PIPE_FORMAT_U_R8_G8_B8_A8, - PIPE_FORMAT_U_A8_R8_G8_B8, - PIPE_FORMAT_U_R5_G6_B5, - PIPE_FORMAT_U_L8, - PIPE_FORMAT_U_A8, - PIPE_FORMAT_U_I8, - PIPE_FORMAT_U_L8_A8, + PIPE_FORMAT_R8G8B8A8_UNORM, + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, + PIPE_FORMAT_L8_UNORM, + PIPE_FORMAT_A8_UNORM, + PIPE_FORMAT_I8_UNORM, + PIPE_FORMAT_L8A8_UNORM, PIPE_FORMAT_YCBCR, PIPE_FORMAT_YCBCR_REV, PIPE_FORMAT_S8_Z24, @@ -157,16 +157,16 @@ brw_is_format_supported( struct pipe_screen *screen, /* Actually a lot more than this - add later: */ static const unsigned render_supported[] = { - PIPE_FORMAT_U_A8_R8_G8_B8, - PIPE_FORMAT_U_R5_G6_B5, + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_R5G6B5_UNORM, }; /* */ static const unsigned z_stencil_supported[] = { - PIPE_FORMAT_U_Z16, - PIPE_FORMAT_U_Z32, - PIPE_FORMAT_S8_Z24, + PIPE_FORMAT_Z16_UNORM, + PIPE_FORMAT_Z32_UNORM, + PIPE_FORMAT_S8Z24_UNORM, }; switch (type) { diff --git a/src/gallium/drivers/i965simple/brw_wm_surface_state.c b/src/gallium/drivers/i965simple/brw_wm_surface_state.c index 853c743ccf..69e56dc8bd 100644 --- a/src/gallium/drivers/i965simple/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_surface_state.c @@ -57,16 +57,16 @@ static unsigned translate_tex_target( enum pipe_texture_target target ) static unsigned translate_tex_format( enum pipe_format pipe_format ) { switch( pipe_format ) { - case PIPE_FORMAT_U_L8: + case PIPE_FORMAT_L8_UNORM: return BRW_SURFACEFORMAT_L8_UNORM; - case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_I8_UNORM: return BRW_SURFACEFORMAT_I8_UNORM; - case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_A8_UNORM: return BRW_SURFACEFORMAT_A8_UNORM; - case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_A8L8_UNORM: return BRW_SURFACEFORMAT_L8A8_UNORM; case PIPE_FORMAT_R8G8B8_UNORM: diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index f0a3b75357..ce8fefe703 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -722,8 +722,8 @@ st_init_bitmap(struct st_context *st) st->bitmap.rasterizer.bypass_vs = 1; /* find a usable texture format */ - if (screen->is_format_supported(screen, PIPE_FORMAT_U_I8, PIPE_TEXTURE)) { - st->bitmap.tex_format = PIPE_FORMAT_U_I8; + if (screen->is_format_supported(screen, PIPE_FORMAT_I8_UNORM, PIPE_TEXTURE)) { + st->bitmap.tex_format = PIPE_FORMAT_I8_UNORM; } else { /* XXX support more formats */ diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index 8b5f84cd56..9a385a0457 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -261,13 +261,13 @@ st_mesa_format_to_pipe_format(GLuint mesaFormat) case MESA_FORMAT_RGB565: return PIPE_FORMAT_R5G6B5_UNORM; case MESA_FORMAT_AL88: - return PIPE_FORMAT_U_A8_L8; + return PIPE_FORMAT_A8L8_UNORM; case MESA_FORMAT_A8: - return PIPE_FORMAT_U_A8; + return PIPE_FORMAT_A8_UNORM; case MESA_FORMAT_L8: - return PIPE_FORMAT_U_L8; + return PIPE_FORMAT_L8_UNORM; case MESA_FORMAT_I8: - return PIPE_FORMAT_U_I8; + return PIPE_FORMAT_I8_UNORM; case MESA_FORMAT_Z16: return PIPE_FORMAT_Z16_UNORM; case MESA_FORMAT_Z32: @@ -409,8 +409,8 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, case GL_ALPHA12: case GL_ALPHA16: case GL_COMPRESSED_ALPHA: - if (screen->is_format_supported( screen, PIPE_FORMAT_U_A8, surfType )) - return PIPE_FORMAT_U_A8; + if (screen->is_format_supported( screen, PIPE_FORMAT_A8_UNORM, surfType )) + return PIPE_FORMAT_A8_UNORM; return default_rgba_format( screen, surfType ); case 1: @@ -420,8 +420,8 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, case GL_LUMINANCE12: case GL_LUMINANCE16: case GL_COMPRESSED_LUMINANCE: - if (screen->is_format_supported( screen, PIPE_FORMAT_U_L8, surfType )) - return PIPE_FORMAT_U_L8; + if (screen->is_format_supported( screen, PIPE_FORMAT_L8_UNORM, surfType )) + return PIPE_FORMAT_L8_UNORM; return default_rgba_format( screen, surfType ); case 2: @@ -433,8 +433,8 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, case GL_LUMINANCE12_ALPHA12: case GL_LUMINANCE16_ALPHA16: case GL_COMPRESSED_LUMINANCE_ALPHA: - if (screen->is_format_supported( screen, PIPE_FORMAT_U_A8_L8, surfType )) - return PIPE_FORMAT_U_A8_L8; + if (screen->is_format_supported( screen, PIPE_FORMAT_A8L8_UNORM, surfType )) + return PIPE_FORMAT_A8L8_UNORM; return default_rgba_format( screen, surfType ); case GL_INTENSITY: @@ -443,8 +443,8 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, case GL_INTENSITY12: case GL_INTENSITY16: case GL_COMPRESSED_INTENSITY: - if (screen->is_format_supported( screen, PIPE_FORMAT_U_I8, surfType )) - return PIPE_FORMAT_U_I8; + if (screen->is_format_supported( screen, PIPE_FORMAT_I8_UNORM, surfType )) + return PIPE_FORMAT_I8_UNORM; return default_rgba_format( screen, surfType ); case GL_YCBCR_MESA: @@ -547,13 +547,13 @@ translate_gallium_format_to_mesa_format(enum pipe_format format) return &_mesa_texformat_argb4444; case PIPE_FORMAT_R5G6B5_UNORM: return &_mesa_texformat_rgb565; - case PIPE_FORMAT_U_A8_L8: + case PIPE_FORMAT_A8L8_UNORM: return &_mesa_texformat_al88; - case PIPE_FORMAT_U_A8: + case PIPE_FORMAT_A8_UNORM: return &_mesa_texformat_a8; - case PIPE_FORMAT_U_L8: + case PIPE_FORMAT_L8_UNORM: return &_mesa_texformat_l8; - case PIPE_FORMAT_U_I8: + case PIPE_FORMAT_I8_UNORM: return &_mesa_texformat_i8; case PIPE_FORMAT_Z16_UNORM: return &_mesa_texformat_z16; -- cgit v1.2.3 From c9ed86a96483063f3d6789ed16645a3dca77d726 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 11:07:21 +0100 Subject: gallium: tex surface checkpoint --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 12 ++-- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 15 +++-- src/gallium/auxiliary/util/p_tile.c | 50 ++++++++++---- src/gallium/auxiliary/util/u_blit.c | 3 +- src/gallium/auxiliary/util/u_gen_mipmap.c | 21 ++++-- src/gallium/drivers/failover/fo_context.c | 2 +- src/gallium/drivers/i915simple/i915_screen.c | 31 +++++++++ src/gallium/drivers/i915simple/i915_surface.c | 23 +++++-- src/gallium/drivers/i915simple/i915_texture.c | 13 ++-- src/gallium/drivers/i965simple/brw_surface.c | 51 ++++++--------- src/gallium/drivers/i965simple/brw_tex_layout.c | 2 +- src/gallium/drivers/softpipe/sp_context.c | 6 +- src/gallium/drivers/softpipe/sp_draw_arrays.c | 6 +- src/gallium/drivers/softpipe/sp_flush.c | 35 +++++----- src/gallium/drivers/softpipe/sp_screen.c | 22 ++++--- src/gallium/drivers/softpipe/sp_surface.c | 23 +++++-- src/gallium/drivers/softpipe/sp_texture.c | 87 ++++++++++++++++++------- src/gallium/drivers/softpipe/sp_texture.h | 1 - src/gallium/drivers/softpipe/sp_tile_cache.c | 36 ++++++---- src/gallium/drivers/softpipe/sp_tile_cache.h | 2 +- src/gallium/include/pipe/p_context.h | 6 -- src/gallium/include/pipe/p_inlines.h | 31 ++++----- src/gallium/include/pipe/p_screen.h | 17 ++++- src/gallium/include/pipe/p_state.h | 4 ++ src/gallium/include/pipe/p_util.h | 3 + src/gallium/include/pipe/p_winsys.h | 2 +- src/gallium/winsys/xlib/xm_winsys.c | 2 + src/mesa/state_tracker/st_atom_pixeltransfer.c | 9 +-- src/mesa/state_tracker/st_cb_accum.c | 12 ++-- src/mesa/state_tracker/st_cb_bitmap.c | 23 +++++-- src/mesa/state_tracker/st_cb_drawpixels.c | 56 +++++++++------- src/mesa/state_tracker/st_cb_fbo.c | 27 +++++--- src/mesa/state_tracker/st_cb_readpixels.c | 5 +- src/mesa/state_tracker/st_cb_texture.c | 48 +++++++------- src/mesa/state_tracker/st_gen_mipmap.c | 6 +- src/mesa/state_tracker/st_texture.c | 34 ++++++---- src/mesa/state_tracker/st_texture.h | 6 +- 37 files changed, 465 insertions(+), 267 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index f501b2aed4..6dc20f2c90 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -415,8 +415,11 @@ aaline_create_texture(struct aaline_stage *aaline) assert(aaline->texture->width[level] == aaline->texture->height[level]); - surface = screen->get_tex_surface(screen, aaline->texture, 0, level, 0); - data = pipe_surface_map(surface); + /* This texture is new, no need to flush. + */ + surface = screen->get_tex_surface(screen, aaline->texture, 0, level, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); + data = screen->surface_map(screen, surface, PIPE_BUFFER_USAGE_CPU_WRITE); if (data == NULL) return FALSE; @@ -440,9 +443,8 @@ aaline_create_texture(struct aaline_stage *aaline) } /* unmap */ - pipe_surface_unmap(surface); - pipe_surface_reference(&surface, NULL); - pipe->texture_update(pipe, aaline->texture, 0, (1 << level)); + screen->surface_unmap(screen, surface); + screen->tex_surface_release(screen, &surface); } return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index c4de9d2698..3aa326acc7 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -376,8 +376,14 @@ pstip_update_texture(struct pstip_stage *pstip) uint i, j; ubyte *data; - surface = screen->get_tex_surface(screen, pstip->texture, 0, 0, 0); - data = pipe_surface_map(surface); + /* XXX: want to avoid flushing just because we use stipple: + */ + pipe->flush( pipe, PIPE_FLUSH_TEXTURE_CACHE, NULL ); + + surface = screen->get_tex_surface(screen, pstip->texture, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); + data = screen->surface_map(screen, surface, + PIPE_BUFFER_USAGE_CPU_WRITE); /* * Load alpha texture. @@ -399,9 +405,8 @@ pstip_update_texture(struct pstip_stage *pstip) } /* unmap */ - pipe_surface_unmap(surface); - pipe_surface_reference(&surface, NULL); - pipe->texture_update(pipe, pstip->texture, 0, 0x1); + screen->surface_unmap(screen, surface); + screen->tex_surface_release(screen, &surface); } diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index 63e1cc6013..5728757d2f 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -50,6 +50,7 @@ pipe_get_tile_raw(struct pipe_context *pipe, uint x, uint y, uint w, uint h, void *p, int dst_stride) { + struct pipe_screen *screen = pipe->screen; const uint cpp = ps->cpp; const ubyte *pSrc; const uint src_stride = ps->pitch * cpp; @@ -63,7 +64,11 @@ pipe_get_tile_raw(struct pipe_context *pipe, if (pipe_clip_tile(x, y, &w, &h, ps)) return; - pSrc = (const ubyte *) pipe_surface_map(ps) + (y * ps->pitch + x) * cpp; + pSrc = (const ubyte *) screen->surface_map(screen, ps, + PIPE_BUFFER_USAGE_CPU_READ); + assert(pSrc); /* XXX: proper error handling! */ + + pSrc += (y * ps->pitch + x) * cpp; pDest = (ubyte *) p; for (i = 0; i < h; i++) { @@ -72,7 +77,7 @@ pipe_get_tile_raw(struct pipe_context *pipe, pSrc += src_stride; } - pipe_surface_unmap(ps); + screen->surface_unmap(screen, ps); } @@ -86,6 +91,7 @@ pipe_put_tile_raw(struct pipe_context *pipe, uint x, uint y, uint w, uint h, const void *p, int src_stride) { + struct pipe_screen *screen = pipe->screen; const uint cpp = ps->cpp; const ubyte *pSrc; const uint dst_stride = ps->pitch * cpp; @@ -100,7 +106,11 @@ pipe_put_tile_raw(struct pipe_context *pipe, return; pSrc = (const ubyte *) p; - pDest = (ubyte *) pipe_surface_map(ps) + (y * ps->pitch + x) * cpp; + + pDest = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_WRITE); + assert(pDest); /* XXX: proper error handling */ + + pDest += (y * ps->pitch + x) * cpp; for (i = 0; i < h; i++) { memcpy(pDest, pSrc, w * cpp); @@ -108,7 +118,7 @@ pipe_put_tile_raw(struct pipe_context *pipe, pSrc += src_stride; } - pipe_surface_unmap(ps); + screen->surface_unmap(screen, ps); } @@ -834,18 +844,26 @@ pipe_get_tile_z(struct pipe_context *pipe, uint x, uint y, uint w, uint h, uint *z) { + struct pipe_screen *screen = pipe->screen; const uint dstStride = w; + void *map; uint *pDest = z; uint i, j; if (pipe_clip_tile(x, y, &w, &h, ps)) return; + map = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_READ); + if (!map) { + assert(0); + return; + } + switch (ps->format) { case PIPE_FORMAT_Z32_UNORM: { const uint *pSrc - = (const uint *) pipe_surface_map(ps) + (y * ps->pitch + x); + = (const uint *)map + (y * ps->pitch + x); for (i = 0; i < h; i++) { memcpy(pDest, pSrc, 4 * w); pDest += dstStride; @@ -857,7 +875,7 @@ pipe_get_tile_z(struct pipe_context *pipe, case PIPE_FORMAT_X8Z24_UNORM: { const uint *pSrc - = (const uint *) pipe_surface_map(ps) + (y * ps->pitch + x); + = (const uint *)map + (y * ps->pitch + x); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 24-bit Z to 32-bit Z */ @@ -871,7 +889,7 @@ pipe_get_tile_z(struct pipe_context *pipe, case PIPE_FORMAT_Z16_UNORM: { const ushort *pSrc - = (const ushort *) pipe_surface_map(ps) + (y * ps->pitch + x); + = (const ushort *)map + (y * ps->pitch + x); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 16-bit Z to 32-bit Z */ @@ -886,7 +904,7 @@ pipe_get_tile_z(struct pipe_context *pipe, assert(0); } - pipe_surface_unmap(ps); + screen->surface_unmap(screen, ps); } @@ -896,17 +914,25 @@ pipe_put_tile_z(struct pipe_context *pipe, uint x, uint y, uint w, uint h, const uint *zSrc) { + struct pipe_screen *screen = pipe->screen; const uint srcStride = w; const uint *pSrc = zSrc; + void *map; uint i, j; if (pipe_clip_tile(x, y, &w, &h, ps)) return; + map = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_WRITE); + if (!map) { + assert(0); + return; + } + switch (ps->format) { case PIPE_FORMAT_Z32_UNORM: { - uint *pDest = (uint *) pipe_surface_map(ps) + (y * ps->pitch + x); + uint *pDest = (uint *) map + (y * ps->pitch + x); for (i = 0; i < h; i++) { memcpy(pDest, pSrc, 4 * w); pDest += ps->pitch; @@ -917,7 +943,7 @@ pipe_put_tile_z(struct pipe_context *pipe, case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: { - uint *pDest = (uint *) pipe_surface_map(ps) + (y * ps->pitch + x); + uint *pDest = (uint *) map + (y * ps->pitch + x); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 24-bit Z (0 stencil) */ @@ -930,7 +956,7 @@ pipe_put_tile_z(struct pipe_context *pipe, break; case PIPE_FORMAT_Z16_UNORM: { - ushort *pDest = (ushort *) pipe_surface_map(ps) + (y * ps->pitch + x); + ushort *pDest = (ushort *) map + (y * ps->pitch + x); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 16-bit Z */ @@ -945,7 +971,7 @@ pipe_put_tile_z(struct pipe_context *pipe, assert(0); } - pipe_surface_unmap(ps); + screen->surface_unmap(screen, ps); } diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 9e9912c6e4..257473ab26 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -287,7 +287,8 @@ util_blit_pixels(struct blit_state *ctx, if (!tex) return; - texSurf = screen->get_tex_surface(screen, tex, 0, 0, 0); + texSurf = screen->get_tex_surface(screen, tex, 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_WRITE); /* load temp texture */ pipe->surface_copy(pipe, FALSE, diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 0348629ab8..6ed5503c9a 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -586,8 +586,11 @@ make_1d_mipmap(struct gen_mipmap_state *ctx, struct pipe_surface *srcSurf, *dstSurf; void *srcMap, *dstMap; - srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice); - dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); + srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice, + PIPE_BUFFER_USAGE_CPU_READ); + + dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, + PIPE_BUFFER_USAGE_CPU_WRITE); srcMap = ((ubyte *) winsys->buffer_map(winsys, srcSurf->buffer, PIPE_BUFFER_USAGE_CPU_READ) @@ -626,8 +629,10 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, struct pipe_surface *srcSurf, *dstSurf; ubyte *srcMap, *dstMap; - srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice); - dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); + srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice, + PIPE_BUFFER_USAGE_CPU_READ); + dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, + PIPE_BUFFER_USAGE_CPU_WRITE); srcMap = ((ubyte *) winsys->buffer_map(winsys, srcSurf->buffer, PIPE_BUFFER_USAGE_CPU_READ) @@ -888,10 +893,14 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { const uint srcLevel = dstLevel - 1; + struct pipe_surface *surf = + screen->get_tex_surface(screen, pt, face, dstLevel, zslice, + PIPE_BUFFER_USAGE_GPU_WRITE); + /* * Setup framebuffer / dest surface */ - fb.cbufs[0] = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); + fb.cbufs[0] = surf; fb.width = pt->width[dstLevel]; fb.height = pt->height[dstLevel]; cso_set_framebuffer(ctx->cso, &fb); @@ -922,7 +931,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); /* need to signal that the texture has changed _after_ rendering to it */ - pipe->texture_update(pipe, pt, face, (1 << dstLevel)); + pipe_surface_reference( &surf, NULL ); } /* restore state we changed */ diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index cb95ba516f..014a3e31d5 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -147,8 +147,8 @@ struct pipe_context *failover_create( struct pipe_context *hw, failover->pipe.texture_create = hw->texture_create; failover->pipe.texture_release = hw->texture_release; failover->pipe.get_tex_surface = hw->get_tex_surface; -#endif failover->pipe.texture_update = hw->texture_update; +#endif failover->pipe.flush = hw->flush; diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 631642e1b6..dcd349e478 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -200,6 +200,35 @@ i915_destroy_screen( struct pipe_screen *screen ) } +static void * +i915_surface_map( struct pipe_screen *screen, + struct pipe_surface *surface, + unsigned flags ) +{ + char *map = screen->winsys->buffer_map( screen->winsys, surface->buffer, flags ); + if (map == NULL) + return NULL; + + if (surface->texture && + (flags & PIPE_BUFFER_USAGE_CPU_WRITE)) + { + /* Do something to notify contexts of a texture change. + */ + /* i915_screen(screen)->timestamp++; */ + } + + return map + surface->offset; +} + +static void +i915_surface_unmap(struct pipe_screen *screen, + struct pipe_surface *surface) +{ + screen->winsys->buffer_unmap( screen->winsys, surface->buffer ); +} + + + /** * Create a new i915_screen object */ @@ -243,6 +272,8 @@ i915_create_screen(struct pipe_winsys *winsys, uint pci_id) i915screen->screen.get_param = i915_get_param; i915screen->screen.get_paramf = i915_get_paramf; i915screen->screen.is_format_supported = i915_is_format_supported; + i915screen->screen.surface_map = i915_surface_map; + i915screen->screen.surface_unmap = i915_surface_unmap; i915_init_screen_texture_functions(&i915screen->screen); diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index f4fbedbe9b..98367ac073 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -51,17 +51,25 @@ i915_surface_copy(struct pipe_context *pipe, assert( dst->cpp == src->cpp ); if (0) { - pipe_copy_rect(pipe_surface_map(dst), + void *dst_map = pipe->screen->surface_map( pipe->screen, + dst, + PIPE_BUFFER_USAGE_CPU_WRITE ); + + const void *src_map = pipe->screen->surface_map( pipe->screen, + src, + PIPE_BUFFER_USAGE_CPU_READ ); + + pipe_copy_rect(dst_map, dst->cpp, dst->pitch, dstx, dsty, width, height, - pipe_surface_map(src), + src_map, do_flip ? -(int) src->pitch : src->pitch, srcx, do_flip ? 1 - srcy - height : srcy); - pipe_surface_unmap(src); - pipe_surface_unmap(dst); + pipe->screen->surface_unmap(pipe->screen, src); + pipe->screen->surface_unmap(pipe->screen, dst); } else { i915_copy_blit( i915_context(pipe), @@ -92,7 +100,10 @@ i915_surface_fill(struct pipe_context *pipe, { if (0) { unsigned i, j; - void *dst_map = pipe_surface_map(dst); + void *dst_map = pipe->screen->surface_map( pipe->screen, + dst, + PIPE_BUFFER_USAGE_CPU_WRITE ); + switch (dst->cpp) { case 1: { @@ -126,7 +137,7 @@ i915_surface_fill(struct pipe_context *pipe, break; } - pipe_surface_unmap( dst ); + pipe->screen->surface_unmap(pipe->screen, dst); } else { i915_fill_blit( i915_context(pipe), diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index c39e747705..7b9359a0fe 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -541,13 +541,6 @@ i915_texture_release_screen(struct pipe_screen *screen, } -static void -i915_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, - uint face, uint levelsMask) -{ - /* no-op? */ -} - /* * XXX note: same as code in sp_surface.c @@ -555,7 +548,8 @@ i915_texture_update(struct pipe_context *pipe, struct pipe_texture *texture, static struct pipe_surface * i915_get_tex_surface_screen(struct pipe_screen *screen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) + unsigned face, unsigned level, unsigned zslice, + unsigned flags) { struct i915_texture *tex = (struct i915_texture *)pt; struct pipe_winsys *ws = screen->winsys; @@ -586,6 +580,7 @@ i915_get_tex_surface_screen(struct pipe_screen *screen, ps->height = pt->height[level]; ps->pitch = tex->pitch; ps->offset = offset; + ps->usage = flags; } return ps; } @@ -594,7 +589,7 @@ i915_get_tex_surface_screen(struct pipe_screen *screen, void i915_init_texture_functions(struct i915_context *i915) { - i915->pipe.texture_update = i915_texture_update; +// i915->pipe.texture_update = i915_texture_update; } diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index c99a91dcf7..3e3736b280 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -35,27 +35,6 @@ #include "util/p_tile.h" -/* Upload data to a rectangular sub-region. Lots of choices how to do this: - * - * - memcpy by span to current destination - * - upload data as new buffer and blit - * - * Currently always memcpy. - */ -static void -brw_surface_data(struct pipe_context *pipe, - struct pipe_surface *dst, - unsigned dstx, unsigned dsty, - const void *src, unsigned src_pitch, - unsigned srcx, unsigned srcy, unsigned width, unsigned height) -{ - pipe_copy_rect(pipe_surface_map(dst) + dst->offset, - dst->cpp, dst->pitch, - dstx, dsty, width, height, src, src_pitch, srcx, srcy); - - pipe_surface_unmap(dst); -} - /* Assumes all values are within bounds -- no checking at this level - * do it higher up if required. @@ -72,17 +51,25 @@ brw_surface_copy(struct pipe_context *pipe, assert(dst->cpp == src->cpp); if (0) { - pipe_copy_rect(pipe_surface_map(dst) + dst->offset, + void *dst_map = pipe->screen->surface_map( pipe->screen, + dst, + PIPE_BUFFER_USAGE_CPU_WRITE ); + + const void *src_map = pipe->screen->surface_map( pipe->screen, + src, + PIPE_BUFFER_USAGE_CPU_READ ); + + pipe_copy_rect(dst_map, dst->cpp, dst->pitch, - dstx, dsty, - width, height, - pipe_surface_map(src) + src->offset, - do_flip ? -src->pitch : src->pitch, + dstx, dsty, + width, height, + src_map, + do_flip ? -(int) src->pitch : src->pitch, srcx, do_flip ? 1 - srcy - height : srcy); - pipe_surface_unmap(src); - pipe_surface_unmap(dst); + pipe->screen->surface_unmap(pipe->screen, src); + pipe->screen->surface_unmap(pipe->screen, dst); } else { brw_copy_blit(brw_context(pipe), @@ -113,7 +100,10 @@ brw_surface_fill(struct pipe_context *pipe, { if (0) { unsigned i, j; - void *dst_map = pipe_surface_map(dst); + void *dst_map = pipe->screen->surface_map( pipe->screen, + dst, + PIPE_BUFFER_USAGE_CPU_WRITE ); + switch (dst->cpp) { case 1: { @@ -147,7 +137,7 @@ brw_surface_fill(struct pipe_context *pipe, break; } - pipe_surface_unmap( dst ); + pipe->screen->surface_unmap(pipe->screen, dst); } else { brw_fill_blit(brw_context(pipe), @@ -164,7 +154,6 @@ brw_surface_fill(struct pipe_context *pipe, void brw_init_surface_functions(struct brw_context *brw) { - (void) brw_surface_data; /* silence warning */ brw->pipe.surface_copy = brw_surface_copy; brw->pipe.surface_fill = brw_surface_fill; } diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index b580f98204..ba4c4a7bcf 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -407,7 +407,7 @@ brw_get_tex_surface_screen(struct pipe_screen *screen, void brw_init_texture_functions(struct brw_context *brw) { - brw->pipe.texture_update = brw_texture_update; +// brw->pipe.texture_update = brw_texture_update; } diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index edf91ecafa..ee74826763 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -192,11 +192,11 @@ softpipe_create( struct pipe_screen *screen, * Must be before quad stage setup! */ for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) - softpipe->cbuf_cache[i] = sp_create_tile_cache(); - softpipe->zsbuf_cache = sp_create_tile_cache(); + softpipe->cbuf_cache[i] = sp_create_tile_cache( screen ); + softpipe->zsbuf_cache = sp_create_tile_cache( screen ); for (i = 0; i < PIPE_MAX_SAMPLERS; i++) - softpipe->tex_cache[i] = sp_create_tile_cache(); + softpipe->tex_cache[i] = sp_create_tile_cache( screen ); /* setup quad rendering stages */ diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 6c58f9909d..355c120d18 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -50,7 +50,7 @@ softpipe_map_constant_buffers(struct softpipe_context *sp) for (i = 0; i < 2; i++) { if (sp->constants[i].size) sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); + PIPE_BUFFER_USAGE_GPU_READ); } draw_set_mapped_constant_buffer(sp->draw, @@ -133,14 +133,14 @@ softpipe_draw_elements(struct pipe_context *pipe, void *buf = pipe->winsys->buffer_map(pipe->winsys, sp->vertex_buffer[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); + PIPE_BUFFER_USAGE_GPU_READ); draw_set_mapped_vertex_buffer(draw, i, buf); } /* Map index buffer, if present */ if (indexBuffer) { void *mapped_indexes = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, - PIPE_BUFFER_USAGE_CPU_READ); + PIPE_BUFFER_USAGE_GPU_READ); draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); } else { diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index 0625b69099..e03994b63b 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -50,25 +50,28 @@ softpipe_flush( struct pipe_context *pipe, draw_flush(softpipe->draw); - /* - flush the quad pipeline - * - flush the texture cache - * - flush the render cache - */ + if (flags & PIPE_FLUSH_TEXTURE_CACHE) { + for (i = 0; i < softpipe->num_textures; i++) { + sp_flush_tile_cache(softpipe, softpipe->tex_cache[i]); + } + } - for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) - if (softpipe->cbuf_cache[i]) - sp_flush_tile_cache(softpipe, softpipe->cbuf_cache[i]); + if (flags & PIPE_FLUSH_RENDER_CACHE) { + for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) + if (softpipe->cbuf_cache[i]) + sp_flush_tile_cache(softpipe, softpipe->cbuf_cache[i]); - if (softpipe->zsbuf_cache) - sp_flush_tile_cache(softpipe, softpipe->zsbuf_cache); + if (softpipe->zsbuf_cache) + sp_flush_tile_cache(softpipe, softpipe->zsbuf_cache); - /* Need this call for hardware buffers before swapbuffers. - * - * there should probably be another/different flush-type function - * that's called before swapbuffers because we don't always want - * to unmap surfaces when flushing. - */ - softpipe_unmap_surfaces(softpipe); + /* Need this call for hardware buffers before swapbuffers. + * + * there should probably be another/different flush-type function + * that's called before swapbuffers because we don't always want + * to unmap surfaces when flushing. + */ + softpipe_unmap_surfaces(softpipe); + } if (fence) *fence = NULL; diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 7dacb1c461..e9926bf41f 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -33,6 +33,7 @@ #include "sp_texture.h" #include "sp_winsys.h" +#include "sp_screen.h" static const char * @@ -137,6 +138,7 @@ softpipe_destroy_screen( struct pipe_screen *screen ) } + /** * Create a new pipe_screen object * Note: we're not presently subclassing pipe_screen (no softpipe_screen). @@ -144,22 +146,22 @@ softpipe_destroy_screen( struct pipe_screen *screen ) struct pipe_screen * softpipe_create_screen(struct pipe_winsys *winsys) { - struct pipe_screen *screen = CALLOC_STRUCT(pipe_screen); + struct softpipe_screen *screen = CALLOC_STRUCT(softpipe_screen); if (!screen) return NULL; - screen->winsys = winsys; + screen->base.winsys = winsys; - screen->destroy = softpipe_destroy_screen; + screen->base.destroy = softpipe_destroy_screen; - screen->get_name = softpipe_get_name; - screen->get_vendor = softpipe_get_vendor; - screen->get_param = softpipe_get_param; - screen->get_paramf = softpipe_get_paramf; - screen->is_format_supported = softpipe_is_format_supported; + screen->base.get_name = softpipe_get_name; + screen->base.get_vendor = softpipe_get_vendor; + screen->base.get_param = softpipe_get_param; + screen->base.get_paramf = softpipe_get_paramf; + screen->base.is_format_supported = softpipe_is_format_supported; - softpipe_init_screen_texture_funcs(screen); + softpipe_init_screen_texture_funcs(&screen->base); - return screen; + return &screen->base; } diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index 653449c4f1..b5cc053548 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -47,18 +47,27 @@ sp_surface_copy(struct pipe_context *pipe, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { assert( dst->cpp == src->cpp ); + void *dst_map = pipe->screen->surface_map( pipe->screen, + dst, + PIPE_BUFFER_USAGE_GPU_WRITE ); - pipe_copy_rect(pipe_surface_map(dst), + const void *src_map = pipe->screen->surface_map( pipe->screen, + src, + PIPE_BUFFER_USAGE_GPU_READ ); + + assert(src_map && dst_map); + + pipe_copy_rect(dst_map, dst->cpp, dst->pitch, dstx, dsty, width, height, - pipe_surface_map(src), + src_map, do_flip ? -(int) src->pitch : src->pitch, srcx, do_flip ? 1 - srcy - height : srcy); - pipe_surface_unmap(src); - pipe_surface_unmap(dst); + pipe->screen->surface_unmap(pipe->screen, src); + pipe->screen->surface_unmap(pipe->screen, dst); } @@ -83,7 +92,9 @@ sp_surface_fill(struct pipe_context *pipe, unsigned width, unsigned height, unsigned value) { unsigned i, j; - void *dst_map = pipe_surface_map(dst); + void *dst_map = pipe->screen->surface_map( pipe->screen, + dst, + PIPE_BUFFER_USAGE_GPU_WRITE ); assert(dst->pitch > 0); assert(width <= dst->pitch); @@ -147,7 +158,7 @@ sp_surface_fill(struct pipe_context *pipe, break; } - pipe_surface_unmap( dst ); + pipe->screen->surface_unmap(pipe->screen, dst); } diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 256586ec88..ee3fa994f9 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -40,6 +40,7 @@ #include "sp_state.h" #include "sp_texture.h" #include "sp_tile_cache.h" +#include "sp_screen.h" /* Simple, maximally packed layout. @@ -116,19 +117,10 @@ softpipe_texture_release(struct pipe_screen *screen, if (!*pt) return; - /* - DBG("%s %p refcount will be %d\n", - __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); - */ if (--(*pt)->refcount <= 0) { struct softpipe_texture *spt = softpipe_texture(*pt); - /* - DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); - */ - pipe_buffer_reference(screen->winsys, &spt->buffer, NULL); - FREE(spt); } *pt = NULL; @@ -138,7 +130,8 @@ softpipe_texture_release(struct pipe_screen *screen, static struct pipe_surface * softpipe_get_tex_surface(struct pipe_screen *screen, struct pipe_texture *pt, - unsigned face, unsigned level, unsigned zslice) + unsigned face, unsigned level, unsigned zslice, + unsigned usage) { struct pipe_winsys *ws = screen->winsys; struct softpipe_texture *spt = softpipe_texture(pt); @@ -157,6 +150,7 @@ softpipe_get_tex_surface(struct pipe_screen *screen, ps->height = pt->height[level]; ps->pitch = ps->width; ps->offset = spt->level_offset[level]; + ps->usage = usage; if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * @@ -167,30 +161,74 @@ softpipe_get_tex_surface(struct pipe_screen *screen, assert(face == 0); assert(zslice == 0); } + + if (usage & (PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_WRITE)) { + /* XXX if writing to the texture, invalidate the texcache entries!!! */ + assert(0); + } } return ps; } -static void -softpipe_texture_update(struct pipe_context *pipe, - struct pipe_texture *texture, - uint face, uint levelsMask) +static void +softpipe_tex_surface_release(struct pipe_screen *screen, + struct pipe_surface **s) { - struct softpipe_context *softpipe = softpipe_context(pipe); - uint unit; - for (unit = 0; unit < softpipe->num_textures; unit++) { - if (softpipe->texture[unit] == texture) { - sp_flush_tile_cache(softpipe, softpipe->tex_cache[unit]); - } + /* Effectively do the texture_update work here - if texture images + * needed post-processing to put them into hardware layout, this is + * where it would happen. For softpipe, nothing to do. + */ + assert ((*s)->texture); + + screen->winsys->surface_release(screen->winsys, s); +} + + +static void * +softpipe_surface_map( struct pipe_screen *screen, + struct pipe_surface *surface, + unsigned flags ) +{ + ubyte *map; + + if (flags & ~surface->usage) { + assert(0); + return NULL; + } + + map = screen->winsys->buffer_map( screen->winsys, surface->buffer, flags ); + if (map == NULL) + return NULL; + + /* May want to different things here depending on read/write nature + * of the map: + */ + if (surface->texture && + (flags & PIPE_BUFFER_USAGE_GPU_WRITE)) + { + /* Do something to notify sharing contexts of a texture change. + * In softpipe, that would mean flushing the texture cache. + */ + softpipe_screen(screen)->timestamp++; } + + return map + surface->offset; +} + + +static void +softpipe_surface_unmap(struct pipe_screen *screen, + struct pipe_surface *surface) +{ + screen->winsys->buffer_unmap( screen->winsys, surface->buffer ); } void -softpipe_init_texture_funcs( struct softpipe_context *softpipe ) +softpipe_init_texture_funcs(struct softpipe_context *sp) { - softpipe->pipe.texture_update = softpipe_texture_update; } @@ -199,5 +237,10 @@ softpipe_init_screen_texture_funcs(struct pipe_screen *screen) { screen->texture_create = softpipe_texture_create; screen->texture_release = softpipe_texture_release; + screen->get_tex_surface = softpipe_get_tex_surface; + screen->tex_surface_release = softpipe_tex_surface_release; + + screen->surface_map = softpipe_surface_map; + screen->surface_unmap = softpipe_surface_unmap; } diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index a7322144e6..2ba093320d 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -61,7 +61,6 @@ softpipe_texture(struct pipe_texture *pt) extern void softpipe_init_texture_funcs( struct softpipe_context *softpipe ); - extern void softpipe_init_screen_texture_funcs(struct pipe_screen *screen); diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index a88aad5d09..a3fd375a2d 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -49,6 +49,7 @@ struct softpipe_tile_cache { + struct pipe_screen *screen; struct pipe_surface *surface; /**< the surface we're caching */ void *surface_map; struct pipe_texture *texture; /**< if caching a texture */ @@ -109,13 +110,14 @@ clear_clear_flag(uint *bitvec, int x, int y) struct softpipe_tile_cache * -sp_create_tile_cache(void) +sp_create_tile_cache( struct pipe_screen *screen ) { struct softpipe_tile_cache *tc; uint pos; tc = CALLOC_STRUCT( softpipe_tile_cache ); if (tc) { + tc->screen = screen; for (pos = 0; pos < NUM_ENTRIES; pos++) { tc->entries[pos].x = tc->entries[pos].y = -1; @@ -154,16 +156,17 @@ sp_tile_cache_set_surface(struct softpipe_tile_cache *tc, assert(!tc->texture); if (tc->surface_map) { - /*assert(tc->surface != ps);*/ - pipe_surface_unmap(tc->surface); + tc->screen->surface_unmap(tc->screen, tc->surface); tc->surface_map = NULL; } pipe_surface_reference(&tc->surface, ps); - if (ps) { - if (tc->surface_map) - tc->surface_map = pipe_surface_map(ps); + if (tc->surface) { + if (tc->surface_map) /* XXX: this is always NULL!? */ + tc->surface_map = tc->screen->surface_map(tc->screen, tc->surface, + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE); tc->depth_stencil = (ps->format == PIPE_FORMAT_S8Z24_UNORM || ps->format == PIPE_FORMAT_Z16_UNORM || @@ -187,10 +190,13 @@ void sp_tile_cache_map_surfaces(struct softpipe_tile_cache *tc) { if (tc->surface && !tc->surface_map) - tc->surface_map = pipe_surface_map(tc->surface); + tc->surface_map = tc->screen->surface_map(tc->screen, tc->surface, + PIPE_BUFFER_USAGE_GPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ); if (tc->tex_surf && !tc->tex_surf_map) - tc->tex_surf_map = pipe_surface_map(tc->tex_surf); + tc->tex_surf_map = tc->screen->surface_map(tc->screen, tc->tex_surf, + PIPE_BUFFER_USAGE_GPU_READ); } @@ -198,12 +204,12 @@ void sp_tile_cache_unmap_surfaces(struct softpipe_tile_cache *tc) { if (tc->surface_map) { - pipe_surface_unmap(tc->surface); + tc->screen->surface_unmap(tc->screen, tc->surface); tc->surface_map = NULL; } if (tc->tex_surf_map) { - pipe_surface_unmap(tc->tex_surf); + tc->screen->surface_unmap(tc->screen, tc->tex_surf); tc->tex_surf_map = NULL; } } @@ -224,7 +230,7 @@ sp_tile_cache_set_texture(struct pipe_context *pipe, pipe_texture_reference(&tc->texture, texture); if (tc->tex_surf_map) { - pipe_surface_unmap(tc->tex_surf); + tc->screen->surface_unmap(tc->screen, tc->tex_surf); tc->tex_surf_map = NULL; } pipe_surface_reference(&tc->tex_surf, NULL); @@ -514,10 +520,12 @@ sp_get_cached_tile_tex(struct pipe_context *pipe, /* get new surface (view into texture) */ if (tc->tex_surf_map) - pipe_surface_unmap(tc->tex_surf); + tc->screen->surface_unmap(tc->screen, tc->tex_surf); - tc->tex_surf = screen->get_tex_surface(screen, tc->texture, face, level, z); - tc->tex_surf_map = pipe_surface_map(tc->tex_surf); + tc->tex_surf = screen->get_tex_surface(screen, tc->texture, face, level, z, + PIPE_BUFFER_USAGE_GPU_READ); + tc->tex_surf_map = screen->surface_map(screen, tc->tex_surf, + PIPE_BUFFER_USAGE_GPU_READ); tc->tex_face = face; tc->tex_level = level; diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index 2631e29a3a..bc96c941f6 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -61,7 +61,7 @@ struct softpipe_cached_tile extern struct softpipe_tile_cache * -sp_create_tile_cache(void); +sp_create_tile_cache( struct pipe_screen *screen ); extern void sp_destroy_tile_cache(struct softpipe_tile_cache *tc); diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index f3a9c2cd8b..0f68f592f7 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -198,12 +198,6 @@ struct pipe_context { /*@}*/ - /** Called when texture data is changed */ - void (*texture_update)(struct pipe_context *pipe, - struct pipe_texture *texture, - uint face, uint dirtyLevelsMask); - - /** Flush rendering (flags = bitmask of PIPE_FLUSH_x tokens) */ void (*flush)( struct pipe_context *pipe, unsigned flags, diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h index 8eb604e73f..592c3c87c2 100644 --- a/src/gallium/include/pipe/p_inlines.h +++ b/src/gallium/include/pipe/p_inlines.h @@ -39,20 +39,6 @@ extern "C" { #endif -static INLINE void * -pipe_surface_map(struct pipe_surface *surface) -{ - return (char *)surface->winsys->buffer_map( surface->winsys, surface->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_CPU_READ ) - + surface->offset; -} - -static INLINE void -pipe_surface_unmap(struct pipe_surface *surface) -{ - surface->winsys->buffer_unmap( surface->winsys, surface->buffer ); -} /** * Set 'ptr' to point to 'surf' and update reference counting. @@ -66,9 +52,20 @@ pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf) if (surf) surf->refcount++; - if (*ptr /* && --(*ptr)->refcount == 0 */) { - struct pipe_winsys *winsys = (*ptr)->winsys; - winsys->surface_release(winsys, ptr); + if (*ptr) { + + /* There are currently two sorts of surfaces... This needs to be + * fixed so that all surfaces are views into a texture. + */ + if ((*ptr)->texture) { + struct pipe_screen *screen = (*ptr)->texture->screen; + screen->tex_surface_release( screen, ptr ); + } + else { + struct pipe_winsys *winsys = (*ptr)->winsys; + winsys->surface_release(winsys, ptr); + } + assert(!*ptr); } diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index 26ac99d287..c080579c26 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -96,7 +96,22 @@ struct pipe_screen { struct pipe_surface *(*get_tex_surface)(struct pipe_screen *, struct pipe_texture *texture, unsigned face, unsigned level, - unsigned zslice); + unsigned zslice, + unsigned usage ); + + /* Surfaces allocated by the above must be released here: + */ + void (*tex_surface_release)( struct pipe_screen *, + struct pipe_surface ** ); + + + void *(*surface_map)( struct pipe_screen *, + struct pipe_surface *surface, + unsigned flags ); + + void (*surface_unmap)( struct pipe_screen *, + struct pipe_surface *surface ); + }; diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 912d84e7b9..62b05a403b 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -273,7 +273,11 @@ struct pipe_surface unsigned pitch; /**< in pixels */ unsigned offset; /**< offset from start of buffer, in bytes */ unsigned refcount; + unsigned usage; /**< PIPE_BUFFER_USAGE_* */ + struct pipe_winsys *winsys; /**< winsys which owns/created the surface */ + + struct pipe_texture *texture; /**< optional texture into which this is a view */ }; diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index 0e7e246666..0d8ed167b2 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -204,7 +204,10 @@ mem_dup(const void *src, uint size) #define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) #define MAX2( A, B ) ( (A)>(B) ? (A) : (B) ) +#ifndef Elements #define Elements(x) (sizeof(x)/sizeof((x)[0])) +#endif + #define Offset(TYPE, MEMBER) ((unsigned)&(((TYPE *)NULL)->MEMBER)) /** diff --git a/src/gallium/include/pipe/p_winsys.h b/src/gallium/include/pipe/p_winsys.h index 3005ec2d94..87a66b66d7 100644 --- a/src/gallium/include/pipe/p_winsys.h +++ b/src/gallium/include/pipe/p_winsys.h @@ -90,7 +90,7 @@ struct pipe_winsys void (*surface_release)(struct pipe_winsys *ws, struct pipe_surface **s); - + /** * Buffer management. Buffer attributes are mostly fixed over its lifetime. * diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index 8a89278cde..fd2f56eff2 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -508,6 +508,7 @@ xm_surface_alloc_storage(struct pipe_winsys *winsys, surf->format = format; surf->cpp = pf_get_size(format); surf->pitch = round_up(width, alignment / surf->cpp); + surf->usage = flags; #ifdef GALLIUM_CELL /* XXX a bit of a hack */ height = round_up(height, TILE_SIZE); @@ -562,6 +563,7 @@ static void xm_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) { struct pipe_surface *surf = *s; + assert(!surf->texture); surf->refcount--; if (surf->refcount == 0) { if (surf->buffer) diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c index 76356bbad7..e7186a85da 100644 --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -148,8 +148,10 @@ load_color_map_texture(GLcontext *ctx, struct pipe_texture *pt) uint *dest; uint i, j; - surface = screen->get_tex_surface(screen, pt, 0, 0, 0); - dest = (uint *) pipe_surface_map(surface); + surface = screen->get_tex_surface(screen, pt, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); + dest = (uint *) screen->surface_map(screen, surface, + PIPE_BUFFER_USAGE_CPU_WRITE); /* Pack four 1D maps into a 2D texture: * R map is placed horizontally, indexed by S, in channel 0 @@ -168,9 +170,8 @@ load_color_map_texture(GLcontext *ctx, struct pipe_texture *pt) } } - pipe_surface_unmap(surface); + screen->surface_unmap(screen, surface); pipe_surface_reference(&surface, NULL); - pipe->texture_update(pipe, pt, 0, 0x1); } diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index 1636bed91a..e4ef3e16b7 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -106,13 +106,15 @@ st_clear_accum_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) { struct st_renderbuffer *acc_strb = st_renderbuffer(rb); struct pipe_surface *acc_ps = acc_strb->surface; + struct pipe_screen *screen = ctx->st->pipe->screen; const GLint xpos = ctx->DrawBuffer->_Xmin; const GLint ypos = ctx->DrawBuffer->_Ymin; const GLint width = ctx->DrawBuffer->_Xmax - xpos; const GLint height = ctx->DrawBuffer->_Ymax - ypos; GLvoid *map; - map = pipe_surface_map(acc_ps); + map = screen->surface_map(screen, acc_ps, + PIPE_BUFFER_USAGE_CPU_WRITE); /* note acc_strb->format might not equal acc_ps->format */ switch (acc_strb->format) { @@ -140,7 +142,7 @@ st_clear_accum_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) _mesa_problem(ctx, "unexpected format in st_clear_accum_buffer()"); } - pipe_surface_unmap(acc_ps); + screen->surface_unmap(screen, acc_ps); } @@ -150,10 +152,12 @@ accum_mad(GLcontext *ctx, GLfloat scale, GLfloat bias, GLint xpos, GLint ypos, GLint width, GLint height, struct st_renderbuffer *acc_strb) { + struct pipe_screen *screen = ctx->st->pipe->screen; struct pipe_surface *acc_ps = acc_strb->surface; GLvoid *map; - map = pipe_surface_map(acc_ps); + map = screen->surface_map(screen, acc_ps, + PIPE_BUFFER_USAGE_CPU_WRITE); /* note acc_strb->format might not equal acc_ps->format */ switch (acc_strb->format) { @@ -174,7 +178,7 @@ accum_mad(GLcontext *ctx, GLfloat scale, GLfloat bias, _mesa_problem(NULL, "unexpected format in st_clear_accum_buffer()"); } - pipe_surface_unmap(acc_ps); + screen->surface_unmap(screen, acc_ps); } diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index ce8fefe703..873b765c2c 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -327,10 +327,11 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height, return NULL; } - surface = screen->get_tex_surface(screen, pt, 0, 0, 0); + surface = screen->get_tex_surface(screen, pt, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); /* map texture surface */ - dest = pipe_surface_map(surface); + dest = screen->surface_map(screen, surface, PIPE_BUFFER_USAGE_CPU_WRITE); /* Put image into texture surface */ memset(dest, 0xff, height * surface->pitch); @@ -340,9 +341,8 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height, _mesa_unmap_bitmap_pbo(ctx, unpack); /* Release surface */ - pipe_surface_unmap(surface); + screen->surface_unmap(screen, surface); pipe_surface_reference(&surface, NULL); - pipe->texture_update(pipe, pt, 0, 0x1); return pt; } @@ -544,8 +544,10 @@ reset_cache(struct st_context *st) /* Map the texture surface. * Subsequent glBitmap calls will write into the texture image. */ - cache->surf = screen->get_tex_surface(screen, cache->texture, 0, 0, 0); - cache->buffer = pipe_surface_map(cache->surf); + cache->surf = screen->get_tex_surface(screen, cache->texture, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); + cache->buffer = screen->surface_map(screen, cache->surf, + PIPE_BUFFER_USAGE_CPU_WRITE); /* init image to all 0xff */ memset(cache->buffer, 0xff, BITMAP_CACHE_WIDTH * BITMAP_CACHE_HEIGHT); @@ -562,6 +564,7 @@ st_flush_bitmap_cache(struct st_context *st) if (st->ctx->DrawBuffer) { struct bitmap_cache *cache = st->bitmap.cache; struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; assert(cache->xmin <= cache->xmax); /* @@ -574,12 +577,18 @@ st_flush_bitmap_cache(struct st_context *st) /* The texture surface has been mapped until now. * So unmap and release the texture surface before drawing. */ +#if 0 pipe_surface_unmap(cache->surf); pipe_surface_reference(&cache->surf, NULL); +#else + screen->surface_unmap(screen, cache->surf); + screen->tex_surface_release(screen, &cache->surf); +#endif +#if 0 /* XXX is this needed? */ pipe->texture_update(pipe, cache->texture, 0, 0x1); - +#endif draw_bitmap_quad(st->ctx, cache->xpos, cache->ypos, diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 65bfd6cfcc..9ae53c95f8 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -362,10 +362,12 @@ make_texture(struct st_context *st, /* we'll do pixel transfer in a fragment shader */ ctx->_ImageTransferState = 0x0; - surface = screen->get_tex_surface(screen, pt, 0, 0, 0); + surface = screen->get_tex_surface(screen, pt, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); /* map texture surface */ - dest = pipe_surface_map(surface); + dest = screen->surface_map(screen, surface, + PIPE_BUFFER_USAGE_CPU_WRITE); /* Put image into texture surface. * Note that the image is actually going to be upside down in @@ -384,9 +386,8 @@ make_texture(struct st_context *st, unpack); /* unmap */ - pipe_surface_unmap(surface); + screen->surface_unmap(screen, surface); pipe_surface_reference(&surface, NULL); - pipe->texture_update(pipe, pt, 0, 0x1); assert(success); @@ -731,6 +732,7 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y, { struct st_context *st = ctx->st; struct pipe_context *pipe = st->pipe; + struct pipe_screen *screen = pipe->screen; struct pipe_surface *ps = st->state.framebuffer.zsbuf; const GLboolean zoom = ctx->Pixel.ZoomX != 1.0 || ctx->Pixel.ZoomY != 1.0; GLint skipPixels; @@ -739,7 +741,8 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y, pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); /* map the stencil buffer */ - stmap = pipe_surface_map(ps); + stmap = screen->surface_map(screen, ps, + PIPE_BUFFER_USAGE_CPU_WRITE); /* if width > MAX_WIDTH, have to process image in chunks */ skipPixels = 0; @@ -796,7 +799,7 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y, } /* unmap the stencil buffer */ - pipe_surface_unmap(ps); + screen->surface_unmap(screen, ps); } @@ -869,6 +872,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy, GLint dstx, GLint dsty) { struct st_renderbuffer *rbDraw = st_renderbuffer(ctx->DrawBuffer->_StencilBuffer); + struct pipe_screen *screen = ctx->st->pipe->screen; struct pipe_surface *psDraw = rbDraw->surface; ubyte *drawMap; ubyte *buffer; @@ -885,7 +889,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy, &ctx->DefaultPacking, buffer); /* map the stencil buffer */ - drawMap = pipe_surface_map(psDraw); + drawMap = screen->surface_map(screen, psDraw, PIPE_BUFFER_USAGE_CPU_WRITE); /* draw */ /* XXX PixelZoom not handled yet */ @@ -925,7 +929,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy, free(buffer); /* unmap the stencil buffer */ - pipe_surface_unmap(psDraw); + screen->surface_unmap(screen, psDraw); } @@ -994,13 +998,14 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, if (!pt) return; - psTex = screen->get_tex_surface(screen, pt, 0, 0, 0); - if (st_fb_orientation(ctx->DrawBuffer) == Y_0_TOP) { srcy = ctx->DrawBuffer->Height - srcy - height; } if (srcFormat == texFormat) { + psTex = screen->get_tex_surface(screen, pt, 0, 0, 0, + PIPE_BUFFER_USAGE_GPU_WRITE ); + /* copy source framebuffer surface into mipmap/texture */ pipe->surface_copy(pipe, FALSE, @@ -1009,21 +1014,26 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, psRead, srcx, srcy, width, height); } - else if (type == GL_COLOR) { - /* alternate path using get/put_tile() */ - GLfloat *buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + else { + psTex = screen->get_tex_surface(screen, pt, 0, 0, 0, + PIPE_BUFFER_USAGE_CPU_WRITE ); - pipe_get_tile_rgba(pipe, psRead, srcx, srcy, width, height, buf); - pipe_put_tile_rgba(pipe, psTex, 0, 0, width, height, buf); + if (type == GL_COLOR) { + /* alternate path using get/put_tile() */ + GLfloat *buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); - free(buf); - } - else { - /* GL_DEPTH */ - GLuint *buf = (GLuint *) malloc(width * height * sizeof(GLuint)); - pipe_get_tile_z(pipe, psRead, srcx, srcy, width, height, buf); - pipe_put_tile_z(pipe, psTex, 0, 0, width, height, buf); - free(buf); + pipe_get_tile_rgba(pipe, psRead, srcx, srcy, width, height, buf); + pipe_put_tile_rgba(pipe, psTex, 0, 0, width, height, buf); + + free(buf); + } + else { + /* GL_DEPTH */ + GLuint *buf = (GLuint *) malloc(width * height * sizeof(GLuint)); + pipe_get_tile_z(pipe, psRead, srcx, srcy, width, height, buf); + pipe_put_tile_z(pipe, psTex, 0, 0, width, height, buf); + free(buf); + } } /* draw textured quad */ diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index fc8a5ea7f6..7fdc0bddd6 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -91,9 +91,14 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, struct pipe_context *pipe = ctx->st->pipe; struct st_renderbuffer *strb = st_renderbuffer(rb); enum pipe_format pipeFormat; - GLbitfield flags = 0x0; /* XXX needed? */ + unsigned flags = (PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ); int ret; + pipe_surface_reference( &strb->surface, NULL ); + if (!strb->surface) { /* first time surface creation */ strb->surface = pipe->winsys->surface_alloc(pipe->winsys); @@ -103,11 +108,16 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, if (!strb->surface) return GL_FALSE; } +#if 0 else if (strb->surface->buffer) { /* release/discard the old surface buffer */ pipe_reference_buffer(pipe, &strb->surface->buffer, NULL); } - +#else + else { + assert(0); + } +#endif /* Determine surface format here */ if (strb->format != PIPE_FORMAT_NONE) { assert(strb->format != 0); @@ -368,7 +378,11 @@ st_render_texture(GLcontext *ctx, strb->surface = screen->get_tex_surface(screen, pt, att->CubeMapFace, att->TextureLevel, - att->Zoffset); + att->Zoffset, + PIPE_BUFFER_USAGE_CPU_READ | + PIPE_BUFFER_USAGE_CPU_WRITE | + PIPE_BUFFER_USAGE_GPU_READ | + PIPE_BUFFER_USAGE_GPU_WRITE); assert(strb->surface); assert(screen->is_format_supported(screen, strb->surface->format, PIPE_TEXTURE)); assert(screen->is_format_supported(screen, strb->surface->format, PIPE_SURFACE)); @@ -396,22 +410,19 @@ static void st_finish_render_texture(GLcontext *ctx, struct gl_renderbuffer_attachment *att) { + struct pipe_screen *screen = ctx->st->pipe->screen; struct st_renderbuffer *strb = st_renderbuffer(att->Renderbuffer); assert(strb); ctx->st->pipe->flush(ctx->st->pipe, PIPE_FLUSH_RENDER_CACHE, NULL); - ctx->st->pipe->texture_update(ctx->st->pipe, - st_get_texobj_texture(att->Texture), - att->CubeMapFace, 1 << att->TextureLevel); + screen->tex_surface_release( screen, &strb->surface ); /* printf("FINISH RENDER TO TEXTURE surf=%p\n", strb->surface); */ - pipe_surface_reference(&strb->surface, NULL); - _mesa_reference_renderbuffer(&att->Renderbuffer, NULL); /* restore previous framebuffer state */ diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index ddbe36106c..e242195e7a 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -61,13 +61,14 @@ st_read_stencil_pixels(GLcontext *ctx, GLint x, GLint y, GLvoid *pixels) { struct gl_framebuffer *fb = ctx->ReadBuffer; + struct pipe_screen *screen = ctx->st->pipe->screen; struct st_renderbuffer *strb = st_renderbuffer(fb->_StencilBuffer); struct pipe_surface *ps = strb->surface; ubyte *stmap; GLint j; /* map the stencil buffer */ - stmap = pipe_surface_map(ps); + stmap = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_READ); /* width should never be > MAX_WIDTH since we did clipping earlier */ ASSERT(width <= MAX_WIDTH); @@ -124,7 +125,7 @@ st_read_stencil_pixels(GLcontext *ctx, GLint x, GLint y, /* unmap the stencil buffer */ - pipe_surface_unmap(ps); + screen->surface_unmap(screen, ps); } diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 981246221b..05e0339e0e 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -478,7 +478,6 @@ st_TexImage(GLcontext * ctx, struct gl_texture_image *texImage, GLsizei imageSize, int compressed) { - struct pipe_context *pipe = ctx->st->pipe; struct st_texture_object *stObj = st_texture_object(texObj); struct st_texture_image *stImage = st_texture_image(texImage); GLint postConvWidth, postConvHeight; @@ -635,7 +634,8 @@ st_TexImage(GLcontext * ctx, return; if (stImage->pt) { - texImage->Data = st_texture_image_map(ctx->st, stImage, 0); + texImage->Data = st_texture_image_map(ctx->st, stImage, 0, + PIPE_BUFFER_USAGE_CPU_WRITE); dstRowStride = stImage->surface->pitch * stImage->surface->cpp; } else { @@ -684,8 +684,9 @@ st_TexImage(GLcontext * ctx, } if (stImage->pt && i < depth) { - st_texture_image_unmap(stImage); - texImage->Data = st_texture_image_map(ctx->st, stImage, i); + st_texture_image_unmap(ctx->st, stImage); + texImage->Data = st_texture_image_map(ctx->st, stImage, i, + PIPE_BUFFER_USAGE_CPU_WRITE); src += srcImageStride; } } @@ -694,13 +695,10 @@ st_TexImage(GLcontext * ctx, _mesa_unmap_teximage_pbo(ctx, unpack); if (stImage->pt) { - st_texture_image_unmap(stImage); + st_texture_image_unmap(ctx->st, stImage); texImage->Data = NULL; } - if (stObj->pt) - pipe->texture_update(pipe, stObj->pt, stImage->face, (1 << level)); - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { ctx->Driver.GenerateMipmap(ctx, target, texObj); } @@ -793,7 +791,8 @@ st_get_tex_image(GLcontext * ctx, GLenum target, GLint level, /* Image is stored in hardware format in a buffer managed by the * kernel. Need to explicitly map and unmap it. */ - texImage->Data = st_texture_image_map(ctx->st, stImage, 0); + texImage->Data = st_texture_image_map(ctx->st, stImage, 0, + PIPE_BUFFER_USAGE_CPU_READ); texImage->RowStride = stImage->surface->pitch; } else { @@ -823,8 +822,9 @@ st_get_tex_image(GLcontext * ctx, GLenum target, GLint level, } if (stImage->pt && i < depth) { - st_texture_image_unmap(stImage); - texImage->Data = st_texture_image_map(ctx->st, stImage, i); + st_texture_image_unmap(ctx->st, stImage); + texImage->Data = st_texture_image_map(ctx->st, stImage, i, + PIPE_BUFFER_USAGE_CPU_READ); dest += dstImageStride; } } @@ -833,7 +833,7 @@ st_get_tex_image(GLcontext * ctx, GLenum target, GLint level, /* Unmap */ if (stImage->pt) { - st_texture_image_unmap(stImage); + st_texture_image_unmap(ctx->st, stImage); texImage->Data = NULL; } } @@ -874,8 +874,6 @@ st_TexSubimage(GLcontext * ctx, struct gl_texture_object *texObj, struct gl_texture_image *texImage) { - struct pipe_context *pipe = ctx->st->pipe; - struct st_texture_object *stObj = st_texture_object(texObj); struct st_texture_image *stImage = st_texture_image(texImage); GLuint dstRowStride; GLuint srcImageStride = _mesa_image_image_stride(packing, width, height, @@ -897,7 +895,8 @@ st_TexSubimage(GLcontext * ctx, * from uploading the buffer under us. */ if (stImage->pt) { - texImage->Data = st_texture_image_map(ctx->st, stImage, zoffset); + texImage->Data = st_texture_image_map(ctx->st, stImage, zoffset, + PIPE_BUFFER_USAGE_CPU_WRITE); dstRowStride = stImage->surface->pitch * stImage->surface->cpp; } @@ -922,8 +921,9 @@ st_TexSubimage(GLcontext * ctx, if (stImage->pt && i < depth) { /* map next slice of 3D texture */ - st_texture_image_unmap(stImage); - texImage->Data = st_texture_image_map(ctx->st, stImage, zoffset + i); + st_texture_image_unmap(ctx->st, stImage); + texImage->Data = st_texture_image_map(ctx->st, stImage, zoffset + i, + PIPE_BUFFER_USAGE_CPU_WRITE); src += srcImageStride; } } @@ -935,11 +935,9 @@ st_TexSubimage(GLcontext * ctx, _mesa_unmap_teximage_pbo(ctx, packing); if (stImage->pt) { - st_texture_image_unmap(stImage); + st_texture_image_unmap(ctx->st, stImage); texImage->Data = NULL; } - - pipe->texture_update(pipe, stObj->pt, stImage->face, (1 << level)); } @@ -1058,7 +1056,8 @@ fallback_copy_texsubimage(GLcontext *ctx, src_surf = strb->surface; - dest_surf = screen->get_tex_surface(screen, pt, face, level, destZ); + dest_surf = screen->get_tex_surface(screen, pt, face, level, destZ, + PIPE_BUFFER_USAGE_CPU_WRITE); assert(width <= MAX_WIDTH); @@ -1119,7 +1118,6 @@ do_copy_texsubimage(GLcontext *ctx, struct gl_texture_image *texImage = _mesa_select_tex_image(ctx, texObj, target, level); struct st_texture_image *stImage = st_texture_image(texImage); - struct st_texture_object *stObj = st_texture_object(texObj); GLenum baseFormat = texImage->InternalFormat; struct gl_framebuffer *fb = ctx->ReadBuffer; struct st_renderbuffer *strb; @@ -1157,7 +1155,8 @@ do_copy_texsubimage(GLcontext *ctx, dest_format = stImage->pt->format; dest_surface = screen->get_tex_surface(screen, stImage->pt, stImage->face, - stImage->level, destZ); + stImage->level, destZ, + PIPE_BUFFER_USAGE_CPU_WRITE); if (ctx->_ImageTransferState == 0x0 && strb->surface->buffer && @@ -1223,8 +1222,6 @@ do_copy_texsubimage(GLcontext *ctx, pipe_surface_reference(&dest_surface, NULL); - pipe->texture_update(pipe, stObj->pt, stImage->face, (1 << level)); - if (level == texObj->BaseLevel && texObj->GenerateMipmap) { ctx->Driver.GenerateMipmap(ctx, target, texObj); } @@ -1529,7 +1526,6 @@ st_finalize_texture(GLcontext *ctx, if (stImage && stObj->pt != stImage->pt) { copy_image_data_to_texture(ctx->st, stObj, level, stImage); *needFlush = GL_TRUE; - pipe->texture_update(pipe, stObj->pt, face, (1 << level)); } } } diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 1a0e19c2f9..cfacfdd04c 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -123,8 +123,10 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, const ubyte *srcData; ubyte *dstData; - srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice); - dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice); + srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice, + PIPE_BUFFER_USAGE_CPU_READ); + dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, + PIPE_BUFFER_USAGE_CPU_WRITE); srcData = (ubyte *) pipe_buffer_map(pipe, srcSurf->buffer, PIPE_BUFFER_USAGE_CPU_READ) diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index f68bef1207..482a054f64 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -184,25 +184,30 @@ st_texture_image_offset(const struct pipe_texture * pt, */ GLubyte * st_texture_image_map(struct st_context *st, struct st_texture_image *stImage, - GLuint zoffset) + GLuint zoffset, + GLuint flags ) { struct pipe_screen *screen = st->pipe->screen; struct pipe_texture *pt = stImage->pt; DBG("%s \n", __FUNCTION__); stImage->surface = screen->get_tex_surface(screen, pt, stImage->face, - stImage->level, zoffset); + stImage->level, zoffset, + flags); - return pipe_surface_map(stImage->surface); + return screen->surface_map(screen, stImage->surface, flags); } void -st_texture_image_unmap(struct st_texture_image *stImage) +st_texture_image_unmap(struct st_context *st, + struct st_texture_image *stImage) { + struct pipe_screen *screen = st->pipe->screen; + DBG("%s\n", __FUNCTION__); - pipe_surface_unmap(stImage->surface); + screen->surface_unmap(screen, stImage->surface); pipe_surface_reference(&stImage->surface, NULL); } @@ -224,12 +229,15 @@ st_surface_data(struct pipe_context *pipe, const void *src, unsigned src_pitch, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { - pipe_copy_rect(pipe_surface_map(dst), + struct pipe_screen *screen = pipe->screen; + void *map = screen->surface_map(screen, dst, PIPE_BUFFER_USAGE_CPU_WRITE); + + pipe_copy_rect(map, dst->cpp, dst->pitch, dstx, dsty, width, height, src, src_pitch, srcx, srcy); - pipe_surface_unmap(dst); + screen->surface_unmap(screen, dst); } @@ -256,7 +264,8 @@ st_texture_image_data(struct pipe_context *pipe, if(dst->compressed) height /= 4; - dst_surface = screen->get_tex_surface(screen, dst, face, level, i); + dst_surface = screen->get_tex_surface(screen, dst, face, level, i, + PIPE_BUFFER_USAGE_CPU_WRITE); st_surface_data(pipe, dst_surface, 0, 0, /* dstx, dsty */ @@ -265,7 +274,7 @@ st_texture_image_data(struct pipe_context *pipe, 0, 0, /* source x, y */ dst->width[level], height); /* width, height */ - pipe_surface_reference(&dst_surface, NULL); + screen->tex_surface_release(screen, &dst_surface); srcUB += src_image_pitch * dst->cpp; } @@ -304,8 +313,11 @@ st_texture_image_copy(struct pipe_context *pipe, assert(src->width[srcLevel] == width); assert(src->height[srcLevel] == height); - dst_surface = screen->get_tex_surface(screen, dst, face, dstLevel, i); - src_surface = screen->get_tex_surface(screen, src, face, srcLevel, i); + dst_surface = screen->get_tex_surface(screen, dst, face, dstLevel, i, + PIPE_BUFFER_USAGE_GPU_WRITE); + + src_surface = screen->get_tex_surface(screen, src, face, srcLevel, i, + PIPE_BUFFER_USAGE_GPU_READ); pipe->surface_copy(pipe, FALSE, diff --git a/src/mesa/state_tracker/st_texture.h b/src/mesa/state_tracker/st_texture.h index 7abccb3a69..f6d5733e21 100644 --- a/src/mesa/state_tracker/st_texture.h +++ b/src/mesa/state_tracker/st_texture.h @@ -121,10 +121,12 @@ st_texture_match_image(const struct pipe_texture *pt, extern GLubyte * st_texture_image_map(struct st_context *st, struct st_texture_image *stImage, - GLuint zoffset); + GLuint zoffset, + GLuint flags); extern void -st_texture_image_unmap(struct st_texture_image *stImage); +st_texture_image_unmap(struct st_context *st, + struct st_texture_image *stImage); /* Return pointers to each 2d slice within an image. Indexed by depth -- cgit v1.2.3 From 4584c0efbd547559d30ba9a5e76549fc1b679619 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 13:47:09 +0100 Subject: draw: turn on SSE swizzle code --- src/gallium/auxiliary/draw/draw_vs_sse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 90926aec85..07f85bc448 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -47,7 +47,7 @@ #include "tgsi/util/tgsi_parse.h" #define SSE_MAX_VERTICES 4 -#define SSE_SWIZZLES 0 +#define SSE_SWIZZLES 1 #if SSE_SWIZZLES typedef void (XSTDCALL *codegen_function) ( -- cgit v1.2.3 From be7e1297f4d5a2c2949968bea428b4c0e7c75d63 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 13:47:27 +0100 Subject: draw: squash warnings --- src/gallium/auxiliary/draw/draw_pt_varray.c | 1 - src/gallium/auxiliary/draw/draw_pt_vcache.c | 1 - 2 files changed, 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index c85d8ded50..355093f945 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -200,7 +200,6 @@ static void varray_prepare(struct draw_pt_front_end *frontend, unsigned opt) { struct varray_frontend *varray = (struct varray_frontend *)frontend; - const struct pipe_rasterizer_state *rasterizer = varray->draw->rasterizer; if (opt & PT_PIPELINE) { diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 2f9775814f..6b3fb1406b 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -225,7 +225,6 @@ static void vcache_prepare( struct draw_pt_front_end *frontend, unsigned opt ) { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; - const struct pipe_rasterizer_state *rasterizer = vcache->draw->rasterizer; if (opt & PT_PIPELINE) { -- cgit v1.2.3 From c6fadd9fade40e3397f56fdbfa2325861799e49c Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 1 May 2008 10:20:31 +0900 Subject: gallium: Add newline to eof. --- src/gallium/auxiliary/util/p_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index f1fb07bf5b..4ec1746662 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -422,4 +422,4 @@ void debug_print_format(const char *msg, unsigned fmt ) debug_printf("%s: %s\n", msg, fmtstr); } -#endif \ No newline at end of file +#endif -- cgit v1.2.3 From 35b0efb8c6afd319ae36e99aa578ac6c75faf2f5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 14:19:25 +0100 Subject: gallium: do something sensible on the error path to try to avoid crashing in release builds --- src/gallium/auxiliary/util/u_gen_mipmap.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 0348629ab8..2e8417ee13 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -505,6 +505,9 @@ format_to_type_comps(enum pipe_format pformat, return; default: assert(0); + *datatype = UBYTE; + *comps = 0; + break; } } -- cgit v1.2.3 From 1e4217e1b857c6a3c5da7d1eceb74683bf0b9a00 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 14:51:18 +0100 Subject: tgsi: use ESI instead of EBX on non-win32 platforms --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 1138f59997..4587b0930a 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -36,7 +36,7 @@ #ifdef PIPE_ARCH_X86 -#define HIGH_PRECISION 1 /* for 1/sqrt() */ +#define HIGH_PRECISION 0 /* for 1/sqrt() */ #define FOR_EACH_CHANNEL( CHAN )\ @@ -2038,12 +2038,12 @@ static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num, unsigned char *inner_loop; soa_input = x86_make_reg( file_REG32, reg_AX ); - aos_input = x86_make_reg( file_REG32, reg_BX ); + aos_input = get_temp_base(); /* BX or SI */ num_inputs = x86_make_reg( file_REG32, reg_CX ); temp = x86_make_reg( file_REG32, reg_DX ); /* Save EBX */ - x86_push( func, x86_make_reg( file_REG32, reg_BX ) ); + x86_push( func, aos_input ); x86_mov( func, soa_input, get_argument( soa + 1 ) ); x86_mov( func, aos_input, get_argument( aos + 1 ) ); @@ -2088,7 +2088,7 @@ static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num, x86_jcc( func, cc_NE, inner_loop ); /* Restore EBX */ - x86_pop( func, x86_make_reg( file_REG32, reg_BX ) ); + x86_pop( func, aos_input ); } static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride ) @@ -2100,12 +2100,12 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, unsigned char *inner_loop; soa_output = x86_make_reg( file_REG32, reg_AX ); - aos_output = x86_make_reg( file_REG32, reg_BX ); + aos_output = get_temp_base(); /* BX or SI */ num_outputs = x86_make_reg( file_REG32, reg_CX ); temp = x86_make_reg( file_REG32, reg_DX ); /* Save EBX */ - x86_push( func, x86_make_reg( file_REG32, reg_BX ) ); + x86_push( func, aos_output ); x86_mov( func, soa_output, get_argument( soa + 1 ) ); x86_mov( func, aos_output, get_argument( aos + 1 ) ); @@ -2150,7 +2150,7 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, x86_jcc( func, cc_NE, inner_loop ); /* Restore EBX */ - x86_pop( func, x86_make_reg( file_REG32, reg_BX ) ); + x86_pop( func, aos_output ); } /** -- cgit v1.2.3 From 7810e7f623a47978cdd1a167cc9e6b743d56d949 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 15:13:46 +0100 Subject: tgsi: use x86_fn_arg instead of get_argument() -- it knows about push/pops to the stack --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 63 ++++++++++++++++------------- 1 file changed, 34 insertions(+), 29 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 4587b0930a..5e133a9663 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -133,14 +133,6 @@ get_immediate_base( void ) * Data access helpers. */ -static struct x86_reg -get_argument( - unsigned index ) -{ - return x86_make_disp( - x86_make_reg( file_REG32, reg_SP ), - (index + 1) * 4 ); -} static struct x86_reg get_immediate( @@ -2045,14 +2037,14 @@ static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num, /* Save EBX */ x86_push( func, aos_input ); - x86_mov( func, soa_input, get_argument( soa + 1 ) ); - x86_mov( func, aos_input, get_argument( aos + 1 ) ); - x86_mov( func, num_inputs, get_argument( num + 1 ) ); + x86_mov( func, soa_input, x86_fn_arg( func, soa ) ); + x86_mov( func, aos_input, x86_fn_arg( func, aos ) ); + x86_mov( func, num_inputs, x86_fn_arg( func, num ) ); /* do */ inner_loop = x86_get_label( func ); { - x86_mov( func, temp, get_argument( stride + 1 ) ); + x86_mov( func, temp, x86_fn_arg( func, stride ) ); x86_push( func, aos_input ); sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); @@ -2107,9 +2099,9 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, /* Save EBX */ x86_push( func, aos_output ); - x86_mov( func, soa_output, get_argument( soa + 1 ) ); - x86_mov( func, aos_output, get_argument( aos + 1 ) ); - x86_mov( func, num_outputs, get_argument( num + 1 ) ); + x86_mov( func, soa_output, x86_fn_arg( func, soa ) ); + x86_mov( func, aos_output, x86_fn_arg( func, aos ) ); + x86_mov( func, num_outputs, x86_fn_arg( func, num ) ); /* do */ inner_loop = x86_get_label( func ); @@ -2126,7 +2118,7 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) ); sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) ); - x86_mov( func, temp, get_argument( stride + 1 ) ); + x86_mov( func, temp, x86_fn_arg( func, stride ) ); x86_push( func, aos_output ); sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); @@ -2185,6 +2177,13 @@ tgsi_emit_sse2( tgsi_parse_init( &parse, tokens ); + /* Can't just use EDI without save/restoring it: + */ + x86_push( + func, + get_immediate_base() ); + + /* * Different function args for vertex/fragment shaders: */ @@ -2193,51 +2192,51 @@ tgsi_emit_sse2( x86_mov( func, get_input_base(), - get_argument( 0 ) ); + x86_fn_arg( func, 1 ) ); /* skipping outputs argument here */ x86_mov( func, get_const_base(), - get_argument( 2 ) ); + x86_fn_arg( func, 3 ) ); x86_mov( func, get_temp_base(), - get_argument( 3 ) ); + x86_fn_arg( func, 4 ) ); x86_mov( func, get_coef_base(), - get_argument( 4 ) ); + x86_fn_arg( func, 5 ) ); x86_mov( func, get_immediate_base(), - get_argument( 5 ) ); + x86_fn_arg( func, 6 ) ); } else { assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX); if (do_swizzles) - aos_to_soa( func, 5, 0, 6, 7 ); + aos_to_soa( func, 6, 1, 7, 8 ); x86_mov( func, get_input_base(), - get_argument( 0 ) ); + x86_fn_arg( func, 1 ) ); x86_mov( func, get_output_base(), - get_argument( 1 ) ); + x86_fn_arg( func, 2 ) ); x86_mov( func, get_const_base(), - get_argument( 2 ) ); + x86_fn_arg( func, 3 ) ); x86_mov( func, get_temp_base(), - get_argument( 3 ) ); + x86_fn_arg( func, 4 ) ); x86_mov( func, get_immediate_base(), - get_argument( 4 ) ); + x86_fn_arg( func, 5 ) ); } while( !tgsi_parse_end_of_tokens( &parse ) && ok ) { @@ -2260,7 +2259,7 @@ tgsi_emit_sse2( x86_mov( func, get_output_base(), - get_argument( 1 ) ); + x86_fn_arg( func, 2 ) ); } } @@ -2307,9 +2306,15 @@ tgsi_emit_sse2( if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) { if (do_swizzles) - soa_to_aos( func, 8, 1, 9, 10 ); + soa_to_aos( func, 9, 2, 10, 11 ); } + /* Can't just use EDI without save/restoring it: + */ + x86_pop( + func, + get_immediate_base() ); + #ifdef WIN32 emit_retw( func, 16 ); #else -- cgit v1.2.3 From 47aa416821b69d3afa33c79ec8cb8499688a0e8e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 15:27:53 +0100 Subject: tgsi: use EBX everywhere, be sure to push/pop it --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 5e133a9663..06df3dbb05 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -103,15 +103,9 @@ get_output_base( void ) static struct x86_reg get_temp_base( void ) { -#ifdef WIN32 return x86_make_reg( file_REG32, reg_BX ); -#else - return x86_make_reg( - file_REG32, - reg_SI ); -#endif } static struct x86_reg @@ -2177,12 +2171,16 @@ tgsi_emit_sse2( tgsi_parse_init( &parse, tokens ); - /* Can't just use EDI without save/restoring it: + /* Can't just use EDI, EBX without save/restoring them: */ x86_push( func, get_immediate_base() ); + x86_push( + func, + get_temp_base() ); + /* * Different function args for vertex/fragment shaders: @@ -2309,8 +2307,12 @@ tgsi_emit_sse2( soa_to_aos( func, 9, 2, 10, 11 ); } - /* Can't just use EDI without save/restoring it: + /* Can't just use EBX, EDI without save/restoring them: */ + x86_pop( + func, + get_temp_base() ); + x86_pop( func, get_immediate_base() ); -- cgit v1.2.3 From 6980823da9120d8d8533f7a78eac2d63dece430f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 15:30:50 +0100 Subject: draw: avoid aliasing warning --- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index c4de9d2698..54fdcc1a3b 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -256,7 +256,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, uint size = 4; immed = tgsi_default_full_immediate(); immed.Immediate.Size = 1 + size; /* one for the token itself */ - immed.u.ImmediateFloat32 = (struct tgsi_immediate_float32 *) value; + immed.u.Pointer = value; ctx->emit_immediate(ctx, &immed); } -- cgit v1.2.3 From 727257f32002544658219d2e0163993c1cbc5644 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 15:31:17 +0100 Subject: rtasm: assert stack is fully popped in return --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index d7e2230557..40f6f973d6 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -495,6 +495,7 @@ void x86_dec( struct x86_function *p, void x86_ret( struct x86_function *p ) { DUMP(); + assert(p->stack_offset == 0); emit_1ub(p, 0xc3); } -- cgit v1.2.3 From afe67db8038855d9f7b4ce46b610701c55736c1f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 15:36:14 +0100 Subject: tgsi: add some const qualifiers to immediate pointers --- src/gallium/auxiliary/tgsi/util/tgsi_parse.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h index da0121c482..15e76feb7c 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h @@ -52,8 +52,8 @@ struct tgsi_full_immediate struct tgsi_immediate Immediate; union { - void *Pointer; - struct tgsi_immediate_float32 *ImmediateFloat32; + const void *Pointer; + const struct tgsi_immediate_float32 *ImmediateFloat32; } u; }; -- cgit v1.2.3 From 419f3c447520d1dc95c529afa693ffe3fffe5560 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 15:45:51 +0100 Subject: tgsi: restore HIGH_PRECISION setting --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 06df3dbb05..45453c34ce 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -36,7 +36,7 @@ #ifdef PIPE_ARCH_X86 -#define HIGH_PRECISION 0 /* for 1/sqrt() */ +#define HIGH_PRECISION 1 /* for 1/sqrt() */ #define FOR_EACH_CHANNEL( CHAN )\ -- cgit v1.2.3 From f1f52a8be98efa26c7c9bc480a2483fc2106d654 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Thu, 1 May 2008 17:30:17 +0100 Subject: gallium: Notify driver of texture updates in util_blit_pixels(). --- src/gallium/auxiliary/util/u_blit.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 9e9912c6e4..568d62ced1 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -295,6 +295,8 @@ util_blit_pixels(struct blit_state *ctx, src, srcLeft, srcTop, /* src */ srcW, srcH); /* size */ + pipe->texture_update(pipe, tex, 0, 1 << 0); + /* save state (restored below) */ cso_save_blend(ctx->cso); cso_save_depth_stencil_alpha(ctx->cso); -- cgit v1.2.3 From 0000792a2006a2c8fde1b54d070490a625fb8435 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 18:48:28 +0100 Subject: sct: fix bug in remove_context_from_surface --- src/gallium/auxiliary/sct/sct.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/sct/sct.c b/src/gallium/auxiliary/sct/sct.c index 97ee5882a1..5e4126e014 100644 --- a/src/gallium/auxiliary/sct/sct.c +++ b/src/gallium/auxiliary/sct/sct.c @@ -209,6 +209,7 @@ remove_context_from_surface(struct sct_surface *si, } else { prev = curr; + next = curr->next; } } } -- cgit v1.2.3 From fb3623b235f5caa9d76e656b1e5eda797c7c73eb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 20:41:03 +0100 Subject: rtasm: fix labels after (not so) recent change to allow dynamic fn growth Using char * for labels doesn't work if you realloc the function during assembly and free the old storage... --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 49 +++++++++---------------- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 14 +++---- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 4 +- src/gallium/auxiliary/translate/translate_sse.c | 2 +- 4 files changed, 28 insertions(+), 41 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 40f6f973d6..e69251f072 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -347,9 +347,9 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg ) return x86_make_reg( reg.file, reg.idx ); } -unsigned char *x86_get_label( struct x86_function *p ) +int x86_get_label( struct x86_function *p ) { - return p->csr; + return p->csr - p->store; } @@ -361,17 +361,22 @@ unsigned char *x86_get_label( struct x86_function *p ) void x86_jcc( struct x86_function *p, enum x86_cc cc, - unsigned char *label ) + int label ) { - intptr_t offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 2); + int offset = label - (x86_get_label(p) + 2); DUMP_I(cc); + if (offset < 0) { + int amt = p->csr - p->store; + assert(amt > -offset); + } + if (offset <= 127 && offset >= -128) { emit_1ub(p, 0x70 + cc); emit_1b(p, (char) offset); } else { - offset = pointer_to_intptr( label ) - (pointer_to_intptr( x86_get_label(p) ) + 6); + offset = label - (x86_get_label(p) + 6); emit_2ub(p, 0x0f, 0x80 + cc); emit_1i(p, offset); } @@ -379,8 +384,8 @@ void x86_jcc( struct x86_function *p, /* Always use a 32bit offset for forward jumps: */ -unsigned char *x86_jcc_forward( struct x86_function *p, - enum x86_cc cc ) +int x86_jcc_forward( struct x86_function *p, + enum x86_cc cc ) { DUMP_I(cc); emit_2ub(p, 0x0f, 0x80 + cc); @@ -388,7 +393,7 @@ unsigned char *x86_jcc_forward( struct x86_function *p, return x86_get_label(p); } -unsigned char *x86_jmp_forward( struct x86_function *p) +int x86_jmp_forward( struct x86_function *p) { DUMP(); emit_1ub(p, 0xe9); @@ -396,7 +401,7 @@ unsigned char *x86_jmp_forward( struct x86_function *p) return x86_get_label(p); } -unsigned char *x86_call_forward( struct x86_function *p) +int x86_call_forward( struct x86_function *p) { DUMP(); @@ -408,42 +413,24 @@ unsigned char *x86_call_forward( struct x86_function *p) /* Fixup offset from forward jump: */ void x86_fixup_fwd_jump( struct x86_function *p, - unsigned char *fixup ) + int fixup ) { - *(int *)(fixup - 4) = pointer_to_intptr( x86_get_label(p) ) - pointer_to_intptr( fixup ); + *(int *)(p->store + fixup - 4) = x86_get_label(p) - fixup; } -void x86_jmp( struct x86_function *p, unsigned char *label) +void x86_jmp( struct x86_function *p, int label) { DUMP_I( label ); emit_1ub(p, 0xe9); - emit_1i(p, pointer_to_intptr( label ) - pointer_to_intptr( x86_get_label(p) ) - 4); -} - -#if 0 -static unsigned char *cptr( void (*label)() ) -{ - return (unsigned char *) label; + emit_1i(p, label - x86_get_label(p) - 4); } -/* This doesn't work once we start reallocating & copying the - * generated code on buffer fills, because the call is relative to the - * current pc. - */ -void x86_call( struct x86_function *p, void (*label)()) -{ - DUMP_I( label ); - emit_1ub(p, 0xe8); - emit_1i(p, cptr(label) - x86_get_label(p) - 4); -} -#else void x86_call( struct x86_function *p, struct x86_reg reg) { DUMP_R( reg ); emit_1ub(p, 0xff); emit_modrm_noreg(p, 2, reg); } -#endif /* michal: diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index ad79b1facf..eacaeeaf6f 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -124,23 +124,23 @@ struct x86_reg x86_get_base_reg( struct x86_reg reg ); /* Labels, jumps and fixup: */ -unsigned char *x86_get_label( struct x86_function *p ); +int x86_get_label( struct x86_function *p ); void x86_jcc( struct x86_function *p, enum x86_cc cc, - unsigned char *label ); + int label ); -unsigned char *x86_jcc_forward( struct x86_function *p, +int x86_jcc_forward( struct x86_function *p, enum x86_cc cc ); -unsigned char *x86_jmp_forward( struct x86_function *p); +int x86_jmp_forward( struct x86_function *p); -unsigned char *x86_call_forward( struct x86_function *p); +int x86_call_forward( struct x86_function *p); void x86_fixup_fwd_jump( struct x86_function *p, - unsigned char *fixup ); + int fixup ); -void x86_jmp( struct x86_function *p, unsigned char *label ); +void x86_jmp( struct x86_function *p, int label ); /* void x86_call( struct x86_function *p, void (*label)() ); */ void x86_call( struct x86_function *p, struct x86_reg reg); diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 45453c34ce..07db3292b4 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -2021,7 +2021,7 @@ static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num, struct x86_reg aos_input; struct x86_reg num_inputs; struct x86_reg temp; - unsigned char *inner_loop; + int inner_loop; soa_input = x86_make_reg( file_REG32, reg_AX ); aos_input = get_temp_base(); /* BX or SI */ @@ -2083,7 +2083,7 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, struct x86_reg aos_output; struct x86_reg num_outputs; struct x86_reg temp; - unsigned char *inner_loop; + int inner_loop; soa_output = x86_make_reg( file_REG32, reg_AX ); aos_output = get_temp_base(); /* BX or SI */ diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index f590d48b78..a54ac5a82f 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -404,7 +404,7 @@ static boolean build_vertex_emit( struct translate_sse *p, struct x86_reg srcEAX = x86_make_reg(file_REG32, reg_CX); struct x86_reg countEBP = x86_make_reg(file_REG32, reg_BP); struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI); - uint8_t *fixup, *label; + int fixup, label; unsigned j; p->func = func; -- cgit v1.2.3 From 2004b8a769110456e66d040398eacf25c8592710 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 20:42:44 +0100 Subject: draw: label fn args -- shouldn't this be defined where the fn is created? --- src/gallium/auxiliary/draw/draw_vs_sse.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 07f85bc448..a57c938fbf 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -51,17 +51,17 @@ #if SSE_SWIZZLES typedef void (XSTDCALL *codegen_function) ( - const struct tgsi_exec_vector *input, - struct tgsi_exec_vector *output, - float (*constant)[4], - struct tgsi_exec_vector *temporary, - float (*immediates)[4], - const float (*aos_input)[4], - uint num_inputs, - uint input_stride, - float (*aos_output)[4], - uint num_outputs, - uint output_stride ); + const struct tgsi_exec_vector *input, /* 1 */ + struct tgsi_exec_vector *output, /* 2 */ + float (*constant)[4], /* 3 */ + struct tgsi_exec_vector *temporary, /* 4 */ + float (*immediates)[4], /* 5 */ + const float (*aos_input)[4], /* 6 */ + uint num_inputs, /* 7 */ + uint input_stride, /* 8 */ + float (*aos_output)[4], /* 9 */ + uint num_outputs, /* 10 */ + uint output_stride ); /* 11 */ #else typedef void (XSTDCALL *codegen_function) ( const struct tgsi_exec_vector *input, -- cgit v1.2.3 From 2c89b75e36fd35d5a003107d1d2f97b537321f95 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 20:44:41 +0100 Subject: rtasm: learn another version of push --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index e69251f072..4e036d9032 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -449,8 +449,15 @@ void x86_push( struct x86_function *p, struct x86_reg reg ) { DUMP_R( reg ); - assert(reg.mod == mod_REG); - emit_1ub(p, 0x50 + reg.idx); + if (reg.mod == mod_REG) + emit_1ub(p, 0x50 + reg.idx); + else + { + emit_1ub(p, 0xff); + emit_modrm_noreg(p, 6, reg); + } + + p->stack_offset += 4; } -- cgit v1.2.3 From f067c6c452bdd5f5cc6b0f6b2f79fb3fc1162822 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 20:45:15 +0100 Subject: tgsi: remove some bogus win vs. linux crud Pass arguments properly in linux now. Still need to change this to use a single calling convention on both platforms. --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 118 +++++++++++----------------- 1 file changed, 46 insertions(+), 72 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 07db3292b4..a67bc8567b 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -441,19 +441,13 @@ emit_push_gp( { x86_push( func, - get_const_base() ); + x86_make_reg( file_REG32, reg_AX) ); x86_push( func, - get_input_base() ); + x86_make_reg( file_REG32, reg_CX) ); x86_push( func, - get_output_base() ); - - /* It is important on non-win32 platforms that temp base is pushed last. - */ - x86_push( - func, - get_temp_base() ); + x86_make_reg( file_REG32, reg_DX) ); } static void @@ -464,16 +458,13 @@ x86_pop_gp( */ x86_pop( func, - get_temp_base() ); + x86_make_reg( file_REG32, reg_DX) ); x86_pop( func, - get_output_base() ); + x86_make_reg( file_REG32, reg_CX) ); x86_pop( func, - get_input_base() ); - x86_pop( - func, - get_const_base() ); + x86_make_reg( file_REG32, reg_AX) ); } static void @@ -490,19 +481,23 @@ emit_func_call_dst( emit_push_gp( func ); -#ifdef WIN32 - x86_push( - func, - get_temp( TEMP_R0, 0 ) ); -#endif - { struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); + x86_lea( + func, + ecx, + get_temp( TEMP_R0, 0 ) ); + + x86_push( func, ecx ); x86_mov_reg_imm( func, ecx, (unsigned long) code ); x86_call( func, ecx ); +#ifndef WIN32 + x86_pop(func, ecx ); +#endif } + x86_pop_gp( func ); @@ -563,11 +558,7 @@ static void XSTDCALL cos4f( float *store ) { -#ifdef WIN32 const unsigned X = 0; -#else - const unsigned X = TEMP_R0 * 16; -#endif store[X + 0] = cosf( store[X + 0] ); store[X + 1] = cosf( store[X + 1] ); @@ -590,11 +581,8 @@ static void XSTDCALL ex24f( float *store ) { -#ifdef WIN32 const unsigned X = 0; -#else - const unsigned X = TEMP_R0 * 16; -#endif + store[X + 0] = powf( 2.0f, store[X + 0] ); store[X + 1] = powf( 2.0f, store[X + 1] ); store[X + 2] = powf( 2.0f, store[X + 2] ); @@ -627,11 +615,8 @@ static void XSTDCALL flr4f( float *store ) { -#ifdef WIN32 const unsigned X = 0; -#else - const unsigned X = TEMP_R0 * 16; -#endif + store[X + 0] = floorf( store[X + 0] ); store[X + 1] = floorf( store[X + 1] ); store[X + 2] = floorf( store[X + 2] ); @@ -653,11 +638,8 @@ static void XSTDCALL frc4f( float *store ) { -#ifdef WIN32 const unsigned X = 0; -#else - const unsigned X = TEMP_R0 * 16; -#endif + store[X + 0] -= floorf( store[X + 0] ); store[X + 1] -= floorf( store[X + 1] ); store[X + 2] -= floorf( store[X + 2] ); @@ -679,11 +661,8 @@ static void XSTDCALL lg24f( float *store ) { -#ifdef WIN32 const unsigned X = 0; -#else - const unsigned X = TEMP_R0 * 16; -#endif + store[X + 0] = LOG2( store[X + 0] ); store[X + 1] = LOG2( store[X + 1] ); store[X + 2] = LOG2( store[X + 2] ); @@ -741,11 +720,8 @@ static void XSTDCALL pow4f( float *store ) { -#ifdef WIN32 const unsigned X = 0; -#else - const unsigned X = TEMP_R0 * 16; -#endif + store[X + 0] = powf( store[X + 0], store[X + 4] ); store[X + 1] = powf( store[X + 1], store[X + 5] ); store[X + 2] = powf( store[X + 2], store[X + 6] ); @@ -786,11 +762,8 @@ static void XSTDCALL rsqrt4f( float *store ) { -#ifdef WIN32 const unsigned X = 0; -#else - const unsigned X = TEMP_R0 * 16; -#endif + store[X + 0] = 1.0F / sqrtf( store[X + 0] ); store[X + 1] = 1.0F / sqrtf( store[X + 1] ); store[X + 2] = 1.0F / sqrtf( store[X + 2] ); @@ -864,11 +837,8 @@ static void XSTDCALL sin4f( float *store ) { -#ifdef WIN32 const unsigned X = 0; -#else - const unsigned X = TEMP_R0 * 16; -#endif + store[X + 0] = sinf( store[X + 0] ); store[X + 1] = sinf( store[X + 1] ); store[X + 2] = sinf( store[X + 2] ); @@ -2015,40 +1985,40 @@ emit_declaration( } } -static void aos_to_soa( struct x86_function *func, uint aos, uint soa, uint num, uint stride ) +static void aos_to_soa( struct x86_function *func, + uint arg_aos, + uint arg_soa, + uint arg_num, + uint arg_stride ) { - struct x86_reg soa_input; - struct x86_reg aos_input; - struct x86_reg num_inputs; - struct x86_reg temp; + struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX ); + struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX ); + struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX ); + struct x86_reg stride = x86_make_reg( file_REG32, reg_DX ); int inner_loop; - soa_input = x86_make_reg( file_REG32, reg_AX ); - aos_input = get_temp_base(); /* BX or SI */ - num_inputs = x86_make_reg( file_REG32, reg_CX ); - temp = x86_make_reg( file_REG32, reg_DX ); /* Save EBX */ - x86_push( func, aos_input ); + x86_push( func, x86_make_reg( file_REG32, reg_BX ) ); - x86_mov( func, soa_input, x86_fn_arg( func, soa ) ); - x86_mov( func, aos_input, x86_fn_arg( func, aos ) ); - x86_mov( func, num_inputs, x86_fn_arg( func, num ) ); + x86_mov( func, aos_input, x86_fn_arg( func, arg_aos ) ); + x86_mov( func, soa_input, x86_fn_arg( func, arg_soa ) ); + x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) ); + x86_mov( func, stride, x86_fn_arg( func, arg_stride ) ); /* do */ inner_loop = x86_get_label( func ); { - x86_mov( func, temp, x86_fn_arg( func, stride ) ); x86_push( func, aos_input ); sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); - x86_add( func, aos_input, temp ); + x86_add( func, aos_input, stride ); sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); - x86_add( func, aos_input, temp ); + x86_add( func, aos_input, stride ); sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); - x86_add( func, aos_input, temp ); + x86_add( func, aos_input, stride ); sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); x86_pop( func, aos_input ); @@ -2086,7 +2056,7 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, int inner_loop; soa_output = x86_make_reg( file_REG32, reg_AX ); - aos_output = get_temp_base(); /* BX or SI */ + aos_output = x86_make_reg( file_REG32, reg_BX ); num_outputs = x86_make_reg( file_REG32, reg_CX ); temp = x86_make_reg( file_REG32, reg_DX ); @@ -2213,7 +2183,11 @@ tgsi_emit_sse2( assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX); if (do_swizzles) - aos_to_soa( func, 6, 1, 7, 8 ); + aos_to_soa( func, + 6, /* aos_input */ + 1, /* machine->input */ + 7, /* num_inputs */ + 8 ); /* input_stride */ x86_mov( func, -- cgit v1.2.3 From bc4952987419d77fabbf4fa43913f6e488bdb211 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 1 May 2008 15:21:40 -0600 Subject: added cast for MSVC --- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 54fdcc1a3b..73ee419858 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -256,7 +256,7 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, uint size = 4; immed = tgsi_default_full_immediate(); immed.Immediate.Size = 1 + size; /* one for the token itself */ - immed.u.Pointer = value; + immed.u.Pointer = (void *) value; ctx->emit_immediate(ctx, &immed); } -- cgit v1.2.3 From 7a4313b63bcd06318437d384875472e7139070a1 Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 1 May 2008 18:42:01 -0600 Subject: gallium: implement TGSI_OPCODE_LOG --- src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 30 +++++++++++++++-------------- 1 file changed, 16 insertions(+), 14 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index d55f907c0d..4b33d7e8bd 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -1549,22 +1549,24 @@ exec_instruction( break; case TGSI_OPCODE_LOG: - debug_printf("TGSI: LOG opcode not implemented\n"); - /* from ARB_v_p: - tmp = fabs(ScalarLoad(op0)); - result.x = floor(log2(tmp)); - result.y = tmp / 2^(floor(log2(tmp))); - result.z = RoughApproxLog2(tmp); - result.w = 1.0; - */ -#if 0 - /* something like this: */ FETCH( &r[0], 0, CHAN_X ); - micro_lg2( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); + micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ + micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ + micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &r[0], 0, CHAN_X ); + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ + micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ + STORE( &r[0], 0, CHAN_Y ); + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + STORE( &r[1], 0, CHAN_Z ); + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } -#endif break; case TGSI_OPCODE_MUL: -- cgit v1.2.3 From 3b63bc8ac6db7af4077f12cfd44876a9d43cc6ec Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 1 May 2008 18:49:20 -0600 Subject: gallium: implement TGSI_OPCODE_EXP --- src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 29 +++++++++++++++-------------- 1 file changed, 15 insertions(+), 14 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index 4b33d7e8bd..5d5125f7cb 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -1530,22 +1530,23 @@ exec_instruction( break; case TGSI_OPCODE_EXP: - debug_printf("TGSI: EXP opcode not implemented\n"); - /* from ARB_v_p: - tmp = ScalarLoad(op0); - result.x = 2^floor(tmp); - result.y = tmp - floor(tmp); - result.z = RoughApprox2ToX(tmp); - result.w = 1.0; - */ -#if 0 - /* something like this: */ FETCH( &r[0], 0, CHAN_X ); - micro_exp2( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); + micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ + STORE( &r[2], 0, CHAN_X ); /* store r2 */ + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ + STORE( &r[2], 0, CHAN_Y ); /* store r2 */ + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ + STORE( &r[2], 0, CHAN_Z ); /* store r2 */ + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } -#endif break; case TGSI_OPCODE_LOG: -- cgit v1.2.3 From 869b0836c1c4339de91c9918ae07926c846a004c Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 1 May 2008 18:56:20 -0600 Subject: gallium: temporarily disable broken SSE2 code for ARL opcode --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index a67bc8567b..2fd76a3072 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -1190,11 +1190,16 @@ emit_instruction( switch( inst->Instruction.Opcode ) { case TGSI_OPCODE_ARL: +#if 0 + /* XXX this isn't working properly (see glean vertProg1 test) */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); emit_f2it( func, 0 ); STORE( func, *inst, 0, 0, chan_index ); } +#else + return 0; +#endif break; case TGSI_OPCODE_MOV: -- cgit v1.2.3 From 9d151a2517de3f83d676624a21b4f73d5accecbe Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Thu, 1 May 2008 16:39:54 +0200 Subject: tgsi: Dump destination register modulate modifier. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 26bfc2051f..4c65ffd780 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -767,6 +767,31 @@ dump_instruction_short( SID( dst->DstRegister.Index ); CHR( ']' ); + switch (dst->DstRegisterExtModulate.Modulate) { + case TGSI_MODULATE_1X: + break; + case TGSI_MODULATE_2X: + TXT( "_2X" ); + break; + case TGSI_MODULATE_4X: + TXT( "_4X" ); + break; + case TGSI_MODULATE_8X: + TXT( "_8X" ); + break; + case TGSI_MODULATE_HALF: + TXT( "_D2" ); + break; + case TGSI_MODULATE_QUARTER: + TXT( "_D4" ); + break; + case TGSI_MODULATE_EIGHTH: + TXT( "_D8" ); + break; + default: + assert( 0 ); + } + if( dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW ) { CHR( '.' ); if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_X ) { -- cgit v1.2.3 From 36f93c5e5159ebd99a5a4504efccdf6c5bf40716 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 2 May 2008 10:20:53 +0200 Subject: tgsi: Fix build on Win32. --- src/gallium/auxiliary/tgsi/util/tgsi_parse.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c index c3526cb71f..5bea773840 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c @@ -43,7 +43,7 @@ tgsi_full_token_free( union tgsi_full_token *full_token ) { if( full_token->Token.Type == TGSI_TOKEN_TYPE_IMMEDIATE ) { - FREE( full_token->FullImmediate.u.Pointer ); + FREE( (void *) full_token->FullImmediate.u.Pointer ); } } @@ -156,7 +156,7 @@ tgsi_parse_token( imm->u.Pointer = MALLOC( sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) ); for( i = 0; i < imm->Immediate.Size - 1; i++ ) { - next_token( ctx, &imm->u.ImmediateFloat32[i] ); + next_token( ctx, (struct tgsi_immediate_float32 *) &imm->u.ImmediateFloat32[i] ); } break; -- cgit v1.2.3 From a1cb0c2b915532e934b5d37bd0c550b1bfcc77ba Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 2 May 2008 11:13:58 +0200 Subject: tgsi: Do not assume IN and OUT registers are declared sequentially. --- src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c index ea4a72967d..65650ed22a 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c @@ -103,18 +103,14 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->file_max[file] = MAX2(info->file_max[file], (int)i); if (file == TGSI_FILE_INPUT) { - info->input_semantic_name[info->num_inputs] - = (ubyte)fulldecl->Semantic.SemanticName; - info->input_semantic_index[info->num_inputs] - = (ubyte)fulldecl->Semantic.SemanticIndex; + info->input_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName; + info->input_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex; info->num_inputs++; } if (file == TGSI_FILE_OUTPUT) { - info->output_semantic_name[info->num_outputs] - = (ubyte)fulldecl->Semantic.SemanticName; - info->output_semantic_index[info->num_outputs] - = (ubyte)fulldecl->Semantic.SemanticIndex; + info->output_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName; + info->output_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex; info->num_outputs++; } @@ -137,6 +133,9 @@ tgsi_scan_shader(const struct tgsi_token *tokens, } } + assert( info->file_max[TGSI_FILE_INPUT] + 1 == info->num_inputs ); + assert( info->file_max[TGSI_FILE_OUTPUT] + 1 == info->num_outputs ); + info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] || info->opcode_count[TGSI_OPCODE_KILP]); -- cgit v1.2.3 From 251db95945c6b484a093336e7bf12aed6091de54 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 17:55:52 +0100 Subject: cso: can memcmp-compare pipe_framebuffer_state now it includes fb dimensions --- src/gallium/auxiliary/cso_cache/cso_context.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index eef898f486..5d626b7cdc 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -769,8 +769,7 @@ void cso_restore_vertex_shader(struct cso_context *ctx) enum pipe_error cso_set_framebuffer(struct cso_context *ctx, const struct pipe_framebuffer_state *fb) { - /* XXX this memcmp() fails to detect buffer size changes */ - if (1/*memcmp(&ctx->fb, fb, sizeof(*fb))*/) { + if (memcmp(&ctx->fb, fb, sizeof(*fb)) != 0) { ctx->fb = *fb; ctx->pipe->set_framebuffer_state(ctx->pipe, fb); } -- cgit v1.2.3 From 731e7b961cd081ac6a64b636937716ce3a623c2c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 1 May 2008 18:13:46 +0100 Subject: re-add pipe_surface map/unmap inlines --- src/gallium/auxiliary/draw/draw_vs_sse.c | 2 +- src/gallium/drivers/softpipe/sp_surface.c | 2 +- src/gallium/include/pipe/p_inlines.h | 33 +++++++++++++++++++++++++++++++ 3 files changed, 35 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 90926aec85..07f85bc448 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -47,7 +47,7 @@ #include "tgsi/util/tgsi_parse.h" #define SSE_MAX_VERTICES 4 -#define SSE_SWIZZLES 0 +#define SSE_SWIZZLES 1 #if SSE_SWIZZLES typedef void (XSTDCALL *codegen_function) ( diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index b82b1a8f37..29a1e92416 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -46,7 +46,6 @@ sp_surface_copy(struct pipe_context *pipe, struct pipe_surface *src, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { - assert( dst->cpp == src->cpp ); void *dst_map = pipe->screen->surface_map( pipe->screen, dst, PIPE_BUFFER_USAGE_CPU_WRITE ); @@ -55,6 +54,7 @@ sp_surface_copy(struct pipe_context *pipe, src, PIPE_BUFFER_USAGE_CPU_READ ); + assert( dst->cpp == src->cpp ); assert(src_map && dst_map); pipe_copy_rect(dst_map, diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h index 592c3c87c2..1e4b98edb4 100644 --- a/src/gallium/include/pipe/p_inlines.h +++ b/src/gallium/include/pipe/p_inlines.h @@ -39,6 +39,39 @@ extern "C" { #endif +/* XXX: these are a kludge. will fix when all surfaces are views into + * textures, and free-floating winsys surfaces go away. + */ +static INLINE void * +pipe_surface_map( struct pipe_surface *surf, unsigned flags ) +{ + if (surf->texture) { + struct pipe_screen *screen = surf->texture->screen; + return surf->texture->screen->surface_map( screen, surf, flags ); + } + else { + struct pipe_winsys *winsys = surf->winsys; + char *map = (char *)winsys->buffer_map( winsys, surf->buffer, flags ); + if (map == NULL) + return NULL; + return (void *)(map + surf->offset); + } +} + +static INLINE void +pipe_surface_unmap( struct pipe_surface *surf ) +{ + if (surf->texture) { + struct pipe_screen *screen = surf->texture->screen; + surf->texture->screen->surface_unmap( screen, surf ); + } + else { + struct pipe_winsys *winsys = surf->winsys; + winsys->buffer_unmap( winsys, surf->buffer ); + } +} + + /** * Set 'ptr' to point to 'surf' and update reference counting. -- cgit v1.2.3 From 25d60838b5dfdbde54f19f26b41977fc25011474 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 2 May 2008 12:17:11 +0100 Subject: gallium: fix build after merge --- src/gallium/auxiliary/util/u_blit.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index b70bcbfa66..999a3e5099 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -296,7 +296,9 @@ util_blit_pixels(struct blit_state *ctx, src, srcLeft, srcTop, /* src */ srcW, srcH); /* size */ - pipe->texture_update(pipe, tex, 0, 1 << 0); + /* free the surface, update the texture if necessary. + */ + screen->tex_surface_release(screen, &texSurf); /* save state (restored below) */ cso_save_blend(ctx->cso); @@ -357,8 +359,6 @@ util_blit_pixels(struct blit_state *ctx, cso_restore_vertex_shader(ctx->cso); cso_restore_viewport(ctx->cso); - /* free the texture */ - pipe_surface_reference(&texSurf, NULL); screen->texture_release(screen, &tex); } -- cgit v1.2.3 From 17058e07469f2dc5b47b4f820bd5a31b7ed9177c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 2 May 2008 16:02:18 +0200 Subject: tgsi: Implement fast rsqrtf. Not tested, inactive. --- src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 6 ++++ src/gallium/auxiliary/tgsi/exec/tgsi_exec.h | 10 +++++-- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 43 +++++++++++++++++------------ 3 files changed, 40 insertions(+), 19 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index 5d5125f7cb..826b432f09 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -88,6 +88,10 @@ #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C +#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I +#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C +#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I +#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C #define TEMP_R0 TGSI_EXEC_TEMP_R0 #define FOR_EACH_CHANNEL(CHAN)\ @@ -262,6 +266,8 @@ tgsi_exec_machine_init( mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; + mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; + mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; } } diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h index 92e2e5e985..19bd78df3d 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h @@ -133,9 +133,15 @@ struct tgsi_exec_labels #define TGSI_EXEC_TEMP_PRIMITIVE_I 34 #define TGSI_EXEC_TEMP_PRIMITIVE_C 2 -#define TGSI_EXEC_TEMP_R0 35 +#define TGSI_EXEC_TEMP_THREE_I 34 +#define TGSI_EXEC_TEMP_THREE_C 3 -#define TGSI_EXEC_NUM_TEMPS (32 + 4) +#define TGSI_EXEC_TEMP_HALF_I 35 +#define TGSI_EXEC_TEMP_HALF_C 0 + +#define TGSI_EXEC_TEMP_R0 36 + +#define TGSI_EXEC_NUM_TEMPS (32 + 5) #define TGSI_EXEC_NUM_ADDRS 1 #define TGSI_EXEC_NUM_IMMEDIATES 256 diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 2fd76a3072..dbf002130b 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -36,7 +36,11 @@ #ifdef PIPE_ARCH_X86 -#define HIGH_PRECISION 1 /* for 1/sqrt() */ +/* for 1/sqrt() + * + * This costs about 100fps (close to 10%) in gears: + */ +#define HIGH_PRECISION 1 #define FOR_EACH_CHANNEL( CHAN )\ @@ -794,20 +798,25 @@ emit_rsqrt( * * See: http://softwarecommunity.intel.com/articles/eng/1818.htm */ - /* This is some code that woudl do the above for a scalar 'a'. We - * obviously are interested in a vector version: - * - * movss xmm3, a; - * movss xmm1, half; - * movss xmm2, three; - * rsqrtss xmm0, xmm3; - * mulss xmm3, xmm0; - * mulss xmm1, xmm0; - * mulss xmm3, xmm0; - * subss xmm2, xmm3; - * mulss xmm1, xmm2; - * movss x, xmm1; - */ + { + struct x86_reg dst = make_xmm( xmm_dst ); + struct x86_reg src = make_xmm( xmm_src ); + struct x86_reg tmp0 = make_xmm( 2 ); + struct x86_reg tmp1 = make_xmm( 3 ); + + assert( xmm_dst != xmm_src ); + assert( xmm_dst != 2 && xmm_dst != 3 ); + assert( xmm_src != 2 && xmm_src != 3 ); + + sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) ); + sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) ); + sse_rsqrtps( func, tmp1, src ); + sse_mulps( func, src, tmp1 ); + sse_mulps( func, dst, tmp1 ); + sse_mulps( func, src, tmp1 ); + sse_subps( func, tmp0, src ); + sse_mulps( func, dst, tmp0 ); + } #endif #else /* On Intel CPUs at least, this is only accurate to 12 bits -- not @@ -1295,9 +1304,9 @@ emit_instruction( case TGSI_OPCODE_RSQ: /* TGSI_OPCODE_RECIPSQRT */ FETCH( func, *inst, 0, 0, CHAN_X ); - emit_rsqrt( func, 0, 0 ); + emit_rsqrt( func, 1, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); + STORE( func, *inst, 1, 0, chan_index ); } break; -- cgit v1.2.3 From 6c15a70b75b1625b69790f98f2f44e9ae4435f6a Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 2 May 2008 16:12:55 +0200 Subject: tgsi: Enable fast high precision rsqrt. --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 22 ---------------------- 1 file changed, 22 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index dbf002130b..b6b05944be 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -761,20 +761,6 @@ emit_rcp ( make_xmm( xmm_src ) ); } -#if HIGH_PRECISION -static void XSTDCALL -rsqrt4f( - float *store ) -{ - const unsigned X = 0; - - store[X + 0] = 1.0F / sqrtf( store[X + 0] ); - store[X + 1] = 1.0F / sqrtf( store[X + 1] ); - store[X + 2] = 1.0F / sqrtf( store[X + 2] ); - store[X + 3] = 1.0F / sqrtf( store[X + 3] ); -} -#endif - static void emit_rsqrt( struct x86_function *func, @@ -782,13 +768,6 @@ emit_rsqrt( unsigned xmm_src ) { #if HIGH_PRECISION -#if 1 - emit_func_call_dst_src( - func, - xmm_dst, - xmm_src, - rsqrt4f ); -#else /* Although rsqrtps() and rcpps() are low precision on some/all SSE * implementations, it is possible to improve its precision at * fairly low cost, using a newton/raphson step, as below: @@ -817,7 +796,6 @@ emit_rsqrt( sse_subps( func, tmp0, src ); sse_mulps( func, dst, tmp0 ); } -#endif #else /* On Intel CPUs at least, this is only accurate to 12 bits -- not * good enough. -- cgit v1.2.3 From cff8d3bdcbf78b57b52a2f60c54e5a3cae286137 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 2 May 2008 08:22:25 -0600 Subject: gallium: remove ^M (CR) chars --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 46 ++++++++++++++--------------- 1 file changed, 23 insertions(+), 23 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index b6b05944be..8018bd7fa4 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -36,11 +36,11 @@ #ifdef PIPE_ARCH_X86 -/* for 1/sqrt() - * - * This costs about 100fps (close to 10%) in gears: - */ -#define HIGH_PRECISION 1 +/* for 1/sqrt() + * + * This costs about 100fps (close to 10%) in gears: + */ +#define HIGH_PRECISION 1 #define FOR_EACH_CHANNEL( CHAN )\ @@ -777,24 +777,24 @@ emit_rsqrt( * * See: http://softwarecommunity.intel.com/articles/eng/1818.htm */ - { - struct x86_reg dst = make_xmm( xmm_dst ); - struct x86_reg src = make_xmm( xmm_src ); - struct x86_reg tmp0 = make_xmm( 2 ); - struct x86_reg tmp1 = make_xmm( 3 ); - - assert( xmm_dst != xmm_src ); - assert( xmm_dst != 2 && xmm_dst != 3 ); - assert( xmm_src != 2 && xmm_src != 3 ); - - sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) ); - sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) ); - sse_rsqrtps( func, tmp1, src ); - sse_mulps( func, src, tmp1 ); - sse_mulps( func, dst, tmp1 ); - sse_mulps( func, src, tmp1 ); - sse_subps( func, tmp0, src ); - sse_mulps( func, dst, tmp0 ); + { + struct x86_reg dst = make_xmm( xmm_dst ); + struct x86_reg src = make_xmm( xmm_src ); + struct x86_reg tmp0 = make_xmm( 2 ); + struct x86_reg tmp1 = make_xmm( 3 ); + + assert( xmm_dst != xmm_src ); + assert( xmm_dst != 2 && xmm_dst != 3 ); + assert( xmm_src != 2 && xmm_src != 3 ); + + sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) ); + sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) ); + sse_rsqrtps( func, tmp1, src ); + sse_mulps( func, src, tmp1 ); + sse_mulps( func, dst, tmp1 ); + sse_mulps( func, src, tmp1 ); + sse_subps( func, tmp0, src ); + sse_mulps( func, dst, tmp0 ); } #else /* On Intel CPUs at least, this is only accurate to 12 bits -- not -- cgit v1.2.3 From 6e004e973bcc5b789ee3f70b70f5d728c8b8ea71 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 2 May 2008 14:00:35 -0600 Subject: gallium: remove 0.5 vertex biases in set_vertex_data() These should not be needed and were causing garbage to appear along the edges of the mipmap images. --- src/gallium/auxiliary/util/u_gen_mipmap.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 2e8417ee13..c53c512268 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -778,23 +778,23 @@ set_vertex_data(struct gen_mipmap_state *ctx, float width, float height) { void *buf; - ctx->vertices[0][0][0] = -0.5f; /*x*/ - ctx->vertices[0][0][1] = -0.5f; /*y*/ + ctx->vertices[0][0][0] = 0.0f; /*x*/ + ctx->vertices[0][0][1] = 0.0f; /*y*/ ctx->vertices[0][1][0] = 0.0f; /*s*/ ctx->vertices[0][1][1] = 0.0f; /*t*/ - ctx->vertices[1][0][0] = width - 0.5f; /*x*/ - ctx->vertices[1][0][1] = -0.5f; /*y*/ - ctx->vertices[1][1][0] = 1.0f; /*s*/ - ctx->vertices[1][1][1] = 0.0f; /*t*/ + ctx->vertices[1][0][0] = width; + ctx->vertices[1][0][1] = 0.0f; + ctx->vertices[1][1][0] = 1.0f; + ctx->vertices[1][1][1] = 0.0f; - ctx->vertices[2][0][0] = width - 0.5f; - ctx->vertices[2][0][1] = height - 0.5f; + ctx->vertices[2][0][0] = width; + ctx->vertices[2][0][1] = height; ctx->vertices[2][1][0] = 1.0f; ctx->vertices[2][1][1] = 1.0f; - ctx->vertices[3][0][0] = -0.5f; - ctx->vertices[3][0][1] = height - 0.5f; + ctx->vertices[3][0][0] = 0.0f; + ctx->vertices[3][0][1] = height; ctx->vertices[3][1][0] = 0.0f; ctx->vertices[3][1][1] = 1.0f; -- cgit v1.2.3 From 131a1fbc91725c11e4822b82e58b94ec3a711476 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 5 May 2008 23:58:37 +0900 Subject: util: Alternative implementation for standard c library string functions. --- src/gallium/auxiliary/util/u_string.h | 112 ++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_string.h b/src/gallium/auxiliary/util/u_string.h index b99d4e8021..2fede229d9 100644 --- a/src/gallium/auxiliary/util/u_string.h +++ b/src/gallium/auxiliary/util/u_string.h @@ -41,6 +41,8 @@ #include #include +#include "pipe/p_compiler.h" + #ifdef __cplusplus extern "C" { @@ -48,11 +50,121 @@ extern "C" { #ifdef WIN32 + int util_vsnprintf(char *, size_t, const char *, va_list); int util_snprintf(char *str, size_t size, const char *format, ...); + +static INLINE void +util_sprintf(char *str, const char *format, ...) +{ + va_list ap; + va_start(ap, format); + util_vsnprintf(str, (size_t)-1, format, ap); + va_end(ap); +} + +static INLINE char * +util_strchr(const char *s, char c) +{ + while(*s) { + if(*s == c) + return (char *)s; + ++s; + } + return NULL; +} + +static INLINE char* +util_strncat(char *dst, const char *src, size_t n) +{ + char *p = dst + strlen(dst); + const char *q = src; + size_t i; + + for (i = 0; i < n && *q != '\0'; ++i) + *p++ = *q++; + *p = '\0'; + + return dst; +} + +static INLINE int +util_strcmp(const char *s1, const char *s2) +{ + unsigned char u1, u2; + + while (1) { + u1 = (unsigned char) *s1++; + u2 = (unsigned char) *s2++; + if (u1 != u2) + return u1 - u2; + if (u1 == '\0') + return 0; + } + return 0; +} + +static INLINE int +util_strncmp(const char *s1, const char *s2, size_t n) +{ + unsigned char u1, u2; + + while (n-- > 0) { + u1 = (unsigned char) *s1++; + u2 = (unsigned char) *s2++; + if (u1 != u2) + return u1 - u2; + if (u1 == '\0') + return 0; + } + return 0; +} + +static INLINE char * +util_strstr(const char *haystack, const char *needle) +{ + const char *p = haystack; + int len = strlen(needle); + + for (; (p = util_strchr(p, *needle)) != 0; p++) { + if (util_strncmp(p, needle, len) == 0) { + return (char *)p; + } + } + return NULL; +} + +static INLINE void * +util_memmove(void *dest, const void *src, size_t n) +{ + char *p = (char *)dest; + const char *q = (const char *)src; + if (dest < src) { + while (n--) + *p++ = *q++; + } + else + { + p += n; + q += n; + while (n--) + *--p = *--q; + } + return dest; +} + + #else + #define util_vsnprintf vsnprintf #define util_snprintf snprintf +#define util_strchr strchr +#define util_strcmp strcmp +#define util_strncmp strncmp +#define util_strncat strncat +#define util_strstr strstr +#define util_memmove memmove + #endif -- cgit v1.2.3 From 4c6c073f993e13da0b68f897a4221e6bb7875fbe Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Tue, 6 May 2008 09:07:11 +0100 Subject: gallium: Define util_sprintf for non-WIN32. --- src/gallium/auxiliary/util/u_string.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_string.h b/src/gallium/auxiliary/util/u_string.h index 2fede229d9..d7d925ee9b 100644 --- a/src/gallium/auxiliary/util/u_string.h +++ b/src/gallium/auxiliary/util/u_string.h @@ -158,6 +158,7 @@ util_memmove(void *dest, const void *src, size_t n) #define util_vsnprintf vsnprintf #define util_snprintf snprintf +#define util_sprintf sprintf #define util_strchr strchr #define util_strcmp strcmp #define util_strncmp strncmp -- cgit v1.2.3 From a6ad4927740e5699f1a047f4c78f069f6a91c6ea Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 7 May 2008 02:51:49 +0900 Subject: gallium: Simple facility to dump and view images for debugging. --- bin/raw2png.py | 339 +++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/p_debug.c | 47 +++++ src/gallium/include/pipe/p_debug.h | 11 ++ 3 files changed, 397 insertions(+) create mode 100755 bin/raw2png.py (limited to 'src/gallium/auxiliary') diff --git a/bin/raw2png.py b/bin/raw2png.py new file mode 100755 index 0000000000..ce18079496 --- /dev/null +++ b/bin/raw2png.py @@ -0,0 +1,339 @@ +#!/usr/bin/env python +########################################################################## +# +# Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. +# All Rights Reserved. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the +# "Software"), to deal in the Software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sub license, and/or sell copies of the Software, and to +# permit persons to whom the Software is furnished to do so, subject to +# the following conditions: +# +# The above copyright notice and this permission notice (including the +# next paragraph) shall be included in all copies or substantial portions +# of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +# OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +# IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR +# ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +# TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +# SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +# +########################################################################## + + +import os.path +import sys +import struct +import Image # http://www.pythonware.com/products/pil/ + +PIPE_FORMAT_LAYOUT_RGBAZS = 0 +PIPE_FORMAT_LAYOUT_YCBCR = 1 +PIPE_FORMAT_LAYOUT_DXT = 2 + +PIPE_FORMAT_COMP_R = 0 +PIPE_FORMAT_COMP_G = 1 +PIPE_FORMAT_COMP_B = 2 +PIPE_FORMAT_COMP_A = 3 +PIPE_FORMAT_COMP_0 = 4 +PIPE_FORMAT_COMP_1 = 5 +PIPE_FORMAT_COMP_Z = 6 +PIPE_FORMAT_COMP_S = 7 + +PIPE_FORMAT_TYPE_UNKNOWN = 0 +PIPE_FORMAT_TYPE_FLOAT = 1 +PIPE_FORMAT_TYPE_UNORM = 2 +PIPE_FORMAT_TYPE_SNORM = 3 +PIPE_FORMAT_TYPE_USCALED = 4 +PIPE_FORMAT_TYPE_SSCALED = 5 +PIPE_FORMAT_TYPE_SRGB = 6 + +def _PIPE_FORMAT_RGBAZS( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, EXP8, TYPE ): + return ((PIPE_FORMAT_LAYOUT_RGBAZS << 0) |\ + ((SWZ) << 2) |\ + ((SIZEX) << 14) |\ + ((SIZEY) << 17) |\ + ((SIZEZ) << 20) |\ + ((SIZEW) << 23) |\ + ((EXP8) << 26) |\ + ((TYPE) << 28) ) + +def _PIPE_FORMAT_SWZ( SWZX, SWZY, SWZZ, SWZW ): + return (((SWZX) << 0) | ((SWZY) << 3) | ((SWZZ) << 6) | ((SWZW) << 9)) + +def _PIPE_FORMAT_RGBAZS_1( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, TYPE ): + return _PIPE_FORMAT_RGBAZS( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, 0, TYPE ) + +def _PIPE_FORMAT_RGBAZS_8( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, TYPE ): + return _PIPE_FORMAT_RGBAZS( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, 1, TYPE ) + +def _PIPE_FORMAT_RGBAZS_64( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, TYPE ): + return _PIPE_FORMAT_RGBAZS( SWZ, SIZEX, SIZEY, SIZEZ, SIZEW, 2, TYPE ) + +_PIPE_FORMAT_R001 = _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_1 ) +_PIPE_FORMAT_RG01 = _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_1 ) +_PIPE_FORMAT_RGB1 = _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_1 ) +_PIPE_FORMAT_RGBA = _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_A ) +_PIPE_FORMAT_ARGB = _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_A, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_B ) +_PIPE_FORMAT_BGRA = _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_A ) +_PIPE_FORMAT_1RGB = _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_1, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_B ) +_PIPE_FORMAT_BGR1 = _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_B, PIPE_FORMAT_COMP_G, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_1 ) +_PIPE_FORMAT_0000 = _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) +_PIPE_FORMAT_000R = _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_R ) +_PIPE_FORMAT_RRR1 = _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_1 ) +_PIPE_FORMAT_RRRR = _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R ) +_PIPE_FORMAT_RRRG = _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_R, PIPE_FORMAT_COMP_G ) +_PIPE_FORMAT_Z000 = _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_Z, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) +_PIPE_FORMAT_0Z00 = _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_Z, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) +_PIPE_FORMAT_SZ00 = _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_S, PIPE_FORMAT_COMP_Z, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) +_PIPE_FORMAT_ZS00 = _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_Z, PIPE_FORMAT_COMP_S, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) +_PIPE_FORMAT_S000 = _PIPE_FORMAT_SWZ( PIPE_FORMAT_COMP_S, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0, PIPE_FORMAT_COMP_0 ) + +def _PIPE_FORMAT_YCBCR( REV ): + return ((PIPE_FORMAT_LAYOUT_YCBCR << 0) |\ + ((REV) << 2) ) + +def _PIPE_FORMAT_DXT( LEVEL, RSIZE, GSIZE, BSIZE, ASIZE ): + return ((PIPE_FORMAT_LAYOUT_DXT << 0) | \ + ((LEVEL) << 2) | \ + ((RSIZE) << 5) | \ + ((GSIZE) << 8) | \ + ((BSIZE) << 11) | \ + ((ASIZE) << 14) ) + +PIPE_FORMAT_NONE = _PIPE_FORMAT_RGBAZS_1 ( _PIPE_FORMAT_0000, 0, 0, 0, 0, PIPE_FORMAT_TYPE_UNKNOWN ) +PIPE_FORMAT_A8R8G8B8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_ARGB, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_X8R8G8B8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_1RGB, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_B8G8R8A8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_BGRA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_B8G8R8X8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_BGR1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_A1R5G5B5_UNORM = _PIPE_FORMAT_RGBAZS_1 ( _PIPE_FORMAT_ARGB, 1, 5, 5, 5, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_A4R4G4B4_UNORM = _PIPE_FORMAT_RGBAZS_1 ( _PIPE_FORMAT_ARGB, 4, 4, 4, 4, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_R5G6B5_UNORM = _PIPE_FORMAT_RGBAZS_1 ( _PIPE_FORMAT_RGB1, 5, 6, 5, 0, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_L8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRR1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_A8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_000R, 0, 0, 0, 1, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_I8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRRR, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_A8L8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRRG, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_YCBCR = _PIPE_FORMAT_YCBCR( 0 ) +PIPE_FORMAT_YCBCR_REV = _PIPE_FORMAT_YCBCR( 1 ) +PIPE_FORMAT_Z16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_Z000, 2, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_Z32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_Z000, 4, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_Z32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_Z000, 4, 0, 0, 0, PIPE_FORMAT_TYPE_FLOAT ) +PIPE_FORMAT_S8Z24_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_SZ00, 1, 3, 0, 0, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_Z24S8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_ZS00, 3, 1, 0, 0, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_X8Z24_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_0Z00, 1, 3, 0, 0, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_Z24X8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_Z000, 3, 1, 0, 0, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_S8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_S000, 1, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_R64_FLOAT = _PIPE_FORMAT_RGBAZS_64( _PIPE_FORMAT_R001, 1, 0, 0, 0, PIPE_FORMAT_TYPE_FLOAT ) +PIPE_FORMAT_R64G64_FLOAT = _PIPE_FORMAT_RGBAZS_64( _PIPE_FORMAT_RG01, 1, 1, 0, 0, PIPE_FORMAT_TYPE_FLOAT ) +PIPE_FORMAT_R64G64B64_FLOAT = _PIPE_FORMAT_RGBAZS_64( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_FLOAT ) +PIPE_FORMAT_R64G64B64A64_FLOAT = _PIPE_FORMAT_RGBAZS_64( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_FLOAT ) +PIPE_FORMAT_R32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_FLOAT ) +PIPE_FORMAT_R32G32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_FLOAT ) +PIPE_FORMAT_R32G32B32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_FLOAT ) +PIPE_FORMAT_R32G32B32A32_FLOAT = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_FLOAT ) +PIPE_FORMAT_R32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_R32G32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_R32G32B32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_R32G32B32A32_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_R32_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_USCALED ) +PIPE_FORMAT_R32G32_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_USCALED ) +PIPE_FORMAT_R32G32B32_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_USCALED ) +PIPE_FORMAT_R32G32B32A32_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_USCALED ) +PIPE_FORMAT_R32_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_SNORM ) +PIPE_FORMAT_R32G32_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_SNORM ) +PIPE_FORMAT_R32G32B32_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_SNORM ) +PIPE_FORMAT_R32G32B32A32_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_SNORM ) +PIPE_FORMAT_R32_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_SSCALED ) +PIPE_FORMAT_R32G32_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_SSCALED ) +PIPE_FORMAT_R32G32B32_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_SSCALED ) +PIPE_FORMAT_R32G32B32A32_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_SSCALED ) +PIPE_FORMAT_R16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 2, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_R16G16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 2, 2, 0, 0, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_R16G16B16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 2, 2, 2, 0, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_R16G16B16A16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 2, 2, 2, 2, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_R16_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 2, 0, 0, 0, PIPE_FORMAT_TYPE_USCALED ) +PIPE_FORMAT_R16G16_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 2, 2, 0, 0, PIPE_FORMAT_TYPE_USCALED ) +PIPE_FORMAT_R16G16B16_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 2, 2, 2, 0, PIPE_FORMAT_TYPE_USCALED ) +PIPE_FORMAT_R16G16B16A16_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 2, 2, 2, 2, PIPE_FORMAT_TYPE_USCALED ) +PIPE_FORMAT_R16_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 2, 0, 0, 0, PIPE_FORMAT_TYPE_SNORM ) +PIPE_FORMAT_R16G16_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 2, 2, 0, 0, PIPE_FORMAT_TYPE_SNORM ) +PIPE_FORMAT_R16G16B16_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 2, 2, 2, 0, PIPE_FORMAT_TYPE_SNORM ) +PIPE_FORMAT_R16G16B16A16_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 2, 2, 2, 2, PIPE_FORMAT_TYPE_SNORM ) +PIPE_FORMAT_R16_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 2, 0, 0, 0, PIPE_FORMAT_TYPE_SSCALED ) +PIPE_FORMAT_R16G16_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 2, 2, 0, 0, PIPE_FORMAT_TYPE_SSCALED ) +PIPE_FORMAT_R16G16B16_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 2, 2, 2, 0, PIPE_FORMAT_TYPE_SSCALED ) +PIPE_FORMAT_R16G16B16A16_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 2, 2, 2, 2, PIPE_FORMAT_TYPE_SSCALED ) +PIPE_FORMAT_R8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 1, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_R8G8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 1, 1, 0, 0, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_R8G8B8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_R8G8B8A8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_R8G8B8X8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ) +PIPE_FORMAT_R8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 1, 0, 0, 0, PIPE_FORMAT_TYPE_USCALED ) +PIPE_FORMAT_R8G8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 1, 1, 0, 0, PIPE_FORMAT_TYPE_USCALED ) +PIPE_FORMAT_R8G8B8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_USCALED ) +PIPE_FORMAT_R8G8B8A8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_USCALED ) +PIPE_FORMAT_R8G8B8X8_USCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_USCALED ) +PIPE_FORMAT_R8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 1, 0, 0, 0, PIPE_FORMAT_TYPE_SNORM ) +PIPE_FORMAT_R8G8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 1, 1, 0, 0, PIPE_FORMAT_TYPE_SNORM ) +PIPE_FORMAT_R8G8B8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SNORM ) +PIPE_FORMAT_R8G8B8A8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SNORM ) +PIPE_FORMAT_R8G8B8X8_SNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SNORM ) +PIPE_FORMAT_R8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 1, 0, 0, 0, PIPE_FORMAT_TYPE_SSCALED ) +PIPE_FORMAT_R8G8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 1, 1, 0, 0, PIPE_FORMAT_TYPE_SSCALED ) +PIPE_FORMAT_R8G8B8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SSCALED ) +PIPE_FORMAT_R8G8B8A8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SSCALED ) +PIPE_FORMAT_R8G8B8X8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SSCALED ) +PIPE_FORMAT_L8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRR1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SRGB ) +PIPE_FORMAT_A8_L8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRRG, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ) +PIPE_FORMAT_R8G8B8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SRGB ) +PIPE_FORMAT_R8G8B8A8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ) +PIPE_FORMAT_R8G8B8X8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ) +PIPE_FORMAT_DXT1_RGB = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 0 ) +PIPE_FORMAT_DXT1_RGBA = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 8 ) +PIPE_FORMAT_DXT3_RGBA = _PIPE_FORMAT_DXT( 3, 8, 8, 8, 8 ) +PIPE_FORMAT_DXT5_RGBA = _PIPE_FORMAT_DXT( 5, 8, 8, 8, 8 ) + +formats = { + PIPE_FORMAT_NONE: "PIPE_FORMAT_NONE", + PIPE_FORMAT_A8R8G8B8_UNORM: "PIPE_FORMAT_A8R8G8B8_UNORM", + PIPE_FORMAT_X8R8G8B8_UNORM: "PIPE_FORMAT_X8R8G8B8_UNORM", + PIPE_FORMAT_B8G8R8A8_UNORM: "PIPE_FORMAT_B8G8R8A8_UNORM", + PIPE_FORMAT_B8G8R8X8_UNORM: "PIPE_FORMAT_B8G8R8X8_UNORM", + PIPE_FORMAT_A1R5G5B5_UNORM: "PIPE_FORMAT_A1R5G5B5_UNORM", + PIPE_FORMAT_A4R4G4B4_UNORM: "PIPE_FORMAT_A4R4G4B4_UNORM", + PIPE_FORMAT_R5G6B5_UNORM: "PIPE_FORMAT_R5G6B5_UNORM", + PIPE_FORMAT_L8_UNORM: "PIPE_FORMAT_L8_UNORM", + PIPE_FORMAT_A8_UNORM: "PIPE_FORMAT_A8_UNORM", + PIPE_FORMAT_I8_UNORM: "PIPE_FORMAT_I8_UNORM", + PIPE_FORMAT_A8L8_UNORM: "PIPE_FORMAT_A8L8_UNORM", + PIPE_FORMAT_YCBCR: "PIPE_FORMAT_YCBCR", + PIPE_FORMAT_YCBCR_REV: "PIPE_FORMAT_YCBCR_REV", + PIPE_FORMAT_Z16_UNORM: "PIPE_FORMAT_Z16_UNORM", + PIPE_FORMAT_Z32_UNORM: "PIPE_FORMAT_Z32_UNORM", + PIPE_FORMAT_Z32_FLOAT: "PIPE_FORMAT_Z32_FLOAT", + PIPE_FORMAT_S8Z24_UNORM: "PIPE_FORMAT_S8Z24_UNORM", + PIPE_FORMAT_Z24S8_UNORM: "PIPE_FORMAT_Z24S8_UNORM", + PIPE_FORMAT_X8Z24_UNORM: "PIPE_FORMAT_X8Z24_UNORM", + PIPE_FORMAT_Z24X8_UNORM: "PIPE_FORMAT_Z24X8_UNORM", + PIPE_FORMAT_S8_UNORM: "PIPE_FORMAT_S8_UNORM", + PIPE_FORMAT_R64_FLOAT: "PIPE_FORMAT_R64_FLOAT", + PIPE_FORMAT_R64G64_FLOAT: "PIPE_FORMAT_R64G64_FLOAT", + PIPE_FORMAT_R64G64B64_FLOAT: "PIPE_FORMAT_R64G64B64_FLOAT", + PIPE_FORMAT_R64G64B64A64_FLOAT: "PIPE_FORMAT_R64G64B64A64_FLOAT", + PIPE_FORMAT_R32_FLOAT: "PIPE_FORMAT_R32_FLOAT", + PIPE_FORMAT_R32G32_FLOAT: "PIPE_FORMAT_R32G32_FLOAT", + PIPE_FORMAT_R32G32B32_FLOAT: "PIPE_FORMAT_R32G32B32_FLOAT", + PIPE_FORMAT_R32G32B32A32_FLOAT: "PIPE_FORMAT_R32G32B32A32_FLOAT", + PIPE_FORMAT_R32_UNORM: "PIPE_FORMAT_R32_UNORM", + PIPE_FORMAT_R32G32_UNORM: "PIPE_FORMAT_R32G32_UNORM", + PIPE_FORMAT_R32G32B32_UNORM: "PIPE_FORMAT_R32G32B32_UNORM", + PIPE_FORMAT_R32G32B32A32_UNORM: "PIPE_FORMAT_R32G32B32A32_UNORM", + PIPE_FORMAT_R32_USCALED: "PIPE_FORMAT_R32_USCALED", + PIPE_FORMAT_R32G32_USCALED: "PIPE_FORMAT_R32G32_USCALED", + PIPE_FORMAT_R32G32B32_USCALED: "PIPE_FORMAT_R32G32B32_USCALED", + PIPE_FORMAT_R32G32B32A32_USCALED: "PIPE_FORMAT_R32G32B32A32_USCALED", + PIPE_FORMAT_R32_SNORM: "PIPE_FORMAT_R32_SNORM", + PIPE_FORMAT_R32G32_SNORM: "PIPE_FORMAT_R32G32_SNORM", + PIPE_FORMAT_R32G32B32_SNORM: "PIPE_FORMAT_R32G32B32_SNORM", + PIPE_FORMAT_R32G32B32A32_SNORM: "PIPE_FORMAT_R32G32B32A32_SNORM", + PIPE_FORMAT_R32_SSCALED: "PIPE_FORMAT_R32_SSCALED", + PIPE_FORMAT_R32G32_SSCALED: "PIPE_FORMAT_R32G32_SSCALED", + PIPE_FORMAT_R32G32B32_SSCALED: "PIPE_FORMAT_R32G32B32_SSCALED", + PIPE_FORMAT_R32G32B32A32_SSCALED: "PIPE_FORMAT_R32G32B32A32_SSCALED", + PIPE_FORMAT_R16_UNORM: "PIPE_FORMAT_R16_UNORM", + PIPE_FORMAT_R16G16_UNORM: "PIPE_FORMAT_R16G16_UNORM", + PIPE_FORMAT_R16G16B16_UNORM: "PIPE_FORMAT_R16G16B16_UNORM", + PIPE_FORMAT_R16G16B16A16_UNORM: "PIPE_FORMAT_R16G16B16A16_UNORM", + PIPE_FORMAT_R16_USCALED: "PIPE_FORMAT_R16_USCALED", + PIPE_FORMAT_R16G16_USCALED: "PIPE_FORMAT_R16G16_USCALED", + PIPE_FORMAT_R16G16B16_USCALED: "PIPE_FORMAT_R16G16B16_USCALED", + PIPE_FORMAT_R16G16B16A16_USCALED: "PIPE_FORMAT_R16G16B16A16_USCALED", + PIPE_FORMAT_R16_SNORM: "PIPE_FORMAT_R16_SNORM", + PIPE_FORMAT_R16G16_SNORM: "PIPE_FORMAT_R16G16_SNORM", + PIPE_FORMAT_R16G16B16_SNORM: "PIPE_FORMAT_R16G16B16_SNORM", + PIPE_FORMAT_R16G16B16A16_SNORM: "PIPE_FORMAT_R16G16B16A16_SNORM", + PIPE_FORMAT_R16_SSCALED: "PIPE_FORMAT_R16_SSCALED", + PIPE_FORMAT_R16G16_SSCALED: "PIPE_FORMAT_R16G16_SSCALED", + PIPE_FORMAT_R16G16B16_SSCALED: "PIPE_FORMAT_R16G16B16_SSCALED", + PIPE_FORMAT_R16G16B16A16_SSCALED: "PIPE_FORMAT_R16G16B16A16_SSCALED", + PIPE_FORMAT_R8_UNORM: "PIPE_FORMAT_R8_UNORM", + PIPE_FORMAT_R8G8_UNORM: "PIPE_FORMAT_R8G8_UNORM", + PIPE_FORMAT_R8G8B8_UNORM: "PIPE_FORMAT_R8G8B8_UNORM", + PIPE_FORMAT_R8G8B8A8_UNORM: "PIPE_FORMAT_R8G8B8A8_UNORM", + PIPE_FORMAT_R8G8B8X8_UNORM: "PIPE_FORMAT_R8G8B8X8_UNORM", + PIPE_FORMAT_R8_USCALED: "PIPE_FORMAT_R8_USCALED", + PIPE_FORMAT_R8G8_USCALED: "PIPE_FORMAT_R8G8_USCALED", + PIPE_FORMAT_R8G8B8_USCALED: "PIPE_FORMAT_R8G8B8_USCALED", + PIPE_FORMAT_R8G8B8A8_USCALED: "PIPE_FORMAT_R8G8B8A8_USCALED", + PIPE_FORMAT_R8G8B8X8_USCALED: "PIPE_FORMAT_R8G8B8X8_USCALED", + PIPE_FORMAT_R8_SNORM: "PIPE_FORMAT_R8_SNORM", + PIPE_FORMAT_R8G8_SNORM: "PIPE_FORMAT_R8G8_SNORM", + PIPE_FORMAT_R8G8B8_SNORM: "PIPE_FORMAT_R8G8B8_SNORM", + PIPE_FORMAT_R8G8B8A8_SNORM: "PIPE_FORMAT_R8G8B8A8_SNORM", + PIPE_FORMAT_R8G8B8X8_SNORM: "PIPE_FORMAT_R8G8B8X8_SNORM", + PIPE_FORMAT_R8_SSCALED: "PIPE_FORMAT_R8_SSCALED", + PIPE_FORMAT_R8G8_SSCALED: "PIPE_FORMAT_R8G8_SSCALED", + PIPE_FORMAT_R8G8B8_SSCALED: "PIPE_FORMAT_R8G8B8_SSCALED", + PIPE_FORMAT_R8G8B8A8_SSCALED: "PIPE_FORMAT_R8G8B8A8_SSCALED", + PIPE_FORMAT_R8G8B8X8_SSCALED: "PIPE_FORMAT_R8G8B8X8_SSCALED", + PIPE_FORMAT_L8_SRGB: "PIPE_FORMAT_L8_SRGB", + PIPE_FORMAT_A8_L8_SRGB: "PIPE_FORMAT_A8_L8_SRGB", + PIPE_FORMAT_R8G8B8_SRGB: "PIPE_FORMAT_R8G8B8_SRGB", + PIPE_FORMAT_R8G8B8A8_SRGB: "PIPE_FORMAT_R8G8B8A8_SRGB", + PIPE_FORMAT_R8G8B8X8_SRGB: "PIPE_FORMAT_R8G8B8X8_SRGB", + PIPE_FORMAT_DXT1_RGB: "PIPE_FORMAT_DXT1_RGB", + PIPE_FORMAT_DXT1_RGBA: "PIPE_FORMAT_DXT1_RGBA", + PIPE_FORMAT_DXT3_RGBA: "PIPE_FORMAT_DXT3_RGBA", + PIPE_FORMAT_DXT5_RGBA: "PIPE_FORMAT_DXT5_RGBA", +} + + +def read_header(infile): + header_fmt = "IIII" + header = infile.read(struct.calcsize(header_fmt)) + return struct.unpack_from(header_fmt, header) + +def read_pixel(infile, fmt, cpp): + assert cpp == 4 + r, g, b, a = map(ord, infile.read(4)) + return r, g, b, a + + +def process(infilename, outfilename): + infile = open(infilename, "rb") + format, cpp, width, height = read_header(infile) + sys.stderr.write("format = %s, cpp = %u, width = %u, height = %u\n" % (formats[format], cpp, width, height)) + outimage = Image.new( + mode='RGB', + size=(width, height), + color=(0,0,0)) + outpixels = outimage.load() + for y in range(height): + for x in range(width): + r, g, b, a = read_pixel(infile, format, cpp) + outpixels[x, y] = r, g, b + outimage.save(outfilename, "PNG") + + +def main(): + if sys.platform == 'win32': + # wildcard expansion + from glob import glob + args = [] + for arg in sys.argv[1:]: + args.extend(glob(arg)) + else: + args = sys.argv[1:] + for infilename in args: + root, ext = os.path.splitext(infilename) + outfilename = root + ".png" + process(infilename, outfilename) + + +if __name__ == '__main__': + main() diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 4ec1746662..f971ee03ba 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -423,3 +423,50 @@ void debug_print_format(const char *msg, unsigned fmt ) debug_printf("%s: %s\n", msg, fmtstr); } #endif + + +#ifdef DEBUG +void debug_dump_image(const char *prefix, + unsigned format, unsigned cpp, + unsigned width, unsigned height, + unsigned pitch, + const void *data) +{ +#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY + static unsigned no = 0; + char filename[256]; + WCHAR wfilename[sizeof(filename)]; + ULONG_PTR iFile = 0; + struct { + unsigned format; + unsigned cpp; + unsigned width; + unsigned height; + } header; + unsigned char *pMap = NULL; + unsigned i; + + util_snprintf(filename, sizeof(filename), "\\??\\c:\\%03u%s.raw", ++no, prefix); + for(i = 0; i < sizeof(filename); ++i) + wfilename[i] = (WCHAR)filename[i]; + + pMap = (unsigned char *)EngMapFile(wfilename, sizeof(header) + cpp*width*height, &iFile); + if(!pMap) + return; + + header.format = format; + header.cpp = cpp; + header.width = width; + header.height = height; + memcpy(pMap, &header, sizeof(header)); + pMap += sizeof(header); + + for(i = 0; i < height; ++i) { + memcpy(pMap, (unsigned char *)data + cpp*pitch*i, cpp*width); + pMap += cpp*width; + } + + EngUnmapFile(iFile); +#endif +} +#endif \ No newline at end of file diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index 7a7312ea12..1263d0da61 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -313,6 +313,17 @@ void debug_memory_end(unsigned long beginning); +#ifdef DEBUG +void debug_dump_image(const char *prefix, + unsigned format, unsigned cpp, + unsigned width, unsigned height, + unsigned pitch, + const void *data); +#else +#define debug_dump_image(prefix, format, cpp, width, height, pitch, data) ((void)0) +#endif + + #ifdef __cplusplus } #endif -- cgit v1.2.3 From 4d1bf8a85eae730ca875194864277602f57582ea Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 7 May 2008 16:29:36 +0900 Subject: gallium: Output the total of leaked memory. --- src/gallium/auxiliary/util/p_debug_mem.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/p_debug_mem.c index 3b5e4fbaee..78497c5f6a 100644 --- a/src/gallium/auxiliary/util/p_debug_mem.c +++ b/src/gallium/auxiliary/util/p_debug_mem.c @@ -211,6 +211,7 @@ debug_memory_begin(void) void debug_memory_end(unsigned long start_no) { + size_t total_size = 0; struct list_head *entry; entry = list.prev; @@ -220,9 +221,15 @@ debug_memory_end(unsigned long start_no) hdr = LIST_ENTRY(struct debug_memory_header, entry, head); ptr = data_from_header(hdr); if(start_no <= hdr->no && hdr->no < last_no || - last_no < start_no && (hdr->no < last_no || start_no <= hdr->no)) + last_no < start_no && (hdr->no < last_no || start_no <= hdr->no)) { debug_printf("%s:%u:%s: %u bytes at %p not freed\n", hdr->file, hdr->line, hdr->function, hdr->size, ptr); + total_size += hdr->size; + } + } + if(total_size) { + debug_printf("Total of %u KB of system memory apparently leaked\n", + (total_size + 1023)/1024); } } -- cgit v1.2.3 From 942b02956e7889aab977cf465fddb0055b758af2 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 7 May 2008 19:39:34 +0900 Subject: gallium: Use the u_string.h functions. --- src/gallium/auxiliary/util/p_debug.c | 28 ++++++++++++---------------- 1 file changed, 12 insertions(+), 16 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index f971ee03ba..0e233580d6 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -67,7 +67,7 @@ void _debug_vprintf(const char *format, va_list ap) static char buf[512 + 1] = {'\0'}; size_t len = strlen(buf); int ret = util_vsnprintf(buf + len, sizeof(buf) - len, format, ap); - if(ret > (int)(sizeof(buf) - len - 1) || strchr(buf + len, '\n')) { + if(ret > (int)(sizeof(buf) - len - 1) || util_strchr(buf + len, '\n')) { _EngDebugPrint("%s", buf); buf[0] = '\0'; } @@ -203,15 +203,15 @@ debug_get_bool_option(const char *name, boolean dfault) if(str == NULL) result = dfault; - else if(!strcmp(str, "n")) + else if(!util_strcmp(str, "n")) result = FALSE; - else if(!strcmp(str, "no")) + else if(!util_strcmp(str, "no")) result = FALSE; - else if(!strcmp(str, "0")) + else if(!util_strcmp(str, "0")) result = FALSE; - else if(!strcmp(str, "f")) + else if(!util_strcmp(str, "f")) result = FALSE; - else if(!strcmp(str, "false")) + else if(!util_strcmp(str, "false")) result = FALSE; else result = TRUE; @@ -244,7 +244,7 @@ debug_get_flags_option(const char *name, else { result = 0; while( flags->name ) { - if (!strcmp(str, "all") || strstr(str, flags->name )) + if (!util_strcmp(str, "all") || util_strstr(str, flags->name )) result |= flags->value; ++flags; } @@ -299,10 +299,10 @@ debug_dump_flags(const struct debug_named_value *names, while(names->name) { if((names->value & value) == names->value) { if (!first) - strncat(output, "|", sizeof(output)); + util_strncat(output, "|", sizeof(output)); else first = 0; - strncat(output, names->name, sizeof(output)); + util_strncat(output, names->name, sizeof(output)); value &= ~names->value; } ++names; @@ -310,12 +310,12 @@ debug_dump_flags(const struct debug_named_value *names, if (value) { if (!first) - strncat(output, "|", sizeof(output)); + util_strncat(output, "|", sizeof(output)); else first = 0; util_snprintf(rest, sizeof(rest), "0x%08lx", value); - strncat(output, rest, sizeof(output)); + util_strncat(output, rest, sizeof(output)); } if(first) @@ -325,10 +325,6 @@ debug_dump_flags(const struct debug_named_value *names, } - - - - char *pf_sprint_name( char *str, enum pipe_format format ) { strcpy( str, "PIPE_FORMAT_" ); @@ -469,4 +465,4 @@ void debug_dump_image(const char *prefix, EngUnmapFile(iFile); #endif } -#endif \ No newline at end of file +#endif -- cgit v1.2.3 From 55c13f5af7903c2a0f5a1839259a0a0cc15d6e5e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 7 May 2008 20:27:42 +0900 Subject: gallium: Implement pf_sprint_name as a simple static table. --- src/gallium/auxiliary/util/p_debug.c | 180 +++++++++++++++++++---------------- 1 file changed, 97 insertions(+), 83 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 0e233580d6..3f3cf620fe 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -327,96 +327,110 @@ debug_dump_flags(const struct debug_named_value *names, char *pf_sprint_name( char *str, enum pipe_format format ) { - strcpy( str, "PIPE_FORMAT_" ); - switch (pf_layout( format )) { - case PIPE_FORMAT_LAYOUT_RGBAZS: - { - pipe_format_rgbazs_t rgbazs = (pipe_format_rgbazs_t) format; - uint i; - uint scale = 1 << (pf_exp8( rgbazs ) * 3); - - for (i = 0; i < 4; i++) { - uint size = pf_size_xyzw( rgbazs, i ); - - if (size == 0) { - break; - } - switch (pf_swizzle_xyzw( rgbazs, i )) { - case PIPE_FORMAT_COMP_R: - strcat( str, "R" ); - break; - case PIPE_FORMAT_COMP_G: - strcat( str, "G" ); - break; - case PIPE_FORMAT_COMP_B: - strcat( str, "B" ); - break; - case PIPE_FORMAT_COMP_A: - strcat( str, "A" ); - break; - case PIPE_FORMAT_COMP_0: - strcat( str, "0" ); - break; - case PIPE_FORMAT_COMP_1: - strcat( str, "1" ); - break; - case PIPE_FORMAT_COMP_Z: - strcat( str, "Z" ); - break; - case PIPE_FORMAT_COMP_S: - strcat( str, "S" ); - break; - } - util_snprintf( &str[strlen( str )], 32, "%u", size * scale ); - } - if (i != 0) { - strcat( str, "_" ); - } - switch (pf_type( rgbazs )) { - case PIPE_FORMAT_TYPE_UNKNOWN: - strcat( str, "NONE" ); - break; - case PIPE_FORMAT_TYPE_FLOAT: - strcat( str, "FLOAT" ); - break; - case PIPE_FORMAT_TYPE_UNORM: - strcat( str, "UNORM" ); - break; - case PIPE_FORMAT_TYPE_SNORM: - strcat( str, "SNORM" ); - break; - case PIPE_FORMAT_TYPE_USCALED: - strcat( str, "USCALED" ); - break; - case PIPE_FORMAT_TYPE_SSCALED: - strcat( str, "SSCALED" ); - break; - } - } - break; - case PIPE_FORMAT_LAYOUT_YCBCR: - { - pipe_format_ycbcr_t ycbcr = (pipe_format_ycbcr_t) format; - - strcat( str, "YCBCR" ); - if (pf_rev( ycbcr )) { - strcat( str, "_REV" ); - } - } - break; - } + strcpy( str, debug_dump_enum(pipe_format_names, fmt) ); return str; } #ifdef DEBUG +static const struct debug_named_value pipe_format_names[] = { + DEBUG_NAMED_VALUE(PIPE_FORMAT_NONE), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A8R8G8B8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_X8R8G8B8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_B8G8R8A8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_B8G8R8X8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A1R5G5B5_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A4R4G4B4_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R5G6B5_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_L8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_I8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A8L8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_YCBCR), + DEBUG_NAMED_VALUE(PIPE_FORMAT_YCBCR_REV), + DEBUG_NAMED_VALUE(PIPE_FORMAT_Z16_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_Z32_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_Z32_FLOAT), + DEBUG_NAMED_VALUE(PIPE_FORMAT_S8Z24_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_Z24S8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_X8Z24_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_Z24X8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_S8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R64_FLOAT), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R64G64_FLOAT), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R64G64B64_FLOAT), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R64G64B64A64_FLOAT), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32_FLOAT), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32_FLOAT), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32B32_FLOAT), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32B32A32_FLOAT), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32B32_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32B32A32_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32B32_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32B32A32_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32B32_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32B32A32_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32B32_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R32G32B32A32_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16B16_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16B16A16_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16B16_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16B16A16_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16B16_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16B16A16_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16B16_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R16G16B16A16_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_USCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_SSCALED), + DEBUG_NAMED_VALUE(PIPE_FORMAT_L8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A8_L8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_RGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_RGBA), + DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT3_RGBA), + DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT5_RGBA), + DEBUG_NAMED_VALUE_END +}; + void debug_print_format(const char *msg, unsigned fmt ) { - char fmtstr[80]; - - pf_sprint_name(fmtstr, (enum pipe_format)fmt); - - debug_printf("%s: %s\n", msg, fmtstr); + debug_printf("%s: %s\n", msg, debug_dump_enum(pipe_format_names, fmt)); } #endif -- cgit v1.2.3 From 5efd2d59eb19cc44624d3f842d3d1d291876ae7d Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 7 May 2008 14:24:14 +0200 Subject: gallium: Fix compilation errors. --- src/gallium/auxiliary/util/p_debug.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 3f3cf620fe..0f3a99cf83 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -325,13 +325,6 @@ debug_dump_flags(const struct debug_named_value *names, } -char *pf_sprint_name( char *str, enum pipe_format format ) -{ - strcpy( str, debug_dump_enum(pipe_format_names, fmt) ); - return str; -} - - #ifdef DEBUG static const struct debug_named_value pipe_format_names[] = { DEBUG_NAMED_VALUE(PIPE_FORMAT_NONE), @@ -432,6 +425,12 @@ void debug_print_format(const char *msg, unsigned fmt ) { debug_printf("%s: %s\n", msg, debug_dump_enum(pipe_format_names, fmt)); } + +char *pf_sprint_name( char *str, enum pipe_format format ) +{ + strcpy( str, debug_dump_enum(pipe_format_names, format) ); + return str; +} #endif -- cgit v1.2.3 From 33cda1e5e57838845ec62714677832f7cdabc5dc Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 7 May 2008 21:37:32 +0900 Subject: gallium: Fix release build. pf_sprint_name might be used there too. --- src/gallium/auxiliary/util/p_debug.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 0f3a99cf83..951dd5a2d4 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -325,8 +325,8 @@ debug_dump_flags(const struct debug_named_value *names, } -#ifdef DEBUG static const struct debug_named_value pipe_format_names[] = { +#ifdef DEBUG DEBUG_NAMED_VALUE(PIPE_FORMAT_NONE), DEBUG_NAMED_VALUE(PIPE_FORMAT_A8R8G8B8_UNORM), DEBUG_NAMED_VALUE(PIPE_FORMAT_X8R8G8B8_UNORM), @@ -418,20 +418,22 @@ static const struct debug_named_value pipe_format_names[] = { DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_RGBA), DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT3_RGBA), DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT5_RGBA), +#endif DEBUG_NAMED_VALUE_END }; +#ifdef DEBUG void debug_print_format(const char *msg, unsigned fmt ) { debug_printf("%s: %s\n", msg, debug_dump_enum(pipe_format_names, fmt)); } +#endif char *pf_sprint_name( char *str, enum pipe_format format ) { strcpy( str, debug_dump_enum(pipe_format_names, format) ); return str; } -#endif #ifdef DEBUG -- cgit v1.2.3 From 10b7192747087ec25f97cdfcfc062654a2d8fe6d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 7 May 2008 22:01:27 +0900 Subject: gallium: Implement util_pack_color for A8, L8, and I8 formats. --- src/gallium/auxiliary/util/u_pack_color.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 0b917c005f..655e2c8259 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -101,6 +101,19 @@ util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a, *d = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); } return; + case PIPE_FORMAT_A8_UNORM: + { + ubyte *d = (ubyte *) dest; + *d = a; + } + return; + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_I8_UNORM: + { + ubyte *d = (ubyte *) dest; + *d = r; + } + return; case PIPE_FORMAT_R32G32B32A32_FLOAT: { float *d = (float *) dest; @@ -198,6 +211,19 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest) *d = ((a & 0xf0) << 8) | ((r & 0xf0) << 4) | ((g & 0xf0) << 0) | (b >> 4); } return; + case PIPE_FORMAT_A8_UNORM: + { + ubyte *d = (ubyte *) dest; + *d = a; + } + return; + case PIPE_FORMAT_L8_UNORM: + case PIPE_FORMAT_I8_UNORM: + { + ubyte *d = (ubyte *) dest; + *d = r; + } + return; case PIPE_FORMAT_R32G32B32A32_FLOAT: { float *d = (float *) dest; -- cgit v1.2.3 From 53996e562f8623ccb1defeaaa77efdff24477808 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 8 May 2008 14:56:03 +0900 Subject: pipebuffer: Don't include standard headers directly. --- src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 8de286e3f9..f6cc7a525b 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -50,7 +50,8 @@ #define PB_BUFMGR_H_ -#include +#include "pipe/p_compiler.h" +#include "pipe/p_error.h" #ifdef __cplusplus @@ -68,7 +69,6 @@ struct pipe_winsys; */ struct pb_manager { - /* XXX: we will likely need more allocation flags */ struct pb_buffer * (*create_buffer)( struct pb_manager *mgr, size_t size, -- cgit v1.2.3 From 665b327a47ce80d136e91cfafedbc165227ea168 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 8 May 2008 15:19:07 +0900 Subject: gallium: Really free hash entries. Hook up to the new cso_hash_erase function. --- src/gallium/auxiliary/util/u_hash_table.c | 77 +++++++++++++++++++++++++++---- src/gallium/auxiliary/util/u_hash_table.h | 4 ++ 2 files changed, 72 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_hash_table.c b/src/gallium/auxiliary/util/u_hash_table.c index f3f16a8d94..dd5eca7fca 100644 --- a/src/gallium/auxiliary/util/u_hash_table.c +++ b/src/gallium/auxiliary/util/u_hash_table.c @@ -67,6 +67,13 @@ struct hash_table_item }; +static INLINE struct hash_table_item * +hash_table_item(struct cso_hash_iter iter) +{ + return (struct hash_table_item *)cso_hash_iter_data(iter); +} + + struct hash_table * hash_table_create(unsigned (*hash)(void *key), int (*compare)(void *key1, void *key2)) @@ -90,7 +97,27 @@ hash_table_create(unsigned (*hash)(void *key), } -static struct hash_table_item * +static INLINE struct cso_hash_iter +hash_table_find_iter(struct hash_table *ht, + void *key, + unsigned key_hash) +{ + struct cso_hash_iter iter; + struct hash_table_item *item; + + iter = cso_hash_find(ht->cso, key_hash); + while (!cso_hash_iter_is_null(iter)) { + item = (struct hash_table_item *)cso_hash_iter_data(iter); + if (!ht->compare(item->key, key)) + break; + iter = cso_hash_iter_next(iter); + } + + return iter; +} + + +static INLINE struct hash_table_item * hash_table_find_item(struct hash_table *ht, void *key, unsigned key_hash) @@ -117,6 +144,7 @@ hash_table_set(struct hash_table *ht, { unsigned key_hash; struct hash_table_item *item; + struct cso_hash_iter iter; assert(ht); @@ -136,9 +164,8 @@ hash_table_set(struct hash_table *ht, item->key = key; item->value = value; - cso_hash_insert(ht->cso, key_hash, item); - /* FIXME: there is no OOM propagation in cso_hash */ - if(0) { + iter = cso_hash_insert(ht->cso, key_hash, item); + if(cso_hash_iter_is_null(iter)) { FREE(item); return PIPE_ERROR_OUT_OF_MEMORY; } @@ -171,19 +198,39 @@ hash_table_remove(struct hash_table *ht, void *key) { unsigned key_hash; + struct cso_hash_iter iter; struct hash_table_item *item; assert(ht); key_hash = ht->hash(key); - item = hash_table_find_item(ht, key, key_hash); - if(!item) + iter = hash_table_find_iter(ht, key, key_hash); + if(cso_hash_iter_is_null(iter)) return; - /* FIXME: cso_hash_take takes the first element of the collision list - * indiscriminately, so we can not take the item down. */ - item->value = NULL; + item = hash_table_item(iter); + assert(item); + FREE(item); + + cso_hash_erase(ht->cso, iter); +} + + +void +hash_table_clear(struct hash_table *ht) +{ + struct cso_hash_iter iter; + struct hash_table_item *item; + + assert(ht); + + iter = cso_hash_first_node(ht->cso); + while (!cso_hash_iter_is_null(iter)) { + item = (struct hash_table_item *)cso_hash_take(ht->cso, cso_hash_iter_key(iter)); + FREE(item); + iter = cso_hash_first_node(ht->cso); + } } @@ -196,6 +243,8 @@ hash_table_foreach(struct hash_table *ht, struct hash_table_item *item; enum pipe_error result; + assert(ht); + iter = cso_hash_first_node(ht->cso); while (!cso_hash_iter_is_null(iter)) { item = (struct hash_table_item *)cso_hash_iter_data(iter); @@ -212,7 +261,17 @@ hash_table_foreach(struct hash_table *ht, void hash_table_destroy(struct hash_table *ht) { + struct cso_hash_iter iter; + struct hash_table_item *item; + assert(ht); + + iter = cso_hash_first_node(ht->cso); + while (!cso_hash_iter_is_null(iter)) { + item = (struct hash_table_item *)cso_hash_iter_data(iter); + FREE(item); + iter = cso_hash_iter_next(iter); + } cso_hash_delete(ht->cso); diff --git a/src/gallium/auxiliary/util/u_hash_table.h b/src/gallium/auxiliary/util/u_hash_table.h index 1583bd7548..feee881582 100644 --- a/src/gallium/auxiliary/util/u_hash_table.h +++ b/src/gallium/auxiliary/util/u_hash_table.h @@ -75,6 +75,10 @@ hash_table_remove(struct hash_table *ht, void *key); +void +hash_table_clear(struct hash_table *ht); + + enum pipe_error hash_table_foreach(struct hash_table *ht, enum pipe_error (*callback)(void *key, void *value, void *data), -- cgit v1.2.3 From 82dd0225e7e21a35ca66d439dce8cfa39d782470 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 8 May 2008 15:28:42 +0900 Subject: pipebuffer: Preliminary buffer validation. Use table to store a list of buffers to validate. Unfortunately cso_hash shrinks/regrows the hash every time, so still has to be addressed. Multi-thread validation is still WIP. --- src/gallium/auxiliary/pipebuffer/Makefile | 1 + src/gallium/auxiliary/pipebuffer/SConscript | 1 + src/gallium/auxiliary/pipebuffer/pb_validate.c | 170 +++++++++++++++++++++++++ src/gallium/auxiliary/pipebuffer/pb_validate.h | 91 +++++++++++++ 4 files changed, 263 insertions(+) create mode 100644 src/gallium/auxiliary/pipebuffer/pb_validate.c create mode 100644 src/gallium/auxiliary/pipebuffer/pb_validate.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile index d654dbcc91..ff09011b66 100644 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -11,6 +11,7 @@ C_SOURCES = \ pb_bufmgr_mm.c \ pb_bufmgr_pool.c \ pb_bufmgr_slab.c \ + pb_validate.c \ pb_winsys.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript index 604a217982..9db0c0eae3 100644 --- a/src/gallium/auxiliary/pipebuffer/SConscript +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -10,6 +10,7 @@ pipebuffer = env.ConvenienceLibrary( 'pb_bufmgr_mm.c', 'pb_bufmgr_pool.c', 'pb_bufmgr_slab.c', + 'pb_validate.c', 'pb_winsys.c', ]) diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c new file mode 100644 index 0000000000..a0a0965a46 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c @@ -0,0 +1,170 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Buffer validation. + * + * @author Jose Fonseca + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_error.h" +#include "pipe/p_util.h" +#include "pipe/p_debug.h" + +#include "util/u_hash_table.h" + +#include "pb_buffer.h" +#include "pb_buffer_fenced.h" + + +struct pb_validate +{ + struct hash_table *buffer_table; +}; + + +static unsigned buffer_table_hash(void *pb_buf) +{ + return (unsigned)(uintptr_t)pb_buf; +} + + +static int buffer_table_compare(void *pb_buf1, void *pb_buf2) +{ + return (char *)pb_buf2 - (char *)pb_buf1; +} + + +enum pipe_error +pb_validate_add_buffer(struct pb_validate *vl, + struct pb_buffer *buf) +{ + enum pipe_error ret; + + assert(buf); + if(!buf) + return PIPE_ERROR; + + if(!hash_table_get(vl->buffer_table, buf)) { + struct pb_buffer *tmp = NULL; + + ret = hash_table_set(vl->buffer_table, buf, buf); + if(ret != PIPE_OK) + return ret; + + /* Increment reference count */ + pb_reference(&tmp, buf); + } + + return PIPE_OK; +} + + +enum pipe_error +pb_validate_validate(struct pb_validate *vl) +{ + /* FIXME: go through each buffer, ensure its not mapped, its address is + * available -- requires a new pb_buffer interface */ + return PIPE_OK; +} + + +struct pb_validate_fence_data { + struct pb_validate *vl; + struct pipe_fence_handle *fence; +}; + + +static enum pipe_error +pb_validate_fence_cb(void *key, void *value, void *_data) +{ + struct pb_buffer *buf = (struct pb_buffer *)key; + struct pb_validate_fence_data *data = (struct pb_validate_fence_data *)_data; + struct pb_validate *vl = data->vl; + struct pipe_fence_handle *fence = data->fence; + + assert(value == key); + + buffer_fence(buf, fence); + + /* Decrement the reference count -- table entry destroyed later */ + pb_reference(&buf, NULL); + + return PIPE_OK; +} + + +void +pb_validate_fence(struct pb_validate *vl, + struct pipe_fence_handle *fence) +{ + struct pb_validate_fence_data data; + + data.vl = vl; + data.fence = fence; + + hash_table_foreach(vl->buffer_table, + pb_validate_fence_cb, + &data); + + /* FIXME: cso_hash shrinks here, which is not desirable in this use case, + * as it will be refilled right soon */ + hash_table_clear(vl->buffer_table); +} + + +void +pb_validate_destroy(struct pb_validate *vl) +{ + pb_validate_fence(vl, NULL); + hash_table_destroy(vl->buffer_table); + FREE(vl); +} + + +struct pb_validate * +pb_validate_create() +{ + struct pb_validate *vl; + + vl = CALLOC_STRUCT(pb_validate); + if(!vl) + return NULL; + + vl->buffer_table = hash_table_create(buffer_table_hash, + buffer_table_compare); + if(!vl->buffer_table) { + FREE(vl); + return NULL; + } + + return vl; +} + diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.h b/src/gallium/auxiliary/pipebuffer/pb_validate.h new file mode 100644 index 0000000000..b0f05d3119 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.h @@ -0,0 +1,91 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Buffer validation. + * + * @author Jose Fonseca + */ + +#ifndef PB_VALIDATE_H_ +#define PB_VALIDATE_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_error.h" + +#ifdef __cplusplus +extern "C" { +#endif + + +struct pb_buffer; +struct pipe_fence_handle; + + +/** + * Buffer validation list. + * + * It holds a list of buffers to be validated and fenced when flushing. + */ +struct pb_validate; + + +enum pipe_error +pb_validate_add_buffer(struct pb_validate *vl, + struct pb_buffer *buf); + +/** + * Validate all buffers for hardware access. + * + * Should be called right before issuing commands to the hardware. + */ +enum pipe_error +pb_validate_validate(struct pb_validate *vl); + +/** + * Fence all buffers and clear the list. + * + * Should be called right before issuing commands to the hardware. + */ +void +pb_validate_fence(struct pb_validate *vl, + struct pipe_fence_handle *fence); + +struct pb_validate * +pb_validate_create(); + +void +pb_validate_destroy(struct pb_validate *vl); + + +#ifdef __cplusplus +} +#endif + +#endif /*PB_VALIDATE_H_*/ -- cgit v1.2.3 From 7ffbfaccfb1484a4ffd5aea0e0e1fbb407977a56 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Thu, 8 May 2008 13:26:23 +0100 Subject: gallium: Make sure functions have proper prototypes and remove unused variable. --- src/gallium/auxiliary/pipebuffer/pb_validate.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_validate.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c index a0a0965a46..b32fe2252e 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.c +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c @@ -42,6 +42,7 @@ #include "pb_buffer.h" #include "pb_buffer_fenced.h" +#include "pb_validate.h" struct pb_validate @@ -107,7 +108,6 @@ pb_validate_fence_cb(void *key, void *value, void *_data) { struct pb_buffer *buf = (struct pb_buffer *)key; struct pb_validate_fence_data *data = (struct pb_validate_fence_data *)_data; - struct pb_validate *vl = data->vl; struct pipe_fence_handle *fence = data->fence; assert(value == key); diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.h b/src/gallium/auxiliary/pipebuffer/pb_validate.h index b0f05d3119..3db1d5330b 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.h +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.h @@ -78,7 +78,7 @@ pb_validate_fence(struct pb_validate *vl, struct pipe_fence_handle *fence); struct pb_validate * -pb_validate_create(); +pb_validate_create(void); void pb_validate_destroy(struct pb_validate *vl); -- cgit v1.2.3 From 8eab7de888bb4056c34f80edfbc90a543736ea3b Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 9 May 2008 00:08:57 +0900 Subject: pipebuffer: Temporarily reimplement validation as growable array. Jose --- src/gallium/auxiliary/pipebuffer/pb_validate.c | 113 +++++++++++-------------- 1 file changed, 48 insertions(+), 65 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c index b32fe2252e..362fd896f3 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.c +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c @@ -38,52 +38,63 @@ #include "pipe/p_util.h" #include "pipe/p_debug.h" -#include "util/u_hash_table.h" - #include "pb_buffer.h" #include "pb_buffer_fenced.h" #include "pb_validate.h" -struct pb_validate -{ - struct hash_table *buffer_table; -}; +#define PB_VALIDATE_INITIAL_SIZE 1 /* 512 */ -static unsigned buffer_table_hash(void *pb_buf) -{ - return (unsigned)(uintptr_t)pb_buf; -} - - -static int buffer_table_compare(void *pb_buf1, void *pb_buf2) +struct pb_validate { - return (char *)pb_buf2 - (char *)pb_buf1; -} + struct pb_buffer **buffers; + unsigned used; + unsigned size; +}; enum pipe_error pb_validate_add_buffer(struct pb_validate *vl, struct pb_buffer *buf) { - enum pipe_error ret; - assert(buf); if(!buf) return PIPE_ERROR; + + /* We only need to store one reference for each buffer, so avoid storing + * consecutive references for the same buffer. It might not be the more + * common pasttern, but it is easy to implement. + */ + if(vl->used && vl->buffers[vl->used - 1] == buf) { + return PIPE_OK; + } - if(!hash_table_get(vl->buffer_table, buf)) { - struct pb_buffer *tmp = NULL; + /* Grow the table */ + if(vl->used == vl->size) { + unsigned new_size; + struct pb_buffer **new_buffers; + + new_size = vl->size * 2; + if(!new_size) + return PIPE_ERROR_OUT_OF_MEMORY; + + new_buffers = (struct pb_buffer **)REALLOC(vl->buffers, + vl->size*sizeof(struct pb_buffer *), + new_size*sizeof(struct pb_buffer *)); + if(!new_buffers) + return PIPE_ERROR_OUT_OF_MEMORY; - ret = hash_table_set(vl->buffer_table, buf, buf); - if(ret != PIPE_OK) - return ret; + memset(new_buffers + vl->size, 0, (new_size - vl->size)*sizeof(struct pb_buffer *)); - /* Increment reference count */ - pb_reference(&tmp, buf); + vl->size = new_size; + vl->buffers = new_buffers; } + assert(!vl->buffers[vl->used]); + pb_reference(&vl->buffers[vl->used], buf); + ++vl->used; + return PIPE_OK; } @@ -97,54 +108,26 @@ pb_validate_validate(struct pb_validate *vl) } -struct pb_validate_fence_data { - struct pb_validate *vl; - struct pipe_fence_handle *fence; -}; - - -static enum pipe_error -pb_validate_fence_cb(void *key, void *value, void *_data) -{ - struct pb_buffer *buf = (struct pb_buffer *)key; - struct pb_validate_fence_data *data = (struct pb_validate_fence_data *)_data; - struct pipe_fence_handle *fence = data->fence; - - assert(value == key); - - buffer_fence(buf, fence); - - /* Decrement the reference count -- table entry destroyed later */ - pb_reference(&buf, NULL); - - return PIPE_OK; -} - - void pb_validate_fence(struct pb_validate *vl, struct pipe_fence_handle *fence) { - struct pb_validate_fence_data data; - - data.vl = vl; - data.fence = fence; - - hash_table_foreach(vl->buffer_table, - pb_validate_fence_cb, - &data); - - /* FIXME: cso_hash shrinks here, which is not desirable in this use case, - * as it will be refilled right soon */ - hash_table_clear(vl->buffer_table); + unsigned i; + for(i = 0; i < vl->used; ++i) { + buffer_fence(vl->buffers[i], fence); + pb_reference(&vl->buffers[i], NULL); + } + vl->used = 0; } void pb_validate_destroy(struct pb_validate *vl) { - pb_validate_fence(vl, NULL); - hash_table_destroy(vl->buffer_table); + unsigned i; + for(i = 0; i < vl->used; ++i) + pb_reference(&vl->buffers[i], NULL); + FREE(vl->buffers); FREE(vl); } @@ -158,9 +141,9 @@ pb_validate_create() if(!vl) return NULL; - vl->buffer_table = hash_table_create(buffer_table_hash, - buffer_table_compare); - if(!vl->buffer_table) { + vl->size = PB_VALIDATE_INITIAL_SIZE; + vl->buffers = (struct pb_buffer **)CALLOC(vl->size, sizeof(struct pb_buffer *)); + if(!vl->buffers) { FREE(vl); return NULL; } -- cgit v1.2.3 From 1dfb3d4729ce4cd71a593c14dbb2907cd987f8ab Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Thu, 8 May 2008 19:00:11 +0100 Subject: cso_cache: Fix test for currently bound blend state. --- src/gallium/auxiliary/cso_cache/cso_context.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index eef898f486..a1a3a9efaf 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -84,7 +84,7 @@ static boolean delete_blend_state(struct cso_context *ctx, void *state) { struct cso_blend *cso = (struct cso_blend *)state; - if (ctx->blend == state) + if (ctx->blend == cso->data) return FALSE; if (cso->delete_state) -- cgit v1.2.3 From 2abc1b3641e435e0b68490fa6b0a7ffa7c030c76 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 1 May 2008 12:38:51 -0400 Subject: abstract fetching elts --- src/gallium/auxiliary/draw/draw_pt_varray.c | 17 +++++++++++------ src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 6 ++---- 2 files changed, 13 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index 355093f945..b0bd2b983e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -43,6 +43,8 @@ struct varray_frontend { unsigned draw_count; unsigned fetch_count; + unsigned fetch_start; + struct draw_pt_middle_end *middle; unsigned input_prim; @@ -68,15 +70,18 @@ static void varray_flush(struct varray_frontend *varray) varray->draw_count = 0; } -#if 0 -static void varray_check_flush(struct varray_frontend *varray) +static INLINE void fetch_init(struct varray_frontend *varray, + unsigned current_count, + unsigned count) { - if (varray->draw_count + 6 >= DRAW_MAX/* || - varray->fetch_count + 4 >= FETCH_MAX*/) { - varray_flush(varray); + unsigned idx; + const unsigned end = MIN2(FETCH_MAX, count - current_count); + for (idx = 0; idx < end; ++idx) { + varray->fetch_elts[idx] = varray->fetch_start + idx; } + varray->fetch_start += idx; + varray->fetch_count = idx; } -#endif static INLINE void add_draw_el(struct varray_frontend *varray, int idx, ushort flags) diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h index b9a319b253..a3509613f5 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h @@ -20,10 +20,8 @@ static void FUNC(struct draw_pt_front_end *frontend, start, count); #endif - for (i = 0; i < count; ++i) { - varray->fetch_elts[i] = start + i; - } - varray->fetch_count = count; + varray->fetch_start = start; + fetch_init(varray, 0, count); switch (varray->input_prim) { case PIPE_PRIM_POINTS: -- cgit v1.2.3 From 90a46ed277cc887d49c8d8c627174c3bd693ecf7 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 1 May 2008 23:54:39 -0400 Subject: split larger primitives in the simple varray pt --- src/gallium/auxiliary/draw/draw_pt.c | 3 +- src/gallium/auxiliary/draw/draw_pt_varray.c | 62 ++++++++- src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 169 +++++++++++++++++------- 3 files changed, 180 insertions(+), 54 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index c9c5d18313..bccde6c5fd 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -80,8 +80,7 @@ draw_pt_arrays(struct draw_context *draw, /* Pick the right frontend */ - if (draw->pt.user.elts || - count >= 256) { + if (draw->pt.user.elts) { frontend = draw->pt.front.vcache; } else { frontend = draw->pt.front.varray; diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index b0bd2b983e..c9843bded0 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -58,6 +58,11 @@ static void varray_flush(struct varray_frontend *varray) debug_printf("FLUSH fc = %d, dc = %d\n", varray->fetch_count, varray->draw_count); + debug_printf("\telt0 = %d, eltx = %d, draw0 = %d, drawx = %d\n", + varray->fetch_elts[0], + varray->fetch_elts[varray->fetch_count-1], + varray->draw_elts[0], + varray->draw_elts[varray->draw_count-1]); #endif varray->middle->run(varray->middle, varray->fetch_elts, @@ -71,18 +76,69 @@ static void varray_flush(struct varray_frontend *varray) } static INLINE void fetch_init(struct varray_frontend *varray, - unsigned current_count, unsigned count) { unsigned idx; - const unsigned end = MIN2(FETCH_MAX, count - current_count); - for (idx = 0; idx < end; ++idx) { +#if 0 + debug_printf("FETCH INIT c = %d, fs = %d\n", + count, + varray->fetch_start); +#endif + for (idx = 0; idx < count; ++idx) { varray->fetch_elts[idx] = varray->fetch_start + idx; } varray->fetch_start += idx; varray->fetch_count = idx; } + +static boolean split_prim_inplace(unsigned prim, unsigned *first, unsigned *incr) +{ + switch (prim) { + case PIPE_PRIM_POINTS: + *first = 1; + *incr = 1; + return TRUE; + case PIPE_PRIM_LINES: + *first = 2; + *incr = 2; + return TRUE; + case PIPE_PRIM_LINE_STRIP: + *first = 2; + *incr = 1; + return TRUE; + case PIPE_PRIM_TRIANGLES: + *first = 3; + *incr = 3; + return TRUE; + case PIPE_PRIM_TRIANGLE_STRIP: + *first = 3; + *incr = 1; + return TRUE; + case PIPE_PRIM_TRIANGLE_FAN: + *first = 3; + *incr = 1; + return TRUE; + case PIPE_PRIM_QUADS: + *first = 4; + *incr = 4; + return TRUE; + case PIPE_PRIM_QUAD_STRIP: + *first = 4; + *incr = 2; + return TRUE; + case PIPE_PRIM_POLYGON: + *first = 3; + *incr = 1; + return TRUE; + default: + *first = 0; + *incr = 1; /* set to one so that count % incr works */ + return FALSE; + } +} + + static INLINE void add_draw_el(struct varray_frontend *varray, int idx, ushort flags) { diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h index a3509613f5..073c1aadbf 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h @@ -10,30 +10,42 @@ static void FUNC(struct draw_pt_front_end *frontend, boolean flatfirst = (draw->rasterizer->flatshade && draw->rasterizer->flatshade_first); - unsigned i, flags; + unsigned i, j, flags; + unsigned first, incr; -#if 0 - debug_printf("%s (%d) %d/%d\n", __FUNCTION__, draw->prim, start, count); -#endif -#if 0 - debug_printf("INPUT PRIM = %d (start = %d, count = %d)\n", varray->input_prim, + varray->fetch_start = start; + + split_prim_inplace(varray->input_prim, &first, &incr); + +#if 1 + debug_printf("%s (%d) %d/%d\n", __FUNCTION__, + varray->input_prim, start, count); #endif - varray->fetch_start = start; - fetch_init(varray, 0, count); - switch (varray->input_prim) { case PIPE_PRIM_POINTS: - for (i = 0; i < count; i ++) { - POINT(varray, i + 0); + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i < count; i ++) { + POINT(varray, i + 0); + } + fetch_init(varray, end); + varray_flush(varray); } break; case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) { - LINE(varray, DRAW_PIPE_RESET_STIPPLE, - i + 0, i + 1); + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i+1 < end; i += 2) { + LINE(varray, DRAW_PIPE_RESET_STIPPLE, + i + 0, i + 1); + } + fetch_init(varray, end); + varray_flush(varray); } break; @@ -41,38 +53,68 @@ static void FUNC(struct draw_pt_front_end *frontend, if (count >= 2) { flags = DRAW_PIPE_RESET_STIPPLE; - for (i = 1; i < count; i++, flags = 0) { - LINE(varray, flags, i - 1, i); + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 1; i < end; i++, flags = 0) { + LINE(varray, flags, i - 1, i); + } + LINE(varray, flags, i - 1, 0); + fetch_init(varray, end); + varray_flush(varray); } - LINE(varray, flags, i - 1, 0); } break; case PIPE_PRIM_LINE_STRIP: flags = DRAW_PIPE_RESET_STIPPLE; - for (i = 1; i < count; i++, flags = 0) { - LINE(varray, flags, i - 1, i); + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 1; i < end; i++, flags = 0) { + LINE(varray, flags, i - 1, i); + } + fetch_init(varray, end); + varray_flush(varray); } break; case PIPE_PRIM_TRIANGLES: - for (i = 0; i+2 < count; i += 3) { - TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - i + 0, i + 1, i + 2); + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i+2 < end; i += 3) { + TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + i + 0, i + 1, i + 2); + } + fetch_init(varray, end); + varray_flush(varray); } break; case PIPE_PRIM_TRIANGLE_STRIP: if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - i + 0, i + 1 + (i&1), i + 2 - (i&1)); + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i+2 < end; i++) { + TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + i + 0, i + 1 + (i&1), i + 2 - (i&1)); + } + fetch_init(varray, end); + varray_flush(varray); } } else { - for (i = 0; i+2 < count; i++) { - TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - i + 0 + (i&1), i + 1 - (i&1), i + 2); + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i+2 < end; i++) { + TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + i + 0 + (i&1), i + 1 - (i&1), i + 2); + } + fetch_init(varray, end); + varray_flush(varray); } } break; @@ -81,51 +123,80 @@ static void FUNC(struct draw_pt_front_end *frontend, if (count >= 3) { if (flatfirst) { flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; - for (i = 0; i+2 < count; i++) { - TRIANGLE(varray, flags, i + 1, i + 2, 0); + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i+2 < end; i++) { + TRIANGLE(varray, flags, i + 1, i + 2, 0); + } + fetch_init(varray, end); + varray_flush(varray); } } else { flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; - for (i = 0; i+2 < count; i++) { - TRIANGLE(varray, flags, 0, i + 1, i + 2); + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i+2 < end; i++) { + TRIANGLE(varray, flags, 0, i + 1, i + 2); + } + fetch_init(varray, end); + varray_flush(varray); } } } break; case PIPE_PRIM_QUADS: - for (i = 0; i+3 < count; i += 4) { - QUAD(varray, i + 0, i + 1, i + 2, i + 3); + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i+3 < end; i += 4) { + QUAD(varray, i + 0, i + 1, i + 2, i + 3); + } + fetch_init(varray, end); + varray_flush(varray); } break; case PIPE_PRIM_QUAD_STRIP: - for (i = 0; i+3 < count; i += 2) { - QUAD(varray, i + 2, i + 0, i + 1, i + 3); + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i+3 < end; i += 2) { + QUAD(varray, i + 2, i + 0, i + 1, i + 3); + } + fetch_init(varray, end); + varray_flush(varray); } break; case PIPE_PRIM_POLYGON: { - /* These bitflags look a little odd because we submit the - * vertices as (1,2,0) to satisfy flatshade requirements. - */ - const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2; - const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0; - const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1; - - flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; - - for (i = 0; i+2 < count; i++, flags = edge_middle) { + /* These bitflags look a little odd because we submit the + * vertices as (1,2,0) to satisfy flatshade requirements. + */ + const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2; + const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0; + const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1; + + flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i+2 < end; i++, flags = edge_middle) { if (i + 3 == count) flags |= edge_last; - TRIANGLE(varray, flags, i + 1, i + 2, 0); - } + TRIANGLE(varray, flags, i + 1, i + 2, 0); + } + fetch_init(varray, end); + varray_flush(varray); } - break; + } + break; default: assert(0); -- cgit v1.2.3 From abb08e9335b5d7cb004dc9e6cec390ab6968abe5 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sat, 3 May 2008 22:32:17 -0400 Subject: implement linear emition and fetching and plug it in the varray paths --- src/gallium/auxiliary/draw/draw_pt.h | 18 +++++ src/gallium/auxiliary/draw/draw_pt_emit.c | 45 +++++++++++ src/gallium/auxiliary/draw/draw_pt_fetch.c | 36 +++++++++ .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 88 ++++++++++++++++++++-- src/gallium/auxiliary/draw/draw_pt_varray.c | 26 ++++++- src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 4 +- 6 files changed, 208 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 2dec376cee..2f96ceaf00 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -92,6 +92,12 @@ struct draw_pt_middle_end { const ushort *draw_elts, unsigned draw_count ); + void (*run_linear)(struct draw_pt_middle_end *, + unsigned fetch_start, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count); + void (*finish)( struct draw_pt_middle_end * ); void (*destroy)( struct draw_pt_middle_end * ); }; @@ -152,6 +158,13 @@ void draw_pt_emit( struct pt_emit *emit, const ushort *elts, unsigned count ); +void draw_pt_emit_linear( struct pt_emit *emit, + const float (*vertex_data)[4], + unsigned vertex_count, + unsigned stride, + unsigned start, + unsigned count ); + void draw_pt_emit_destroy( struct pt_emit *emit ); struct pt_emit *draw_pt_emit_create( struct draw_context *draw ); @@ -170,6 +183,11 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, unsigned count, char *verts ); +void draw_pt_fetch_run_linear( struct pt_fetch *fetch, + unsigned start, + unsigned count, + char *verts ); + void draw_pt_fetch_destroy( struct pt_fetch *fetch ); struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ); diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index f9ac16786e..2a961b7088 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -179,6 +179,51 @@ void draw_pt_emit( struct pt_emit *emit, } +void draw_pt_emit_linear(struct pt_emit *emit, + const float (*vertex_data)[4], + unsigned vertex_count, + unsigned stride, + unsigned start, + unsigned count) +{ + struct draw_context *draw = emit->draw; + struct translate *translate = emit->translate; + struct vbuf_render *render = draw->render; + void *hw_verts; + + debug_printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n"); + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = render->allocate_vertices(render, + (ushort)translate->key.output_stride, + (ushort)count); + if (!hw_verts) { + assert(0); + return; + } + + translate->set_buffer(translate, 0, + vertex_data, stride); + + translate->set_buffer(translate, 1, + &draw->rasterizer->point_size, + 0); + + translate->run(translate, + 0, + vertex_count, + hw_verts); + + render->draw_arrays(render, start, count); + + render->release_vertices(render, + hw_verts, + translate->key.output_stride, + vertex_count); +} + struct pt_emit *draw_pt_emit_create( struct draw_context *draw ) { struct pt_emit *emit = CALLOC_STRUCT(pt_emit); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 1f765b73ad..d7cc1807d7 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -167,6 +167,42 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, } +void draw_pt_fetch_run_linear( struct pt_fetch *fetch, + unsigned start, + unsigned count, + char *verts ) +{ + struct draw_context *draw = fetch->draw; + struct translate *translate = fetch->translate; + unsigned i; + + for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { + translate->set_buffer(translate, + i, + ((char *)draw->pt.user.vbuffer[i] + + draw->pt.vertex_buffer[i].buffer_offset), + draw->pt.vertex_buffer[i].pitch ); + } + + translate->run( translate, + start, + count, + verts ); + + /* Edgeflags are hard to fit into a translate program, populate + * them separately if required. In the setup above they are + * defaulted to one, so only need this if there is reason to change + * that default: + */ + if (fetch->need_edgeflags) { + for (i = 0; i < count; i++) { + struct vertex_header *vh = (struct vertex_header *)(verts + i * fetch->vertex_size); + vh->edgeflag = draw_pt_get_edgeflag( draw, start + i ); + } + } +} + + struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ) { struct pt_fetch *fetch = CALLOC_STRUCT(pt_fetch); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 4ec20493c4..b1e08a8f40 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -162,7 +162,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, fpme->vertex_size, draw_elts, draw_count ); - } + } else { draw_pt_emit( fpme->emit, (const float (*)[4])pipeline_verts->data, @@ -177,6 +177,83 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, } +static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, + unsigned fetch_start, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *shader = draw->vertex_shader; + unsigned opt = fpme->opt; + unsigned alloc_count = align_int( fetch_count, 4 ); + + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + + if (!pipeline_verts) { + /* Not much we can do here - just skip the rendering. + */ + assert(0); + return; + } + + /* Fetch into our vertex buffer + */ + draw_pt_fetch_run_linear( fpme->fetch, + fetch_start, + fetch_count, + (char *)pipeline_verts ); + + /* Run the shader, note that this overwrites the data[] parts of + * the pipeline verts. If there is no shader, ie a bypass shader, + * then the inputs == outputs, and are already in the correct + * place. + */ + if (opt & PT_SHADE) + { + shader->run_linear(shader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->pt.user.constants, + fetch_count, + fpme->vertex_size, + fpme->vertex_size); + } + + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + fetch_count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } + + /* Do we need to run the pipeline? + */ + if (opt & PT_PIPELINE) { + draw_pipeline_run( fpme->draw, + fpme->prim, + pipeline_verts, + fetch_count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + else { + draw_pt_emit_linear( fpme->emit, + (const float (*)[4])pipeline_verts->data, + fetch_count, + fpme->vertex_size, + 0, /*start*/ + draw_count ); + } + + FREE(pipeline_verts); +} + + static void fetch_pipeline_finish( struct draw_pt_middle_end *middle ) { @@ -206,10 +283,11 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context * if (!fpme) goto fail; - fpme->base.prepare = fetch_pipeline_prepare; - fpme->base.run = fetch_pipeline_run; - fpme->base.finish = fetch_pipeline_finish; - fpme->base.destroy = fetch_pipeline_destroy; + fpme->base.prepare = fetch_pipeline_prepare; + fpme->base.run = fetch_pipeline_run; + fpme->base.run_linear = fetch_pipeline_linear_run; + fpme->base.finish = fetch_pipeline_finish; + fpme->base.destroy = fetch_pipeline_destroy; fpme->draw = draw; diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index c9843bded0..916373acc8 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -75,6 +75,28 @@ static void varray_flush(struct varray_frontend *varray) varray->draw_count = 0; } +static void varray_flush_linear(struct varray_frontend *varray) +{ + if (varray->draw_count) { + debug_printf("FLUSH LINEAR fc = %d, dc = %d\n", + varray->fetch_count, + varray->draw_count); + debug_printf("\telt0 = %d, eltx = %d, draw0 = %d, drawx = %d\n", + varray->fetch_elts[0], + varray->fetch_elts[varray->fetch_count-1], + varray->draw_elts[0], + varray->draw_elts[varray->draw_count-1]); + varray->middle->run_linear(varray->middle, + varray->fetch_elts[0], + varray->fetch_count, + varray->draw_elts, + varray->draw_count); + } + + varray->fetch_count = 0; + varray->draw_count = 0; +} + static INLINE void fetch_init(struct varray_frontend *varray, unsigned count) { @@ -265,8 +287,8 @@ static void varray_prepare(struct draw_pt_front_end *frontend, if (opt & PT_PIPELINE) { varray->base.run = varray_run_extras; - } - else + } + else { varray->base.run = varray_run; } diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h index 073c1aadbf..67baafa3be 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h @@ -17,7 +17,7 @@ static void FUNC(struct draw_pt_front_end *frontend, split_prim_inplace(varray->input_prim, &first, &incr); -#if 1 +#if 0 debug_printf("%s (%d) %d/%d\n", __FUNCTION__, varray->input_prim, start, count); @@ -88,7 +88,7 @@ static void FUNC(struct draw_pt_front_end *frontend, i + 0, i + 1, i + 2); } fetch_init(varray, end); - varray_flush(varray); + varray_flush_linear(varray); } break; -- cgit v1.2.3 From ff1fee2cae9fabb47d6a2eb1f9f8094fec3c377f Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sun, 4 May 2008 00:44:27 -0400 Subject: don't fill in linear fetch_elts --- src/gallium/auxiliary/draw/draw_pt_varray.c | 4 +++- src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 3 ++- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index 916373acc8..fb1b59d53e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -78,6 +78,7 @@ static void varray_flush(struct varray_frontend *varray) static void varray_flush_linear(struct varray_frontend *varray) { if (varray->draw_count) { +#if 0 debug_printf("FLUSH LINEAR fc = %d, dc = %d\n", varray->fetch_count, varray->draw_count); @@ -86,8 +87,9 @@ static void varray_flush_linear(struct varray_frontend *varray) varray->fetch_elts[varray->fetch_count-1], varray->draw_elts[0], varray->draw_elts[varray->draw_count-1]); +#endif varray->middle->run_linear(varray->middle, - varray->fetch_elts[0], + varray->fetch_start, varray->fetch_count, varray->draw_elts, varray->draw_count); diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h index 67baafa3be..a9f844a357 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h @@ -87,8 +87,9 @@ static void FUNC(struct draw_pt_front_end *frontend, TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, i + 0, i + 1, i + 2); } - fetch_init(varray, end); + varray->fetch_count = end; varray_flush_linear(varray); + varray->fetch_start += end; } break; -- cgit v1.2.3 From a24cb269e1ba5434acf8c94abd03517c149b9c51 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sun, 4 May 2008 01:23:01 -0400 Subject: implement linear path for fetch_emit pipeline --- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 64 +++++++++++++++++++++++-- src/gallium/auxiliary/draw/draw_pt_varray.c | 1 + 2 files changed, 61 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index a4de341df8..6d5a54cf0e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -257,6 +257,61 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, } +static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, + unsigned fetch_start, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; + struct draw_context *draw = feme->draw; + void *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = draw->render->allocate_vertices( draw->render, + (ushort)feme->translate->key.output_stride, + (ushort)fetch_count ); + if (!hw_verts) { + assert(0); + return; + } + + /* Single routine to fetch vertices and emit HW verts. + */ + feme->translate->run( feme->translate, + fetch_start, + fetch_count, + hw_verts ); + + if (0) { + unsigned i; + for (i = 0; i < fetch_count; i++) { + debug_printf("\n\nvertex %d:\n", i); + draw_dump_emitted_vertex( feme->vinfo, + (const uint8_t *)hw_verts + feme->vinfo->size * 4 * i ); + } + } + + /* XXX: Draw arrays path to avoid re-emitting index list again and + * again. + */ + draw->render->draw_arrays( draw->render, + 0, /*start*/ + draw_count ); + + /* Done -- that was easy, wasn't it: + */ + draw->render->release_vertices( draw->render, + hw_verts, + feme->translate->key.output_stride, + fetch_count ); + +} + + static void fetch_emit_finish( struct draw_pt_middle_end *middle ) { @@ -285,10 +340,11 @@ struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ) return NULL; } - fetch_emit->base.prepare = fetch_emit_prepare; - fetch_emit->base.run = fetch_emit_run; - fetch_emit->base.finish = fetch_emit_finish; - fetch_emit->base.destroy = fetch_emit_destroy; + fetch_emit->base.prepare = fetch_emit_prepare; + fetch_emit->base.run = fetch_emit_run; + fetch_emit->base.run_linear = fetch_emit_run_linear; + fetch_emit->base.finish = fetch_emit_finish; + fetch_emit->base.destroy = fetch_emit_destroy; fetch_emit->draw = draw; diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index fb1b59d53e..e7e21e4bf6 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -88,6 +88,7 @@ static void varray_flush_linear(struct varray_frontend *varray) varray->draw_elts[0], varray->draw_elts[varray->draw_count-1]); #endif + assert(varray->middle->run_linear); varray->middle->run_linear(varray->middle, varray->fetch_start, varray->fetch_count, -- cgit v1.2.3 From 66d72f176de2568f053c6dc54e93d423723ae8aa Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sun, 4 May 2008 01:37:32 -0400 Subject: silence debugging output --- src/gallium/auxiliary/draw/draw_pt_emit.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 2a961b7088..776ca32cfa 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -191,7 +191,9 @@ void draw_pt_emit_linear(struct pt_emit *emit, struct vbuf_render *render = draw->render; void *hw_verts; - debug_printf("XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX\n"); +#if 0 + debug_printf("Linear emit\n"); +#endif /* XXX: need to flush to get prim_vbuf.c to release its allocation?? */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); -- cgit v1.2.3 From e897fd6cd35c6b9e398e1903d2e79678fe85708a Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 5 May 2008 12:49:40 -0400 Subject: fix the regressions --- src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h index a9f844a357..10ac08ea30 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h @@ -28,9 +28,10 @@ static void FUNC(struct draw_pt_front_end *frontend, for (j = 0; j + first <= count; j += i) { unsigned end = MIN2(FETCH_MAX, count - j); end -= (end % incr); - for (i = 0; i < count; i ++) { + for (i = 0; i < count; i++) { POINT(varray, i + 0); } + i = end; fetch_init(varray, end); varray_flush(varray); } @@ -44,6 +45,7 @@ static void FUNC(struct draw_pt_front_end *frontend, LINE(varray, DRAW_PIPE_RESET_STIPPLE, i + 0, i + 1); } + i = end; fetch_init(varray, end); varray_flush(varray); } @@ -60,6 +62,7 @@ static void FUNC(struct draw_pt_front_end *frontend, LINE(varray, flags, i - 1, i); } LINE(varray, flags, i - 1, 0); + i = end; fetch_init(varray, end); varray_flush(varray); } @@ -74,6 +77,7 @@ static void FUNC(struct draw_pt_front_end *frontend, for (i = 1; i < end; i++, flags = 0) { LINE(varray, flags, i - 1, i); } + i = end; fetch_init(varray, end); varray_flush(varray); } @@ -87,6 +91,7 @@ static void FUNC(struct draw_pt_front_end *frontend, TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, i + 0, i + 1, i + 2); } + i = end; varray->fetch_count = end; varray_flush_linear(varray); varray->fetch_start += end; @@ -102,6 +107,7 @@ static void FUNC(struct draw_pt_front_end *frontend, TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, i + 0, i + 1 + (i&1), i + 2 - (i&1)); } + i = end; fetch_init(varray, end); varray_flush(varray); } @@ -114,6 +120,7 @@ static void FUNC(struct draw_pt_front_end *frontend, TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, i + 0 + (i&1), i + 1 - (i&1), i + 2); } + i = end; fetch_init(varray, end); varray_flush(varray); } @@ -130,6 +137,7 @@ static void FUNC(struct draw_pt_front_end *frontend, for (i = 0; i+2 < end; i++) { TRIANGLE(varray, flags, i + 1, i + 2, 0); } + i = end; fetch_init(varray, end); varray_flush(varray); } @@ -142,6 +150,7 @@ static void FUNC(struct draw_pt_front_end *frontend, for (i = 0; i+2 < end; i++) { TRIANGLE(varray, flags, 0, i + 1, i + 2); } + i = end; fetch_init(varray, end); varray_flush(varray); } @@ -156,6 +165,7 @@ static void FUNC(struct draw_pt_front_end *frontend, for (i = 0; i+3 < end; i += 4) { QUAD(varray, i + 0, i + 1, i + 2, i + 3); } + i = end; fetch_init(varray, end); varray_flush(varray); } @@ -168,6 +178,7 @@ static void FUNC(struct draw_pt_front_end *frontend, for (i = 0; i+3 < end; i += 2) { QUAD(varray, i + 2, i + 0, i + 1, i + 3); } + i = end; fetch_init(varray, end); varray_flush(varray); } @@ -193,6 +204,7 @@ static void FUNC(struct draw_pt_front_end *frontend, TRIANGLE(varray, flags, i + 1, i + 2, 0); } + i = end; fetch_init(varray, end); varray_flush(varray); } -- cgit v1.2.3 From fe586f8612dd517b9a1f0d87fbaf3a75e3caf588 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 6 May 2008 18:59:45 -0400 Subject: redo the linear paths --- src/gallium/auxiliary/draw/draw_pipe.c | 39 ++++ src/gallium/auxiliary/draw/draw_private.h | 6 + src/gallium/auxiliary/draw/draw_pt.h | 6 +- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 18 +- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 36 ++-- src/gallium/auxiliary/draw/draw_pt_varray.c | 27 +-- src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 5 +- .../auxiliary/draw/draw_pt_varray_tmp_linear.h | 198 +++++++++++++++++++++ 8 files changed, 279 insertions(+), 56 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 46afb0f41f..cb97f955b2 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -212,6 +212,45 @@ void draw_pipeline_run( struct draw_context *draw, draw->pipeline.vertex_count = 0; } +void draw_pipeline_run_linear( struct draw_context *draw, + unsigned prim, + struct vertex_header *vertices, + unsigned count, + unsigned stride ) +{ + char *verts = (char *)vertices; + unsigned i; + + draw->pipeline.verts = verts; + draw->pipeline.vertex_stride = stride; + draw->pipeline.vertex_count = count; + + switch (prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i++) + do_point( draw, + verts + stride * i ); + break; + case PIPE_PRIM_LINES: + for (i = 0; i+1 < count; i += 2) + do_line( draw, + i+0, /* flags */ + verts + stride * ((i+0) & ~DRAW_PIPE_FLAG_MASK), + verts + stride * (i+1)); + break; + case PIPE_PRIM_TRIANGLES: + for (i = 0; i+2 < count; i += 3) + do_triangle( draw, + (i+0), /* flags */ + verts + stride * ((i+0) & ~DRAW_PIPE_FLAG_MASK), + verts + stride * (i+1), + verts + stride * (i+2)); + break; + } + + draw->pipeline.verts = NULL; + draw->pipeline.vertex_count = 0; +} void draw_pipeline_flush( struct draw_context *draw, diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index cee58bbf73..e036d498b8 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -247,6 +247,12 @@ void draw_pipeline_run( struct draw_context *draw, const ushort *elts, unsigned count ); +void draw_pipeline_run_linear( struct draw_context *draw, + unsigned prim, + struct vertex_header *vertices, + unsigned count, + unsigned stride ); + void draw_pipeline_flush( struct draw_context *draw, diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 2f96ceaf00..312fdbe4f4 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -93,10 +93,8 @@ struct draw_pt_middle_end { unsigned draw_count ); void (*run_linear)(struct draw_pt_middle_end *, - unsigned fetch_start, - unsigned fetch_count, - const ushort *draw_elts, - unsigned draw_count); + unsigned start, + unsigned count); void (*finish)( struct draw_pt_middle_end * ); void (*destroy)( struct draw_pt_middle_end * ); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 6d5a54cf0e..8df4241b82 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -258,10 +258,8 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, - unsigned fetch_start, - unsigned fetch_count, - const ushort *draw_elts, - unsigned draw_count ) + unsigned start, + unsigned count ) { struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; struct draw_context *draw = feme->draw; @@ -273,7 +271,7 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, hw_verts = draw->render->allocate_vertices( draw->render, (ushort)feme->translate->key.output_stride, - (ushort)fetch_count ); + (ushort)count ); if (!hw_verts) { assert(0); return; @@ -282,13 +280,13 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, /* Single routine to fetch vertices and emit HW verts. */ feme->translate->run( feme->translate, - fetch_start, - fetch_count, + start, + count, hw_verts ); if (0) { unsigned i; - for (i = 0; i < fetch_count; i++) { + for (i = 0; i < count; i++) { debug_printf("\n\nvertex %d:\n", i); draw_dump_emitted_vertex( feme->vinfo, (const uint8_t *)hw_verts + feme->vinfo->size * 4 * i ); @@ -300,14 +298,14 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, */ draw->render->draw_arrays( draw->render, 0, /*start*/ - draw_count ); + count ); /* Done -- that was easy, wasn't it: */ draw->render->release_vertices( draw->render, hw_verts, feme->translate->key.output_stride, - fetch_count ); + count ); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index b1e08a8f40..dad54690a5 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -178,18 +178,16 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, - unsigned fetch_start, - unsigned fetch_count, - const ushort *draw_elts, - unsigned draw_count ) + unsigned start, + unsigned count) { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vertex_shader; unsigned opt = fpme->opt; - unsigned alloc_count = align_int( fetch_count, 4 ); + unsigned alloc_count = align_int( count, 4 ); - struct vertex_header *pipeline_verts = + struct vertex_header *pipeline_verts = (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); if (!pipeline_verts) { @@ -202,8 +200,8 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, /* Fetch into our vertex buffer */ draw_pt_fetch_run_linear( fpme->fetch, - fetch_start, - fetch_count, + start, + count, (char *)pipeline_verts ); /* Run the shader, note that this overwrites the data[] parts of @@ -213,18 +211,18 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, */ if (opt & PT_SHADE) { - shader->run_linear(shader, + shader->run_linear(shader, (const float (*)[4])pipeline_verts->data, ( float (*)[4])pipeline_verts->data, (const float (*)[4])draw->pt.user.constants, - fetch_count, + count, fpme->vertex_size, fpme->vertex_size); } if (draw_pt_post_vs_run( fpme->post_vs, pipeline_verts, - fetch_count, + count, fpme->vertex_size )) { opt |= PT_PIPELINE; @@ -233,21 +231,19 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, /* Do we need to run the pipeline? */ if (opt & PT_PIPELINE) { - draw_pipeline_run( fpme->draw, - fpme->prim, - pipeline_verts, - fetch_count, - fpme->vertex_size, - draw_elts, - draw_count ); + draw_pipeline_run_linear( fpme->draw, + fpme->prim, + pipeline_verts, + count, + fpme->vertex_size); } else { draw_pt_emit_linear( fpme->emit, (const float (*)[4])pipeline_verts->data, - fetch_count, + count, fpme->vertex_size, 0, /*start*/ - draw_count ); + count ); } FREE(pipeline_verts); diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index e7e21e4bf6..59a9569270 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -75,29 +75,18 @@ static void varray_flush(struct varray_frontend *varray) varray->draw_count = 0; } -static void varray_flush_linear(struct varray_frontend *varray) +static void varray_flush_linear(struct varray_frontend *varray, + unsigned start, unsigned count) { - if (varray->draw_count) { + if (count) { #if 0 - debug_printf("FLUSH LINEAR fc = %d, dc = %d\n", - varray->fetch_count, - varray->draw_count); - debug_printf("\telt0 = %d, eltx = %d, draw0 = %d, drawx = %d\n", - varray->fetch_elts[0], - varray->fetch_elts[varray->fetch_count-1], - varray->draw_elts[0], - varray->draw_elts[varray->draw_count-1]); + debug_printf("FLUSH LINEAR start = %d, count = %d\n", + start, + count); #endif assert(varray->middle->run_linear); - varray->middle->run_linear(varray->middle, - varray->fetch_start, - varray->fetch_count, - varray->draw_elts, - varray->draw_count); + varray->middle->run_linear(varray->middle, start, count); } - - varray->fetch_count = 0; - varray->draw_count = 0; } static INLINE void fetch_init(struct varray_frontend *varray, @@ -261,7 +250,7 @@ static INLINE void varray_ef_quad( struct varray_frontend *varray, #define LINE(vc,flags,i0,i1) varray_line(vc,i0,i1) #define POINT(vc,i0) varray_point(vc,i0) #define FUNC varray_run -#include "draw_pt_varray_tmp.h" +#include "draw_pt_varray_tmp_linear.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h index 10ac08ea30..335c4c89ca 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h @@ -92,9 +92,8 @@ static void FUNC(struct draw_pt_front_end *frontend, i + 0, i + 1, i + 2); } i = end; - varray->fetch_count = end; - varray_flush_linear(varray); - varray->fetch_start += end; + fetch_init(varray, end); + varray_flush(varray); } break; diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h new file mode 100644 index 0000000000..dfa4338407 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h @@ -0,0 +1,198 @@ + +static void FUNC(struct draw_pt_front_end *frontend, + pt_elt_func get_elt, + const void *elts, + unsigned count) +{ + struct varray_frontend *varray = (struct varray_frontend *)frontend; + struct draw_context *draw = varray->draw; + unsigned start = (unsigned)elts; + + boolean flatfirst = (draw->rasterizer->flatshade && + draw->rasterizer->flatshade_first); + unsigned i, j, flags; + unsigned first, incr; + + varray->fetch_start = start; + + split_prim_inplace(varray->input_prim, &first, &incr); + +#if 0 + debug_printf("%s (%d) %d/%d\n", __FUNCTION__, + varray->input_prim, + start, count); +#endif + + switch (varray->input_prim) { + case PIPE_PRIM_POINTS: + case PIPE_PRIM_LINES: + case PIPE_PRIM_TRIANGLES: + j = 0; + while (j + first <= count) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + varray_flush_linear(varray, start + j, end); + j += end; + } + break; + + case PIPE_PRIM_LINE_LOOP: + if (count >= 2) { + flags = DRAW_PIPE_RESET_STIPPLE; + + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 1; i < end; i++, flags = 0) { + LINE(varray, flags, i - 1, i); + } + LINE(varray, flags, i - 1, 0); + i = end; + fetch_init(varray, end); + varray_flush(varray); + } + } + break; + + case PIPE_PRIM_LINE_STRIP: + flags = DRAW_PIPE_RESET_STIPPLE; + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 1; i < end; i++, flags = 0) { + LINE(varray, flags, i - 1, i); + } + i = end; + fetch_init(varray, end); + varray_flush(varray); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (flatfirst) { + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i+2 < end; i++) { + TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + i + 0, i + 1 + (i&1), i + 2 - (i&1)); + } + i = end; + fetch_init(varray, end); + varray_flush(varray); + } + } + else { + for (j = 0; j + first <= count;) { + unsigned end = MIN2(FETCH_MAX, count - j); + //end -= (end % incr); + for (i = 0; i+2 < end; i++) { + TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + i + 0 + (i&1), i + 1 - (i&1), i + 2); + } + fetch_init(varray, end); + varray_flush(varray); + j += end; + if (j <= count) + j -= incr; + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (count >= 3) { + if (flatfirst) { + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i+2 < end; i++) { + TRIANGLE(varray, flags, i + 1, i + 2, 0); + } + i = end; + fetch_init(varray, end); + varray_flush(varray); + } + } + else { + flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i+2 < end; i++) { + TRIANGLE(varray, flags, 0, i + 1, i + 2); + } + i = end; + fetch_init(varray, end); + varray_flush(varray); + } + } + } + break; + + case PIPE_PRIM_QUADS: + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i+3 < end; i += 4) { + QUAD(varray, i + 0, i + 1, i + 2, i + 3); + } + i = end; + fetch_init(varray, end); + varray_flush(varray); + } + break; + + case PIPE_PRIM_QUAD_STRIP: + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i+3 < end; i += 2) { + QUAD(varray, i + 2, i + 0, i + 1, i + 3); + } + i = end; + fetch_init(varray, end); + varray_flush(varray); + } + break; + + case PIPE_PRIM_POLYGON: + { + /* These bitflags look a little odd because we submit the + * vertices as (1,2,0) to satisfy flatshade requirements. + */ + const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2; + const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0; + const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1; + + flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; + for (j = 0; j + first <= count; j += i) { + unsigned end = MIN2(FETCH_MAX, count - j); + end -= (end % incr); + for (i = 0; i+2 < end; i++, flags = edge_middle) { + + if (i + 3 == count) + flags |= edge_last; + + TRIANGLE(varray, flags, i + 1, i + 2, 0); + } + i = end; + fetch_init(varray, end); + varray_flush(varray); + } + } + break; + + default: + assert(0); + break; + } + + varray_flush(varray); +} + +#undef TRIANGLE +#undef QUAD +#undef POINT +#undef LINE +#undef FUNC -- cgit v1.2.3 From 22323af525d00022a1fa06fab7ee84df5ef2d1f0 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 7 May 2008 19:34:12 -0400 Subject: fix silly mistakes --- src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 2 +- src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h index 335c4c89ca..fb49452d8b 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h @@ -28,7 +28,7 @@ static void FUNC(struct draw_pt_front_end *frontend, for (j = 0; j + first <= count; j += i) { unsigned end = MIN2(FETCH_MAX, count - j); end -= (end % incr); - for (i = 0; i < count; i++) { + for (i = 0; i < end; i++) { POINT(varray, i + 0); } i = end; diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h index dfa4338407..ab28859c35 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h @@ -85,7 +85,7 @@ static void FUNC(struct draw_pt_front_end *frontend, else { for (j = 0; j + first <= count;) { unsigned end = MIN2(FETCH_MAX, count - j); - //end -= (end % incr); + end -= (end % incr); for (i = 0; i+2 < end; i++) { TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, i + 0 + (i&1), i + 1 - (i&1), i + 2); @@ -93,8 +93,6 @@ static void FUNC(struct draw_pt_front_end *frontend, fetch_init(varray, end); varray_flush(varray); j += end; - if (j <= count) - j -= incr; } } break; -- cgit v1.2.3 From 8d709ae1595047b45a81f2fbd22850887fdbfea0 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 8 May 2008 12:10:24 -0400 Subject: fix triangle strips --- src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 12 ++++++++++-- src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h | 12 ++++++++++-- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h index fb49452d8b..d137a758e2 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h @@ -109,19 +109,27 @@ static void FUNC(struct draw_pt_front_end *frontend, i = end; fetch_init(varray, end); varray_flush(varray); + if (j + first + i <= count) { + varray->fetch_start -= 2; + i -= 2; + } } } else { for (j = 0; j + first <= count; j += i) { unsigned end = MIN2(FETCH_MAX, count - j); - end -= (end % incr); - for (i = 0; i+2 < end; i++) { + end -= (end % incr); + for (i = 0; i + 2 < end; i++) { TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, i + 0 + (i&1), i + 1 - (i&1), i + 2); } i = end; fetch_init(varray, end); varray_flush(varray); + if (j + first + i <= count) { + varray->fetch_start -= 2; + i -= 2; + } } } break; diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h index ab28859c35..4bf04fa62b 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h @@ -80,19 +80,27 @@ static void FUNC(struct draw_pt_front_end *frontend, i = end; fetch_init(varray, end); varray_flush(varray); + if (j + first + i <= count) { + varray->fetch_start -= 2; + i -= 2; + } } } else { - for (j = 0; j + first <= count;) { + for (j = 0; j + first <= count; j += i) { unsigned end = MIN2(FETCH_MAX, count - j); end -= (end % incr); for (i = 0; i+2 < end; i++) { TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, i + 0 + (i&1), i + 1 - (i&1), i + 2); } + i = end; fetch_init(varray, end); varray_flush(varray); - j += end; + if (j + first + i <= count) { + varray->fetch_start -= 2; + i -= 2; + } } } break; -- cgit v1.2.3 From 8ea6106f01f38853e9c0f1029da55eb449109aea Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 8 May 2008 15:11:16 -0400 Subject: fix quad strips --- src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 4 ++++ src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h | 4 ++++ 2 files changed, 8 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h index d137a758e2..1395275897 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h @@ -188,6 +188,10 @@ static void FUNC(struct draw_pt_front_end *frontend, i = end; fetch_init(varray, end); varray_flush(varray); + if (j + first + i <= count) { + varray->fetch_start -= 2; + i -= 2; + } } break; diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h index 4bf04fa62b..6e2b16d9be 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h @@ -159,6 +159,10 @@ static void FUNC(struct draw_pt_front_end *frontend, i = end; fetch_init(varray, end); varray_flush(varray); + if (j + first + i <= count) { + varray->fetch_start -= 2; + i -= 2; + } } break; -- cgit v1.2.3 From b514f5f3ba4c9cf6c39cbcdf5bf0d2d8efb8d19b Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 8 May 2008 22:13:21 +0100 Subject: draw: only fill in / compare the part of the translate key we're using. It's quite a big struct & we examine it a lot (too much). Reduce the impact of this by just looking at the active part where possible. --- src/gallium/auxiliary/draw/draw_pt_emit.c | 8 ++++---- src/gallium/auxiliary/draw/draw_pt_fetch.c | 8 +++++--- src/gallium/auxiliary/translate/translate.h | 5 ++--- 3 files changed, 11 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index f9ac16786e..671abc25ce 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -49,6 +49,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit, const struct vertex_info *vinfo; unsigned dst_offset; struct translate_key hw_key; + unsigned keysize; unsigned i; boolean ok; @@ -58,12 +59,10 @@ void draw_pt_emit_prepare( struct pt_emit *emit, return; } - memset(&hw_key, 0, sizeof(hw_key)); - /* Must do this after set_primitive() above: */ vinfo = draw->render->get_vertex_info(draw->render); - + keysize = 2*4 + vinfo->num_attribs * sizeof(hw_key.element[0]); /* Translate from pipeline vertices to hw vertices. */ @@ -122,8 +121,9 @@ void draw_pt_emit_prepare( struct pt_emit *emit, hw_key.output_stride = vinfo->size * 4; if (!emit->translate || - memcmp(&emit->translate->key, &hw_key, sizeof(hw_key)) != 0) + memcmp(&emit->translate->key, &hw_key, keysize) != 0) { + memset((char *)&hw_key + keysize, 0, sizeof(hw_key) - keysize); emit->translate = translate_cache_find(emit->cache, &hw_key); } } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 1f765b73ad..a5bebb4ca1 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -62,10 +62,11 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, unsigned i, nr = 0; unsigned dst_offset = 0; struct translate_key key; + unsigned keysize; fetch->vertex_size = vertex_size; - - memset(&key, 0, sizeof(key)); + keysize = (2*4 + + (draw->pt.nr_vertex_elements + 1) * sizeof(key.element[0])); /* Always emit/leave space for a vertex header. * @@ -110,8 +111,9 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, if (!fetch->translate || - memcmp(&fetch->translate->key, &key, sizeof(key)) != 0) + memcmp(&fetch->translate->key, &key, keysize) != 0) { + memset((char *)&key + keysize, 0, sizeof(key) - keysize); fetch->translate = translate_cache_find(fetch->cache, &key); { diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index 6c15d7e4dc..de6f09d18a 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -47,10 +47,9 @@ struct translate_element { enum pipe_format input_format; - unsigned input_buffer; - unsigned input_offset; - enum pipe_format output_format; + unsigned input_buffer; + unsigned input_offset; /* can't really reduce the size of these */ unsigned output_offset; }; -- cgit v1.2.3 From 044d583ba12689cbe99098eb999854303de57f59 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 8 May 2008 18:38:02 -0600 Subject: gallium: handle null ptrs --- src/gallium/auxiliary/util/u_draw_quad.c | 81 ++++++++++++++++---------------- 1 file changed, 40 insertions(+), 41 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index e659edb088..bf143815d8 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -82,52 +82,51 @@ util_draw_texquad(struct pipe_context *pipe, { struct pipe_buffer *vbuf; uint numAttribs = 2, vertexBytes, i, j; - float *v; vertexBytes = 4 * (4 * numAttribs * sizeof(float)); /* XXX create one-time */ vbuf = pipe->winsys->buffer_create(pipe->winsys, 32, PIPE_BUFFER_USAGE_VERTEX, vertexBytes); - assert(vbuf); - - v = (float *) pipe->winsys->buffer_map(pipe->winsys, vbuf, - PIPE_BUFFER_USAGE_CPU_WRITE); - - /* - * Load vertex buffer - */ - for (i = j = 0; i < 4; i++) { - v[j + 2] = z; /* z */ - v[j + 3] = 1.0; /* w */ - v[j + 6] = 0.0; /* r */ - v[j + 7] = 1.0; /* q */ - j += 8; + if (vbuf) { + float *v = (float *) pipe->winsys->buffer_map(pipe->winsys, vbuf, + PIPE_BUFFER_USAGE_CPU_WRITE); + if (v) { + /* + * Load vertex buffer + */ + for (i = j = 0; i < 4; i++) { + v[j + 2] = z; /* z */ + v[j + 3] = 1.0; /* w */ + v[j + 6] = 0.0; /* r */ + v[j + 7] = 1.0; /* q */ + j += 8; + } + + v[0] = x0; + v[1] = y0; + v[4] = 0.0; /*s*/ + v[5] = 0.0; /*t*/ + + v[8] = x1; + v[9] = y0; + v[12] = 1.0; + v[13] = 0.0; + + v[16] = x1; + v[17] = y1; + v[20] = 1.0; + v[21] = 1.0; + + v[24] = x0; + v[25] = y1; + v[28] = 0.0; + v[29] = 1.0; + + pipe->winsys->buffer_unmap(pipe->winsys, vbuf); + util_draw_vertex_buffer(pipe, vbuf, PIPE_PRIM_TRIANGLE_FAN, 4, 2); + } + + pipe_buffer_reference(pipe->winsys, &vbuf, NULL); } - - v[0] = x0; - v[1] = y0; - v[4] = 0.0; /*s*/ - v[5] = 0.0; /*t*/ - - v[8] = x1; - v[9] = y0; - v[12] = 1.0; - v[13] = 0.0; - - v[16] = x1; - v[17] = y1; - v[20] = 1.0; - v[21] = 1.0; - - v[24] = x0; - v[25] = y1; - v[28] = 0.0; - v[29] = 1.0; - - pipe->winsys->buffer_unmap(pipe->winsys, vbuf); - - util_draw_vertex_buffer(pipe, vbuf, PIPE_PRIM_TRIANGLE_FAN, 4, 2); - - pipe_buffer_reference(pipe->winsys, &vbuf, NULL); } -- cgit v1.2.3 From 2268306f58769dff4b2b1da8bb668bdff2856d70 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 9 May 2008 11:02:10 +0900 Subject: gallium: Don't serialize GPU writes. Only make sure the GPU is finished with a buffer before mapping. The opposite -- waiting for the CPU to be finished before handing to the CPU -- must be done before fencing. --- .../auxiliary/pipebuffer/pb_buffer_fenced.c | 100 ++++++++++----------- 1 file changed, 50 insertions(+), 50 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 2fa0842971..7f236887a9 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -168,6 +168,28 @@ _fenced_buffer_remove(struct fenced_buffer *fenced_buf) } +static INLINE enum pipe_error +_fenced_buffer_finish(struct fenced_buffer *fenced_buf) +{ + struct fenced_buffer_list *fenced_list = fenced_buf->list; + struct pipe_winsys *winsys = fenced_list->winsys; + + debug_warning("waiting for GPU"); + + assert(fenced_buf->fence); + if(fenced_buf->fence) { + if(winsys->fence_finish(winsys, fenced_buf->fence, 0) != 0) { + return PIPE_ERROR; + } + /* Remove from the fenced list */ + _fenced_buffer_remove(fenced_buf); /* TODO: remove consequents */ + } + + fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; + return PIPE_OK; +} + + /** * Free as many fenced buffers from the list head as possible. */ @@ -207,40 +229,6 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, } -/** - * Serialize writes, but allow concurrent reads. - */ -static INLINE enum pipe_error -fenced_buffer_serialize(struct fenced_buffer *fenced_buf, unsigned flags) -{ - struct fenced_buffer_list *fenced_list = fenced_buf->list; - struct pipe_winsys *winsys = fenced_list->winsys; - - /* Allow concurrent reads */ - if(((fenced_buf->flags | flags) & PIPE_BUFFER_USAGE_WRITE) == 0) - return PIPE_OK; - - /* Wait for the CPU to finish */ - if(fenced_buf->mapcount) { - /* FIXME: Use thread conditions variables to signal when mapcount - * reaches zero */ - debug_warning("attemp to write concurrently to buffer"); - /* XXX: we must not fail here in order to support texture mipmap generation - return PIPE_ERROR_RETRY; - */ - } - - /* Wait for the GPU to finish */ - if(fenced_buf->fence) { - if(winsys->fence_finish(winsys, fenced_buf->fence, 0) != 0) - return PIPE_ERROR_RETRY; - _fenced_buffer_remove(fenced_buf); - } - - return PIPE_OK; -} - - static void fenced_buffer_destroy(struct pb_buffer *buf) { @@ -280,15 +268,28 @@ fenced_buffer_map(struct pb_buffer *buf, { struct fenced_buffer *fenced_buf = fenced_buffer(buf); void *map; - assert((flags & ~PIPE_BUFFER_USAGE_CPU_READ_WRITE) == 0); + + assert(!(flags & ~PIPE_BUFFER_USAGE_CPU_READ_WRITE)); + flags &= PIPE_BUFFER_USAGE_CPU_READ_WRITE; - if(fenced_buffer_serialize(fenced_buf, flags) != PIPE_OK) - return NULL; + /* Check for GPU read/write access */ + if(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE) { + /* Wait for the GPU to finish writing */ + _fenced_buffer_finish(fenced_buf); + } + + /* Check for CPU write access (read is OK) */ + if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { + /* this is legal -- just for debugging */ + debug_warning("concurrent CPU writes"); + } map = pb_map(fenced_buf->buffer, flags); - if(map) + if(map) { ++fenced_buf->mapcount; - fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE; + fenced_buf->flags |= flags; + } + return map; } @@ -298,10 +299,12 @@ fenced_buffer_unmap(struct pb_buffer *buf) { struct fenced_buffer *fenced_buf = fenced_buffer(buf); assert(fenced_buf->mapcount); - pb_unmap(fenced_buf->buffer); - --fenced_buf->mapcount; - if(!fenced_buf->mapcount) - fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE; + if(fenced_buf->mapcount) { + pb_unmap(fenced_buf->buffer); + --fenced_buf->mapcount; + if(!fenced_buf->mapcount) + fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE; + } } @@ -334,8 +337,10 @@ fenced_buffer_create(struct fenced_buffer_list *fenced_list, return NULL; buf = CALLOC_STRUCT(fenced_buffer); - if(!buf) + if(!buf) { + pb_reference(&buffer, NULL); return NULL; + } buf->base.base.refcount = 1; buf->base.base.alignment = buffer->base.alignment; @@ -374,7 +379,7 @@ buffer_fence(struct pb_buffer *buf, fenced_list = fenced_buf->list; winsys = fenced_list->winsys; - if(fence == fenced_buf->fence) { + if(!fence || fence == fenced_buf->fence) { /* Handle the same fence case specially, not only because it is a fast * path, but mostly to avoid serializing two writes with the same fence, * as that would bring the hardware down to synchronous operation without @@ -384,11 +389,6 @@ buffer_fence(struct pb_buffer *buf, return; } - if(fenced_buffer_serialize(fenced_buf, flags) != PIPE_OK) { - /* FIXME: propagate error */ - (void)0; - } - _glthread_LOCK_MUTEX(fenced_list->mutex); if (fenced_buf->fence) _fenced_buffer_remove(fenced_buf); -- cgit v1.2.3 From 47f639a62989cea4b3b14cd73bb39de85acec8ea Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 9 May 2008 14:34:51 +0900 Subject: gallium: Disable debug_get_option for release builds on Windows. It always creates the C:\gallium.cfg , even if it does not exists, which might be confusing. --- src/gallium/auxiliary/util/p_debug.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 951dd5a2d4..ce7fb58956 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -154,6 +154,7 @@ debug_get_option(const char *name, const char *dfault) { const char *result; #ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY +#ifdef DEBUG ULONG_PTR iFile = 0; const void *pMap = NULL; const char *sol, *eol, *sep; @@ -183,6 +184,9 @@ debug_get_option(const char *name, const char *dfault) } EngUnmapFile(iFile); } +#else + result = dfault; +#endif #else result = getenv(name); -- cgit v1.2.3 From 54777e124c38812e5e80319048b6c71009bcf9dd Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 9 May 2008 18:47:44 +0900 Subject: gallium: Define util_vsprintf. --- src/gallium/auxiliary/util/u_string.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_string.h b/src/gallium/auxiliary/util/u_string.h index d7d925ee9b..73c88d87b4 100644 --- a/src/gallium/auxiliary/util/u_string.h +++ b/src/gallium/auxiliary/util/u_string.h @@ -54,6 +54,12 @@ extern "C" { int util_vsnprintf(char *, size_t, const char *, va_list); int util_snprintf(char *str, size_t size, const char *format, ...); +static INLINE void +util_vsprintf(char *str, const char *format, va_list ap) +{ + util_vsnprintf(str, (size_t)-1, format, ap); +} + static INLINE void util_sprintf(char *str, const char *format, ...) { @@ -158,6 +164,7 @@ util_memmove(void *dest, const void *src, size_t n) #define util_vsnprintf vsnprintf #define util_snprintf snprintf +#define util_vsprintf vsprintf #define util_sprintf sprintf #define util_strchr strchr #define util_strcmp strcmp -- cgit v1.2.3 From 80474d576c2e92441f6bcc18faae71a38b91bd70 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 May 2008 13:09:58 +0100 Subject: translate: helper functions for mimizing cost of key compares --- src/gallium/auxiliary/translate/translate.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index de6f09d18a..b8210af50c 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -95,6 +95,27 @@ struct translate *translate_lookup_or_create( struct translate_context *tctx, struct translate *translate_create( const struct translate_key *key ); +static INLINE int translate_keysize( const struct translate_key *key ) +{ + return 2 * sizeof(int) + key->nr_elements * sizeof(struct translate_element); +} + +static INLINE int translate_key_compare( const struct translate_key *a, + const struct translate_key *b ) +{ + int keysize = translate_keysize(a); + return memcmp(a, b, keysize); +} + + +static INLINE void translate_key_sanitize( struct translate_key *a ) +{ + int keysize = translate_keysize(a); + char *ptr = (char *)a; + memset(ptr + keysize, 0, sizeof(*a) - keysize); +} + + /******************************************************************************* * Private: */ -- cgit v1.2.3 From 1a03812fb57e956b438cd42ac68978facb49a99d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 May 2008 13:10:15 +0100 Subject: draw: mimize cost of translate key compares, use cache universally --- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 20 ++++++++++++++------ src/gallium/auxiliary/draw/draw_pt_emit.c | 10 +++++----- src/gallium/auxiliary/draw/draw_pt_fetch.c | 7 ++----- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 3 ++- 4 files changed, 23 insertions(+), 17 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 2a19e6916a..86a7d1c730 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -42,6 +42,7 @@ #include "draw_vertex.h" #include "draw_pipe.h" #include "translate/translate.h" +#include "translate/translate_cache.h" /** @@ -75,6 +76,8 @@ struct vbuf_stage { /* Cache point size somewhere it's address won't change: */ float point_size; + + struct translate_cache *cache; }; @@ -220,7 +223,6 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim ) /* Translate from pipeline vertices to hw vertices. */ dst_offset = 0; - memset(&hw_key, 0, sizeof(hw_key)); for (i = 0; i < vbuf->vinfo->num_attribs; i++) { unsigned emit_sz = 0; @@ -277,12 +279,10 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim ) /* Don't bother with caching at this stage: */ if (!vbuf->translate || - memcmp(&vbuf->translate->key, &hw_key, sizeof(hw_key)) != 0) + translate_key_compare(&vbuf->translate->key, &hw_key) != 0) { - if (vbuf->translate) - vbuf->translate->release(vbuf->translate); - - vbuf->translate = translate_create( &hw_key ); + translate_key_sanitize(&hw_key); + vbuf->translate = translate_cache_find(vbuf->cache, &hw_key); vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0); } @@ -433,6 +433,9 @@ static void vbuf_destroy( struct draw_stage *stage ) if (vbuf->render) vbuf->render->destroy( vbuf->render ); + if (vbuf->cache) + translate_cache_destroy(vbuf->cache); + FREE( stage ); } @@ -463,6 +466,11 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw, 16 ); if (!vbuf->indices) goto fail; + + vbuf->cache = translate_cache_create(); + if (!vbuf->cache) + goto fail; + vbuf->vertices = NULL; vbuf->vertex_ptr = vbuf->vertices; diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 671abc25ce..e458cbe533 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -49,7 +49,6 @@ void draw_pt_emit_prepare( struct pt_emit *emit, const struct vertex_info *vinfo; unsigned dst_offset; struct translate_key hw_key; - unsigned keysize; unsigned i; boolean ok; @@ -62,7 +61,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit, /* Must do this after set_primitive() above: */ vinfo = draw->render->get_vertex_info(draw->render); - keysize = 2*4 + vinfo->num_attribs * sizeof(hw_key.element[0]); + /* Translate from pipeline vertices to hw vertices. */ @@ -121,9 +120,9 @@ void draw_pt_emit_prepare( struct pt_emit *emit, hw_key.output_stride = vinfo->size * 4; if (!emit->translate || - memcmp(&emit->translate->key, &hw_key, keysize) != 0) + translate_key_compare(&emit->translate->key, &hw_key) != 0) { - memset((char *)&hw_key + keysize, 0, sizeof(hw_key) - keysize); + translate_key_sanitize(&hw_key); emit->translate = translate_cache_find(emit->cache, &hw_key); } } @@ -197,7 +196,8 @@ struct pt_emit *draw_pt_emit_create( struct draw_context *draw ) void draw_pt_emit_destroy( struct pt_emit *emit ) { - translate_cache_destroy(emit->cache); + if (emit->cache) + translate_cache_destroy(emit->cache); FREE(emit); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index a5bebb4ca1..100117a9ae 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -62,11 +62,8 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, unsigned i, nr = 0; unsigned dst_offset = 0; struct translate_key key; - unsigned keysize; fetch->vertex_size = vertex_size; - keysize = (2*4 + - (draw->pt.nr_vertex_elements + 1) * sizeof(key.element[0])); /* Always emit/leave space for a vertex header. * @@ -111,9 +108,9 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, if (!fetch->translate || - memcmp(&fetch->translate->key, &key, keysize) != 0) + translate_key_compare(&fetch->translate->key, &key) != 0) { - memset((char *)&key + keysize, 0, sizeof(key) - keysize); + translate_key_sanitize(&key); fetch->translate = translate_cache_find(fetch->cache, &key); { diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index a4de341df8..b7b970a297 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -174,8 +174,9 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, /* Don't bother with caching at this stage: */ if (!feme->translate || - memcmp(&feme->translate->key, &key, sizeof(key)) != 0) + translate_key_compare(&feme->translate->key, &key) != 0) { + translate_key_sanitize(&key); feme->translate = translate_cache_find(feme->cache, &key); -- cgit v1.2.3 From 5ee2b5bdcc62e844079829f4f4301aad5374c62e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 May 2008 15:02:59 +0100 Subject: draw: fix translate double-free, minor cleanups --- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 3 --- src/gallium/auxiliary/draw/draw_pt_fetch.c | 3 ++- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 3 ++- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 86a7d1c730..67b9a9503d 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -427,9 +427,6 @@ static void vbuf_destroy( struct draw_stage *stage ) if(vbuf->indices) align_free( vbuf->indices ); - if(vbuf->translate) - vbuf->translate->release( vbuf->translate ); - if (vbuf->render) vbuf->render->destroy( vbuf->render ); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 100117a9ae..b96335b789 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -184,7 +184,8 @@ struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ) void draw_pt_fetch_destroy( struct pt_fetch *fetch ) { - translate_cache_destroy(fetch->cache); + if (fetch->cache) + translate_cache_destroy(fetch->cache); FREE(fetch); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index b7b970a297..4ea7d4359f 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -268,7 +268,8 @@ static void fetch_emit_destroy( struct draw_pt_middle_end *middle ) { struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; - translate_cache_destroy(feme->cache); + if (feme->cache) + translate_cache_destroy(feme->cache); FREE(middle); } -- cgit v1.2.3 From 501be9c7dd0cc5f985c708fa0e5f35d7fd20deb4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 May 2008 15:02:59 +0100 Subject: draw: fix translate double-free, minor cleanups --- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 3 --- src/gallium/auxiliary/draw/draw_pt_fetch.c | 3 ++- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 3 ++- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 2a19e6916a..64a9f9084f 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -427,9 +427,6 @@ static void vbuf_destroy( struct draw_stage *stage ) if(vbuf->indices) align_free( vbuf->indices ); - if(vbuf->translate) - vbuf->translate->release( vbuf->translate ); - if (vbuf->render) vbuf->render->destroy( vbuf->render ); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index d62cd9358b..034e0eccb2 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -223,7 +223,8 @@ struct pt_fetch *draw_pt_fetch_create( struct draw_context *draw ) void draw_pt_fetch_destroy( struct pt_fetch *fetch ) { - translate_cache_destroy(fetch->cache); + if (fetch->cache) + translate_cache_destroy(fetch->cache); FREE(fetch); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 8df4241b82..bdbb039f9e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -320,7 +320,8 @@ static void fetch_emit_destroy( struct draw_pt_middle_end *middle ) { struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; - translate_cache_destroy(feme->cache); + if (feme->cache) + translate_cache_destroy(feme->cache); FREE(middle); } -- cgit v1.2.3 From 7462f0557f9cce73ff2d32e62ef110b5d8622f87 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Fri, 9 May 2008 18:04:16 +0100 Subject: draw: Fix number of vertices allocated in draw_pt_emit(). --- src/gallium/auxiliary/draw/draw_pt_emit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index e458cbe533..ce3a153f64 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -146,7 +146,7 @@ void draw_pt_emit( struct pt_emit *emit, hw_verts = render->allocate_vertices(render, (ushort)translate->key.output_stride, - (ushort)count); + (ushort)vertex_count); if (!hw_verts) { assert(0); return; -- cgit v1.2.3 From c0a6040f568e0c9be07797b2dc2fdd8a3624ec34 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 May 2008 13:09:58 +0100 Subject: translate: helper functions for mimizing cost of key compares --- src/gallium/auxiliary/translate/translate.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index de6f09d18a..b8210af50c 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -95,6 +95,27 @@ struct translate *translate_lookup_or_create( struct translate_context *tctx, struct translate *translate_create( const struct translate_key *key ); +static INLINE int translate_keysize( const struct translate_key *key ) +{ + return 2 * sizeof(int) + key->nr_elements * sizeof(struct translate_element); +} + +static INLINE int translate_key_compare( const struct translate_key *a, + const struct translate_key *b ) +{ + int keysize = translate_keysize(a); + return memcmp(a, b, keysize); +} + + +static INLINE void translate_key_sanitize( struct translate_key *a ) +{ + int keysize = translate_keysize(a); + char *ptr = (char *)a; + memset(ptr + keysize, 0, sizeof(*a) - keysize); +} + + /******************************************************************************* * Private: */ -- cgit v1.2.3 From 7ddb925b8bc6c18eba953e34d2b630a3a6593f05 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 9 May 2008 13:10:15 +0100 Subject: draw: mimize cost of translate key compares, use cache universally --- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 20 ++++++++++++++------ src/gallium/auxiliary/draw/draw_pt_emit.c | 10 +++++----- src/gallium/auxiliary/draw/draw_pt_fetch.c | 7 ++----- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 3 ++- 4 files changed, 23 insertions(+), 17 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 64a9f9084f..67b9a9503d 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -42,6 +42,7 @@ #include "draw_vertex.h" #include "draw_pipe.h" #include "translate/translate.h" +#include "translate/translate_cache.h" /** @@ -75,6 +76,8 @@ struct vbuf_stage { /* Cache point size somewhere it's address won't change: */ float point_size; + + struct translate_cache *cache; }; @@ -220,7 +223,6 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim ) /* Translate from pipeline vertices to hw vertices. */ dst_offset = 0; - memset(&hw_key, 0, sizeof(hw_key)); for (i = 0; i < vbuf->vinfo->num_attribs; i++) { unsigned emit_sz = 0; @@ -277,12 +279,10 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim ) /* Don't bother with caching at this stage: */ if (!vbuf->translate || - memcmp(&vbuf->translate->key, &hw_key, sizeof(hw_key)) != 0) + translate_key_compare(&vbuf->translate->key, &hw_key) != 0) { - if (vbuf->translate) - vbuf->translate->release(vbuf->translate); - - vbuf->translate = translate_create( &hw_key ); + translate_key_sanitize(&hw_key); + vbuf->translate = translate_cache_find(vbuf->cache, &hw_key); vbuf->translate->set_buffer(vbuf->translate, 1, &vbuf->point_size, 0); } @@ -430,6 +430,9 @@ static void vbuf_destroy( struct draw_stage *stage ) if (vbuf->render) vbuf->render->destroy( vbuf->render ); + if (vbuf->cache) + translate_cache_destroy(vbuf->cache); + FREE( stage ); } @@ -460,6 +463,11 @@ struct draw_stage *draw_vbuf_stage( struct draw_context *draw, 16 ); if (!vbuf->indices) goto fail; + + vbuf->cache = translate_cache_create(); + if (!vbuf->cache) + goto fail; + vbuf->vertices = NULL; vbuf->vertex_ptr = vbuf->vertices; diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 999b2007a2..4a854f4362 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -49,7 +49,6 @@ void draw_pt_emit_prepare( struct pt_emit *emit, const struct vertex_info *vinfo; unsigned dst_offset; struct translate_key hw_key; - unsigned keysize; unsigned i; boolean ok; @@ -62,7 +61,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit, /* Must do this after set_primitive() above: */ vinfo = draw->render->get_vertex_info(draw->render); - keysize = 2*4 + vinfo->num_attribs * sizeof(hw_key.element[0]); + /* Translate from pipeline vertices to hw vertices. */ @@ -121,9 +120,9 @@ void draw_pt_emit_prepare( struct pt_emit *emit, hw_key.output_stride = vinfo->size * 4; if (!emit->translate || - memcmp(&emit->translate->key, &hw_key, keysize) != 0) + translate_key_compare(&emit->translate->key, &hw_key) != 0) { - memset((char *)&hw_key + keysize, 0, sizeof(hw_key) - keysize); + translate_key_sanitize(&hw_key); emit->translate = translate_cache_find(emit->cache, &hw_key); } } @@ -244,7 +243,8 @@ struct pt_emit *draw_pt_emit_create( struct draw_context *draw ) void draw_pt_emit_destroy( struct pt_emit *emit ) { - translate_cache_destroy(emit->cache); + if (emit->cache) + translate_cache_destroy(emit->cache); FREE(emit); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 034e0eccb2..07f4c99164 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -62,11 +62,8 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, unsigned i, nr = 0; unsigned dst_offset = 0; struct translate_key key; - unsigned keysize; fetch->vertex_size = vertex_size; - keysize = (2*4 + - (draw->pt.nr_vertex_elements + 1) * sizeof(key.element[0])); /* Always emit/leave space for a vertex header. * @@ -111,9 +108,9 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, if (!fetch->translate || - memcmp(&fetch->translate->key, &key, keysize) != 0) + translate_key_compare(&fetch->translate->key, &key) != 0) { - memset((char *)&key + keysize, 0, sizeof(key) - keysize); + translate_key_sanitize(&key); fetch->translate = translate_cache_find(fetch->cache, &key); { diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index bdbb039f9e..a1d041a74f 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -174,8 +174,9 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, /* Don't bother with caching at this stage: */ if (!feme->translate || - memcmp(&feme->translate->key, &key, sizeof(key)) != 0) + translate_key_compare(&feme->translate->key, &key) != 0) { + translate_key_sanitize(&key); feme->translate = translate_cache_find(feme->cache, &key); -- cgit v1.2.3 From b5e5369da5fc50d63a6ece931fac44b555eb0314 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 12 May 2008 15:20:38 +0100 Subject: draw: add fetch-shade-emit path Enable with TEST_FSE=t. Performs fetch from API-provided vertex buffers, transformation with one of three (two working) hard-coded shaders, and final emit to hardware vertices all in a single pass. Currently only really useful for profiling in conjunction with SP_NO_RAST=t. --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/draw_private.h | 3 + src/gallium/auxiliary/draw/draw_pt.c | 25 +- src/gallium/auxiliary/draw/draw_pt.h | 3 + .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 714 +++++++++++++++++++++ 5 files changed, 742 insertions(+), 4 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index da7eded21f..68e7744cc5 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -26,6 +26,7 @@ C_SOURCES = \ draw_pt_emit.c \ draw_pt_fetch.c \ draw_pt_fetch_emit.c \ + draw_pt_fetch_shade_emit.c \ draw_pt_fetch_shade_pipeline.c \ draw_pt_post_vs.c \ draw_pt_varray.c \ diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index e036d498b8..cbe64cd290 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -124,12 +124,14 @@ struct draw_context struct { struct { struct draw_pt_middle_end *fetch_emit; + /*struct draw_pt_middle_end *fetch_shade_emit;*/ struct draw_pt_middle_end *general; } middle; struct { struct draw_pt_front_end *vcache; struct draw_pt_front_end *varray; + struct draw_pt_front_end *fetch_shade_emit; /* temp hack */ } front; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; @@ -154,6 +156,7 @@ struct draw_context const void *constants; } user; + boolean test_fse; } pt; struct { diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index bccde6c5fd..448deef98c 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -64,7 +64,7 @@ draw_pt_arrays(struct draw_context *draw, opt |= PT_PIPELINE; } - if (!draw->bypass_clipping) { + if (!draw->bypass_clipping && !draw->pt.test_fse) { opt |= PT_CLIPTEST; } @@ -72,16 +72,20 @@ draw_pt_arrays(struct draw_context *draw, opt |= PT_SHADE; } - if (opt) - middle = draw->pt.middle.general; - else + + if (opt == 0) middle = draw->pt.middle.fetch_emit; + else + middle = draw->pt.middle.general; /* Pick the right frontend */ if (draw->pt.user.elts) { frontend = draw->pt.front.vcache; + } else if (opt == PT_SHADE && draw->pt.test_fse) { + /* should be a middle end.. */ + frontend = draw->pt.front.fetch_shade_emit; } else { frontend = draw->pt.front.varray; } @@ -113,6 +117,14 @@ boolean draw_pt_init( struct draw_context *draw ) if (!draw->pt.middle.fetch_emit) return FALSE; + draw->pt.test_fse = GETENV("DRAW_FSE") != NULL; + if (draw->pt.test_fse) { + draw->pt.front.fetch_shade_emit = draw_pt_fetch_shade_emit( draw ); + if (!draw->pt.front.fetch_shade_emit) + return FALSE; + } + + draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); if (!draw->pt.middle.general) return FALSE; @@ -133,6 +145,11 @@ void draw_pt_destroy( struct draw_context *draw ) draw->pt.middle.fetch_emit = NULL; } + if (draw->pt.front.fetch_shade_emit) { + draw->pt.front.fetch_shade_emit->destroy( draw->pt.front.fetch_shade_emit ); + draw->pt.front.fetch_shade_emit = NULL; + } + if (draw->pt.front.vcache) { draw->pt.front.vcache->destroy( draw->pt.front.vcache ); draw->pt.front.vcache = NULL; diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 312fdbe4f4..bcd89f6bd6 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -121,6 +121,8 @@ const void *draw_pt_elt_ptr( struct draw_context *draw, struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ); struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw); +struct draw_pt_front_end *draw_pt_fetch_shade_emit( struct draw_context *draw ); + /* Middle-ends: * * Currently one general-purpose case which can do all possibilities, @@ -132,6 +134,7 @@ struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw); * vertex_elements. */ struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); +//struct draw_pt_middle_end *draw_pt_fetch_shade_emit( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c new file mode 100644 index 0000000000..9e1d1add36 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -0,0 +1,714 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "draw/draw_vs.h" + +#include "translate/translate.h" + +struct fetch_shade_emit; + +struct fse_shader { + struct translate_key key; + + void (*run_linear)( const struct fetch_shade_emit *fse, + unsigned start, + unsigned count, + char *buffer ); +}; + +/* Prototype fetch, shade, emit-hw-verts all in one go. + */ +struct fetch_shade_emit { + struct draw_pt_front_end base; + + struct draw_context *draw; + + struct translate_key key; + + /* Temporaries: + */ + const float *constants; + unsigned pitch[PIPE_MAX_ATTRIBS]; + const ubyte *src[PIPE_MAX_ATTRIBS]; + unsigned prim; + + /* Points to one of the three hardwired example shaders, below: + */ + struct fse_shader *active; + + /* Temporary: A list of hard-wired shaders. Of course the plan + * would be to generate these for a given (vertex-shader, + * translate-key) pair... + */ + struct fse_shader shader[10]; + int nr_shaders; +}; + + + +/* Not quite passthrough yet -- we're still running the 'shader' here, + * inlined into the vertex fetch function. + */ +static void fetch_xyz_rgb_st( const struct fetch_shade_emit *fse, + unsigned start, + unsigned count, + char *buffer ) +{ + unsigned i; + + const float *m = fse->constants; + const float m0 = m[0], m4 = m[4], m8 = m[8], m12 = m[12]; + const float m1 = m[1], m5 = m[5], m9 = m[9], m13 = m[13]; + const float m2 = m[2], m6 = m[6], m10 = m[10], m14 = m[14]; + const float m3 = m[3], m7 = m[7], m11 = m[11], m15 = m[15]; + + const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; + const ubyte *st = fse->src[2] + start * fse->pitch[2]; + + float *out = (float *)buffer; + + + assert(fse->pitch[1] == 0); + + /* loop over vertex attributes (vertex shader inputs) + */ + for (i = 0; i < count; i++) { + { + const float *in = (const float *)xyz; + const float ix = in[0], iy = in[1], iz = in[2]; + + out[0] = m0 * ix + m4 * iy + m8 * iz + m12; + out[1] = m1 * ix + m5 * iy + m9 * iz + m13; + out[2] = m2 * ix + m6 * iy + m10 * iz + m14; + out[3] = m3 * ix + m7 * iy + m11 * iz + m15; + xyz += fse->pitch[0]; + } + + { + out[4] = 1.0f; + out[5] = 1.0f; + out[6] = 1.0f; + out[7] = 1.0f; + } + + { + const float *in = (const float *)st; st += fse->pitch[2]; + out[8] = in[0]; + out[9] = in[1]; + out[10] = 0.0f; + out[11] = 1.0f; + } + + out += 12; + } +} + + + +static void fetch_xyz_rgb( const struct fetch_shade_emit *fse, + unsigned start, + unsigned count, + char *buffer ) +{ + unsigned i; + + const float *m = (const float *)fse->constants; + const float m0 = m[0], m4 = m[4], m8 = m[8], m12 = m[12]; + const float m1 = m[1], m5 = m[5], m9 = m[9], m13 = m[13]; + const float m2 = m[2], m6 = m[6], m10 = m[10], m14 = m[14]; + const float m3 = m[3], m7 = m[7], m11 = m[11], m15 = m[15]; + + const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; + const ubyte *rgb = fse->src[1] + start * fse->pitch[1]; + + float *out = (float *)buffer; + +// debug_printf("rgb %f %f %f\n", rgb[0], rgb[1], rgb[2]); + + + for (i = 0; i < count; i++) { + { + const float *in = (const float *)xyz; + const float ix = in[0], iy = in[1], iz = in[2]; + + out[0] = m0 * ix + m4 * iy + m8 * iz + m12; + out[1] = m1 * ix + m5 * iy + m9 * iz + m13; + out[2] = m2 * ix + m6 * iy + m10 * iz + m14; + out[3] = m3 * ix + m7 * iy + m11 * iz + m15; + xyz += fse->pitch[0]; + } + + { + const float *in = (const float *)rgb; + out[4] = in[0]; + out[5] = in[1]; + out[6] = in[2]; + out[7] = 1.0f; + rgb += fse->pitch[1]; + } + + out += 8; + } +} + + + + +static void fetch_xyz_rgb_psiz( const struct fetch_shade_emit *fse, + unsigned start, + unsigned count, + char *buffer ) +{ + unsigned i; + + const float *m = (const float *)fse->constants; + const float m0 = m[0], m4 = m[4], m8 = m[8], m12 = m[12]; + const float m1 = m[1], m5 = m[5], m9 = m[9], m13 = m[13]; + const float m2 = m[2], m6 = m[6], m10 = m[10], m14 = m[14]; + const float m3 = m[3], m7 = m[7], m11 = m[11], m15 = m[15]; + + const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; + const float *rgb = (const float *)(fse->src[1] + start * fse->pitch[1]); + const float psiz = 1.0; + + float *out = (float *)buffer; + + + assert(fse->pitch[1] == 0); + + for (i = 0; i < count; i++) { + { + const float *in = (const float *)xyz; + const float ix = in[0], iy = in[1], iz = in[2]; + + out[0] = m0 * ix + m4 * iy + m8 * iz + m12; + out[1] = m1 * ix + m5 * iy + m9 * iz + m13; + out[2] = m2 * ix + m6 * iy + m10 * iz + m14; + out[3] = m3 * ix + m7 * iy + m11 * iz + m15; + xyz += fse->pitch[0]; + } + + { + out[4] = rgb[0]; + out[5] = rgb[1]; + out[6] = rgb[2]; + out[7] = 1.0f; + } + + { + out[8] = psiz; + } + + out += 9; + } +} + + + + +static boolean set_prim( struct fetch_shade_emit *fse, + unsigned prim, + unsigned count ) +{ + struct draw_context *draw = fse->draw; + + fse->prim = prim; + + switch (prim) { + case PIPE_PRIM_LINE_LOOP: + if (count > 1024) + return FALSE; + draw->render->set_primitive( draw->render, PIPE_PRIM_LINE_STRIP ); + break; + + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + if (count > 1024) + return FALSE; + draw->render->set_primitive( draw->render, prim ); + break; + + case PIPE_PRIM_QUADS: + case PIPE_PRIM_QUAD_STRIP: + draw->render->set_primitive( draw->render, PIPE_PRIM_TRIANGLES ); + break; + + default: + draw->render->set_primitive( draw->render, prim ); + break; + } + + return TRUE; +} + + + + + + +static void fse_prepare( struct draw_pt_front_end *fe, + unsigned prim, + struct draw_pt_middle_end *unused, + unsigned opt ) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)fe; + struct draw_context *draw = fse->draw; + unsigned num_vs_inputs = draw->vertex_shader->info.num_inputs; + unsigned num_vs_outputs = draw->vertex_shader->info.num_outputs; + const struct vertex_info *vinfo; + unsigned i; + boolean need_psize = 0; + + + if (draw->pt.user.elts) { + assert(0); + return ; + } + + if (!set_prim(fse, prim, /*count*/1022 )) { + assert(0); + return ; + } + + /* Must do this after set_primitive() above: + */ + vinfo = draw->render->get_vertex_info(draw->render); + + + + fse->key.nr_elements = MAX2(num_vs_outputs, /* outputs - translate to hw format */ + num_vs_inputs); /* inputs - fetch from api format */ + + fse->key.output_stride = vinfo->size * 4; + memset(fse->key.element, 0, + fse->key.nr_elements * sizeof(fse->key.element[0])); + + for (i = 0; i < num_vs_inputs; i++) { + const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; + fse->key.element[i].input_format = src->src_format; + + /* Consider ignoring these at this point, ie make generated + * programs independent of this state: + */ + fse->key.element[i].input_buffer = 0; //src->vertex_buffer_index; + fse->key.element[i].input_offset = 0; //src->src_offset; + } + + + { + unsigned dst_offset = 0; + + for (i = 0; i < vinfo->num_attribs; i++) { + unsigned emit_sz = 0; + unsigned output_format = PIPE_FORMAT_NONE; + unsigned vs_output = vinfo->src_index[i]; + + switch (vinfo->emit[i]) { + case EMIT_4F: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); + break; + case EMIT_3F: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); + break; + case EMIT_2F: + output_format = PIPE_FORMAT_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); + break; + case EMIT_1F: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + break; + case EMIT_1F_PSIZE: + need_psize = 1; + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + vs_output = num_vs_outputs + 1; + + break; + default: + assert(0); + break; + } + + /* The elements in the key correspond to vertex shader output + * numbers, not to positions in the hw vertex description -- + * that's handled by the output_offset field. + */ + fse->key.element[vs_output].output_format = output_format; + fse->key.element[vs_output].output_offset = dst_offset; + + dst_offset += emit_sz; + assert(fse->key.output_stride >= dst_offset); + } + } + + /* To make psize work, really need to tell the vertex shader to + * copy that value from input->output. For 'translate' this was + * implicit for all elements. + */ +#if 0 + if (need_psize) { + unsigned input = num_vs_inputs + 1; + const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; + fse->key.element[i].input_format = PIPE_FORMAT_R32_FLOAT; + fse->key.element[i].input_buffer = 0; //nr_buffers + 1; + fse->key.element[i].input_offset = 0; + + fse->key.nr_elements += 1; + + } +#endif + + fse->constants = draw->pt.user.constants; + + /* Would normally look up a vertex shader and peruse its list of + * varients somehow. We omitted that step and put all the + * hardcoded "shaders" into an array. We're just making the + * assumption that this happens to be a matching shader... ie + * you're running isosurf, aren't you? + */ + fse->active = NULL; + for (i = 0; i < fse->nr_shaders; i++) { + if (translate_key_compare( &fse->key, &fse->shader[i].key) == 0) + fse->active = &fse->shader[i]; + } + + if (!fse->active) { + assert(0); + return ; + } + + /* Now set buffer pointers: + */ + for (i = 0; i < num_vs_inputs; i++) { + unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index; + + fse->src[i] = ((const ubyte *) draw->pt.user.vbuffer[buf] + + draw->pt.vertex_buffer[buf].buffer_offset + + draw->pt.vertex_element[i].src_offset); + + fse->pitch[i] = draw->pt.vertex_buffer[buf].pitch; + + } + + + //return TRUE; +} + + +static boolean split_prim_inplace(unsigned prim, unsigned *first, unsigned *incr) +{ + switch (prim) { + case PIPE_PRIM_POINTS: + *first = 1; + *incr = 1; + return TRUE; + case PIPE_PRIM_LINES: + *first = 2; + *incr = 2; + return TRUE; + case PIPE_PRIM_LINE_STRIP: + *first = 2; + *incr = 1; + return TRUE; + case PIPE_PRIM_TRIANGLES: + *first = 3; + *incr = 3; + return TRUE; + case PIPE_PRIM_TRIANGLE_STRIP: + *first = 3; + *incr = 1; + return TRUE; + case PIPE_PRIM_QUADS: + *first = 4; + *incr = 4; + return TRUE; + case PIPE_PRIM_QUAD_STRIP: + *first = 4; + *incr = 2; + return TRUE; + default: + *first = 0; + *incr = 1; /* set to one so that count % incr works */ + return FALSE; + } +} + + + + +#define INDEX(i) (start + (i)) +static void fse_render_linear( struct vbuf_render *render, + unsigned prim, + unsigned start, + unsigned length ) +{ + ushort *tmp = NULL; + unsigned i, j; + + switch (prim) { + case PIPE_PRIM_LINE_LOOP: + tmp = MALLOC( sizeof(ushort) * (length + 1) ); + + for (i = 0; i < length; i++) + tmp[i] = INDEX(i); + tmp[length] = 0; + + render->draw( render, + tmp, + length+1 ); + break; + + + case PIPE_PRIM_QUAD_STRIP: + tmp = MALLOC( sizeof(ushort) * (length / 2 * 6) ); + + for (j = i = 0; i + 3 < length; i += 2, j += 6) { + tmp[j+0] = INDEX(i+0); + tmp[j+1] = INDEX(i+1); + tmp[j+2] = INDEX(i+3); + + tmp[j+3] = INDEX(i+2); + tmp[j+4] = INDEX(i+0); + tmp[j+5] = INDEX(i+3); + } + + if (j) + render->draw( render, tmp, j ); + break; + + case PIPE_PRIM_QUADS: + tmp = MALLOC( sizeof(int) * (length / 4 * 6) ); + + for (j = i = 0; i + 3 < length; i += 4, j += 6) { + tmp[j+0] = INDEX(i+0); + tmp[j+1] = INDEX(i+1); + tmp[j+2] = INDEX(i+3); + + tmp[j+3] = INDEX(i+1); + tmp[j+4] = INDEX(i+2); + tmp[j+5] = INDEX(i+3); + } + + if (j) + render->draw( render, tmp, j ); + break; + + default: + render->draw_arrays( render, + start, + length ); + break; + } + + if (tmp) + FREE(tmp); +} + + + +static boolean do_draw( struct fetch_shade_emit *fse, + unsigned start, unsigned count ) +{ + struct draw_context *draw = fse->draw; + + char *hw_verts = + draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)count ); + + if (!hw_verts) + return FALSE; + + /* Single routine to fetch vertices, run shader and emit HW verts. + * Clipping and viewport transformation are done on hardware. + */ + fse->active->run_linear( fse, + start, count, + hw_verts ); + + /* Draw arrays path to avoid re-emitting index list again and + * again. + */ + fse_render_linear( draw->render, + fse->prim, + 0, + count ); + + + draw->render->release_vertices( draw->render, + hw_verts, + fse->key.output_stride, + count ); + + return TRUE; +} + + +static void +fse_run(struct draw_pt_front_end *fe, + pt_elt_func elt_func, + const void *elt_ptr, + unsigned count) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)fe; + unsigned i = 0; + unsigned first, incr; + unsigned start = elt_func(elt_ptr, 0); + + //debug_printf("%s prim %d start %d count %d\n", __FUNCTION__, prim, start, count); + + split_prim_inplace(fse->prim, &first, &incr); + + count -= (count - first) % incr; + + while (i + first <= count) { + int nr = MIN2( count - i, 1024 ); + + /* snap to prim boundary + */ + nr -= (nr - first) % incr; + + if (!do_draw( fse, start + i, nr )) { + assert(0); + return ; + } + + /* increment allowing for repeated vertices + */ + i += nr - (first - incr); + } + + //return TRUE; +} + + +static void fse_finish( struct draw_pt_front_end *frontend ) +{ +} + + +static void +fse_destroy( struct draw_pt_front_end *frontend ) +{ + FREE(frontend); +} + +struct draw_pt_front_end *draw_pt_fetch_shade_emit( struct draw_context *draw ) +{ + struct fetch_shade_emit *fse = CALLOC_STRUCT(fetch_shade_emit); + if (!fse) + return NULL; + + fse->base.prepare = fse_prepare; + fse->base.run = fse_run; + fse->base.finish = fse_finish; + fse->base.destroy = fse_destroy; + fse->draw = draw; + + fse->shader[0].run_linear = fetch_xyz_rgb_st; + fse->shader[0].key.nr_elements = 3; + fse->shader[0].key.output_stride = 12 * sizeof(float); + + fse->shader[0].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[0].key.element[0].input_buffer = 0; + fse->shader[0].key.element[0].input_offset = 0; + fse->shader[0].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[0].key.element[0].output_offset = 0; + + fse->shader[0].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[0].key.element[1].input_buffer = 0; + fse->shader[0].key.element[1].input_offset = 0; + fse->shader[0].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[0].key.element[1].output_offset = 16; + + fse->shader[0].key.element[1].input_format = PIPE_FORMAT_R32G32_FLOAT; + fse->shader[0].key.element[1].input_buffer = 0; + fse->shader[0].key.element[1].input_offset = 0; + fse->shader[0].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[0].key.element[1].output_offset = 32; + + fse->shader[1].run_linear = fetch_xyz_rgb; + fse->shader[1].key.nr_elements = 2; + fse->shader[1].key.output_stride = 8 * sizeof(float); + + fse->shader[1].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[1].key.element[0].input_buffer = 0; + fse->shader[1].key.element[0].input_offset = 0; + fse->shader[1].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[1].key.element[0].output_offset = 0; + + fse->shader[1].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[1].key.element[1].input_buffer = 0; + fse->shader[1].key.element[1].input_offset = 0; + fse->shader[1].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[1].key.element[1].output_offset = 16; + + fse->shader[2].run_linear = fetch_xyz_rgb_psiz; + fse->shader[2].key.nr_elements = 3; + fse->shader[2].key.output_stride = 9 * sizeof(float); + + fse->shader[2].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[2].key.element[0].input_buffer = 0; + fse->shader[2].key.element[0].input_offset = 0; + fse->shader[2].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[2].key.element[0].output_offset = 0; + + fse->shader[2].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[2].key.element[1].input_buffer = 0; + fse->shader[2].key.element[1].input_offset = 0; + fse->shader[2].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[2].key.element[1].output_offset = 16; + + /* psize is special + * -- effectively add it here as another input!?! + * -- who knows how to add it as a buffer? + */ + fse->shader[2].key.element[2].input_format = PIPE_FORMAT_R32_FLOAT; + fse->shader[2].key.element[2].input_buffer = 0; + fse->shader[2].key.element[2].input_offset = 0; + fse->shader[2].key.element[2].output_format = PIPE_FORMAT_R32_FLOAT; + fse->shader[2].key.element[2].output_offset = 32; + + fse->nr_shaders = 3; + + return &fse->base; +} -- cgit v1.2.3 From 44463b2997826cd14def00abf724a7a65a4fc7cb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 12 May 2008 17:36:35 +0100 Subject: draw: streamline the varray path - drop support for running the pipeline (ie. don't populate the flags values) - pass through all split-able primitives intact to the middle end - only primitives that can't be split are shunted on the draw-element path --- src/gallium/auxiliary/draw/draw_pt.c | 2 +- src/gallium/auxiliary/draw/draw_pt_varray.c | 106 +++---------- .../auxiliary/draw/draw_pt_varray_tmp_linear.h | 165 +++------------------ 3 files changed, 44 insertions(+), 229 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 448deef98c..d9e73a2396 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -81,7 +81,7 @@ draw_pt_arrays(struct draw_context *draw, /* Pick the right frontend */ - if (draw->pt.user.elts) { + if (draw->pt.user.elts || (opt & PT_PIPELINE)) { frontend = draw->pt.front.vcache; } else if (opt == PT_SHADE && draw->pt.test_fse) { /* should be a middle end.. */ diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index 59a9569270..d92ad4fda1 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -154,9 +154,9 @@ static boolean split_prim_inplace(unsigned prim, unsigned *first, unsigned *incr static INLINE void add_draw_el(struct varray_frontend *varray, - int idx, ushort flags) + int idx) { - varray->draw_elts[varray->draw_count++] = idx | flags; + varray->draw_elts[varray->draw_count++] = idx; } @@ -165,106 +165,47 @@ static INLINE void varray_triangle( struct varray_frontend *varray, unsigned i1, unsigned i2 ) { - add_draw_el(varray, i0, 0); - add_draw_el(varray, i1, 0); - add_draw_el(varray, i2, 0); -} - -static INLINE void varray_triangle_flags( struct varray_frontend *varray, - ushort flags, - unsigned i0, - unsigned i1, - unsigned i2 ) -{ - add_draw_el(varray, i0, flags); - add_draw_el(varray, i1, 0); - add_draw_el(varray, i2, 0); + add_draw_el(varray, i0); + add_draw_el(varray, i1); + add_draw_el(varray, i2); } static INLINE void varray_line( struct varray_frontend *varray, unsigned i0, unsigned i1 ) { - add_draw_el(varray, i0, 0); - add_draw_el(varray, i1, 0); -} - - -static INLINE void varray_line_flags( struct varray_frontend *varray, - ushort flags, - unsigned i0, - unsigned i1 ) -{ - add_draw_el(varray, i0, flags); - add_draw_el(varray, i1, 0); + add_draw_el(varray, i0); + add_draw_el(varray, i1); } static INLINE void varray_point( struct varray_frontend *varray, unsigned i0 ) { - add_draw_el(varray, i0, 0); -} - -static INLINE void varray_quad( struct varray_frontend *varray, - unsigned i0, - unsigned i1, - unsigned i2, - unsigned i3 ) -{ - varray_triangle( varray, i0, i1, i3 ); - varray_triangle( varray, i1, i2, i3 ); + add_draw_el(varray, i0); } -static INLINE void varray_ef_quad( struct varray_frontend *varray, - unsigned i0, - unsigned i1, - unsigned i2, - unsigned i3 ) -{ - const unsigned omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2; - const unsigned omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1; - - varray_triangle_flags( varray, - DRAW_PIPE_RESET_STIPPLE | omitEdge1, - i0, i1, i3 ); - - varray_triangle_flags( varray, - omitEdge2, - i1, i2, i3 ); -} -/* At least for now, we're back to using a template include file for - * this. The two paths aren't too different though - it may be - * possible to reunify them. - */ -#define TRIANGLE(vc,flags,i0,i1,i2) varray_triangle_flags(vc,flags,i0,i1,i2) -#define QUAD(vc,i0,i1,i2,i3) varray_ef_quad(vc,i0,i1,i2,i3) -#define LINE(vc,flags,i0,i1) varray_line_flags(vc,flags,i0,i1) -#define POINT(vc,i0) varray_point(vc,i0) -#define FUNC varray_run_extras -#include "draw_pt_varray_tmp.h" -#define TRIANGLE(vc,flags,i0,i1,i2) varray_triangle(vc,i0,i1,i2) -#define QUAD(vc,i0,i1,i2,i3) varray_quad(vc,i0,i1,i2,i3) -#define LINE(vc,flags,i0,i1) varray_line(vc,i0,i1) +#define TRIANGLE(vc,i0,i1,i2) varray_triangle(vc,i0,i1,i2) +#define LINE(vc,i0,i1) varray_line(vc,i0,i1) #define POINT(vc,i0) varray_point(vc,i0) #define FUNC varray_run #include "draw_pt_varray_tmp_linear.h" -static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { +static unsigned decompose_prim[PIPE_PRIM_POLYGON + 1] = { PIPE_PRIM_POINTS, PIPE_PRIM_LINES, - PIPE_PRIM_LINES, - PIPE_PRIM_LINES, + PIPE_PRIM_LINE_STRIP, + PIPE_PRIM_LINES, /* decomposed */ PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES + PIPE_PRIM_TRIANGLE_STRIP, + PIPE_PRIM_TRIANGLES, /* decomposed */ + PIPE_PRIM_QUADS, + PIPE_PRIM_QUAD_STRIP, + PIPE_PRIM_TRIANGLES /* decomposed */ }; @@ -276,17 +217,10 @@ static void varray_prepare(struct draw_pt_front_end *frontend, { struct varray_frontend *varray = (struct varray_frontend *)frontend; - if (opt & PT_PIPELINE) - { - varray->base.run = varray_run_extras; - } - else - { - varray->base.run = varray_run; - } + varray->base.run = varray_run; varray->input_prim = prim; - varray->output_prim = reduced_prim[prim]; + varray->output_prim = decompose_prim[prim]; varray->middle = middle; middle->prepare(middle, varray->output_prim, opt); diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h index 6e2b16d9be..b6f1f0cadc 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h @@ -1,3 +1,7 @@ +static unsigned trim( unsigned count, unsigned first, unsigned incr ) +{ + return count - (count - first) % incr; +} static void FUNC(struct draw_pt_front_end *frontend, pt_elt_func get_elt, @@ -5,12 +9,9 @@ static void FUNC(struct draw_pt_front_end *frontend, unsigned count) { struct varray_frontend *varray = (struct varray_frontend *)frontend; - struct draw_context *draw = varray->draw; unsigned start = (unsigned)elts; - boolean flatfirst = (draw->rasterizer->flatshade && - draw->rasterizer->flatshade_first); - unsigned i, j, flags; + unsigned i, j; unsigned first, incr; varray->fetch_start = start; @@ -27,26 +28,30 @@ static void FUNC(struct draw_pt_front_end *frontend, case PIPE_PRIM_POINTS: case PIPE_PRIM_LINES: case PIPE_PRIM_TRIANGLES: - j = 0; - while (j + first <= count) { - unsigned end = MIN2(FETCH_MAX, count - j); - end -= (end % incr); - varray_flush_linear(varray, start + j, end); - j += end; + case PIPE_PRIM_LINE_STRIP: + case PIPE_PRIM_TRIANGLE_STRIP: + case PIPE_PRIM_QUADS: + case PIPE_PRIM_QUAD_STRIP: + + for (j = 0; j < count;) { + unsigned remaining = count - j; + unsigned nr = trim( MIN2(FETCH_MAX, remaining), first, incr ); + varray_flush_linear(varray, start + j, nr); + j += nr; + if (nr != remaining) + j -= (first - incr); } break; case PIPE_PRIM_LINE_LOOP: if (count >= 2) { - flags = DRAW_PIPE_RESET_STIPPLE; - for (j = 0; j + first <= count; j += i) { unsigned end = MIN2(FETCH_MAX, count - j); end -= (end % incr); - for (i = 1; i < end; i++, flags = 0) { - LINE(varray, flags, i - 1, i); + for (i = 1; i < end; i++) { + LINE(varray, i - 1, i); } - LINE(varray, flags, i - 1, 0); + LINE(varray, i - 1, 0); i = end; fetch_init(varray, end); varray_flush(varray); @@ -54,145 +59,21 @@ static void FUNC(struct draw_pt_front_end *frontend, } break; - case PIPE_PRIM_LINE_STRIP: - flags = DRAW_PIPE_RESET_STIPPLE; - for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(FETCH_MAX, count - j); - end -= (end % incr); - for (i = 1; i < end; i++, flags = 0) { - LINE(varray, flags, i - 1, i); - } - i = end; - fetch_init(varray, end); - varray_flush(varray); - } - break; - - case PIPE_PRIM_TRIANGLE_STRIP: - if (flatfirst) { - for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(FETCH_MAX, count - j); - end -= (end % incr); - for (i = 0; i+2 < end; i++) { - TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - i + 0, i + 1 + (i&1), i + 2 - (i&1)); - } - i = end; - fetch_init(varray, end); - varray_flush(varray); - if (j + first + i <= count) { - varray->fetch_start -= 2; - i -= 2; - } - } - } - else { - for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(FETCH_MAX, count - j); - end -= (end % incr); - for (i = 0; i+2 < end; i++) { - TRIANGLE(varray, DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - i + 0 + (i&1), i + 1 - (i&1), i + 2); - } - i = end; - fetch_init(varray, end); - varray_flush(varray); - if (j + first + i <= count) { - varray->fetch_start -= 2; - i -= 2; - } - } - } - break; + case PIPE_PRIM_POLYGON: case PIPE_PRIM_TRIANGLE_FAN: - if (count >= 3) { - if (flatfirst) { - flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; - for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(FETCH_MAX, count - j); - end -= (end % incr); - for (i = 0; i+2 < end; i++) { - TRIANGLE(varray, flags, i + 1, i + 2, 0); - } - i = end; - fetch_init(varray, end); - varray_flush(varray); - } - } - else { - flags = DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL; - for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(FETCH_MAX, count - j); - end -= (end % incr); - for (i = 0; i+2 < end; i++) { - TRIANGLE(varray, flags, 0, i + 1, i + 2); - } - i = end; - fetch_init(varray, end); - varray_flush(varray); - } - } - } - break; - - case PIPE_PRIM_QUADS: - for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(FETCH_MAX, count - j); - end -= (end % incr); - for (i = 0; i+3 < end; i += 4) { - QUAD(varray, i + 0, i + 1, i + 2, i + 3); - } - i = end; - fetch_init(varray, end); - varray_flush(varray); - } - break; - - case PIPE_PRIM_QUAD_STRIP: for (j = 0; j + first <= count; j += i) { unsigned end = MIN2(FETCH_MAX, count - j); end -= (end % incr); - for (i = 0; i+3 < end; i += 2) { - QUAD(varray, i + 2, i + 0, i + 1, i + 3); + for (i = 2; i < end; i++) { + TRIANGLE(varray, 0, i - 1, i); } i = end; fetch_init(varray, end); varray_flush(varray); - if (j + first + i <= count) { - varray->fetch_start -= 2; - i -= 2; - } } break; - case PIPE_PRIM_POLYGON: - { - /* These bitflags look a little odd because we submit the - * vertices as (1,2,0) to satisfy flatshade requirements. - */ - const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2; - const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0; - const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1; - - flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; - for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(FETCH_MAX, count - j); - end -= (end % incr); - for (i = 0; i+2 < end; i++, flags = edge_middle) { - - if (i + 3 == count) - flags |= edge_last; - - TRIANGLE(varray, flags, i + 1, i + 2, 0); - } - i = end; - fetch_init(varray, end); - varray_flush(varray); - } - } - break; - default: assert(0); break; -- cgit v1.2.3 From bbda45ec769120324f44febf00c6bb170f594f23 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 12 May 2008 19:40:20 +0100 Subject: draw: turn fse path into a middle end Also add some util functions in pt_util.c --- src/gallium/auxiliary/draw/Makefile | 2 + src/gallium/auxiliary/draw/draw_private.h | 2 +- src/gallium/auxiliary/draw/draw_pt.c | 34 +- src/gallium/auxiliary/draw/draw_pt.h | 9 +- .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 39 +- src/gallium/auxiliary/draw/draw_pt_middle_fse.c | 705 +++++++++++++++++++++ src/gallium/auxiliary/draw/draw_pt_util.c | 103 +++ src/gallium/auxiliary/draw/draw_pt_varray.c | 45 -- src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 2 +- .../auxiliary/draw/draw_pt_varray_tmp_linear.h | 9 +- src/gallium/auxiliary/draw/draw_pt_vcache.c | 15 +- 11 files changed, 847 insertions(+), 118 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pt_middle_fse.c create mode 100644 src/gallium/auxiliary/draw/draw_pt_util.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 68e7744cc5..67d78bdbbd 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -27,8 +27,10 @@ C_SOURCES = \ draw_pt_fetch.c \ draw_pt_fetch_emit.c \ draw_pt_fetch_shade_emit.c \ + draw_pt_middle_fse.c \ draw_pt_fetch_shade_pipeline.c \ draw_pt_post_vs.c \ + draw_pt_util.c \ draw_pt_varray.c \ draw_pt_vcache.c \ draw_vertex.c \ diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index cbe64cd290..86b901a3c8 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -124,7 +124,7 @@ struct draw_context struct { struct { struct draw_pt_middle_end *fetch_emit; - /*struct draw_pt_middle_end *fetch_shade_emit;*/ + struct draw_pt_middle_end *fetch_shade_emit; struct draw_pt_middle_end *general; } middle; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index d9e73a2396..91e35db819 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -75,6 +75,8 @@ draw_pt_arrays(struct draw_context *draw, if (opt == 0) middle = draw->pt.middle.fetch_emit; + else if (opt == PT_SHADE && draw->pt.test_fse) + middle = draw->pt.middle.fetch_shade_emit; else middle = draw->pt.middle.general; @@ -83,9 +85,11 @@ draw_pt_arrays(struct draw_context *draw, */ if (draw->pt.user.elts || (opt & PT_PIPELINE)) { frontend = draw->pt.front.vcache; +#if 0 } else if (opt == PT_SHADE && draw->pt.test_fse) { /* should be a middle end.. */ frontend = draw->pt.front.fetch_shade_emit; +#endif } else { frontend = draw->pt.front.varray; } @@ -105,6 +109,8 @@ draw_pt_arrays(struct draw_context *draw, boolean draw_pt_init( struct draw_context *draw ) { + draw->pt.test_fse = GETENV("DRAW_FSE") != NULL; + draw->pt.front.vcache = draw_pt_vcache( draw ); if (!draw->pt.front.vcache) return FALSE; @@ -117,8 +123,11 @@ boolean draw_pt_init( struct draw_context *draw ) if (!draw->pt.middle.fetch_emit) return FALSE; - draw->pt.test_fse = GETENV("DRAW_FSE") != NULL; if (draw->pt.test_fse) { + draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw ); + if (!draw->pt.middle.fetch_shade_emit) + return FALSE; + draw->pt.front.fetch_shade_emit = draw_pt_fetch_shade_emit( draw ); if (!draw->pt.front.fetch_shade_emit) return FALSE; @@ -145,6 +154,11 @@ void draw_pt_destroy( struct draw_context *draw ) draw->pt.middle.fetch_emit = NULL; } + if (draw->pt.middle.fetch_shade_emit) { + draw->pt.middle.fetch_shade_emit->destroy( draw->pt.middle.fetch_shade_emit ); + draw->pt.middle.fetch_shade_emit = NULL; + } + if (draw->pt.front.fetch_shade_emit) { draw->pt.front.fetch_shade_emit->destroy( draw->pt.front.fetch_shade_emit ); draw->pt.front.fetch_shade_emit = NULL; @@ -163,19 +177,6 @@ void draw_pt_destroy( struct draw_context *draw ) -static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { - PIPE_PRIM_POINTS, - PIPE_PRIM_LINES, - PIPE_PRIM_LINES, - PIPE_PRIM_LINES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES -}; - /** * Draw vertex arrays @@ -188,9 +189,10 @@ void draw_arrays(struct draw_context *draw, unsigned prim, unsigned start, unsigned count) { - if (reduced_prim[prim] != draw->reduced_prim) { + unsigned reduced_prim = draw_pt_reduced_prim(prim); + if (reduced_prim != draw->reduced_prim) { draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); - draw->reduced_prim = reduced_prim[prim]; + draw->reduced_prim = reduced_prim; } /* drawing done here: */ diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index bcd89f6bd6..cdae46b8d2 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -134,7 +134,7 @@ struct draw_pt_front_end *draw_pt_fetch_shade_emit( struct draw_context *draw ); * vertex_elements. */ struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ); -//struct draw_pt_middle_end *draw_pt_fetch_shade_emit( struct draw_context *draw ); +struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw ); struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit(struct draw_context *draw); @@ -213,4 +213,11 @@ struct pt_post_vs *draw_pt_post_vs_create( struct draw_context *draw ); void draw_pt_post_vs_destroy( struct pt_post_vs *pvs ); +/******************************************************************************* + * Utils: + */ +void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr); +unsigned draw_pt_reduced_prim(unsigned prim); + + #endif diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 9e1d1add36..f756d3e0bb 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -434,43 +434,6 @@ static void fse_prepare( struct draw_pt_front_end *fe, } -static boolean split_prim_inplace(unsigned prim, unsigned *first, unsigned *incr) -{ - switch (prim) { - case PIPE_PRIM_POINTS: - *first = 1; - *incr = 1; - return TRUE; - case PIPE_PRIM_LINES: - *first = 2; - *incr = 2; - return TRUE; - case PIPE_PRIM_LINE_STRIP: - *first = 2; - *incr = 1; - return TRUE; - case PIPE_PRIM_TRIANGLES: - *first = 3; - *incr = 3; - return TRUE; - case PIPE_PRIM_TRIANGLE_STRIP: - *first = 3; - *incr = 1; - return TRUE; - case PIPE_PRIM_QUADS: - *first = 4; - *incr = 4; - return TRUE; - case PIPE_PRIM_QUAD_STRIP: - *first = 4; - *incr = 2; - return TRUE; - default: - *first = 0; - *incr = 1; /* set to one so that count % incr works */ - return FALSE; - } -} @@ -596,7 +559,7 @@ fse_run(struct draw_pt_front_end *fe, //debug_printf("%s prim %d start %d count %d\n", __FUNCTION__, prim, start, count); - split_prim_inplace(fse->prim, &first, &incr); + draw_pt_split_prim(fse->prim, &first, &incr); count -= (count - first) % incr; diff --git a/src/gallium/auxiliary/draw/draw_pt_middle_fse.c b/src/gallium/auxiliary/draw/draw_pt_middle_fse.c new file mode 100644 index 0000000000..cdb7d260da --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_middle_fse.c @@ -0,0 +1,705 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "draw/draw_vs.h" + +#include "translate/translate.h" + +struct fetch_shade_emit; + +struct fse_shader { + struct translate_key key; + + void (*run_linear)( const struct fetch_shade_emit *fse, + unsigned start, + unsigned count, + char *buffer ); + + void (*run_elts)( const struct fetch_shade_emit *fse, + const unsigned *fetch_elts, + unsigned fetch_count, + char *buffer ); + +}; + +/* Prototype fetch, shade, emit-hw-verts all in one go. + */ +struct fetch_shade_emit { + struct draw_pt_middle_end base; + struct draw_context *draw; + + struct translate_key key; + + /* Temporaries: + */ + const float *constants; + unsigned pitch[PIPE_MAX_ATTRIBS]; + const ubyte *src[PIPE_MAX_ATTRIBS]; + unsigned prim; + + /* Points to one of the three hardwired example shaders, below: + */ + struct fse_shader *active; + + /* Temporary: A list of hard-wired shaders. Of course the plan + * would be to generate these for a given (vertex-shader, + * translate-key) pair... + */ + struct fse_shader shader[10]; + int nr_shaders; +}; + + + +/* Not quite passthrough yet -- we're still running the 'shader' here, + * inlined into the vertex fetch function. + */ +static void shader0_run_linear( const struct fetch_shade_emit *fse, + unsigned start, + unsigned count, + char *buffer ) +{ + unsigned i; + + const float *m = fse->constants; + const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; + const ubyte *rgb = fse->src[1] + start * fse->pitch[1]; + const ubyte *st = fse->src[2] + start * fse->pitch[2]; + + float *out = (float *)buffer; + + /* loop over vertex attributes (vertex shader inputs) + */ + for (i = 0; i < count; i++) { + { + const float *in = (const float *)xyz; + const float ix = in[0], iy = in[1], iz = in[2]; + + out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12]; + out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13]; + out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14]; + out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15]; + xyz += fse->pitch[0]; + } + + { + const float *in = (const float *)rgb; + out[4] = in[0]; + out[5] = in[1]; + out[6] = in[2]; + out[7] = 1.0f; + rgb += fse->pitch[1]; + } + + { + const float *in = (const float *)st; + out[8] = in[0]; + out[9] = in[1]; + out[10] = 0.0f; + out[11] = 1.0f; + st += fse->pitch[2]; + } + + out += 12; + } +} + + + +static void shader1_run_linear( const struct fetch_shade_emit *fse, + unsigned start, + unsigned count, + char *buffer ) +{ + unsigned i; + const float *m = (const float *)fse->constants; + const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; + const ubyte *rgb = fse->src[1] + start * fse->pitch[1]; + float *out = (float *)buffer; + +// debug_printf("rgb %f %f %f\n", rgb[0], rgb[1], rgb[2]); + + + for (i = 0; i < count; i++) { + { + const float *in = (const float *)xyz; + const float ix = in[0], iy = in[1], iz = in[2]; + + out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12]; + out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13]; + out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14]; + out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15]; + xyz += fse->pitch[0]; + } + + { + const float *in = (const float *)rgb; + out[4] = in[0]; + out[5] = in[1]; + out[6] = in[2]; + out[7] = 1.0f; + rgb += fse->pitch[1]; + } + + out += 8; + } +} + + + + +static void shader2_run_linear( const struct fetch_shade_emit *fse, + unsigned start, + unsigned count, + char *buffer ) +{ + unsigned i; + const float *m = (const float *)fse->constants; + const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; + const ubyte *rgb = fse->src[1] + start * fse->pitch[1]; + const float psiz = 1.0; + float *out = (float *)buffer; + + + assert(fse->pitch[1] == 0); + + for (i = 0; i < count; i++) { + { + const float *in = (const float *)xyz; + const float ix = in[0], iy = in[1], iz = in[2]; + + out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12]; + out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13]; + out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14]; + out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15]; + xyz += fse->pitch[0]; + } + + { + const float *in = (const float *)rgb; + out[4] = in[0]; + out[5] = in[1]; + out[6] = in[2]; + out[7] = 1.0f; + rgb += fse->pitch[1]; + } + + { + out[8] = psiz; + } + + out += 9; + } +} + + + + +static void shader0_run_elts( const struct fetch_shade_emit *fse, + const unsigned *elts, + unsigned count, + char *buffer ) +{ + unsigned i; + const float *m = fse->constants; + float *out = (float *)buffer; + + + /* loop over vertex attributes (vertex shader inputs) + */ + for (i = 0; i < count; i++) { + unsigned elt = elts[i]; + { + const ubyte *xyz = fse->src[0] + elt * fse->pitch[0]; + const float *in = (const float *)xyz; + const float ix = in[0], iy = in[1], iz = in[2]; + + out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12]; + out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13]; + out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14]; + out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15]; + } + + { + const ubyte *rgb = fse->src[1] + elt * fse->pitch[1]; + const float *in = (const float *)rgb; + out[4] = in[0]; + out[5] = in[1]; + out[6] = in[2]; + out[7] = 1.0f; + } + + { + const ubyte *st = fse->src[2] + elt * fse->pitch[2]; + const float *in = (const float *)st; + out[8] = in[0]; + out[9] = in[1]; + out[10] = 0.0f; + out[11] = 1.0f; + } + + out += 12; + } +} + + + +static void shader1_run_elts( const struct fetch_shade_emit *fse, + const unsigned *elts, + unsigned count, + char *buffer ) +{ + unsigned i; + const float *m = (const float *)fse->constants; + float *out = (float *)buffer; + + for (i = 0; i < count; i++) { + unsigned elt = elts[i]; + + { + const ubyte *xyz = fse->src[0] + elt * fse->pitch[0]; + const float *in = (const float *)xyz; + const float ix = in[0], iy = in[1], iz = in[2]; + + out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12]; + out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13]; + out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14]; + out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15]; + xyz += fse->pitch[0]; + } + + { + const ubyte *rgb = fse->src[1] + elt * fse->pitch[1]; + const float *in = (const float *)rgb; + out[4] = in[0]; + out[5] = in[1]; + out[6] = in[2]; + out[7] = 1.0f; + rgb += fse->pitch[1]; + } + + out += 8; + } +} + + + + +static void shader2_run_elts( const struct fetch_shade_emit *fse, + const unsigned *elts, + unsigned count, + char *buffer ) +{ + unsigned i; + const float *m = (const float *)fse->constants; + const float psiz = 1.0; + float *out = (float *)buffer; + + for (i = 0; i < count; i++) { + unsigned elt = elts[i]; + { + const ubyte *xyz = fse->src[0] + elt * fse->pitch[0]; + const float *in = (const float *)xyz; + const float ix = in[0], iy = in[1], iz = in[2]; + + out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12]; + out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13]; + out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14]; + out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15]; + } + + { + const ubyte *rgb = fse->src[1] + elt * fse->pitch[1]; + out[4] = rgb[0]; + out[5] = rgb[1]; + out[6] = rgb[2]; + out[7] = 1.0f; + } + + { + out[8] = psiz; + } + + out += 9; + } +} + + + +static void fse_prepare( struct draw_pt_middle_end *middle, + unsigned prim, + unsigned opt ) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; + struct draw_context *draw = fse->draw; + unsigned num_vs_inputs = draw->vertex_shader->info.num_inputs; + unsigned num_vs_outputs = draw->vertex_shader->info.num_outputs; + const struct vertex_info *vinfo; + unsigned i; + boolean need_psize = 0; + + + if (draw->pt.user.elts) { + assert(0); + return ; + } + + if (!draw->render->set_primitive( draw->render, + prim )) { + assert(0); + return; + } + + /* Must do this after set_primitive() above: + */ + vinfo = draw->render->get_vertex_info(draw->render); + + + + fse->key.nr_elements = MAX2(num_vs_outputs, /* outputs - translate to hw format */ + num_vs_inputs); /* inputs - fetch from api format */ + + fse->key.output_stride = vinfo->size * 4; + memset(fse->key.element, 0, + fse->key.nr_elements * sizeof(fse->key.element[0])); + + for (i = 0; i < num_vs_inputs; i++) { + const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; + fse->key.element[i].input_format = src->src_format; + + /* Consider ignoring these at this point, ie make generated + * programs independent of this state: + */ + fse->key.element[i].input_buffer = 0; //src->vertex_buffer_index; + fse->key.element[i].input_offset = 0; //src->src_offset; + } + + + { + unsigned dst_offset = 0; + + for (i = 0; i < vinfo->num_attribs; i++) { + unsigned emit_sz = 0; + unsigned output_format = PIPE_FORMAT_NONE; + unsigned vs_output = vinfo->src_index[i]; + + switch (vinfo->emit[i]) { + case EMIT_4F: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); + break; + case EMIT_3F: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); + break; + case EMIT_2F: + output_format = PIPE_FORMAT_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); + break; + case EMIT_1F: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + break; + case EMIT_1F_PSIZE: + need_psize = 1; + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + vs_output = num_vs_outputs + 1; + + break; + default: + assert(0); + break; + } + + /* The elements in the key correspond to vertex shader output + * numbers, not to positions in the hw vertex description -- + * that's handled by the output_offset field. + */ + fse->key.element[vs_output].output_format = output_format; + fse->key.element[vs_output].output_offset = dst_offset; + + dst_offset += emit_sz; + assert(fse->key.output_stride >= dst_offset); + } + } + + /* To make psize work, really need to tell the vertex shader to + * copy that value from input->output. For 'translate' this was + * implicit for all elements. + */ +#if 0 + if (need_psize) { + unsigned input = num_vs_inputs + 1; + const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; + fse->key.element[i].input_format = PIPE_FORMAT_R32_FLOAT; + fse->key.element[i].input_buffer = 0; //nr_buffers + 1; + fse->key.element[i].input_offset = 0; + + fse->key.nr_elements += 1; + + } +#endif + + fse->constants = draw->pt.user.constants; + + /* Would normally look up a vertex shader and peruse its list of + * varients somehow. We omitted that step and put all the + * hardcoded "shaders" into an array. We're just making the + * assumption that this happens to be a matching shader... ie + * you're running isosurf, aren't you? + */ + fse->active = NULL; + for (i = 0; i < fse->nr_shaders; i++) { + if (translate_key_compare( &fse->key, &fse->shader[i].key) == 0) + fse->active = &fse->shader[i]; + } + + if (!fse->active) { + assert(0); + return ; + } + + /* Now set buffer pointers: + */ + for (i = 0; i < num_vs_inputs; i++) { + unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index; + + fse->src[i] = ((const ubyte *) draw->pt.user.vbuffer[buf] + + draw->pt.vertex_buffer[buf].buffer_offset + + draw->pt.vertex_element[i].src_offset); + + fse->pitch[i] = draw->pt.vertex_buffer[buf].pitch; + + } + + + //return TRUE; +} + + + + + + + +static void fse_run_linear( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count ) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; + struct draw_context *draw = fse->draw; + + char *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)count ); + + if (!hw_verts) { + assert(0); + return; + } + + /* Single routine to fetch vertices, run shader and emit HW verts. + * Clipping and viewport transformation are done elsewhere -- + * either by the API or on hardware, or for some other reason not + * required... + */ + fse->active->run_linear( fse, + start, count, + hw_verts ); + + /* Draw arrays path to avoid re-emitting index list again and + * again. + */ + draw->render->draw_arrays( draw->render, + 0, + count ); + + + draw->render->release_vertices( draw->render, + hw_verts, + fse->key.output_stride, + count ); +} + + +static void +fse_run(struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; + struct draw_context *draw = fse->draw; + void *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)fetch_count ); + if (!hw_verts) { + assert(0); + return; + } + + + /* Single routine to fetch vertices, run shader and emit HW verts. + */ + fse->active->run_elts( fse, + fetch_elts, + fetch_count, + hw_verts ); + + draw->render->draw( draw->render, + draw_elts, + draw_count ); + + draw->render->release_vertices( draw->render, + hw_verts, + fse->key.output_stride, + fetch_count ); + +} + + +static void fse_finish( struct draw_pt_middle_end *middle ) +{ +} + + +static void +fse_destroy( struct draw_pt_middle_end *middle ) +{ + FREE(middle); +} + +struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw ) +{ + struct fetch_shade_emit *fse = CALLOC_STRUCT(fetch_shade_emit); + if (!fse) + return NULL; + + fse->base.prepare = fse_prepare; + fse->base.run = fse_run; + fse->base.run_linear = fse_run_linear; + fse->base.finish = fse_finish; + fse->base.destroy = fse_destroy; + fse->draw = draw; + + fse->shader[0].run_linear = shader0_run_linear; + fse->shader[0].run_elts = shader0_run_elts; + fse->shader[0].key.nr_elements = 3; + fse->shader[0].key.output_stride = 12 * sizeof(float); + + fse->shader[0].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[0].key.element[0].input_buffer = 0; + fse->shader[0].key.element[0].input_offset = 0; + fse->shader[0].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[0].key.element[0].output_offset = 0; + + fse->shader[0].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[0].key.element[1].input_buffer = 0; + fse->shader[0].key.element[1].input_offset = 0; + fse->shader[0].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[0].key.element[1].output_offset = 16; + + fse->shader[0].key.element[1].input_format = PIPE_FORMAT_R32G32_FLOAT; + fse->shader[0].key.element[1].input_buffer = 0; + fse->shader[0].key.element[1].input_offset = 0; + fse->shader[0].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[0].key.element[1].output_offset = 32; + + fse->shader[1].run_linear = shader1_run_linear; + fse->shader[1].run_elts = shader1_run_elts; + fse->shader[1].key.nr_elements = 2; + fse->shader[1].key.output_stride = 8 * sizeof(float); + + fse->shader[1].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[1].key.element[0].input_buffer = 0; + fse->shader[1].key.element[0].input_offset = 0; + fse->shader[1].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[1].key.element[0].output_offset = 0; + + fse->shader[1].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[1].key.element[1].input_buffer = 0; + fse->shader[1].key.element[1].input_offset = 0; + fse->shader[1].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[1].key.element[1].output_offset = 16; + + fse->shader[2].run_linear = shader2_run_linear; + fse->shader[2].run_elts = shader2_run_elts; + fse->shader[2].key.nr_elements = 3; + fse->shader[2].key.output_stride = 9 * sizeof(float); + + fse->shader[2].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[2].key.element[0].input_buffer = 0; + fse->shader[2].key.element[0].input_offset = 0; + fse->shader[2].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[2].key.element[0].output_offset = 0; + + fse->shader[2].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; + fse->shader[2].key.element[1].input_buffer = 0; + fse->shader[2].key.element[1].input_offset = 0; + fse->shader[2].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fse->shader[2].key.element[1].output_offset = 16; + + /* psize is special + * -- effectively add it here as another input!?! + * -- who knows how to add it as a buffer? + */ + fse->shader[2].key.element[2].input_format = PIPE_FORMAT_R32_FLOAT; + fse->shader[2].key.element[2].input_buffer = 0; + fse->shader[2].key.element[2].input_offset = 0; + fse->shader[2].key.element[2].output_format = PIPE_FORMAT_R32_FLOAT; + fse->shader[2].key.element[2].output_offset = 32; + + fse->nr_shaders = 3; + + return &fse->base; +} diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c new file mode 100644 index 0000000000..32c8a9632c --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_util.c @@ -0,0 +1,103 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_pt.h" + +void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr) +{ + switch (prim) { + case PIPE_PRIM_POINTS: + *first = 1; + *incr = 1; + break; + case PIPE_PRIM_LINES: + *first = 2; + *incr = 2; + break; + case PIPE_PRIM_LINE_STRIP: + case PIPE_PRIM_LINE_LOOP: + *first = 2; + *incr = 1; + break; + case PIPE_PRIM_TRIANGLES: + *first = 3; + *incr = 3; + break; + case PIPE_PRIM_TRIANGLE_STRIP: + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + *first = 3; + *incr = 1; + break; + case PIPE_PRIM_QUADS: + *first = 4; + *incr = 4; + break; + case PIPE_PRIM_QUAD_STRIP: + *first = 4; + *incr = 2; + break; + default: + assert(0); + *first = 0; + *incr = 1; /* set to one so that count % incr works */ + break; + } +} + + +unsigned draw_pt_reduced_prim(unsigned prim) +{ + switch (prim) { + case PIPE_PRIM_POINTS: + return PIPE_PRIM_POINTS; + case PIPE_PRIM_LINES: + case PIPE_PRIM_LINE_STRIP: + case PIPE_PRIM_LINE_LOOP: + return PIPE_PRIM_LINES; + case PIPE_PRIM_TRIANGLES: + case PIPE_PRIM_TRIANGLE_STRIP: + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + case PIPE_PRIM_QUADS: + case PIPE_PRIM_QUAD_STRIP: + return PIPE_PRIM_TRIANGLES; + default: + assert(0); + return PIPE_PRIM_POINTS; + } +} + + diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index d92ad4fda1..af6e2d5157 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -106,51 +106,6 @@ static INLINE void fetch_init(struct varray_frontend *varray, } -static boolean split_prim_inplace(unsigned prim, unsigned *first, unsigned *incr) -{ - switch (prim) { - case PIPE_PRIM_POINTS: - *first = 1; - *incr = 1; - return TRUE; - case PIPE_PRIM_LINES: - *first = 2; - *incr = 2; - return TRUE; - case PIPE_PRIM_LINE_STRIP: - *first = 2; - *incr = 1; - return TRUE; - case PIPE_PRIM_TRIANGLES: - *first = 3; - *incr = 3; - return TRUE; - case PIPE_PRIM_TRIANGLE_STRIP: - *first = 3; - *incr = 1; - return TRUE; - case PIPE_PRIM_TRIANGLE_FAN: - *first = 3; - *incr = 1; - return TRUE; - case PIPE_PRIM_QUADS: - *first = 4; - *incr = 4; - return TRUE; - case PIPE_PRIM_QUAD_STRIP: - *first = 4; - *incr = 2; - return TRUE; - case PIPE_PRIM_POLYGON: - *first = 3; - *incr = 1; - return TRUE; - default: - *first = 0; - *incr = 1; /* set to one so that count % incr works */ - return FALSE; - } -} static INLINE void add_draw_el(struct varray_frontend *varray, diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h index 1395275897..6979f6b544 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h @@ -15,7 +15,7 @@ static void FUNC(struct draw_pt_front_end *frontend, varray->fetch_start = start; - split_prim_inplace(varray->input_prim, &first, &incr); + draw_pt_split_prim(varray->input_prim, &first, &incr); #if 0 debug_printf("%s (%d) %d/%d\n", __FUNCTION__, diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h index b6f1f0cadc..114ed371a0 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h @@ -16,7 +16,13 @@ static void FUNC(struct draw_pt_front_end *frontend, varray->fetch_start = start; - split_prim_inplace(varray->input_prim, &first, &incr); + draw_pt_split_prim(varray->input_prim, &first, &incr); + + /* Sanitize primitive length: + */ + count = trim(count, first, incr); + if (count < first) + return; #if 0 debug_printf("%s (%d) %d/%d\n", __FUNCTION__, @@ -32,7 +38,6 @@ static void FUNC(struct draw_pt_front_end *frontend, case PIPE_PRIM_TRIANGLE_STRIP: case PIPE_PRIM_QUADS: case PIPE_PRIM_QUAD_STRIP: - for (j = 0; j < count;) { unsigned remaining = count - j; unsigned nr = trim( MIN2(FETCH_MAX, remaining), first, incr ); diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 6b3fb1406b..a3495f2a30 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -204,19 +204,6 @@ static void vcache_ef_quad( struct vcache_frontend *vcache, -static unsigned reduced_prim[PIPE_PRIM_POLYGON + 1] = { - PIPE_PRIM_POINTS, - PIPE_PRIM_LINES, - PIPE_PRIM_LINES, - PIPE_PRIM_LINES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES, - PIPE_PRIM_TRIANGLES -}; - static void vcache_prepare( struct draw_pt_front_end *frontend, @@ -236,7 +223,7 @@ static void vcache_prepare( struct draw_pt_front_end *frontend, } vcache->input_prim = prim; - vcache->output_prim = reduced_prim[prim]; + vcache->output_prim = draw_pt_reduced_prim(prim); vcache->middle = middle; middle->prepare( middle, vcache->output_prim, opt ); -- cgit v1.2.3 From 19f15277d1871b62902031f9fa9aabf2f1bc7c40 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 12 May 2008 17:17:18 -0400 Subject: adjust llvm code to the changes in 2.3 --- src/gallium/auxiliary/gallivm/instructions.cpp | 30 ++++++++++++-------------- src/gallium/auxiliary/gallivm/storage.cpp | 16 +++++++------- src/gallium/auxiliary/gallivm/storagesoa.cpp | 8 +++---- 3 files changed, 26 insertions(+), 28 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index 95a670edaf..1a98491b82 100644 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -166,10 +166,9 @@ llvm::Value * Instructions::rsq(llvm::Value *in1) Value *abs = callFAbs(x); Value *sqrt = callFSqrt(abs); - Value *rsqrt = m_builder.CreateFDiv(ConstantFP::get(Type::FloatTy, - APFloat(1.f)), - sqrt, - name("rsqrt")); + Value *rsqrt = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)), + sqrt, + name("rsqrt")); return vectorFromVals(rsqrt, rsqrt, rsqrt, rsqrt); } @@ -278,9 +277,8 @@ llvm::Value * Instructions::rcp(llvm::Value *in1) Value *x1 = m_builder.CreateExtractElement(in1, m_storage->constantInt(0), name("x1")); - Value *res = m_builder.CreateFDiv(ConstantFP::get(Type::FloatTy, - APFloat(1.f)), - x1, name("rcp")); + Value *res = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)), + x1, name("rcp")); return vectorFromVals(res, res, res, res); } @@ -319,13 +317,13 @@ llvm::Value * Instructions::dst(llvm::Value *in1, llvm::Value *in2) m_storage->constantInt(3), name("w")); Value *ry = m_builder.CreateMul(y1, y2, name("tyuy")); - return vectorFromVals(ConstantFP::get(Type::FloatTy, APFloat(1.f)), + return vectorFromVals(ConstantFP::get(APFloat(1.f)), ry, z, w); } llvm::Value * Instructions::ex2(llvm::Value *in) { - llvm::Value *val = callPow(ConstantFP::get(Type::FloatTy, APFloat(2.f)), + llvm::Value *val = callPow(ConstantFP::get(APFloat(2.f)), m_builder.CreateExtractElement( in, m_storage->constantInt(0), name("x1"))); @@ -526,7 +524,7 @@ llvm::Function * Instructions::declarePrintf() llvm::Value * Instructions::sgt(llvm::Value *in1, llvm::Value *in2) { - Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f)); + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); Constant *const0f = Constant::getNullValue(Type::FloatTy); std::vector vec1 = extractVector(in1); @@ -547,7 +545,7 @@ llvm::Value * Instructions::sgt(llvm::Value *in1, llvm::Value *in2) } llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2) { - Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f)); + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); Constant *const0f = Constant::getNullValue(Type::FloatTy); std::vector vec1 = extractVector(in1); @@ -571,7 +569,7 @@ llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2) llvm::Value * Instructions::slt(llvm::Value *in1, llvm::Value *in2) { - Constant *const1f = ConstantFP::get(Type::FloatTy, APFloat(1.000000e+00f)); + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); Constant *const0f = Constant::getNullValue(Type::FloatTy); std::vector vec1 = extractVector(in1); @@ -814,10 +812,10 @@ llvm::Function * Instructions::findFunction(int label) llvm::Value * Instructions::constVector(float x, float y, float z, float w) { std::vector vec(4); - vec[0] = ConstantFP::get(Type::FloatTy, APFloat(x)); - vec[1] = ConstantFP::get(Type::FloatTy, APFloat(y)); - vec[2] = ConstantFP::get(Type::FloatTy, APFloat(z)); - vec[3] = ConstantFP::get(Type::FloatTy, APFloat(w)); + vec[0] = ConstantFP::get(APFloat(x)); + vec[1] = ConstantFP::get(APFloat(y)); + vec[2] = ConstantFP::get(APFloat(z)); + vec[3] = ConstantFP::get(APFloat(w)); return ConstantVector::get(m_floatVecType, vec); } diff --git a/src/gallium/auxiliary/gallivm/storage.cpp b/src/gallium/auxiliary/gallivm/storage.cpp index 9d9fd12360..6f373f6dd5 100644 --- a/src/gallium/auxiliary/gallivm/storage.cpp +++ b/src/gallium/auxiliary/gallivm/storage.cpp @@ -69,10 +69,10 @@ llvm::Constant *Storage::shuffleMask(int vec) { if (!m_extSwizzleVec) { std::vector elems; - elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(0.f))); - elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(1.f))); - elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(0.f))); - elems.push_back(ConstantFP::get(Type::FloatTy, APFloat(1.f))); + elems.push_back(ConstantFP::get(APFloat(0.f))); + elems.push_back(ConstantFP::get(APFloat(1.f))); + elems.push_back(ConstantFP::get(APFloat(0.f))); + elems.push_back(ConstantFP::get(APFloat(1.f))); m_extSwizzleVec = ConstantVector::get(m_floatVecType, elems); } @@ -295,10 +295,10 @@ llvm::Value * Storage::immediateElement(int idx) void Storage::addImmediate(float *val) { std::vector vec(4); - vec[0] = ConstantFP::get(Type::FloatTy, APFloat(val[0])); - vec[1] = ConstantFP::get(Type::FloatTy, APFloat(val[1])); - vec[2] = ConstantFP::get(Type::FloatTy, APFloat(val[2])); - vec[3] = ConstantFP::get(Type::FloatTy, APFloat(val[3])); + vec[0] = ConstantFP::get(APFloat(val[0])); + vec[1] = ConstantFP::get(APFloat(val[1])); + vec[2] = ConstantFP::get(APFloat(val[2])); + vec[3] = ConstantFP::get(APFloat(val[3])); m_immediates.push_back(ConstantVector::get(m_floatVecType, vec)); } diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp index 0e6e68c9d7..78d754371f 100644 --- a/src/gallium/auxiliary/gallivm/storagesoa.cpp +++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp @@ -264,10 +264,10 @@ llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector &v { VectorType *vectorType = VectorType::get(Type::FloatTy, 4); std::vector immValues; - ConstantFP *constx = ConstantFP::get(Type::FloatTy, APFloat(vec[0])); - ConstantFP *consty = ConstantFP::get(Type::FloatTy, APFloat(vec[1])); - ConstantFP *constz = ConstantFP::get(Type::FloatTy, APFloat(vec[2])); - ConstantFP *constw = ConstantFP::get(Type::FloatTy, APFloat(vec[3])); + ConstantFP *constx = ConstantFP::get(APFloat(vec[0])); + ConstantFP *consty = ConstantFP::get(APFloat(vec[1])); + ConstantFP *constz = ConstantFP::get(APFloat(vec[2])); + ConstantFP *constw = ConstantFP::get(APFloat(vec[3])); immValues.push_back(constx); immValues.push_back(consty); immValues.push_back(constz); -- cgit v1.2.3 From 1c624846a81b0218b4a07328f485e295432c6312 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 13 May 2008 16:06:09 -0400 Subject: decomposition from keith, adds decomposition of more prim to the pipeline --- src/gallium/auxiliary/draw/draw_pipe.c | 74 ++++++++---- src/gallium/auxiliary/draw/draw_pt_decompose.h | 153 +++++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_pt_varray.c | 11 +- src/gallium/auxiliary/draw/draw_pt_vcache.c | 117 +++++++++---------- 4 files changed, 263 insertions(+), 92 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pt_decompose.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index cb97f955b2..1d26706dee 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -212,6 +212,55 @@ void draw_pipeline_run( struct draw_context *draw, draw->pipeline.vertex_count = 0; } +#define QUAD(i0,i1,i2,i3) \ + do_triangle( draw, \ + ( DRAW_PIPE_RESET_STIPPLE | \ + DRAW_PIPE_EDGE_FLAG_0 | \ + DRAW_PIPE_EDGE_FLAG_2 ), \ + verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (i1), \ + verts + stride * (i3)); \ + do_triangle( draw, \ + ( DRAW_PIPE_EDGE_FLAG_0 | \ + DRAW_PIPE_EDGE_FLAG_1 ), \ + verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (i2), \ + verts + stride * (i3)) + +#define TRIANGLE(flags,i0,i1,i2) \ + do_triangle( draw, \ + flags, /* flags */ \ + verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (i1), \ + verts + stride * (i2)) + +#define LINE(flags,i0,i1) \ + do_line( draw, \ + flags, \ + verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ + verts + stride * (i+1)) + +#define POINT(i0) \ + do_point( draw, \ + verts + stride * i0 ) + +#define FUNC pipe_run_linear +#define ARGS \ + struct draw_context *draw, \ + unsigned prim, \ + struct vertex_header *vertices, \ + unsigned stride + +#define LOCAL_VARS \ + char *verts = (char *)vertices; \ + boolean flatfirst = (draw->rasterizer->flatshade && \ + draw->rasterizer->flatshade_first); \ + unsigned i, flags + +#define FLUSH + +#include "draw_pt_decompose.h" + void draw_pipeline_run_linear( struct draw_context *draw, unsigned prim, struct vertex_header *vertices, @@ -219,34 +268,11 @@ void draw_pipeline_run_linear( struct draw_context *draw, unsigned stride ) { char *verts = (char *)vertices; - unsigned i; - draw->pipeline.verts = verts; draw->pipeline.vertex_stride = stride; draw->pipeline.vertex_count = count; - switch (prim) { - case PIPE_PRIM_POINTS: - for (i = 0; i < count; i++) - do_point( draw, - verts + stride * i ); - break; - case PIPE_PRIM_LINES: - for (i = 0; i+1 < count; i += 2) - do_line( draw, - i+0, /* flags */ - verts + stride * ((i+0) & ~DRAW_PIPE_FLAG_MASK), - verts + stride * (i+1)); - break; - case PIPE_PRIM_TRIANGLES: - for (i = 0; i+2 < count; i += 3) - do_triangle( draw, - (i+0), /* flags */ - verts + stride * ((i+0) & ~DRAW_PIPE_FLAG_MASK), - verts + stride * (i+1), - verts + stride * (i+2)); - break; - } + pipe_run_linear(draw, prim, vertices, stride, count); draw->pipeline.verts = NULL; draw->pipeline.vertex_count = 0; diff --git a/src/gallium/auxiliary/draw/draw_pt_decompose.h b/src/gallium/auxiliary/draw/draw_pt_decompose.h new file mode 100644 index 0000000000..dccfde99dd --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_decompose.h @@ -0,0 +1,153 @@ + + +static void FUNC( ARGS, + unsigned count ) +{ + LOCAL_VARS; + + switch (prim) { + case PIPE_PRIM_POINTS: + for (i = 0; i < count; i ++) { + POINT( (i + 0) ); + } + break; + + case PIPE_PRIM_LINES: + for (i = 0; i+1 < count; i += 2) { + LINE( DRAW_PIPE_RESET_STIPPLE, + (i + 0), + (i + 1)); + } + break; + + case PIPE_PRIM_LINE_LOOP: + if (count >= 2) { + flags = DRAW_PIPE_RESET_STIPPLE; + + for (i = 1; i < count; i++, flags = 0) { + LINE( flags, + (i - 1), + (i )); + } + + LINE( flags, + (i - 1), + (0 )); + } + break; + + case PIPE_PRIM_LINE_STRIP: + flags = DRAW_PIPE_RESET_STIPPLE; + for (i = 1; i < count; i++, flags = 0) { + LINE( flags, + (i - 1), + (i )); + } + break; + + case PIPE_PRIM_TRIANGLES: + for (i = 0; i+2 < count; i += 3) { + TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + (i + 0), + (i + 1), + (i + 2 )); + } + break; + + case PIPE_PRIM_TRIANGLE_STRIP: + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + (i + 0), + (i + 1 + (i&1)), + (i + 2 - (i&1))); + } + } + else { + for (i = 0; i+2 < count; i++) { + TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + (i + 0 + (i&1)), + (i + 1 - (i&1)), + (i + 2 )); + } + } + break; + + case PIPE_PRIM_TRIANGLE_FAN: + if (count >= 3) { + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + (i + 1), + (i + 2), + (0 )); + } + } + else { + for (i = 0; i+2 < count; i++) { + TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + (0), + (i + 1), + (i + 2 )); + } + } + } + break; + + + case PIPE_PRIM_QUADS: + for (i = 0; i+3 < count; i += 4) { + QUAD( (i + 0), + (i + 1), + (i + 2), + (i + 3)); + } + break; + + case PIPE_PRIM_QUAD_STRIP: + for (i = 0; i+3 < count; i += 2) { + QUAD( (i + 2), + (i + 0), + (i + 1), + (i + 3)); + } + break; + + case PIPE_PRIM_POLYGON: + { + /* These bitflags look a little odd because we submit the + * vertices as (1,2,0) to satisfy flatshade requirements. + */ + const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2; + const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0; + const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1; + + flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; + + for (i = 0; i+2 < count; i++, flags = edge_middle) { + + if (i + 3 == count) + flags |= edge_last; + + TRIANGLE( flags, + (i + 1), + (i + 2), + (0)); + } + } + break; + + default: + assert(0); + break; + } + + FLUSH; +} + + +#undef TRIANGLE +#undef QUAD +#undef POINT +#undef LINE +#undef FUNC diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index af6e2d5157..06fd866ccd 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -141,14 +141,19 @@ static INLINE void varray_point( struct varray_frontend *varray, } - +#if 0 +#define TRIANGLE(flags,i0,i1,i2) varray_triangle(varray,i0,i1,i2) +#define LINE(flags,i0,i1) varray_line(varray,i0,i1) +#define POINT(i0) varray_point(varray,i0) +#define FUNC varray_decompose +#include "draw_pt_decompose.h" +#else #define TRIANGLE(vc,i0,i1,i2) varray_triangle(vc,i0,i1,i2) #define LINE(vc,i0,i1) varray_line(vc,i0,i1) #define POINT(vc,i0) varray_point(vc,i0) #define FUNC varray_run #include "draw_pt_varray_tmp_linear.h" - - +#endif static unsigned decompose_prim[PIPE_PRIM_POLYGON + 1] = { PIPE_PRIM_POINTS, diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index a3495f2a30..6c17edba34 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -104,22 +104,10 @@ static INLINE void vcache_elt( struct vcache_frontend *vcache, static void vcache_triangle( struct vcache_frontend *vcache, + ushort flags, unsigned i0, unsigned i1, unsigned i2 ) -{ - vcache_elt(vcache, i0, 0); - vcache_elt(vcache, i1, 0); - vcache_elt(vcache, i2, 0); - vcache_check_flush(vcache); -} - - -static void vcache_triangle_flags( struct vcache_frontend *vcache, - ushort flags, - unsigned i0, - unsigned i1, - unsigned i2 ) { vcache_elt(vcache, i0, flags); vcache_elt(vcache, i1, 0); @@ -128,19 +116,9 @@ static void vcache_triangle_flags( struct vcache_frontend *vcache, } static void vcache_line( struct vcache_frontend *vcache, + ushort flags, unsigned i0, unsigned i1 ) -{ - vcache_elt(vcache, i0, 0); - vcache_elt(vcache, i1, 0); - vcache_check_flush(vcache); -} - - -static void vcache_line_flags( struct vcache_frontend *vcache, - ushort flags, - unsigned i0, - unsigned i1 ) { vcache_elt(vcache, i0, flags); vcache_elt(vcache, i1, 0); @@ -161,46 +139,63 @@ static void vcache_quad( struct vcache_frontend *vcache, unsigned i2, unsigned i3 ) { - vcache_triangle( vcache, i0, i1, i3 ); - vcache_triangle( vcache, i1, i2, i3 ); -} - -static void vcache_ef_quad( struct vcache_frontend *vcache, - unsigned i0, - unsigned i1, - unsigned i2, - unsigned i3 ) -{ - const unsigned omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2; - const unsigned omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1; - - vcache_triangle_flags( vcache, - DRAW_PIPE_RESET_STIPPLE | omitEdge1, - i0, i1, i3 ); - - vcache_triangle_flags( vcache, - omitEdge2, - i1, i2, i3 ); + vcache_triangle( vcache, + ( DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_2 ), + i0, i1, i3 ); + + vcache_triangle( vcache, + ( DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_1 ), + i1, i2, i3 ); } /* At least for now, we're back to using a template include file for * this. The two paths aren't too different though - it may be * possible to reunify them. */ -#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle_flags(vc,flags,i0,i1,i2) -#define QUAD(vc,i0,i1,i2,i3) vcache_ef_quad(vc,i0,i1,i2,i3) -#define LINE(vc,flags,i0,i1) vcache_line_flags(vc,flags,i0,i1) -#define POINT(vc,i0) vcache_point(vc,i0) -#define FUNC vcache_run_extras -#include "draw_pt_vcache_tmp.h" - -#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle(vc,i0,i1,i2) -#define QUAD(vc,i0,i1,i2,i3) vcache_quad(vc,i0,i1,i2,i3) -#define LINE(vc,flags,i0,i1) vcache_line(vc,i0,i1) -#define POINT(vc,i0) vcache_point(vc,i0) +#define TRIANGLE(flags,i0,i1,i2) \ + vcache_triangle(vcache, \ + flags, \ + get_elt(elts,i0), \ + get_elt(elts,i1), \ + get_elt(elts,i2)) + +#define QUAD(i0,i1,i2,i3) \ + vcache_quad(vcache, \ + get_elt(elts,i0), \ + get_elt(elts,i1), \ + get_elt(elts,i2), \ + get_elt(elts,i3)) + +#define LINE(flags,i0,i1) \ + vcache_line(vcache, \ + flags, \ + get_elt(elts,i0), \ + get_elt(elts,i1)) + +#define POINT(i0) \ + vcache_point(vcache, \ + get_elt(elts,i0)) + #define FUNC vcache_run -#include "draw_pt_vcache_tmp.h" +#define ARGS \ + struct draw_pt_front_end *frontend, \ + pt_elt_func get_elt, \ + const void *elts + +#define LOCAL_VARS \ + struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; \ + struct draw_context *draw = vcache->draw; \ + boolean flatfirst = (draw->rasterizer->flatshade && \ + draw->rasterizer->flatshade_first); \ + unsigned prim = vcache->input_prim; \ + unsigned i, flags; +#define FLUSH vcache_flush( vcache ) + +#include "draw_pt_decompose.h" @@ -213,15 +208,7 @@ static void vcache_prepare( struct draw_pt_front_end *frontend, { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; - if (opt & PT_PIPELINE) - { - vcache->base.run = vcache_run_extras; - } - else - { - vcache->base.run = vcache_run; - } - + vcache->base.run = vcache_run; vcache->input_prim = prim; vcache->output_prim = draw_pt_reduced_prim(prim); -- cgit v1.2.3 From 9671f7ae476cadb46f9f8f9d0363f24aabaf9f87 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Sat, 17 May 2008 10:30:21 -0600 Subject: gallium: in drivers, make copy of tokens passed to pipe->create_vs/fs_state() The caller can then free the token array immediately. --- src/gallium/auxiliary/draw/draw_vs_exec.c | 11 ++-- src/gallium/auxiliary/draw/draw_vs_llvm.c | 7 +- src/gallium/auxiliary/draw/draw_vs_sse.c | 5 +- src/gallium/auxiliary/tgsi/util/tgsi_parse.c | 15 +++++ src/gallium/auxiliary/tgsi/util/tgsi_parse.h | 31 ++++++++- src/gallium/drivers/cell/ppu/cell_state_shader.c | 84 +++++++++++++++--------- src/gallium/drivers/i915simple/i915_state.c | 5 +- src/gallium/drivers/i965simple/brw_state.c | 19 +++--- src/gallium/drivers/softpipe/sp_fs_exec.c | 5 +- src/gallium/drivers/softpipe/sp_state.h | 7 +- src/gallium/drivers/softpipe/sp_state_fs.c | 5 +- 11 files changed, 137 insertions(+), 57 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 54a2b2ab04..7a02f6334b 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -39,6 +39,7 @@ #include "draw_vs.h" #include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_scan.h" struct exec_vertex_shader { @@ -165,21 +166,23 @@ draw_create_vs_exec(struct draw_context *draw, const struct pipe_shader_state *state) { struct exec_vertex_shader *vs = CALLOC_STRUCT( exec_vertex_shader ); - uint nt = tgsi_num_tokens(state->tokens); if (vs == NULL) return NULL; /* we make a private copy of the tokens */ - vs->base.state.tokens = mem_dup(state->tokens, nt * sizeof(state->tokens[0])); - tgsi_scan_shader(state->tokens, &vs->base.info); + vs->base.state.tokens = tgsi_dup_tokens(state->tokens); + if (!vs->base.state.tokens) { + FREE(vs); + return NULL; + } + tgsi_scan_shader(state->tokens, &vs->base.info); vs->base.prepare = vs_exec_prepare; vs->base.run_linear = vs_exec_run_linear; vs->base.delete = vs_exec_delete; vs->machine = &draw->machine; - return &vs->base; } diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index dcada66514..be6907ed04 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -138,14 +138,17 @@ draw_create_vs_llvm(struct draw_context *draw, const struct pipe_shader_state *templ) { struct draw_llvm_vertex_shader *vs; - uint nt = tgsi_num_tokens(templ->tokens); vs = CALLOC_STRUCT( draw_llvm_vertex_shader ); if (vs == NULL) return NULL; /* we make a private copy of the tokens */ - vs->base.state.tokens = mem_dup(templ->tokens, nt * sizeof(templ->tokens[0])); + vs->base.state.tokens = tgsi_dup_tokens(templ->tokens); + if (!vs->base.state.tokens) { + FREE(vs); + return NULL; + } tgsi_scan_shader(vs->base.state.tokens, &vs->base.info); diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index a57c938fbf..5929ea76b2 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -188,7 +188,6 @@ draw_create_vs_sse(struct draw_context *draw, const struct pipe_shader_state *templ) { struct draw_sse_vertex_shader *vs; - uint nt = tgsi_num_tokens(templ->tokens); if (!rtasm_cpu_has_sse2()) return NULL; @@ -198,7 +197,9 @@ draw_create_vs_sse(struct draw_context *draw, return NULL; /* we make a private copy of the tokens */ - vs->base.state.tokens = mem_dup(templ->tokens, nt * sizeof(templ->tokens[0])); + vs->base.state.tokens = tgsi_dup_tokens(templ->tokens); + if (!vs->base.state.tokens) + goto fail; tgsi_scan_shader(templ->tokens, &vs->base.info); diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c index 5bea773840..5c0b0bfd61 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c @@ -330,3 +330,18 @@ tgsi_num_tokens(const struct tgsi_token *tokens) } return 0; } + + +/** + * Make a new copy of a token array. + */ +struct tgsi_token * +tgsi_dup_tokens(const struct tgsi_token *tokens) +{ + unsigned n = tgsi_num_tokens(tokens); + unsigned bytes = n * sizeof(struct tgsi_token); + struct tgsi_token *new_tokens = (struct tgsi_token *) MALLOC(bytes); + if (new_tokens) + memcpy(new_tokens, tokens, bytes); + return new_tokens; +} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h index 15e76feb7c..4102101093 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h @@ -1,4 +1,31 @@ -#if !defined TGSI_PARSE_H +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_PARSE_H #define TGSI_PARSE_H #include "pipe/p_shader_tokens.h" @@ -118,6 +145,8 @@ tgsi_parse_token( unsigned tgsi_num_tokens(const struct tgsi_token *tokens); +struct tgsi_token * +tgsi_dup_tokens(const struct tgsi_token *tokens); #if defined __cplusplus } diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index fb2e940348..c3a3fbd066 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -30,33 +30,50 @@ #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "draw/draw_context.h" -#if 0 -#include "pipe/p_shader_tokens.h" -#include "gallivm/gallivm.h" -#include "tgsi/util/tgsi_dump.h" -#include "tgsi/exec/tgsi_sse2.h" -#endif +#include "tgsi/util/tgsi_parse.h" #include "cell_context.h" #include "cell_state.h" + +/** cast wrapper */ +static INLINE struct cell_fragment_shader_state * +cell_fragment_shader_state(void *shader) +{ + return (struct pipe_shader_state *) shader; +} + + +/** cast wrapper */ +static INLINE struct cell_vertex_shader_state * +cell_vertex_shader_state(void *shader) +{ + return (struct pipe_shader_state *) shader; +} + + + static void * cell_create_fs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { /*struct cell_context *cell = cell_context(pipe);*/ - struct cell_fragment_shader_state *state; + struct cell_fragment_shader_state *cfs; - state = CALLOC_STRUCT(cell_fragment_shader_state); - if (!state) + cfs = CALLOC_STRUCT(cell_fragment_shader_state); + if (!cfs) return NULL; - state->shader = *templ; + cfs->shader.tokens = tgsi_dup_tokens(templ->tokens); + if (!cfs->shader.tokens) { + FREE(cfs); + return NULL; + } - tgsi_scan_shader(templ->tokens, &state->info); + tgsi_scan_shader(templ->tokens, &cfs->info); - return state; + return cfs; } @@ -65,7 +82,7 @@ cell_bind_fs_state(struct pipe_context *pipe, void *fs) { struct cell_context *cell = cell_context(pipe); - cell->fs = (struct cell_fragment_shader_state *) fs; + cell->fs = cell_fragment_shader_state(fs); cell->dirty |= CELL_NEW_FS; } @@ -74,10 +91,10 @@ cell_bind_fs_state(struct pipe_context *pipe, void *fs) static void cell_delete_fs_state(struct pipe_context *pipe, void *fs) { - struct cell_fragment_shader_state *state = - (struct cell_fragment_shader_state *) fs; + struct cell_fragment_shader_state *cfs = cell_fragment_shader_state(fs); - FREE( state ); + FREE((void *) cfs->shader.tokens); + FREE(cfs); } @@ -86,22 +103,28 @@ cell_create_vs_state(struct pipe_context *pipe, const struct pipe_shader_state *templ) { struct cell_context *cell = cell_context(pipe); - struct cell_vertex_shader_state *state; + struct cell_vertex_shader_state *cvs; - state = CALLOC_STRUCT(cell_vertex_shader_state); - if (!state) + cvs = CALLOC_STRUCT(cell_vertex_shader_state); + if (!cvs) return NULL; - state->shader = *templ; - tgsi_scan_shader(templ->tokens, &state->info); + cvs->shader.tokens = tgsi_dup_tokens(templ->tokens); + if (!cvs->shader.tokens) { + FREE(cvs); + return NULL; + } - state->draw_data = draw_create_vertex_shader(cell->draw, &state->shader); - if (state->draw_data == NULL) { - FREE( state ); + tgsi_scan_shader(templ->tokens, &cvs->info); + + cvs->draw_data = draw_create_vertex_shader(cell->draw, &cvs->shader); + if (cvs->draw_data == NULL) { + FREE( (void *) cvs->shader.tokens ); + FREE( cvs ); return NULL; } - return state; + return cvs; } @@ -110,7 +133,7 @@ cell_bind_vs_state(struct pipe_context *pipe, void *vs) { struct cell_context *cell = cell_context(pipe); - cell->vs = (const struct cell_vertex_shader_state *) vs; + cell->vs = cell_vertex_shader_state(vs); draw_bind_vertex_shader(cell->draw, (cell->vs ? cell->vs->draw_data : NULL)); @@ -123,12 +146,11 @@ static void cell_delete_vs_state(struct pipe_context *pipe, void *vs) { struct cell_context *cell = cell_context(pipe); + struct cell_vertex_shader_state *cvs = cell_vertex_shader_state(vs); - struct cell_vertex_shader_state *state = - (struct cell_vertex_shader_state *) vs; - - draw_delete_vertex_shader(cell->draw, state->draw_data); - FREE( state ); + draw_delete_vertex_shader(cell->draw, cvs->draw_data); + FREE( (void *) cvs->shader.tokens ); + FREE( cvs ); } diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 3d94b52366..4adeb37e86 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -33,6 +33,7 @@ #include "pipe/p_winsys.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "tgsi/util/tgsi_parse.h" #include "i915_context.h" #include "i915_reg.h" @@ -436,7 +437,7 @@ i915_create_fs_state(struct pipe_context *pipe, if (!ifs) return NULL; - ifs->state = *templ; + ifs->state.tokens = tgsi_dup_tokens(templ->tokens); tgsi_scan_shader(templ->tokens, &ifs->info); @@ -465,6 +466,8 @@ void i915_delete_fs_state(struct pipe_context *pipe, void *shader) FREE(ifs->program); ifs->program_len = 0; + FREE(ifs->state.tokens); + FREE(ifs); } diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index 376f1487b2..ac243b7e4f 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -35,6 +35,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_shader_tokens.h" #include "tgsi/util/tgsi_dump.h" +#include "tgsi/util/tgsi_parse.h" #include "brw_context.h" #include "brw_defines.h" @@ -182,9 +183,7 @@ static void * brw_create_fs_state(struct pipe_context *pipe, { struct brw_fragment_program *brw_fp = CALLOC_STRUCT(brw_fragment_program); - /* XXX: Do I have to duplicate the tokens as well?? - */ - brw_fp->program = *shader; + brw_fp->program.tokens = tgsi_dup_tokens(shader->tokens); brw_fp->id = brw_context(pipe)->program_id++; tgsi_scan_shader(shader->tokens, &brw_fp->info); @@ -210,7 +209,10 @@ static void brw_bind_fs_state(struct pipe_context *pipe, void *shader) static void brw_delete_fs_state(struct pipe_context *pipe, void *shader) { - FREE(shader); + struct brw_fragment_program *brw_fp = (struct brw_fragment_program *) shader; + + FREE((void *) brw_fp->program.tokens); + FREE(brw_fp); } @@ -223,9 +225,7 @@ static void *brw_create_vs_state(struct pipe_context *pipe, { struct brw_vertex_program *brw_vp = CALLOC_STRUCT(brw_vertex_program); - /* XXX: Do I have to duplicate the tokens as well?? - */ - brw_vp->program = *shader; + brw_vp->program.tokens = tgsi_dup_tokens(shader->tokens); brw_vp->id = brw_context(pipe)->program_id++; tgsi_scan_shader(shader->tokens, &brw_vp->info); @@ -251,7 +251,10 @@ static void brw_bind_vs_state(struct pipe_context *pipe, void *vs) static void brw_delete_vs_state(struct pipe_context *pipe, void *shader) { - FREE(shader); + struct brw_vertex_program *brw_vp = (struct brw_vertex_program *) shader; + + FREE((void *) brw_vp->program.tokens); + FREE(brw_vp); } diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index d5bd7a702f..0b199a2193 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -37,6 +37,7 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_parse.h" struct sp_exec_fragment_shader { struct sp_fragment_shader base; @@ -116,6 +117,7 @@ exec_run( const struct sp_fragment_shader *base, static void exec_delete( struct sp_fragment_shader *base ) { + FREE((void *) base->shader.tokens); FREE(base); } @@ -137,7 +139,8 @@ softpipe_create_fs_exec(struct softpipe_context *softpipe, if (!shader) return NULL; - shader->base.shader = *templ; + /* we need to keep a local copy of the tokens */ + shader->base.shader.tokens = tgsi_dup_tokens(templ->tokens); shader->base.prepare = exec_prepare; shader->base.run = exec_run; shader->base.delete = exec_delete; diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 45056502b8..452e51fa79 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -54,9 +54,11 @@ struct tgsi_sampler; struct tgsi_exec_machine; +struct vertex_info; -/** Subclass of pipe_shader_state (though it doesn't really need to be). +/** + * Subclass of pipe_shader_state (though it doesn't really need to be). * * This is starting to look an awful lot like a quad pipeline stage... */ @@ -80,11 +82,10 @@ struct sp_fragment_shader { void (*delete)( struct sp_fragment_shader * ); }; -struct vertex_info; /** Subclass of pipe_shader_state */ struct sp_vertex_shader { - struct pipe_shader_state shader; + struct pipe_shader_state shader; /* Note: this field not actually used */ struct draw_vertex_shader *draw_data; }; diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 9e77b7e91b..24b91fbc79 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -102,10 +102,7 @@ softpipe_create_vs_state(struct pipe_context *pipe, return NULL; } - state->shader = *templ; - - state->draw_data = draw_create_vertex_shader(softpipe->draw, - &state->shader); + state->draw_data = draw_create_vertex_shader(softpipe->draw, templ); if (state->draw_data == NULL) { FREE( state ); return NULL; -- cgit v1.2.3 From aeae57693b31bf42833a9d51844fe92e3ab61034 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 15 May 2008 12:17:46 -0400 Subject: move the swizzling code to gallivm in preperation for code-generating it also some minor cleanups --- src/gallium/auxiliary/draw/draw_vs_llvm.c | 58 +---- src/gallium/auxiliary/gallivm/gallivm.h | 13 +- src/gallium/auxiliary/gallivm/gallivm_builtins.cpp | 267 ++++++++++----------- src/gallium/auxiliary/gallivm/gallivm_cpu.cpp | 56 ++++- src/gallium/auxiliary/gallivm/llvm_builtins.c | 1 - 5 files changed, 201 insertions(+), 194 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index be6907ed04..171da51dd5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -1,8 +1,8 @@ /************************************************************************** - * + * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. - * + * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the * "Software"), to deal in the Software without restriction, including @@ -10,11 +10,11 @@ * distribute, sub license, and/or sell copies of the Software, and to * permit persons to whom the Software is furnished to do so, subject to * the following conditions: - * + * * The above copyright notice and this permission notice (including the * next paragraph) shall be included in all copies or substantial portions * of the Software. - * + * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. @@ -22,7 +22,7 @@ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * + * **************************************************************************/ /* @@ -72,47 +72,9 @@ vs_llvm_run_linear( struct draw_vertex_shader *base, struct draw_llvm_vertex_shader *shader = (struct draw_llvm_vertex_shader *)base; - struct tgsi_exec_machine *machine = shader->machine; - unsigned int i, j; - unsigned slot; - - - for (i = 0; i < count; i += MAX_TGSI_VERTICES) { - unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); - - /* Swizzle inputs. - */ - for (j = 0; j < max_vertices; j++) { - for (slot = 0; slot < base->info.num_inputs; slot++) { - machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; - machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; - machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; - machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; - } - - input = (const float (*)[4])((const char *)input + input_stride); - } - - /* run shader */ - gallivm_cpu_vs_exec(shader->llvm_prog, - machine->Inputs, - machine->Outputs, - (float (*)[4]) constants, - machine->Temps); - - - /* Unswizzle all output results - */ - for (j = 0; j < max_vertices; j++) { - for (slot = 0; slot < base->info.num_outputs; slot++) { - output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; - } - output = (float (*)[4])((char *)output + output_stride); - } - } + gallivm_cpu_vs_exec(shader->llvm_prog, shader->machine, + input, base->info.num_inputs, output, base->info.num_outputs, + constants, count, input_stride, output_stride); } @@ -120,7 +82,7 @@ vs_llvm_run_linear( struct draw_vertex_shader *base, static void vs_llvm_delete( struct draw_vertex_shader *base ) { - struct draw_llvm_vertex_shader *shader = + struct draw_llvm_vertex_shader *shader = (struct draw_llvm_vertex_shader *)base; /* Do something to free compiled shader: @@ -140,7 +102,7 @@ draw_create_vs_llvm(struct draw_context *draw, struct draw_llvm_vertex_shader *vs; vs = CALLOC_STRUCT( draw_llvm_vertex_shader ); - if (vs == NULL) + if (vs == NULL) return NULL; /* we make a private copy of the tokens */ diff --git a/src/gallium/auxiliary/gallivm/gallivm.h b/src/gallium/auxiliary/gallivm/gallivm.h index b4d6555d2f..36a64a7747 100644 --- a/src/gallium/auxiliary/gallivm/gallivm.h +++ b/src/gallium/auxiliary/gallivm/gallivm.h @@ -90,10 +90,15 @@ void gallivm_prog_dump(struct gallivm_prog *prog, const char *file_prefix); struct gallivm_cpu_engine *gallivm_cpu_engine_create(struct gallivm_prog *prog); struct gallivm_cpu_engine *gallivm_global_cpu_engine(); int gallivm_cpu_vs_exec(struct gallivm_prog *prog, - struct tgsi_exec_vector *inputs, - struct tgsi_exec_vector *dests, - float (*consts)[4], - struct tgsi_exec_vector *temps); + struct tgsi_exec_machine *machine, + const float (*input)[4], + unsigned num_inputs, + float (*output)[4], + unsigned num_outputs, + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride); int gallivm_cpu_fs_exec(struct gallivm_prog *prog, float x, float y, float (*dests)[PIPE_MAX_SHADER_INPUTS][4], diff --git a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp index a6f8cd043b..0fc5c4ec5c 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp @@ -1,141 +1,140 @@ static const unsigned char llvm_builtins_data[] = { -0x42,0x43,0xc0,0xde,0x21,0x0c,0x00,0x00,0x2b,0x02,0x00,0x00,0x01,0x10,0x00,0x00, +0x42,0x43,0xc0,0xde,0x21,0x0c,0x00,0x00,0x29,0x02,0x00,0x00,0x01,0x10,0x00,0x00, 0x10,0x00,0x00,0x00,0x07,0x81,0x23,0x91,0x41,0xc8,0x04,0x49,0x06,0x10,0x32,0x39, 0x92,0x01,0x84,0x0c,0x25,0x05,0x08,0x19,0x1e,0x04,0x8b,0x62,0x80,0x14,0x45,0x02, 0x42,0x92,0x0b,0x42,0xa4,0x10,0x32,0x14,0x38,0x08,0x18,0x49,0x0a,0x32,0x44,0x24, 0x48,0x0a,0x90,0x21,0x23,0x44,0x72,0x80,0x8c,0x14,0x21,0x86,0x0a,0x8a,0x0a,0x64, 0x0c,0x1f,0x00,0x00,0x49,0x18,0x00,0x00,0x02,0x00,0x00,0x00,0x0b,0x04,0x00,0x0c, -0x00,0x00,0x00,0x00,0x51,0x20,0x00,0x00,0x13,0x00,0x00,0x00,0x32,0x22,0x48,0x09, +0x00,0x00,0x00,0x00,0x51,0x20,0x00,0x00,0x12,0x00,0x00,0x00,0x32,0x22,0x48,0x09, 0x20,0x65,0x82,0x84,0x00,0x26,0x45,0x48,0x05,0x09,0x26,0x45,0xc6,0x05,0x42,0x52, -0x26,0x08,0xb0,0x19,0x80,0x61,0x04,0x02,0x98,0x23,0x00,0x83,0x21,0x80,0x39,0x82, -0x60,0x0a,0x80,0x2e,0xd5,0x61,0x04,0x42,0x20,0x49,0x90,0x22,0x4d,0xa2,0x73,0x04, -0x08,0xb9,0x32,0x00,0x00,0x8a,0x10,0xc2,0x65,0xb8,0x42,0x84,0x10,0x42,0x0d,0x44, -0x11,0x00,0x18,0x01,0x28,0x82,0x08,0x00,0x13,0xa2,0x74,0xb0,0x03,0x3c,0xb0,0x83, -0x36,0x80,0x87,0x71,0x68,0x03,0x76,0x48,0x07,0x77,0xa8,0x07,0x7c,0x68,0x83,0x73, -0x70,0x87,0x7a,0xd8,0x70,0x0f,0xe5,0xd0,0x06,0xf0,0xa0,0x07,0x73,0x20,0x07,0x7a, -0x30,0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x6d,0x90,0x0e,0x71,0xa0,0x07,0x78,0xa0, -0x07,0x78,0xd0,0x06,0xe9,0x80,0x07,0x7a,0x80,0x07,0x7a,0x80,0x07,0x6d,0x90,0x0e, -0x71,0x60,0x07,0x7a,0x10,0x07,0x76,0xa0,0x07,0x71,0x60,0x07,0x6d,0x90,0x0e,0x73, -0x20,0x07,0x7a,0x30,0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x6d,0x90,0x0e,0x76,0x40, -0x07,0x7a,0x30,0x07,0x72,0xa0,0x07,0x76,0x40,0x07,0x6d,0x60,0x0e,0x73,0x20,0x07, -0x7a,0x30,0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x6d,0x60,0x0e,0x76,0x40,0x07,0x7a, -0x30,0x07,0x72,0xa0,0x07,0x76,0x40,0x07,0x6d,0x60,0x0f,0x76,0x40,0x07,0x7a,0x60, -0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x6d,0x60,0x0f,0x71,0x20,0x07,0x78,0xa0,0x07, -0x71,0x20,0x07,0x78,0xa0,0x07,0x71,0x20,0x07,0x78,0xd0,0x06,0xe1,0x00,0x07,0x7a, -0x00,0x07,0x7a,0x60,0x07,0x74,0xd0,0x06,0xe6,0x80,0x07,0x70,0xa0,0x07,0x71,0x20, -0x07,0x78,0xa0,0x07,0x71,0x20,0x07,0x78,0xa0,0xf3,0x40,0x88,0x04,0x32,0x32,0x02, -0x04,0x60,0x76,0xc6,0xfc,0x6c,0x48,0xa2,0x00,0x40,0x00,0x00,0x00,0x00,0x0c,0x49, -0x14,0x20,0x00,0x00,0x00,0x00,0x80,0x21,0xc9,0x02,0x00,0x01,0x00,0x00,0x00,0x30, -0x24,0x61,0x00,0x20,0x08,0x00,0x00,0x00,0x86,0x24,0x0b,0x00,0x04,0x00,0x00,0x00, -0xc0,0x90,0xa4,0x01,0x02,0x00,0x00,0x00,0x00,0x18,0x92,0x1c,0x40,0x00,0x00,0x00, -0x00,0x00,0x43,0x92,0x05,0x00,0x02,0x00,0x00,0x00,0x60,0x48,0x72,0x00,0x01,0x00, -0x00,0x00,0x00,0x0c,0x49,0x16,0x00,0x08,0x00,0x00,0x00,0x80,0x21,0x89,0x01,0x00, -0x41,0x00,0x00,0x00,0x90,0x05,0x02,0x00,0x10,0x00,0x00,0x00,0x32,0x1e,0x98,0x10, -0x19,0x11,0x4c,0x90,0x8c,0x09,0x26,0x47,0xc6,0x04,0x43,0x92,0x8a,0x59,0x8b,0x43, -0x50,0xd2,0x09,0x02,0x81,0xd2,0x73,0x50,0xc9,0x0c,0x2a,0x99,0x41,0x25,0x33,0xa8, -0x64,0x56,0x28,0x66,0x2d,0x0e,0x41,0xcf,0x2a,0x15,0x04,0x4a,0xcf,0x41,0x25,0x33, -0xa8,0x64,0x06,0x95,0xcc,0xa0,0x92,0x59,0x01,0x00,0x00,0x00,0x53,0x82,0x26,0x0c, -0x04,0x00,0x00,0x00,0x22,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00, -0x05,0x00,0x00,0x00,0x04,0xc6,0x08,0x40,0x10,0x04,0xe1,0x70,0x18,0x23,0x00,0x41, -0x10,0x84,0xc3,0x60,0x04,0x00,0x00,0x00,0x93,0x0c,0xce,0x43,0x4c,0x31,0x3c,0x8e, -0x34,0xc9,0x30,0x41,0xc2,0x14,0x03,0x34,0x51,0x93,0x0c,0x4d,0x44,0x4c,0x31,0x44, -0x8d,0x35,0x56,0x01,0x04,0xc3,0x55,0x21,0x16,0x0e,0x04,0x00,0x0f,0x00,0x00,0x00, -0x46,0x41,0x08,0xcc,0x73,0x9b,0x05,0x21,0x30,0xcf,0x6e,0x18,0x84,0x00,0x2c,0x8b, -0x35,0x04,0x80,0x39,0x04,0x81,0x5d,0x20,0x80,0x0f,0x0c,0x43,0xe4,0xd3,0x36,0x81, -0x04,0x3e,0x30,0x0c,0x91,0x4f,0x5b,0x05,0x12,0xf8,0xc0,0x30,0x44,0x7e,0x7d,0x00, -0x05,0xd1,0x4c,0x11,0x66,0x12,0x83,0xc0,0x3c,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x61,0x20,0x00,0x00,0x2a,0x00,0x00,0x00,0x13,0x04,0x43,0x2c,0x10,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x24,0x8a,0xa0,0x0c,0x46,0x00,0x4a,0x80,0xc2,0x1c,0x84,0x55, -0x55,0xd6,0x1c,0x84,0x45,0x51,0x16,0x81,0x19,0x80,0x11,0x80,0x31,0x02,0x10,0x04, -0x41,0xfc,0x03,0x00,0x63,0x08,0x0d,0x34,0xc9,0x70,0x55,0xc2,0x2c,0x43,0x20,0x60, -0x73,0x0c,0xd3,0x15,0x8d,0x21,0x34,0xd1,0x18,0x42,0xf3,0x8c,0x55,0x00,0x81,0xa0, -0x6d,0x73,0x0c,0x19,0xe7,0x60,0x87,0x52,0x38,0x10,0x00,0x00,0x13,0x00,0x00,0x00, -0x17,0x60,0x20,0xc5,0x74,0x10,0x8d,0x65,0x14,0x13,0xf3,0xd4,0xb4,0x6d,0x14,0x13, -0xf3,0xd4,0xb8,0x69,0x14,0x13,0xf3,0xd4,0xb6,0x75,0x14,0x13,0xf3,0xd4,0xba,0x35, -0x0c,0x13,0xf3,0x9c,0x80,0xe4,0x36,0x48,0x81,0x10,0xc3,0x4a,0x4c,0x54,0xd4,0x6c, -0x8b,0x23,0x28,0x76,0x41,0x4c,0xcc,0xa3,0x1b,0x07,0x21,0x00,0xcb,0x72,0x00,0x05, -0xd1,0x4c,0x11,0x66,0x18,0x83,0xc0,0x3c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, -0x61,0x20,0x00,0x00,0x82,0x00,0x00,0x00,0x13,0x04,0x47,0x2c,0x10,0x00,0x00,0x00, -0x08,0x00,0x00,0x00,0x24,0x46,0x00,0x8a,0xa0,0x0c,0x4a,0xa0,0x14,0x8a,0xa1,0x1c, -0x68,0x8c,0x00,0x10,0x9a,0x83,0x80,0xa8,0x48,0x9a,0x83,0x80,0xa6,0x4a,0x9a,0x83, -0x80,0xa6,0xc8,0x02,0x63,0x08,0x0d,0x64,0xdb,0xc0,0x49,0x06,0xee,0x22,0xc6,0x10, -0x9a,0xc9,0xbc,0x81,0x93,0x0c,0xdf,0x45,0x4c,0x31,0x38,0x4f,0x37,0xcb,0x10,0x08, -0x60,0x30,0xc8,0x10,0x06,0x0e,0x36,0x86,0xd0,0x44,0x36,0x06,0x03,0x27,0x19,0xc8, -0xe0,0x22,0x66,0x19,0x06,0xa2,0x0c,0x06,0x19,0xc2,0xe0,0xc1,0xc6,0x10,0x9a,0xc8, -0xce,0x60,0xe0,0x24,0x03,0x1a,0x5c,0xc4,0x2c,0xc3,0x40,0xa4,0xc1,0x40,0x45,0x20, -0x06,0x81,0x19,0x08,0x83,0x0c,0x6a,0xe0,0x64,0x63,0x08,0x8d,0x64,0x6c,0x30,0x70, -0x92,0xa1,0x0d,0x2e,0x62,0x96,0xa1,0x30,0xdc,0x60,0xa0,0x22,0x10,0x83,0xc0,0x0c, -0x84,0x41,0x86,0x37,0x78,0xb2,0x31,0x84,0x46,0xb2,0x38,0x18,0x38,0xc9,0x20,0x07, -0x17,0x31,0xcb,0x50,0x18,0x73,0x30,0x50,0x11,0xac,0xc1,0x00,0x07,0xc4,0x20,0x03, -0x1d,0x38,0x1a,0xd6,0xc1,0x40,0x45,0xb0,0x06,0x03,0x1c,0x10,0x83,0x0c,0x76,0xf0, -0x68,0x78,0x07,0xe1,0x40,0x00,0x00,0x00,0x4c,0x00,0x00,0x00,0x56,0x62,0x08,0xcc, -0x63,0xef,0x3a,0xa9,0x00,0x19,0x7b,0x73,0x23,0x73,0xf9,0xa1,0x91,0x31,0x98,0x62, -0x62,0x9e,0x7b,0xb7,0x06,0x62,0x62,0x1e,0xda,0x1c,0x88,0x89,0x79,0x6a,0x7b,0x20, -0x26,0xe6,0xb1,0x6d,0x83,0x98,0x98,0xe7,0x36,0x92,0x43,0x70,0x9a,0xca,0xd6,0x73, -0xa3,0x79,0x26,0xe6,0xb9,0x77,0x3f,0x22,0x0c,0x9b,0x21,0x18,0x9f,0xb6,0x90,0x64, -0x62,0x9e,0xda,0x9f,0x98,0xc7,0x36,0x9b,0x67,0x62,0x9e,0x7b,0xf7,0x23,0xc2,0xb0, -0x19,0x82,0xf1,0x6b,0x53,0x79,0x26,0xe6,0xb1,0x6f,0x3f,0x22,0x0c,0x9b,0x21,0x18, -0x9f,0xb6,0x98,0x62,0x62,0x9e,0xbb,0xb7,0x97,0x67,0x62,0x1e,0xfb,0xf6,0x23,0xc2, -0xb0,0x19,0x82,0xf1,0x6b,0x43,0x31,0x04,0xa7,0xa9,0x6c,0xdd,0x66,0x0a,0x81,0x79, -0xf0,0xfa,0x08,0x16,0xc1,0x69,0x06,0x5f,0x70,0x9a,0xe9,0xc6,0x49,0x01,0xc8,0xd8, -0x9b,0x1b,0x99,0xcb,0x4f,0x0c,0x8d,0xad,0x18,0x13,0xf3,0xdc,0x3b,0x6f,0x35,0xc7, -0xc4,0x3c,0x79,0x5d,0xdf,0x06,0x52,0x08,0xcc,0x53,0xdf,0x26,0x62,0x4c,0xcc,0x63, -0xdf,0xb7,0xb9,0x1c,0x02,0xf3,0xe0,0x75,0x5d,0x5b,0xc7,0x20,0x30,0x8f,0x79,0x14, -0x13,0xf3,0xd4,0xf5,0x19,0x2c,0x82,0xd3,0x0c,0xbe,0xe0,0x34,0x13,0xce,0x5b,0x0b, -0x22,0x38,0x4d,0x85,0xd3,0x35,0x6d,0x37,0xc5,0xc4,0x3c,0x79,0x4d,0x1a,0x40,0xc6, -0xde,0xdc,0xc8,0x5c,0x7e,0x64,0x70,0x8c,0x83,0x10,0x9c,0xa6,0xb2,0x94,0x42,0x60, -0x1e,0x7b,0x37,0x19,0x43,0x70,0x9a,0x0a,0xa7,0xcd,0xa4,0x98,0x98,0xc7,0xbe,0x8d, -0xc5,0x98,0x98,0xe7,0xee,0x7b,0x3b,0x29,0x26,0xe6,0xb1,0xf3,0x13,0x58,0x04,0xa7, -0x19,0x7c,0xc1,0x69,0x26,0x9b,0xb6,0x0f,0x43,0x70,0x9a,0xaa,0xb6,0x6d,0xc4,0x98, -0x98,0xc7,0xce,0xf1,0x03,0x28,0x88,0x66,0x8a,0x30,0x00,0x00,0x00,0x00,0x00,0x00, -0x61,0x20,0x00,0x00,0x4a,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00, -0x07,0x00,0x00,0x00,0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0xbd, -0x61,0x8c,0x04,0x10,0x1e,0xe1,0x19,0xc6,0x48,0x02,0xe1,0x11,0x1e,0x00,0x00,0x00, -0x63,0x08,0xcd,0x63,0xd5,0xc0,0x31,0x84,0x06,0xb2,0x6b,0xe0,0x18,0x42,0x13,0x59, -0x36,0x70,0x0c,0xa1,0x71,0x6c,0x1b,0x38,0x16,0x02,0x04,0xc7,0x64,0x61,0x1a,0x37, -0x16,0x01,0x04,0x48,0x35,0xc7,0x20,0x79,0xcf,0x58,0x04,0x10,0x20,0xd5,0x1c,0xc3, -0x07,0x06,0xd0,0x58,0x04,0x10,0x20,0xd5,0x1c,0x43,0x18,0x88,0x41,0x34,0x16,0x01, -0x04,0x48,0x35,0xc7,0x30,0x06,0x64,0xe0,0x98,0x37,0xd0,0xc0,0x60,0xa0,0x89,0xc1, -0x40,0x23,0x83,0x81,0x63,0x21,0x40,0x70,0x50,0x66,0x70,0x06,0x68,0x90,0x06,0x58, -0x06,0xe1,0x40,0x00,0x25,0x00,0x00,0x00,0x56,0x52,0x4c,0xcc,0x73,0xd3,0x56,0x41, -0x4c,0xcc,0x53,0xdb,0x05,0x31,0x31,0xcf,0x6d,0x19,0xc4,0xc4,0x3c,0xba,0x6d,0x10, -0x13,0xf3,0xf4,0xd6,0x41,0x08,0xc0,0xb2,0x18,0x46,0x21,0x38,0x4d,0x85,0x9b,0x46, -0x21,0x38,0x4d,0xb5,0x9b,0x8a,0x21,0x00,0xcb,0x82,0xdf,0x66,0x62,0x08,0x4e,0x53, -0xdd,0xb7,0x9d,0x18,0x82,0xd3,0x54,0xb7,0x6e,0x28,0x86,0xe0,0x34,0xd5,0xdd,0xdb, -0x47,0x31,0x31,0x4f,0x9d,0x9b,0x87,0x21,0x00,0xcb,0x52,0xdf,0x06,0x62,0x08,0xc0, -0xb2,0xd4,0xbc,0x59,0x10,0x82,0xd3,0x54,0x96,0x62,0x08,0x4e,0x53,0xe1,0xb6,0x85, -0x14,0x13,0xf3,0xd8,0xb4,0x8d,0x14,0x13,0xf3,0xd8,0xb9,0x89,0x18,0x02,0xb0,0x2c, -0xf6,0x6d,0x24,0x86,0x00,0x2c,0x8b,0xcd,0x1b,0x87,0x21,0x38,0x4d,0x55,0xd3,0xd6, -0x30,0x54,0xc0,0x72,0x00,0x05,0xd1,0x4c,0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00, -0x61,0x20,0x00,0x00,0x19,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x24,0x4a,0x60,0x04,0x80,0xc2,0x0c,0x00,0x00,0x00,0x00,0x00, -0x63,0x08,0xcd,0x33,0x16,0x01,0x04,0x48,0x34,0xc7,0x00,0x49,0xcf,0x58,0x04,0x10, -0x28,0xd1,0x1c,0xc3,0x44,0x39,0x58,0x85,0x03,0x01,0x00,0x00,0x0a,0x00,0x00,0x00, -0x16,0x41,0x4c,0xcc,0x63,0xdb,0x04,0x31,0x31,0x4f,0x6e,0x0d,0x43,0x05,0x2c,0x07, -0x50,0x10,0xcd,0x14,0x61,0x56,0x41,0x4c,0xcc,0xd3,0x1b,0x45,0x21,0x00,0xcb,0xb2, -0x9b,0x04,0x21,0x00,0xcb,0x02,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, -0x1b,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00, -0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x00,0x63,0x08,0xcd,0x33, -0x16,0x01,0x04,0xca,0x34,0xc7,0x20,0x51,0xcf,0x1c,0x43,0x45,0x41,0x73,0x0c,0x16, -0x15,0xcd,0x31,0x5c,0x94,0x83,0x58,0x38,0x10,0x00,0x00,0x00,0x0b,0x00,0x00,0x00, -0x86,0x51,0x4c,0xcc,0x53,0xe7,0x76,0x51,0x4c,0xcc,0x53,0xdb,0x36,0x41,0x4c,0xcc, -0x63,0x5b,0x05,0x31,0x31,0x8f,0x6e,0x0d,0x43,0x05,0x2c,0x66,0x41,0x4c,0xcc,0xd3, -0x1f,0x40,0x41,0x34,0x53,0x84,0x19,0x05,0x21,0x00,0xcb,0x02,0x00,0x00,0x00,0x00, -0x61,0x20,0x00,0x00,0x2f,0x00,0x00,0x00,0x13,0x04,0x45,0x2c,0x10,0x00,0x00,0x00, -0x03,0x00,0x00,0x00,0x24,0xca,0xa0,0x04,0x46,0x00,0x8a,0x80,0xc0,0x08,0x00,0x00, -0x63,0x08,0x0d,0x34,0xc9,0x30,0x49,0xc4,0x2c,0x03,0x11,0x50,0x63,0x08,0xcd,0x33, -0xc9,0x50,0x49,0xc4,0x2c,0x03,0x21,0x58,0x63,0x08,0x4d,0x34,0xc9,0x70,0x49,0xc4, -0x2c,0x03,0x31,0x60,0x63,0x08,0x8d,0x33,0xc9,0x90,0x49,0x84,0x69,0x22,0x70,0xc3, -0x27,0x1c,0x08,0x00,0x1a,0x00,0x00,0x00,0x96,0x51,0x4c,0xcc,0x53,0xdf,0x66,0x41, -0x08,0xcc,0x83,0xdb,0x04,0x31,0x31,0x4f,0x6d,0x15,0xc4,0xc4,0x3c,0xb7,0x61,0x10, -0x02,0xf3,0xf0,0x47,0x20,0xb9,0x0d,0x52,0x20,0xc4,0xb0,0x12,0x13,0x15,0x35,0xdb, -0xe2,0x08,0x8a,0x5d,0x10,0x13,0xf3,0xec,0x37,0x90,0x2c,0x4e,0xf4,0x47,0x87,0x54, -0xd7,0x17,0x70,0x2c,0x4e,0xf4,0x47,0x87,0x74,0x02,0xc8,0xe2,0x44,0x7f,0x74,0x48, -0xb9,0x69,0x14,0x02,0xf3,0xd4,0xb8,0x6d,0x18,0x11,0x31,0x55,0xc0,0x62,0x0d,0x43, -0x05,0x2c,0x07,0x50,0x10,0xcd,0x14,0x61,0x46,0x31,0x08,0xcc,0x03,0x00,0x00,0x00, -0x00,0x00,0x00,0x00,0x71,0x20,0x00,0x00,0x12,0x00,0x00,0x00,0x66,0x40,0x54,0x82, -0x23,0x59,0xc2,0x20,0x09,0x92,0x1d,0x18,0x4f,0x84,0x34,0x53,0x61,0x03,0xc4,0xe3, -0x58,0x85,0x05,0x14,0xbe,0x34,0x45,0xb5,0x21,0x10,0x82,0x23,0x15,0x46,0x30,0x2c, -0xc8,0x64,0x02,0x06,0xf0,0x3c,0x91,0x73,0x19,0x00,0xe1,0x4b,0x53,0x64,0x0a,0x84, -0x84,0x34,0x85,0x31,0x10,0x0a,0xb2,0x3c,0x56,0x30,0x08,0xcc,0x63,0x0b,0x44,0x25, -0x21,0x0d,0x00,0x00,0x00,0x00,0x00,0x00}; +0x26,0x08,0xae,0x19,0x80,0x61,0x04,0x02,0x98,0x23,0x00,0x83,0x29,0x80,0x21,0x00, +0xb2,0x73,0x04,0x01,0x51,0x8a,0xf4,0x08,0x92,0xa4,0x39,0x47,0x80,0x50,0x2b,0x03, +0x00,0xa0,0x08,0x21,0x5c,0x46,0x2b,0x44,0x08,0x21,0xd4,0x40,0x14,0x01,0x80,0x11, +0x80,0x22,0x88,0x00,0x13,0xa2,0x74,0xb0,0x03,0x3c,0xb0,0x83,0x36,0x80,0x87,0x71, +0x68,0x03,0x76,0x48,0x07,0x77,0xa8,0x07,0x7c,0x68,0x83,0x73,0x70,0x87,0x7a,0xd8, +0x70,0x0f,0xe5,0xd0,0x06,0xf0,0xa0,0x07,0x73,0x20,0x07,0x7a,0x30,0x07,0x72,0xa0, +0x07,0x73,0x20,0x07,0x6d,0x90,0x0e,0x71,0xa0,0x07,0x78,0xa0,0x07,0x78,0xd0,0x06, +0xe9,0x80,0x07,0x7a,0x80,0x07,0x7a,0x80,0x07,0x6d,0x90,0x0e,0x71,0x60,0x07,0x7a, +0x10,0x07,0x76,0xa0,0x07,0x71,0x60,0x07,0x6d,0x90,0x0e,0x73,0x20,0x07,0x7a,0x30, +0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x6d,0x90,0x0e,0x76,0x40,0x07,0x7a,0x30,0x07, +0x72,0xa0,0x07,0x76,0x40,0x07,0x6d,0x60,0x0e,0x73,0x20,0x07,0x7a,0x30,0x07,0x72, +0xa0,0x07,0x73,0x20,0x07,0x6d,0x60,0x0e,0x76,0x40,0x07,0x7a,0x30,0x07,0x72,0xa0, +0x07,0x76,0x40,0x07,0x6d,0x60,0x0f,0x76,0x40,0x07,0x7a,0x60,0x07,0x74,0xa0,0x07, +0x76,0x40,0x07,0x6d,0x60,0x0f,0x71,0x20,0x07,0x78,0xa0,0x07,0x71,0x20,0x07,0x78, +0xa0,0x07,0x71,0x20,0x07,0x78,0xd0,0x06,0xe1,0x00,0x07,0x7a,0x00,0x07,0x7a,0x60, +0x07,0x74,0xd0,0x06,0xe6,0x80,0x07,0x70,0xa0,0x07,0x71,0x20,0x07,0x78,0xa0,0x07, +0x71,0x20,0x07,0x78,0xa0,0xf3,0x40,0x88,0x04,0x32,0x32,0x02,0x04,0x20,0x76,0x46, +0xfc,0x6c,0x48,0x92,0x00,0x40,0x00,0x00,0x00,0x00,0x0c,0x49,0x12,0x20,0x00,0x00, +0x00,0x00,0x80,0x21,0x89,0x02,0x00,0x01,0x00,0x00,0x00,0x30,0x24,0x59,0x00,0x20, +0x08,0x00,0x00,0x00,0x86,0x24,0x0a,0x00,0x04,0x00,0x00,0x00,0xc0,0x90,0x84,0x01, +0x02,0x00,0x00,0x00,0x00,0x18,0x92,0x1c,0x40,0x00,0x00,0x00,0x00,0x00,0x43,0x12, +0x05,0x00,0x02,0x00,0x00,0x00,0x60,0x48,0x72,0x00,0x01,0x00,0x00,0x00,0x00,0x0c, +0x49,0x14,0x00,0x08,0x00,0x00,0x00,0x80,0x21,0x49,0x01,0x00,0x41,0x00,0x00,0x00, +0x90,0x05,0x02,0x00,0x10,0x00,0x00,0x00,0x32,0x1e,0x98,0x10,0x19,0x11,0x4c,0x90, +0x8c,0x09,0x26,0x47,0xc6,0x04,0x43,0x8a,0x8a,0x59,0x8b,0x43,0x50,0xd2,0x09,0x02, +0x81,0xd2,0x73,0x50,0xc9,0x0c,0x2a,0x99,0x41,0x25,0x33,0xa8,0x64,0x56,0x28,0x66, +0x2d,0x0e,0x41,0xcf,0x2a,0x15,0x04,0x4a,0xcf,0x41,0x25,0x33,0xa8,0x64,0x06,0x95, +0xcc,0xa0,0x92,0x59,0x01,0x00,0x00,0x00,0x53,0x82,0x26,0x0c,0x04,0x00,0x00,0x00, +0x22,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x05,0x00,0x00,0x00, +0x04,0xc6,0x08,0x40,0x10,0x04,0xe1,0x70,0x18,0x23,0x00,0x41,0x10,0x84,0xc3,0x60, +0x04,0x00,0x00,0x00,0x93,0x0c,0xce,0x43,0x4c,0x31,0x3c,0x8e,0x34,0xc9,0x30,0x41, +0xc2,0x14,0x03,0x34,0x51,0x93,0x0c,0x4d,0x44,0x4c,0x31,0x44,0x8d,0x35,0x56,0x01, +0x04,0xc3,0x55,0x21,0x16,0x0e,0x04,0x00,0x0f,0x00,0x00,0x00,0x46,0x41,0x08,0xcc, +0x73,0x9b,0x05,0x21,0x30,0xcf,0x6e,0x18,0x84,0x00,0x2c,0x8b,0x35,0x04,0x80,0x39, +0x04,0x81,0x5d,0x20,0x80,0x0f,0x0c,0x43,0xe4,0xd3,0x36,0x81,0x04,0x3e,0x30,0x0c, +0x91,0x4f,0x5b,0x05,0x12,0xf8,0xc0,0x30,0x44,0x7e,0x7d,0x00,0x05,0xd1,0x4c,0x11, +0x66,0x12,0x83,0xc0,0x3c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x2a,0x00,0x00,0x00,0x13,0x04,0x43,0x2c,0x10,0x00,0x00,0x00,0x08,0x00,0x00,0x00, +0x24,0x8a,0xa0,0x0c,0x46,0x00,0x4a,0x80,0xc2,0x1c,0x84,0x55,0x55,0xd6,0x1c,0x84, +0x45,0x51,0x16,0x81,0x19,0x80,0x11,0x80,0x31,0x02,0x10,0x04,0x41,0xfc,0x03,0x00, +0x63,0x08,0x0d,0x34,0xc9,0x70,0x55,0xc2,0x2c,0x43,0x20,0x60,0x73,0x0c,0xd3,0x15, +0x8d,0x21,0x34,0xd1,0x18,0x42,0xf3,0x8c,0x55,0x00,0x81,0xa0,0x6d,0x73,0x0c,0x19, +0xe7,0x60,0x87,0x52,0x38,0x10,0x00,0x00,0x13,0x00,0x00,0x00,0x17,0x60,0x20,0xc5, +0x74,0x10,0x8d,0x65,0x14,0x13,0xf3,0xd4,0xb4,0x6d,0x14,0x13,0xf3,0xd4,0xb8,0x69, +0x14,0x13,0xf3,0xd4,0xb6,0x75,0x14,0x13,0xf3,0xd4,0xba,0x35,0x0c,0x13,0xf3,0x9c, +0x80,0xe4,0x36,0x48,0x81,0x10,0xc3,0x4a,0x4c,0x54,0xd4,0x6c,0x8b,0x23,0x28,0x76, +0x41,0x4c,0xcc,0xa3,0x1b,0x07,0x21,0x00,0xcb,0x72,0x00,0x05,0xd1,0x4c,0x11,0x66, +0x18,0x83,0xc0,0x3c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x81,0x00,0x00,0x00,0x13,0x04,0x4d,0x2c,0x10,0x00,0x00,0x00,0x04,0x00,0x00,0x00, +0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x91,0x11,0x00,0x00,0x00, +0x63,0x08,0x4d,0x64,0x16,0xc1,0x49,0x86,0xab,0x22,0x66,0x19,0x02,0x01,0x1b,0x43, +0x70,0xa2,0x59,0x82,0x61,0x0c,0xe1,0x89,0x66,0x09,0x86,0x81,0x0a,0x20,0x0b,0x34, +0x61,0x8e,0x81,0xda,0xa2,0x31,0x84,0x46,0xb2,0x8e,0xe0,0x24,0x83,0x57,0x11,0xb3, +0x0c,0x44,0xf1,0x8d,0x21,0x38,0xd2,0x2c,0x81,0x31,0x86,0xf0,0x48,0xb3,0x04,0xc6, +0x40,0x05,0x00,0x06,0x44,0x18,0x14,0x73,0x0c,0x9c,0x18,0x48,0x63,0x08,0xcd,0x64, +0x64,0x40,0x70,0x92,0xa1,0x0c,0x2a,0x62,0x96,0xe1,0x40,0xcc,0x60,0x0c,0xc1,0x99, +0x66,0x09,0x92,0x31,0x84,0x67,0x9a,0x25,0x48,0x06,0x2a,0x80,0x33,0x38,0xd0,0x00, +0x99,0x63,0x18,0x83,0x34,0x98,0xc6,0x10,0x1a,0xc8,0xd6,0x80,0xe0,0x24,0x03,0x1b, +0x54,0xc4,0x2c,0x83,0xb2,0xb4,0xc1,0x18,0x82,0x03,0xcd,0x12,0x30,0x63,0x08,0x0f, +0x34,0x4b,0xc0,0x0c,0x54,0x00,0x6e,0xa0,0xbc,0xc1,0x32,0xc7,0xa0,0x06,0x70,0x00, +0x61,0x1c,0x84,0x03,0x01,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x76,0x52,0x4c,0xcc, +0x73,0xd3,0x24,0x05,0x64,0xec,0xcd,0x8d,0xcc,0xe5,0x87,0x46,0xc6,0x50,0x8a,0x89, +0x79,0xee,0xdb,0x54,0x8a,0x89,0x79,0xee,0xdd,0x1a,0x88,0x89,0x79,0x68,0x73,0x20, +0x26,0xe6,0xa9,0xed,0x81,0x98,0x98,0xc7,0x36,0x0b,0x62,0x62,0x9e,0xdb,0x32,0x88, +0x89,0x79,0x72,0xd3,0x20,0x26,0xe6,0xd9,0x8d,0x83,0x98,0x98,0xa7,0xb7,0x95,0x62, +0x62,0x9e,0xbb,0x27,0x2d,0x20,0x63,0x6f,0x6e,0x64,0x2e,0x3a,0x34,0x35,0x56,0x62, +0x08,0x4e,0x53,0xd9,0xba,0xb5,0x14,0x02,0xf3,0xe0,0xf5,0x25,0x2c,0x82,0xd3,0x0c, +0xbe,0xe0,0x34,0xd3,0x8d,0x9b,0x88,0x21,0x38,0xcd,0x60,0xd7,0x24,0x01,0x63,0xec, +0xcd,0x8d,0xcc,0x45,0x87,0x44,0x80,0x8c,0xbd,0xb9,0x91,0xb9,0xfc,0xc4,0xd0,0x90, +0x02,0x8c,0xb1,0x37,0x37,0x32,0x97,0x1f,0x73,0x29,0x26,0xe6,0xc1,0x71,0x7b,0x29, +0x26,0xe6,0xc1,0x77,0xfb,0x28,0x04,0xe6,0xa9,0x6f,0x52,0x01,0x32,0xf6,0xe6,0x46, +0xe6,0xa2,0x13,0x73,0x63,0x18,0x83,0xc0,0x3c,0xb6,0x41,0x08,0x4e,0x33,0x58,0x47, +0x31,0x31,0x4f,0x5d,0x1f,0xc3,0x22,0x38,0xcd,0xe0,0x0b,0x4e,0x33,0xe1,0xbc,0xa5, +0x18,0x82,0xd3,0x0c,0x77,0x6e,0x20,0xc5,0xc4,0x3c,0xb5,0x4e,0x3a,0x40,0xc6,0xde, +0xdc,0xc8,0x5c,0x7e,0x64,0x70,0x2c,0xa4,0x98,0x98,0xa7,0xee,0xed,0x82,0x10,0x9c, +0xa6,0xba,0x81,0x44,0x70,0x9a,0xc1,0x17,0x9c,0x66,0x32,0x93,0x42,0x60,0x1e,0x7b, +0xb7,0x98,0x62,0x62,0x9e,0xbc,0x36,0x16,0x43,0x70,0x9a,0x0a,0xa7,0x6d,0xa4,0x98, +0x98,0xc7,0xbe,0x8d,0xa4,0x98,0x98,0xc7,0xce,0x0d,0xc6,0x10,0x9c,0x66,0xc0,0x7b, +0x12,0x02,0x32,0xf6,0xe6,0x46,0xe6,0xa2,0x33,0x13,0x73,0x06,0x8b,0xe0,0x34,0x83, +0x2f,0x38,0xcd,0x64,0xd3,0xe6,0x61,0x08,0x4e,0x53,0xd5,0xf6,0x01,0x14,0x44,0x33, +0x45,0x18,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x4a,0x00,0x00,0x00, +0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x24,0xca,0x60,0x04, +0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0xb9,0x61,0x0c,0x04,0x10,0x1e,0xe1,0x19,0xc6, +0x40,0x02,0xe1,0x11,0x1e,0x00,0x00,0x00,0x63,0x08,0xcd,0x63,0x15,0xc1,0x31,0x84, +0x06,0xb2,0x8b,0xe0,0x18,0x42,0x13,0x59,0x46,0x70,0x0c,0xa1,0x71,0x6c,0x23,0x38, +0x16,0x02,0x04,0xc7,0x64,0x61,0x1a,0x37,0x16,0x01,0x04,0x48,0x35,0xc7,0x20,0x79, +0xcf,0x58,0x04,0x10,0x20,0xd5,0x1c,0xc3,0x07,0x06,0xd0,0x58,0x04,0x10,0x20,0xd5, +0x1c,0x43,0x18,0x88,0x41,0x34,0x16,0x01,0x04,0x48,0x35,0xc7,0x30,0x06,0x64,0xe0, +0x98,0x47,0xd0,0xc0,0x80,0xa0,0x89,0x01,0x41,0x23,0x03,0x82,0x63,0x21,0x40,0x70, +0x50,0x66,0x70,0x06,0x68,0x90,0x06,0x58,0x06,0xe1,0x40,0x00,0x25,0x00,0x00,0x00, +0x56,0x52,0x4c,0xcc,0x73,0xd3,0x56,0x41,0x4c,0xcc,0x53,0xdb,0x05,0x31,0x31,0xcf, +0x6d,0x19,0xc4,0xc4,0x3c,0xba,0x6d,0x10,0x13,0xf3,0xf4,0xd6,0x41,0x08,0xc0,0xb2, +0x18,0x46,0x21,0x38,0x4d,0x85,0x9b,0x46,0x21,0x38,0x4d,0xb5,0x9b,0x8a,0x21,0x00, +0xcb,0x82,0xdf,0x66,0x62,0x08,0x4e,0x53,0xdd,0xb7,0x9d,0x18,0x82,0xd3,0x54,0xb7, +0x6e,0x28,0x86,0xe0,0x34,0xd5,0xdd,0xdb,0x47,0x31,0x31,0x4f,0x9d,0x9b,0x87,0x21, +0x00,0xcb,0x52,0xdf,0x06,0x62,0x08,0xc0,0xb2,0xd4,0xbc,0x59,0x10,0x82,0xd3,0x54, +0x96,0x62,0x08,0x4e,0x53,0xe1,0xb6,0x85,0x14,0x13,0xf3,0xd8,0xb4,0x8d,0x14,0x13, +0xf3,0xd8,0xb9,0x89,0x18,0x02,0xb0,0x2c,0xf6,0x6d,0x24,0x86,0x00,0x2c,0x8b,0xcd, +0x1b,0x87,0x21,0x38,0x4d,0x55,0xd3,0xd6,0x30,0x54,0xc0,0x72,0x00,0x05,0xd1,0x4c, +0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x19,0x00,0x00,0x00, +0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0x4a,0x60,0x04, +0x80,0xc2,0x0c,0x00,0x00,0x00,0x00,0x00,0x63,0x08,0xcd,0x33,0x16,0x01,0x04,0x48, +0x34,0xc7,0x00,0x49,0xcf,0x58,0x04,0x10,0x28,0xd1,0x1c,0xc3,0x44,0x39,0x58,0x85, +0x03,0x01,0x00,0x00,0x0a,0x00,0x00,0x00,0x16,0x41,0x4c,0xcc,0x63,0xdb,0x04,0x31, +0x31,0x4f,0x6e,0x0d,0x43,0x05,0x2c,0x07,0x50,0x10,0xcd,0x14,0x61,0x56,0x41,0x4c, +0xcc,0xd3,0x1b,0x45,0x21,0x00,0xcb,0xb2,0x9b,0x04,0x21,0x00,0xcb,0x02,0x00,0x00, +0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x1b,0x00,0x00,0x00,0x13,0x04,0x41,0x2c, +0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80, +0xc2,0x0c,0x00,0x00,0x63,0x08,0xcd,0x33,0x16,0x01,0x04,0xca,0x34,0xc7,0x20,0x51, +0xcf,0x1c,0x43,0x45,0x41,0x73,0x0c,0x16,0x15,0xcd,0x31,0x5c,0x94,0x83,0x58,0x38, +0x10,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x86,0x51,0x4c,0xcc,0x53,0xe7,0x76,0x51, +0x4c,0xcc,0x53,0xdb,0x36,0x41,0x4c,0xcc,0x63,0x5b,0x05,0x31,0x31,0x8f,0x6e,0x0d, +0x43,0x05,0x2c,0x66,0x41,0x4c,0xcc,0xd3,0x1f,0x40,0x41,0x34,0x53,0x84,0x19,0x05, +0x21,0x00,0xcb,0x02,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x2f,0x00,0x00,0x00, +0x13,0x04,0x45,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0xca,0xa0,0x04, +0x46,0x00,0x8a,0x80,0xc0,0x08,0x00,0x00,0x63,0x08,0x0d,0x34,0xc9,0x30,0x49,0xc4, +0x2c,0x03,0x11,0x50,0x63,0x08,0xcd,0x33,0xc9,0x50,0x49,0xc4,0x2c,0x03,0x21,0x58, +0x63,0x08,0x4d,0x34,0xc9,0x70,0x49,0xc4,0x2c,0x03,0x31,0x60,0x63,0x08,0x8d,0x33, +0xc9,0x90,0x49,0x84,0x69,0x22,0x70,0xc3,0x27,0x1c,0x08,0x00,0x1a,0x00,0x00,0x00, +0x96,0x51,0x4c,0xcc,0x53,0xdf,0x66,0x41,0x08,0xcc,0x83,0xdb,0x04,0x31,0x31,0x4f, +0x6d,0x15,0xc4,0xc4,0x3c,0xb7,0x61,0x10,0x02,0xf3,0xf0,0x47,0x20,0xb9,0x0d,0x52, +0x20,0xc4,0xb0,0x12,0x13,0x15,0x35,0xdb,0xe2,0x08,0x8a,0x5d,0x10,0x13,0xf3,0xec, +0x37,0x90,0x2c,0x4e,0xf4,0x47,0x87,0x54,0xd7,0x17,0x70,0x2c,0x4e,0xf4,0x47,0x87, +0x74,0x02,0xc8,0xe2,0x44,0x7f,0x74,0x48,0xb9,0x69,0x14,0x02,0xf3,0xd4,0xb8,0x6d, +0x18,0x11,0x31,0x55,0xc0,0x62,0x0d,0x43,0x05,0x2c,0x07,0x50,0x10,0xcd,0x14,0x61, +0x46,0x31,0x08,0xcc,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x71,0x20,0x00,0x00, +0x12,0x00,0x00,0x00,0x66,0x40,0x54,0x82,0x23,0x59,0xc2,0x20,0x09,0x92,0x1d,0x18, +0x4f,0x84,0x34,0x53,0x61,0x03,0xc4,0xe3,0x58,0x85,0x05,0x14,0xbe,0x34,0x45,0xb5, +0x21,0x10,0x82,0x23,0x15,0x46,0x30,0x2c,0xc8,0x64,0x02,0x06,0xf0,0x3c,0x91,0x73, +0x19,0x00,0xe1,0x4b,0x53,0x64,0x0a,0x84,0x84,0x34,0x85,0x31,0x10,0x0a,0xb2,0x3c, +0x56,0x30,0x08,0xcc,0x63,0x0b,0x44,0x25,0x21,0x0d,0x00,0x00,0x00,0x00,0x00,0x00}; diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp index 8f9830d0b1..857c190f7b 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp @@ -41,6 +41,7 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" +#include "pipe/p_util.h" #include "tgsi/exec/tgsi_exec.h" #include "tgsi/util/tgsi_dump.h" @@ -179,22 +180,63 @@ typedef void (*vertex_shader_runner)(void *ainputs, float (*aconsts)[4], void *temps); - +#define MAX_TGSI_VERTICES 4 /*! This function is used to execute the gallivm_prog in software. Before calling this function the gallivm_prog has to be JIT compiled with the gallivm_cpu_jit_compile function. */ int gallivm_cpu_vs_exec(struct gallivm_prog *prog, - struct tgsi_exec_vector *inputs, - struct tgsi_exec_vector *dests, - float (*consts)[4], - struct tgsi_exec_vector *temps) + struct tgsi_exec_machine *machine, + const float (*input)[4], + unsigned num_inputs, + float (*output)[4], + unsigned num_outputs, + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) { + unsigned int i, j; + unsigned slot; vertex_shader_runner runner = reinterpret_cast(prog->function); + assert(runner); - /*FIXME*/ - runner(inputs, dests, consts, temps); + + for (i = 0; i < count; i += MAX_TGSI_VERTICES) { + unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); + + /* Swizzle inputs. + */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < num_inputs; slot++) { + machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; + machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; + machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; + machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; + } + + input = (const float (*)[4])((const char *)input + input_stride); + } + + /* run shader */ + runner(machine->Inputs, + machine->Outputs, + (float (*)[4]) constants, + machine->Temps); + + /* Unswizzle all output results + */ + for (j = 0; j < max_vertices; j++) { + for (slot = 0; slot < num_outputs; slot++) { + output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; + output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; + output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; + output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; + } + output = (float (*)[4])((char *)output + output_stride); + } + } return 0; } diff --git a/src/gallium/auxiliary/gallivm/llvm_builtins.c b/src/gallium/auxiliary/gallivm/llvm_builtins.c index 64b5d499a8..6b9d626ed4 100644 --- a/src/gallium/auxiliary/gallivm/llvm_builtins.c +++ b/src/gallium/auxiliary/gallivm/llvm_builtins.c @@ -1,4 +1,3 @@ -/*clang --emit-llvm llvm_builtins.c |llvm-as|opt -std-compile-opts|llvm2cpp -gen-contents -o=gallivm_builtins.cpp -f -for=shader -funcname=createGallivmBuiltins*/ /************************************************************************** * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. -- cgit v1.2.3 From 735752e8dceeec9b202147d1d19ef3dc70e08673 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 15 May 2008 14:11:19 -0400 Subject: fix injections of functions from builtins into shaders --- src/gallium/auxiliary/gallivm/gallivm.cpp | 3 --- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 18 +++++++++++------- 2 files changed, 11 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp index b6f641a3f8..48a3b18cdc 100644 --- a/src/gallium/auxiliary/gallivm/gallivm.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm.cpp @@ -288,10 +288,7 @@ void gallivm_ir_fill_from_tgsi(struct gallivm_ir *ir, std::cout << "Creating llvm from: " <module = mod; gallivm_ir_dump(ir, 0); } diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index f0122802db..17e876e32d 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -38,7 +38,6 @@ #include #include #include -//#include #include #include @@ -193,9 +192,13 @@ llvm::Function * InstructionsSoa::function(int op) std::string name = m_functionsMap[op]; + std::cout <<"For op = "< deps = m_builtinDependencies[name]; for (unsigned int i = 0; i < deps.size(); ++i) { - injectFunction(m_builtins->getFunction(deps[i])); + llvm::Function *func = m_builtins->getFunction(deps[i]); + std::cout <<"\tinjecting dep = '"<getName()<<"'"<getFunction(name); @@ -216,8 +219,10 @@ void InstructionsSoa::createBuiltins() { MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( (const char*)&soabuiltins_data[0], - (const char*)&soabuiltins_data[Elements(soabuiltins_data)-1]); + (const char*)&soabuiltins_data[Elements(soabuiltins_data)]); m_builtins = ParseBitcodeFile(buffer); + std::cout<<"Builtins created at "<isDeclaration()) { - std::cout << "function decleration" <getFunctionType(), GlobalValue::ExternalLinkage, originalFunc->getName(), currentModule()); func->setCallingConv(CallingConv::C); @@ -398,17 +402,17 @@ void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op) } else { DenseMap val; val[m_builtins->getFunction("powf")] = currentModule()->getFunction("powf"); + func = CloneFunction(originalFunc, val); +#if 0 std::cout <<" replacing "<getFunction("powf") <<", with " <getFunction("powf")<getFunctionList().push_back(func); - std::cout << "Func parent is "<getParent() - <<", cur is "< Date: Thu, 15 May 2008 17:46:20 -0400 Subject: llvm: implement sub and abs --- src/gallium/auxiliary/gallivm/gallivm.cpp | 6 ++-- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 35 +++++++++++++++++++-- src/gallium/auxiliary/gallivm/instructionssoa.h | 3 ++ src/gallium/auxiliary/gallivm/soabuiltins.c | 38 ++++++++++++++++------- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 2 ++ 5 files changed, 67 insertions(+), 17 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp index 48a3b18cdc..77900e342b 100644 --- a/src/gallium/auxiliary/gallivm/gallivm.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm.cpp @@ -303,11 +303,11 @@ struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir) { struct gallivm_prog *prog = (struct gallivm_prog *)calloc(1, sizeof(struct gallivm_prog)); - + std::cout << "Before optimizations:"<module->dump(); std::cout<<"-------------------------------"<module); @@ -315,7 +315,7 @@ struct gallivm_prog * gallivm_ir_compile(struct gallivm_ir *ir) prog->num_consts = ir->num_consts; memcpy(prog->interpolators, ir->interpolators, sizeof(prog->interpolators)); prog->num_interp = ir->num_interp; - + /* Run optimization passes over it */ PassManager passes; passes.add(new TargetData(mod)); diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index 17e876e32d..4520559ba2 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -173,6 +173,7 @@ std::vector InstructionsSoa::extractVector(llvm::Value *vector) void InstructionsSoa::createFunctionMap() { + m_functionsMap[TGSI_OPCODE_ABS] = "abs"; m_functionsMap[TGSI_OPCODE_DP3] = "dp3"; m_functionsMap[TGSI_OPCODE_DP4] = "dp4"; m_functionsMap[TGSI_OPCODE_POWER] = "pow"; @@ -180,9 +181,16 @@ void InstructionsSoa::createFunctionMap() void InstructionsSoa::createDependencies() { - std::vector powDeps(1); - powDeps[0] = "powf"; - m_builtinDependencies["pow"] = powDeps; + { + std::vector powDeps(1); + powDeps[0] = "powf"; + m_builtinDependencies["pow"] = powDeps; + } + { + std::vector absDeps(1); + absDeps[0] = "fabsf"; + m_builtinDependencies["abs"] = absDeps; + } } llvm::Function * InstructionsSoa::function(int op) @@ -226,6 +234,13 @@ void InstructionsSoa::createBuiltins() createDependencies(); } + +std::vector InstructionsSoa::abs(const std::vector in1) +{ + llvm::Function *func = function(TGSI_OPCODE_ABS); + return callBuiltin(func, in1); +} + std::vector InstructionsSoa::dp3(const std::vector in1, const std::vector in2) { @@ -418,3 +433,17 @@ void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op) m_functions[op] = func; } } + +std::vector InstructionsSoa::sub(const std::vector in1, + const std::vector in2) +{ + std::vector res(4); + + res[0] = m_builder.CreateSub(in1[0], in2[0], name("subx")); + res[1] = m_builder.CreateSub(in1[1], in2[1], name("suby")); + res[2] = m_builder.CreateSub(in1[2], in2[2], name("subz")); + res[3] = m_builder.CreateSub(in1[3], in2[3], name("subw")); + + return res; +} + diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h index 060ee72f2e..02e5fab51f 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.h +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h @@ -48,6 +48,7 @@ public: InstructionsSoa(llvm::Module *mod, llvm::Function *func, llvm::BasicBlock *block, StorageSoa *storage); + std::vector abs(const std::vector in1); std::vector arl(const std::vector in); std::vector add(const std::vector in1, const std::vector in2); @@ -62,6 +63,8 @@ public: const std::vector in2); std::vector pow(const std::vector in1, const std::vector in2); + std::vector sub(const std::vector in1, + const std::vector in2); void end(); std::vector extractVector(llvm::Value *vector); diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c index 40addebd8c..f04e4c974d 100644 --- a/src/gallium/auxiliary/gallivm/soabuiltins.c +++ b/src/gallium/auxiliary/gallivm/soabuiltins.c @@ -33,6 +33,33 @@ */ typedef __attribute__(( ext_vector_type(4) )) float float4; + +extern float fabsf(float val); + +void abs(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) +{ + res[0].x = fabsf(tmp0x.x); + res[0].y = fabsf(tmp0x.y); + res[0].z = fabsf(tmp0x.z); + res[0].w = fabsf(tmp0x.w); + + res[1].x = fabsf(tmp0y.x); + res[1].y = fabsf(tmp0y.y); + res[1].z = fabsf(tmp0y.z); + res[1].w = fabsf(tmp0y.w); + + res[2].x = fabsf(tmp0z.x); + res[2].y = fabsf(tmp0z.y); + res[2].z = fabsf(tmp0z.z); + res[2].w = fabsf(tmp0z.w); + + res[3].x = fabsf(tmp0w.x); + res[3].y = fabsf(tmp0w.y); + res[3].z = fabsf(tmp0w.z); + res[3].w = fabsf(tmp0w.w); +} + void dp3(float4 *res, float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) @@ -77,14 +104,3 @@ void pow(float4 *res, res[2] = p; res[3] = p; } - -#if 0 -void yo(float4 *out, float4 *in) -{ - float4 res[4]; - - dp3(res, in[0], in[1], in[2], in[3], - in[4], in[5], in[6], in[7]); - out[1] = res[1]; -} -#endif diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index ab8c851f14..5465d3a95e 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -740,6 +740,7 @@ translate_instructionir(llvm::Module *module, } break; case TGSI_OPCODE_SUB: { + out = instr->sub(inputs[0], inputs[1]); } break; case TGSI_OPCODE_LERP: { @@ -781,6 +782,7 @@ translate_instructionir(llvm::Module *module, case TGSI_OPCODE_MULTIPLYMATRIX: break; case TGSI_OPCODE_ABS: { + out = instr->abs(inputs[0]); } break; case TGSI_OPCODE_RCC: -- cgit v1.2.3 From ea1a607292ef31df70cda8c6476755e0224c9f7d Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 16 May 2008 14:54:40 -0400 Subject: implement min/max and abstract ops on vectors --- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 30 ++++++++- src/gallium/auxiliary/gallivm/instructionssoa.h | 4 ++ src/gallium/auxiliary/gallivm/soabuiltins.c | 75 ++++++++++++++++++++--- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 2 + 4 files changed, 100 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index 4520559ba2..55fdda2791 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -176,14 +176,17 @@ void InstructionsSoa::createFunctionMap() m_functionsMap[TGSI_OPCODE_ABS] = "abs"; m_functionsMap[TGSI_OPCODE_DP3] = "dp3"; m_functionsMap[TGSI_OPCODE_DP4] = "dp4"; + m_functionsMap[TGSI_OPCODE_MIN] = "min"; + m_functionsMap[TGSI_OPCODE_MAX] = "max"; m_functionsMap[TGSI_OPCODE_POWER] = "pow"; } void InstructionsSoa::createDependencies() { { - std::vector powDeps(1); + std::vector powDeps(2); powDeps[0] = "powf"; + powDeps[1] = "powvec"; m_builtinDependencies["pow"] = powDeps; } { @@ -191,6 +194,16 @@ void InstructionsSoa::createDependencies() absDeps[0] = "fabsf"; m_builtinDependencies["abs"] = absDeps; } + { + std::vector maxDeps(1); + maxDeps[0] = "maxvec"; + m_builtinDependencies["max"] = maxDeps; + } + { + std::vector minDeps(1); + minDeps[0] = "minvec"; + m_builtinDependencies["min"] = minDeps; + } } llvm::Function * InstructionsSoa::function(int op) @@ -374,6 +387,21 @@ std::vector InstructionsSoa::pow(const std::vector i return callBuiltin(func, in1, in2); } +std::vector InstructionsSoa::min(const std::vector in1, + const std::vector in2) +{ + llvm::Function *func = function(TGSI_OPCODE_MIN); + return callBuiltin(func, in1, in2); +} + + +std::vector InstructionsSoa::max(const std::vector in1, + const std::vector in2) +{ + llvm::Function *func = function(TGSI_OPCODE_MAX); + return callBuiltin(func, in1, in2); +} + void checkFunction(Function *func) { for (Function::const_iterator BI = func->begin(), BE = func->end(); diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h index 02e5fab51f..5a7f8fdf72 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.h +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h @@ -59,6 +59,10 @@ public: std::vector madd(const std::vector in1, const std::vector in2, const std::vector in3); + std::vector max(const std::vector in1, + const std::vector in2); + std::vector min(const std::vector in1, + const std::vector in2); std::vector mul(const std::vector in1, const std::vector in2); std::vector pow(const std::vector in1, diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c index f04e4c974d..935283f962 100644 --- a/src/gallium/auxiliary/gallivm/soabuiltins.c +++ b/src/gallium/auxiliary/gallivm/soabuiltins.c @@ -89,18 +89,73 @@ void dp4(float4 *res, extern float powf(float num, float p); +float4 powvec(float4 vec, float4 q) +{ + float4 p; + p.x = powf(vec.x, q.x); + p.y = powf(vec.y, q.y); + p.z = powf(vec.z, q.z); + p.w = powf(vec.w, q.w); + return p; +} + void pow(float4 *res, float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) { - float4 p; - p.x = powf(tmp0x.x, tmp1x.x); - p.y = powf(tmp0x.y, tmp1x.y); - p.z = powf(tmp0x.z, tmp1x.z); - p.w = powf(tmp0x.w, tmp1x.w); - - res[0] = p; - res[1] = p; - res[2] = p; - res[3] = p; + res[0] = powvec(tmp0x, tmp1x); + res[1] = res[0]; + res[2] = res[0]; + res[3] = res[0]; +} + +void lit(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + const float4 zerovec = (float4) {0, 0, 0, 0}; + float4 tmpx = tmp0x; + float4 tmpy = tmp0y; + + res[0] = (float4){1.0, 1.0, 1.0, 1.0}; + res[1] = tmpx; + res[2] = tmpy; + res[3] = (float4){1.0, 1.0, 1.0, 1.0}; +} + +float4 minvec(float4 a, float4 b) +{ + return (float4){(a.x < b.x) ? a.x : b.x, + (a.y < b.y) ? a.y : b.y, + (a.z < b.z) ? a.z : b.z, + (a.w < b.w) ? a.w : b.w}; +} + +void min(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + res[0] = minvec(tmp0x, tmp1x); + res[1] = minvec(tmp0y, tmp1y); + res[2] = minvec(tmp0z, tmp1z); + res[3] = minvec(tmp0w, tmp1w); +} + + +float4 maxvec(float4 a, float4 b) +{ + return (float4){(a.x > b.x) ? a.x : b.x, + (a.y > b.y) ? a.y : b.y, + (a.z > b.z) ? a.z : b.z, + (a.w > b.w) ? a.w : b.w}; +} + +void max(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + res[0] = maxvec(tmp0x, tmp1x); + res[1] = maxvec(tmp0y, tmp1y); + res[2] = maxvec(tmp0z, tmp1z); + res[3] = maxvec(tmp0w, tmp1w); } diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 5465d3a95e..007b5c169a 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -724,9 +724,11 @@ translate_instructionir(llvm::Module *module, } break; case TGSI_OPCODE_MIN: { + out = instr->min(inputs[0], inputs[1]); } break; case TGSI_OPCODE_MAX: { + out = instr->max(inputs[0], inputs[1]); } break; case TGSI_OPCODE_SLT: { -- cgit v1.2.3 From 1d1cf8edf6a0409caf9aa7d44e186eb51f51fa1f Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 16 May 2008 16:06:59 -0400 Subject: do the lit (some artifacts present) --- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 15 +++++++++ src/gallium/auxiliary/gallivm/instructionssoa.h | 1 + src/gallium/auxiliary/gallivm/soabuiltins.c | 37 ++++++++++++++--------- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 1 + 4 files changed, 40 insertions(+), 14 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index 55fdda2791..074dd0ecd6 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -179,6 +179,7 @@ void InstructionsSoa::createFunctionMap() m_functionsMap[TGSI_OPCODE_MIN] = "min"; m_functionsMap[TGSI_OPCODE_MAX] = "max"; m_functionsMap[TGSI_OPCODE_POWER] = "pow"; + m_functionsMap[TGSI_OPCODE_LIT] = "lit"; } void InstructionsSoa::createDependencies() @@ -204,6 +205,14 @@ void InstructionsSoa::createDependencies() minDeps[0] = "minvec"; m_builtinDependencies["min"] = minDeps; } + { + std::vector litDeps(4); + litDeps[0] = "minvec"; + litDeps[1] = "maxvec"; + litDeps[2] = "powf"; + litDeps[3] = "powvec"; + m_builtinDependencies["lit"] = litDeps; + } } llvm::Function * InstructionsSoa::function(int op) @@ -475,3 +484,9 @@ std::vector InstructionsSoa::sub(const std::vector i return res; } +std::vector InstructionsSoa::lit(const std::vector in) +{ + llvm::Function *func = function(TGSI_OPCODE_LIT); + return callBuiltin(func, in); +} + diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h index 5a7f8fdf72..477ef4a157 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.h +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h @@ -56,6 +56,7 @@ public: const std::vector in2); std::vector dp4(const std::vector in1, const std::vector in2); + std::vector lit(const std::vector in); std::vector madd(const std::vector in1, const std::vector in2, const std::vector in3); diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c index 935283f962..b3bfebfe50 100644 --- a/src/gallium/auxiliary/gallivm/soabuiltins.c +++ b/src/gallium/auxiliary/gallivm/soabuiltins.c @@ -109,20 +109,6 @@ void pow(float4 *res, res[3] = res[0]; } -void lit(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, - float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) -{ - const float4 zerovec = (float4) {0, 0, 0, 0}; - float4 tmpx = tmp0x; - float4 tmpy = tmp0y; - - res[0] = (float4){1.0, 1.0, 1.0, 1.0}; - res[1] = tmpx; - res[2] = tmpy; - res[3] = (float4){1.0, 1.0, 1.0, 1.0}; -} - float4 minvec(float4 a, float4 b) { return (float4){(a.x < b.x) ? a.x : b.x, @@ -159,3 +145,26 @@ void max(float4 *res, res[2] = maxvec(tmp0z, tmp1z); res[3] = maxvec(tmp0w, tmp1w); } + + +void lit(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) +{ + const float4 zerovec = (float4) {0, 0, 0, 0}; + const float4 min128 = (float4) {-128.f, -128.f, -128.f, -128.f}; + const float4 plus128 = (float4) {128.f, 128.f, 128.f, 128.f}; + + res[0] = (float4){1.0, 1.0, 1.0, 1.0}; + if (tmp0x.x > 0) { + float4 tmpx = maxvec(tmpx, zerovec); + float4 tmpy = maxvec(tmp0y, zerovec); + float4 tmpw = minvec(tmp0w, plus128); + tmpw = maxvec(tmpw, min128); + res[1] = tmpx; + res[2] = powvec(tmpy, tmpw); + } else { + res[1] = zerovec; + res[2] = zerovec; + } + res[3] = (float4){1.0, 1.0, 1.0, 1.0}; +} diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 007b5c169a..abcb240f46 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -692,6 +692,7 @@ translate_instructionir(llvm::Module *module, } break; case TGSI_OPCODE_LIT: { + out = instr->lit(inputs[0]); } break; case TGSI_OPCODE_RCP: { -- cgit v1.2.3 From 02e45b2dadd42c38247cb992a07eb520ac86519b Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 16 May 2008 17:10:52 -0400 Subject: fix abs and start on rsq --- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 20 +++++++- src/gallium/auxiliary/gallivm/instructionssoa.h | 1 + src/gallium/auxiliary/gallivm/soabuiltins.c | 56 +++++++++++++++-------- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 1 + 4 files changed, 58 insertions(+), 20 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index 074dd0ecd6..76049ade7c 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -180,6 +180,7 @@ void InstructionsSoa::createFunctionMap() m_functionsMap[TGSI_OPCODE_MAX] = "max"; m_functionsMap[TGSI_OPCODE_POWER] = "pow"; m_functionsMap[TGSI_OPCODE_LIT] = "lit"; + m_functionsMap[TGSI_OPCODE_RSQ] = "rsq"; } void InstructionsSoa::createDependencies() @@ -191,8 +192,9 @@ void InstructionsSoa::createDependencies() m_builtinDependencies["pow"] = powDeps; } { - std::vector absDeps(1); + std::vector absDeps(2); absDeps[0] = "fabsf"; + absDeps[1] = "absvec"; m_builtinDependencies["abs"] = absDeps; } { @@ -213,6 +215,14 @@ void InstructionsSoa::createDependencies() litDeps[3] = "powvec"; m_builtinDependencies["lit"] = litDeps; } + { + std::vector rsqDeps(4); + rsqDeps[0] = "sqrtf"; + rsqDeps[1] = "sqrtvec"; + rsqDeps[2] = "fabsf"; + rsqDeps[3] = "absvec"; + m_builtinDependencies["rsq"] = rsqDeps; + } } llvm::Function * InstructionsSoa::function(int op) @@ -453,7 +463,9 @@ void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op) currentModule()->dump(); } else { DenseMap val; + val[m_builtins->getFunction("fabsf")] = currentModule()->getFunction("fabsf"); val[m_builtins->getFunction("powf")] = currentModule()->getFunction("powf"); + val[m_builtins->getFunction("sqrtf")] = currentModule()->getFunction("sqrtf"); func = CloneFunction(originalFunc, val); #if 0 std::cout <<" replacing "<getFunction("powf") @@ -490,3 +502,9 @@ std::vector InstructionsSoa::lit(const std::vector i return callBuiltin(func, in); } +std::vector InstructionsSoa::rsq(const std::vector in) +{ + llvm::Function *func = function(TGSI_OPCODE_RSQ); + return callBuiltin(func, in); +} + diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h index 477ef4a157..3e20b652dd 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.h +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h @@ -68,6 +68,7 @@ public: const std::vector in2); std::vector pow(const std::vector in1, const std::vector in2); + std::vector rsq(const std::vector in1); std::vector sub(const std::vector in1, const std::vector in2); void end(); diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c index b3bfebfe50..64c02aa967 100644 --- a/src/gallium/auxiliary/gallivm/soabuiltins.c +++ b/src/gallium/auxiliary/gallivm/soabuiltins.c @@ -36,28 +36,24 @@ typedef __attribute__(( ext_vector_type(4) )) float float4; extern float fabsf(float val); +float4 absvec(float4 vec) +{ + float4 res; + res.x = fabsf(vec.x); + res.y = fabsf(vec.y); + res.z = fabsf(vec.z); + res.w = fabsf(vec.w); + + return res; +} + void abs(float4 *res, float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) { - res[0].x = fabsf(tmp0x.x); - res[0].y = fabsf(tmp0x.y); - res[0].z = fabsf(tmp0x.z); - res[0].w = fabsf(tmp0x.w); - - res[1].x = fabsf(tmp0y.x); - res[1].y = fabsf(tmp0y.y); - res[1].z = fabsf(tmp0y.z); - res[1].w = fabsf(tmp0y.w); - - res[2].x = fabsf(tmp0z.x); - res[2].y = fabsf(tmp0z.y); - res[2].z = fabsf(tmp0z.z); - res[2].w = fabsf(tmp0z.w); - - res[3].x = fabsf(tmp0w.x); - res[3].y = fabsf(tmp0w.y); - res[3].z = fabsf(tmp0w.z); - res[3].w = fabsf(tmp0w.w); + res[0] = absvec(tmp0x); + res[1] = absvec(tmp0y); + res[2] = absvec(tmp0z); + res[3] = absvec(tmp0w); } void dp3(float4 *res, @@ -88,6 +84,7 @@ void dp4(float4 *res, } extern float powf(float num, float p); +extern float sqrtf(float x); float4 powvec(float4 vec, float4 q) { @@ -168,3 +165,24 @@ void lit(float4 *res, } res[3] = (float4){1.0, 1.0, 1.0, 1.0}; } + + +float4 sqrtvec(float4 vec) +{ + float4 p; + p.x = sqrtf(vec.x); + p.y = sqrtf(vec.y); + p.z = sqrtf(vec.z); + p.w = sqrtf(vec.w); + return p; +} + +void rsq(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) +{ + const float4 onevec = (float4) {1., 1., 1., 1.}; + res[0] = onevec/sqrtvec(absvec(tmp0x)); + res[1] = res[0]; + res[2] = res[0]; + res[3] = res[0]; +} diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index abcb240f46..9695358ab8 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -699,6 +699,7 @@ translate_instructionir(llvm::Module *module, } break; case TGSI_OPCODE_RSQ: { + out = instr->rsq(inputs[0]); } break; case TGSI_OPCODE_EXP: -- cgit v1.2.3 From a7449d4d840148ccd9261b59e68d45e9d0d2be53 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 16 May 2008 17:35:47 -0400 Subject: fix rsq --- src/gallium/auxiliary/gallivm/soabuiltins.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c index 64c02aa967..62c75f18f4 100644 --- a/src/gallium/auxiliary/gallivm/soabuiltins.c +++ b/src/gallium/auxiliary/gallivm/soabuiltins.c @@ -182,7 +182,7 @@ void rsq(float4 *res, { const float4 onevec = (float4) {1., 1., 1., 1.}; res[0] = onevec/sqrtvec(absvec(tmp0x)); - res[1] = res[0]; - res[2] = res[0]; - res[3] = res[0]; + res[1] = onevec/sqrtvec(absvec(tmp0y)); + res[2] = onevec/sqrtvec(absvec(tmp0z)); + res[3] = onevec/sqrtvec(absvec(tmp0w)); } -- cgit v1.2.3 From 0bf82c0111c9d5e33ffc76be2fd0d22eea316952 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 16 May 2008 17:56:38 -0400 Subject: cosmetic changes --- src/gallium/auxiliary/gallivm/soabuiltins.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c index 62c75f18f4..78f84510e2 100644 --- a/src/gallium/auxiliary/gallivm/soabuiltins.c +++ b/src/gallium/auxiliary/gallivm/soabuiltins.c @@ -147,17 +147,16 @@ void max(float4 *res, void lit(float4 *res, float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) { - const float4 zerovec = (float4) {0, 0, 0, 0}; + const float4 zerovec = (float4) {0.0, 0.0, 0.0, 0.0}; const float4 min128 = (float4) {-128.f, -128.f, -128.f, -128.f}; const float4 plus128 = (float4) {128.f, 128.f, 128.f, 128.f}; res[0] = (float4){1.0, 1.0, 1.0, 1.0}; if (tmp0x.x > 0) { - float4 tmpx = maxvec(tmpx, zerovec); float4 tmpy = maxvec(tmp0y, zerovec); float4 tmpw = minvec(tmp0w, plus128); tmpw = maxvec(tmpw, min128); - res[1] = tmpx; + res[1] = tmp0x; res[2] = powvec(tmpy, tmpw); } else { res[1] = zerovec; -- cgit v1.2.3 From 59007a811de2d76ea00164e8f1cacb4a375d1458 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 19 May 2008 09:34:28 -0600 Subject: if x86_get_func() returns NULL, handle it properly instead of aborting --- src/gallium/auxiliary/draw/draw_vs_sse.c | 3 +++ src/gallium/drivers/softpipe/sp_fs_sse.c | 6 +++++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 5929ea76b2..e3f4e67472 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -215,6 +215,9 @@ draw_create_vs_sse(struct draw_context *draw, goto fail; vs->func = (codegen_function) x86_get_func( &vs->sse2_program ); + if (!vs->func) { + goto fail; + } return &vs->base; diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 25fdfea491..55741cc1df 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -139,7 +139,11 @@ softpipe_create_fs_sse(struct softpipe_context *softpipe, } shader->func = (codegen_function) x86_get_func( &shader->sse2_program ); - assert(shader->func); + if (!shader->func) { + x86_release_func( &shader->sse2_program ); + FREE(shader); + return NULL; + } shader->base.shader = *templ; shader->base.prepare = fs_sse_prepare; -- cgit v1.2.3 From 21e614eabc5e6a502504f307f3710b4dd0417923 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 19 May 2008 12:40:11 -0600 Subject: gallium: fix some texture object leaks --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 8 ++++++++ src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 5 +++++ 2 files changed, 13 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index f501b2aed4..346a96e37f 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -713,6 +713,11 @@ static void aaline_destroy(struct draw_stage *stage) { struct aaline_stage *aaline = aaline_stage(stage); + uint i; + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + pipe_texture_reference(&aaline->state.texture[i], NULL); + } if (aaline->sampler_cso) aaline->pipe->delete_sampler_state(aaline->pipe, aaline->sampler_cso); @@ -836,6 +841,9 @@ aaline_set_sampler_textures(struct pipe_context *pipe, for (i = 0; i < num; i++) { pipe_texture_reference(&aaline->state.texture[i], texture[i]); } + for ( ; i < PIPE_MAX_SAMPLERS; i++) { + pipe_texture_reference(&aaline->state.texture[i], NULL); + } aaline->num_textures = num; /* pass-through */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 73ee419858..2dfd1800d5 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -559,6 +559,11 @@ static void pstip_destroy(struct draw_stage *stage) { struct pstip_stage *pstip = pstip_stage(stage); + uint i; + + for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { + pipe_texture_reference(&pstip->state.textures[i], NULL); + } pstip->pipe->delete_sampler_state(pstip->pipe, pstip->sampler_cso); -- cgit v1.2.3 From bd4eec0561fb021849ac4047fdbf40a616fb68b3 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 22 May 2008 20:33:17 +0900 Subject: pipebuffer: Don't retry allocating in slab suballocator. In pipebuffer, fencing is done at on a level above sub-allocation, so no matter how many times slab allocator retries no buffer will be freed. The pipebuffer fencing implemention already retries allocating. --- src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c | 23 +++++------------------ 1 file changed, 5 insertions(+), 18 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index b931455056..45ba158a4d 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -47,9 +47,6 @@ #include "pb_bufmgr.h" -#define DRI_SLABPOOL_ALLOC_RETRIES 100 - - struct pb_slab; struct pb_slab_buffer @@ -313,7 +310,6 @@ pb_slab_manager_create_buffer(struct pb_manager *_mgr, static struct pb_slab_buffer *buf; struct pb_slab *slab; struct list_head *list; - int count = DRI_SLABPOOL_ALLOC_RETRIES; /* check size */ assert(size == mgr->bufSize); @@ -331,23 +327,14 @@ pb_slab_manager_create_buffer(struct pb_manager *_mgr, /* XXX: check for compatible buffer usage too? */ _glthread_LOCK_MUTEX(mgr->mutex); - while (mgr->slabs.next == &mgr->slabs && count > 0) { - if (mgr->slabs.next != &mgr->slabs) - break; - - _glthread_UNLOCK_MUTEX(mgr->mutex); - if (count != DRI_SLABPOOL_ALLOC_RETRIES) - util_time_sleep(1); - _glthread_LOCK_MUTEX(mgr->mutex); + if (mgr->slabs.next == &mgr->slabs) { (void) pb_slab_create(mgr); - count--; + if (mgr->slabs.next == &mgr->slabs) { + _glthread_UNLOCK_MUTEX(mgr->mutex); + return NULL; + } } - list = mgr->slabs.next; - if (list == &mgr->slabs) { - _glthread_UNLOCK_MUTEX(mgr->mutex); - return NULL; - } slab = LIST_ENTRY(struct pb_slab, list, head); if (--slab->numFree == 0) LIST_DELINIT(list); -- cgit v1.2.3 From 781676c7cc5ae7586ee8edd07de880892c5a2d86 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 22 May 2008 21:54:41 +0900 Subject: pipebuffer: More robust face null pointers. It is really the caller responsibility not to call pipebuffer with null buffers, etc. But don't let the crash happen here, and still asserting early. --- src/gallium/auxiliary/pipebuffer/pb_buffer.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index 49705cb862..857ea53c78 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -146,6 +146,8 @@ pb_map(struct pb_buffer *buf, unsigned flags) { assert(buf); + if(!buf) + return NULL; return buf->vtbl->map(buf, flags); } @@ -154,6 +156,8 @@ static INLINE void pb_unmap(struct pb_buffer *buf) { assert(buf); + if(!buf) + return; buf->vtbl->unmap(buf); } @@ -163,6 +167,12 @@ pb_get_base_buffer( struct pb_buffer *buf, struct pb_buffer **base_buf, unsigned *offset ) { + assert(buf); + if(!buf) { + base_buf = NULL; + offset = 0; + return; + } buf->vtbl->get_base_buffer(buf, base_buf, offset); } @@ -171,7 +181,8 @@ static INLINE void pb_destroy(struct pb_buffer *buf) { assert(buf); - assert(buf->vtbl); + if(!buf) + return; buf->vtbl->destroy(buf); } -- cgit v1.2.3 From 8b25b5256fad23e8ea11c6931ecac658ca60c0b0 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 13 May 2008 09:46:53 +0100 Subject: draw: remove disabled non-sse swizzle code --- src/gallium/auxiliary/draw/draw_vs_sse.c | 50 +------------------------------- 1 file changed, 1 insertion(+), 49 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index e3f4e67472..edf235cddc 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -47,9 +47,7 @@ #include "tgsi/util/tgsi_parse.h" #define SSE_MAX_VERTICES 4 -#define SSE_SWIZZLES 1 -#if SSE_SWIZZLES typedef void (XSTDCALL *codegen_function) ( const struct tgsi_exec_vector *input, /* 1 */ struct tgsi_exec_vector *output, /* 2 */ @@ -62,14 +60,6 @@ typedef void (XSTDCALL *codegen_function) ( float (*aos_output)[4], /* 9 */ uint num_outputs, /* 10 */ uint output_stride ); /* 11 */ -#else -typedef void (XSTDCALL *codegen_function) ( - const struct tgsi_exec_vector *input, - struct tgsi_exec_vector *output, - float (*constant)[4], - struct tgsi_exec_vector *temporary, - float (*immediates)[4] ); -#endif struct draw_sse_vertex_shader { struct draw_vertex_shader base; @@ -111,7 +101,6 @@ vs_sse_run_linear( struct draw_vertex_shader *base, for (i = 0; i < count; i += MAX_TGSI_VERTICES) { unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); -#if SSE_SWIZZLES /* run compiled shader */ shader->func(machine->Inputs, @@ -128,43 +117,6 @@ vs_sse_run_linear( struct draw_vertex_shader *base, input = (const float (*)[4])((const char *)input + input_stride * max_vertices); output = (float (*)[4])((char *)output + output_stride * max_vertices); -#else - unsigned int j, slot; - - /* Swizzle inputs. - */ - for (j = 0; j < max_vertices; j++) { - for (slot = 0; slot < base->info.num_inputs; slot++) { - machine->Inputs[slot].xyzw[0].f[j] = input[slot][0]; - machine->Inputs[slot].xyzw[1].f[j] = input[slot][1]; - machine->Inputs[slot].xyzw[2].f[j] = input[slot][2]; - machine->Inputs[slot].xyzw[3].f[j] = input[slot][3]; - } - - input = (const float (*)[4])((const char *)input + input_stride); - } - - /* run compiled shader - */ - shader->func(machine->Inputs, - machine->Outputs, - (float (*)[4])constants, - machine->Temps, - shader->immediates); - - /* Unswizzle all output results. - */ - for (j = 0; j < max_vertices; j++) { - for (slot = 0; slot < base->info.num_outputs; slot++) { - output[slot][0] = machine->Outputs[slot].xyzw[0].f[j]; - output[slot][1] = machine->Outputs[slot].xyzw[1].f[j]; - output[slot][2] = machine->Outputs[slot].xyzw[2].f[j]; - output[slot][3] = machine->Outputs[slot].xyzw[3].f[j]; - } - - output = (float (*)[4])((char *)output + output_stride); - } -#endif } } @@ -211,7 +163,7 @@ draw_create_vs_sse(struct draw_context *draw, x86_init_func( &vs->sse2_program ); if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens, - &vs->sse2_program, vs->immediates, SSE_SWIZZLES )) + &vs->sse2_program, vs->immediates, TRUE )) goto fail; vs->func = (codegen_function) x86_get_func( &vs->sse2_program ); -- cgit v1.2.3 From b23706454bb165a62888d264e95a98a2e4cf139c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 13 May 2008 13:35:14 +0100 Subject: draw: get rid of fetch-shade-emit frontend hack The code is now living in it's intended place as a pt middle end. --- src/gallium/auxiliary/draw/Makefile | 1 - src/gallium/auxiliary/draw/draw_private.h | 1 - src/gallium/auxiliary/draw/draw_pt.c | 14 - src/gallium/auxiliary/draw/draw_pt.h | 1 - .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 677 --------------------- 5 files changed, 694 deletions(-) delete mode 100644 src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 67d78bdbbd..3053682da8 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -26,7 +26,6 @@ C_SOURCES = \ draw_pt_emit.c \ draw_pt_fetch.c \ draw_pt_fetch_emit.c \ - draw_pt_fetch_shade_emit.c \ draw_pt_middle_fse.c \ draw_pt_fetch_shade_pipeline.c \ draw_pt_post_vs.c \ diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 86b901a3c8..fd51a57781 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -131,7 +131,6 @@ struct draw_context struct { struct draw_pt_front_end *vcache; struct draw_pt_front_end *varray; - struct draw_pt_front_end *fetch_shade_emit; /* temp hack */ } front; struct pipe_vertex_buffer vertex_buffer[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 91e35db819..75f44d503e 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -85,11 +85,6 @@ draw_pt_arrays(struct draw_context *draw, */ if (draw->pt.user.elts || (opt & PT_PIPELINE)) { frontend = draw->pt.front.vcache; -#if 0 - } else if (opt == PT_SHADE && draw->pt.test_fse) { - /* should be a middle end.. */ - frontend = draw->pt.front.fetch_shade_emit; -#endif } else { frontend = draw->pt.front.varray; } @@ -127,10 +122,6 @@ boolean draw_pt_init( struct draw_context *draw ) draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw ); if (!draw->pt.middle.fetch_shade_emit) return FALSE; - - draw->pt.front.fetch_shade_emit = draw_pt_fetch_shade_emit( draw ); - if (!draw->pt.front.fetch_shade_emit) - return FALSE; } @@ -159,11 +150,6 @@ void draw_pt_destroy( struct draw_context *draw ) draw->pt.middle.fetch_shade_emit = NULL; } - if (draw->pt.front.fetch_shade_emit) { - draw->pt.front.fetch_shade_emit->destroy( draw->pt.front.fetch_shade_emit ); - draw->pt.front.fetch_shade_emit = NULL; - } - if (draw->pt.front.vcache) { draw->pt.front.vcache->destroy( draw->pt.front.vcache ); draw->pt.front.vcache = NULL; diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index cdae46b8d2..e03816ebbc 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -121,7 +121,6 @@ const void *draw_pt_elt_ptr( struct draw_context *draw, struct draw_pt_front_end *draw_pt_vcache( struct draw_context *draw ); struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw); -struct draw_pt_front_end *draw_pt_fetch_shade_emit( struct draw_context *draw ); /* Middle-ends: * diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c deleted file mode 100644 index f756d3e0bb..0000000000 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ /dev/null @@ -1,677 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell - */ - - -#include "pipe/p_util.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" -#include "draw/draw_vbuf.h" -#include "draw/draw_vertex.h" -#include "draw/draw_pt.h" -#include "draw/draw_vs.h" - -#include "translate/translate.h" - -struct fetch_shade_emit; - -struct fse_shader { - struct translate_key key; - - void (*run_linear)( const struct fetch_shade_emit *fse, - unsigned start, - unsigned count, - char *buffer ); -}; - -/* Prototype fetch, shade, emit-hw-verts all in one go. - */ -struct fetch_shade_emit { - struct draw_pt_front_end base; - - struct draw_context *draw; - - struct translate_key key; - - /* Temporaries: - */ - const float *constants; - unsigned pitch[PIPE_MAX_ATTRIBS]; - const ubyte *src[PIPE_MAX_ATTRIBS]; - unsigned prim; - - /* Points to one of the three hardwired example shaders, below: - */ - struct fse_shader *active; - - /* Temporary: A list of hard-wired shaders. Of course the plan - * would be to generate these for a given (vertex-shader, - * translate-key) pair... - */ - struct fse_shader shader[10]; - int nr_shaders; -}; - - - -/* Not quite passthrough yet -- we're still running the 'shader' here, - * inlined into the vertex fetch function. - */ -static void fetch_xyz_rgb_st( const struct fetch_shade_emit *fse, - unsigned start, - unsigned count, - char *buffer ) -{ - unsigned i; - - const float *m = fse->constants; - const float m0 = m[0], m4 = m[4], m8 = m[8], m12 = m[12]; - const float m1 = m[1], m5 = m[5], m9 = m[9], m13 = m[13]; - const float m2 = m[2], m6 = m[6], m10 = m[10], m14 = m[14]; - const float m3 = m[3], m7 = m[7], m11 = m[11], m15 = m[15]; - - const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; - const ubyte *st = fse->src[2] + start * fse->pitch[2]; - - float *out = (float *)buffer; - - - assert(fse->pitch[1] == 0); - - /* loop over vertex attributes (vertex shader inputs) - */ - for (i = 0; i < count; i++) { - { - const float *in = (const float *)xyz; - const float ix = in[0], iy = in[1], iz = in[2]; - - out[0] = m0 * ix + m4 * iy + m8 * iz + m12; - out[1] = m1 * ix + m5 * iy + m9 * iz + m13; - out[2] = m2 * ix + m6 * iy + m10 * iz + m14; - out[3] = m3 * ix + m7 * iy + m11 * iz + m15; - xyz += fse->pitch[0]; - } - - { - out[4] = 1.0f; - out[5] = 1.0f; - out[6] = 1.0f; - out[7] = 1.0f; - } - - { - const float *in = (const float *)st; st += fse->pitch[2]; - out[8] = in[0]; - out[9] = in[1]; - out[10] = 0.0f; - out[11] = 1.0f; - } - - out += 12; - } -} - - - -static void fetch_xyz_rgb( const struct fetch_shade_emit *fse, - unsigned start, - unsigned count, - char *buffer ) -{ - unsigned i; - - const float *m = (const float *)fse->constants; - const float m0 = m[0], m4 = m[4], m8 = m[8], m12 = m[12]; - const float m1 = m[1], m5 = m[5], m9 = m[9], m13 = m[13]; - const float m2 = m[2], m6 = m[6], m10 = m[10], m14 = m[14]; - const float m3 = m[3], m7 = m[7], m11 = m[11], m15 = m[15]; - - const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; - const ubyte *rgb = fse->src[1] + start * fse->pitch[1]; - - float *out = (float *)buffer; - -// debug_printf("rgb %f %f %f\n", rgb[0], rgb[1], rgb[2]); - - - for (i = 0; i < count; i++) { - { - const float *in = (const float *)xyz; - const float ix = in[0], iy = in[1], iz = in[2]; - - out[0] = m0 * ix + m4 * iy + m8 * iz + m12; - out[1] = m1 * ix + m5 * iy + m9 * iz + m13; - out[2] = m2 * ix + m6 * iy + m10 * iz + m14; - out[3] = m3 * ix + m7 * iy + m11 * iz + m15; - xyz += fse->pitch[0]; - } - - { - const float *in = (const float *)rgb; - out[4] = in[0]; - out[5] = in[1]; - out[6] = in[2]; - out[7] = 1.0f; - rgb += fse->pitch[1]; - } - - out += 8; - } -} - - - - -static void fetch_xyz_rgb_psiz( const struct fetch_shade_emit *fse, - unsigned start, - unsigned count, - char *buffer ) -{ - unsigned i; - - const float *m = (const float *)fse->constants; - const float m0 = m[0], m4 = m[4], m8 = m[8], m12 = m[12]; - const float m1 = m[1], m5 = m[5], m9 = m[9], m13 = m[13]; - const float m2 = m[2], m6 = m[6], m10 = m[10], m14 = m[14]; - const float m3 = m[3], m7 = m[7], m11 = m[11], m15 = m[15]; - - const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; - const float *rgb = (const float *)(fse->src[1] + start * fse->pitch[1]); - const float psiz = 1.0; - - float *out = (float *)buffer; - - - assert(fse->pitch[1] == 0); - - for (i = 0; i < count; i++) { - { - const float *in = (const float *)xyz; - const float ix = in[0], iy = in[1], iz = in[2]; - - out[0] = m0 * ix + m4 * iy + m8 * iz + m12; - out[1] = m1 * ix + m5 * iy + m9 * iz + m13; - out[2] = m2 * ix + m6 * iy + m10 * iz + m14; - out[3] = m3 * ix + m7 * iy + m11 * iz + m15; - xyz += fse->pitch[0]; - } - - { - out[4] = rgb[0]; - out[5] = rgb[1]; - out[6] = rgb[2]; - out[7] = 1.0f; - } - - { - out[8] = psiz; - } - - out += 9; - } -} - - - - -static boolean set_prim( struct fetch_shade_emit *fse, - unsigned prim, - unsigned count ) -{ - struct draw_context *draw = fse->draw; - - fse->prim = prim; - - switch (prim) { - case PIPE_PRIM_LINE_LOOP: - if (count > 1024) - return FALSE; - draw->render->set_primitive( draw->render, PIPE_PRIM_LINE_STRIP ); - break; - - case PIPE_PRIM_TRIANGLE_FAN: - case PIPE_PRIM_POLYGON: - if (count > 1024) - return FALSE; - draw->render->set_primitive( draw->render, prim ); - break; - - case PIPE_PRIM_QUADS: - case PIPE_PRIM_QUAD_STRIP: - draw->render->set_primitive( draw->render, PIPE_PRIM_TRIANGLES ); - break; - - default: - draw->render->set_primitive( draw->render, prim ); - break; - } - - return TRUE; -} - - - - - - -static void fse_prepare( struct draw_pt_front_end *fe, - unsigned prim, - struct draw_pt_middle_end *unused, - unsigned opt ) -{ - struct fetch_shade_emit *fse = (struct fetch_shade_emit *)fe; - struct draw_context *draw = fse->draw; - unsigned num_vs_inputs = draw->vertex_shader->info.num_inputs; - unsigned num_vs_outputs = draw->vertex_shader->info.num_outputs; - const struct vertex_info *vinfo; - unsigned i; - boolean need_psize = 0; - - - if (draw->pt.user.elts) { - assert(0); - return ; - } - - if (!set_prim(fse, prim, /*count*/1022 )) { - assert(0); - return ; - } - - /* Must do this after set_primitive() above: - */ - vinfo = draw->render->get_vertex_info(draw->render); - - - - fse->key.nr_elements = MAX2(num_vs_outputs, /* outputs - translate to hw format */ - num_vs_inputs); /* inputs - fetch from api format */ - - fse->key.output_stride = vinfo->size * 4; - memset(fse->key.element, 0, - fse->key.nr_elements * sizeof(fse->key.element[0])); - - for (i = 0; i < num_vs_inputs; i++) { - const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; - fse->key.element[i].input_format = src->src_format; - - /* Consider ignoring these at this point, ie make generated - * programs independent of this state: - */ - fse->key.element[i].input_buffer = 0; //src->vertex_buffer_index; - fse->key.element[i].input_offset = 0; //src->src_offset; - } - - - { - unsigned dst_offset = 0; - - for (i = 0; i < vinfo->num_attribs; i++) { - unsigned emit_sz = 0; - unsigned output_format = PIPE_FORMAT_NONE; - unsigned vs_output = vinfo->src_index[i]; - - switch (vinfo->emit[i]) { - case EMIT_4F: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - emit_sz = 4 * sizeof(float); - break; - case EMIT_3F: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - emit_sz = 3 * sizeof(float); - break; - case EMIT_2F: - output_format = PIPE_FORMAT_R32G32_FLOAT; - emit_sz = 2 * sizeof(float); - break; - case EMIT_1F: - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); - break; - case EMIT_1F_PSIZE: - need_psize = 1; - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); - vs_output = num_vs_outputs + 1; - - break; - default: - assert(0); - break; - } - - /* The elements in the key correspond to vertex shader output - * numbers, not to positions in the hw vertex description -- - * that's handled by the output_offset field. - */ - fse->key.element[vs_output].output_format = output_format; - fse->key.element[vs_output].output_offset = dst_offset; - - dst_offset += emit_sz; - assert(fse->key.output_stride >= dst_offset); - } - } - - /* To make psize work, really need to tell the vertex shader to - * copy that value from input->output. For 'translate' this was - * implicit for all elements. - */ -#if 0 - if (need_psize) { - unsigned input = num_vs_inputs + 1; - const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; - fse->key.element[i].input_format = PIPE_FORMAT_R32_FLOAT; - fse->key.element[i].input_buffer = 0; //nr_buffers + 1; - fse->key.element[i].input_offset = 0; - - fse->key.nr_elements += 1; - - } -#endif - - fse->constants = draw->pt.user.constants; - - /* Would normally look up a vertex shader and peruse its list of - * varients somehow. We omitted that step and put all the - * hardcoded "shaders" into an array. We're just making the - * assumption that this happens to be a matching shader... ie - * you're running isosurf, aren't you? - */ - fse->active = NULL; - for (i = 0; i < fse->nr_shaders; i++) { - if (translate_key_compare( &fse->key, &fse->shader[i].key) == 0) - fse->active = &fse->shader[i]; - } - - if (!fse->active) { - assert(0); - return ; - } - - /* Now set buffer pointers: - */ - for (i = 0; i < num_vs_inputs; i++) { - unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index; - - fse->src[i] = ((const ubyte *) draw->pt.user.vbuffer[buf] + - draw->pt.vertex_buffer[buf].buffer_offset + - draw->pt.vertex_element[i].src_offset); - - fse->pitch[i] = draw->pt.vertex_buffer[buf].pitch; - - } - - - //return TRUE; -} - - - - - - -#define INDEX(i) (start + (i)) -static void fse_render_linear( struct vbuf_render *render, - unsigned prim, - unsigned start, - unsigned length ) -{ - ushort *tmp = NULL; - unsigned i, j; - - switch (prim) { - case PIPE_PRIM_LINE_LOOP: - tmp = MALLOC( sizeof(ushort) * (length + 1) ); - - for (i = 0; i < length; i++) - tmp[i] = INDEX(i); - tmp[length] = 0; - - render->draw( render, - tmp, - length+1 ); - break; - - - case PIPE_PRIM_QUAD_STRIP: - tmp = MALLOC( sizeof(ushort) * (length / 2 * 6) ); - - for (j = i = 0; i + 3 < length; i += 2, j += 6) { - tmp[j+0] = INDEX(i+0); - tmp[j+1] = INDEX(i+1); - tmp[j+2] = INDEX(i+3); - - tmp[j+3] = INDEX(i+2); - tmp[j+4] = INDEX(i+0); - tmp[j+5] = INDEX(i+3); - } - - if (j) - render->draw( render, tmp, j ); - break; - - case PIPE_PRIM_QUADS: - tmp = MALLOC( sizeof(int) * (length / 4 * 6) ); - - for (j = i = 0; i + 3 < length; i += 4, j += 6) { - tmp[j+0] = INDEX(i+0); - tmp[j+1] = INDEX(i+1); - tmp[j+2] = INDEX(i+3); - - tmp[j+3] = INDEX(i+1); - tmp[j+4] = INDEX(i+2); - tmp[j+5] = INDEX(i+3); - } - - if (j) - render->draw( render, tmp, j ); - break; - - default: - render->draw_arrays( render, - start, - length ); - break; - } - - if (tmp) - FREE(tmp); -} - - - -static boolean do_draw( struct fetch_shade_emit *fse, - unsigned start, unsigned count ) -{ - struct draw_context *draw = fse->draw; - - char *hw_verts = - draw->render->allocate_vertices( draw->render, - (ushort)fse->key.output_stride, - (ushort)count ); - - if (!hw_verts) - return FALSE; - - /* Single routine to fetch vertices, run shader and emit HW verts. - * Clipping and viewport transformation are done on hardware. - */ - fse->active->run_linear( fse, - start, count, - hw_verts ); - - /* Draw arrays path to avoid re-emitting index list again and - * again. - */ - fse_render_linear( draw->render, - fse->prim, - 0, - count ); - - - draw->render->release_vertices( draw->render, - hw_verts, - fse->key.output_stride, - count ); - - return TRUE; -} - - -static void -fse_run(struct draw_pt_front_end *fe, - pt_elt_func elt_func, - const void *elt_ptr, - unsigned count) -{ - struct fetch_shade_emit *fse = (struct fetch_shade_emit *)fe; - unsigned i = 0; - unsigned first, incr; - unsigned start = elt_func(elt_ptr, 0); - - //debug_printf("%s prim %d start %d count %d\n", __FUNCTION__, prim, start, count); - - draw_pt_split_prim(fse->prim, &first, &incr); - - count -= (count - first) % incr; - - while (i + first <= count) { - int nr = MIN2( count - i, 1024 ); - - /* snap to prim boundary - */ - nr -= (nr - first) % incr; - - if (!do_draw( fse, start + i, nr )) { - assert(0); - return ; - } - - /* increment allowing for repeated vertices - */ - i += nr - (first - incr); - } - - //return TRUE; -} - - -static void fse_finish( struct draw_pt_front_end *frontend ) -{ -} - - -static void -fse_destroy( struct draw_pt_front_end *frontend ) -{ - FREE(frontend); -} - -struct draw_pt_front_end *draw_pt_fetch_shade_emit( struct draw_context *draw ) -{ - struct fetch_shade_emit *fse = CALLOC_STRUCT(fetch_shade_emit); - if (!fse) - return NULL; - - fse->base.prepare = fse_prepare; - fse->base.run = fse_run; - fse->base.finish = fse_finish; - fse->base.destroy = fse_destroy; - fse->draw = draw; - - fse->shader[0].run_linear = fetch_xyz_rgb_st; - fse->shader[0].key.nr_elements = 3; - fse->shader[0].key.output_stride = 12 * sizeof(float); - - fse->shader[0].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; - fse->shader[0].key.element[0].input_buffer = 0; - fse->shader[0].key.element[0].input_offset = 0; - fse->shader[0].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[0].key.element[0].output_offset = 0; - - fse->shader[0].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; - fse->shader[0].key.element[1].input_buffer = 0; - fse->shader[0].key.element[1].input_offset = 0; - fse->shader[0].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[0].key.element[1].output_offset = 16; - - fse->shader[0].key.element[1].input_format = PIPE_FORMAT_R32G32_FLOAT; - fse->shader[0].key.element[1].input_buffer = 0; - fse->shader[0].key.element[1].input_offset = 0; - fse->shader[0].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[0].key.element[1].output_offset = 32; - - fse->shader[1].run_linear = fetch_xyz_rgb; - fse->shader[1].key.nr_elements = 2; - fse->shader[1].key.output_stride = 8 * sizeof(float); - - fse->shader[1].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; - fse->shader[1].key.element[0].input_buffer = 0; - fse->shader[1].key.element[0].input_offset = 0; - fse->shader[1].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[1].key.element[0].output_offset = 0; - - fse->shader[1].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; - fse->shader[1].key.element[1].input_buffer = 0; - fse->shader[1].key.element[1].input_offset = 0; - fse->shader[1].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[1].key.element[1].output_offset = 16; - - fse->shader[2].run_linear = fetch_xyz_rgb_psiz; - fse->shader[2].key.nr_elements = 3; - fse->shader[2].key.output_stride = 9 * sizeof(float); - - fse->shader[2].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; - fse->shader[2].key.element[0].input_buffer = 0; - fse->shader[2].key.element[0].input_offset = 0; - fse->shader[2].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[2].key.element[0].output_offset = 0; - - fse->shader[2].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; - fse->shader[2].key.element[1].input_buffer = 0; - fse->shader[2].key.element[1].input_offset = 0; - fse->shader[2].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[2].key.element[1].output_offset = 16; - - /* psize is special - * -- effectively add it here as another input!?! - * -- who knows how to add it as a buffer? - */ - fse->shader[2].key.element[2].input_format = PIPE_FORMAT_R32_FLOAT; - fse->shader[2].key.element[2].input_buffer = 0; - fse->shader[2].key.element[2].input_offset = 0; - fse->shader[2].key.element[2].output_format = PIPE_FORMAT_R32_FLOAT; - fse->shader[2].key.element[2].output_offset = 32; - - fse->nr_shaders = 3; - - return &fse->base; -} -- cgit v1.2.3 From 2f0d1396e4c1626b3b1ac799bd29e86a9530369e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 13 May 2008 13:40:22 +0100 Subject: draw: move some state into a new 'vs' area --- src/gallium/auxiliary/draw/draw_context.c | 21 ++++--------- src/gallium/auxiliary/draw/draw_pipe.h | 2 +- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 4 +-- src/gallium/auxiliary/draw/draw_pipe_clip.c | 6 ++-- src/gallium/auxiliary/draw/draw_pipe_flatshade.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_stipple.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_twoside.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_wide_point.c | 4 +-- src/gallium/auxiliary/draw/draw_private.h | 28 +++++++++++------ .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 6 ++-- src/gallium/auxiliary/draw/draw_pt_middle_fse.c | 4 +-- src/gallium/auxiliary/draw/draw_vs.c | 35 +++++++++++++++++++--- src/gallium/auxiliary/draw/draw_vs_exec.c | 2 +- src/gallium/auxiliary/draw/draw_vs_sse.c | 2 +- 15 files changed, 75 insertions(+), 47 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 98e23fa830..2242074965 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -56,12 +56,6 @@ struct draw_context *draw_create( void ) draw->reduced_prim = ~0; /* != any of PIPE_PRIM_x */ - tgsi_exec_machine_init(&draw->machine); - - /* FIXME: give this machine thing a proper constructor: - */ - draw->machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); - draw->machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); if (!draw_pipeline_init( draw )) goto fail; @@ -69,6 +63,9 @@ struct draw_context *draw_create( void ) if (!draw_pt_init( draw )) goto fail; + if (!draw_vs_init( draw )) + goto fail; + return draw; fail: @@ -83,13 +80,6 @@ void draw_destroy( struct draw_context *draw ) return; - if (draw->machine.Inputs) - align_free(draw->machine.Inputs); - - if (draw->machine.Outputs) - align_free(draw->machine.Outputs); - - tgsi_exec_machine_free_data(&draw->machine); /* Not so fast -- we're just borrowing this at the moment. * @@ -99,6 +89,7 @@ void draw_destroy( struct draw_context *draw ) draw_pipeline_destroy( draw ); draw_pt_destroy( draw ); + draw_vs_destroy( draw ); FREE( draw ); } @@ -295,7 +286,7 @@ int draw_find_vs_output(struct draw_context *draw, uint semantic_name, uint semantic_index) { - const struct draw_vertex_shader *vs = draw->vertex_shader; + const struct draw_vertex_shader *vs = draw->vs.vertex_shader; uint i; for (i = 0; i < vs->info.num_outputs; i++) { if (vs->info.output_semantic_name[i] == semantic_name && @@ -320,7 +311,7 @@ draw_find_vs_output(struct draw_context *draw, uint draw_num_vs_outputs(struct draw_context *draw) { - uint count = draw->vertex_shader->info.num_outputs; + uint count = draw->vs.vertex_shader->info.num_outputs; if (draw->extra_vp_outputs.slot > 0) count++; return count; diff --git a/src/gallium/auxiliary/draw/draw_pipe.h b/src/gallium/auxiliary/draw/draw_pipe.h index f1cb0891ca..dbad8f98ac 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.h +++ b/src/gallium/auxiliary/draw/draw_pipe.h @@ -116,7 +116,7 @@ dup_vert( struct draw_stage *stage, { struct vertex_header *tmp = stage->tmp[idx]; const uint vsize = sizeof(struct vertex_header) - + stage->draw->num_vs_outputs * 4 * sizeof(float); + + stage->draw->vs.num_vs_outputs * 4 * sizeof(float); memcpy(tmp, vert, vsize); tmp->vertex_id = UNDEFINED_VERTEX_ID; return tmp; diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index f501b2aed4..d93708ad3c 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -651,7 +651,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) } /* update vertex attrib info */ - aaline->tex_slot = draw->num_vs_outputs; + aaline->tex_slot = draw->vs.num_vs_outputs; assert(aaline->tex_slot > 0); /* output[0] is vertex pos */ /* advertise the extra post-transformed vertex attribute */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 122a48660a..97d74ad693 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -681,7 +681,7 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header) bind_aapoint_fragment_shader(aapoint); /* update vertex attrib info */ - aapoint->tex_slot = draw->num_vs_outputs; + aapoint->tex_slot = draw->vs.num_vs_outputs; assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */ draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; @@ -692,7 +692,7 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header) aapoint->psize_slot = -1; if (draw->rasterizer->point_size_per_vertex) { /* find PSIZ vertex output */ - const struct draw_vertex_shader *vs = draw->vertex_shader; + const struct draw_vertex_shader *vs = draw->vs.vertex_shader; uint i; for (i = 0; i < vs->info.num_outputs; i++) { if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index ce80c94163..c11ed934a4 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -112,7 +112,7 @@ static void interp( const struct clipper *clip, const struct vertex_header *out, const struct vertex_header *in ) { - const unsigned nr_attrs = clip->stage.draw->num_vs_outputs; + const unsigned nr_attrs = clip->stage.draw->vs.num_vs_outputs; unsigned j; /* Vertex header. @@ -180,7 +180,7 @@ static void emit_poly( struct draw_stage *stage, header.flags |= edge_last; if (0) { - const struct draw_vertex_shader *vs = stage->draw->vertex_shader; + const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; uint j, k; debug_printf("Clipped tri:\n"); for (j = 0; j < 3; j++) { @@ -425,7 +425,7 @@ clip_init_state( struct draw_stage *stage ) clipper->flat = stage->draw->rasterizer->flatshade ? TRUE : FALSE; if (clipper->flat) { - const struct draw_vertex_shader *vs = stage->draw->vertex_shader; + const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; uint i; clipper->num_color_attribs = 0; diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c index 09b68c4559..21a9c3b77f 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c @@ -159,7 +159,7 @@ static void flatshade_line_1( struct draw_stage *stage, static void flatshade_init_state( struct draw_stage *stage ) { struct flat_stage *flat = flat_stage(stage); - const struct draw_vertex_shader *vs = stage->draw->vertex_shader; + const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; uint i; /* Find which vertex shader outputs are colors, make a list */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index 3cbced362e..4673d5dcba 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -71,7 +71,7 @@ screen_interp( struct draw_context *draw, const struct vertex_header *v1 ) { uint attr; - for (attr = 0; attr < draw->num_vs_outputs; attr++) { + for (attr = 0; attr < draw->vs.num_vs_outputs; attr++) { const float *val0 = v0->data[attr]; const float *val1 = v1->data[attr]; float *newv = dst->data[attr]; diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c index 50872fdbe9..3ac825f565 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c +++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c @@ -105,7 +105,7 @@ static void twoside_first_tri( struct draw_stage *stage, struct prim_header *header ) { struct twoside_stage *twoside = twoside_stage(stage); - const struct draw_vertex_shader *vs = stage->draw->vertex_shader; + const struct draw_vertex_shader *vs = stage->draw->vs.vertex_shader; uint i; twoside->attrib_front0 = 0; diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index ed08573382..df92e3f2d0 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -197,7 +197,7 @@ static void widepoint_first_point( struct draw_stage *stage, if (draw->rasterizer->point_sprite) { /* find vertex shader texcoord outputs */ - const struct draw_vertex_shader *vs = draw->vertex_shader; + const struct draw_vertex_shader *vs = draw->vs.vertex_shader; uint i, j = 0; for (i = 0; i < vs->info.num_outputs; i++) { if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_GENERIC) { @@ -212,7 +212,7 @@ static void widepoint_first_point( struct draw_stage *stage, wide->psize_slot = -1; if (draw->rasterizer->point_size_per_vertex) { /* find PSIZ vertex output */ - const struct draw_vertex_shader *vs = draw->vertex_shader; + const struct draw_vertex_shader *vs = draw->vs.vertex_shader; uint i; for (i = 0; i < vs->info.num_outputs; i++) { if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_PSIZE) { diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index fd51a57781..3418ee2b88 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -169,13 +169,24 @@ struct draw_context /* pipe state that we need: */ const struct pipe_rasterizer_state *rasterizer; struct pipe_viewport_state viewport; + boolean identity_viewport; - struct draw_vertex_shader *vertex_shader; + struct { + struct draw_vertex_shader *vertex_shader; + uint num_vs_outputs; /**< convenience, from vertex_shader */ - boolean identity_viewport; - uint num_vs_outputs; /**< convenience, from vertex_shader */ + /** TGSI program interpreter runtime state */ + struct tgsi_exec_machine machine; + + /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private. + */ + struct gallivm_cpu_engine *engine; + + struct translate_cache *fetch_cache; + struct translate_cache *emit_cache; + } vs; /* Clip derived state: */ @@ -192,16 +203,15 @@ struct draw_context unsigned reduced_prim; - /** TGSI program interpreter runtime state */ - struct tgsi_exec_machine machine; - - /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private. - */ - struct gallivm_cpu_engine *engine; void *driver_private; }; +/******************************************************************************* + * Vertex shader code: + */ +boolean draw_vs_init( struct draw_context *draw ); +void draw_vs_destroy( struct draw_context *draw ); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index dad54690a5..06718779a5 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -55,7 +55,7 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *vs = draw->vertex_shader; + struct draw_vertex_shader *vs = draw->vs.vertex_shader; /* Add one to num_outputs because the pipeline occasionally tags on * an additional texcoord, eg for AA lines. @@ -107,7 +107,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *shader = draw->vertex_shader; + struct draw_vertex_shader *shader = draw->vs.vertex_shader; unsigned opt = fpme->opt; unsigned alloc_count = align_int( fetch_count, 4 ); @@ -183,7 +183,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; - struct draw_vertex_shader *shader = draw->vertex_shader; + struct draw_vertex_shader *shader = draw->vs.vertex_shader; unsigned opt = fpme->opt; unsigned alloc_count = align_int( count, 4 ); diff --git a/src/gallium/auxiliary/draw/draw_pt_middle_fse.c b/src/gallium/auxiliary/draw/draw_pt_middle_fse.c index cdb7d260da..643ea151c1 100644 --- a/src/gallium/auxiliary/draw/draw_pt_middle_fse.c +++ b/src/gallium/auxiliary/draw/draw_pt_middle_fse.c @@ -368,8 +368,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle, { struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; struct draw_context *draw = fse->draw; - unsigned num_vs_inputs = draw->vertex_shader->info.num_inputs; - unsigned num_vs_outputs = draw->vertex_shader->info.num_outputs; + unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs; + unsigned num_vs_outputs = draw->vs.vertex_shader->info.num_outputs; const struct vertex_info *vinfo; unsigned i; boolean need_psize = 0; diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 03fe00a951..4142dd9589 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -66,13 +66,13 @@ draw_bind_vertex_shader(struct draw_context *draw, if (dvs) { - draw->vertex_shader = dvs; - draw->num_vs_outputs = dvs->info.num_outputs; + draw->vs.vertex_shader = dvs; + draw->vs.num_vs_outputs = dvs->info.num_outputs; dvs->prepare( dvs, draw ); } else { - draw->vertex_shader = NULL; - draw->num_vs_outputs = 0; + draw->vs.vertex_shader = NULL; + draw->vs.num_vs_outputs = 0; } } @@ -83,3 +83,30 @@ draw_delete_vertex_shader(struct draw_context *draw, { dvs->delete( dvs ); } + + + +boolean +draw_vs_init( struct draw_context *draw ) +{ + tgsi_exec_machine_init(&draw->vs.machine); + /* FIXME: give this machine thing a proper constructor: + */ + draw->vs.machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); + draw->vs.machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); + + return TRUE; +} + +void +draw_vs_destroy( struct draw_context *draw ) +{ + if (draw->vs.machine.Inputs) + align_free(draw->vs.machine.Inputs); + + if (draw->vs.machine.Outputs) + align_free(draw->vs.machine.Outputs); + + tgsi_exec_machine_free_data(&draw->vs.machine); + +} diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 7a02f6334b..cb80d008cd 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -182,7 +182,7 @@ draw_create_vs_exec(struct draw_context *draw, vs->base.prepare = vs_exec_prepare; vs->base.run_linear = vs_exec_run_linear; vs->base.delete = vs_exec_delete; - vs->machine = &draw->machine; + vs->machine = &draw->vs.machine; return &vs->base; } diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index edf235cddc..13ad032bd3 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -158,7 +158,7 @@ draw_create_vs_sse(struct draw_context *draw, vs->base.prepare = vs_sse_prepare; vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; - vs->machine = &draw->machine; + vs->machine = &draw->vs.machine; x86_init_func( &vs->sse2_program ); -- cgit v1.2.3 From 7c99d7fe60e7bb0b7cf103a851aeef4614278ca6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 15 May 2008 12:39:08 +0100 Subject: draw: create specialized vs varients incorporating fetch & emit --- src/gallium/auxiliary/draw/Makefile | 3 +- src/gallium/auxiliary/draw/draw_private.h | 2 + .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 338 +++++++++++++++++++++ src/gallium/auxiliary/draw/draw_vs.c | 83 ++++- src/gallium/auxiliary/draw/draw_vs.h | 105 +++++++ src/gallium/auxiliary/draw/draw_vs_exec.c | 2 + src/gallium/auxiliary/draw/draw_vs_llvm.c | 2 + src/gallium/auxiliary/draw/draw_vs_sse.c | 2 + src/gallium/auxiliary/draw/draw_vs_varient.c | 229 ++++++++++++++ 9 files changed, 764 insertions(+), 2 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c create mode 100644 src/gallium/auxiliary/draw/draw_vs_varient.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 3053682da8..84877994fb 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -26,7 +26,7 @@ C_SOURCES = \ draw_pt_emit.c \ draw_pt_fetch.c \ draw_pt_fetch_emit.c \ - draw_pt_middle_fse.c \ + draw_pt_fetch_shade_emit.c \ draw_pt_fetch_shade_pipeline.c \ draw_pt_post_vs.c \ draw_pt_util.c \ @@ -34,6 +34,7 @@ C_SOURCES = \ draw_pt_vcache.c \ draw_vertex.c \ draw_vs.c \ + draw_vs_varient.c \ draw_vs_exec.c \ draw_vs_llvm.c \ draw_vs_sse.c diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 3418ee2b88..c095bf3d7b 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -184,7 +184,9 @@ struct draw_context struct gallivm_cpu_engine *engine; + struct translate *fetch; struct translate_cache *fetch_cache; + struct translate *emit; struct translate_cache *emit_cache; } vs; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c new file mode 100644 index 0000000000..74945dcfe9 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -0,0 +1,338 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_pt.h" +#include "draw/draw_vs.h" + +#include "translate/translate.h" + +struct fetch_shade_emit; + + +/* Prototype fetch, shade, emit-hw-verts all in one go. + */ +struct fetch_shade_emit { + struct draw_pt_middle_end base; + struct draw_context *draw; + + + /* Temporaries: + */ + const float *constants; + unsigned pitch[PIPE_MAX_ATTRIBS]; + const ubyte *src[PIPE_MAX_ATTRIBS]; + unsigned prim; + + struct draw_vs_varient_key key; + struct draw_vs_varient *active; +}; + + + + +static void fse_prepare( struct draw_pt_middle_end *middle, + unsigned prim, + unsigned opt ) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; + struct draw_context *draw = fse->draw; + unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs; + unsigned num_vs_outputs = draw->vs.vertex_shader->info.num_outputs; + const struct vertex_info *vinfo; + unsigned i; + boolean need_psize = 0; + + + if (draw->pt.user.elts) { + assert(0); + return ; + } + + if (!draw->render->set_primitive( draw->render, + prim )) { + assert(0); + return; + } + + /* Must do this after set_primitive() above: + */ + vinfo = draw->render->get_vertex_info(draw->render); + + + + fse->key.nr_elements = MAX2(num_vs_outputs, /* outputs - translate to hw format */ + num_vs_inputs); /* inputs - fetch from api format */ + + fse->key.output_stride = vinfo->size * 4; + memset(fse->key.element, 0, + fse->key.nr_elements * sizeof(fse->key.element[0])); + + for (i = 0; i < num_vs_inputs; i++) { + const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; + fse->key.element[i].in.format = src->src_format; + + /* Consider ignoring these, ie make generated programs + * independent of this state: + */ + fse->key.element[i].in.buffer = src->vertex_buffer_index; + fse->key.element[i].in.offset = src->src_offset; + } + + + { + unsigned dst_offset = 0; + + for (i = 0; i < vinfo->num_attribs; i++) { + unsigned emit_sz = 0; + unsigned output_format = PIPE_FORMAT_NONE; + unsigned vs_output = vinfo->src_index[i]; + + switch (vinfo->emit[i]) { + case EMIT_4F: + output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit_sz = 4 * sizeof(float); + break; + case EMIT_3F: + output_format = PIPE_FORMAT_R32G32B32_FLOAT; + emit_sz = 3 * sizeof(float); + break; + case EMIT_2F: + output_format = PIPE_FORMAT_R32G32_FLOAT; + emit_sz = 2 * sizeof(float); + break; + case EMIT_1F: + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + break; + case EMIT_1F_PSIZE: + need_psize = 1; + output_format = PIPE_FORMAT_R32_FLOAT; + emit_sz = 1 * sizeof(float); + vs_output = num_vs_outputs + 1; + + break; + default: + assert(0); + break; + } + + /* The elements in the key correspond to vertex shader output + * numbers, not to positions in the hw vertex description -- + * that's handled by the output_offset field. + */ + fse->key.element[vs_output].out.format = output_format; + fse->key.element[vs_output].out.offset = dst_offset; + + dst_offset += emit_sz; + assert(fse->key.output_stride >= dst_offset); + } + } + + /* To make psize work, really need to tell the vertex shader to + * copy that value from input->output. For 'translate' this was + * implicit for all elements. + */ +#if 0 + if (need_psize) { + unsigned input = num_vs_inputs + 1; + const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; + fse->key.element[i].input_format = PIPE_FORMAT_R32_FLOAT; + fse->key.element[i].input_buffer = 0; //nr_buffers + 1; + fse->key.element[i].input_offset = 0; + + fse->key.nr_elements += 1; + + } +#endif + + /* Would normally look up a vertex shader and peruse its list of + * varients somehow. We omitted that step and put all the + * hardcoded "shaders" into an array. We're just making the + * assumption that this happens to be a matching shader... ie + * you're running isosurf, aren't you? + */ + fse->active = draw_vs_lookup_varient( draw->vs.vertex_shader, + &fse->key ); + + if (!fse->active) { + assert(0); + return ; + } + + /* Now set buffer pointers: + */ + for (i = 0; i < num_vs_inputs; i++) { + unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index; + + fse->active->set_input( fse->active, + i, + + ((const ubyte *) draw->pt.user.vbuffer[buf] + + draw->pt.vertex_buffer[buf].buffer_offset), + + draw->pt.vertex_buffer[buf].pitch ); + } + + fse->active->set_constants( fse->active, + (const float (*)[4])draw->pt.user.constants ); + + //return TRUE; +} + + + + + + + +static void fse_run_linear( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count ) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; + struct draw_context *draw = fse->draw; + unsigned alloc_count = align(count, 4); + char *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)alloc_count ); + + if (!hw_verts) { + assert(0); + return; + } + + /* Single routine to fetch vertices, run shader and emit HW verts. + * Clipping and viewport transformation are done elsewhere -- + * either by the API or on hardware, or for some other reason not + * required... + */ + fse->active->run_linear( fse->active, + start, count, + hw_verts ); + + /* Draw arrays path to avoid re-emitting index list again and + * again. + */ + draw->render->draw_arrays( draw->render, + 0, + count ); + + + draw->render->release_vertices( draw->render, + hw_verts, + fse->key.output_stride, + count ); +} + + +static void +fse_run(struct draw_pt_middle_end *middle, + const unsigned *fetch_elts, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; + struct draw_context *draw = fse->draw; + unsigned alloc_count = align(fetch_count, 4); + void *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)alloc_count ); + if (!hw_verts) { + assert(0); + return; + } + + + /* Single routine to fetch vertices, run shader and emit HW verts. + */ + fse->active->run_elts( fse->active, + fetch_elts, + fetch_count, + hw_verts ); + + draw->render->draw( draw->render, + draw_elts, + draw_count ); + + draw->render->release_vertices( draw->render, + hw_verts, + fse->key.output_stride, + fetch_count ); + +} + + +static void fse_finish( struct draw_pt_middle_end *middle ) +{ +} + + +static void +fse_destroy( struct draw_pt_middle_end *middle ) +{ + FREE(middle); +} + +struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw ) +{ + struct fetch_shade_emit *fse = CALLOC_STRUCT(fetch_shade_emit); + if (!fse) + return NULL; + + fse->base.prepare = fse_prepare; + fse->base.run = fse_run; + fse->base.run_linear = fse_run_linear; + fse->base.finish = fse_finish; + fse->base.destroy = fse_destroy; + fse->draw = draw; + + return &fse->base; +} diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 4142dd9589..9b899d404e 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -36,6 +36,8 @@ #include "draw_private.h" #include "draw_context.h" #include "draw_vs.h" +#include "translate/translate.h" +#include "translate/translate_cache.h" @@ -90,11 +92,25 @@ boolean draw_vs_init( struct draw_context *draw ) { tgsi_exec_machine_init(&draw->vs.machine); + /* FIXME: give this machine thing a proper constructor: */ draw->vs.machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); - draw->vs.machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); + if (!draw->vs.machine.Inputs) + return FALSE; + draw->vs.machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16); + if (!draw->vs.machine.Outputs) + return FALSE; + + draw->vs.emit_cache = translate_cache_create(); + if (!draw->vs.emit_cache) + return FALSE; + + draw->vs.fetch_cache = translate_cache_create(); + if (!draw->vs.fetch_cache) + return FALSE; + return TRUE; } @@ -107,6 +123,71 @@ draw_vs_destroy( struct draw_context *draw ) if (draw->vs.machine.Outputs) align_free(draw->vs.machine.Outputs); + if (draw->vs.fetch_cache) + translate_cache_destroy(draw->vs.fetch_cache); + + if (draw->vs.emit_cache) + translate_cache_destroy(draw->vs.emit_cache); + tgsi_exec_machine_free_data(&draw->vs.machine); } + + +struct draw_vs_varient * +draw_vs_lookup_varient( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ) +{ + struct draw_vs_varient *varient; + unsigned i; + + /* Lookup existing varient: + */ + for (i = 0; i < vs->nr_varients; i++) + if (draw_vs_varient_key_compare(key, &vs->varient[i]->key) == 0) + return vs->varient[i]; + + /* Else have to create a new one: + */ + varient = vs->create_varient( vs, key ); + if (varient == NULL) + return NULL; + + /* Add it to our list: + */ + assert(vs->nr_varients < Elements(vs->varient)); + vs->varient[vs->nr_varients++] = varient; + + /* Done + */ + return varient; +} + + +struct translate * +draw_vs_get_fetch( struct draw_context *draw, + struct translate_key *key ) +{ + if (!draw->vs.fetch || + translate_key_compare(&draw->vs.fetch->key, key) != 0) + { + translate_key_sanitize(key); + draw->vs.fetch = translate_cache_find(draw->vs.fetch_cache, key); + } + + return draw->vs.fetch; +} + +struct translate * +draw_vs_get_emit( struct draw_context *draw, + struct translate_key *key ) +{ + if (!draw->vs.emit || + translate_key_compare(&draw->vs.emit->key, key) != 0) + { + translate_key_sanitize(key); + draw->vs.emit = translate_cache_find(draw->vs.emit_cache, key); + } + + return draw->vs.emit; +} diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index f9772b83b8..677be0d28d 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -38,10 +38,63 @@ struct draw_context; struct pipe_shader_state; +struct draw_vs_input +{ + enum pipe_format format; + unsigned buffer; + unsigned offset; +}; + +struct draw_vs_output +{ + enum pipe_format format; + unsigned offset; +}; + +struct draw_vs_element { + struct draw_vs_input in; + struct draw_vs_output out; +}; + +struct draw_vs_varient_key { + unsigned output_stride; + unsigned nr_elements; + struct draw_vs_element element[PIPE_MAX_ATTRIBS]; +}; + +struct draw_vs_varient { + struct draw_vs_varient_key key; + + struct draw_vertex_shader *vs; + + void (*set_input)( struct draw_vs_varient *, + unsigned i, + const void *ptr, + unsigned stride ); + + void (*set_constants)( struct draw_vs_varient *, + const float (*constants)[4] ); + + + void (*run_linear)( struct draw_vs_varient *shader, + unsigned start, + unsigned count, + void *output_buffer ); + + void (*run_elts)( struct draw_vs_varient *shader, + const unsigned *elts, + unsigned count, + void *output_buffer ); + + void (*destroy)( struct draw_vs_varient * ); +}; + + /** * Private version of the compiled vertex_shader */ struct draw_vertex_shader { + struct draw_context *draw; /* This member will disappear shortly: */ @@ -49,6 +102,14 @@ struct draw_vertex_shader { struct tgsi_shader_info info; + /* + */ + struct draw_vs_varient *varient[16]; + unsigned nr_varients; + struct draw_vs_varient *(*create_varient)( struct draw_vertex_shader *shader, + const struct draw_vs_varient_key *key ); + + void (*prepare)( struct draw_vertex_shader *shader, struct draw_context *draw ); @@ -68,6 +129,15 @@ struct draw_vertex_shader { }; +struct draw_vs_varient * +draw_vs_lookup_varient( struct draw_vertex_shader *base, + const struct draw_vs_varient_key *key ); + + +/******************************************************************************** + * Internal functions: + */ + struct draw_vertex_shader * draw_create_vs_exec(struct draw_context *draw, const struct pipe_shader_state *templ); @@ -80,8 +150,43 @@ struct draw_vertex_shader * draw_create_vs_llvm(struct draw_context *draw, const struct pipe_shader_state *templ); +/******************************************************************************** + * Helpers for vs implementations that don't do their own fetch/emit varients. + * Means these can be shared between shaders. + */ +struct translate; +struct translate_key; + +struct translate *draw_vs_get_fetch( struct draw_context *draw, + struct translate_key *key ); + + +struct translate *draw_vs_get_emit( struct draw_context *draw, + struct translate_key *key ); + +struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ); + + + +static INLINE int draw_vs_varient_keysize( const struct draw_vs_varient_key *key ) +{ + return 2 * sizeof(int) + key->nr_elements * sizeof(struct draw_vs_element); +} + +static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key *a, + const struct draw_vs_varient_key *b ) +{ + int keysize = draw_vs_varient_keysize(a); + return memcmp(a, b, keysize); +} + + + + #define MAX_TGSI_VERTICES 4 + #endif diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index cb80d008cd..4501877efc 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -179,9 +179,11 @@ draw_create_vs_exec(struct draw_context *draw, tgsi_scan_shader(state->tokens, &vs->base.info); + vs->base.draw = draw; vs->base.prepare = vs_exec_prepare; vs->base.run_linear = vs_exec_run_linear; vs->base.delete = vs_exec_delete; + vs->base.create_varient = draw_vs_varient_generic; vs->machine = &draw->vs.machine; return &vs->base; diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 171da51dd5..621472ec7c 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -114,7 +114,9 @@ draw_create_vs_llvm(struct draw_context *draw, tgsi_scan_shader(vs->base.state.tokens, &vs->base.info); + vs->base.draw = draw; vs->base.prepare = vs_llvm_prepare; + vs->base.create_varient = draw_vs_varient_generic; vs->base.run_linear = vs_llvm_run_linear; vs->base.delete = vs_llvm_delete; vs->machine = &draw->machine; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 13ad032bd3..df94a7e0c7 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -155,6 +155,8 @@ draw_create_vs_sse(struct draw_context *draw, tgsi_scan_shader(templ->tokens, &vs->base.info); + vs->base.draw = draw; + vs->base.create_varient = draw_vs_varient_generic; vs->base.prepare = vs_sse_prepare; vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c new file mode 100644 index 0000000000..d27b0f6187 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -0,0 +1,229 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + */ + +#include "pipe/p_util.h" +#include "draw/draw_context.h" +#include "draw/draw_private.h" +#include "draw/draw_vbuf.h" +#include "draw/draw_vertex.h" +#include "draw/draw_vs.h" +#include "translate/translate.h" +#include "translate/translate_cache.h" + +/* A first pass at incorporating vertex fetch/emit functionality into + */ +struct draw_vs_varient_generic { + struct draw_vs_varient base; + + + + struct draw_vertex_shader *shader; + struct draw_context *draw; + + /* Basic plan is to run these two translate functions before/after + * the vertex shader's existing run_linear() routine to simulate + * the inclusion of this functionality into the shader... + * + * Next will look at actually including it. + */ + struct translate *fetch; + struct translate *emit; + + const float (*constants)[4]; +}; + + + + +static void vsvg_set_constants( struct draw_vs_varient *varient, + const float (*constants)[4] ) +{ + struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; + + vsvg->constants = constants; +} + + +static void vsvg_set_input( struct draw_vs_varient *varient, + unsigned buffer, + const void *ptr, + unsigned stride ) +{ + struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; + + vsvg->fetch->set_buffer(vsvg->fetch, + buffer, + ptr, + stride); +} + + +static void vsvg_run_elts( struct draw_vs_varient *varient, + const unsigned *elts, + unsigned count, + void *output_buffer) +{ + struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; + + /* Want to do this in small batches for cache locality? + */ + + vsvg->fetch->run_elts( vsvg->fetch, + elts, + count, + output_buffer ); + + //if (!vsvg->base.vs->is_passthrough) + { + vsvg->base.vs->run_linear( vsvg->base.vs, + output_buffer, + output_buffer, + vsvg->constants, + count, + vsvg->base.key.output_stride, + vsvg->base.key.output_stride); + + //if (!vsvg->already_in_emit_format) + + vsvg->emit->set_buffer( vsvg->emit, + 0, + output_buffer, + vsvg->base.key.output_stride ); + + + vsvg->emit->run( vsvg->emit, + 0, count, + output_buffer ); + } +} + + +static void vsvg_run_linear( struct draw_vs_varient *varient, + unsigned start, + unsigned count, + void *output_buffer ) +{ + struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; + + //debug_printf("%s %d %d\n", __FUNCTION__, start, count); + + + vsvg->fetch->run( vsvg->fetch, + start, + count, + output_buffer ); + + //if (!vsvg->base.vs->is_passthrough) + { + vsvg->base.vs->run_linear( vsvg->base.vs, + output_buffer, + output_buffer, + vsvg->constants, + count, + vsvg->base.key.output_stride, + vsvg->base.key.output_stride); + + //if (!vsvg->already_in_emit_format) + vsvg->emit->set_buffer( vsvg->emit, + 0, + output_buffer, + vsvg->base.key.output_stride ); + + + vsvg->emit->run( vsvg->emit, + 0, count, + output_buffer ); + } +} + + + +static void vsvg_destroy( struct draw_vs_varient *varient ) +{ + FREE(varient); +} + + +struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ) +{ + unsigned i; + struct translate_key fetch, emit; + + struct draw_vs_varient_generic *vsvg = CALLOC_STRUCT( draw_vs_varient_generic ); + if (vsvg == NULL) + return NULL; + + vsvg->base.key = *key; + vsvg->base.vs = vs; + vsvg->base.set_input = vsvg_set_input; + vsvg->base.set_constants = vsvg_set_constants; + vsvg->base.run_elts = vsvg_run_elts; + vsvg->base.run_linear = vsvg_run_linear; + vsvg->base.destroy = vsvg_destroy; + + + + /* OK, have to build a new one: + */ + fetch.nr_elements = vs->info.num_inputs; + fetch.output_stride = 0; + for (i = 0; i < vs->info.num_inputs; i++) { + fetch.element[i].input_format = key->element[i].in.format; + fetch.element[i].input_buffer = key->element[i].in.buffer; + fetch.element[i].input_offset = key->element[i].in.offset; + fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + fetch.element[i].output_offset = fetch.output_stride; + fetch.output_stride += 4 * sizeof(float); + } + + + emit.nr_elements = vs->info.num_outputs; + emit.output_stride = key->output_stride; + for (i = 0; i < vs->info.num_outputs; i++) { + emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit.element[i].input_buffer = 0; + emit.element[i].input_offset = i * 4 * sizeof(float); + emit.element[i].output_format = key->element[i].out.format; + emit.element[i].output_offset = key->element[i].out.offset; + } + + vsvg->fetch = draw_vs_get_fetch( vs->draw, &fetch ); + vsvg->emit = draw_vs_get_emit( vs->draw, &emit ); + + return &vsvg->base; +} + + + + + -- cgit v1.2.3 From 9232f0c023af060b12f77dee5e8b6a533c48e146 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 19 May 2008 16:28:53 +0100 Subject: rtasm: remove unused struct member --- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index eacaeeaf6f..baa10b7d4a 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -44,7 +44,6 @@ struct x86_function { unsigned stack_offset; int need_emms; unsigned char error_overflow[4]; - const char *fn; }; enum x86_reg_file { -- cgit v1.2.3 From 8618e6aa16bdba2c8b08124261bbaedaf7e22447 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 20 May 2008 14:34:06 +0100 Subject: translate: remove spurious comment --- src/gallium/auxiliary/translate/translate_sse.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index a54ac5a82f..582d6f6466 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -472,13 +472,7 @@ static boolean build_vertex_emit( struct translate_sse *p, x86_lea(p->func, vertexECX, x86_make_disp(vertexECX, p->translate.key.output_stride)); /* Incr index - */ /* Emit code for each of the attributes. Currently routes - * everything through SSE registers, even when it might be more - * efficient to stick with regular old x86. No optimization or - * other tricks - enough new ground to cover here just getting - * things working. - */ - + */ if (linear) { x86_inc(p->func, idxEBX); } -- cgit v1.2.3 From d3e64caef6f8654af1a84825803e517ab8221c68 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 21 May 2008 08:28:16 +0100 Subject: rtasm: export debug reg print function --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 20 +++++++++----------- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 3 +++ 2 files changed, 12 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 4e036d9032..68ac91ed13 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -36,11 +36,8 @@ #define DUMP_SSE 0 -#if DUMP_SSE -static void -_print_reg( - struct x86_reg reg ) +void x86_print_reg( struct x86_reg reg ) { if (reg.mod != mod_REG) debug_printf( "[" ); @@ -77,6 +74,7 @@ _print_reg( debug_printf( "]" ); } +#if DUMP_SSE #define DUMP_START() debug_printf( "\n" ) #define DUMP_END() debug_printf( "\n" ) @@ -87,7 +85,7 @@ _print_reg( foo++; \ if (*foo) \ foo++; \ - debug_printf( "\n% 15s ", foo ); \ + debug_printf( "\n% 4x% 15s ", p->csr - p->store, foo ); \ } while (0) #define DUMP_I( I ) do { \ @@ -97,27 +95,27 @@ _print_reg( #define DUMP_R( R0 ) do { \ DUMP(); \ - _print_reg( R0 ); \ + x86_print_reg( R0 ); \ } while( 0 ) #define DUMP_RR( R0, R1 ) do { \ DUMP(); \ - _print_reg( R0 ); \ + x86_print_reg( R0 ); \ debug_printf( ", " ); \ - _print_reg( R1 ); \ + x86_print_reg( R1 ); \ } while( 0 ) #define DUMP_RI( R0, I ) do { \ DUMP(); \ - _print_reg( R0 ); \ + x86_print_reg( R0 ); \ debug_printf( ", %u", I ); \ } while( 0 ) #define DUMP_RRI( R0, R1, I ) do { \ DUMP(); \ - _print_reg( R0 ); \ + x86_print_reg( R0 ); \ debug_printf( ", " ); \ - _print_reg( R1 ); \ + x86_print_reg( R1 ); \ debug_printf( ", %u", I ); \ } while( 0 ) diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index baa10b7d4a..1e02c6e73b 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -106,6 +106,9 @@ void x86_init_func_size( struct x86_function *p, unsigned code_size ); void x86_release_func( struct x86_function *p ); void (*x86_get_func( struct x86_function *p ))( void ); +/* Debugging: + */ +void x86_print_reg( struct x86_reg reg ); /* Create and manipulate registers and regmem values: -- cgit v1.2.3 From b5c8b3fba6ac90a0d83e02bfe432142f1adee9e5 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 21 May 2008 08:29:19 +0100 Subject: translate: mark functions as PIPE_CDECL --- src/gallium/auxiliary/translate/translate.h | 18 +++++++++--------- src/gallium/auxiliary/translate/translate_generic.c | 16 ++++++++-------- src/gallium/auxiliary/translate/translate_sse.c | 20 +++++++------------- 3 files changed, 24 insertions(+), 30 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index b8210af50c..c3b754a902 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -71,15 +71,15 @@ struct translate { const void *ptr, unsigned stride ); - void (*run_elts)( struct translate *, - const unsigned *elts, - unsigned count, - void *output_buffer); - - void (*run)( struct translate *, - unsigned start, - unsigned count, - void *output_buffer); + void (PIPE_CDECL *run_elts)( struct translate *, + const unsigned *elts, + unsigned count, + void *output_buffer); + + void (PIPE_CDECL *run)( struct translate *, + unsigned start, + unsigned count, + void *output_buffer); }; diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 402780ee53..a25d94f2ca 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -541,10 +541,10 @@ static emit_func get_emit_func( enum pipe_format format ) /** * Fetch vertex attributes for 'count' vertices. */ -static void generic_run_elts( struct translate *translate, - const unsigned *elts, - unsigned count, - void *output_buffer ) +static void PIPE_CDECL generic_run_elts( struct translate *translate, + const unsigned *elts, + unsigned count, + void *output_buffer ) { struct translate_generic *tg = translate_generic(translate); char *vert = output_buffer; @@ -580,10 +580,10 @@ static void generic_run_elts( struct translate *translate, -static void generic_run( struct translate *translate, - unsigned start, - unsigned count, - void *output_buffer ) +static void PIPE_CDECL generic_run( struct translate *translate, + unsigned start, + unsigned count, + void *output_buffer ) { struct translate_generic *tg = translate_generic(translate); char *vert = output_buffer; diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index 582d6f6466..2fc8b9d3d0 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -45,22 +45,16 @@ #define W 3 -#ifdef WIN32 -#define RTASM __cdecl -#else -#define RTASM -#endif - -typedef void (RTASM *run_func)( struct translate *translate, - unsigned start, - unsigned count, - void *output_buffer ); - -typedef void (RTASM *run_elts_func)( struct translate *translate, - const unsigned *elts, +typedef void (PIPE_CDECL *run_func)( struct translate *translate, + unsigned start, unsigned count, void *output_buffer ); +typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate, + const unsigned *elts, + unsigned count, + void *output_buffer ); + struct translate_sse { -- cgit v1.2.3 From ba738a3135415de8b381cd8845cd6c435d5747a8 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 21 May 2008 09:43:30 +0100 Subject: draw: mark varient functions as PIPE_CDECL --- src/gallium/auxiliary/draw/draw_vs.h | 30 +++++++++++++++++++++--------- 1 file changed, 21 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 677be0d28d..6bfc2c8d75 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -62,6 +62,19 @@ struct draw_vs_varient_key { struct draw_vs_element element[PIPE_MAX_ATTRIBS]; }; +struct draw_vs_varient; + +typedef void (PIPE_CDECL *vsv_run_elts_func)( struct draw_vs_varient *, + const unsigned *elts, + unsigned count, + void *output_buffer); + +typedef void (PIPE_CDECL *vsv_run_linear_func)( struct draw_vs_varient *, + unsigned start, + unsigned count, + void *output_buffer); + + struct draw_vs_varient { struct draw_vs_varient_key key; @@ -75,16 +88,15 @@ struct draw_vs_varient { void (*set_constants)( struct draw_vs_varient *, const float (*constants)[4] ); + void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader, + unsigned start, + unsigned count, + void *output_buffer ); - void (*run_linear)( struct draw_vs_varient *shader, - unsigned start, - unsigned count, - void *output_buffer ); - - void (*run_elts)( struct draw_vs_varient *shader, - const unsigned *elts, - unsigned count, - void *output_buffer ); + void (PIPE_CDECL *run_elts)( struct draw_vs_varient *shader, + const unsigned *elts, + unsigned count, + void *output_buffer ); void (*destroy)( struct draw_vs_varient * ); }; -- cgit v1.2.3 From 1ba10e5ccf5cd0c990922e982e1e9bc6be48a5e4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 21 May 2008 09:44:16 +0100 Subject: draw: add aos vertex shader varient --- src/gallium/auxiliary/draw/Makefile | 2 + src/gallium/auxiliary/draw/draw_vs.h | 10 + src/gallium/auxiliary/draw/draw_vs_aos.c | 1739 +++++++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_vs_aos.h | 181 +++ src/gallium/auxiliary/draw/draw_vs_aos_io.c | 314 +++++ src/gallium/auxiliary/draw/draw_vs_sse.c | 1 + 6 files changed, 2247 insertions(+) create mode 100644 src/gallium/auxiliary/draw/draw_vs_aos.c create mode 100644 src/gallium/auxiliary/draw/draw_vs_aos.h create mode 100644 src/gallium/auxiliary/draw/draw_vs_aos_io.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 84877994fb..9a88ecc070 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -35,6 +35,8 @@ C_SOURCES = \ draw_vertex.c \ draw_vs.c \ draw_vs_varient.c \ + draw_vs_aos.c \ + draw_vs_aos_io.c \ draw_vs_exec.c \ draw_vs_llvm.c \ draw_vs_sse.c diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 6bfc2c8d75..5a8d0da06d 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -162,6 +162,16 @@ struct draw_vertex_shader * draw_create_vs_llvm(struct draw_context *draw, const struct pipe_shader_state *templ); + + +struct draw_vs_varient_key; +struct draw_vertex_shader; + +struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ); + + + /******************************************************************************** * Helpers for vs implementations that don't do their own fetch/emit varients. * Means these can be shared between shaders. diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c new file mode 100644 index 0000000000..620f5e3592 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -0,0 +1,1739 @@ +/* + * Mesa 3-D graphics library + * Version: 6.3 + * + * Copyright (C) 1999-2004 Brian Paul All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/** + * Translate tgsi vertex programs to x86/x87/SSE/SSE2 machine code + * using the rtasm runtime assembler. Based on the old + * t_vb_arb_program_sse.c + */ + + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" +#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/util/tgsi_dump.h" + +#include "draw_vs.h" +#include "draw_vs_aos.h" + +#include "rtasm/rtasm_x86sse.h" + +#ifdef PIPE_ARCH_X86 + + +#define DISASSEM 0 + + + + + +static INLINE boolean eq( struct x86_reg a, + struct x86_reg b ) +{ + return (a.file == b.file && + a.idx == b.idx && + a.mod == b.mod && + a.disp == b.disp); +} + + +static struct x86_reg get_reg_ptr(struct aos_compilation *cp, + unsigned file, + unsigned idx ) +{ + struct x86_reg ptr = cp->machine_EDX; + + switch (file) { + case TGSI_FILE_INPUT: + return x86_make_disp(ptr, Offset(struct aos_machine, input[idx])); + + case TGSI_FILE_OUTPUT: + return x86_make_disp(ptr, Offset(struct aos_machine, output[idx])); + + case TGSI_FILE_TEMPORARY: + return x86_make_disp(ptr, Offset(struct aos_machine, temp[idx])); + + case TGSI_FILE_IMMEDIATE: + return x86_make_disp(ptr, Offset(struct aos_machine, immediate[idx])); + + case TGSI_FILE_CONSTANT: + return x86_make_disp(ptr, Offset(struct aos_machine, constant[idx])); + + case AOS_FILE_INTERNAL: + return x86_make_disp(ptr, Offset(struct aos_machine, immediate[idx])); + + default: + ERROR(cp, "unknown reg file"); + return x86_make_reg(0,0); + } +} + + +struct x86_reg aos_get_internal( struct aos_compilation *cp, + unsigned imm ) +{ + return get_reg_ptr( cp, + AOS_FILE_INTERNAL, + imm + 1 ); +} + +static void spill( struct aos_compilation *cp, unsigned idx ) +{ + if (!cp->xmm[idx].dirty || + (cp->xmm[idx].file != TGSI_FILE_INPUT && /* inputs are fetched into xmm & set dirty */ + cp->xmm[idx].file != TGSI_FILE_OUTPUT && + cp->xmm[idx].file != TGSI_FILE_TEMPORARY)) { + ERROR(cp, "invalid spill"); + return; + } + else { + struct x86_reg oldval = get_reg_ptr(cp, + cp->xmm[idx].file, + cp->xmm[idx].idx); + + assert(cp->xmm[idx].dirty); + sse_movups(cp->func, oldval, x86_make_reg(file_XMM, idx)); + cp->xmm[idx].dirty = 0; + } +} + +struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp ) +{ + unsigned i; + unsigned oldest = 0; + + for (i = 0; i < 8; i++) + if (cp->xmm[i].last_used < cp->xmm[oldest].last_used) + oldest = i; + + /* Need to write out the old value? + */ + if (cp->xmm[oldest].dirty) + spill(cp, oldest); + + assert(cp->xmm[oldest].last_used != cp->insn_counter); + + cp->xmm[oldest].file = TGSI_FILE_NULL; + cp->xmm[oldest].idx = 0; + cp->xmm[oldest].last_used = cp->insn_counter; + return x86_make_reg(file_XMM, oldest); +} + +void aos_release_xmm_reg( struct aos_compilation *cp, + unsigned idx ) +{ + cp->xmm[idx].file = TGSI_FILE_NULL; + cp->xmm[idx].idx = 0; + cp->xmm[idx].dirty = 0; + cp->xmm[idx].last_used = 0; +} + +static void invalidate_xmm( struct aos_compilation *cp, + unsigned file, unsigned idx ) +{ + unsigned i; + + /* Invalidate any old copy of this register in XMM0-7. + */ + for (i = 0; i < 8; i++) { + if (cp->xmm[i].file == file && cp->xmm[i].idx == idx) { + + if (cp->xmm[i].dirty) + spill(cp, i); + + aos_release_xmm_reg(cp, i); + break; + } + } + + for (; i < 8; i++) { + if (cp->xmm[i].file == file && cp->xmm[i].idx == idx) { + assert(0); + } + } +} + + +void aos_adopt_xmm_reg( struct aos_compilation *cp, + struct x86_reg reg, + unsigned file, + unsigned idx, + unsigned dirty ) +{ + if (reg.file != file_XMM) { + assert(0); + return; + } + + invalidate_xmm(cp, file, idx); + cp->xmm[reg.idx].file = file; + cp->xmm[reg.idx].idx = idx; + cp->xmm[reg.idx].dirty = dirty; +} + + + +static struct x86_reg aos_get_shader_reg_ptr( struct aos_compilation *cp, + unsigned file, + unsigned idx ) +{ + invalidate_xmm( cp, file, idx ); + return get_reg_ptr( cp, file, idx ); +} + + +/* As above, but return a pointer. Note - this pointer may alias + * those returned by get_arg_ptr(). + */ +static struct x86_reg get_dst_ptr( struct aos_compilation *cp, + const struct tgsi_full_dst_register *dst ) +{ + return aos_get_shader_reg_ptr( cp, dst->DstRegister.File, dst->DstRegister.Index ); +} + + + + + +/* Return an XMM reg if the argument is resident, otherwise return a + * base+offset pointer to the saved value. + */ +struct x86_reg aos_get_shader_reg( struct aos_compilation *cp, + unsigned file, + unsigned idx ) +{ + unsigned i; + + for (i = 0; i < 8; i++) { + if (cp->xmm[i].file == file && + cp->xmm[i].idx == idx) + { + cp->xmm[i].last_used = cp->insn_counter; + return x86_make_reg(file_XMM, i); + } + } + + /* If not found in the XMM register file, return an indirect + * reference to the in-memory copy: + */ + return get_reg_ptr( cp, file, idx ); +} + + + + + +/* Emulate pshufd insn in regular SSE, if necessary: + */ +static void emit_pshufd( struct aos_compilation *cp, + struct x86_reg dst, + struct x86_reg arg0, + ubyte shuf ) +{ + if (cp->have_sse2) { + sse2_pshufd(cp->func, dst, arg0, shuf); + } + else { + if (!eq(dst, arg0)) + sse_movups(cp->func, dst, arg0); + + sse_shufps(cp->func, dst, dst, shuf); + } +} + + + + +/* Helper for writemask: + */ +static boolean emit_shuf_copy1( struct aos_compilation *cp, + struct x86_reg dst, + struct x86_reg arg0, + struct x86_reg arg1, + ubyte shuf ) +{ + struct x86_reg tmp = aos_get_xmm_reg(cp); + sse_movups(cp->func, dst, arg1); + emit_pshufd(cp, dst, dst, shuf); + emit_pshufd(cp, tmp, arg0, shuf); + + sse_movss(cp->func, dst, tmp); + + emit_pshufd(cp, dst, dst, shuf); + + aos_release_xmm_reg(cp, tmp.idx); + return TRUE; +} + + +/* Helper for writemask: + */ +static boolean emit_shuf_copy2( struct aos_compilation *cp, + struct x86_reg dst, + struct x86_reg arg0, + struct x86_reg arg1, + ubyte shuf ) +{ + struct x86_reg tmp = aos_get_xmm_reg(cp); + emit_pshufd(cp, dst, arg1, shuf); + emit_pshufd(cp, tmp, arg0, shuf); + + sse_shufps(cp->func, dst, tmp, SHUF(X, Y, Z, W)); + + emit_pshufd(cp, dst, dst, shuf); + + aos_release_xmm_reg(cp, tmp.idx); + return TRUE; +} + +#define SSE_SWIZZLE_NOOP ((0<<0) | (1<<2) | (2<<4) | (3<<6)) + + +/* Locate a source register and perform any required (simple) swizzle. + * + * Just fail on complex swizzles at this point. + */ +static struct x86_reg fetch_src( struct aos_compilation *cp, + const struct tgsi_full_src_register *src ) +{ + struct x86_reg arg0 = aos_get_shader_reg(cp, + src->SrcRegister.File, + src->SrcRegister.Index); + unsigned i; + unsigned swz = 0; + unsigned negs = 0; + unsigned abs = 0; + + for (i = 0; i < 4; i++) { + unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( src, i ); + unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, i ); + + switch (swizzle) { + case TGSI_EXTSWIZZLE_ZERO: + case TGSI_EXTSWIZZLE_ONE: + ERROR(cp, "not supporting full swizzles yet in tgsi_aos_sse2"); + break; + + default: + swz |= (swizzle & 0x3) << (i * 2); + break; + } + + switch (neg) { + case TGSI_UTIL_SIGN_TOGGLE: + negs |= (1<func, dst, arg0); + + aos_release_xmm_reg(cp, tmp.idx); + arg0 = dst; + } + + if (abs && abs != 0xf) { + ERROR(cp, "unsupported partial abs"); + } + + if (abs) { + struct x86_reg neg = aos_get_internal(cp, IMM_NEGS); + struct x86_reg tmp = aos_get_xmm_reg(cp); + + sse_movups(cp->func, tmp, arg0); + sse_mulps(cp->func, tmp, neg); + sse_maxps(cp->func, dst, arg0); + + aos_release_xmm_reg(cp, tmp.idx); + arg0 = dst; + } + } + + return arg0; +} + +static void x87_fld_src( struct aos_compilation *cp, + const struct tgsi_full_src_register *src, + unsigned channel ) +{ + struct x86_reg arg0 = aos_get_shader_reg_ptr(cp, + src->SrcRegister.File, + src->SrcRegister.Index); + + unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( src, channel ); + unsigned neg = tgsi_util_get_full_src_register_sign_mode( src, channel ); + + switch (swizzle) { + case TGSI_EXTSWIZZLE_ZERO: + x87_fldz( cp->func ); + break; + + case TGSI_EXTSWIZZLE_ONE: + x87_fld1( cp->func ); + break; + + default: + x87_fld( cp->func, x86_make_disp(arg0, (swizzle & 3) * sizeof(float)) ); + break; + } + + + switch (neg) { + case TGSI_UTIL_SIGN_TOGGLE: + /* Flip the sign: + */ + x87_fchs( cp->func ); + break; + + case TGSI_UTIL_SIGN_KEEP: + break; + + case TGSI_UTIL_SIGN_CLEAR: + x87_fabs( cp->func ); + break; + + case TGSI_UTIL_SIGN_SET: + x87_fabs( cp->func ); + x87_fchs( cp->func ); + break; + + default: + ERROR(cp, "unsupported sign-mode"); + break; + } +} + + + + + + +/* Used to implement write masking. This and most of the other instructions + * here would be easier to implement if there had been a translation + * to a 2 argument format (dst/arg0, arg1) at the shader level before + * attempting to translate to x86/sse code. + */ +static void store_dest( struct aos_compilation *cp, + const struct tgsi_full_dst_register *reg, + struct x86_reg result ) +{ + if (reg->DstRegister.WriteMask == 0) + { + return; + } + else if (reg->DstRegister.WriteMask == TGSI_WRITEMASK_XYZW) + { + if (result.file == file_XMM) { + aos_adopt_xmm_reg(cp, + result, + reg->DstRegister.File, + reg->DstRegister.Index, + TRUE); + } + else { + struct x86_reg dst = aos_get_xmm_reg(cp); + aos_adopt_xmm_reg(cp, + dst, + reg->DstRegister.File, + reg->DstRegister.Index, + TRUE); + sse_movups(cp->func, dst, result); + } + } + else + { + /* Previous value of the dest register: + */ + struct x86_reg old_dst = aos_get_shader_reg(cp, + reg->DstRegister.File, + reg->DstRegister.Index); + + + /* Alloc an xmm reg to hold the new value of the dest register: + */ + struct x86_reg dst = aos_get_xmm_reg(cp); + + aos_adopt_xmm_reg(cp, + dst, + reg->DstRegister.File, + reg->DstRegister.Index, + TRUE ); + + switch (reg->DstRegister.WriteMask) { + case TGSI_WRITEMASK_X: + if (result.file == file_XMM) { + sse_movups(cp->func, dst, old_dst); + sse_movss(cp->func, dst, result); + } + else { + struct x86_reg tmp = aos_get_xmm_reg(cp); + sse_movups(cp->func, dst, old_dst); + sse_movss(cp->func, tmp, result); + sse_movss(cp->func, dst, tmp); + aos_release_xmm_reg(cp, tmp.idx); + } + break; + + case TGSI_WRITEMASK_XY: + sse_movups(cp->func, dst, old_dst); + sse_shufps(cp->func, dst, result, SHUF(X, Y, Z, W)); + break; + + case TGSI_WRITEMASK_ZW: + sse_movups(cp->func, dst, result); + sse_shufps(cp->func, dst, old_dst, SHUF(X, Y, Z, W)); + break; + + case TGSI_WRITEMASK_YZW: + if (old_dst.file == file_XMM) { + sse_movups(cp->func, dst, result); + sse_movss(cp->func, dst, old_dst); + } + else { + struct x86_reg tmp = aos_get_xmm_reg(cp); + sse_movups(cp->func, dst, result); + sse_movss(cp->func, tmp, old_dst); + sse_movss(cp->func, dst, tmp); + aos_release_xmm_reg(cp, tmp.idx); + } + break; + + case TGSI_WRITEMASK_Y: + emit_shuf_copy1(cp, dst, result, old_dst, SHUF(Y,X,Z,W)); + break; + + case TGSI_WRITEMASK_Z: + emit_shuf_copy1(cp, dst, result, old_dst, SHUF(Z,Y,X,W)); + break; + + case TGSI_WRITEMASK_W: + emit_shuf_copy1(cp, dst, result, old_dst, SHUF(W,Y,Z,X)); + break; + + case TGSI_WRITEMASK_XZ: + emit_shuf_copy2(cp, dst, result, old_dst, SHUF(X,Z,Y,W)); + break; + + case TGSI_WRITEMASK_XW: + emit_shuf_copy2(cp, dst, result, old_dst, SHUF(X,W,Z,Y)); + + case TGSI_WRITEMASK_YZ: + emit_shuf_copy2(cp, dst, result, old_dst, SHUF(Z,Y,X,W)); + break; + + case TGSI_WRITEMASK_YW: + emit_shuf_copy2(cp, dst, result, old_dst, SHUF(W,Y,Z,X)); + break; + + case TGSI_WRITEMASK_XZW: + emit_shuf_copy1(cp, dst, old_dst, result, SHUF(Y,X,Z,W)); + break; + + case TGSI_WRITEMASK_XYW: + emit_shuf_copy1(cp, dst, old_dst, result, SHUF(Z,Y,X,W)); + break; + + case TGSI_WRITEMASK_XYZ: + emit_shuf_copy1(cp, dst, old_dst, result, SHUF(W,Y,Z,X)); + break; + + default: + assert(0); /* not possible */ + break; + } + } +} + + +static void x87_fst_or_nop( struct x86_function *func, + unsigned writemask, + unsigned channel, + struct x86_reg ptr ) +{ + if (writemask & (1<DstRegister.WriteMask; + + x87_fst_or_nop(cp->func, writemask, 0, ptr); + x87_fst_or_nop(cp->func, writemask, 1, ptr); + x87_fst_or_nop(cp->func, writemask, 2, ptr); + x87_fstp_or_pop(cp->func, writemask, 3, ptr); +} + +/* Save current x87 state and put it into single precision mode. + */ +static void save_fpu_state( struct aos_compilation *cp ) +{ +#if 0 + x87_fnstcw( cp->func, x86_make_disp(regEDX, get_offset(m, &m->fpucntl_restore))); + x87_fldcw( cp->func, ); +#endif +} + +static void restore_fpu_state( struct aos_compilation *cp ) +{ +#if 0 + x87_fnclex(cp->func); + x87_fldcw(cp->func, x86_make_disp(regEDX, get_offset(m, &m->fpucntl_restore))); +#endif +} + +static void set_fpu_round_neg_inf( struct aos_compilation *cp ) +{ +#if 0 + if (cp->fpucntl != RND_NEG_FPU) { + struct x86_reg regEDX = x86_make_reg(file_REG32, reg_DX); + struct arb_vp_machine *m = NULL; + + cp->fpucntl = RND_NEG_FPU; + x87_fnclex(cp->func); + x87_fldcw(cp->func, x86_make_disp(regEDX, get_offset(m, &m->fpucntl_rnd_neg))); + } +#endif +} + +static void set_fpu_round_nearest( struct aos_compilation *cp ) +{ +#if 0 +#endif +} + + +static void emit_x87_ex2( struct aos_compilation *cp ) +{ + struct x86_reg st0 = x86_make_reg(file_x87, 0); + struct x86_reg st1 = x86_make_reg(file_x87, 1); + struct x86_reg st3 = x86_make_reg(file_x87, 3); + + set_fpu_round_neg_inf( cp ); + + x87_fld(cp->func, st0); /* a a */ + x87_fprndint( cp->func ); /* int(a) a */ + x87_fld(cp->func, st0); /* int(a) int(a) a */ + x87_fstp(cp->func, st3); /* int(a) a int(a)*/ + x87_fsubp(cp->func, st1); /* frac(a) int(a) */ + x87_f2xm1(cp->func); /* (2^frac(a))-1 int(a)*/ + x87_fld1(cp->func); /* 1 (2^frac(a))-1 int(a)*/ + x87_faddp(cp->func, st1); /* 2^frac(a) int(a) */ + x87_fscale(cp->func); /* 2^a */ +} + + + +/** + * The traditional instructions. All operate on internal registers + * and ignore write masks and swizzling issues. + */ + +static boolean emit_ABS( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg dst = aos_get_xmm_reg(cp); + struct x86_reg neg = aos_get_internal(cp, IMM_NEGS); + + sse_movups(cp->func, dst, arg0); + sse_mulps(cp->func, dst, neg); + sse_maxps(cp->func, dst, arg0); + + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + +static boolean emit_ADD( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg dst = aos_get_xmm_reg(cp); + + sse_movups(cp->func, dst, arg0); + sse_addps(cp->func, dst, arg1); + + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + +static boolean emit_COS( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + x87_fld_src(cp, &op->FullSrcRegisters[0], 0); + x87_fcos(cp->func); + x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + return TRUE; +} + + +/* The dotproduct instructions don't really do that well in sse: + */ +static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg dst = aos_get_xmm_reg(cp); + struct x86_reg tmp = aos_get_xmm_reg(cp); + + sse_movups(cp->func, dst, arg0); + sse_mulps(cp->func, dst, arg1); + + /* Now the hard bit: sum the first 3 values: + */ + sse_movhlps(cp->func, tmp, dst); + sse_addss(cp->func, dst, tmp); /* a*x+c*z, b*y, ?, ? */ + emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z)); + sse_addss(cp->func, dst, tmp); + sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X)); + + aos_release_xmm_reg(cp, tmp.idx); + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + + + +static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg dst = aos_get_xmm_reg(cp); + struct x86_reg tmp = aos_get_xmm_reg(cp); + + sse_movups(cp->func, dst, arg0); + sse_mulps(cp->func, dst, arg1); + + /* Now the hard bit: sum the values: + */ + sse_movhlps(cp->func, tmp, dst); + sse_addps(cp->func, dst, tmp); /* a*x+c*z, b*y+d*w, a*x+c*z, b*y+d*w */ + emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z)); + sse_addss(cp->func, dst, tmp); + sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X)); + + aos_release_xmm_reg(cp, tmp.idx); + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + +static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg dst = aos_get_xmm_reg(cp); + struct x86_reg tmp = aos_get_xmm_reg(cp); + + sse_movups(cp->func, dst, arg0); + sse_mulps(cp->func, dst, arg1); + + /* Now the hard bit: sum the values (from DP3): + */ + sse_movhlps(cp->func, tmp, dst); + sse_addss(cp->func, dst, tmp); /* a*x+c*z, b*y, ?, ? */ + emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z)); + sse_addss(cp->func, dst, tmp); + emit_pshufd(cp, tmp, arg1, SHUF(W,W,W,W)); + sse_addss(cp->func, dst, tmp); + sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X)); + + aos_release_xmm_reg(cp, tmp.idx); + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + +static boolean emit_DST( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg dst = aos_get_xmm_reg(cp); + struct x86_reg tmp = aos_get_xmm_reg(cp); + struct x86_reg ones = aos_get_internal(cp, IMM_ONES); + +/* dst[0] = 1.0 * 1.0F; */ +/* dst[1] = arg0[1] * arg1[1]; */ +/* dst[2] = arg0[2] * 1.0; */ +/* dst[3] = 1.0 * arg1[3]; */ + + emit_shuf_copy2(cp, dst, arg0, ones, SHUF(X,W,Z,Y)); + emit_shuf_copy2(cp, tmp, arg1, ones, SHUF(X,Z,Y,W)); + sse_mulps(cp->func, dst, tmp); + + aos_release_xmm_reg(cp, tmp.idx); + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + +static boolean emit_LG2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + x87_fld1(cp->func); /* 1 */ + x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0 1 */ + x87_fyl2x(cp->func); /* log2(a0) */ + x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + return TRUE; +} + + +static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + x87_fld_src(cp, &op->FullSrcRegisters[0], 0); + + emit_x87_ex2(cp); + + x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + return TRUE; +} + +static boolean emit_EXP( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); + struct x86_reg st0 = x86_make_reg(file_x87, 0); + struct x86_reg st1 = x86_make_reg(file_x87, 1); + struct x86_reg st3 = x86_make_reg(file_x87, 3); + unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + + /* CAUTION: dst may alias arg0! + */ + x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* arg0.x */ + x87_fld(cp->func, st0); /* arg arg */ + + /* by default, fpu is setup to round-to-nearest. We want to + * change this now, and track the state through to the end of the + * generated function so that it isn't repeated unnecessarily. + * Alternately, could subtract .5 to get round to -inf behaviour. + */ + set_fpu_round_neg_inf( cp ); + x87_fprndint( cp->func ); /* flr(a) a */ + x87_fld(cp->func, st0); /* flr(a) flr(a) a */ + x87_fld1(cp->func); /* 1 floor(a) floor(a) a */ + x87_fst_or_nop(cp->func, writemask, 3, dst); /* stack unchanged */ + + x87_fscale(cp->func); /* 2^floor(a) floor(a) a */ + x87_fst(cp->func, st3); /* 2^floor(a) floor(a) a 2^floor(a)*/ + + x87_fstp_or_pop(cp->func, writemask, 0, dst); /* flr(a) a 2^flr(a) */ + + x87_fsubrp(cp->func, st1); /* frac(a) 2^flr(a) */ + + x87_fst_or_nop(cp->func, writemask, 1, dst); /* frac(a) 2^flr(a) */ + + x87_f2xm1(cp->func); /* (2^frac(a))-1 2^flr(a)*/ + x87_fld1(cp->func); /* 1 (2^frac(a))-1 2^flr(a)*/ + x87_faddp(cp->func, st1); /* 2^frac(a) 2^flr(a) */ + x87_fmulp(cp->func, st1); /* 2^a */ + + x87_fstp_or_pop(cp->func, writemask, 2, dst); + +/* dst[0] = 2^floor(tmp); */ +/* dst[1] = frac(tmp); */ +/* dst[2] = 2^floor(tmp) * 2^frac(tmp); */ +/* dst[3] = 1.0F; */ + return TRUE; +} + +static boolean emit_LOG( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); + struct x86_reg st0 = x86_make_reg(file_x87, 0); + struct x86_reg st1 = x86_make_reg(file_x87, 1); + struct x86_reg st2 = x86_make_reg(file_x87, 2); + unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + + /* CAUTION: dst may alias arg0! + */ + x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* arg0.x */ + x87_fabs(cp->func); /* |arg0.x| */ + x87_fxtract(cp->func); /* mantissa(arg0.x), exponent(arg0.x) */ + x87_fst(cp->func, st2); /* mantissa, exponent, mantissa */ + x87_fld1(cp->func); /* 1, mantissa, exponent, mantissa */ + x87_fyl2x(cp->func); /* log2(mantissa), exponent, mantissa */ + x87_fadd(cp->func, st0, st1); /* e+l2(m), e, m */ + + x87_fstp_or_pop(cp->func, writemask, 2, dst); /* e, m */ + + x87_fld1(cp->func); /* 1, e, m */ + x87_fsub(cp->func, st1, st0); /* 1, e-1, m */ + + x87_fstp_or_pop(cp->func, writemask, 3, dst); /* e-1,m */ + x87_fstp_or_pop(cp->func, writemask, 0, dst); /* m */ + + x87_fadd(cp->func, st0, st0); /* 2m */ + + x87_fstp_or_pop( cp->func, writemask, 1, dst ); + + return TRUE; +} + +static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); + unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + int i; + + set_fpu_round_neg_inf( cp ); + + /* Load all sources first to avoid aliasing + */ + for (i = 0; i < 4; i++) { + if (writemask & (1<FullSrcRegisters[0], i); + } + } + + for (i = 0; i < 4; i++) { + if (writemask & (1<func ); + x87_fstp(cp->func, x86_make_disp(dst, i*4)); + } + } + + return TRUE; +} + + +static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); + unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + int i; + + set_fpu_round_nearest( cp ); + + /* Load all sources first to avoid aliasing + */ + for (i = 0; i < 4; i++) { + if (writemask & (1<FullSrcRegisters[0], i); + } + } + + for (i = 0; i < 4; i++) { + if (writemask & (1<func ); + x87_fstp(cp->func, x86_make_disp(dst, i*4)); + } + } + + return TRUE; +} + + +static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); + struct x86_reg st0 = x86_make_reg(file_x87, 0); + struct x86_reg st1 = x86_make_reg(file_x87, 1); + unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + int i; + + set_fpu_round_neg_inf( cp ); + + /* suck all the source values onto the stack before writing out any + * dst, which may alias... + */ + for (i = 0; i < 4; i++) { + if (writemask & (1<FullSrcRegisters[0], i); + } + } + + for (i = 0; i < 4; i++) { + if (writemask & (1<func, st0); /* a a */ + x87_fprndint( cp->func ); /* flr(a) a */ + x87_fsubrp(cp->func, st1); /* frc(a) */ + x87_fstp(cp->func, x86_make_disp(dst, i*4)); + } + } + + return TRUE; +} + + + +static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); + struct x86_reg st1 = x86_make_reg(file_x87, 1); + unsigned fixup1, fixup2; + unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + + + /* Load the interesting parts of arg0: + */ + x87_fld_src(cp, &op->FullSrcRegisters[0], 3); + x87_fld_src(cp, &op->FullSrcRegisters[0], 1); + x87_fld_src(cp, &op->FullSrcRegisters[0], 0); + + + if (writemask & TGSI_WRITEMASK_XW) { + x87_fld1(cp->func); + x87_fst_or_nop(cp->func, writemask, 0, dst); + x87_fstp_or_pop(cp->func, writemask, 3, dst); + } + + if (writemask & TGSI_WRITEMASK_YZ) { + + /* Pre-zero destinations, may be overwritten later... fixme. + */ + x87_fldz(cp->func); + x87_fst_or_nop(cp->func, writemask, 1, dst); + x87_fstp_or_pop(cp->func, writemask, 2, dst); + + + /* Check arg0[0]: + */ + x87_fldz(cp->func); /* 0 a0 a1 a3 */ + x87_fucomp(cp->func, st1); /* a0 a1 a3 */ + x87_fnstsw(cp->func, cp->tmp_EAX); + x86_sahf(cp->func); + fixup1 = x86_jcc_forward(cp->func, cc_AE); + + x87_fstp_or_pop(cp->func, writemask, 1, dst); /* a1 a3 */ + + /* Check arg0[1]: + */ + x87_fldz(cp->func); /* 0 a1 a3 */ + x87_fucomp(cp->func, st1); /* a1 a3 */ + x87_fnstsw(cp->func, cp->tmp_EAX); + x86_sahf(cp->func); + fixup2 = x86_jcc_forward(cp->func, cc_AE); + + /* Compute pow(a1, a3) + */ + x87_fyl2x(cp->func); /* a3*log2(a1) */ + + emit_x87_ex2( cp ); /* 2^(a3*log2(a1)) */ + + x87_fstp_or_pop(cp->func, writemask, 2, dst); + + /* Land jumps: + */ + x86_fixup_fwd_jump(cp->func, fixup1); + x86_fixup_fwd_jump(cp->func, fixup2); + } + + return TRUE; +} + + + +static boolean emit_MAX( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg dst = aos_get_xmm_reg(cp); + + sse_movups(cp->func, dst, arg0); + sse_maxps(cp->func, dst, arg1); + + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + + +static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg dst = aos_get_xmm_reg(cp); + + sse_movups(cp->func, dst, arg0); + sse_minps(cp->func, dst, arg1); + + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + +static boolean emit_MOV( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg dst = aos_get_xmm_reg(cp); + + sse_movups(cp->func, dst, arg0); + + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + +static boolean emit_MUL( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg dst = aos_get_xmm_reg(cp); + + sse_movups(cp->func, dst, arg0); + sse_mulps(cp->func, dst, arg1); + + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + + +static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg arg2 = fetch_src(cp, &op->FullSrcRegisters[2]); + struct x86_reg dst = aos_get_xmm_reg(cp); + + sse_movups(cp->func, dst, arg0); + sse_mulps(cp->func, dst, arg1); + sse_addps(cp->func, dst, arg2); + + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + + +static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + x87_fld_src(cp, &op->FullSrcRegisters[1], 0); /* a1.x */ + x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0.x a1.x */ + x87_fyl2x(cp->func); /* a1*log2(a0) */ + + emit_x87_ex2( cp ); /* 2^(a1*log2(a0)) */ + + x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + return TRUE; +} + + +static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg dst = aos_get_xmm_reg(cp); + + if (cp->have_sse2) { + sse2_rcpss(cp->func, dst, arg0); + /* extend precision here... + */ + } + else { + struct x86_reg ones = aos_get_internal(cp, IMM_ONES); + sse_movss(cp->func, dst, ones); + sse_divss(cp->func, dst, arg0); + } + + sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X)); + + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + +static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg dst = aos_get_xmm_reg(cp); + + sse_rsqrtss(cp->func, dst, arg0); + + /* Extend precision here... + */ + + sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X)); + + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + + +static boolean emit_SGE( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg dst = aos_get_xmm_reg(cp); + struct x86_reg ones = aos_get_internal(cp, IMM_ONES); + + sse_movups(cp->func, dst, arg0); + sse_cmpps(cp->func, dst, arg1, cc_NotLessThan); + sse_andps(cp->func, dst, ones); + + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + +static boolean emit_SIN( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + x87_fld_src(cp, &op->FullSrcRegisters[0], 0); + x87_fsin(cp->func); + x87_fstp_dest4(cp, &op->FullDstRegisters[0]); + return TRUE; +} + + + +static boolean emit_SLT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg dst = aos_get_xmm_reg(cp); + struct x86_reg ones = aos_get_internal(cp, IMM_ONES); + + sse_movups(cp->func, dst, arg0); + sse_cmpps(cp->func, dst, arg1, cc_LessThan); + sse_andps(cp->func, dst, ones); + + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + +static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg dst = aos_get_xmm_reg(cp); + + sse_movups(cp->func, dst, arg0); + sse_subps(cp->func, dst, arg1); + + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + + +static boolean emit_XPD( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); + struct x86_reg dst = aos_get_xmm_reg(cp); + struct x86_reg tmp0 = aos_get_xmm_reg(cp); + struct x86_reg tmp1 = aos_get_xmm_reg(cp); + + /* Could avoid tmp0, tmp1 if we overwrote arg0, arg1. Need a way + * to invalidate registers. This will come with better analysis + * (liveness analysis) of the incoming program. + */ + emit_pshufd(cp, dst, arg0, SHUF(Y, Z, X, W)); + emit_pshufd(cp, tmp1, arg1, SHUF(Z, X, Y, W)); + sse_mulps(cp->func, dst, tmp1); + emit_pshufd(cp, tmp0, arg0, SHUF(Z, X, Y, W)); + emit_pshufd(cp, tmp1, arg1, SHUF(Y, Z, X, W)); + sse_mulps(cp->func, tmp0, tmp1); + sse_subps(cp->func, dst, tmp0); + +/* dst[0] = arg0[1] * arg1[2] - arg0[2] * arg1[1]; */ +/* dst[1] = arg0[2] * arg1[0] - arg0[0] * arg1[2]; */ +/* dst[2] = arg0[0] * arg1[1] - arg0[1] * arg1[0]; */ +/* dst[3] is undef */ + + + aos_release_xmm_reg(cp, tmp0.idx); + aos_release_xmm_reg(cp, tmp1.idx); + store_dest(cp, &op->FullDstRegisters[0], dst); + return TRUE; +} + + + +static boolean +emit_instruction( struct aos_compilation *cp, + struct tgsi_full_instruction *inst ) +{ + switch( inst->Instruction.Opcode ) { + case TGSI_OPCODE_MOV: + return emit_MOV( cp, inst ); + + case TGSI_OPCODE_LIT: + return emit_LIT(cp, inst); + + case TGSI_OPCODE_RCP: + return emit_RCP(cp, inst); + + case TGSI_OPCODE_RSQ: + return emit_RSQ(cp, inst); + + case TGSI_OPCODE_EXP: + return emit_EXP(cp, inst); + + case TGSI_OPCODE_LOG: + return emit_LOG(cp, inst); + + case TGSI_OPCODE_MUL: + return emit_MUL(cp, inst); + + case TGSI_OPCODE_ADD: + return emit_ADD(cp, inst); + + case TGSI_OPCODE_DP3: + return emit_DP3(cp, inst); + + case TGSI_OPCODE_DP4: + return emit_DP4(cp, inst); + + case TGSI_OPCODE_DST: + return emit_DST(cp, inst); + + case TGSI_OPCODE_MIN: + return emit_MIN(cp, inst); + + case TGSI_OPCODE_MAX: + return emit_MAX(cp, inst); + + case TGSI_OPCODE_SLT: + return emit_SLT(cp, inst); + + case TGSI_OPCODE_SGE: + return emit_SGE(cp, inst); + + case TGSI_OPCODE_MAD: + return emit_MAD(cp, inst); + + case TGSI_OPCODE_SUB: + return emit_SUB(cp, inst); + + case TGSI_OPCODE_LERP: +// return emit_LERP(cp, inst); + return FALSE; + + case TGSI_OPCODE_FRAC: + return emit_FRC(cp, inst); + + case TGSI_OPCODE_CLAMP: +// return emit_CLAMP(cp, inst); + return FALSE; + + case TGSI_OPCODE_FLOOR: + return emit_FLR(cp, inst); + + case TGSI_OPCODE_ROUND: + return emit_RND(cp, inst); + + case TGSI_OPCODE_EXPBASE2: + return emit_EX2(cp, inst); + + case TGSI_OPCODE_LOGBASE2: + return emit_LG2(cp, inst); + + case TGSI_OPCODE_POWER: + return emit_POW(cp, inst); + + case TGSI_OPCODE_CROSSPRODUCT: + return emit_XPD(cp, inst); + + case TGSI_OPCODE_ABS: + return emit_ABS(cp, inst); + + case TGSI_OPCODE_DPH: + return emit_DPH(cp, inst); + + case TGSI_OPCODE_COS: + return emit_COS(cp, inst); + + case TGSI_OPCODE_SIN: + return emit_SIN(cp, inst); + + case TGSI_OPCODE_END: + return TRUE; + + default: + return FALSE; + } +} + +static boolean note_immediate( struct aos_compilation *cp, + struct tgsi_full_immediate *imm ) +{ + unsigned pos = cp->num_immediates++; + unsigned j; + + for (j = 0; j < imm->Immediate.Size; j++) { + cp->vaos->machine->immediate[pos][j] = imm->u.ImmediateFloat32[j].Float; + } + + return TRUE; +} + + + + +static void find_last_write_outputs( struct aos_compilation *cp ) +{ + struct tgsi_parse_context parse; + unsigned this_instruction = 0; + unsigned i; + + tgsi_parse_init( &parse, cp->vaos->base.vs->state.tokens ); + + while (!tgsi_parse_end_of_tokens( &parse )) { + + tgsi_parse_token( &parse ); + + if (parse.FullToken.Token.Type != TGSI_TOKEN_TYPE_INSTRUCTION) + continue; + + for (i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++) { + if (parse.FullToken.FullInstruction.FullDstRegisters[i].DstRegister.File == + TGSI_FILE_OUTPUT) + { + unsigned idx = parse.FullToken.FullInstruction.FullDstRegisters[i].DstRegister.Index; + cp->output_last_write[idx] = this_instruction; + } + } + + this_instruction++; + } + + tgsi_parse_free( &parse ); +} + + +#define ARG_VARIENT 1 +#define ARG_START_ELTS 2 +#define ARG_COUNT 3 +#define ARG_OUTBUF 4 + + +static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, + boolean linear ) +{ + struct tgsi_parse_context parse; + struct aos_compilation cp; + unsigned fixup, label; + + tgsi_parse_init( &parse, varient->base.vs->state.tokens ); + + memset(&cp, 0, sizeof(cp)); + + cp.insn_counter = 1; + cp.vaos = varient; + cp.have_sse2 = 1; + cp.func = &varient->func[ linear ? 0 : 1 ]; + + cp.tmp_EAX = x86_make_reg(file_REG32, reg_AX); + cp.idx_EBX = x86_make_reg(file_REG32, reg_BX); + cp.outbuf_ECX = x86_make_reg(file_REG32, reg_CX); + cp.machine_EDX = x86_make_reg(file_REG32, reg_DX); + cp.count_ESI = x86_make_reg(file_REG32, reg_SI); + + x86_init_func(cp.func); + + find_last_write_outputs(&cp); + + x86_push(cp.func, cp.idx_EBX); + x86_push(cp.func, cp.count_ESI); + + + /* Load arguments into regs: + */ + x86_mov(cp.func, cp.machine_EDX, x86_fn_arg(cp.func, ARG_VARIENT)); + x86_mov(cp.func, cp.idx_EBX, x86_fn_arg(cp.func, ARG_START_ELTS)); + x86_mov(cp.func, cp.count_ESI, x86_fn_arg(cp.func, ARG_COUNT)); + x86_mov(cp.func, cp.outbuf_ECX, x86_fn_arg(cp.func, ARG_OUTBUF)); + + + /* Compare count to zero and possibly bail. + */ + x86_xor(cp.func, cp.tmp_EAX, cp.tmp_EAX); + x86_cmp(cp.func, cp.count_ESI, cp.tmp_EAX); + fixup = x86_jcc_forward(cp.func, cc_E); + + /* Dig out the machine pointer from inside the varient arg + */ + x86_mov(cp.func, cp.machine_EDX, + x86_make_disp(cp.machine_EDX, + Offset( struct draw_vs_varient_aos_sse, machine ))); + + save_fpu_state( &cp ); + + /* Note address for loop jump + */ + label = x86_get_label(cp.func); + { + /* Fetch inputs... TODO: fetch lazily... + */ + if (!aos_fetch_inputs( &cp, linear )) + goto fail; + + /* Emit the shader: + */ + while( !tgsi_parse_end_of_tokens( &parse ) && !cp.error ) + { + tgsi_parse_token( &parse ); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_IMMEDIATE: + if (!note_immediate( &cp, &parse.FullToken.FullImmediate )) + goto fail; + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (!emit_instruction( &cp, &parse.FullToken.FullInstruction )) + goto fail; + break; + } + + cp.insn_counter++; + debug_printf("\n"); + } + + if (cp.error) + goto fail; + + /* Emit output... TODO: do this eagerly after the last write to a + * given output. + */ + if (!aos_emit_outputs( &cp )) + goto fail; + + + /* Next vertex: + */ + x86_lea(cp.func, + cp.outbuf_ECX, + x86_make_disp(cp.outbuf_ECX, + cp.vaos->base.key.output_stride)); + + /* Incr index + */ + if (linear) { + x86_inc(cp.func, cp.idx_EBX); + } + else { + x86_lea(cp.func, cp.idx_EBX, x86_make_disp(cp.idx_EBX, 4)); + } + + } + /* decr count, loop if not zero + */ + x86_dec(cp.func, cp.count_ESI); +/* x86_test(cp.func, cp.count_ESI, cp.count_ESI); */ + x86_jcc(cp.func, cc_NZ, label); + + restore_fpu_state(&cp); + + /* Land forward jump here: + */ + x86_fixup_fwd_jump(cp.func, fixup); + + /* Exit mmx state? + */ + if (cp.func->need_emms) + mmx_emms(cp.func); + + x86_pop(cp.func, cp.count_ESI); + x86_pop(cp.func, cp.idx_EBX); + + x86_ret(cp.func); + + tgsi_parse_free( &parse ); + return !cp.error; + + fail: + tgsi_parse_free( &parse ); + return FALSE; +} + + + +static void vaos_set_buffer( struct draw_vs_varient *varient, + unsigned buf, + const void *ptr, + unsigned stride ) +{ + struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + unsigned i; + + for (i = 0; i < vaos->base.vs->info.num_inputs; i++) { + if (vaos->base.key.element[i].in.buffer == buf) { + vaos->machine->attrib[i].input_ptr = ((char *)ptr + + vaos->base.key.element[i].in.offset); + vaos->machine->attrib[i].input_stride = stride; + } + } +} + + +static void vaos_destroy( struct draw_vs_varient *varient ) +{ + struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + + if (vaos->machine) + align_free( vaos->machine ); + + x86_release_func( &vaos->func[0] ); + x86_release_func( &vaos->func[1] ); + + FREE(vaos); +} + +static void vaos_run_elts( struct draw_vs_varient *varient, + const unsigned *elts, + unsigned count, + void *output_buffer ) +{ + struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + + vaos->gen_run_elts( varient, + elts, + count, + output_buffer ); +} + +static void vaos_run_linear( struct draw_vs_varient *varient, + unsigned start, + unsigned count, + void *output_buffer ) +{ + struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + + vaos->gen_run_linear( varient, + start, + count, + output_buffer ); +} + + +static void vaos_set_constants( struct draw_vs_varient *varient, + const float (*constants)[4] ) +{ + struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + + memcpy(vaos->machine->constant, + constants, + (vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1) * 4 * sizeof(float)); +} + + +static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ) +{ + struct draw_vs_varient_aos_sse *vaos = CALLOC_STRUCT(draw_vs_varient_aos_sse); + + if (!vaos) + goto fail; + + vaos->base.key = *key; + vaos->base.vs = vs; + vaos->base.set_input = vaos_set_buffer; + vaos->base.set_constants = vaos_set_constants; + vaos->base.destroy = vaos_destroy; + vaos->base.run_linear = vaos_run_linear; + vaos->base.run_elts = vaos_run_elts; + + vaos->machine = align_malloc( sizeof(struct aos_machine), 16 ); + if (!vaos->machine) + goto fail; + + memset(vaos->machine, 0, sizeof(struct aos_machine)); + + tgsi_dump(vs->state.tokens, 0); + + if (!build_vertex_program( vaos, TRUE )) + goto fail; + + if (!build_vertex_program( vaos, FALSE )) + goto fail; + + vaos->gen_run_linear = (vsv_run_linear_func)x86_get_func(&vaos->func[0]); + if (!vaos->gen_run_linear) + goto fail; + + vaos->gen_run_elts = (vsv_run_elts_func)x86_get_func(&vaos->func[1]); + if (!vaos->gen_run_elts) + goto fail; + + return &vaos->base; + + fail: + if (vaos->machine) + align_free( vaos->machine ); + + if (vaos) + x86_release_func( &vaos->func[0] ); + + if (vaos) + x86_release_func( &vaos->func[1] ); + + FREE(vaos); + + return NULL; +} + + +struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs, + const struct draw_vs_varient_key *key ) +{ + struct draw_vs_varient *varient = varient_aos_sse( vs, key ); + + if (varient == NULL) { + assert(0); + varient = draw_vs_varient_generic( vs, key ); + } + + return varient; +} + + + +#endif diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h new file mode 100644 index 0000000000..1d8a055a90 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -0,0 +1,181 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Authors: Keith Whitwell + */ + +#ifndef DRAW_VS_AOS_H +#define DRAW_VS_AOS_H + + +struct tgsi_token; +struct x86_function; + +#include "pipe/p_state.h" +#include "rtasm/rtasm_x86sse.h" + + + + + +#define X 0 +#define Y 1 +#define Z 2 +#define W 3 + +#define MAX_INPUTS PIPE_MAX_ATTRIBS +#define MAX_OUTPUTS PIPE_MAX_ATTRIBS +#define MAX_TEMPS PIPE_MAX_ATTRIBS /* say */ +#define MAX_CONSTANTS PIPE_MAX_ATTRIBS /* say */ +#define MAX_IMMEDIATES PIPE_MAX_ATTRIBS /* say */ +#define MAX_INTERNALS 4 + +#define AOS_FILE_INTERNAL TGSI_FILE_COUNT + +/* This is the temporary storage used by all the aos_sse vs varients. + * Create one per context and reuse by passing a pointer in at + * vs_varient creation?? + */ +struct aos_machine { + float input [MAX_INPUTS ][4]; + float output [MAX_OUTPUTS ][4]; + float temp [MAX_TEMPS ][4]; + float constant [MAX_CONSTANTS ][4]; /* fixme -- should just be a pointer */ + float immediate[MAX_IMMEDIATES][4]; /* fixme -- should just be a pointer */ + float internal [MAX_INTERNALS ][4]; + + unsigned fpu_round_nearest; + unsigned fpu_round_neg_inf; + + struct { + const void *input_ptr; + unsigned input_stride; + + unsigned output_offset; + } attrib[PIPE_MAX_ATTRIBS]; +}; + + + + +struct aos_compilation { + struct x86_function *func; + struct draw_vs_varient_aos_sse *vaos; + + unsigned insn_counter; + unsigned num_immediates; + + struct { + unsigned idx:16; + unsigned file:8; + unsigned dirty:8; + unsigned last_used; + } xmm[8]; + + + boolean input_fetched[PIPE_MAX_ATTRIBS]; + unsigned output_last_write[PIPE_MAX_ATTRIBS]; + + boolean have_sse2; + boolean error; + short fpucntl; + + /* these are actually known values, but putting them in a struct + * like this is helpful to keep them in sync across the file. + */ + struct x86_reg tmp_EAX; + struct x86_reg idx_EBX; /* either start+i or &elt[i] */ + struct x86_reg outbuf_ECX; + struct x86_reg machine_EDX; + struct x86_reg count_ESI; /* decrements to zero */ +}; + +struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp ); +void aos_release_xmm_reg( struct aos_compilation *cp, unsigned idx ); + +void aos_adopt_xmm_reg( struct aos_compilation *cp, + struct x86_reg reg, + unsigned file, + unsigned idx, + unsigned dirty ); + +struct x86_reg aos_get_shader_reg( struct aos_compilation *cp, + unsigned file, + unsigned idx ); + +boolean aos_fetch_inputs( struct aos_compilation *cp, + boolean linear ); + +boolean aos_emit_outputs( struct aos_compilation *cp ); + + +#define IMM_ONES 0 /* 1, 1,1,1 */ +#define IMM_NEGS 1 /* 1,-1,0,0 */ +#define IMM_IDENTITY 2 /* 0, 0,0,1 */ +#define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */ +#define IMM_255 4 /* 255, 255, 255, 255 */ + +struct x86_reg aos_get_internal( struct aos_compilation *cp, + unsigned imm ); + + +#define ERROR(cp, msg) \ +do { \ + debug_printf("%s: x86 translation failed: %s\n", __FUNCTION__, msg); \ + cp->error = 1; \ + assert(0); \ +} while (0) + + + + + + +struct draw_vs_varient_aos_sse { + struct draw_vs_varient base; + struct draw_context *draw; + +#if 0 + struct { + const void *ptr; + unsigned stride; + } attrib[PIPE_MAX_ATTRIBS]; +#endif + + struct aos_machine *machine; /* XXX: temporarily unshared */ + + vsv_run_linear_func gen_run_linear; + vsv_run_elts_func gen_run_elts; + + + struct x86_function func[2]; +}; + + + +#endif + diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c new file mode 100644 index 0000000000..72b2b3d11d --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -0,0 +1,314 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" +#include "tgsi/exec/tgsi_exec.h" +#include "draw_vs.h" +#include "draw_vs_aos.h" + +#include "rtasm/rtasm_x86sse.h" + +#ifdef PIPE_ARCH_X86 + +/* Note - don't yet have to worry about interacting with the code in + * draw_vs_aos.c as there is no intermingling of generated code... + * That may have to change, we'll see. + */ +static void emit_load_R32G32B32A32( struct aos_compilation *cp, + struct x86_reg data, + struct x86_reg src_ptr ) +{ + sse_movups(cp->func, data, src_ptr); +} + +static void emit_load_R32G32B32( struct aos_compilation *cp, + struct x86_reg data, + struct x86_reg src_ptr ) +{ + sse_movss(cp->func, data, x86_make_disp(src_ptr, 8)); + sse_shufps(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) ); + sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) ); + sse_movlps(cp->func, data, src_ptr); +} + +static void emit_load_R32G32( struct aos_compilation *cp, + struct x86_reg data, + struct x86_reg src_ptr ) +{ + sse_movups(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ) ); + sse_movlps(cp->func, data, src_ptr); +} + + +static void emit_load_R32( struct aos_compilation *cp, + struct x86_reg data, + struct x86_reg src_ptr ) +{ + sse_movss(cp->func, data, src_ptr); + sse_orps(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ) ); +} + + +static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp, + struct x86_reg data, + struct x86_reg src_ptr ) +{ + sse_movss(cp->func, data, src_ptr); + sse2_punpcklbw(cp->func, data, aos_get_internal( cp, IMM_IDENTITY )); + sse2_punpcklbw(cp->func, data, aos_get_internal( cp, IMM_IDENTITY )); + sse2_cvtdq2ps(cp->func, data, data); + sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255)); +} + + + +static void get_src_ptr( struct x86_function *func, + struct x86_reg src, + struct x86_reg machine, + struct x86_reg elt, + unsigned a ) +{ + struct x86_reg input_ptr = + x86_make_disp(machine, + Offset(struct aos_machine, attrib[a].input_ptr)); + + struct x86_reg input_stride = + x86_make_disp(machine, + Offset(struct aos_machine, attrib[a].input_stride)); + + /* Calculate pointer to current attrib: + */ + x86_mov(func, src, input_stride); + x86_imul(func, src, elt); + x86_add(func, src, input_ptr); +} + + +/* Extended swizzles? Maybe later. + */ +static void emit_swizzle( struct aos_compilation *cp, + struct x86_reg dest, + struct x86_reg src, + unsigned shuffle ) +{ + sse_shufps(cp->func, dest, src, shuffle); +} + + +static boolean load_input( struct aos_compilation *cp, + unsigned idx, + boolean linear ) +{ + unsigned format = cp->vaos->base.key.element[idx].in.format; + struct x86_reg src = cp->tmp_EAX; + struct x86_reg dataXMM = aos_get_xmm_reg(cp); + + /* Figure out source pointer address: + */ + get_src_ptr(cp->func, + src, + cp->machine_EDX, + linear ? cp->idx_EBX : x86_deref(cp->idx_EBX), + idx); + + src = x86_deref(src); + + aos_adopt_xmm_reg( cp, + dataXMM, + TGSI_FILE_INPUT, + idx, + TRUE ); + + switch (format) { + case PIPE_FORMAT_R32_FLOAT: + emit_load_R32(cp, dataXMM, src); + break; + case PIPE_FORMAT_R32G32_FLOAT: + emit_load_R32G32(cp, dataXMM, src); + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + emit_load_R32G32B32(cp, dataXMM, src); + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + emit_load_R32G32B32A32(cp, dataXMM, src); + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + emit_load_R8G8B8A8_UNORM(cp, dataXMM, src); + emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W)); + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + emit_load_R8G8B8A8_UNORM(cp, dataXMM, src); + break; + default: + ERROR(cp, "unhandled input format"); + return FALSE; + } + + return TRUE; +} + + +boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ) +{ + unsigned i; + + for (i = 0; i < cp->vaos->base.vs->info.num_inputs; i++) { + if (!load_input( cp, i, linear )) + return FALSE; + cp->insn_counter++; + debug_printf("\n"); + } + + return TRUE; +} + + + + + + + +static void emit_store_R32G32B32A32( struct aos_compilation *cp, + struct x86_reg dst_ptr, + struct x86_reg dataXMM ) +{ + sse_movups(cp->func, dst_ptr, dataXMM); +} + +static void emit_store_R32G32B32( struct aos_compilation *cp, + struct x86_reg dst_ptr, + struct x86_reg dataXMM ) +{ + sse_movlps(cp->func, dst_ptr, dataXMM); + sse_shufps(cp->func, dataXMM, dataXMM, SHUF(Z,Z,Z,Z) ); /* NOTE! destructive */ + sse_movss(cp->func, x86_make_disp(dst_ptr,8), dataXMM); +} + +static void emit_store_R32G32( struct aos_compilation *cp, + struct x86_reg dst_ptr, + struct x86_reg dataXMM ) +{ + sse_movlps(cp->func, dst_ptr, dataXMM); +} + +static void emit_store_R32( struct aos_compilation *cp, + struct x86_reg dst_ptr, + struct x86_reg dataXMM ) +{ + sse_movss(cp->func, dst_ptr, dataXMM); +} + + + +static void emit_store_R8G8B8A8_UNORM( struct aos_compilation *cp, + struct x86_reg dst_ptr, + struct x86_reg dataXMM ) +{ + sse_mulps(cp->func, dataXMM, aos_get_internal(cp, IMM_255)); + sse2_cvtps2dq(cp->func, dataXMM, dataXMM); + sse2_packssdw(cp->func, dataXMM, dataXMM); + sse2_packuswb(cp->func, dataXMM, dataXMM); + sse_movss(cp->func, dst_ptr, dataXMM); +} + + + + + +static boolean emit_output( struct aos_compilation *cp, + struct x86_reg ptr, + struct x86_reg dataXMM, + unsigned format ) +{ + switch (format) { + case PIPE_FORMAT_R32_FLOAT: + emit_store_R32(cp, ptr, dataXMM); + break; + case PIPE_FORMAT_R32G32_FLOAT: + emit_store_R32G32(cp, ptr, dataXMM); + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + emit_store_R32G32B32(cp, ptr, dataXMM); + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + emit_store_R32G32B32A32(cp, ptr, dataXMM); + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W)); + emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); + break; + default: + ERROR(cp, "unhandled output format"); + return FALSE; + } + + return TRUE; +} + + + +boolean aos_emit_outputs( struct aos_compilation *cp ) +{ + unsigned i; + + for (i = 0; i < cp->vaos->base.vs->info.num_inputs; i++) { + unsigned format = cp->vaos->base.key.element[i].out.format; + unsigned offset = cp->vaos->base.key.element[i].out.offset; + + struct x86_reg data = aos_get_shader_reg( cp, + TGSI_FILE_OUTPUT, + i ); + + if (data.file != file_XMM) { + struct x86_reg tmp = aos_get_xmm_reg( cp ); + sse_movups(cp->func, tmp, data); + data = tmp; + } + + if (!emit_output( cp, + x86_make_disp( cp->outbuf_ECX, offset ), + data, + format )) + return FALSE; + + aos_release_xmm_reg( cp, data.idx ); + + cp->insn_counter++; + debug_printf("\n"); + } + + return TRUE; +} + +#endif diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index df94a7e0c7..0581c3042f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -157,6 +157,7 @@ draw_create_vs_sse(struct draw_context *draw, vs->base.draw = draw; vs->base.create_varient = draw_vs_varient_generic; +// vs->base.create_varient = draw_vs_varient_aos_sse; vs->base.prepare = vs_sse_prepare; vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; -- cgit v1.2.3 From 030af06691bc5bc82ca141a576da7a2edffe9d1c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 21 May 2008 20:14:55 +0100 Subject: rtasm: add x87 instructions and debug-check for x87 stack usage --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 120 +++++++++++++++++++++++++++++ src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 20 ++++- 2 files changed, 138 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 68ac91ed13..a2e8af343b 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -986,6 +986,26 @@ void sse2_movd( struct x86_function *p, /*********************************************************************** * x87 instructions */ +static void note_x87_pop( struct x86_function *p ) +{ + p->x87_stack--; + assert(p->x87_stack >= 0); + debug_printf("\nstack: %d\n", p->x87_stack); +} + +static void note_x87_push( struct x86_function *p ) +{ + p->x87_stack++; + assert(p->x87_stack <= 7); + debug_printf("\nstack: %d\n", p->x87_stack); +} + +void x87_assert_stack_empty( struct x86_function *p ) +{ + assert (p->x87_stack == 0); +} + + void x87_fist( struct x86_function *p, struct x86_reg dst ) { DUMP_R( dst ); @@ -998,6 +1018,7 @@ void x87_fistp( struct x86_function *p, struct x86_reg dst ) DUMP_R( dst ); emit_1ub(p, 0xdb); emit_modrm_noreg(p, 3, dst); + note_x87_pop(p); } void x87_fild( struct x86_function *p, struct x86_reg arg ) @@ -1005,12 +1026,14 @@ void x87_fild( struct x86_function *p, struct x86_reg arg ) DUMP_R( arg ); emit_1ub(p, 0xdf); emit_modrm_noreg(p, 0, arg); + note_x87_push(p); } void x87_fldz( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xd9, 0xee); + note_x87_push(p); } @@ -1027,18 +1050,21 @@ void x87_fld1( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xd9, 0xe8); + note_x87_push(p); } void x87_fldl2e( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xd9, 0xea); + note_x87_push(p); } void x87_fldln2( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xd9, 0xed); + note_x87_push(p); } void x87_fwait( struct x86_function *p ) @@ -1059,6 +1085,49 @@ void x87_fclex( struct x86_function *p ) x87_fnclex(p); } +void x87_fcmovb( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xda, 0xc0+arg.idx); +} + +void x87_fcmove( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xda, 0xc8+arg.idx); +} + +void x87_fcmovbe( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xda, 0xd0+arg.idx); +} + +void x87_fcmovnb( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xdb, 0xc0+arg.idx); +} + +void x87_fcmovne( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xdb, 0xc8+arg.idx); +} + +void x87_fcmovnbe( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + assert(arg.file == file_x87); + emit_2ub(p, 0xdb, 0xd0+arg.idx); +} + + static void x87_arith_op( struct x86_function *p, struct x86_reg dst, struct x86_reg arg, unsigned char dst0ub0, @@ -1146,6 +1215,7 @@ void x87_fmulp( struct x86_function *p, struct x86_reg dst ) assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xc8+dst.idx); + note_x87_pop(p); } void x87_fsubp( struct x86_function *p, struct x86_reg dst ) @@ -1154,6 +1224,7 @@ void x87_fsubp( struct x86_function *p, struct x86_reg dst ) assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xe8+dst.idx); + note_x87_pop(p); } void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) @@ -1162,6 +1233,7 @@ void x87_fsubrp( struct x86_function *p, struct x86_reg dst ) assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xe0+dst.idx); + note_x87_pop(p); } void x87_faddp( struct x86_function *p, struct x86_reg dst ) @@ -1170,6 +1242,7 @@ void x87_faddp( struct x86_function *p, struct x86_reg dst ) assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xc0+dst.idx); + note_x87_pop(p); } void x87_fdivp( struct x86_function *p, struct x86_reg dst ) @@ -1178,6 +1251,7 @@ void x87_fdivp( struct x86_function *p, struct x86_reg dst ) assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xf8+dst.idx); + note_x87_pop(p); } void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) @@ -1186,6 +1260,13 @@ void x87_fdivrp( struct x86_function *p, struct x86_reg dst ) assert(dst.file == file_x87); assert(dst.idx >= 1); emit_2ub(p, 0xde, 0xf0+dst.idx); + note_x87_pop(p); +} + +void x87_ftst( struct x86_function *p ) +{ + DUMP(); + emit_2ub(p, 0xd9, 0xe4); } void x87_fucom( struct x86_function *p, struct x86_reg arg ) @@ -1200,12 +1281,15 @@ void x87_fucomp( struct x86_function *p, struct x86_reg arg ) DUMP_R( arg ); assert(arg.file == file_x87); emit_2ub(p, 0xdd, 0xe8+arg.idx); + note_x87_pop(p); } void x87_fucompp( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xda, 0xe9); + note_x87_pop(p); /* pop twice */ + note_x87_pop(p); /* pop twice */ } void x87_fxch( struct x86_function *p, struct x86_reg arg ) @@ -1287,6 +1371,7 @@ void x87_fyl2x( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xd9, 0xf1); + note_x87_pop(p); } /* st1 = st1 * log2(st0 + 1.0); @@ -1298,6 +1383,7 @@ void x87_fyl2xp1( struct x86_function *p ) { DUMP(); emit_2ub(p, 0xd9, 0xf9); + note_x87_pop(p); } @@ -1310,6 +1396,7 @@ void x87_fld( struct x86_function *p, struct x86_reg arg ) emit_1ub(p, 0xd9); emit_modrm_noreg(p, 0, arg); } + note_x87_push(p); } void x87_fst( struct x86_function *p, struct x86_reg dst ) @@ -1332,8 +1419,15 @@ void x87_fstp( struct x86_function *p, struct x86_reg dst ) emit_1ub(p, 0xd9); emit_modrm_noreg(p, 3, dst); } + note_x87_pop(p); +} + +void x87_fpop( struct x86_function *p ) +{ + x87_fstp( p, x86_make_reg( file_x87, 0 )); } + void x87_fcom( struct x86_function *p, struct x86_reg dst ) { DUMP_R( dst ); @@ -1345,6 +1439,7 @@ void x87_fcom( struct x86_function *p, struct x86_reg dst ) } } + void x87_fcomp( struct x86_function *p, struct x86_reg dst ) { DUMP_R( dst ); @@ -1354,6 +1449,20 @@ void x87_fcomp( struct x86_function *p, struct x86_reg dst ) emit_1ub(p, 0xd8); emit_modrm_noreg(p, 3, dst); } + note_x87_pop(p); +} + +void x87_fcomi( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + emit_2ub(p, 0xdb, 0xf0+arg.idx); +} + +void x87_fcomip( struct x86_function *p, struct x86_reg arg ) +{ + DUMP_R( arg ); + emit_2ub(p, 0xdb, 0xf0+arg.idx); + note_x87_pop(p); } @@ -1372,6 +1481,17 @@ void x87_fnstsw( struct x86_function *p, struct x86_reg dst ) } +void x87_fnstcw( struct x86_function *p, struct x86_reg dst ) +{ + DUMP_R( dst ); + assert(dst.file == file_REG32); + + emit_1ub(p, 0x9b); /* WAIT -- needed? */ + emit_1ub(p, 0xd9); + emit_modrm_noreg(p, 7, dst); +} + + /*********************************************************************** diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 1e02c6e73b..9f7e31e055 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -41,8 +41,11 @@ struct x86_function { unsigned size; unsigned char *store; unsigned char *csr; - unsigned stack_offset; - int need_emms; + + unsigned stack_offset:16; + unsigned need_emms:8; + int x87_stack:8; + unsigned char error_overflow[4]; }; @@ -229,13 +232,23 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_sahf( struct x86_function *p ); +void x87_assert_stack_empty( struct x86_function *p ); + void x87_f2xm1( struct x86_function *p ); void x87_fabs( struct x86_function *p ); void x87_fadd( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); void x87_faddp( struct x86_function *p, struct x86_reg dst ); void x87_fchs( struct x86_function *p ); void x87_fclex( struct x86_function *p ); +void x87_fcmovb( struct x86_function *p, struct x86_reg src ); +void x87_fcmovbe( struct x86_function *p, struct x86_reg src ); +void x87_fcmove( struct x86_function *p, struct x86_reg src ); +void x87_fcmovnb( struct x86_function *p, struct x86_reg src ); +void x87_fcmovnbe( struct x86_function *p, struct x86_reg src ); +void x87_fcmovne( struct x86_function *p, struct x86_reg src ); void x87_fcom( struct x86_function *p, struct x86_reg dst ); +void x87_fcomi( struct x86_function *p, struct x86_reg dst ); +void x87_fcomip( struct x86_function *p, struct x86_reg dst ); void x87_fcomp( struct x86_function *p, struct x86_reg dst ); void x87_fcos( struct x86_function *p ); void x87_fdiv( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); @@ -255,6 +268,7 @@ void x87_fmul( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); void x87_fmulp( struct x86_function *p, struct x86_reg dst ); void x87_fnclex( struct x86_function *p ); void x87_fprndint( struct x86_function *p ); +void x87_fpop( struct x86_function *p ); void x87_fscale( struct x86_function *p ); void x87_fsin( struct x86_function *p ); void x87_fsincos( struct x86_function *p ); @@ -265,11 +279,13 @@ void x87_fsub( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); void x87_fsubp( struct x86_function *p, struct x86_reg dst ); void x87_fsubr( struct x86_function *p, struct x86_reg dst, struct x86_reg arg ); void x87_fsubrp( struct x86_function *p, struct x86_reg dst ); +void x87_ftst( struct x86_function *p ); void x87_fxch( struct x86_function *p, struct x86_reg dst ); void x87_fxtract( struct x86_function *p ); void x87_fyl2x( struct x86_function *p ); void x87_fyl2xp1( struct x86_function *p ); void x87_fwait( struct x86_function *p ); +void x87_fnstcw( struct x86_function *p, struct x86_reg dst ); void x87_fnstsw( struct x86_function *p, struct x86_reg dst ); void x87_fucompp( struct x86_function *p ); void x87_fucomp( struct x86_function *p, struct x86_reg arg ); -- cgit v1.2.3 From 889473b3f5a216bd753c357974d6bae29fe3c41d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 21 May 2008 20:28:56 +0100 Subject: draw: add viewport to varient state --- .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 9 +++- src/gallium/auxiliary/draw/draw_vs.h | 8 +++- src/gallium/auxiliary/draw/draw_vs_aos.c | 50 ++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_vs_aos.h | 9 +++- src/gallium/auxiliary/draw/draw_vs_sse.c | 4 +- src/gallium/auxiliary/draw/draw_vs_varient.c | 10 +++++ 6 files changed, 84 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 74945dcfe9..984fbb6767 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -95,10 +95,14 @@ static void fse_prepare( struct draw_pt_middle_end *middle, + fse->key.output_stride = vinfo->size * 4; fse->key.nr_elements = MAX2(num_vs_outputs, /* outputs - translate to hw format */ num_vs_inputs); /* inputs - fetch from api format */ - fse->key.output_stride = vinfo->size * 4; + fse->key.viewport = 1; + fse->key.clip = 0; + fse->key.pad = 0; + memset(fse->key.element, 0, fse->key.nr_elements * sizeof(fse->key.element[0])); @@ -211,6 +215,9 @@ static void fse_prepare( struct draw_pt_middle_end *middle, fse->active->set_constants( fse->active, (const float (*)[4])draw->pt.user.constants ); + fse->active->set_viewport( fse->active, + &draw->viewport ); + //return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 5a8d0da06d..ff3e19b2a8 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -58,7 +58,10 @@ struct draw_vs_element { struct draw_vs_varient_key { unsigned output_stride; - unsigned nr_elements; + unsigned nr_elements:16; + unsigned viewport:1; + unsigned clip:1; + unsigned pad:14; struct draw_vs_element element[PIPE_MAX_ATTRIBS]; }; @@ -88,6 +91,9 @@ struct draw_vs_varient { void (*set_constants)( struct draw_vs_varient *, const float (*constants)[4] ); + void (*set_viewport)( struct draw_vs_varient *, + const struct pipe_viewport_state * ); + void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader, unsigned start, unsigned count, diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 620f5e3592..b8e66e8b78 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1401,6 +1401,37 @@ emit_instruction( struct aos_compilation *cp, } } + +static boolean emit_viewport( struct aos_compilation *cp ) +{ + struct x86_reg pos = aos_get_shader_reg(cp, + TGSI_FILE_OUTPUT, + 0); + + struct x86_reg scale = x86_make_disp(cp->machine_EDX, + Offset(struct aos_machine, scale)); + + struct x86_reg translate = x86_make_disp(cp->machine_EDX, + Offset(struct aos_machine, translate)); + + if (pos.file != file_XMM) { + struct x86_reg dst = aos_get_xmm_reg(cp); + sse_movups(cp->func, dst, pos); + pos = dst; + } + + sse_mulps(cp->func, pos, scale); + sse_addps(cp->func, pos, translate); + + aos_adopt_xmm_reg( cp, + pos, + TGSI_FILE_OUTPUT, + 0, + TRUE ); + return TRUE; +} + + static boolean note_immediate( struct aos_compilation *cp, struct tgsi_full_immediate *imm ) { @@ -1540,6 +1571,10 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, if (cp.error) goto fail; + if (cp.vaos->base.key.viewport) { + emit_viewport(&cp); + } + /* Emit output... TODO: do this eagerly after the last write to a * given output. */ @@ -1665,11 +1700,25 @@ static void vaos_set_constants( struct draw_vs_varient *varient, } +static void vaos_set_viewport( struct draw_vs_varient *varient, + const struct pipe_viewport_state *viewport ) +{ + struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + + memcpy(vaos->machine->scale, viewport->scale, 4 * sizeof(float)); + memcpy(vaos->machine->translate, viewport->translate, 4 * sizeof(float)); +} + + + static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, const struct draw_vs_varient_key *key ) { struct draw_vs_varient_aos_sse *vaos = CALLOC_STRUCT(draw_vs_varient_aos_sse); + if (key->clip) + return NULL; + if (!vaos) goto fail; @@ -1677,6 +1726,7 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, vaos->base.vs = vs; vaos->base.set_input = vaos_set_buffer; vaos->base.set_constants = vaos_set_constants; + vaos->base.set_viewport = vaos_set_viewport; vaos->base.destroy = vaos_destroy; vaos->base.run_linear = vaos_run_linear; vaos->base.run_elts = vaos_run_elts; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 1d8a055a90..16fef6451c 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -68,8 +68,13 @@ struct aos_machine { float immediate[MAX_IMMEDIATES][4]; /* fixme -- should just be a pointer */ float internal [MAX_INTERNALS ][4]; - unsigned fpu_round_nearest; - unsigned fpu_round_neg_inf; + float scale[4]; /* viewport */ + float translate[4]; /* viewport */ + + ushort fpu_round_nearest; + ushort fpu_round_neg_inf; + ushort fpu_restore; + ushort fpucntl; /* one of FPU_* above */ struct { const void *input_ptr; diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 0581c3042f..7781782ae8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -156,8 +156,8 @@ draw_create_vs_sse(struct draw_context *draw, tgsi_scan_shader(templ->tokens, &vs->base.info); vs->base.draw = draw; - vs->base.create_varient = draw_vs_varient_generic; -// vs->base.create_varient = draw_vs_varient_aos_sse; + vs->base.create_varient = draw_vs_varient_aos_sse; +// vs->base.create_varient = draw_vs_varient_generic; vs->base.prepare = vs_sse_prepare; vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index d27b0f6187..f6f621a748 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -167,6 +167,12 @@ static void vsvg_run_linear( struct draw_vs_varient *varient, + +static void vsvg_set_viewport( struct draw_vs_varient *varient, + const struct pipe_viewport_state *viewport ) +{ +} + static void vsvg_destroy( struct draw_vs_varient *varient ) { FREE(varient); @@ -179,6 +185,9 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, unsigned i; struct translate_key fetch, emit; + if (key->viewport || key->clip) + return NULL; + struct draw_vs_varient_generic *vsvg = CALLOC_STRUCT( draw_vs_varient_generic ); if (vsvg == NULL) return NULL; @@ -187,6 +196,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, vsvg->base.vs = vs; vsvg->base.set_input = vsvg_set_input; vsvg->base.set_constants = vsvg_set_constants; + vsvg->base.set_viewport = vsvg_set_viewport; vsvg->base.run_elts = vsvg_run_elts; vsvg->base.run_linear = vsvg_run_linear; vsvg->base.destroy = vsvg_destroy; -- cgit v1.2.3 From 194a7be28f6eed502f2475d9a637cb3610ca75f6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 21 May 2008 20:31:08 +0100 Subject: draw: fix vs aos internal/machine state --- src/gallium/auxiliary/draw/draw_vs_aos.c | 59 ++++++++++++++++++++++++++++++-- src/gallium/auxiliary/draw/draw_vs_aos.h | 9 +++-- 2 files changed, 63 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index b8e66e8b78..67761f881d 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -83,7 +83,7 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp, return x86_make_disp(ptr, Offset(struct aos_machine, constant[idx])); case AOS_FILE_INTERNAL: - return x86_make_disp(ptr, Offset(struct aos_machine, immediate[idx])); + return x86_make_disp(ptr, Offset(struct aos_machine, internal[idx])); default: ERROR(cp, "unknown reg file"); @@ -97,9 +97,63 @@ struct x86_reg aos_get_internal( struct aos_compilation *cp, { return get_reg_ptr( cp, AOS_FILE_INTERNAL, - imm + 1 ); + imm ); +} + +#define X87_CW_EXCEPTION_INV_OP (1<<0) +#define X87_CW_EXCEPTION_DENORM_OP (1<<1) +#define X87_CW_EXCEPTION_ZERO_DIVIDE (1<<2) +#define X87_CW_EXCEPTION_OVERFLOW (1<<3) +#define X87_CW_EXCEPTION_UNDERFLOW (1<<4) +#define X87_CW_EXCEPTION_PRECISION (1<<5) +#define X87_CW_PRECISION_SINGLE (0<<8) +#define X87_CW_PRECISION_RESERVED (1<<8) +#define X87_CW_PRECISION_DOUBLE (2<<8) +#define X87_CW_PRECISION_DOUBLE_EXT (3<<8) +#define X87_CW_PRECISION_MASK (3<<8) +#define X87_CW_ROUND_NEAREST (0<<10) +#define X87_CW_ROUND_DOWN (1<<10) +#define X87_CW_ROUND_UP (2<<10) +#define X87_CW_ROUND_ZERO (3<<10) +#define X87_CW_ROUND_MASK (3<<10) +#define X87_CW_INFINITY (1<<12) + +static void init_internals( struct aos_machine *machine ) +{ + float inv = 1.0f/255.0f; + float f255 = 255.0f; + + ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f); + ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f); + ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f); + ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv); + ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255); + + + machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP | + X87_CW_EXCEPTION_DENORM_OP | + X87_CW_EXCEPTION_ZERO_DIVIDE | + X87_CW_EXCEPTION_OVERFLOW | + X87_CW_EXCEPTION_UNDERFLOW | + X87_CW_EXCEPTION_PRECISION | + (1<<6) | + X87_CW_ROUND_NEAREST | + X87_CW_PRECISION_DOUBLE_EXT); + + assert(machine->fpu_rnd_nearest == 0x37f); + + machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP | + X87_CW_EXCEPTION_DENORM_OP | + X87_CW_EXCEPTION_ZERO_DIVIDE | + X87_CW_EXCEPTION_OVERFLOW | + X87_CW_EXCEPTION_UNDERFLOW | + X87_CW_EXCEPTION_PRECISION | + (1<<6) | + X87_CW_ROUND_DOWN | + X87_CW_PRECISION_DOUBLE_EXT); } + static void spill( struct aos_compilation *cp, unsigned idx ) { if (!cp->xmm[idx].dirty || @@ -1736,6 +1790,7 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, goto fail; memset(vaos->machine, 0, sizeof(struct aos_machine)); + init_internals(vaos->machine); tgsi_dump(vs->state.tokens, 0); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 16fef6451c..c2afd4e9a0 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -52,10 +52,13 @@ struct x86_function; #define MAX_TEMPS PIPE_MAX_ATTRIBS /* say */ #define MAX_CONSTANTS PIPE_MAX_ATTRIBS /* say */ #define MAX_IMMEDIATES PIPE_MAX_ATTRIBS /* say */ -#define MAX_INTERNALS 4 +#define MAX_INTERNALS 8 #define AOS_FILE_INTERNAL TGSI_FILE_COUNT +#define FPU_RND_NEG 1 +#define FPU_RND_NEAREST 2 + /* This is the temporary storage used by all the aos_sse vs varients. * Create one per context and reuse by passing a pointer in at * vs_varient creation?? @@ -71,8 +74,8 @@ struct aos_machine { float scale[4]; /* viewport */ float translate[4]; /* viewport */ - ushort fpu_round_nearest; - ushort fpu_round_neg_inf; + ushort fpu_rnd_nearest; + ushort fpu_rnd_neg_inf; ushort fpu_restore; ushort fpucntl; /* one of FPU_* above */ -- cgit v1.2.3 From 2302a5d3c1ea2c682dfc034012a054b8327a81de Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 21 May 2008 20:32:43 +0100 Subject: draw: fix fpu control word manipulations --- src/gallium/auxiliary/draw/draw_vs_aos.c | 33 +++++++++++++++++++------------- 1 file changed, 20 insertions(+), 13 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 67761f881d..e736990acc 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -690,41 +690,47 @@ static void x87_fstp_dest4( struct aos_compilation *cp, x87_fstp_or_pop(cp->func, writemask, 3, ptr); } +#define FPU_MANIP 1 /* Save current x87 state and put it into single precision mode. */ static void save_fpu_state( struct aos_compilation *cp ) { -#if 0 - x87_fnstcw( cp->func, x86_make_disp(regEDX, get_offset(m, &m->fpucntl_restore))); - x87_fldcw( cp->func, ); +#if FPU_MANIP + x87_fnstcw( cp->func, x86_make_disp(cp->machine_EDX, + Offset(struct aos_machine, fpu_restore))); #endif } static void restore_fpu_state( struct aos_compilation *cp ) { -#if 0 +#if FPU_MANIP x87_fnclex(cp->func); - x87_fldcw(cp->func, x86_make_disp(regEDX, get_offset(m, &m->fpucntl_restore))); + x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX, + Offset(struct aos_machine, fpu_restore))); #endif } static void set_fpu_round_neg_inf( struct aos_compilation *cp ) { -#if 0 - if (cp->fpucntl != RND_NEG_FPU) { - struct x86_reg regEDX = x86_make_reg(file_REG32, reg_DX); - struct arb_vp_machine *m = NULL; - - cp->fpucntl = RND_NEG_FPU; +#if FPU_MANIP + if (cp->fpucntl != FPU_RND_NEG) { + cp->fpucntl = FPU_RND_NEG; x87_fnclex(cp->func); - x87_fldcw(cp->func, x86_make_disp(regEDX, get_offset(m, &m->fpucntl_rnd_neg))); + x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX, + Offset(struct aos_machine, fpu_rnd_neg_inf))); } #endif } static void set_fpu_round_nearest( struct aos_compilation *cp ) { -#if 0 +#if FPU_MANIP + if (cp->fpucntl != FPU_RND_NEAREST) { + cp->fpucntl = FPU_RND_NEAREST; + x87_fnclex(cp->func); + x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX, + Offset(struct aos_machine, fpu_rnd_nearest))); + } #endif } @@ -1590,6 +1596,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, Offset( struct draw_vs_varient_aos_sse, machine ))); save_fpu_state( &cp ); + set_fpu_round_nearest( &cp ); /* Note address for loop jump */ -- cgit v1.2.3 From 0a7a0d79f64de9794878c42bc5b79a04772d7ed8 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 21 May 2008 20:34:52 +0100 Subject: draw: fix x87_ex2 and partially fix lit insn --- src/gallium/auxiliary/draw/draw_vs_aos.c | 112 +++++++++++++++---------------- 1 file changed, 56 insertions(+), 56 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index e736990acc..a365d456d1 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -735,23 +735,26 @@ static void set_fpu_round_nearest( struct aos_compilation *cp ) } -static void emit_x87_ex2( struct aos_compilation *cp ) +static void x87_emit_ex2( struct aos_compilation *cp ) { struct x86_reg st0 = x86_make_reg(file_x87, 0); struct x86_reg st1 = x86_make_reg(file_x87, 1); - struct x86_reg st3 = x86_make_reg(file_x87, 3); + int stack = cp->func->x87_stack; set_fpu_round_neg_inf( cp ); - x87_fld(cp->func, st0); /* a a */ - x87_fprndint( cp->func ); /* int(a) a */ - x87_fld(cp->func, st0); /* int(a) int(a) a */ - x87_fstp(cp->func, st3); /* int(a) a int(a)*/ - x87_fsubp(cp->func, st1); /* frac(a) int(a) */ - x87_f2xm1(cp->func); /* (2^frac(a))-1 int(a)*/ - x87_fld1(cp->func); /* 1 (2^frac(a))-1 int(a)*/ - x87_faddp(cp->func, st1); /* 2^frac(a) int(a) */ - x87_fscale(cp->func); /* 2^a */ + x87_fld(cp->func, st0); /* a a */ + x87_fld(cp->func, st0); /* a a a */ + x87_fprndint( cp->func ); /* flr(a) a a*/ + x87_fsubp(cp->func, st1); /* frac(a) a */ + x87_f2xm1(cp->func); /* (2^frac(a))-1 a */ + x87_fld1(cp->func); /* 1 (2^frac(a))-1 a */ + x87_faddp(cp->func, st1); /* 2^frac(a) a */ + x87_fscale(cp->func); /* 2^a a */ + x87_fstp(cp->func, st1); + + assert( stack == cp->func->x87_stack); + } @@ -907,9 +910,7 @@ static boolean emit_LG2( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { x87_fld_src(cp, &op->FullSrcRegisters[0], 0); - - emit_x87_ex2(cp); - + x87_emit_ex2(cp); x87_fstp_dest4(cp, &op->FullDstRegisters[0]); return TRUE; } @@ -1084,63 +1085,62 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); - struct x86_reg st1 = x86_make_reg(file_x87, 1); - unsigned fixup1, fixup2; unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; - /* Load the interesting parts of arg0: - */ - x87_fld_src(cp, &op->FullSrcRegisters[0], 3); - x87_fld_src(cp, &op->FullSrcRegisters[0], 1); - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); - - if (writemask & TGSI_WRITEMASK_XW) { - x87_fld1(cp->func); - x87_fst_or_nop(cp->func, writemask, 0, dst); - x87_fstp_or_pop(cp->func, writemask, 3, dst); - } if (writemask & TGSI_WRITEMASK_YZ) { + struct x86_reg st1 = x86_make_reg(file_x87, 1); + struct x86_reg st2 = x86_make_reg(file_x87, 2); + + - /* Pre-zero destinations, may be overwritten later... fixme. - */ - x87_fldz(cp->func); - x87_fst_or_nop(cp->func, writemask, 1, dst); - x87_fstp_or_pop(cp->func, writemask, 2, dst); + /* a1' = a1 <= 0 ? 1 : a1; + */ + x87_fldz(cp->func); /* 0 */ + x87_fld1(cp->func); /* 1 0 */ + x87_fld_src(cp, &op->FullSrcRegisters[0], 1); /* a1 1 0 */ + x87_fcomi(cp->func, st2); /* a1 1 0 */ + x87_fcmovb(cp->func, st1); /* a1' 1 0 */ + x87_fstp(cp->func, st1); /* a1' 0 */ + x87_fstp(cp->func, st1); /* a1' */ + + x87_fld_src(cp, &op->FullSrcRegisters[0], 3); /* a3 a1' */ + x87_fxch(cp->func, st1); /* a1' a3 */ + - /* Check arg0[0]: + /* Compute pow(a1, a3) */ - x87_fldz(cp->func); /* 0 a0 a1 a3 */ - x87_fucomp(cp->func, st1); /* a0 a1 a3 */ - x87_fnstsw(cp->func, cp->tmp_EAX); - x86_sahf(cp->func); - fixup1 = x86_jcc_forward(cp->func, cc_AE); - - x87_fstp_or_pop(cp->func, writemask, 1, dst); /* a1 a3 */ + x87_fyl2x(cp->func); /* a3*log2(a1) */ + x87_emit_ex2( cp ); /* 2^(a3*log2(a1)) */ - /* Check arg0[1]: - */ - x87_fldz(cp->func); /* 0 a1 a3 */ - x87_fucomp(cp->func, st1); /* a1 a3 */ - x87_fnstsw(cp->func, cp->tmp_EAX); - x86_sahf(cp->func); - fixup2 = x86_jcc_forward(cp->func, cc_AE); - /* Compute pow(a1, a3) + /* a0' = max2(a0, 0): */ - x87_fyl2x(cp->func); /* a3*log2(a1) */ + x87_fldz(cp->func); /* 0 r2 */ + x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0 0 r2 */ + x87_fcomi(cp->func, st1); + x87_fcmovb(cp->func, st1); /* a0' 0 r2 */ + x87_fstp(cp->func, st1); /* a0' r2 */ - emit_x87_ex2( cp ); /* 2^(a3*log2(a1)) */ + x87_fxch(cp->func, st1); /* a0' r2 */ + x87_fst_or_nop(cp->func, writemask, 1, dst); /* result[1] = a0' */ + + x87_fldz(cp->func); /* 0 a0' r2 */ + x87_fcomi(cp->func, st1); /* 0 a0' r2 */ + x87_fcmovnbe(cp->func, st2); /* r2' a0' r2 */ x87_fstp_or_pop(cp->func, writemask, 2, dst); - - /* Land jumps: - */ - x86_fixup_fwd_jump(cp->func, fixup1); - x86_fixup_fwd_jump(cp->func, fixup2); + x87_fpop(cp->func); + x87_fpop(cp->func); + } + + if (writemask & TGSI_WRITEMASK_XW) { + x87_fld1(cp->func); + x87_fst_or_nop(cp->func, writemask, 0, dst); + x87_fstp_or_pop(cp->func, writemask, 3, dst); } return TRUE; @@ -1222,7 +1222,7 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0.x a1.x */ x87_fyl2x(cp->func); /* a1*log2(a0) */ - emit_x87_ex2( cp ); /* 2^(a1*log2(a0)) */ + x87_emit_ex2( cp ); /* 2^(a1*log2(a0)) */ x87_fstp_dest4(cp, &op->FullDstRegisters[0]); return TRUE; -- cgit v1.2.3 From 083f3f5c32a28d2993a8a5a8b4f5ef81224a5ec3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 21 May 2008 20:38:23 +0100 Subject: draw: avoid a pointless mov in many sse opcodes --- src/gallium/auxiliary/draw/draw_vs_aos.c | 94 +++++++++++++++++++++----------- 1 file changed, 63 insertions(+), 31 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index a365d456d1..97de43c232 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -174,14 +174,44 @@ static void spill( struct aos_compilation *cp, unsigned idx ) } } +static boolean is_xmm_tmp( struct aos_compilation *cp, + struct x86_reg reg ) +{ + return (reg.file == file_XMM && + cp->xmm[reg.idx].file == TGSI_FILE_NULL); +} + +static struct x86_reg get_xmm_tmp( struct aos_compilation *cp, + struct x86_reg reg ) +{ + if (!is_xmm_tmp(cp, reg)) { + struct x86_reg tmp = aos_get_xmm_reg(cp); + sse_movups(cp->func, tmp, reg); + reg = tmp; + } + + return reg; +} + + struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp ) { unsigned i; unsigned oldest = 0; + boolean found = FALSE; for (i = 0; i < 8; i++) - if (cp->xmm[i].last_used < cp->xmm[oldest].last_used) + if (cp->xmm[i].last_used != cp->insn_counter && + cp->xmm[i].file == TGSI_FILE_NULL) { oldest = i; + found = TRUE; + } + + if (!found) { + for (i = 0; i < 8; i++) + if (cp->xmm[i].last_used < cp->xmm[oldest].last_used) + oldest = i; + } /* Need to write out the old value? */ @@ -237,15 +267,24 @@ void aos_adopt_xmm_reg( struct aos_compilation *cp, unsigned idx, unsigned dirty ) { + unsigned i; + if (reg.file != file_XMM) { assert(0); return; } - invalidate_xmm(cp, file, idx); + for (i = 0; i < 8; i++) { + if (cp->xmm[i].file == file && + cp->xmm[i].idx == idx) { + aos_release_xmm_reg(cp, i); + } + } + cp->xmm[reg.idx].file = file; cp->xmm[reg.idx].idx = idx; cp->xmm[reg.idx].dirty = dirty; + cp->xmm[reg.idx].last_used = cp->insn_counter; } @@ -659,6 +698,7 @@ static void x87_fst_or_nop( struct x86_function *func, unsigned channel, struct x86_reg ptr ) { + assert(ptr.file == file_REG32); if (writemask & (1<FullSrcRegisters[0]); - struct x86_reg dst = aos_get_xmm_reg(cp); struct x86_reg neg = aos_get_internal(cp, IMM_NEGS); + struct x86_reg dst = get_xmm_tmp(cp, arg0); - sse_movups(cp->func, dst, arg0); sse_mulps(cp->func, dst, neg); sse_maxps(cp->func, dst, arg0); @@ -782,9 +822,8 @@ static boolean emit_ADD( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = aos_get_xmm_reg(cp); + struct x86_reg dst = get_xmm_tmp(cp, arg0); - sse_movups(cp->func, dst, arg0); sse_addps(cp->func, dst, arg1); store_dest(cp, &op->FullDstRegisters[0], dst); @@ -806,10 +845,9 @@ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = aos_get_xmm_reg(cp); struct x86_reg tmp = aos_get_xmm_reg(cp); + struct x86_reg dst = get_xmm_tmp(cp, arg0); - sse_movups(cp->func, dst, arg0); sse_mulps(cp->func, dst, arg1); /* Now the hard bit: sum the first 3 values: @@ -831,10 +869,9 @@ static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = aos_get_xmm_reg(cp); struct x86_reg tmp = aos_get_xmm_reg(cp); + struct x86_reg dst = get_xmm_tmp(cp, arg0); - sse_movups(cp->func, dst, arg0); sse_mulps(cp->func, dst, arg1); /* Now the hard bit: sum the values: @@ -854,10 +891,9 @@ static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = aos_get_xmm_reg(cp); struct x86_reg tmp = aos_get_xmm_reg(cp); + struct x86_reg dst = get_xmm_tmp(cp, arg0); - sse_movups(cp->func, dst, arg0); sse_mulps(cp->func, dst, arg1); /* Now the hard bit: sum the values (from DP3): @@ -1152,9 +1188,8 @@ static boolean emit_MAX( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = aos_get_xmm_reg(cp); + struct x86_reg dst = get_xmm_tmp(cp, arg0); - sse_movups(cp->func, dst, arg0); sse_maxps(cp->func, dst, arg1); store_dest(cp, &op->FullDstRegisters[0], dst); @@ -1166,9 +1201,8 @@ static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = aos_get_xmm_reg(cp); + struct x86_reg dst = get_xmm_tmp(cp, arg0); - sse_movups(cp->func, dst, arg0); sse_minps(cp->func, dst, arg1); store_dest(cp, &op->FullDstRegisters[0], dst); @@ -1178,9 +1212,9 @@ static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_MOV( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg dst = aos_get_xmm_reg(cp); + struct x86_reg dst = get_xmm_tmp(cp, arg0); - sse_movups(cp->func, dst, arg0); + /* potentially nothing to do */ store_dest(cp, &op->FullDstRegisters[0], dst); return TRUE; @@ -1190,9 +1224,8 @@ static boolean emit_MUL( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = aos_get_xmm_reg(cp); + struct x86_reg dst = get_xmm_tmp(cp, arg0); - sse_movups(cp->func, dst, arg0); sse_mulps(cp->func, dst, arg1); store_dest(cp, &op->FullDstRegisters[0], dst); @@ -1205,13 +1238,15 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); struct x86_reg arg2 = fetch_src(cp, &op->FullSrcRegisters[2]); - struct x86_reg dst = aos_get_xmm_reg(cp); - sse_movups(cp->func, dst, arg0); - sse_mulps(cp->func, dst, arg1); - sse_addps(cp->func, dst, arg2); + /* If we can't clobber old contents of arg0, get a temporary & copy + * it there, then clobber it... + */ + arg0 = get_xmm_tmp(cp, arg0); - store_dest(cp, &op->FullDstRegisters[0], dst); + sse_mulps(cp->func, arg0, arg1); + sse_addps(cp->func, arg0, arg2); + store_dest(cp, &op->FullDstRegisters[0], arg0); return TRUE; } @@ -1272,10 +1307,9 @@ static boolean emit_SGE( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = aos_get_xmm_reg(cp); struct x86_reg ones = aos_get_internal(cp, IMM_ONES); + struct x86_reg dst = get_xmm_tmp(cp, arg0); - sse_movups(cp->func, dst, arg0); sse_cmpps(cp->func, dst, arg1, cc_NotLessThan); sse_andps(cp->func, dst, ones); @@ -1297,10 +1331,9 @@ static boolean emit_SLT( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = aos_get_xmm_reg(cp); struct x86_reg ones = aos_get_internal(cp, IMM_ONES); + struct x86_reg dst = get_xmm_tmp(cp, arg0); - sse_movups(cp->func, dst, arg0); sse_cmpps(cp->func, dst, arg1, cc_LessThan); sse_andps(cp->func, dst, ones); @@ -1312,9 +1345,8 @@ static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = aos_get_xmm_reg(cp); + struct x86_reg dst = get_xmm_tmp(cp, arg0); - sse_movups(cp->func, dst, arg0); sse_subps(cp->func, dst, arg1); store_dest(cp, &op->FullDstRegisters[0], dst); -- cgit v1.2.3 From 5b1bd30f22ffa3955150ec008631d0f4754d340f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 21 May 2008 20:41:01 +0100 Subject: draw: when preloading args to x87 stack, need to use reverse order --- src/gallium/auxiliary/draw/draw_vs_aos.c | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 97de43c232..fde92c7226 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -980,7 +980,7 @@ static boolean emit_EXP( struct aos_compilation *cp, const struct tgsi_full_inst x87_fstp_or_pop(cp->func, writemask, 0, dst); /* flr(a) a 2^flr(a) */ - x87_fsubrp(cp->func, st1); /* frac(a) 2^flr(a) */ + x87_fsubp(cp->func, st1); /* frac(a) 2^flr(a) */ x87_fst_or_nop(cp->func, writemask, 1, dst); /* frac(a) 2^flr(a) */ @@ -1041,9 +1041,9 @@ static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_inst /* Load all sources first to avoid aliasing */ - for (i = 0; i < 4; i++) { + for (i = 3; i >= 0; i--) { if (writemask & (1<FullSrcRegisters[0], i); + x87_fld_src(cp, &op->FullSrcRegisters[0], i); } } @@ -1068,9 +1068,9 @@ static boolean emit_RND( struct aos_compilation *cp, const struct tgsi_full_inst /* Load all sources first to avoid aliasing */ - for (i = 0; i < 4; i++) { + for (i = 3; i >= 0; i--) { if (writemask & (1<FullSrcRegisters[0], i); + x87_fld_src(cp, &op->FullSrcRegisters[0], i); } } @@ -1098,7 +1098,7 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst /* suck all the source values onto the stack before writing out any * dst, which may alias... */ - for (i = 0; i < 4; i++) { + for (i = 3; i >= 0; i--) { if (writemask & (1<FullSrcRegisters[0], i); } @@ -1108,7 +1108,7 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst if (writemask & (1<func, st0); /* a a */ x87_fprndint( cp->func ); /* flr(a) a */ - x87_fsubrp(cp->func, st1); /* frc(a) */ + x87_fsubp(cp->func, st1); /* frc(a) */ x87_fstp(cp->func, x86_make_disp(dst, i*4)); } } @@ -1392,6 +1392,8 @@ static boolean emit_instruction( struct aos_compilation *cp, struct tgsi_full_instruction *inst ) { + x87_assert_stack_empty(cp->func); + switch( inst->Instruction.Opcode ) { case TGSI_OPCODE_MOV: return emit_MOV( cp, inst ); @@ -1657,6 +1659,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, break; } + x87_assert_stack_empty(cp.func); cp.insn_counter++; debug_printf("\n"); } @@ -1712,6 +1715,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, x86_pop(cp.func, cp.count_ESI); x86_pop(cp.func, cp.idx_EBX); + x87_assert_stack_empty(cp.func); x86_ret(cp.func); tgsi_parse_free( &parse ); -- cgit v1.2.3 From 6f407b072453eb2bb7077a952257a099db4da025 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 21 May 2008 20:50:36 +0100 Subject: rtasm: remove debug --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index a2e8af343b..d78676b8f3 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -990,14 +990,12 @@ static void note_x87_pop( struct x86_function *p ) { p->x87_stack--; assert(p->x87_stack >= 0); - debug_printf("\nstack: %d\n", p->x87_stack); } static void note_x87_push( struct x86_function *p ) { p->x87_stack++; assert(p->x87_stack <= 7); - debug_printf("\nstack: %d\n", p->x87_stack); } void x87_assert_stack_empty( struct x86_function *p ) -- cgit v1.2.3 From a5c3b499fa40f46298389900e74f1db04f99166a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 22 May 2008 13:37:48 +0100 Subject: draw: fse works with elts, remove assert --- src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 984fbb6767..7fefd391a6 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -78,11 +78,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle, boolean need_psize = 0; - if (draw->pt.user.elts) { - assert(0); - return ; - } - if (!draw->render->set_primitive( draw->render, prim )) { assert(0); @@ -250,9 +245,8 @@ static void fse_run_linear( struct draw_pt_middle_end *middle, } /* Single routine to fetch vertices, run shader and emit HW verts. - * Clipping and viewport transformation are done elsewhere -- - * either by the API or on hardware, or for some other reason not - * required... + * Clipping is done elsewhere -- either by the API or on hardware, + * or for some other reason not required... */ fse->active->run_linear( fse->active, start, count, -- cgit v1.2.3 From c684ffa02d8d43ee04b99ee63ccd1adb66e81c1a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 22 May 2008 13:41:49 +0100 Subject: draw: clean up internal immediates in aos sse --- src/gallium/auxiliary/draw/draw_vs_aos.c | 64 +++++++++++++++++++++-------- src/gallium/auxiliary/draw/draw_vs_aos.h | 5 ++- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 10 ++--- 3 files changed, 55 insertions(+), 24 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index fde92c7226..0b8600696a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -44,12 +44,6 @@ #ifdef PIPE_ARCH_X86 -#define DISASSEM 0 - - - - - static INLINE boolean eq( struct x86_reg a, struct x86_reg b ) { @@ -92,13 +86,6 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp, } -struct x86_reg aos_get_internal( struct aos_compilation *cp, - unsigned imm ) -{ - return get_reg_ptr( cp, - AOS_FILE_INTERNAL, - imm ); -} #define X87_CW_EXCEPTION_INV_OP (1<<0) #define X87_CW_EXCEPTION_DENORM_OP (1<<1) @@ -123,6 +110,9 @@ static void init_internals( struct aos_machine *machine ) float inv = 1.0f/255.0f; float f255 = 255.0f; + ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f); + *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff; + ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f); ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f); ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f); @@ -337,6 +327,39 @@ struct x86_reg aos_get_shader_reg( struct aos_compilation *cp, +static struct x86_reg aos_get_shader_reg_xmm( struct aos_compilation *cp, + unsigned file, + unsigned idx ) +{ + struct x86_reg reg = aos_get_shader_reg( cp, file, idx ); + + if (reg.file != file_XMM) { + struct x86_reg tmp = aos_get_xmm_reg(cp); + sse_movups(cp->func, tmp, reg); + aos_adopt_xmm_reg( cp, tmp, file, idx, FALSE ); + reg = tmp; + } + + return reg; +} + + + +struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp, + unsigned imm ) +{ + return aos_get_shader_reg_xmm( cp, AOS_FILE_INTERNAL, imm ); +} + + +struct x86_reg aos_get_internal( struct aos_compilation *cp, + unsigned imm ) +{ + return aos_get_shader_reg( cp, AOS_FILE_INTERNAL, imm ); +} + + + /* Emulate pshufd insn in regular SSE, if necessary: @@ -461,15 +484,15 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, arg0 = dst; } - if (negs) { - struct x86_reg imm_negs = aos_get_internal(cp, IMM_NEGS); + if (negs && negs != 0xf) { + struct x86_reg imm_swz = aos_get_internal_xmm(cp, IMM_SWZ); struct x86_reg tmp = aos_get_xmm_reg(cp); /* Load 1,-1,0,0 * Use neg as arg to pshufd * Multiply */ - emit_pshufd(cp, tmp, imm_negs, + emit_pshufd(cp, tmp, imm_swz, SHUF((negs & 1) ? 1 : 0, (negs & 2) ? 1 : 0, (negs & 4) ? 1 : 0, @@ -479,12 +502,17 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, aos_release_xmm_reg(cp, tmp.idx); arg0 = dst; } + else if (negs) { + struct x86_reg imm_negs = aos_get_internal_xmm(cp, IMM_NEGS); + sse_mulps(cp->func, dst, imm_negs); + arg0 = dst; + } + if (abs && abs != 0xf) { ERROR(cp, "unsupported partial abs"); } - - if (abs) { + else if (abs) { struct x86_reg neg = aos_get_internal(cp, IMM_NEGS); struct x86_reg tmp = aos_get_xmm_reg(cp); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index c2afd4e9a0..efdc9a38f4 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -142,13 +142,16 @@ boolean aos_emit_outputs( struct aos_compilation *cp ); #define IMM_ONES 0 /* 1, 1,1,1 */ -#define IMM_NEGS 1 /* 1,-1,0,0 */ +#define IMM_SWZ 1 /* 1,-1,0, 0xffffffff */ #define IMM_IDENTITY 2 /* 0, 0,0,1 */ #define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */ #define IMM_255 4 /* 255, 255, 255, 255 */ +#define IMM_NEGS 5 /* -1,-1,-1,-1 */ struct x86_reg aos_get_internal( struct aos_compilation *cp, unsigned imm ); +struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp, + unsigned imm ); #define ERROR(cp, msg) \ diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index 72b2b3d11d..0dda9df97d 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -54,7 +54,7 @@ static void emit_load_R32G32B32( struct aos_compilation *cp, struct x86_reg src_ptr ) { sse_movss(cp->func, data, x86_make_disp(src_ptr, 8)); - sse_shufps(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) ); + sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) ); sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) ); sse_movlps(cp->func, data, src_ptr); } @@ -63,7 +63,7 @@ static void emit_load_R32G32( struct aos_compilation *cp, struct x86_reg data, struct x86_reg src_ptr ) { - sse_movups(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ) ); + sse_movups(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) ); sse_movlps(cp->func, data, src_ptr); } @@ -73,7 +73,7 @@ static void emit_load_R32( struct aos_compilation *cp, struct x86_reg src_ptr ) { sse_movss(cp->func, data, src_ptr); - sse_orps(cp->func, data, aos_get_internal( cp, IMM_IDENTITY ) ); + sse_orps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ) ); } @@ -82,8 +82,8 @@ static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp, struct x86_reg src_ptr ) { sse_movss(cp->func, data, src_ptr); - sse2_punpcklbw(cp->func, data, aos_get_internal( cp, IMM_IDENTITY )); - sse2_punpcklbw(cp->func, data, aos_get_internal( cp, IMM_IDENTITY )); + sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY )); + sse2_punpcklbw(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY )); sse2_cvtdq2ps(cp->func, data, data); sse_mulps(cp->func, data, aos_get_internal(cp, IMM_INV_255)); } -- cgit v1.2.3 From 05029c919d46299ca259ee8af880d0a65f95ce7c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 22 May 2008 13:46:06 +0100 Subject: draw: clean up masked writes in aos sse, make some xmm function names clearer --- src/gallium/auxiliary/draw/draw_vs_aos.c | 236 +++++++++++-------------------- 1 file changed, 82 insertions(+), 154 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 0b8600696a..708ecadbac 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -171,7 +171,7 @@ static boolean is_xmm_tmp( struct aos_compilation *cp, cp->xmm[reg.idx].file == TGSI_FILE_NULL); } -static struct x86_reg get_xmm_tmp( struct aos_compilation *cp, +static struct x86_reg get_xmm_clone( struct aos_compilation *cp, struct x86_reg reg ) { if (!is_xmm_tmp(cp, reg)) { @@ -380,31 +380,37 @@ static void emit_pshufd( struct aos_compilation *cp, } } - - - -/* Helper for writemask: +/* load masks (pack into negs??) + * pshufd - shuffle according to writemask + * and - result, mask + * nand - dest, mask + * or - dest, result */ -static boolean emit_shuf_copy1( struct aos_compilation *cp, - struct x86_reg dst, - struct x86_reg arg0, - struct x86_reg arg1, - ubyte shuf ) +static boolean mask_write( struct aos_compilation *cp, + struct x86_reg dst, + struct x86_reg result, + unsigned mask ) { + struct x86_reg imm_swz = aos_get_internal_xmm(cp, IMM_SWZ); struct x86_reg tmp = aos_get_xmm_reg(cp); - sse_movups(cp->func, dst, arg1); - emit_pshufd(cp, dst, dst, shuf); - emit_pshufd(cp, tmp, arg0, shuf); - - sse_movss(cp->func, dst, tmp); + + emit_pshufd(cp, tmp, imm_swz, + SHUF((mask & 1) ? 2 : 3, + (mask & 2) ? 2 : 3, + (mask & 4) ? 2 : 3, + (mask & 8) ? 2 : 3)); - emit_pshufd(cp, dst, dst, shuf); + sse_andps(cp->func, dst, tmp); + sse_andnps(cp->func, tmp, result); + sse_orps(cp->func, dst, tmp); aos_release_xmm_reg(cp, tmp.idx); return TRUE; } + + /* Helper for writemask: */ static boolean emit_shuf_copy2( struct aos_compilation *cp, @@ -414,17 +420,18 @@ static boolean emit_shuf_copy2( struct aos_compilation *cp, ubyte shuf ) { struct x86_reg tmp = aos_get_xmm_reg(cp); + emit_pshufd(cp, dst, arg1, shuf); emit_pshufd(cp, tmp, arg0, shuf); - sse_shufps(cp->func, dst, tmp, SHUF(X, Y, Z, W)); - emit_pshufd(cp, dst, dst, shuf); aos_release_xmm_reg(cp, tmp.idx); return TRUE; } + + #define SSE_SWIZZLE_NOOP ((0<<0) | (1<<2) | (2<<4) | (3<<6)) @@ -593,131 +600,58 @@ static void store_dest( struct aos_compilation *cp, const struct tgsi_full_dst_register *reg, struct x86_reg result ) { - if (reg->DstRegister.WriteMask == 0) - { - return; - } - else if (reg->DstRegister.WriteMask == TGSI_WRITEMASK_XYZW) - { - if (result.file == file_XMM) { - aos_adopt_xmm_reg(cp, - result, - reg->DstRegister.File, - reg->DstRegister.Index, - TRUE); - } - else { - struct x86_reg dst = aos_get_xmm_reg(cp); - aos_adopt_xmm_reg(cp, - dst, - reg->DstRegister.File, - reg->DstRegister.Index, - TRUE); - sse_movups(cp->func, dst, result); - } - } - else - { - /* Previous value of the dest register: - */ - struct x86_reg old_dst = aos_get_shader_reg(cp, - reg->DstRegister.File, - reg->DstRegister.Index); - - - /* Alloc an xmm reg to hold the new value of the dest register: - */ - struct x86_reg dst = aos_get_xmm_reg(cp); + struct x86_reg dst; + switch (reg->DstRegister.WriteMask) { + case 0: + return; + + case TGSI_WRITEMASK_XYZW: aos_adopt_xmm_reg(cp, - dst, + get_xmm_clone(cp, result), reg->DstRegister.File, reg->DstRegister.Index, - TRUE ); - - switch (reg->DstRegister.WriteMask) { - case TGSI_WRITEMASK_X: - if (result.file == file_XMM) { - sse_movups(cp->func, dst, old_dst); - sse_movss(cp->func, dst, result); - } - else { - struct x86_reg tmp = aos_get_xmm_reg(cp); - sse_movups(cp->func, dst, old_dst); - sse_movss(cp->func, tmp, result); - sse_movss(cp->func, dst, tmp); - aos_release_xmm_reg(cp, tmp.idx); - } - break; - - case TGSI_WRITEMASK_XY: - sse_movups(cp->func, dst, old_dst); - sse_shufps(cp->func, dst, result, SHUF(X, Y, Z, W)); - break; - - case TGSI_WRITEMASK_ZW: - sse_movups(cp->func, dst, result); - sse_shufps(cp->func, dst, old_dst, SHUF(X, Y, Z, W)); - break; - - case TGSI_WRITEMASK_YZW: - if (old_dst.file == file_XMM) { - sse_movups(cp->func, dst, result); - sse_movss(cp->func, dst, old_dst); - } - else { - struct x86_reg tmp = aos_get_xmm_reg(cp); - sse_movups(cp->func, dst, result); - sse_movss(cp->func, tmp, old_dst); - sse_movss(cp->func, dst, tmp); - aos_release_xmm_reg(cp, tmp.idx); - } - break; - - case TGSI_WRITEMASK_Y: - emit_shuf_copy1(cp, dst, result, old_dst, SHUF(Y,X,Z,W)); - break; - - case TGSI_WRITEMASK_Z: - emit_shuf_copy1(cp, dst, result, old_dst, SHUF(Z,Y,X,W)); - break; - - case TGSI_WRITEMASK_W: - emit_shuf_copy1(cp, dst, result, old_dst, SHUF(W,Y,Z,X)); - break; - - case TGSI_WRITEMASK_XZ: - emit_shuf_copy2(cp, dst, result, old_dst, SHUF(X,Z,Y,W)); - break; + TRUE); + return; + default: + break; + } - case TGSI_WRITEMASK_XW: - emit_shuf_copy2(cp, dst, result, old_dst, SHUF(X,W,Z,Y)); + dst = aos_get_shader_reg_xmm(cp, + reg->DstRegister.File, + reg->DstRegister.Index); - case TGSI_WRITEMASK_YZ: - emit_shuf_copy2(cp, dst, result, old_dst, SHUF(Z,Y,X,W)); - break; + switch (reg->DstRegister.WriteMask) { + case TGSI_WRITEMASK_X: + sse_movss(cp->func, dst, get_xmm_clone(cp, result)); + break; + + case TGSI_WRITEMASK_XY: + sse_shufps(cp->func, dst, get_xmm_clone(cp, result), SHUF(X, Y, Z, W)); + break; - case TGSI_WRITEMASK_YW: - emit_shuf_copy2(cp, dst, result, old_dst, SHUF(W,Y,Z,X)); - break; + case TGSI_WRITEMASK_ZW: + result = get_xmm_clone(cp, result); + sse_shufps(cp->func, result, dst, SHUF(X, Y, Z, W)); + dst = result; + break; - case TGSI_WRITEMASK_XZW: - emit_shuf_copy1(cp, dst, old_dst, result, SHUF(Y,X,Z,W)); - break; + case TGSI_WRITEMASK_YZW: + sse_movss(cp->func, result, dst); + dst = result; + break; - case TGSI_WRITEMASK_XYW: - emit_shuf_copy1(cp, dst, old_dst, result, SHUF(Z,Y,X,W)); - break; + default: + mask_write(cp, dst, result, reg->DstRegister.WriteMask); + break; + } - case TGSI_WRITEMASK_XYZ: - emit_shuf_copy1(cp, dst, old_dst, result, SHUF(W,Y,Z,X)); - break; + aos_adopt_xmm_reg(cp, + dst, + reg->DstRegister.File, + reg->DstRegister.Index, + TRUE); - default: - assert(0); /* not possible */ - break; - } - } } @@ -837,7 +771,7 @@ static boolean emit_ABS( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg neg = aos_get_internal(cp, IMM_NEGS); - struct x86_reg dst = get_xmm_tmp(cp, arg0); + struct x86_reg dst = get_xmm_clone(cp, arg0); sse_mulps(cp->func, dst, neg); sse_maxps(cp->func, dst, arg0); @@ -850,7 +784,7 @@ static boolean emit_ADD( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = get_xmm_tmp(cp, arg0); + struct x86_reg dst = get_xmm_clone(cp, arg0); sse_addps(cp->func, dst, arg1); @@ -874,7 +808,7 @@ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_inst struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); struct x86_reg tmp = aos_get_xmm_reg(cp); - struct x86_reg dst = get_xmm_tmp(cp, arg0); + struct x86_reg dst = get_xmm_clone(cp, arg0); sse_mulps(cp->func, dst, arg1); @@ -898,7 +832,7 @@ static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_inst struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); struct x86_reg tmp = aos_get_xmm_reg(cp); - struct x86_reg dst = get_xmm_tmp(cp, arg0); + struct x86_reg dst = get_xmm_clone(cp, arg0); sse_mulps(cp->func, dst, arg1); @@ -920,7 +854,7 @@ static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_inst struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); struct x86_reg tmp = aos_get_xmm_reg(cp); - struct x86_reg dst = get_xmm_tmp(cp, arg0); + struct x86_reg dst = get_xmm_clone(cp, arg0); sse_mulps(cp->func, dst, arg1); @@ -1216,7 +1150,7 @@ static boolean emit_MAX( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = get_xmm_tmp(cp, arg0); + struct x86_reg dst = get_xmm_clone(cp, arg0); sse_maxps(cp->func, dst, arg1); @@ -1229,7 +1163,7 @@ static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = get_xmm_tmp(cp, arg0); + struct x86_reg dst = get_xmm_clone(cp, arg0); sse_minps(cp->func, dst, arg1); @@ -1240,7 +1174,7 @@ static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_MOV( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg dst = get_xmm_tmp(cp, arg0); + struct x86_reg dst = get_xmm_clone(cp, arg0); /* potentially nothing to do */ @@ -1252,7 +1186,7 @@ static boolean emit_MUL( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = get_xmm_tmp(cp, arg0); + struct x86_reg dst = get_xmm_clone(cp, arg0); sse_mulps(cp->func, dst, arg1); @@ -1270,7 +1204,7 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst /* If we can't clobber old contents of arg0, get a temporary & copy * it there, then clobber it... */ - arg0 = get_xmm_tmp(cp, arg0); + arg0 = get_xmm_clone(cp, arg0); sse_mulps(cp->func, arg0, arg1); sse_addps(cp->func, arg0, arg2); @@ -1336,7 +1270,7 @@ static boolean emit_SGE( struct aos_compilation *cp, const struct tgsi_full_inst struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); struct x86_reg ones = aos_get_internal(cp, IMM_ONES); - struct x86_reg dst = get_xmm_tmp(cp, arg0); + struct x86_reg dst = get_xmm_clone(cp, arg0); sse_cmpps(cp->func, dst, arg1, cc_NotLessThan); sse_andps(cp->func, dst, ones); @@ -1360,7 +1294,7 @@ static boolean emit_SLT( struct aos_compilation *cp, const struct tgsi_full_inst struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); struct x86_reg ones = aos_get_internal(cp, IMM_ONES); - struct x86_reg dst = get_xmm_tmp(cp, arg0); + struct x86_reg dst = get_xmm_clone(cp, arg0); sse_cmpps(cp->func, dst, arg1, cc_LessThan); sse_andps(cp->func, dst, ones); @@ -1373,7 +1307,7 @@ static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = get_xmm_tmp(cp, arg0); + struct x86_reg dst = get_xmm_clone(cp, arg0); sse_subps(cp->func, dst, arg1); @@ -1526,9 +1460,9 @@ emit_instruction( struct aos_compilation *cp, static boolean emit_viewport( struct aos_compilation *cp ) { - struct x86_reg pos = aos_get_shader_reg(cp, - TGSI_FILE_OUTPUT, - 0); + struct x86_reg pos = aos_get_shader_reg_xmm(cp, + TGSI_FILE_OUTPUT, + 0); struct x86_reg scale = x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, scale)); @@ -1536,12 +1470,6 @@ static boolean emit_viewport( struct aos_compilation *cp ) struct x86_reg translate = x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, translate)); - if (pos.file != file_XMM) { - struct x86_reg dst = aos_get_xmm_reg(cp); - sse_movups(cp->func, dst, pos); - pos = dst; - } - sse_mulps(cp->func, pos, scale); sse_addps(cp->func, pos, translate); -- cgit v1.2.3 From 7b25c1a4032960752d8a8e950bdf75740b2de2e8 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 22 May 2008 13:47:08 +0100 Subject: draw: remove FPU_MANIP ifdef --- src/gallium/auxiliary/draw/draw_vs_aos.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 708ecadbac..d60940bb7a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -693,48 +693,39 @@ static void x87_fstp_dest4( struct aos_compilation *cp, x87_fstp_or_pop(cp->func, writemask, 3, ptr); } -#define FPU_MANIP 1 /* Save current x87 state and put it into single precision mode. */ static void save_fpu_state( struct aos_compilation *cp ) { -#if FPU_MANIP x87_fnstcw( cp->func, x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, fpu_restore))); -#endif } static void restore_fpu_state( struct aos_compilation *cp ) { -#if FPU_MANIP x87_fnclex(cp->func); x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, fpu_restore))); -#endif } static void set_fpu_round_neg_inf( struct aos_compilation *cp ) { -#if FPU_MANIP if (cp->fpucntl != FPU_RND_NEG) { cp->fpucntl = FPU_RND_NEG; x87_fnclex(cp->func); x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, fpu_rnd_neg_inf))); } -#endif } static void set_fpu_round_nearest( struct aos_compilation *cp ) { -#if FPU_MANIP if (cp->fpucntl != FPU_RND_NEAREST) { cp->fpucntl = FPU_RND_NEAREST; x87_fnclex(cp->func); x87_fldcw( cp->func, x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, fpu_rnd_nearest))); } -#endif } @@ -754,7 +745,7 @@ static void x87_emit_ex2( struct aos_compilation *cp ) x87_fld1(cp->func); /* 1 (2^frac(a))-1 a */ x87_faddp(cp->func, st1); /* 2^frac(a) a */ x87_fscale(cp->func); /* 2^a a */ - x87_fstp(cp->func, st1); + x87_fstp(cp->func, st1); /* 2^a */ assert( stack == cp->func->x87_stack); -- cgit v1.2.3 From 6780a6dede31e7f2eb465e1d7b507b3e64fe6ec9 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 22 May 2008 13:48:07 +0100 Subject: draw: shortcircuit shuffle in aos_sse when possible --- src/gallium/auxiliary/draw/draw_vs_aos.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index d60940bb7a..b8fad231ca 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -809,7 +809,9 @@ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_inst sse_addss(cp->func, dst, tmp); /* a*x+c*z, b*y, ?, ? */ emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z)); sse_addss(cp->func, dst, tmp); - sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X)); + + if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1) + sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X)); aos_release_xmm_reg(cp, tmp.idx); store_dest(cp, &op->FullDstRegisters[0], dst); @@ -833,7 +835,9 @@ static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_inst sse_addps(cp->func, dst, tmp); /* a*x+c*z, b*y+d*w, a*x+c*z, b*y+d*w */ emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z)); sse_addss(cp->func, dst, tmp); - sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X)); + + if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1) + sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X)); aos_release_xmm_reg(cp, tmp.idx); store_dest(cp, &op->FullDstRegisters[0], dst); @@ -857,7 +861,9 @@ static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_inst sse_addss(cp->func, dst, tmp); emit_pshufd(cp, tmp, arg1, SHUF(W,W,W,W)); sse_addss(cp->func, dst, tmp); - sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X)); + + if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1) + sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X)); aos_release_xmm_reg(cp, tmp.idx); store_dest(cp, &op->FullDstRegisters[0], dst); @@ -1233,7 +1239,8 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst sse_divss(cp->func, dst, arg0); } - sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X)); + if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1) + sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X)); store_dest(cp, &op->FullDstRegisters[0], dst); return TRUE; @@ -1249,7 +1256,8 @@ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_inst /* Extend precision here... */ - sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X)); + if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1) + sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X)); store_dest(cp, &op->FullDstRegisters[0], dst); return TRUE; -- cgit v1.2.3 From 65cb09249e750b45ec3fc9a57670fc77250efc5e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 22 May 2008 13:49:38 +0100 Subject: draw: for debug, do rhw divide in aos_sse viewport calcs --- src/gallium/auxiliary/draw/draw_vs_aos.c | 45 +++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index b8fad231ca..40de13a98c 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1481,6 +1481,46 @@ static boolean emit_viewport( struct aos_compilation *cp ) } +/* This is useful to be able to see the results on softpipe. Doesn't + * do proper clipping, just assumes the backend can do it during + * rasterization -- for debug only... + */ +static boolean emit_rhw_viewport( struct aos_compilation *cp ) +{ + struct x86_reg tmp = aos_get_xmm_reg(cp); + struct x86_reg pos = aos_get_shader_reg_xmm(cp, + TGSI_FILE_OUTPUT, + 0); + + struct x86_reg scale = x86_make_disp(cp->machine_EDX, + Offset(struct aos_machine, scale)); + + struct x86_reg translate = x86_make_disp(cp->machine_EDX, + Offset(struct aos_machine, translate)); + + + + emit_pshufd(cp, tmp, pos, SHUF(W, W, W, W)); + sse2_rcpss(cp->func, tmp, tmp); + sse_shufps(cp->func, tmp, tmp, SHUF(X, X, X, X)); + + sse_mulps(cp->func, pos, scale); + sse_mulps(cp->func, pos, tmp); + sse_addps(cp->func, pos, translate); + + /* Set pos[3] = w + */ + mask_write(cp, pos, tmp, TGSI_WRITEMASK_W); + + aos_adopt_xmm_reg( cp, + pos, + TGSI_FILE_OUTPUT, + 0, + TRUE ); + return TRUE; +} + + static boolean note_immediate( struct aos_compilation *cp, struct tgsi_full_immediate *imm ) { @@ -1623,7 +1663,10 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, goto fail; if (cp.vaos->base.key.viewport) { - emit_viewport(&cp); + if (0) + emit_viewport(&cp); + else + emit_rhw_viewport(&cp); } /* Emit output... TODO: do this eagerly after the last write to a -- cgit v1.2.3 From 260001430bbd28ea17201f1980ab1ebed93b246f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 22 May 2008 15:24:02 +0100 Subject: draw: use aligned movs within draw_vs_aos.c --- src/gallium/auxiliary/draw/draw_vs_aos.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 40de13a98c..039e233fe8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -159,7 +159,7 @@ static void spill( struct aos_compilation *cp, unsigned idx ) cp->xmm[idx].idx); assert(cp->xmm[idx].dirty); - sse_movups(cp->func, oldval, x86_make_reg(file_XMM, idx)); + sse_movaps(cp->func, oldval, x86_make_reg(file_XMM, idx)); cp->xmm[idx].dirty = 0; } } @@ -176,7 +176,7 @@ static struct x86_reg get_xmm_clone( struct aos_compilation *cp, { if (!is_xmm_tmp(cp, reg)) { struct x86_reg tmp = aos_get_xmm_reg(cp); - sse_movups(cp->func, tmp, reg); + sse_movaps(cp->func, tmp, reg); reg = tmp; } @@ -335,7 +335,7 @@ static struct x86_reg aos_get_shader_reg_xmm( struct aos_compilation *cp, if (reg.file != file_XMM) { struct x86_reg tmp = aos_get_xmm_reg(cp); - sse_movups(cp->func, tmp, reg); + sse_movaps(cp->func, tmp, reg); aos_adopt_xmm_reg( cp, tmp, file, idx, FALSE ); reg = tmp; } @@ -374,7 +374,7 @@ static void emit_pshufd( struct aos_compilation *cp, } else { if (!eq(dst, arg0)) - sse_movups(cp->func, dst, arg0); + sse_movaps(cp->func, dst, arg0); sse_shufps(cp->func, dst, dst, shuf); } @@ -523,7 +523,7 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, struct x86_reg neg = aos_get_internal(cp, IMM_NEGS); struct x86_reg tmp = aos_get_xmm_reg(cp); - sse_movups(cp->func, tmp, arg0); + sse_movaps(cp->func, tmp, arg0); sse_mulps(cp->func, tmp, neg); sse_maxps(cp->func, dst, arg0); -- cgit v1.2.3 From 43df4642f1d2f3d2673a1d5e4f5126f5175fb899 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 22 May 2008 20:21:49 +0100 Subject: draw: tweak x87_emit_ex2 to avoid changing x87 fpu settings --- src/gallium/auxiliary/draw/draw_vs_aos.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 039e233fe8..93bb4f9bc0 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -735,16 +735,17 @@ static void x87_emit_ex2( struct aos_compilation *cp ) struct x86_reg st1 = x86_make_reg(file_x87, 1); int stack = cp->func->x87_stack; - set_fpu_round_neg_inf( cp ); +// set_fpu_round_neg_inf( cp ); x87_fld(cp->func, st0); /* a a */ - x87_fld(cp->func, st0); /* a a a */ - x87_fprndint( cp->func ); /* flr(a) a a*/ - x87_fsubp(cp->func, st1); /* frac(a) a */ - x87_f2xm1(cp->func); /* (2^frac(a))-1 a */ - x87_fld1(cp->func); /* 1 (2^frac(a))-1 a */ - x87_faddp(cp->func, st1); /* 2^frac(a) a */ - x87_fscale(cp->func); /* 2^a a */ + x87_fprndint( cp->func ); /* int(a) a*/ + x87_fsubr(cp->func, st1, st0); /* int(a) frc(a) */ + x87_fxch(cp->func, st1); /* frc(a) int(a) */ + x87_f2xm1(cp->func); /* (2^frc(a))-1 int(a) */ + x87_fld1(cp->func); /* 1 (2^frc(a))-1 int(a) */ + x87_faddp(cp->func, st1); /* 2^frac(a) int(a) */ + x87_fscale(cp->func); /* (2^frac(a)*2^int(int(a))) int(a) */ + /* 2^a int(a) */ x87_fstp(cp->func, st1); /* 2^a */ assert( stack == cp->func->x87_stack); -- cgit v1.2.3 From 7106da136069f865747e03c30ca245bc030b241b Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 22 May 2008 20:22:15 +0100 Subject: draw: correct but slow LIT() in aos varient --- src/gallium/auxiliary/draw/draw_vs_aos.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 93bb4f9bc0..930914f609 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1093,10 +1093,12 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst - /* a1' = a1 <= 0 ? 1 : a1; + /* a1' = a1 <= 0 ? 1 : a1; + * + * Note: use 1.0 to avoid passing zero to */ - x87_fldz(cp->func); /* 0 */ - x87_fld1(cp->func); /* 1 0 */ + x87_fldz(cp->func); /* 1 0 */ + x87_fldz(cp->func); /* 1 0 */ x87_fld_src(cp, &op->FullSrcRegisters[0], 1); /* a1 1 0 */ x87_fcomi(cp->func, st2); /* a1 1 0 */ x87_fcmovb(cp->func, st1); /* a1' 1 0 */ @@ -1119,17 +1121,14 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0 0 r2 */ x87_fcomi(cp->func, st1); x87_fcmovb(cp->func, st1); /* a0' 0 r2 */ - x87_fstp(cp->func, st1); /* a0' r2 */ - x87_fxch(cp->func, st1); /* a0' r2 */ x87_fst_or_nop(cp->func, writemask, 1, dst); /* result[1] = a0' */ - x87_fldz(cp->func); /* 0 a0' r2 */ - x87_fcomi(cp->func, st1); /* 0 a0' r2 */ - x87_fcmovnbe(cp->func, st2); /* r2' a0' r2 */ + x87_fcomi(cp->func, st1); /* a0' 0 r2 */ + x87_fcmovnbe(cp->func, st2); /* r2' 0' r2 */ - x87_fstp_or_pop(cp->func, writemask, 2, dst); - x87_fpop(cp->func); + x87_fstp_or_pop(cp->func, writemask, 2, dst); /* 0 r2 */ + x87_fpop(cp->func); /* r2 */ x87_fpop(cp->func); } -- cgit v1.2.3 From 3b41d619a1b7cc8c356c32af777486461ddd7926 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 23 May 2008 09:14:17 +0100 Subject: draw: faster LIT(), incorrect though --- src/gallium/auxiliary/draw/draw_vs_aos.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 930914f609..b0c3ac49d2 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1098,7 +1098,13 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst * Note: use 1.0 to avoid passing zero to */ x87_fldz(cp->func); /* 1 0 */ +#if 1 + x87_fld1(cp->func); /* 1 0 */ +#else + /* Correct but slow due to fp exceptions generated in fyl2x - fix me. + */ x87_fldz(cp->func); /* 1 0 */ +#endif x87_fld_src(cp, &op->FullSrcRegisters[0], 1); /* a1 1 0 */ x87_fcomi(cp->func, st2); /* a1 1 0 */ x87_fcmovb(cp->func, st1); /* a1' 1 0 */ -- cgit v1.2.3 From 059a652d64da470ccc7f2f3266fd64721848a7be Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 24 May 2008 19:25:02 +0900 Subject: scons: New profile build. --- common.py | 14 +++++++++++++- src/gallium/auxiliary/util/SConscript | 1 + 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/common.py b/common.py index 36b190ce89..d3c0261d71 100644 --- a/common.py +++ b/common.py @@ -52,7 +52,8 @@ def AddOptions(opts): from SCons.Options.EnumOption import EnumOption except ImportError: from SCons.Variables.EnumVariable import EnumVariable as EnumOption - opts.Add(BoolOption('debug', 'build debug version', 'no')) + opts.Add(BoolOption('debug', 'debug build', 'no')) + opts.Add(BoolOption('profile', 'profile build', 'no')) #opts.Add(BoolOption('quiet', 'quiet command lines', 'no')) opts.Add(EnumOption('machine', 'use machine-specific assembly code', default_machine, allowed_values=('generic', 'x86', 'x86_64'))) @@ -125,6 +126,8 @@ def make_build_dir(env): build_subdir += '-' + env['machine'] if env['debug']: build_subdir += "-debug" + if env['profile']: + build_subdir += "-profile" build_dir = os.path.join(build_topdir, build_subdir) # Place the .sconsign file on the builddir too, to avoid issues with different scons # versions building the same source file @@ -154,6 +157,8 @@ def generate(env): cppdefines += ['DEBUG'] else: cppdefines += ['NDEBUG'] + if env['profile']: + cppdefines += ['PROFILE'] if platform == 'windows': cppdefines += [ 'WIN32', @@ -204,6 +209,8 @@ def generate(env): cflags += ['-O0', '-g3'] else: cflags += ['-O3', '-g3'] + if env['profile']: + cflags += ['-pg'] cflags += [ '-Wall', '-Wmissing-prototypes', @@ -228,6 +235,11 @@ def generate(env): '/Oi', # enable intrinsic functions '/Os', # favor code space ] + if env['profile']: + cflags += [ + '/Gh', # enable _penter hook function + '/GH', # enable _pexit hook function + ] if platform == 'windows': cflags += [ # TODO diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index d55d2c7081..0309de1ac2 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -5,6 +5,7 @@ util = env.ConvenienceLibrary( source = [ 'p_debug.c', 'p_debug_mem.c', + 'p_debug_prof.c', 'p_tile.c', 'p_util.c', 'u_blit.c', -- cgit v1.2.3 From 345eb7fb70840829571cbacdb3980181df8e018a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 24 May 2008 19:25:33 +0900 Subject: gallium: Poor-man profiler for win32 kernel. --- src/gallium/auxiliary/util/p_debug_prof.c | 175 ++++++++++++++++++++++++++++++ src/gallium/include/pipe/p_debug.h | 11 ++ 2 files changed, 186 insertions(+) create mode 100644 src/gallium/auxiliary/util/p_debug_prof.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug_prof.c b/src/gallium/auxiliary/util/p_debug_prof.c new file mode 100644 index 0000000000..958f99c327 --- /dev/null +++ b/src/gallium/auxiliary/util/p_debug_prof.c @@ -0,0 +1,175 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Poor-man profiling. + * + * @author José Fonseca + * + * @sa http://blogs.msdn.com/joshpoley/archive/2008/03/12/poor-man-s-profiler.aspx + * @sa http://www.johnpanzer.com/aci_cuj/index.html + */ + +#include "pipe/p_config.h" + +#if defined(PROFILE) && defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + +#include +#include + +#include "pipe/p_debug.h" +#include "util/u_string.h" + + +#define PROFILE_FILE_SIZE 4*1024*1024 +#define FILE_NAME_SIZE 256 + +static WCHAR wFileName[FILE_NAME_SIZE]; +static ULONG_PTR iFile = 0; +static void *pMap = NULL; +static void *pMapEnd = NULL; + + +/** + * Called at the start of every method or function. + * + * @sa http://msdn.microsoft.com/en-us/library/c63a9b7h.aspx + */ +void __declspec(naked) __cdecl +_penter(void) { + _asm { + push ebx + mov ebx, [pMap] + test ebx, ebx + jz done + cmp ebx, [pMapEnd] + je done + push eax + push edx + mov eax, [esp+12] + and eax, 0xfffffffe + mov [ebx], eax + add ebx, 4 + rdtsc + mov [ebx], eax + add ebx, 4 + mov [pMap], ebx + pop edx + pop eax +done: + pop ebx + ret + } +} + + +/** + * Called at the end of Calls the end of every method or function. + * + * @sa http://msdn.microsoft.com/en-us/library/xc11y76y.aspx + */ +void __declspec(naked) __cdecl +_pexit(void) { + _asm { + push ebx + mov ebx, [pMap] + test ebx, ebx + jz done + cmp ebx, [pMapEnd] + je done + push eax + push edx + mov eax, [esp+12] + or eax, 0x00000001 + mov [ebx], eax + add ebx, 4 + rdtsc + mov [ebx], eax + add ebx, 4 + mov [pMap], ebx + pop edx + pop eax +done: + pop ebx + ret + } +} + + +void __declspec(naked) +__debug_profile_reference1(void) { + _asm { + call _penter + call _pexit + ret + } +} + + +void __declspec(naked) +__debug_profile_reference2(void) { + _asm { + call _penter + call __debug_profile_reference1 + call _pexit + ret + } +} + + +void +debug_profile_start(void) +{ + static unsigned no = 0; + char filename[FILE_NAME_SIZE]; + unsigned i; + + util_snprintf(filename, sizeof(filename), "\\??\\c:\\%03u.prof", ++no); + for(i = 0; i < FILE_NAME_SIZE; ++i) + wFileName[i] = (WCHAR)filename[i]; + + pMap = EngMapFile(wFileName, PROFILE_FILE_SIZE, &iFile); + if(pMap) { + pMapEnd = (unsigned char*)pMap + PROFILE_FILE_SIZE; + /* reference functions for calibration purposes */ + __debug_profile_reference2(); + } +} + +void +debug_profile_stop(void) +{ + if(iFile) { + EngUnmapFile(iFile); + /* TODO: truncate file */ + } + iFile = 0; + pMapEnd = pMap = NULL; +} + +#endif /* PROFILE */ diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index 1263d0da61..0af635be57 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -313,6 +313,17 @@ void debug_memory_end(unsigned long beginning); +#if defined(PROFILE) && defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + +void +debug_profile_start(void); + +void +debug_profile_stop(void); + +#endif + + #ifdef DEBUG void debug_dump_image(const char *prefix, unsigned format, unsigned cpp, -- cgit v1.2.3 From 6b3723ee8d084a1abbc971b21c58f7c1e66949a7 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 24 May 2008 13:22:15 +0100 Subject: rtasm: add some helpers for calling out from generated code --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 44 ++++++++++++++++++++++++++++-- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 6 ++++ 2 files changed, 47 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index d78676b8f3..2415b0156b 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -218,6 +218,8 @@ static void emit_3ub( struct x86_function *p, unsigned char b0, unsigned char b1 /* Build a modRM byte + possible displacement. No treatment of SIB * indexing. BZZT - no way to encode an absolute address. + * + * This is the "/r" field in the x86 manuals... */ static void emit_modrm( struct x86_function *p, struct x86_reg reg, @@ -256,7 +258,8 @@ static void emit_modrm( struct x86_function *p, } } - +/* Emits the "/0".."/7" specialized versions of the modrm ("/r") bytes. + */ static void emit_modrm_noreg( struct x86_function *p, unsigned op, struct x86_reg regmem ) @@ -365,8 +368,7 @@ void x86_jcc( struct x86_function *p, DUMP_I(cc); if (offset < 0) { - int amt = p->csr - p->store; - assert(amt > -offset); + assert(p->csr - p->store > -offset); } if (offset <= 127 && offset >= -128) { @@ -443,6 +445,16 @@ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) emit_1i(p, imm); } +void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm ) +{ + DUMP_RI( dst, imm ); + assert(dst.mod == mod_REG); + emit_1ub(p, 0x80); + emit_modrm_noreg(p, 0, dst); + emit_1ub(p, imm); +} + + void x86_push( struct x86_function *p, struct x86_reg reg ) { @@ -459,6 +471,17 @@ void x86_push( struct x86_function *p, p->stack_offset += 4; } +void x86_push_imm32( struct x86_function *p, + int imm32 ) +{ + DUMP_I( imm32 ); + emit_1ub(p, 0x68); + emit_1i(p, imm32); + + p->stack_offset += 4; +} + + void x86_pop( struct x86_function *p, struct x86_reg reg ) { @@ -1558,6 +1581,21 @@ void mmx_movq( struct x86_function *p, */ +void x86_cdecl_caller_push_regs( struct x86_function *p ) +{ + x86_push(p, x86_make_reg(file_REG32, reg_AX)); + x86_push(p, x86_make_reg(file_REG32, reg_CX)); + x86_push(p, x86_make_reg(file_REG32, reg_DX)); +} + +void x86_cdecl_caller_pop_regs( struct x86_function *p ) +{ + x86_pop(p, x86_make_reg(file_REG32, reg_DX)); + x86_pop(p, x86_make_reg(file_REG32, reg_CX)); + x86_pop(p, x86_make_reg(file_REG32, reg_AX)); +} + + /* Retreive a reference to one of the function arguments, taking into * account any push/pop activity: */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 9f7e31e055..63e812fac9 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -155,6 +155,7 @@ void x86_call( struct x86_function *p, struct x86_reg reg); * I load the immediate into general purpose register and use it. */ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm ); /* Macro for sse_shufps() and sse2_pshufd(): @@ -225,6 +226,7 @@ void x86_imul( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_or( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_pop( struct x86_function *p, struct x86_reg reg ); void x86_push( struct x86_function *p, struct x86_reg reg ); +void x86_push_imm32( struct x86_function *p, int imm ); void x86_ret( struct x86_function *p ); void x86_retw( struct x86_function *p, unsigned short imm ); void x86_sub( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); @@ -232,6 +234,10 @@ void x86_test( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_xor( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void x86_sahf( struct x86_function *p ); + +void x86_cdecl_caller_push_regs( struct x86_function *p ); +void x86_cdecl_caller_pop_regs( struct x86_function *p ); + void x87_assert_stack_empty( struct x86_function *p ); void x87_f2xm1( struct x86_function *p ); -- cgit v1.2.3 From 6172f1295cf812108d8ceba15a83ba87880360d3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 24 May 2008 13:22:29 +0100 Subject: draw: add a debug-print which can be called from inside generated shaders --- src/gallium/auxiliary/draw/draw_vs_aos.c | 67 ++++++++++++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_vs_aos.h | 1 + 2 files changed, 68 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index b0c3ac49d2..aa119f242e 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -752,7 +752,63 @@ static void x87_emit_ex2( struct aos_compilation *cp ) } +static void PIPE_CDECL print_reg( const char *msg, + const float *reg ) +{ + debug_printf("%s: %f %f %f %f\n", msg, reg[0], reg[1], reg[2], reg[3]); +} + +static void emit_print( struct aos_compilation *cp, + const char *message, /* must point to a static string! */ + unsigned file, + unsigned idx ) +{ + struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); + struct x86_reg arg = get_reg_ptr( cp, file, idx ); + unsigned i; + + /* There shouldn't be anything on the x87 stack. Can add this + * capacity later if need be. + */ + assert(cp->func->x87_stack == 0); + + /* For absolute correctness, need to spill/invalidate all XMM regs + * too. We're obviously not concerned about performance on this + * debug path, so here goes: + */ + for (i = 0; i < 8; i++) { + if (cp->xmm[i].dirty) + spill(cp, i); + aos_release_xmm_reg(cp, i); + } + + /* Push caller-save (ie scratch) regs. + */ + x86_cdecl_caller_push_regs( cp->func ); + + + /* Push the arguments: + */ + x86_lea( cp->func, ecx, arg ); + x86_push( cp->func, ecx ); + x86_push_imm32( cp->func, (int)message ); + + /* Call the helper. Could call debug_printf directly, but + * print_reg is a nice place to put a breakpoint if need be. + */ + x86_mov_reg_imm( cp->func, ecx, (int)print_reg ); + x86_call( cp->func, ecx ); + x86_pop( cp->func, ecx ); + x86_pop( cp->func, ecx ); + + /* Pop caller-save regs + */ + x86_cdecl_caller_pop_regs( cp->func ); + + /* Done... + */ +} /** * The traditional instructions. All operate on internal registers @@ -1798,6 +1854,17 @@ static void vaos_set_constants( struct draw_vs_varient *varient, memcpy(vaos->machine->constant, constants, (vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1) * 4 * sizeof(float)); + +#if 0 + unsigned i; + for (i =0; i < vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1; i++) + debug_printf("state %d: %f %f %f %f\n", + i, + constants[i][0], + constants[i][1], + constants[i][2], + constants[i][3]); +#endif } diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index efdc9a38f4..a0680ec63d 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -96,6 +96,7 @@ struct aos_compilation { unsigned insn_counter; unsigned num_immediates; + unsigned count; struct { unsigned idx:16; -- cgit v1.2.3 From 86e529ad90411d21bca3d70984b2db202e7a0cd6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 24 May 2008 16:31:11 +0100 Subject: draw: use lookup tables to avoid calling pow() in LIT opcode --- src/gallium/auxiliary/draw/draw_vs_aos.c | 251 ++++++++++++++++++++++++++++++- src/gallium/auxiliary/draw/draw_vs_aos.h | 27 ++++ 2 files changed, 272 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index aa119f242e..1fbb7088ca 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -105,8 +105,31 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp, #define X87_CW_ROUND_MASK (3<<10) #define X87_CW_INFINITY (1<<12) +static void do_populate_lut( struct shine_tab *tab, + float unclamped_exponent ) +{ + const float epsilon = 1.0F / 256.0F; + float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon)); + unsigned i; + + tab->exponent = unclamped_exponent; /* for later comparison */ + + tab->values[0] = 0; + if (exponent == 0) { + for (i = 1; i < 258; i++) { + tab->values[i] = 1.0; + } + } + else { + for (i = 1; i < 258; i++) { + tab->values[i] = powf((float)i * epsilon, exponent); + } + } +} + static void init_internals( struct aos_machine *machine ) { + unsigned i; float inv = 1.0f/255.0f; float f255 = 255.0f; @@ -141,6 +164,9 @@ static void init_internals( struct aos_machine *machine ) (1<<6) | X87_CW_ROUND_DOWN | X87_CW_PRECISION_DOUBLE_EXT); + + for (i = 0; i < MAX_SHINE_TAB; i++) + do_populate_lut( &machine->shine_tab[i], 1.0f ); } @@ -1132,26 +1158,231 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } +static PIPE_CDECL void do_lit( struct aos_machine *machine, + float *result, + const float *in, + unsigned count ) +{ + if (in[0] > 0) + { + if (in[1] <= 0.0) + { + result[0] = 1.0F; + result[1] = in[0]; + result[2] = 1.0; + result[3] = 1.0F; + } + else + { + const float epsilon = 1.0F / 256.0F; + float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon)); + result[0] = 1.0F; + result[1] = in[0]; + result[2] = powf(in[1], exponent); + result[3] = 1.0; + } + } + else + { + result[0] = 1.0F; + result[1] = 0.0; + result[2] = 0.0; + result[3] = 1.0F; + } +} + + +static PIPE_CDECL void do_lit_lut( struct aos_machine *machine, + float *result, + const float *in, + unsigned count ) +{ + if (in[0] > 0) + { + if (in[1] <= 0.0) + { + result[0] = 1.0F; + result[1] = in[0]; + result[2] = 1.0; + result[3] = 1.0F; + return; + } + + if (machine->lit_info[count].shine_tab->exponent != in[3]) { + machine->lit_info[count].func = do_lit; + goto no_luck; + } + + if (in[1] <= 1.0) + { + const float *tab = machine->lit_info[count].shine_tab->values; + float f = in[1] * 256; + int k = (int)f; + float frac = f - (float)k; + + result[0] = 1.0F; + result[1] = in[0]; + result[2] = tab[k] + frac*(tab[k+1]-tab[k]); + result[3] = 1.0; + return; + } + + no_luck: + { + const float epsilon = 1.0F / 256.0F; + float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon)); + result[0] = 1.0F; + result[1] = in[0]; + result[2] = powf(in[1], exponent); + result[3] = 1.0; + } + } + else + { + result[0] = 1.0F; + result[1] = 0.0; + result[2] = 0.0; + result[3] = 1.0F; + } +} + + + +static void PIPE_CDECL populate_lut( struct aos_machine *machine, + float *result, + const float *in, + unsigned count ) +{ + unsigned i, tab; + + /* Search for an existing table for this value. Note that without + * static analysis we don't really know if in[3] will be constant, + * but it usually is... + */ + for (tab = 0; tab < 4; tab++) { + if (machine->shine_tab[tab].exponent == in[3]) { + goto found; + } + } + + for (tab = 0, i = 1; i < 4; i++) { + if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used) + tab = i; + } + + if (machine->shine_tab[tab].last_used == machine->now) { + /* No unused tables (this is not a ffvertex program...). Just + * call pow each time: + */ + machine->lit_info[count].func = do_lit; + machine->lit_info[count].func( machine, result, in, count ); + return; + } + else { + do_populate_lut( &machine->shine_tab[tab], in[3] ); + } + + found: + machine->shine_tab[tab].last_used = machine->now; + machine->lit_info[count].shine_tab = &machine->shine_tab[tab]; + machine->lit_info[count].func = do_lit_lut; + machine->lit_info[count].func( machine, result, in, count ); +} + + + static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); + struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; + unsigned lit_count = cp->lit_count++; + struct x86_reg result, arg0; + unsigned i; + +#if 1 + /* For absolute correctness, need to spill/invalidate all XMM regs + * too. + */ + for (i = 0; i < 8; i++) { + if (cp->xmm[i].dirty) + spill(cp, i); + aos_release_xmm_reg(cp, i); + } +#endif + + if (writemask != TGSI_WRITEMASK_XYZW) + result = x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, tmp[0])); + else + result = get_dst_ptr(cp, &op->FullDstRegisters[0]); + + + arg0 = fetch_src( cp, &op->FullSrcRegisters[0] ); + if (arg0.file == file_XMM) { + struct x86_reg tmp = x86_make_disp(cp->machine_EDX, + Offset(struct aos_machine, tmp[1])); + sse_movaps( cp->func, tmp, arg0 ); + arg0 = tmp; + } + + + + /* Push caller-save (ie scratch) regs. + */ + x86_cdecl_caller_push_regs( cp->func ); + + /* Push the arguments: + */ + x86_push_imm32( cp->func, lit_count ); + + x86_lea( cp->func, ecx, arg0 ); + x86_push( cp->func, ecx ); + x86_lea( cp->func, ecx, result ); + x86_push( cp->func, ecx ); + x86_push( cp->func, cp->machine_EDX ); + if (lit_count < MAX_LIT_INFO) { + x86_mov( cp->func, ecx, x86_make_disp( cp->machine_EDX, + Offset(struct aos_machine, lit_info) + + lit_count * sizeof(struct lit_info) + + Offset(struct lit_info, func))); + } + else { + x86_mov_reg_imm( cp->func, ecx, (int)do_lit ); + } + + x86_call( cp->func, ecx ); + + x86_pop( cp->func, ecx ); /* fixme... */ + x86_pop( cp->func, ecx ); + x86_pop( cp->func, ecx ); + x86_pop( cp->func, ecx ); + + x86_cdecl_caller_pop_regs( cp->func ); + + if (writemask != TGSI_WRITEMASK_XYZW) { + store_dest( cp, + &op->FullDstRegisters[0], + get_xmm_clone( cp, result ) ); + } + + return TRUE; +} + + +static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); + unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; if (writemask & TGSI_WRITEMASK_YZ) { struct x86_reg st1 = x86_make_reg(file_x87, 1); struct x86_reg st2 = x86_make_reg(file_x87, 2); - - - /* a1' = a1 <= 0 ? 1 : a1; - * - * Note: use 1.0 to avoid passing zero to */ x87_fldz(cp->func); /* 1 0 */ #if 1 @@ -1865,6 +2096,14 @@ static void vaos_set_constants( struct draw_vs_varient *varient, constants[i][2], constants[i][3]); #endif + + { + unsigned i; + for (i = 0; i < MAX_LIT_INFO; i++) { + vaos->machine->lit_info[i].func = populate_lut; + vaos->machine->now++; + } + } } diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index a0680ec63d..c08c73d4bc 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -59,6 +59,25 @@ struct x86_function; #define FPU_RND_NEG 1 #define FPU_RND_NEAREST 2 +struct aos_machine; +typedef void PIPE_CDECL (*lit_func)( struct aos_machine *, + float *result, + const float *in, + unsigned count ); +struct shine_tab { + float exponent; + float values[258]; + unsigned last_used; +}; + +struct lit_info { + lit_func func; + struct shine_tab *shine_tab; +}; + +#define MAX_SHINE_TAB 4 +#define MAX_LIT_INFO 16 + /* This is the temporary storage used by all the aos_sse vs varients. * Create one per context and reuse by passing a pointer in at * vs_varient creation?? @@ -74,6 +93,13 @@ struct aos_machine { float scale[4]; /* viewport */ float translate[4]; /* viewport */ + float tmp[2][4]; /* scratch space for LIT */ + + struct shine_tab shine_tab[MAX_SHINE_TAB]; + struct lit_info lit_info[MAX_LIT_INFO]; + unsigned now; + + ushort fpu_rnd_nearest; ushort fpu_rnd_neg_inf; ushort fpu_restore; @@ -97,6 +123,7 @@ struct aos_compilation { unsigned insn_counter; unsigned num_immediates; unsigned count; + unsigned lit_count; struct { unsigned idx:16; -- cgit v1.2.3 From a6fca8acb5e8ce0e5e6ce91a524e2bb4c180d3ac Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 25 May 2008 11:20:38 +0100 Subject: draw: fix input vs output typo in emit --- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index 0dda9df97d..f39ebb7a17 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -282,7 +282,7 @@ boolean aos_emit_outputs( struct aos_compilation *cp ) { unsigned i; - for (i = 0; i < cp->vaos->base.vs->info.num_inputs; i++) { + for (i = 0; i < cp->vaos->base.vs->info.num_outputs; i++) { unsigned format = cp->vaos->base.key.element[i].out.format; unsigned offset = cp->vaos->base.key.element[i].out.offset; -- cgit v1.2.3 From 584a3dcf8e4042cc1a5d48d83ea63d0a3c9706c1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 25 May 2008 11:25:09 +0100 Subject: draw: add viewport support to generic vs varient code --- src/gallium/auxiliary/draw/draw_vs_varient.c | 43 ++++++++++++++++++++++++++-- 1 file changed, 41 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index f6f621a748..c15c648527 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -44,7 +44,7 @@ struct draw_vs_varient_generic { struct draw_vs_varient base; - + struct pipe_viewport_state viewport; struct draw_vertex_shader *shader; struct draw_context *draw; @@ -87,6 +87,30 @@ static void vsvg_set_input( struct draw_vs_varient *varient, } +/* Mainly for debug at this stage: + */ +static void do_viewport( struct draw_vs_varient_generic *vsvg, + unsigned count, + void *output_buffer ) +{ + char *ptr = (char *)output_buffer; + const float *scale = vsvg->viewport.scale; + const float *trans = vsvg->viewport.translate; + unsigned stride = vsvg->base.key.output_stride; + unsigned j; + + for (j = 0; j < count; j++, ptr += stride) { + float *data = (float *)ptr; + float w = 1.0f / data[3]; + + data[0] = data[0] * w * scale[0] + trans[0]; + data[1] = data[1] * w * scale[1] + trans[1]; + data[2] = data[2] * w * scale[2] + trans[2]; + data[3] = w; + } +} + + static void vsvg_run_elts( struct draw_vs_varient *varient, const unsigned *elts, unsigned count, @@ -112,6 +136,12 @@ static void vsvg_run_elts( struct draw_vs_varient *varient, vsvg->base.key.output_stride, vsvg->base.key.output_stride); + if (vsvg->base.key.viewport) + do_viewport( vsvg, + count, + output_buffer ); + + //if (!vsvg->already_in_emit_format) vsvg->emit->set_buffer( vsvg->emit, @@ -152,6 +182,12 @@ static void vsvg_run_linear( struct draw_vs_varient *varient, vsvg->base.key.output_stride, vsvg->base.key.output_stride); + if (vsvg->base.key.viewport) + do_viewport( vsvg, + count, + output_buffer ); + + //if (!vsvg->already_in_emit_format) vsvg->emit->set_buffer( vsvg->emit, 0, @@ -171,6 +207,9 @@ static void vsvg_run_linear( struct draw_vs_varient *varient, static void vsvg_set_viewport( struct draw_vs_varient *varient, const struct pipe_viewport_state *viewport ) { + struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; + + vsvg->viewport = *viewport; } static void vsvg_destroy( struct draw_vs_varient *varient ) @@ -185,7 +224,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, unsigned i; struct translate_key fetch, emit; - if (key->viewport || key->clip) + if (key->clip) return NULL; struct draw_vs_varient_generic *vsvg = CALLOC_STRUCT( draw_vs_varient_generic ); -- cgit v1.2.3 From dc52622fcf5660a9675ed61c359cf7068aa4861b Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 25 May 2008 15:28:30 +0100 Subject: cso: use memcpy rather than structure asignment for copying Apparently gcc will omit to copy hidden padding bytes under some circumstances, which means later on memcmp() will indicate a difference between structs even though all the visible members are identical. --- src/gallium/auxiliary/cso_cache/cso_context.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index a1a3a9efaf..7236bff592 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -292,7 +292,7 @@ enum pipe_error cso_set_blend(struct cso_context *ctx, if (!cso) return PIPE_ERROR_OUT_OF_MEMORY; - cso->state = *templ; + memcpy(&cso->state, templ, sizeof(*templ)); cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_blend_state; cso->context = ctx->pipe; @@ -350,7 +350,7 @@ enum pipe_error cso_single_sampler(struct cso_context *ctx, if (!cso) return PIPE_ERROR_OUT_OF_MEMORY; - cso->state = *templ; + memcpy(&cso->state, templ, sizeof(*templ)); cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state; cso->context = ctx->pipe; @@ -508,7 +508,7 @@ enum pipe_error cso_set_depth_stencil_alpha(struct cso_context *ctx, if (!cso) return PIPE_ERROR_OUT_OF_MEMORY; - cso->state = *templ; + memcpy(&cso->state, templ, sizeof(*templ)); cso->data = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_depth_stencil_alpha_state; cso->context = ctx->pipe; @@ -564,7 +564,7 @@ enum pipe_error cso_set_rasterizer(struct cso_context *ctx, if (!cso) return PIPE_ERROR_OUT_OF_MEMORY; - cso->state = *templ; + memcpy(&cso->state, templ, sizeof(*templ)); cso->data = ctx->pipe->create_rasterizer_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_rasterizer_state; cso->context = ctx->pipe; @@ -726,7 +726,7 @@ enum pipe_error cso_set_vertex_shader(struct cso_context *ctx, if (!cso) return PIPE_ERROR_OUT_OF_MEMORY; - cso->state = *templ; + memcpy(cso->state, templ, sizeof(*templ)); cso->data = ctx->pipe->create_vs_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_vs_state; cso->context = ctx->pipe; -- cgit v1.2.3 From caadc8d944c558e1fa9f23c3616d726337a19862 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 25 May 2008 15:37:47 +0100 Subject: draw: clean up some of the xmm register manipulation function names --- src/gallium/auxiliary/draw/draw_vs_aos.c | 141 +++++++++++++++++-------------- 1 file changed, 78 insertions(+), 63 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 1fbb7088ca..17b9442d6b 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -190,17 +190,26 @@ static void spill( struct aos_compilation *cp, unsigned idx ) } } -static boolean is_xmm_tmp( struct aos_compilation *cp, - struct x86_reg reg ) + +static struct x86_reg get_xmm_writable( struct aos_compilation *cp, + struct x86_reg reg ) { - return (reg.file == file_XMM && - cp->xmm[reg.idx].file == TGSI_FILE_NULL); + if (reg.file != file_XMM || + cp->xmm[reg.idx].file != TGSI_FILE_NULL) + { + struct x86_reg tmp = aos_get_xmm_reg(cp); + sse_movaps(cp->func, tmp, reg); + reg = tmp; + } + + return reg; } -static struct x86_reg get_xmm_clone( struct aos_compilation *cp, - struct x86_reg reg ) +static struct x86_reg get_xmm( struct aos_compilation *cp, + struct x86_reg reg ) { - if (!is_xmm_tmp(cp, reg)) { + if (reg.file != file_XMM) + { struct x86_reg tmp = aos_get_xmm_reg(cp); sse_movaps(cp->func, tmp, reg); reg = tmp; @@ -210,6 +219,9 @@ static struct x86_reg get_xmm_clone( struct aos_compilation *cp, } +/* Allocate an empty xmm register, either as a temporary or later to + * "adopt" as a shader reg. + */ struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp ) { unsigned i; @@ -251,32 +263,11 @@ void aos_release_xmm_reg( struct aos_compilation *cp, cp->xmm[idx].last_used = 0; } -static void invalidate_xmm( struct aos_compilation *cp, - unsigned file, unsigned idx ) -{ - unsigned i; - - /* Invalidate any old copy of this register in XMM0-7. - */ - for (i = 0; i < 8; i++) { - if (cp->xmm[i].file == file && cp->xmm[i].idx == idx) { - - if (cp->xmm[i].dirty) - spill(cp, i); - - aos_release_xmm_reg(cp, i); - break; - } - } - for (; i < 8; i++) { - if (cp->xmm[i].file == file && cp->xmm[i].idx == idx) { - assert(0); - } - } -} - + +/* Mark an xmm reg as holding the current copy of a shader reg. + */ void aos_adopt_xmm_reg( struct aos_compilation *cp, struct x86_reg reg, unsigned file, @@ -290,6 +281,9 @@ void aos_adopt_xmm_reg( struct aos_compilation *cp, return; } + /* If any xmm reg thinks it holds this shader reg, break the + * illusion. + */ for (i = 0; i < 8; i++) { if (cp->xmm[i].file == file && cp->xmm[i].idx == idx) { @@ -304,12 +298,24 @@ void aos_adopt_xmm_reg( struct aos_compilation *cp, } - +/* Return a pointer to the in-memory copy of the reg, making sure it is uptodate. + */ static struct x86_reg aos_get_shader_reg_ptr( struct aos_compilation *cp, unsigned file, unsigned idx ) { - invalidate_xmm( cp, file, idx ); + unsigned i; + + /* Ensure the in-memory copy of this reg is up-to-date + */ + for (i = 0; i < 8; i++) { + if (cp->xmm[i].file == file && + cp->xmm[i].idx == idx && + cp->xmm[i].dirty) { + spill(cp, i); + } + } + return get_reg_ptr( cp, file, idx ); } @@ -320,7 +326,26 @@ static struct x86_reg aos_get_shader_reg_ptr( struct aos_compilation *cp, static struct x86_reg get_dst_ptr( struct aos_compilation *cp, const struct tgsi_full_dst_register *dst ) { - return aos_get_shader_reg_ptr( cp, dst->DstRegister.File, dst->DstRegister.Index ); + unsigned file = dst->DstRegister.File; + unsigned idx = dst->DstRegister.Index; + unsigned i; + + + /* Ensure in-memory copy of this reg is up-to-date and invalidate + * any xmm copies. + */ + for (i = 0; i < 8; i++) { + if (cp->xmm[i].file == file && + cp->xmm[i].idx == idx) + { + if (cp->xmm[i].dirty) + spill(cp, i); + + aos_release_xmm_reg(cp, i); + } + } + + return get_reg_ptr( cp, file, idx ); } @@ -358,15 +383,7 @@ static struct x86_reg aos_get_shader_reg_xmm( struct aos_compilation *cp, unsigned idx ) { struct x86_reg reg = aos_get_shader_reg( cp, file, idx ); - - if (reg.file != file_XMM) { - struct x86_reg tmp = aos_get_xmm_reg(cp); - sse_movaps(cp->func, tmp, reg); - aos_adopt_xmm_reg( cp, tmp, file, idx, FALSE ); - reg = tmp; - } - - return reg; + return get_xmm( cp, reg ); } @@ -634,7 +651,7 @@ static void store_dest( struct aos_compilation *cp, case TGSI_WRITEMASK_XYZW: aos_adopt_xmm_reg(cp, - get_xmm_clone(cp, result), + get_xmm_writable(cp, result), reg->DstRegister.File, reg->DstRegister.Index, TRUE); @@ -649,15 +666,15 @@ static void store_dest( struct aos_compilation *cp, switch (reg->DstRegister.WriteMask) { case TGSI_WRITEMASK_X: - sse_movss(cp->func, dst, get_xmm_clone(cp, result)); + sse_movss(cp->func, dst, get_xmm(cp, result)); break; case TGSI_WRITEMASK_XY: - sse_shufps(cp->func, dst, get_xmm_clone(cp, result), SHUF(X, Y, Z, W)); + sse_shufps(cp->func, dst, get_xmm(cp, result), SHUF(X, Y, Z, W)); break; case TGSI_WRITEMASK_ZW: - result = get_xmm_clone(cp, result); + result = get_xmm_writable(cp, result); sse_shufps(cp->func, result, dst, SHUF(X, Y, Z, W)); dst = result; break; @@ -845,7 +862,7 @@ static boolean emit_ABS( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg neg = aos_get_internal(cp, IMM_NEGS); - struct x86_reg dst = get_xmm_clone(cp, arg0); + struct x86_reg dst = get_xmm_writable(cp, arg0); sse_mulps(cp->func, dst, neg); sse_maxps(cp->func, dst, arg0); @@ -858,7 +875,7 @@ static boolean emit_ADD( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = get_xmm_clone(cp, arg0); + struct x86_reg dst = get_xmm_writable(cp, arg0); sse_addps(cp->func, dst, arg1); @@ -882,10 +899,9 @@ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_inst struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); struct x86_reg tmp = aos_get_xmm_reg(cp); - struct x86_reg dst = get_xmm_clone(cp, arg0); + struct x86_reg dst = get_xmm_writable(cp, arg0); sse_mulps(cp->func, dst, arg1); - /* Now the hard bit: sum the first 3 values: */ sse_movhlps(cp->func, tmp, dst); @@ -908,7 +924,7 @@ static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_inst struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); struct x86_reg tmp = aos_get_xmm_reg(cp); - struct x86_reg dst = get_xmm_clone(cp, arg0); + struct x86_reg dst = get_xmm_writable(cp, arg0); sse_mulps(cp->func, dst, arg1); @@ -932,7 +948,7 @@ static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_inst struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); struct x86_reg tmp = aos_get_xmm_reg(cp); - struct x86_reg dst = get_xmm_clone(cp, arg0); + struct x86_reg dst = get_xmm_writable(cp, arg0); sse_mulps(cp->func, dst, arg1); @@ -1366,7 +1382,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst if (writemask != TGSI_WRITEMASK_XYZW) { store_dest( cp, &op->FullDstRegisters[0], - get_xmm_clone( cp, result ) ); + get_xmm_writable( cp, result ) ); } return TRUE; @@ -1440,7 +1456,7 @@ static boolean emit_MAX( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = get_xmm_clone(cp, arg0); + struct x86_reg dst = get_xmm_writable(cp, arg0); sse_maxps(cp->func, dst, arg1); @@ -1453,7 +1469,7 @@ static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = get_xmm_clone(cp, arg0); + struct x86_reg dst = get_xmm_writable(cp, arg0); sse_minps(cp->func, dst, arg1); @@ -1464,7 +1480,7 @@ static boolean emit_MIN( struct aos_compilation *cp, const struct tgsi_full_inst static boolean emit_MOV( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg dst = get_xmm_clone(cp, arg0); + struct x86_reg dst = get_xmm_writable(cp, arg0); /* potentially nothing to do */ @@ -1476,7 +1492,7 @@ static boolean emit_MUL( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = get_xmm_clone(cp, arg0); + struct x86_reg dst = get_xmm_writable(cp, arg0); sse_mulps(cp->func, dst, arg1); @@ -1494,7 +1510,7 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst /* If we can't clobber old contents of arg0, get a temporary & copy * it there, then clobber it... */ - arg0 = get_xmm_clone(cp, arg0); + arg0 = get_xmm_writable(cp, arg0); sse_mulps(cp->func, arg0, arg1); sse_addps(cp->func, arg0, arg2); @@ -1562,7 +1578,7 @@ static boolean emit_SGE( struct aos_compilation *cp, const struct tgsi_full_inst struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); struct x86_reg ones = aos_get_internal(cp, IMM_ONES); - struct x86_reg dst = get_xmm_clone(cp, arg0); + struct x86_reg dst = get_xmm_writable(cp, arg0); sse_cmpps(cp->func, dst, arg1, cc_NotLessThan); sse_andps(cp->func, dst, ones); @@ -1586,7 +1602,7 @@ static boolean emit_SLT( struct aos_compilation *cp, const struct tgsi_full_inst struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); struct x86_reg ones = aos_get_internal(cp, IMM_ONES); - struct x86_reg dst = get_xmm_clone(cp, arg0); + struct x86_reg dst = get_xmm_writable(cp, arg0); sse_cmpps(cp->func, dst, arg1, cc_LessThan); sse_andps(cp->func, dst, ones); @@ -1599,7 +1615,7 @@ static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = get_xmm_clone(cp, arg0); + struct x86_reg dst = get_xmm_writable(cp, arg0); sse_subps(cp->func, dst, arg1); @@ -1989,7 +2005,6 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, /* decr count, loop if not zero */ x86_dec(cp.func, cp.count_ESI); -/* x86_test(cp.func, cp.count_ESI, cp.count_ESI); */ x86_jcc(cp.func, cc_NZ, label); restore_fpu_state(&cp); -- cgit v1.2.3 From ce331e3a5e2a0505e01637861bdd7f5e6cfbd041 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 25 May 2008 15:44:17 +0100 Subject: draw: special case for writing out scalar results --- src/gallium/auxiliary/draw/draw_vs_aos.c | 127 +++++++++++++++++++++++++------ 1 file changed, 102 insertions(+), 25 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 17b9442d6b..aebc230858 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -697,6 +697,72 @@ static void store_dest( struct aos_compilation *cp, } +static void inject_scalar( struct aos_compilation *cp, + struct x86_reg dst, + struct x86_reg result, + unsigned swizzle ) +{ + sse_shufps(cp->func, dst, dst, swizzle); + sse_movss(cp->func, dst, result); + sse_shufps(cp->func, dst, dst, swizzle); +} + + +static void store_scalar_dest( struct aos_compilation *cp, + const struct tgsi_full_dst_register *reg, + struct x86_reg result ) +{ + unsigned writemask = reg->DstRegister.WriteMask; + struct x86_reg dst; + + if (writemask != TGSI_WRITEMASK_X && + writemask != TGSI_WRITEMASK_Y && + writemask != TGSI_WRITEMASK_Z && + writemask != TGSI_WRITEMASK_W && + writemask != 0) + { + result = get_xmm_writable(cp, result); /* already true, right? */ + sse_shufps(cp->func, result, result, SHUF(X,X,X,X)); + store_dest(cp, reg, result); + return; + } + + result = get_xmm(cp, result); + dst = aos_get_shader_reg_xmm(cp, + reg->DstRegister.File, + reg->DstRegister.Index); + + + + switch (reg->DstRegister.WriteMask) { + case TGSI_WRITEMASK_X: + sse_movss(cp->func, dst, result); + break; + + case TGSI_WRITEMASK_Y: + inject_scalar(cp, dst, result, SHUF(Y, X, Z, W)); + break; + + case TGSI_WRITEMASK_Z: + inject_scalar(cp, dst, result, SHUF(Z, Y, X, W)); + break; + + case TGSI_WRITEMASK_W: + inject_scalar(cp, dst, result, SHUF(W, Y, Z, X)); + break; + + default: + break; + } + + aos_adopt_xmm_reg(cp, + dst, + reg->DstRegister.File, + reg->DstRegister.Index, + TRUE); +} + + static void x87_fst_or_nop( struct x86_function *func, unsigned writemask, @@ -909,11 +975,8 @@ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_inst emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z)); sse_addss(cp->func, dst, tmp); - if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1) - sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X)); - aos_release_xmm_reg(cp, tmp.idx); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_scalar_dest(cp, &op->FullDstRegisters[0], dst); return TRUE; } @@ -935,11 +998,8 @@ static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_inst emit_pshufd(cp, tmp, dst, SHUF(Y,X,W,Z)); sse_addss(cp->func, dst, tmp); - if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1) - sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X)); - aos_release_xmm_reg(cp, tmp.idx); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_scalar_dest(cp, &op->FullDstRegisters[0], dst); return TRUE; } @@ -961,11 +1021,8 @@ static boolean emit_DPH( struct aos_compilation *cp, const struct tgsi_full_inst emit_pshufd(cp, tmp, arg1, SHUF(W,W,W,W)); sse_addss(cp->func, dst, tmp); - if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1) - sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X)); - aos_release_xmm_reg(cp, tmp.idx); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_scalar_dest(cp, &op->FullDstRegisters[0], dst); return TRUE; } @@ -1518,7 +1575,9 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } - +/* Really not sufficient -- need to check for conditions that could + * generate inf/nan values, which will slow things down hugely. + */ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { x87_fld_src(cp, &op->FullSrcRegisters[1], 0); /* a1.x */ @@ -1548,27 +1607,45 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst sse_divss(cp->func, dst, arg0); } - if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1) - sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X)); - - store_dest(cp, &op->FullDstRegisters[0], dst); + store_scalar_dest(cp, &op->FullDstRegisters[0], dst); return TRUE; } + +/* Although rsqrtps() and rcpps() are low precision on some/all SSE + * implementations, it is possible to improve its precision at + * fairly low cost, using a newton/raphson step, as below: + * + * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a) + * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)] + * + * See: http://softwarecommunity.intel.com/articles/eng/1818.htm + */ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg dst = aos_get_xmm_reg(cp); - sse_rsqrtss(cp->func, dst, arg0); - - /* Extend precision here... - */ - - if (op->FullDstRegisters[0].DstRegister.WriteMask != 0x1) - sse_shufps(cp->func, dst, dst, SHUF(X, X, X, X)); + if (1) { + sse_rsqrtss(cp->func, dst, arg0); + } + else { +#if 0 + /* Extend precision here... + */ + sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) ); + sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) ); + + sse_rsqrtss( func, tmp1, src ); /* rsqrtss(a) */ + sse_mulss( func, src, tmp1 ); /* a * rsqrtss(a) */ + sse_mulss( func, dst, tmp1 ); /* .5 * rsqrtss(a) */ + sse_mulss( func, src, tmp1 ); /* a * rsqrtss(a) * rsqrtss(a) */ + sse_subss( func, tmp0, src ); /* 3.0 - (a * rsqrtss(a) * rsqrtss(a)) */ + sse_mulss( func, dst, tmp0 ); /* .5 * r * (3.0 - (a * r * r)) */ +#endif + } - store_dest(cp, &op->FullDstRegisters[0], dst); + store_scalar_dest(cp, &op->FullDstRegisters[0], dst); return TRUE; } -- cgit v1.2.3 From 3afb7198e01516dba38bb3248d4c0161e54650fe Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 25 May 2008 15:45:27 +0100 Subject: draw: remove EXP & LOG from vs_aos.c These don't get hit & look like bug magnets to me... --- src/gallium/auxiliary/draw/draw_vs_aos.c | 85 ++------------------------------ 1 file changed, 4 insertions(+), 81 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index aebc230858..34dc09ead7 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1066,85 +1066,6 @@ static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } -static boolean emit_EXP( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) -{ - struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); - struct x86_reg st0 = x86_make_reg(file_x87, 0); - struct x86_reg st1 = x86_make_reg(file_x87, 1); - struct x86_reg st3 = x86_make_reg(file_x87, 3); - unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; - - /* CAUTION: dst may alias arg0! - */ - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* arg0.x */ - x87_fld(cp->func, st0); /* arg arg */ - - /* by default, fpu is setup to round-to-nearest. We want to - * change this now, and track the state through to the end of the - * generated function so that it isn't repeated unnecessarily. - * Alternately, could subtract .5 to get round to -inf behaviour. - */ - set_fpu_round_neg_inf( cp ); - x87_fprndint( cp->func ); /* flr(a) a */ - x87_fld(cp->func, st0); /* flr(a) flr(a) a */ - x87_fld1(cp->func); /* 1 floor(a) floor(a) a */ - x87_fst_or_nop(cp->func, writemask, 3, dst); /* stack unchanged */ - - x87_fscale(cp->func); /* 2^floor(a) floor(a) a */ - x87_fst(cp->func, st3); /* 2^floor(a) floor(a) a 2^floor(a)*/ - - x87_fstp_or_pop(cp->func, writemask, 0, dst); /* flr(a) a 2^flr(a) */ - - x87_fsubp(cp->func, st1); /* frac(a) 2^flr(a) */ - - x87_fst_or_nop(cp->func, writemask, 1, dst); /* frac(a) 2^flr(a) */ - - x87_f2xm1(cp->func); /* (2^frac(a))-1 2^flr(a)*/ - x87_fld1(cp->func); /* 1 (2^frac(a))-1 2^flr(a)*/ - x87_faddp(cp->func, st1); /* 2^frac(a) 2^flr(a) */ - x87_fmulp(cp->func, st1); /* 2^a */ - - x87_fstp_or_pop(cp->func, writemask, 2, dst); - -/* dst[0] = 2^floor(tmp); */ -/* dst[1] = frac(tmp); */ -/* dst[2] = 2^floor(tmp) * 2^frac(tmp); */ -/* dst[3] = 1.0F; */ - return TRUE; -} - -static boolean emit_LOG( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) -{ - struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); - struct x86_reg st0 = x86_make_reg(file_x87, 0); - struct x86_reg st1 = x86_make_reg(file_x87, 1); - struct x86_reg st2 = x86_make_reg(file_x87, 2); - unsigned writemask = op->FullDstRegisters[0].DstRegister.WriteMask; - - /* CAUTION: dst may alias arg0! - */ - x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* arg0.x */ - x87_fabs(cp->func); /* |arg0.x| */ - x87_fxtract(cp->func); /* mantissa(arg0.x), exponent(arg0.x) */ - x87_fst(cp->func, st2); /* mantissa, exponent, mantissa */ - x87_fld1(cp->func); /* 1, mantissa, exponent, mantissa */ - x87_fyl2x(cp->func); /* log2(mantissa), exponent, mantissa */ - x87_fadd(cp->func, st0, st1); /* e+l2(m), e, m */ - - x87_fstp_or_pop(cp->func, writemask, 2, dst); /* e, m */ - - x87_fld1(cp->func); /* 1, e, m */ - x87_fsub(cp->func, st1, st0); /* 1, e-1, m */ - - x87_fstp_or_pop(cp->func, writemask, 3, dst); /* e-1,m */ - x87_fstp_or_pop(cp->func, writemask, 0, dst); /* m */ - - x87_fadd(cp->func, st0, st0); /* 2m */ - - x87_fstp_or_pop( cp->func, writemask, 1, dst ); - - return TRUE; -} static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { @@ -1755,10 +1676,12 @@ emit_instruction( struct aos_compilation *cp, return emit_RSQ(cp, inst); case TGSI_OPCODE_EXP: - return emit_EXP(cp, inst); + /*return emit_EXP(cp, inst);*/ + return FALSE; case TGSI_OPCODE_LOG: - return emit_LOG(cp, inst); + /*return emit_LOG(cp, inst);*/ + return FALSE; case TGSI_OPCODE_MUL: return emit_MUL(cp, inst); -- cgit v1.2.3 From 9c7568965c00dcc2e9403a2f94f1cd09dcd783ae Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 25 May 2008 15:47:04 +0100 Subject: draw: slight tweak for XPD opcode --- src/gallium/auxiliary/draw/draw_vs_aos.c | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 34dc09ead7..37d04e45a6 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1626,31 +1626,24 @@ static boolean emit_XPD( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg arg1 = fetch_src(cp, &op->FullSrcRegisters[1]); - struct x86_reg dst = aos_get_xmm_reg(cp); struct x86_reg tmp0 = aos_get_xmm_reg(cp); struct x86_reg tmp1 = aos_get_xmm_reg(cp); - /* Could avoid tmp0, tmp1 if we overwrote arg0, arg1. Need a way - * to invalidate registers. This will come with better analysis - * (liveness analysis) of the incoming program. - */ - emit_pshufd(cp, dst, arg0, SHUF(Y, Z, X, W)); - emit_pshufd(cp, tmp1, arg1, SHUF(Z, X, Y, W)); - sse_mulps(cp->func, dst, tmp1); - emit_pshufd(cp, tmp0, arg0, SHUF(Z, X, Y, W)); emit_pshufd(cp, tmp1, arg1, SHUF(Y, Z, X, W)); - sse_mulps(cp->func, tmp0, tmp1); - sse_subps(cp->func, dst, tmp0); + sse_mulps(cp->func, tmp1, arg0); + emit_pshufd(cp, tmp0, arg0, SHUF(Y, Z, X, W)); + sse_mulps(cp->func, tmp0, arg1); + sse_subps(cp->func, tmp1, tmp0); + sse_shufps(cp->func, tmp1, tmp1, SHUF(Y, Z, X, W)); +/* dst[2] = arg0[0] * arg1[1] - arg0[1] * arg1[0]; */ /* dst[0] = arg0[1] * arg1[2] - arg0[2] * arg1[1]; */ /* dst[1] = arg0[2] * arg1[0] - arg0[0] * arg1[2]; */ -/* dst[2] = arg0[0] * arg1[1] - arg0[1] * arg1[0]; */ /* dst[3] is undef */ aos_release_xmm_reg(cp, tmp0.idx); - aos_release_xmm_reg(cp, tmp1.idx); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->FullDstRegisters[0], tmp1); return TRUE; } -- cgit v1.2.3 From 359058e7b77ddbac5eec7e8d1c77232bcbb1adbf Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 25 May 2008 15:28:30 +0100 Subject: cso: use memcpy rather than structure asignment for copying Apparently gcc will omit to copy hidden padding bytes under some circumstances, which means later on memcmp() will indicate a difference between structs even though all the visible members are identical. --- src/gallium/auxiliary/cso_cache/cso_context.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index dc5987df44..af4af8ac1d 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -292,7 +292,7 @@ enum pipe_error cso_set_blend(struct cso_context *ctx, if (!cso) return PIPE_ERROR_OUT_OF_MEMORY; - cso->state = *templ; + memcpy(&cso->state, templ, sizeof(*templ)); cso->data = ctx->pipe->create_blend_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_blend_state; cso->context = ctx->pipe; @@ -350,7 +350,7 @@ enum pipe_error cso_single_sampler(struct cso_context *ctx, if (!cso) return PIPE_ERROR_OUT_OF_MEMORY; - cso->state = *templ; + memcpy(&cso->state, templ, sizeof(*templ)); cso->data = ctx->pipe->create_sampler_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_sampler_state; cso->context = ctx->pipe; @@ -508,7 +508,7 @@ enum pipe_error cso_set_depth_stencil_alpha(struct cso_context *ctx, if (!cso) return PIPE_ERROR_OUT_OF_MEMORY; - cso->state = *templ; + memcpy(&cso->state, templ, sizeof(*templ)); cso->data = ctx->pipe->create_depth_stencil_alpha_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_depth_stencil_alpha_state; cso->context = ctx->pipe; @@ -564,7 +564,7 @@ enum pipe_error cso_set_rasterizer(struct cso_context *ctx, if (!cso) return PIPE_ERROR_OUT_OF_MEMORY; - cso->state = *templ; + memcpy(&cso->state, templ, sizeof(*templ)); cso->data = ctx->pipe->create_rasterizer_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_rasterizer_state; cso->context = ctx->pipe; @@ -726,7 +726,7 @@ enum pipe_error cso_set_vertex_shader(struct cso_context *ctx, if (!cso) return PIPE_ERROR_OUT_OF_MEMORY; - cso->state = *templ; + memcpy(cso->state, templ, sizeof(*templ)); cso->data = ctx->pipe->create_vs_state(ctx->pipe, &cso->state); cso->delete_state = (cso_state_callback)ctx->pipe->delete_vs_state; cso->context = ctx->pipe; -- cgit v1.2.3 From 721fb5597e687fc1446119002ab03cc428104b29 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 26 May 2008 00:09:02 +0100 Subject: draw: more aos tweaks --- src/gallium/auxiliary/draw/draw_vs_aos.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 37d04e45a6..916203c66b 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -202,6 +202,7 @@ static struct x86_reg get_xmm_writable( struct aos_compilation *cp, reg = tmp; } + cp->xmm[reg.idx].last_used = cp->insn_counter; return reg; } @@ -215,6 +216,7 @@ static struct x86_reg get_xmm( struct aos_compilation *cp, reg = tmp; } + cp->xmm[reg.idx].last_used = cp->insn_counter; return reg; } @@ -281,6 +283,18 @@ void aos_adopt_xmm_reg( struct aos_compilation *cp, return; } + /* If this xmm reg is already holding this shader reg, just update + * last_used, and don't clobber the dirty flag... + */ + if (cp->xmm[reg.idx].file == file && + cp->xmm[reg.idx].idx == idx) + { + cp->xmm[reg.idx].dirty |= dirty; + cp->xmm[reg.idx].last_used = cp->insn_counter; + return; + } + + /* If any xmm reg thinks it holds this shader reg, break the * illusion. */ @@ -382,8 +396,16 @@ static struct x86_reg aos_get_shader_reg_xmm( struct aos_compilation *cp, unsigned file, unsigned idx ) { - struct x86_reg reg = aos_get_shader_reg( cp, file, idx ); - return get_xmm( cp, reg ); + struct x86_reg reg = get_xmm( cp, + aos_get_shader_reg( cp, file, idx ) ); + + aos_adopt_xmm_reg( cp, + reg, + file, + idx, + FALSE ); + + return reg; } -- cgit v1.2.3 From 351eca365c0ba488000c3826d5093de6170381e4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 26 May 2008 11:03:00 +0100 Subject: draw: extend precision in RSQ opcode --- src/gallium/auxiliary/draw/draw_vs_aos.c | 48 ++++++++++++++++++-------------- src/gallium/auxiliary/draw/draw_vs_aos.h | 1 + 2 files changed, 28 insertions(+), 21 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 916203c66b..1622358ae1 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -140,7 +140,8 @@ static void init_internals( struct aos_machine *machine ) ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f); ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f); ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv); - ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255); + ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255); + ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f); machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP | @@ -1561,35 +1562,40 @@ static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_inst * * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a) * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)] + * or: + * x1 = rsqrtps(a) * [1.5 - .5 * a * rsqrtps(a) * rsqrtps(a)] + * * * See: http://softwarecommunity.intel.com/articles/eng/1818.htm */ static boolean emit_RSQ( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { - struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); - struct x86_reg dst = aos_get_xmm_reg(cp); - if (1) { - sse_rsqrtss(cp->func, dst, arg0); + if (0) { + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg r = aos_get_xmm_reg(cp); + sse_rsqrtss(cp->func, r, arg0); + store_scalar_dest(cp, &op->FullDstRegisters[0], r); + return TRUE; } else { -#if 0 - /* Extend precision here... - */ - sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) ); - sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) ); - - sse_rsqrtss( func, tmp1, src ); /* rsqrtss(a) */ - sse_mulss( func, src, tmp1 ); /* a * rsqrtss(a) */ - sse_mulss( func, dst, tmp1 ); /* .5 * rsqrtss(a) */ - sse_mulss( func, src, tmp1 ); /* a * rsqrtss(a) * rsqrtss(a) */ - sse_subss( func, tmp0, src ); /* 3.0 - (a * rsqrtss(a) * rsqrtss(a)) */ - sse_mulss( func, dst, tmp0 ); /* .5 * r * (3.0 - (a * r * r)) */ -#endif - } + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg r = aos_get_xmm_reg(cp); - store_scalar_dest(cp, &op->FullDstRegisters[0], dst); - return TRUE; + struct x86_reg neg_half = get_reg_ptr( cp, AOS_FILE_INTERNAL, IMM_RSQ ); + struct x86_reg one_point_five = x86_make_disp( neg_half, 4 ); + struct x86_reg src = get_xmm_writable( cp, arg0 ); + + sse_rsqrtss( cp->func, r, src ); /* rsqrtss(a) */ + sse_mulss( cp->func, src, neg_half ); /* -.5 * a */ + sse_mulss( cp->func, src, r ); /* -.5 * a * r */ + sse_mulss( cp->func, src, r ); /* -.5 * a * r * r */ + sse_addss( cp->func, src, one_point_five ); /* 1.5 - .5 * a * r * r */ + sse_mulss( cp->func, r, src ); /* r * (1.5 - .5 * a * r * r) */ + + store_scalar_dest(cp, &op->FullDstRegisters[0], r); + return TRUE; + } } diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index c08c73d4bc..fffe2e4658 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -175,6 +175,7 @@ boolean aos_emit_outputs( struct aos_compilation *cp ); #define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */ #define IMM_255 4 /* 255, 255, 255, 255 */ #define IMM_NEGS 5 /* -1,-1,-1,-1 */ +#define IMM_RSQ 6 /* -.5,1.5,_,_ */ struct x86_reg aos_get_internal( struct aos_compilation *cp, unsigned imm ); -- cgit v1.2.3 From a0e2955a16a8a04afe7f84b1c8551211542a6fbd Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 27 May 2008 00:13:57 +0900 Subject: pipebuffer: Allow slab allocations of buffers of inequal size. --- src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index 45ba158a4d..b9dff09804 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -312,8 +312,8 @@ pb_slab_manager_create_buffer(struct pb_manager *_mgr, struct list_head *list; /* check size */ - assert(size == mgr->bufSize); - if(size != mgr->bufSize) + assert(size <= mgr->bufSize); + if(size > mgr->bufSize) return NULL; /* check if we can provide the requested alignment */ -- cgit v1.2.3 From 35cfd0a4900750f67cba4f64929ff3347f0bd46f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 27 May 2008 00:19:41 +0900 Subject: pipebuffer: Malloc buffer provider. Simple wrapper around pb_malloc_buffer_create for convenience. --- .../auxiliary/pipebuffer/pb_buffer_malloc.c | 33 +++++++++++++++++++++- src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 9 ++++++ 2 files changed, 41 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c index 9e8244f909..6c3502eea7 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c @@ -30,13 +30,14 @@ * Implementation of malloc-based buffers to store data that can't be processed * by the hardware. * - * \author José Fonseca + * \author Jose Fonseca */ #include "pipe/p_debug.h" #include "pipe/p_util.h" #include "pb_buffer.h" +#include "pb_bufmgr.h" struct malloc_buffer @@ -125,3 +126,33 @@ pb_malloc_buffer_create(size_t size, return &buf->base; } + + +static struct pb_buffer * +pb_malloc_buffer_create_buffer(struct pb_manager *mgr, + size_t size, + const struct pb_desc *desc) +{ + return pb_malloc_buffer_create(size, desc); +} + + +static void +pb_malloc_bufmgr_destroy(struct pb_manager *mgr) +{ + /* No-op */ +} + + +static struct pb_manager +pb_malloc_bufmgr = { + pb_malloc_buffer_create_buffer, + pb_malloc_bufmgr_destroy +}; + + +struct pb_manager * +pb_malloc_bufmgr_create(void) +{ + return &pb_malloc_bufmgr; +} diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index f6cc7a525b..4a922d16c1 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -79,6 +79,15 @@ struct pb_manager }; +/** + * Malloc buffer provider. + * + * Simple wrapper around pb_malloc_buffer_create for convenience. + */ +struct pb_manager * +pb_malloc_bufmgr_create(void); + + /** * Static buffer pool sub-allocator. * -- cgit v1.2.3 From 529762d5df6d9427f9fa0927e38b9886e412a6bc Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 26 May 2008 18:29:47 +0100 Subject: draw: add missing break --- src/gallium/auxiliary/draw/draw_pt_emit.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 22a83ec78f..e21af3d287 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -100,6 +100,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit, case EMIT_4UB: output_format = PIPE_FORMAT_B8G8R8A8_UNORM; emit_sz = 4 * sizeof(ubyte); + break; default: assert(0); output_format = PIPE_FORMAT_NONE; -- cgit v1.2.3 From d80c24a81a9a46c132fe877dde6919a57cacf8c0 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 26 May 2008 18:37:34 +0100 Subject: draw: defensively reset render primitive, which can get clobbered by clipping --- src/gallium/auxiliary/draw/draw_pt_emit.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index e21af3d287..01396a749e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -40,6 +40,7 @@ struct pt_emit { struct translate *translate; struct translate_cache *cache; + unsigned prim; }; void draw_pt_emit_prepare( struct pt_emit *emit, @@ -51,8 +52,14 @@ void draw_pt_emit_prepare( struct pt_emit *emit, struct translate_key hw_key; unsigned i; boolean ok; + - ok = draw->render->set_primitive(draw->render, prim); + /* XXX: may need to defensively reset this later on as clipping can + * clobber this state in the render backend. + */ + emit->prim = prim; + + ok = draw->render->set_primitive(draw->render, emit->prim); if (!ok) { assert(0); return; @@ -145,6 +152,14 @@ void draw_pt_emit( struct pt_emit *emit, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + /* XXX: and work out some way to coordinate the render primitive + * between vbuf.c and here... + */ + if (!draw->render->set_primitive(draw->render, emit->prim)) { + assert(0); + return; + } + hw_verts = render->allocate_vertices(render, (ushort)translate->key.output_stride, (ushort)vertex_count); -- cgit v1.2.3 From 93bfc94c351a2eafd43ac7a20b362d969f98d86a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 26 May 2008 18:54:35 +0100 Subject: draw: defensively reset render primitive some more --- src/gallium/auxiliary/draw/draw_pt_emit.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 01396a749e..d0c9577ee6 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -213,6 +213,14 @@ void draw_pt_emit_linear(struct pt_emit *emit, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + /* XXX: and work out some way to coordinate the render primitive + * between vbuf.c and here... + */ + if (!draw->render->set_primitive(draw->render, emit->prim)) { + assert(0); + return; + } + hw_verts = render->allocate_vertices(render, (ushort)translate->key.output_stride, (ushort)count); -- cgit v1.2.3 From aa16e3a2750993afdba16c24237bb6d8d8e4d91a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 26 May 2008 19:10:44 +0100 Subject: draw: defensively flush pipeline backend when setting primitive --- src/gallium/auxiliary/draw/draw_pt_emit.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index d0c9577ee6..e37256b1ff 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -53,6 +53,10 @@ void draw_pt_emit_prepare( struct pt_emit *emit, unsigned i; boolean ok; + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + /* XXX: may need to defensively reset this later on as clipping can * clobber this state in the render backend. -- cgit v1.2.3 From 4e2567f0ab6afd701bea4c35e388663e90f5cb6c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 27 May 2008 10:42:58 +0100 Subject: draw: some possible fixes for spilling --- src/gallium/auxiliary/draw/draw_vs_aos.c | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 1622358ae1..99630e4f75 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -253,6 +253,7 @@ struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp ) cp->xmm[oldest].file = TGSI_FILE_NULL; cp->xmm[oldest].idx = 0; + cp->xmm[oldest].dirty = 0; cp->xmm[oldest].last_used = cp->insn_counter; return x86_make_reg(file_XMM, oldest); } @@ -284,24 +285,18 @@ void aos_adopt_xmm_reg( struct aos_compilation *cp, return; } - /* If this xmm reg is already holding this shader reg, just update - * last_used, and don't clobber the dirty flag... - */ - if (cp->xmm[reg.idx].file == file && - cp->xmm[reg.idx].idx == idx) - { - cp->xmm[reg.idx].dirty |= dirty; - cp->xmm[reg.idx].last_used = cp->insn_counter; - return; - } - /* If any xmm reg thinks it holds this shader reg, break the * illusion. */ for (i = 0; i < 8; i++) { if (cp->xmm[i].file == file && - cp->xmm[i].idx == idx) { + cp->xmm[i].idx == idx) + { + /* If an xmm reg is already holding this shader reg, take into account its + * dirty flag... + */ + dirty |= cp->xmm[i].dirty; aos_release_xmm_reg(cp, i); } } @@ -1989,6 +1984,17 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, debug_printf("\n"); } + + { + unsigned i; + for (i = 0; i < 8; i++) { + if (cp.xmm[i].file != TGSI_FILE_OUTPUT) { + cp.xmm[i].file = TGSI_FILE_NULL; + cp.xmm[i].dirty = 0; + } + } + } + if (cp.error) goto fail; -- cgit v1.2.3 From 5dc44184fa9f07465b7ff2be94394c55392ce5e9 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 27 May 2008 11:10:50 +0100 Subject: draw: fix writemask/shufps confusion --- src/gallium/auxiliary/draw/draw_vs_aos.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 99630e4f75..434bd2a9ab 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -687,17 +687,18 @@ static void store_dest( struct aos_compilation *cp, sse_movss(cp->func, dst, get_xmm(cp, result)); break; - case TGSI_WRITEMASK_XY: + case TGSI_WRITEMASK_ZW: sse_shufps(cp->func, dst, get_xmm(cp, result), SHUF(X, Y, Z, W)); break; - case TGSI_WRITEMASK_ZW: + case TGSI_WRITEMASK_XY: result = get_xmm_writable(cp, result); sse_shufps(cp->func, result, dst, SHUF(X, Y, Z, W)); dst = result; break; case TGSI_WRITEMASK_YZW: + result = get_xmm_writable(cp, result); sse_movss(cp->func, result, dst); dst = result; break; @@ -891,7 +892,7 @@ static void emit_print( struct aos_compilation *cp, unsigned idx ) { struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); - struct x86_reg arg = get_reg_ptr( cp, file, idx ); + struct x86_reg arg = aos_get_shader_reg_ptr( cp, file, idx ); unsigned i; /* There shouldn't be anything on the x87 stack. Can add this -- cgit v1.2.3 From adaaa29218f1babad874f50681ca971fdd3b8a40 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 27 May 2008 11:12:42 +0100 Subject: tgsi: export utils for dumping individual instructions --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 24 ++++++++++++------------ src/gallium/auxiliary/tgsi/util/tgsi_dump.h | 18 ++++++++++++++++++ 2 files changed, 30 insertions(+), 12 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 4c65ffd780..648afa2a51 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -539,9 +539,9 @@ static const char *TGSI_MODULATES[] = "MODULATE_EIGHTH" }; -static void -dump_declaration_short( - struct tgsi_full_declaration *decl ) +void +tgsi_dump_declaration( + const struct tgsi_full_declaration *decl ) { TXT( "\nDCL " ); ENM( decl->Declaration.File, TGSI_FILES_SHORT ); @@ -672,9 +672,9 @@ dump_declaration_verbose( } } -static void -dump_immediate_short( - struct tgsi_full_immediate *imm ) +void +tgsi_dump_immediate( + const struct tgsi_full_immediate *imm ) { unsigned i; @@ -727,9 +727,9 @@ dump_immediate_verbose( } } -static void -dump_instruction_short( - struct tgsi_full_instruction *inst, +void +tgsi_dump_instruction( + const struct tgsi_full_instruction *inst, unsigned instno ) { unsigned i; @@ -1281,17 +1281,17 @@ tgsi_dump( switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_DECLARATION: - dump_declaration_short( + tgsi_dump_declaration( &parse.FullToken.FullDeclaration ); break; case TGSI_TOKEN_TYPE_IMMEDIATE: - dump_immediate_short( + tgsi_dump_immediate( &parse.FullToken.FullImmediate ); break; case TGSI_TOKEN_TYPE_INSTRUCTION: - dump_instruction_short( + tgsi_dump_instruction( &parse.FullToken.FullInstruction, instno ); instno++; diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h index beb0155d56..ca83bdef20 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h @@ -14,6 +14,24 @@ tgsi_dump( const struct tgsi_token *tokens, unsigned flags ); +struct tgsi_full_immediate; +struct tgsi_full_instruction; +struct tgsi_full_declaration; + +void +tgsi_dump_immediate( + const struct tgsi_full_immediate *imm ); + +void +tgsi_dump_instruction( + const struct tgsi_full_instruction *inst, + unsigned instno ); + +void +tgsi_dump_declaration( + const struct tgsi_full_declaration *decl ); + + #if defined __cplusplus } #endif -- cgit v1.2.3 From f7946bc7c0435ab2926cd729dfd8312222a3aa2a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 27 May 2008 11:15:31 +0100 Subject: draw: dump individual instructions as they are processed --- src/gallium/auxiliary/draw/draw_vs_aos.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 434bd2a9ab..d3989fe107 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -42,7 +42,20 @@ #include "rtasm/rtasm_x86sse.h" #ifdef PIPE_ARCH_X86 +#define DISASSEM 0 +static const char *files[] = +{ + "NULL", + "CONST", + "IN", + "OUT", + "TEMP", + "SAMP", + "ADDR", + "IMM", + "INTERNAL", +}; static INLINE boolean eq( struct x86_reg a, struct x86_reg b ) @@ -184,7 +197,11 @@ static void spill( struct aos_compilation *cp, unsigned idx ) struct x86_reg oldval = get_reg_ptr(cp, cp->xmm[idx].file, cp->xmm[idx].idx); - + + if (0) debug_printf("\nspill %s[%d]", + files[cp->xmm[idx].file], + cp->xmm[idx].idx); + assert(cp->xmm[idx].dirty); sse_movaps(cp->func, oldval, x86_make_reg(file_XMM, idx)); cp->xmm[idx].dirty = 0; @@ -1975,6 +1992,9 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, break; case TGSI_TOKEN_TYPE_INSTRUCTION: + if (DISASSEM) + tgsi_dump_instruction( &parse.FullToken.FullInstruction, cp.insn_counter ); + if (!emit_instruction( &cp, &parse.FullToken.FullInstruction )) goto fail; break; @@ -1982,7 +2002,9 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, x87_assert_stack_empty(cp.func); cp.insn_counter++; - debug_printf("\n"); + + if (DISASSEM) + debug_printf("\n"); } -- cgit v1.2.3 From 50c1d329b95ad78e03ca4d537daee4d11f308c7a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 27 May 2008 11:58:55 +0100 Subject: draw: restore extras path in draw_pt_vcache.c, keep pipeline flags out of non-pipeline elts --- src/gallium/auxiliary/draw/draw_pt_vcache.c | 117 +++++++++++++++------------- 1 file changed, 65 insertions(+), 52 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 6c17edba34..96e02fbf3a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -104,10 +104,22 @@ static INLINE void vcache_elt( struct vcache_frontend *vcache, static void vcache_triangle( struct vcache_frontend *vcache, - ushort flags, unsigned i0, unsigned i1, unsigned i2 ) +{ + vcache_elt(vcache, i0, 0); + vcache_elt(vcache, i1, 0); + vcache_elt(vcache, i2, 0); + vcache_check_flush(vcache); +} + + +static void vcache_triangle_flags( struct vcache_frontend *vcache, + ushort flags, + unsigned i0, + unsigned i1, + unsigned i2 ) { vcache_elt(vcache, i0, flags); vcache_elt(vcache, i1, 0); @@ -116,9 +128,19 @@ static void vcache_triangle( struct vcache_frontend *vcache, } static void vcache_line( struct vcache_frontend *vcache, - ushort flags, unsigned i0, unsigned i1 ) +{ + vcache_elt(vcache, i0, 0); + vcache_elt(vcache, i1, 0); + vcache_check_flush(vcache); +} + + +static void vcache_line_flags( struct vcache_frontend *vcache, + ushort flags, + unsigned i0, + unsigned i1 ) { vcache_elt(vcache, i0, flags); vcache_elt(vcache, i1, 0); @@ -139,63 +161,46 @@ static void vcache_quad( struct vcache_frontend *vcache, unsigned i2, unsigned i3 ) { - vcache_triangle( vcache, - ( DRAW_PIPE_RESET_STIPPLE | - DRAW_PIPE_EDGE_FLAG_0 | - DRAW_PIPE_EDGE_FLAG_2 ), - i0, i1, i3 ); - - vcache_triangle( vcache, - ( DRAW_PIPE_EDGE_FLAG_0 | - DRAW_PIPE_EDGE_FLAG_1 ), - i1, i2, i3 ); + vcache_triangle( vcache, i0, i1, i3 ); + vcache_triangle( vcache, i1, i2, i3 ); +} + +static void vcache_ef_quad( struct vcache_frontend *vcache, + unsigned i0, + unsigned i1, + unsigned i2, + unsigned i3 ) +{ + vcache_triangle_flags( vcache, + ( DRAW_PIPE_RESET_STIPPLE | + DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_2 ), + i0, i1, i3 ); + + vcache_triangle_flags( vcache, + ( DRAW_PIPE_EDGE_FLAG_0 | + DRAW_PIPE_EDGE_FLAG_1 ), + i1, i2, i3 ); } /* At least for now, we're back to using a template include file for * this. The two paths aren't too different though - it may be * possible to reunify them. */ -#define TRIANGLE(flags,i0,i1,i2) \ - vcache_triangle(vcache, \ - flags, \ - get_elt(elts,i0), \ - get_elt(elts,i1), \ - get_elt(elts,i2)) - -#define QUAD(i0,i1,i2,i3) \ - vcache_quad(vcache, \ - get_elt(elts,i0), \ - get_elt(elts,i1), \ - get_elt(elts,i2), \ - get_elt(elts,i3)) - -#define LINE(flags,i0,i1) \ - vcache_line(vcache, \ - flags, \ - get_elt(elts,i0), \ - get_elt(elts,i1)) - -#define POINT(i0) \ - vcache_point(vcache, \ - get_elt(elts,i0)) - +#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle_flags(vc,flags,i0,i1,i2) +#define QUAD(vc,i0,i1,i2,i3) vcache_ef_quad(vc,i0,i1,i2,i3) +#define LINE(vc,flags,i0,i1) vcache_line_flags(vc,flags,i0,i1) +#define POINT(vc,i0) vcache_point(vc,i0) +#define FUNC vcache_run_extras +#include "draw_pt_vcache_tmp.h" + +#define TRIANGLE(vc,flags,i0,i1,i2) vcache_triangle(vc,i0,i1,i2) +#define QUAD(vc,i0,i1,i2,i3) vcache_quad(vc,i0,i1,i2,i3) +#define LINE(vc,flags,i0,i1) vcache_line(vc,i0,i1) +#define POINT(vc,i0) vcache_point(vc,i0) #define FUNC vcache_run -#define ARGS \ - struct draw_pt_front_end *frontend, \ - pt_elt_func get_elt, \ - const void *elts - -#define LOCAL_VARS \ - struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; \ - struct draw_context *draw = vcache->draw; \ - boolean flatfirst = (draw->rasterizer->flatshade && \ - draw->rasterizer->flatshade_first); \ - unsigned prim = vcache->input_prim; \ - unsigned i, flags; +#include "draw_pt_vcache_tmp.h" -#define FLUSH vcache_flush( vcache ) - -#include "draw_pt_decompose.h" @@ -208,7 +213,15 @@ static void vcache_prepare( struct draw_pt_front_end *frontend, { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; - vcache->base.run = vcache_run; + if (opt & PT_PIPELINE) + { + vcache->base.run = vcache_run_extras; + } + else + { + vcache->base.run = vcache_run; + } + vcache->input_prim = prim; vcache->output_prim = draw_pt_reduced_prim(prim); -- cgit v1.2.3 From a08c574bfcf72c7f7ffbeb35c10347b491ef87fb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 27 May 2008 12:26:23 +0100 Subject: draw: hook up viewport / rhw emit to varient key state --- .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 4 +- src/gallium/auxiliary/draw/draw_vs_aos.c | 16 +++---- src/gallium/auxiliary/draw/draw_vs_varient.c | 52 ++++++++++++++++++---- 3 files changed, 53 insertions(+), 19 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 7fefd391a6..2f2e7195b3 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -94,8 +94,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle, fse->key.nr_elements = MAX2(num_vs_outputs, /* outputs - translate to hw format */ num_vs_inputs); /* inputs - fetch from api format */ - fse->key.viewport = 1; - fse->key.clip = 0; + fse->key.viewport = !draw->identity_viewport; + fse->key.clip = !draw->bypass_clipping; fse->key.pad = 0; memset(fse->key.element, 0, diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index d3989fe107..c63553216c 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -2021,11 +2021,14 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, if (cp.error) goto fail; - if (cp.vaos->base.key.viewport) { - if (0) - emit_viewport(&cp); - else - emit_rhw_viewport(&cp); + if (cp.vaos->base.key.clip) { + /* not really handling clipping, just do the rhw so we can + * see the results... + */ + emit_rhw_viewport(&cp); + } + else if (cp.vaos->base.key.viewport) { + emit_viewport(&cp); } /* Emit output... TODO: do this eagerly after the last write to a @@ -2188,9 +2191,6 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, { struct draw_vs_varient_aos_sse *vaos = CALLOC_STRUCT(draw_vs_varient_aos_sse); - if (key->clip) - return NULL; - if (!vaos) goto fail; diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index c15c648527..d4deabfff3 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -89,9 +89,9 @@ static void vsvg_set_input( struct draw_vs_varient *varient, /* Mainly for debug at this stage: */ -static void do_viewport( struct draw_vs_varient_generic *vsvg, - unsigned count, - void *output_buffer ) +static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg, + unsigned count, + void *output_buffer ) { char *ptr = (char *)output_buffer; const float *scale = vsvg->viewport.scale; @@ -109,6 +109,25 @@ static void do_viewport( struct draw_vs_varient_generic *vsvg, data[3] = w; } } + +static void do_viewport( struct draw_vs_varient_generic *vsvg, + unsigned count, + void *output_buffer ) +{ + char *ptr = (char *)output_buffer; + const float *scale = vsvg->viewport.scale; + const float *trans = vsvg->viewport.translate; + unsigned stride = vsvg->base.key.output_stride; + unsigned j; + + for (j = 0; j < count; j++, ptr += stride) { + float *data = (float *)ptr; + + data[0] = data[0] * scale[0] + trans[0]; + data[1] = data[1] * scale[1] + trans[1]; + data[2] = data[2] * scale[2] + trans[2]; + } +} static void vsvg_run_elts( struct draw_vs_varient *varient, @@ -136,10 +155,20 @@ static void vsvg_run_elts( struct draw_vs_varient *varient, vsvg->base.key.output_stride, vsvg->base.key.output_stride); - if (vsvg->base.key.viewport) + + if (vsvg->base.key.clip) { + /* not really handling clipping, just do the rhw so we can + * see the results... + */ + do_rhw_viewport( vsvg, + count, + output_buffer ); + } + else if (vsvg->base.key.viewport) { do_viewport( vsvg, count, output_buffer ); + } //if (!vsvg->already_in_emit_format) @@ -182,11 +211,19 @@ static void vsvg_run_linear( struct draw_vs_varient *varient, vsvg->base.key.output_stride, vsvg->base.key.output_stride); - if (vsvg->base.key.viewport) + if (vsvg->base.key.clip) { + /* not really handling clipping, just do the rhw so we can + * see the results... + */ + do_rhw_viewport( vsvg, + count, + output_buffer ); + } + else if (vsvg->base.key.viewport) { do_viewport( vsvg, count, output_buffer ); - + } //if (!vsvg->already_in_emit_format) vsvg->emit->set_buffer( vsvg->emit, @@ -224,9 +261,6 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, unsigned i; struct translate_key fetch, emit; - if (key->clip) - return NULL; - struct draw_vs_varient_generic *vsvg = CALLOC_STRUCT( draw_vs_varient_generic ); if (vsvg == NULL) return NULL; -- cgit v1.2.3 From 9752ebd99e16646fed3c14712fc0af2c34c9e48f Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Tue, 27 May 2008 12:59:46 +0200 Subject: draw: Fix for EMIT_4UB case --- src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 2f2e7195b3..c6249b4b41 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -143,7 +143,10 @@ static void fse_prepare( struct draw_pt_middle_end *middle, output_format = PIPE_FORMAT_R32_FLOAT; emit_sz = 1 * sizeof(float); vs_output = num_vs_outputs + 1; - + break; + case EMIT_4UB: + output_format = PIPE_FORMAT_B8G8R8A8_UNORM; + emit_sz = 4 * sizeof(ubyte); break; default: assert(0); -- cgit v1.2.3 From 9f9f6c21be105de41a58128605b911e679efe8f0 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Tue, 27 May 2008 13:15:52 +0200 Subject: draw: Fixed typo in draw_pt_varray and added comments --- src/gallium/auxiliary/draw/draw_pt_varray.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index 06fd866ccd..260f28f284 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -158,14 +158,14 @@ static INLINE void varray_point( struct varray_frontend *varray, static unsigned decompose_prim[PIPE_PRIM_POLYGON + 1] = { PIPE_PRIM_POINTS, PIPE_PRIM_LINES, + PIPE_PRIM_LINES, /* decomposed LINELOOP */ PIPE_PRIM_LINE_STRIP, - PIPE_PRIM_LINES, /* decomposed */ PIPE_PRIM_TRIANGLES, PIPE_PRIM_TRIANGLE_STRIP, - PIPE_PRIM_TRIANGLES, /* decomposed */ + PIPE_PRIM_TRIANGLES, /* decomposed TRI_FAN */ PIPE_PRIM_QUADS, PIPE_PRIM_QUAD_STRIP, - PIPE_PRIM_TRIANGLES /* decomposed */ + PIPE_PRIM_TRIANGLES /* decomposed POLYGON */ }; -- cgit v1.2.3 From f64c44ad3e55467ce964871502445cf5a0fb46d6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 27 May 2008 14:17:57 +0100 Subject: draw: remove dead file --- src/gallium/auxiliary/draw/draw_pt_middle_fse.c | 705 ------------------------ 1 file changed, 705 deletions(-) delete mode 100644 src/gallium/auxiliary/draw/draw_pt_middle_fse.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_middle_fse.c b/src/gallium/auxiliary/draw/draw_pt_middle_fse.c deleted file mode 100644 index 643ea151c1..0000000000 --- a/src/gallium/auxiliary/draw/draw_pt_middle_fse.c +++ /dev/null @@ -1,705 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - - /* - * Authors: - * Keith Whitwell - */ - - -#include "pipe/p_util.h" -#include "draw/draw_context.h" -#include "draw/draw_private.h" -#include "draw/draw_vbuf.h" -#include "draw/draw_vertex.h" -#include "draw/draw_pt.h" -#include "draw/draw_vs.h" - -#include "translate/translate.h" - -struct fetch_shade_emit; - -struct fse_shader { - struct translate_key key; - - void (*run_linear)( const struct fetch_shade_emit *fse, - unsigned start, - unsigned count, - char *buffer ); - - void (*run_elts)( const struct fetch_shade_emit *fse, - const unsigned *fetch_elts, - unsigned fetch_count, - char *buffer ); - -}; - -/* Prototype fetch, shade, emit-hw-verts all in one go. - */ -struct fetch_shade_emit { - struct draw_pt_middle_end base; - struct draw_context *draw; - - struct translate_key key; - - /* Temporaries: - */ - const float *constants; - unsigned pitch[PIPE_MAX_ATTRIBS]; - const ubyte *src[PIPE_MAX_ATTRIBS]; - unsigned prim; - - /* Points to one of the three hardwired example shaders, below: - */ - struct fse_shader *active; - - /* Temporary: A list of hard-wired shaders. Of course the plan - * would be to generate these for a given (vertex-shader, - * translate-key) pair... - */ - struct fse_shader shader[10]; - int nr_shaders; -}; - - - -/* Not quite passthrough yet -- we're still running the 'shader' here, - * inlined into the vertex fetch function. - */ -static void shader0_run_linear( const struct fetch_shade_emit *fse, - unsigned start, - unsigned count, - char *buffer ) -{ - unsigned i; - - const float *m = fse->constants; - const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; - const ubyte *rgb = fse->src[1] + start * fse->pitch[1]; - const ubyte *st = fse->src[2] + start * fse->pitch[2]; - - float *out = (float *)buffer; - - /* loop over vertex attributes (vertex shader inputs) - */ - for (i = 0; i < count; i++) { - { - const float *in = (const float *)xyz; - const float ix = in[0], iy = in[1], iz = in[2]; - - out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12]; - out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13]; - out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14]; - out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15]; - xyz += fse->pitch[0]; - } - - { - const float *in = (const float *)rgb; - out[4] = in[0]; - out[5] = in[1]; - out[6] = in[2]; - out[7] = 1.0f; - rgb += fse->pitch[1]; - } - - { - const float *in = (const float *)st; - out[8] = in[0]; - out[9] = in[1]; - out[10] = 0.0f; - out[11] = 1.0f; - st += fse->pitch[2]; - } - - out += 12; - } -} - - - -static void shader1_run_linear( const struct fetch_shade_emit *fse, - unsigned start, - unsigned count, - char *buffer ) -{ - unsigned i; - const float *m = (const float *)fse->constants; - const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; - const ubyte *rgb = fse->src[1] + start * fse->pitch[1]; - float *out = (float *)buffer; - -// debug_printf("rgb %f %f %f\n", rgb[0], rgb[1], rgb[2]); - - - for (i = 0; i < count; i++) { - { - const float *in = (const float *)xyz; - const float ix = in[0], iy = in[1], iz = in[2]; - - out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12]; - out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13]; - out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14]; - out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15]; - xyz += fse->pitch[0]; - } - - { - const float *in = (const float *)rgb; - out[4] = in[0]; - out[5] = in[1]; - out[6] = in[2]; - out[7] = 1.0f; - rgb += fse->pitch[1]; - } - - out += 8; - } -} - - - - -static void shader2_run_linear( const struct fetch_shade_emit *fse, - unsigned start, - unsigned count, - char *buffer ) -{ - unsigned i; - const float *m = (const float *)fse->constants; - const ubyte *xyz = fse->src[0] + start * fse->pitch[0]; - const ubyte *rgb = fse->src[1] + start * fse->pitch[1]; - const float psiz = 1.0; - float *out = (float *)buffer; - - - assert(fse->pitch[1] == 0); - - for (i = 0; i < count; i++) { - { - const float *in = (const float *)xyz; - const float ix = in[0], iy = in[1], iz = in[2]; - - out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12]; - out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13]; - out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14]; - out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15]; - xyz += fse->pitch[0]; - } - - { - const float *in = (const float *)rgb; - out[4] = in[0]; - out[5] = in[1]; - out[6] = in[2]; - out[7] = 1.0f; - rgb += fse->pitch[1]; - } - - { - out[8] = psiz; - } - - out += 9; - } -} - - - - -static void shader0_run_elts( const struct fetch_shade_emit *fse, - const unsigned *elts, - unsigned count, - char *buffer ) -{ - unsigned i; - const float *m = fse->constants; - float *out = (float *)buffer; - - - /* loop over vertex attributes (vertex shader inputs) - */ - for (i = 0; i < count; i++) { - unsigned elt = elts[i]; - { - const ubyte *xyz = fse->src[0] + elt * fse->pitch[0]; - const float *in = (const float *)xyz; - const float ix = in[0], iy = in[1], iz = in[2]; - - out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12]; - out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13]; - out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14]; - out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15]; - } - - { - const ubyte *rgb = fse->src[1] + elt * fse->pitch[1]; - const float *in = (const float *)rgb; - out[4] = in[0]; - out[5] = in[1]; - out[6] = in[2]; - out[7] = 1.0f; - } - - { - const ubyte *st = fse->src[2] + elt * fse->pitch[2]; - const float *in = (const float *)st; - out[8] = in[0]; - out[9] = in[1]; - out[10] = 0.0f; - out[11] = 1.0f; - } - - out += 12; - } -} - - - -static void shader1_run_elts( const struct fetch_shade_emit *fse, - const unsigned *elts, - unsigned count, - char *buffer ) -{ - unsigned i; - const float *m = (const float *)fse->constants; - float *out = (float *)buffer; - - for (i = 0; i < count; i++) { - unsigned elt = elts[i]; - - { - const ubyte *xyz = fse->src[0] + elt * fse->pitch[0]; - const float *in = (const float *)xyz; - const float ix = in[0], iy = in[1], iz = in[2]; - - out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12]; - out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13]; - out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14]; - out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15]; - xyz += fse->pitch[0]; - } - - { - const ubyte *rgb = fse->src[1] + elt * fse->pitch[1]; - const float *in = (const float *)rgb; - out[4] = in[0]; - out[5] = in[1]; - out[6] = in[2]; - out[7] = 1.0f; - rgb += fse->pitch[1]; - } - - out += 8; - } -} - - - - -static void shader2_run_elts( const struct fetch_shade_emit *fse, - const unsigned *elts, - unsigned count, - char *buffer ) -{ - unsigned i; - const float *m = (const float *)fse->constants; - const float psiz = 1.0; - float *out = (float *)buffer; - - for (i = 0; i < count; i++) { - unsigned elt = elts[i]; - { - const ubyte *xyz = fse->src[0] + elt * fse->pitch[0]; - const float *in = (const float *)xyz; - const float ix = in[0], iy = in[1], iz = in[2]; - - out[0] = m[0] * ix + m[4] * iy + m[8] * iz + m[12]; - out[1] = m[1] * ix + m[5] * iy + m[9] * iz + m[13]; - out[2] = m[2] * ix + m[6] * iy + m[10] * iz + m[14]; - out[3] = m[3] * ix + m[7] * iy + m[11] * iz + m[15]; - } - - { - const ubyte *rgb = fse->src[1] + elt * fse->pitch[1]; - out[4] = rgb[0]; - out[5] = rgb[1]; - out[6] = rgb[2]; - out[7] = 1.0f; - } - - { - out[8] = psiz; - } - - out += 9; - } -} - - - -static void fse_prepare( struct draw_pt_middle_end *middle, - unsigned prim, - unsigned opt ) -{ - struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; - struct draw_context *draw = fse->draw; - unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs; - unsigned num_vs_outputs = draw->vs.vertex_shader->info.num_outputs; - const struct vertex_info *vinfo; - unsigned i; - boolean need_psize = 0; - - - if (draw->pt.user.elts) { - assert(0); - return ; - } - - if (!draw->render->set_primitive( draw->render, - prim )) { - assert(0); - return; - } - - /* Must do this after set_primitive() above: - */ - vinfo = draw->render->get_vertex_info(draw->render); - - - - fse->key.nr_elements = MAX2(num_vs_outputs, /* outputs - translate to hw format */ - num_vs_inputs); /* inputs - fetch from api format */ - - fse->key.output_stride = vinfo->size * 4; - memset(fse->key.element, 0, - fse->key.nr_elements * sizeof(fse->key.element[0])); - - for (i = 0; i < num_vs_inputs; i++) { - const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; - fse->key.element[i].input_format = src->src_format; - - /* Consider ignoring these at this point, ie make generated - * programs independent of this state: - */ - fse->key.element[i].input_buffer = 0; //src->vertex_buffer_index; - fse->key.element[i].input_offset = 0; //src->src_offset; - } - - - { - unsigned dst_offset = 0; - - for (i = 0; i < vinfo->num_attribs; i++) { - unsigned emit_sz = 0; - unsigned output_format = PIPE_FORMAT_NONE; - unsigned vs_output = vinfo->src_index[i]; - - switch (vinfo->emit[i]) { - case EMIT_4F: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - emit_sz = 4 * sizeof(float); - break; - case EMIT_3F: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; - emit_sz = 3 * sizeof(float); - break; - case EMIT_2F: - output_format = PIPE_FORMAT_R32G32_FLOAT; - emit_sz = 2 * sizeof(float); - break; - case EMIT_1F: - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); - break; - case EMIT_1F_PSIZE: - need_psize = 1; - output_format = PIPE_FORMAT_R32_FLOAT; - emit_sz = 1 * sizeof(float); - vs_output = num_vs_outputs + 1; - - break; - default: - assert(0); - break; - } - - /* The elements in the key correspond to vertex shader output - * numbers, not to positions in the hw vertex description -- - * that's handled by the output_offset field. - */ - fse->key.element[vs_output].output_format = output_format; - fse->key.element[vs_output].output_offset = dst_offset; - - dst_offset += emit_sz; - assert(fse->key.output_stride >= dst_offset); - } - } - - /* To make psize work, really need to tell the vertex shader to - * copy that value from input->output. For 'translate' this was - * implicit for all elements. - */ -#if 0 - if (need_psize) { - unsigned input = num_vs_inputs + 1; - const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; - fse->key.element[i].input_format = PIPE_FORMAT_R32_FLOAT; - fse->key.element[i].input_buffer = 0; //nr_buffers + 1; - fse->key.element[i].input_offset = 0; - - fse->key.nr_elements += 1; - - } -#endif - - fse->constants = draw->pt.user.constants; - - /* Would normally look up a vertex shader and peruse its list of - * varients somehow. We omitted that step and put all the - * hardcoded "shaders" into an array. We're just making the - * assumption that this happens to be a matching shader... ie - * you're running isosurf, aren't you? - */ - fse->active = NULL; - for (i = 0; i < fse->nr_shaders; i++) { - if (translate_key_compare( &fse->key, &fse->shader[i].key) == 0) - fse->active = &fse->shader[i]; - } - - if (!fse->active) { - assert(0); - return ; - } - - /* Now set buffer pointers: - */ - for (i = 0; i < num_vs_inputs; i++) { - unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index; - - fse->src[i] = ((const ubyte *) draw->pt.user.vbuffer[buf] + - draw->pt.vertex_buffer[buf].buffer_offset + - draw->pt.vertex_element[i].src_offset); - - fse->pitch[i] = draw->pt.vertex_buffer[buf].pitch; - - } - - - //return TRUE; -} - - - - - - - -static void fse_run_linear( struct draw_pt_middle_end *middle, - unsigned start, - unsigned count ) -{ - struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; - struct draw_context *draw = fse->draw; - - char *hw_verts; - - /* XXX: need to flush to get prim_vbuf.c to release its allocation?? - */ - draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - - hw_verts = draw->render->allocate_vertices( draw->render, - (ushort)fse->key.output_stride, - (ushort)count ); - - if (!hw_verts) { - assert(0); - return; - } - - /* Single routine to fetch vertices, run shader and emit HW verts. - * Clipping and viewport transformation are done elsewhere -- - * either by the API or on hardware, or for some other reason not - * required... - */ - fse->active->run_linear( fse, - start, count, - hw_verts ); - - /* Draw arrays path to avoid re-emitting index list again and - * again. - */ - draw->render->draw_arrays( draw->render, - 0, - count ); - - - draw->render->release_vertices( draw->render, - hw_verts, - fse->key.output_stride, - count ); -} - - -static void -fse_run(struct draw_pt_middle_end *middle, - const unsigned *fetch_elts, - unsigned fetch_count, - const ushort *draw_elts, - unsigned draw_count ) -{ - struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; - struct draw_context *draw = fse->draw; - void *hw_verts; - - /* XXX: need to flush to get prim_vbuf.c to release its allocation?? - */ - draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - - hw_verts = draw->render->allocate_vertices( draw->render, - (ushort)fse->key.output_stride, - (ushort)fetch_count ); - if (!hw_verts) { - assert(0); - return; - } - - - /* Single routine to fetch vertices, run shader and emit HW verts. - */ - fse->active->run_elts( fse, - fetch_elts, - fetch_count, - hw_verts ); - - draw->render->draw( draw->render, - draw_elts, - draw_count ); - - draw->render->release_vertices( draw->render, - hw_verts, - fse->key.output_stride, - fetch_count ); - -} - - -static void fse_finish( struct draw_pt_middle_end *middle ) -{ -} - - -static void -fse_destroy( struct draw_pt_middle_end *middle ) -{ - FREE(middle); -} - -struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw ) -{ - struct fetch_shade_emit *fse = CALLOC_STRUCT(fetch_shade_emit); - if (!fse) - return NULL; - - fse->base.prepare = fse_prepare; - fse->base.run = fse_run; - fse->base.run_linear = fse_run_linear; - fse->base.finish = fse_finish; - fse->base.destroy = fse_destroy; - fse->draw = draw; - - fse->shader[0].run_linear = shader0_run_linear; - fse->shader[0].run_elts = shader0_run_elts; - fse->shader[0].key.nr_elements = 3; - fse->shader[0].key.output_stride = 12 * sizeof(float); - - fse->shader[0].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; - fse->shader[0].key.element[0].input_buffer = 0; - fse->shader[0].key.element[0].input_offset = 0; - fse->shader[0].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[0].key.element[0].output_offset = 0; - - fse->shader[0].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; - fse->shader[0].key.element[1].input_buffer = 0; - fse->shader[0].key.element[1].input_offset = 0; - fse->shader[0].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[0].key.element[1].output_offset = 16; - - fse->shader[0].key.element[1].input_format = PIPE_FORMAT_R32G32_FLOAT; - fse->shader[0].key.element[1].input_buffer = 0; - fse->shader[0].key.element[1].input_offset = 0; - fse->shader[0].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[0].key.element[1].output_offset = 32; - - fse->shader[1].run_linear = shader1_run_linear; - fse->shader[1].run_elts = shader1_run_elts; - fse->shader[1].key.nr_elements = 2; - fse->shader[1].key.output_stride = 8 * sizeof(float); - - fse->shader[1].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; - fse->shader[1].key.element[0].input_buffer = 0; - fse->shader[1].key.element[0].input_offset = 0; - fse->shader[1].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[1].key.element[0].output_offset = 0; - - fse->shader[1].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; - fse->shader[1].key.element[1].input_buffer = 0; - fse->shader[1].key.element[1].input_offset = 0; - fse->shader[1].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[1].key.element[1].output_offset = 16; - - fse->shader[2].run_linear = shader2_run_linear; - fse->shader[2].run_elts = shader2_run_elts; - fse->shader[2].key.nr_elements = 3; - fse->shader[2].key.output_stride = 9 * sizeof(float); - - fse->shader[2].key.element[0].input_format = PIPE_FORMAT_R32G32B32_FLOAT; - fse->shader[2].key.element[0].input_buffer = 0; - fse->shader[2].key.element[0].input_offset = 0; - fse->shader[2].key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[2].key.element[0].output_offset = 0; - - fse->shader[2].key.element[1].input_format = PIPE_FORMAT_R32G32B32_FLOAT; - fse->shader[2].key.element[1].input_buffer = 0; - fse->shader[2].key.element[1].input_offset = 0; - fse->shader[2].key.element[1].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fse->shader[2].key.element[1].output_offset = 16; - - /* psize is special - * -- effectively add it here as another input!?! - * -- who knows how to add it as a buffer? - */ - fse->shader[2].key.element[2].input_format = PIPE_FORMAT_R32_FLOAT; - fse->shader[2].key.element[2].input_buffer = 0; - fse->shader[2].key.element[2].input_offset = 0; - fse->shader[2].key.element[2].output_format = PIPE_FORMAT_R32_FLOAT; - fse->shader[2].key.element[2].output_offset = 32; - - fse->nr_shaders = 3; - - return &fse->base; -} -- cgit v1.2.3 From f8762ba5234fd1b44e11e76bb5f58d2305c90572 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 27 May 2008 14:42:15 +0100 Subject: draw: explicitly list nr_inputs, outputs in varient key --- src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c | 14 +++++++++----- src/gallium/auxiliary/draw/draw_vs.h | 6 ++++-- src/gallium/auxiliary/draw/draw_vs_aos.c | 2 +- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 4 ++-- src/gallium/auxiliary/draw/draw_vs_varient.c | 10 +++++----- 5 files changed, 21 insertions(+), 15 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index c6249b4b41..581026dcb0 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -72,7 +72,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle, struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; struct draw_context *draw = fse->draw; unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs; - unsigned num_vs_outputs = draw->vs.vertex_shader->info.num_outputs; const struct vertex_info *vinfo; unsigned i; boolean need_psize = 0; @@ -91,8 +90,11 @@ static void fse_prepare( struct draw_pt_middle_end *middle, fse->key.output_stride = vinfo->size * 4; - fse->key.nr_elements = MAX2(num_vs_outputs, /* outputs - translate to hw format */ - num_vs_inputs); /* inputs - fetch from api format */ + fse->key.nr_outputs = vinfo->num_attribs; + fse->key.nr_inputs = num_vs_inputs; + + fse->key.nr_elements = MAX2(fse->key.nr_outputs, /* outputs - translate to hw format */ + fse->key.nr_inputs); /* inputs - fetch from api format */ fse->key.viewport = !draw->identity_viewport; fse->key.clip = !draw->bypass_clipping; @@ -142,7 +144,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, need_psize = 1; output_format = PIPE_FORMAT_R32_FLOAT; emit_sz = 1 * sizeof(float); - vs_output = num_vs_outputs + 1; + vs_output = vinfo->num_attribs + 1; break; case EMIT_4UB: output_format = PIPE_FORMAT_B8G8R8A8_UNORM; @@ -177,7 +179,9 @@ static void fse_prepare( struct draw_pt_middle_end *middle, fse->key.element[i].input_buffer = 0; //nr_buffers + 1; fse->key.element[i].input_offset = 0; - fse->key.nr_elements += 1; + fse->key.nr_inputs += 1; + fse->key.nr_elements = MAX2(fse->key.nr_inputs, + fse->key.nr_outputs); } #endif diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index ff3e19b2a8..17902ab86a 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -58,10 +58,12 @@ struct draw_vs_element { struct draw_vs_varient_key { unsigned output_stride; - unsigned nr_elements:16; + unsigned nr_elements:8; /* max2(nr_inputs, nr_outputs) */ + unsigned nr_inputs:8; + unsigned nr_outputs:8; unsigned viewport:1; unsigned clip:1; - unsigned pad:14; + unsigned pad:5; struct draw_vs_element element[PIPE_MAX_ATTRIBS]; }; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index c63553216c..e2e96470f7 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -2095,7 +2095,7 @@ static void vaos_set_buffer( struct draw_vs_varient *varient, struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; unsigned i; - for (i = 0; i < vaos->base.vs->info.num_inputs; i++) { + for (i = 0; i < vaos->base.key.nr_inputs; i++) { if (vaos->base.key.element[i].in.buffer == buf) { vaos->machine->attrib[i].input_ptr = ((char *)ptr + vaos->base.key.element[i].in.offset); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index f39ebb7a17..ef265d61cf 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -180,7 +180,7 @@ boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ) { unsigned i; - for (i = 0; i < cp->vaos->base.vs->info.num_inputs; i++) { + for (i = 0; i < cp->vaos->base.key.nr_inputs; i++) { if (!load_input( cp, i, linear )) return FALSE; cp->insn_counter++; @@ -282,7 +282,7 @@ boolean aos_emit_outputs( struct aos_compilation *cp ) { unsigned i; - for (i = 0; i < cp->vaos->base.vs->info.num_outputs; i++) { + for (i = 0; i < cp->vaos->base.key.nr_outputs; i++) { unsigned format = cp->vaos->base.key.element[i].out.format; unsigned offset = cp->vaos->base.key.element[i].out.offset; diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index d4deabfff3..dab46e8eed 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -276,11 +276,11 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, - /* OK, have to build a new one: + /* Build free-standing fetch and emit functions: */ - fetch.nr_elements = vs->info.num_inputs; + fetch.nr_elements = key->nr_inputs; fetch.output_stride = 0; - for (i = 0; i < vs->info.num_inputs; i++) { + for (i = 0; i < key->nr_inputs; i++) { fetch.element[i].input_format = key->element[i].in.format; fetch.element[i].input_buffer = key->element[i].in.buffer; fetch.element[i].input_offset = key->element[i].in.offset; @@ -290,9 +290,9 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, } - emit.nr_elements = vs->info.num_outputs; + emit.nr_elements = key->nr_outputs; emit.output_stride = key->output_stride; - for (i = 0; i < vs->info.num_outputs; i++) { + for (i = 0; i < key->nr_outputs; i++) { emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit.element[i].input_buffer = 0; emit.element[i].input_offset = i * 4 * sizeof(float); -- cgit v1.2.3 From 2ec419d40dba43305c28fca9658ea00541f67821 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 27 May 2008 17:45:54 +0100 Subject: draw: fix ABS aliasing bug --- src/gallium/auxiliary/draw/draw_vs_aos.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index e2e96470f7..1c63677e6e 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -964,12 +964,13 @@ static boolean emit_ABS( struct aos_compilation *cp, const struct tgsi_full_inst { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); struct x86_reg neg = aos_get_internal(cp, IMM_NEGS); - struct x86_reg dst = get_xmm_writable(cp, arg0); + struct x86_reg tmp = aos_get_xmm_reg(cp); - sse_mulps(cp->func, dst, neg); - sse_maxps(cp->func, dst, arg0); + sse_movaps(cp->func, tmp, arg0); + sse_mulps(cp->func, tmp, neg); + sse_maxps(cp->func, tmp, arg0); - store_dest(cp, &op->FullDstRegisters[0], dst); + store_dest(cp, &op->FullDstRegisters[0], tmp); return TRUE; } -- cgit v1.2.3 From 660fee8351542dadc0d5550164e753f7c2d67261 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 27 May 2008 17:49:13 +0100 Subject: draw: ensure vs outputs mapped correctly to vinfo attribs --- src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c | 1 + src/gallium/auxiliary/draw/draw_vs.h | 19 ++++++++++--------- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 3 ++- 3 files changed, 13 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 581026dcb0..43d7095f76 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -160,6 +160,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, * that's handled by the output_offset field. */ fse->key.element[vs_output].out.format = output_format; + fse->key.element[vs_output].out.vs_output = vs_output; fse->key.element[vs_output].out.offset = dst_offset; dst_offset += emit_sz; diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 17902ab86a..01171bc23d 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -38,22 +38,23 @@ struct draw_context; struct pipe_shader_state; -struct draw_vs_input +struct draw_varient_input { enum pipe_format format; unsigned buffer; unsigned offset; }; -struct draw_vs_output +struct draw_varient_output { - enum pipe_format format; - unsigned offset; + enum pipe_format format; /* output format */ + unsigned vs_output:8; /* which vertex shader output is this? */ + unsigned offset:24; /* offset into output vertex */ }; -struct draw_vs_element { - struct draw_vs_input in; - struct draw_vs_output out; +struct draw_varient_element { + struct draw_varient_input in; + struct draw_varient_output out; }; struct draw_vs_varient_key { @@ -64,7 +65,7 @@ struct draw_vs_varient_key { unsigned viewport:1; unsigned clip:1; unsigned pad:5; - struct draw_vs_element element[PIPE_MAX_ATTRIBS]; + struct draw_varient_element element[PIPE_MAX_ATTRIBS]; }; struct draw_vs_varient; @@ -201,7 +202,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, static INLINE int draw_vs_varient_keysize( const struct draw_vs_varient_key *key ) { - return 2 * sizeof(int) + key->nr_elements * sizeof(struct draw_vs_element); + return 2 * sizeof(int) + key->nr_elements * sizeof(struct draw_varient_element); } static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key *a, diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index ef265d61cf..cebfaf6474 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -285,10 +285,11 @@ boolean aos_emit_outputs( struct aos_compilation *cp ) for (i = 0; i < cp->vaos->base.key.nr_outputs; i++) { unsigned format = cp->vaos->base.key.element[i].out.format; unsigned offset = cp->vaos->base.key.element[i].out.offset; + unsigned vs_output = cp->vaos->base.key.element[i].out.vs_output; struct x86_reg data = aos_get_shader_reg( cp, TGSI_FILE_OUTPUT, - i ); + vs_output ); if (data.file != file_XMM) { struct x86_reg tmp = aos_get_xmm_reg( cp ); -- cgit v1.2.3 From fd20d1c7e8bbe2f40d73679b1514023772cfd8f6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 27 May 2008 17:51:48 +0100 Subject: draw: add disabled debug code --- src/gallium/auxiliary/draw/draw_pt_emit.c | 15 +++++++++++- .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 28 +++++++++++++++++++++- 2 files changed, 41 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index e37256b1ff..cf87cde996 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -41,6 +41,8 @@ struct pt_emit { struct translate_cache *cache; unsigned prim; + + const struct vertex_info *vinfo; }; void draw_pt_emit_prepare( struct pt_emit *emit, @@ -71,7 +73,7 @@ void draw_pt_emit_prepare( struct pt_emit *emit, /* Must do this after set_primitive() above: */ - vinfo = draw->render->get_vertex_info(draw->render); + emit->vinfo = vinfo = draw->render->get_vertex_info(draw->render); /* Translate from pipeline vertices to hw vertices. @@ -245,6 +247,17 @@ void draw_pt_emit_linear(struct pt_emit *emit, vertex_count, hw_verts); + if (0) { + unsigned i; + for (i = 0; i < vertex_count; i++) { + debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i); + draw_dump_emitted_vertex( emit->vinfo, + (const uint8_t *)hw_verts + + translate->key.output_stride * i ); + } + } + + render->draw_arrays(render, start, count); render->release_vertices(render, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 43d7095f76..85d0bdfcab 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -60,6 +60,9 @@ struct fetch_shade_emit { struct draw_vs_varient_key key; struct draw_vs_varient *active; + + + const struct vertex_info *vinfo; }; @@ -85,7 +88,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, /* Must do this after set_primitive() above: */ - vinfo = draw->render->get_vertex_info(draw->render); + fse->vinfo = vinfo = draw->render->get_vertex_info(draw->render); @@ -267,6 +270,18 @@ static void fse_run_linear( struct draw_pt_middle_end *middle, 0, count ); + if (0) { + unsigned i; + for (i = 0; i < count; i++) { + debug_printf("\n\n%s vertex %d: (stride %d, offset %d)\n", __FUNCTION__, i, + fse->key.output_stride, + fse->key.output_stride * i); + + draw_dump_emitted_vertex( fse->vinfo, + (const uint8_t *)hw_verts + fse->key.output_stride * i ); + } + } + draw->render->release_vertices( draw->render, hw_verts, @@ -311,6 +326,17 @@ fse_run(struct draw_pt_middle_end *middle, draw_elts, draw_count ); + if (0) { + unsigned i; + for (i = 0; i < fetch_count; i++) { + debug_printf("\n\n%s vertex %d:\n", __FUNCTION__, i); + draw_dump_emitted_vertex( fse->vinfo, + (const uint8_t *)hw_verts + + fse->key.output_stride * i ); + } + } + + draw->render->release_vertices( draw->render, hw_verts, fse->key.output_stride, -- cgit v1.2.3 From 7b85ea19de09d4e7e077ca147528e90e52683690 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 27 May 2008 19:01:57 +0100 Subject: draw: support psize in vs_varient paths Preserve the vinfo "EMIT_*" format descriptors in the varient key, and deal with PSIZE directly in each implementation. --- .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 35 ++------------------ src/gallium/auxiliary/draw/draw_vertex.h | 21 ++++++++++++ src/gallium/auxiliary/draw/draw_vs_aos.c | 3 ++ src/gallium/auxiliary/draw/draw_vs_aos.h | 1 + src/gallium/auxiliary/draw/draw_vs_aos_io.c | 37 ++++++++++++++-------- src/gallium/auxiliary/draw/draw_vs_varient.c | 24 +++++++++++--- 6 files changed, 71 insertions(+), 50 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 85d0bdfcab..729c7db999 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -77,7 +77,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle, unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs; const struct vertex_info *vinfo; unsigned i; - boolean need_psize = 0; if (!draw->render->set_primitive( draw->render, @@ -123,34 +122,24 @@ static void fse_prepare( struct draw_pt_middle_end *middle, for (i = 0; i < vinfo->num_attribs; i++) { unsigned emit_sz = 0; - unsigned output_format = PIPE_FORMAT_NONE; - unsigned vs_output = vinfo->src_index[i]; switch (vinfo->emit[i]) { case EMIT_4F: - output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit_sz = 4 * sizeof(float); break; case EMIT_3F: - output_format = PIPE_FORMAT_R32G32B32_FLOAT; emit_sz = 3 * sizeof(float); break; case EMIT_2F: - output_format = PIPE_FORMAT_R32G32_FLOAT; emit_sz = 2 * sizeof(float); break; case EMIT_1F: - output_format = PIPE_FORMAT_R32_FLOAT; emit_sz = 1 * sizeof(float); break; case EMIT_1F_PSIZE: - need_psize = 1; - output_format = PIPE_FORMAT_R32_FLOAT; emit_sz = 1 * sizeof(float); - vs_output = vinfo->num_attribs + 1; break; case EMIT_4UB: - output_format = PIPE_FORMAT_B8G8R8A8_UNORM; emit_sz = 4 * sizeof(ubyte); break; default: @@ -162,33 +151,15 @@ static void fse_prepare( struct draw_pt_middle_end *middle, * numbers, not to positions in the hw vertex description -- * that's handled by the output_offset field. */ - fse->key.element[vs_output].out.format = output_format; - fse->key.element[vs_output].out.vs_output = vs_output; - fse->key.element[vs_output].out.offset = dst_offset; + fse->key.element[i].out.format = vinfo->emit[i]; + fse->key.element[i].out.vs_output = vinfo->src_index[i]; + fse->key.element[i].out.offset = dst_offset; dst_offset += emit_sz; assert(fse->key.output_stride >= dst_offset); } } - /* To make psize work, really need to tell the vertex shader to - * copy that value from input->output. For 'translate' this was - * implicit for all elements. - */ -#if 0 - if (need_psize) { - unsigned input = num_vs_inputs + 1; - const struct pipe_vertex_element *src = &draw->pt.vertex_element[i]; - fse->key.element[i].input_format = PIPE_FORMAT_R32_FLOAT; - fse->key.element[i].input_buffer = 0; //nr_buffers + 1; - fse->key.element[i].input_offset = 0; - - fse->key.nr_inputs += 1; - fse->key.nr_elements = MAX2(fse->key.nr_inputs, - fse->key.nr_outputs); - - } -#endif /* Would normally look up a vertex shader and peruse its list of * varients somehow. We omitted that step and put all the diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index 6d8bac5138..16c65c4317 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -109,4 +109,25 @@ extern void draw_compute_vertex_size(struct vertex_info *vinfo); void draw_dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data); + +static INLINE unsigned draw_translate_vinfo_format(unsigned format ) +{ + switch (format) { + case EMIT_1F: + case EMIT_1F_PSIZE: + return PIPE_FORMAT_R32_FLOAT; + case EMIT_2F: + return PIPE_FORMAT_R32G32_FLOAT; + case EMIT_3F: + return PIPE_FORMAT_R32G32B32_FLOAT; + case EMIT_4F: + return PIPE_FORMAT_R32G32B32A32_FLOAT; + case EMIT_4UB: + return PIPE_FORMAT_R8G8B8A8_UNORM; + default: + return PIPE_FORMAT_NONE; + } +} + + #endif /* DRAW_VERTEX_H */ diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 1c63677e6e..d3770b2c53 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -2126,6 +2126,7 @@ static void vaos_run_elts( struct draw_vs_varient *varient, { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; vaos->gen_run_elts( varient, elts, count, @@ -2139,6 +2140,7 @@ static void vaos_run_linear( struct draw_vs_varient *varient, { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; vaos->gen_run_linear( varient, start, count, @@ -2204,6 +2206,7 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, vaos->base.run_linear = vaos_run_linear; vaos->base.run_elts = vaos_run_elts; + vaos->draw = vs->draw; vaos->machine = align_malloc( sizeof(struct aos_machine), 16 ); if (!vaos->machine) goto fail; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index fffe2e4658..b47413ff43 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -176,6 +176,7 @@ boolean aos_emit_outputs( struct aos_compilation *cp ); #define IMM_255 4 /* 255, 255, 255, 255 */ #define IMM_NEGS 5 /* -1,-1,-1,-1 */ #define IMM_RSQ 6 /* -.5,1.5,_,_ */ +#define IMM_PSIZE 7 /* not really an immediate - updated each run */ struct x86_reg aos_get_internal( struct aos_compilation *cp, unsigned imm ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index cebfaf6474..836110f382 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -33,6 +33,7 @@ #include "tgsi/exec/tgsi_exec.h" #include "draw_vs.h" #include "draw_vs_aos.h" +#include "draw_vertex.h" #include "rtasm/rtasm_x86sse.h" @@ -249,24 +250,27 @@ static boolean emit_output( struct aos_compilation *cp, unsigned format ) { switch (format) { - case PIPE_FORMAT_R32_FLOAT: + case EMIT_1F: + case EMIT_1F_PSIZE: emit_store_R32(cp, ptr, dataXMM); break; - case PIPE_FORMAT_R32G32_FLOAT: + case EMIT_2F: emit_store_R32G32(cp, ptr, dataXMM); break; - case PIPE_FORMAT_R32G32B32_FLOAT: + case EMIT_3F: emit_store_R32G32B32(cp, ptr, dataXMM); break; - case PIPE_FORMAT_R32G32B32A32_FLOAT: + case EMIT_4F: emit_store_R32G32B32A32(cp, ptr, dataXMM); break; - case PIPE_FORMAT_B8G8R8A8_UNORM: - emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W)); - emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); - break; - case PIPE_FORMAT_R8G8B8A8_UNORM: - emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); + case EMIT_4UB: + if (1) { + emit_swizzle(cp, dataXMM, dataXMM, SHUF(Z,Y,X,W)); + emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); + } + else { + emit_store_R8G8B8A8_UNORM(cp, ptr, dataXMM); + } break; default: ERROR(cp, "unhandled output format"); @@ -287,9 +291,16 @@ boolean aos_emit_outputs( struct aos_compilation *cp ) unsigned offset = cp->vaos->base.key.element[i].out.offset; unsigned vs_output = cp->vaos->base.key.element[i].out.vs_output; - struct x86_reg data = aos_get_shader_reg( cp, - TGSI_FILE_OUTPUT, - vs_output ); + struct x86_reg data; + + if (format == EMIT_1F_PSIZE) { + data = aos_get_internal_xmm( cp, IMM_PSIZE ); + } + else { + data = aos_get_shader_reg( cp, + TGSI_FILE_OUTPUT, + vs_output ); + } if (data.file != file_XMM) { struct x86_reg tmp = aos_get_xmm_reg( cp ); diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index dab46e8eed..119a3a04b5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -231,6 +231,10 @@ static void vsvg_run_linear( struct draw_vs_varient *varient, output_buffer, vsvg->base.key.output_stride ); + vsvg->emit->set_buffer( vsvg->emit, + 1, + &vsvg->draw->rasterizer->point_size, + 0); vsvg->emit->run( vsvg->emit, 0, count, @@ -293,11 +297,21 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, emit.nr_elements = key->nr_outputs; emit.output_stride = key->output_stride; for (i = 0; i < key->nr_outputs; i++) { - emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - emit.element[i].input_buffer = 0; - emit.element[i].input_offset = i * 4 * sizeof(float); - emit.element[i].output_format = key->element[i].out.format; - emit.element[i].output_offset = key->element[i].out.offset; + if (key->element[i].out.format != EMIT_1F_PSIZE) + { + emit.element[i].input_format = PIPE_FORMAT_R32G32B32A32_FLOAT; + emit.element[i].input_buffer = 0; + emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float); + emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format); + emit.element[i].output_offset = key->element[i].out.offset; + } + else { + emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT; + emit.element[i].input_buffer = 1; + emit.element[i].input_offset = 0; + emit.element[i].output_format = PIPE_FORMAT_R32_FLOAT; + emit.element[i].output_offset = key->element[i].out.offset; + } } vsvg->fetch = draw_vs_get_fetch( vs->draw, &fetch ); -- cgit v1.2.3 From e38bb10824fc3dc194d7cc6987f3f4957784310e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 27 May 2008 23:21:50 +0100 Subject: draw: reset stipple counter whenever non-line prim encountered --- src/gallium/auxiliary/draw/draw_pipe_stipple.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index 4673d5dcba..9522b79582 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -175,6 +175,22 @@ reset_stipple_counter(struct draw_stage *stage) stage->next->reset_stipple_counter( stage->next ); } +static void +stipple_reset_point(struct draw_stage *stage, struct prim_header *header) +{ + struct stipple_stage *stipple = stipple_stage(stage); + stipple->counter = 0; + stage->next->point(stage->next, header); +} + +static void +stipple_reset_tri(struct draw_stage *stage, struct prim_header *header) +{ + struct stipple_stage *stipple = stipple_stage(stage); + stipple->counter = 0; + stage->next->tri(stage->next, header); +} + static void stipple_first_line(struct draw_stage *stage, @@ -220,9 +236,9 @@ struct draw_stage *draw_stipple_stage( struct draw_context *draw ) stipple->stage.draw = draw; stipple->stage.next = NULL; - stipple->stage.point = draw_pipe_passthrough_point; + stipple->stage.point = stipple_reset_point; stipple->stage.line = stipple_first_line; - stipple->stage.tri = draw_pipe_passthrough_tri; + stipple->stage.tri = stipple_reset_tri; stipple->stage.reset_stipple_counter = reset_stipple_counter; stipple->stage.flush = stipple_flush; stipple->stage.destroy = stipple_destroy; -- cgit v1.2.3 From 55d29a8d48663982a1aeea414f69a5896b97d1ea Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 28 May 2008 16:12:14 +0900 Subject: gallium: Windows CE portability fixes. --- src/gallium/auxiliary/draw/draw_pt_elts.c | 8 +- src/gallium/auxiliary/draw/draw_pt_varray.c | 4 +- src/gallium/auxiliary/draw/draw_vs_sse.c | 4 +- src/gallium/auxiliary/rtasm/rtasm_cpu.c | 4 +- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 4 +- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 4 +- src/gallium/auxiliary/tgsi/util/tgsi_util.c | 2 +- src/gallium/auxiliary/translate/translate.c | 3 +- .../auxiliary/translate/translate_generic.c | 236 ++++++++++----------- src/gallium/auxiliary/translate/translate_sse.c | 5 +- src/gallium/auxiliary/util/u_time.h | 2 +- src/gallium/include/pipe/p_compiler.h | 54 +++-- src/gallium/include/pipe/p_config.h | 8 +- src/gallium/include/pipe/p_debug.h | 11 +- 14 files changed, 192 insertions(+), 157 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c index 2094c081ed..b7780fb507 100644 --- a/src/gallium/auxiliary/draw/draw_pt_elts.c +++ b/src/gallium/auxiliary/draw/draw_pt_elts.c @@ -60,10 +60,10 @@ static unsigned elt_vert( const void *elts, unsigned idx ) pt_elt_func draw_pt_elt_func( struct draw_context *draw ) { switch (draw->pt.user.eltSize) { - case 0: return elt_vert; - case 1: return elt_ubyte; - case 2: return elt_ushort; - case 4: return elt_uint; + case 0: return &elt_vert; + case 1: return &elt_ubyte; + case 2: return &elt_ushort; + case 4: return &elt_uint; default: return NULL; } } diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index 355093f945..c7c66b34d4 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -147,8 +147,8 @@ static INLINE void varray_ef_quad( struct varray_frontend *varray, unsigned i2, unsigned i3 ) { - const unsigned omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2; - const unsigned omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1; + const ushort omitEdge1 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_2; + const ushort omitEdge2 = DRAW_PIPE_EDGE_FLAG_0 | DRAW_PIPE_EDGE_FLAG_1; varray_triangle_flags( varray, DRAW_PIPE_RESET_STIPPLE | omitEdge1, diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index e3f4e67472..c88bc137ee 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -31,9 +31,11 @@ * Brian Paul */ +#include "pipe/p_config.h" + #include "draw_vs.h" -#if defined(__i386__) || defined(__386__) +#if defined(PIPE_ARCH_X86) #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" diff --git a/src/gallium/auxiliary/rtasm/rtasm_cpu.c b/src/gallium/auxiliary/rtasm/rtasm_cpu.c index f01e12faa0..5499018b21 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_cpu.c +++ b/src/gallium/auxiliary/rtasm/rtasm_cpu.c @@ -47,7 +47,7 @@ static boolean rtasm_sse_enabled(void) int rtasm_cpu_has_sse(void) { /* FIXME: actually detect this at run-time */ -#if defined(__i386__) || defined(__386__) || defined(i386) +#if defined(PIPE_ARCH_X86) return rtasm_sse_enabled(); #else return 0; @@ -57,7 +57,7 @@ int rtasm_cpu_has_sse(void) int rtasm_cpu_has_sse2(void) { /* FIXME: actually detect this at run-time */ -#if defined(__i386__) || defined(__386__) || defined(i386) +#if defined(PIPE_ARCH_X86) return rtasm_sse_enabled(); #else return 0; diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 4e036d9032..6cd88ebca3 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -21,7 +21,9 @@ * **************************************************************************/ -#if defined(__i386__) || defined(__386__) || defined(i386) +#include "pipe/p_config.h" + +#if defined(PIPE_ARCH_X86) #include "pipe/p_compiler.h" #include "pipe/p_debug.h" diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index eacaeeaf6f..a5afa16395 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -24,7 +24,9 @@ #ifndef _RTASM_X86SSE_H_ #define _RTASM_X86SSE_H_ -#if defined(__i386__) || defined(__386__) || defined(i386) +#include "pipe/p_config.h" + +#if defined(PIPE_ARCH_X86) /* It is up to the caller to ensure that instructions issued are * suitable for the host cpu. There are no checks made in this module diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.c b/src/gallium/auxiliary/tgsi/util/tgsi_util.c index 4cdd89182a..56a50d3b21 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_util.c @@ -8,7 +8,7 @@ union pointer_hack { void *pointer; - unsigned long long uint64; + uint64_t uint64; }; void * diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c index b04bc6eefd..b93fbf9033 100644 --- a/src/gallium/auxiliary/translate/translate.c +++ b/src/gallium/auxiliary/translate/translate.c @@ -30,6 +30,7 @@ * Keith Whitwell */ +#include "pipe/p_config.h" #include "pipe/p_util.h" #include "pipe/p_state.h" #include "translate.h" @@ -38,7 +39,7 @@ struct translate *translate_create( const struct translate_key *key ) { struct translate *translate = NULL; -#if defined(__i386__) || defined(__386__) || defined(i386) +#if defined(PIPE_ARCH_X86) translate = translate_sse2_create( key ); if (translate) return translate; diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 402780ee53..8f3b470333 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -255,140 +255,140 @@ static fetch_func get_fetch_func( enum pipe_format format ) { switch (format) { case PIPE_FORMAT_R64_FLOAT: - return fetch_R64_FLOAT; + return &fetch_R64_FLOAT; case PIPE_FORMAT_R64G64_FLOAT: - return fetch_R64G64_FLOAT; + return &fetch_R64G64_FLOAT; case PIPE_FORMAT_R64G64B64_FLOAT: - return fetch_R64G64B64_FLOAT; + return &fetch_R64G64B64_FLOAT; case PIPE_FORMAT_R64G64B64A64_FLOAT: - return fetch_R64G64B64A64_FLOAT; + return &fetch_R64G64B64A64_FLOAT; case PIPE_FORMAT_R32_FLOAT: - return fetch_R32_FLOAT; + return &fetch_R32_FLOAT; case PIPE_FORMAT_R32G32_FLOAT: - return fetch_R32G32_FLOAT; + return &fetch_R32G32_FLOAT; case PIPE_FORMAT_R32G32B32_FLOAT: - return fetch_R32G32B32_FLOAT; + return &fetch_R32G32B32_FLOAT; case PIPE_FORMAT_R32G32B32A32_FLOAT: - return fetch_R32G32B32A32_FLOAT; + return &fetch_R32G32B32A32_FLOAT; case PIPE_FORMAT_R32_UNORM: - return fetch_R32_UNORM; + return &fetch_R32_UNORM; case PIPE_FORMAT_R32G32_UNORM: - return fetch_R32G32_UNORM; + return &fetch_R32G32_UNORM; case PIPE_FORMAT_R32G32B32_UNORM: - return fetch_R32G32B32_UNORM; + return &fetch_R32G32B32_UNORM; case PIPE_FORMAT_R32G32B32A32_UNORM: - return fetch_R32G32B32A32_UNORM; + return &fetch_R32G32B32A32_UNORM; case PIPE_FORMAT_R32_USCALED: - return fetch_R32_USCALED; + return &fetch_R32_USCALED; case PIPE_FORMAT_R32G32_USCALED: - return fetch_R32G32_USCALED; + return &fetch_R32G32_USCALED; case PIPE_FORMAT_R32G32B32_USCALED: - return fetch_R32G32B32_USCALED; + return &fetch_R32G32B32_USCALED; case PIPE_FORMAT_R32G32B32A32_USCALED: - return fetch_R32G32B32A32_USCALED; + return &fetch_R32G32B32A32_USCALED; case PIPE_FORMAT_R32_SNORM: - return fetch_R32_SNORM; + return &fetch_R32_SNORM; case PIPE_FORMAT_R32G32_SNORM: - return fetch_R32G32_SNORM; + return &fetch_R32G32_SNORM; case PIPE_FORMAT_R32G32B32_SNORM: - return fetch_R32G32B32_SNORM; + return &fetch_R32G32B32_SNORM; case PIPE_FORMAT_R32G32B32A32_SNORM: - return fetch_R32G32B32A32_SNORM; + return &fetch_R32G32B32A32_SNORM; case PIPE_FORMAT_R32_SSCALED: - return fetch_R32_SSCALED; + return &fetch_R32_SSCALED; case PIPE_FORMAT_R32G32_SSCALED: - return fetch_R32G32_SSCALED; + return &fetch_R32G32_SSCALED; case PIPE_FORMAT_R32G32B32_SSCALED: - return fetch_R32G32B32_SSCALED; + return &fetch_R32G32B32_SSCALED; case PIPE_FORMAT_R32G32B32A32_SSCALED: - return fetch_R32G32B32A32_SSCALED; + return &fetch_R32G32B32A32_SSCALED; case PIPE_FORMAT_R16_UNORM: - return fetch_R16_UNORM; + return &fetch_R16_UNORM; case PIPE_FORMAT_R16G16_UNORM: - return fetch_R16G16_UNORM; + return &fetch_R16G16_UNORM; case PIPE_FORMAT_R16G16B16_UNORM: - return fetch_R16G16B16_UNORM; + return &fetch_R16G16B16_UNORM; case PIPE_FORMAT_R16G16B16A16_UNORM: - return fetch_R16G16B16A16_UNORM; + return &fetch_R16G16B16A16_UNORM; case PIPE_FORMAT_R16_USCALED: - return fetch_R16_USCALED; + return &fetch_R16_USCALED; case PIPE_FORMAT_R16G16_USCALED: - return fetch_R16G16_USCALED; + return &fetch_R16G16_USCALED; case PIPE_FORMAT_R16G16B16_USCALED: - return fetch_R16G16B16_USCALED; + return &fetch_R16G16B16_USCALED; case PIPE_FORMAT_R16G16B16A16_USCALED: - return fetch_R16G16B16A16_USCALED; + return &fetch_R16G16B16A16_USCALED; case PIPE_FORMAT_R16_SNORM: - return fetch_R16_SNORM; + return &fetch_R16_SNORM; case PIPE_FORMAT_R16G16_SNORM: - return fetch_R16G16_SNORM; + return &fetch_R16G16_SNORM; case PIPE_FORMAT_R16G16B16_SNORM: - return fetch_R16G16B16_SNORM; + return &fetch_R16G16B16_SNORM; case PIPE_FORMAT_R16G16B16A16_SNORM: - return fetch_R16G16B16A16_SNORM; + return &fetch_R16G16B16A16_SNORM; case PIPE_FORMAT_R16_SSCALED: - return fetch_R16_SSCALED; + return &fetch_R16_SSCALED; case PIPE_FORMAT_R16G16_SSCALED: - return fetch_R16G16_SSCALED; + return &fetch_R16G16_SSCALED; case PIPE_FORMAT_R16G16B16_SSCALED: - return fetch_R16G16B16_SSCALED; + return &fetch_R16G16B16_SSCALED; case PIPE_FORMAT_R16G16B16A16_SSCALED: - return fetch_R16G16B16A16_SSCALED; + return &fetch_R16G16B16A16_SSCALED; case PIPE_FORMAT_R8_UNORM: - return fetch_R8_UNORM; + return &fetch_R8_UNORM; case PIPE_FORMAT_R8G8_UNORM: - return fetch_R8G8_UNORM; + return &fetch_R8G8_UNORM; case PIPE_FORMAT_R8G8B8_UNORM: - return fetch_R8G8B8_UNORM; + return &fetch_R8G8B8_UNORM; case PIPE_FORMAT_R8G8B8A8_UNORM: - return fetch_R8G8B8A8_UNORM; + return &fetch_R8G8B8A8_UNORM; case PIPE_FORMAT_R8_USCALED: - return fetch_R8_USCALED; + return &fetch_R8_USCALED; case PIPE_FORMAT_R8G8_USCALED: - return fetch_R8G8_USCALED; + return &fetch_R8G8_USCALED; case PIPE_FORMAT_R8G8B8_USCALED: - return fetch_R8G8B8_USCALED; + return &fetch_R8G8B8_USCALED; case PIPE_FORMAT_R8G8B8A8_USCALED: - return fetch_R8G8B8A8_USCALED; + return &fetch_R8G8B8A8_USCALED; case PIPE_FORMAT_R8_SNORM: - return fetch_R8_SNORM; + return &fetch_R8_SNORM; case PIPE_FORMAT_R8G8_SNORM: - return fetch_R8G8_SNORM; + return &fetch_R8G8_SNORM; case PIPE_FORMAT_R8G8B8_SNORM: - return fetch_R8G8B8_SNORM; + return &fetch_R8G8B8_SNORM; case PIPE_FORMAT_R8G8B8A8_SNORM: - return fetch_R8G8B8A8_SNORM; + return &fetch_R8G8B8A8_SNORM; case PIPE_FORMAT_R8_SSCALED: - return fetch_R8_SSCALED; + return &fetch_R8_SSCALED; case PIPE_FORMAT_R8G8_SSCALED: - return fetch_R8G8_SSCALED; + return &fetch_R8G8_SSCALED; case PIPE_FORMAT_R8G8B8_SSCALED: - return fetch_R8G8B8_SSCALED; + return &fetch_R8G8B8_SSCALED; case PIPE_FORMAT_R8G8B8A8_SSCALED: - return fetch_R8G8B8A8_SSCALED; + return &fetch_R8G8B8A8_SSCALED; case PIPE_FORMAT_A8R8G8B8_UNORM: - return fetch_A8R8G8B8_UNORM; + return &fetch_A8R8G8B8_UNORM; case PIPE_FORMAT_B8G8R8A8_UNORM: - return fetch_B8G8R8A8_UNORM; + return &fetch_B8G8R8A8_UNORM; default: assert(0); - return fetch_NULL; + return &fetch_NULL; } } @@ -399,140 +399,140 @@ static emit_func get_emit_func( enum pipe_format format ) { switch (format) { case PIPE_FORMAT_R64_FLOAT: - return emit_R64_FLOAT; + return &emit_R64_FLOAT; case PIPE_FORMAT_R64G64_FLOAT: - return emit_R64G64_FLOAT; + return &emit_R64G64_FLOAT; case PIPE_FORMAT_R64G64B64_FLOAT: - return emit_R64G64B64_FLOAT; + return &emit_R64G64B64_FLOAT; case PIPE_FORMAT_R64G64B64A64_FLOAT: - return emit_R64G64B64A64_FLOAT; + return &emit_R64G64B64A64_FLOAT; case PIPE_FORMAT_R32_FLOAT: - return emit_R32_FLOAT; + return &emit_R32_FLOAT; case PIPE_FORMAT_R32G32_FLOAT: - return emit_R32G32_FLOAT; + return &emit_R32G32_FLOAT; case PIPE_FORMAT_R32G32B32_FLOAT: - return emit_R32G32B32_FLOAT; + return &emit_R32G32B32_FLOAT; case PIPE_FORMAT_R32G32B32A32_FLOAT: - return emit_R32G32B32A32_FLOAT; + return &emit_R32G32B32A32_FLOAT; case PIPE_FORMAT_R32_UNORM: - return emit_R32_UNORM; + return &emit_R32_UNORM; case PIPE_FORMAT_R32G32_UNORM: - return emit_R32G32_UNORM; + return &emit_R32G32_UNORM; case PIPE_FORMAT_R32G32B32_UNORM: - return emit_R32G32B32_UNORM; + return &emit_R32G32B32_UNORM; case PIPE_FORMAT_R32G32B32A32_UNORM: - return emit_R32G32B32A32_UNORM; + return &emit_R32G32B32A32_UNORM; case PIPE_FORMAT_R32_USCALED: - return emit_R32_USCALED; + return &emit_R32_USCALED; case PIPE_FORMAT_R32G32_USCALED: - return emit_R32G32_USCALED; + return &emit_R32G32_USCALED; case PIPE_FORMAT_R32G32B32_USCALED: - return emit_R32G32B32_USCALED; + return &emit_R32G32B32_USCALED; case PIPE_FORMAT_R32G32B32A32_USCALED: - return emit_R32G32B32A32_USCALED; + return &emit_R32G32B32A32_USCALED; case PIPE_FORMAT_R32_SNORM: - return emit_R32_SNORM; + return &emit_R32_SNORM; case PIPE_FORMAT_R32G32_SNORM: - return emit_R32G32_SNORM; + return &emit_R32G32_SNORM; case PIPE_FORMAT_R32G32B32_SNORM: - return emit_R32G32B32_SNORM; + return &emit_R32G32B32_SNORM; case PIPE_FORMAT_R32G32B32A32_SNORM: - return emit_R32G32B32A32_SNORM; + return &emit_R32G32B32A32_SNORM; case PIPE_FORMAT_R32_SSCALED: - return emit_R32_SSCALED; + return &emit_R32_SSCALED; case PIPE_FORMAT_R32G32_SSCALED: - return emit_R32G32_SSCALED; + return &emit_R32G32_SSCALED; case PIPE_FORMAT_R32G32B32_SSCALED: - return emit_R32G32B32_SSCALED; + return &emit_R32G32B32_SSCALED; case PIPE_FORMAT_R32G32B32A32_SSCALED: - return emit_R32G32B32A32_SSCALED; + return &emit_R32G32B32A32_SSCALED; case PIPE_FORMAT_R16_UNORM: - return emit_R16_UNORM; + return &emit_R16_UNORM; case PIPE_FORMAT_R16G16_UNORM: - return emit_R16G16_UNORM; + return &emit_R16G16_UNORM; case PIPE_FORMAT_R16G16B16_UNORM: - return emit_R16G16B16_UNORM; + return &emit_R16G16B16_UNORM; case PIPE_FORMAT_R16G16B16A16_UNORM: - return emit_R16G16B16A16_UNORM; + return &emit_R16G16B16A16_UNORM; case PIPE_FORMAT_R16_USCALED: - return emit_R16_USCALED; + return &emit_R16_USCALED; case PIPE_FORMAT_R16G16_USCALED: - return emit_R16G16_USCALED; + return &emit_R16G16_USCALED; case PIPE_FORMAT_R16G16B16_USCALED: - return emit_R16G16B16_USCALED; + return &emit_R16G16B16_USCALED; case PIPE_FORMAT_R16G16B16A16_USCALED: - return emit_R16G16B16A16_USCALED; + return &emit_R16G16B16A16_USCALED; case PIPE_FORMAT_R16_SNORM: - return emit_R16_SNORM; + return &emit_R16_SNORM; case PIPE_FORMAT_R16G16_SNORM: - return emit_R16G16_SNORM; + return &emit_R16G16_SNORM; case PIPE_FORMAT_R16G16B16_SNORM: - return emit_R16G16B16_SNORM; + return &emit_R16G16B16_SNORM; case PIPE_FORMAT_R16G16B16A16_SNORM: - return emit_R16G16B16A16_SNORM; + return &emit_R16G16B16A16_SNORM; case PIPE_FORMAT_R16_SSCALED: - return emit_R16_SSCALED; + return &emit_R16_SSCALED; case PIPE_FORMAT_R16G16_SSCALED: - return emit_R16G16_SSCALED; + return &emit_R16G16_SSCALED; case PIPE_FORMAT_R16G16B16_SSCALED: - return emit_R16G16B16_SSCALED; + return &emit_R16G16B16_SSCALED; case PIPE_FORMAT_R16G16B16A16_SSCALED: - return emit_R16G16B16A16_SSCALED; + return &emit_R16G16B16A16_SSCALED; case PIPE_FORMAT_R8_UNORM: - return emit_R8_UNORM; + return &emit_R8_UNORM; case PIPE_FORMAT_R8G8_UNORM: - return emit_R8G8_UNORM; + return &emit_R8G8_UNORM; case PIPE_FORMAT_R8G8B8_UNORM: - return emit_R8G8B8_UNORM; + return &emit_R8G8B8_UNORM; case PIPE_FORMAT_R8G8B8A8_UNORM: - return emit_R8G8B8A8_UNORM; + return &emit_R8G8B8A8_UNORM; case PIPE_FORMAT_R8_USCALED: - return emit_R8_USCALED; + return &emit_R8_USCALED; case PIPE_FORMAT_R8G8_USCALED: - return emit_R8G8_USCALED; + return &emit_R8G8_USCALED; case PIPE_FORMAT_R8G8B8_USCALED: - return emit_R8G8B8_USCALED; + return &emit_R8G8B8_USCALED; case PIPE_FORMAT_R8G8B8A8_USCALED: - return emit_R8G8B8A8_USCALED; + return &emit_R8G8B8A8_USCALED; case PIPE_FORMAT_R8_SNORM: - return emit_R8_SNORM; + return &emit_R8_SNORM; case PIPE_FORMAT_R8G8_SNORM: - return emit_R8G8_SNORM; + return &emit_R8G8_SNORM; case PIPE_FORMAT_R8G8B8_SNORM: - return emit_R8G8B8_SNORM; + return &emit_R8G8B8_SNORM; case PIPE_FORMAT_R8G8B8A8_SNORM: - return emit_R8G8B8A8_SNORM; + return &emit_R8G8B8A8_SNORM; case PIPE_FORMAT_R8_SSCALED: - return emit_R8_SSCALED; + return &emit_R8_SSCALED; case PIPE_FORMAT_R8G8_SSCALED: - return emit_R8G8_SSCALED; + return &emit_R8G8_SSCALED; case PIPE_FORMAT_R8G8B8_SSCALED: - return emit_R8G8B8_SSCALED; + return &emit_R8G8B8_SSCALED; case PIPE_FORMAT_R8G8B8A8_SSCALED: - return emit_R8G8B8A8_SSCALED; + return &emit_R8G8B8A8_SSCALED; case PIPE_FORMAT_A8R8G8B8_UNORM: - return emit_A8R8G8B8_UNORM; + return &emit_A8R8G8B8_UNORM; case PIPE_FORMAT_B8G8R8A8_UNORM: - return emit_B8G8R8A8_UNORM; + return &emit_B8G8R8A8_UNORM; default: assert(0); - return emit_NULL; + return &emit_NULL; } } diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index a54ac5a82f..634b05b8a9 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -26,6 +26,7 @@ */ +#include "pipe/p_config.h" #include "pipe/p_compiler.h" #include "pipe/p_util.h" #include "util/u_simple_list.h" @@ -33,7 +34,7 @@ #include "translate.h" -#if defined(__i386__) || defined(__386__) || defined(i386) +#if defined(PIPE_ARCH_X86) #include "rtasm/rtasm_cpu.h" #include "rtasm/rtasm_x86sse.h" @@ -617,7 +618,7 @@ struct translate *translate_sse2_create( const struct translate_key *key ) #else -void translate_create_sse( const struct translate_key *key ) +struct translate *translate_sse2_create( const struct translate_key *key ) { return NULL; } diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h index 48ec7a4a96..f9963ce0e2 100644 --- a/src/gallium/auxiliary/util/u_time.h +++ b/src/gallium/auxiliary/util/u_time.h @@ -61,7 +61,7 @@ struct util_time #if defined(PIPE_OS_LINUX) struct timeval tv; #else - long long counter; + int64_t counter; #endif }; diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index a4b772bc4f..96b21d998d 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -52,39 +52,55 @@ #endif /* __MSC__ */ -typedef unsigned int uint; -typedef unsigned char ubyte; -typedef unsigned char boolean; -typedef unsigned short ushort; -typedef unsigned long long uint64; - - #if defined(__MSC__) -typedef char int8_t; -typedef unsigned char uint8_t; -typedef short int16_t; -typedef unsigned short uint16_t; -typedef long int32_t; -typedef unsigned long uint32_t; -typedef long long int64_t; -typedef unsigned long long uint64_t; +typedef __int8 int8_t; +typedef unsigned __int8 uint8_t; +typedef __int16 int16_t; +typedef unsigned __int16 uint16_t; +typedef __int32 int32_t; +typedef unsigned __int32 uint32_t; +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; #if defined(_WIN64) typedef __int64 intptr_t; typedef unsigned __int64 uintptr_t; #else -typedef int intptr_t; -typedef unsigned int uintptr_t; +typedef __int32 intptr_t; +typedef unsigned __int32 uintptr_t; #endif +#ifndef __cplusplus +#define false 0 +#define true 1 +#define bool _Bool +typedef int _Bool; +#define __bool_true_false_are_defined 1 +#endif /* !__cplusplus */ + #else #include +#include #endif -#define TRUE 1 -#define FALSE 0 +typedef unsigned int uint; +typedef unsigned char ubyte; +typedef unsigned short ushort; +typedef uint64_t uint64; + +#if 0 +#define boolean bool +#else +typedef unsigned char boolean; +#endif +#ifndef TRUE +#define TRUE true +#endif +#ifndef FALSE +#define FALSE false +#endif /* Function inlining */ diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h index 6ba211a1fc..d2d2ae1617 100644 --- a/src/gallium/include/pipe/p_config.h +++ b/src/gallium/include/pipe/p_config.h @@ -35,6 +35,10 @@ * this file is auto-generated by an autoconf-like tool at some point, as some * things cannot be determined by existing defines alone. * + * See also: + * - http://gcc.gnu.org/onlinedocs/cpp/Common-Predefined-Macros.html + * - echo | gcc -dM -E - | sort + * - http://msdn.microsoft.com/en-us/library/b0084kay.aspx * @author José Fonseca */ @@ -63,11 +67,11 @@ * Processor architecture */ -#if defined(_X86_) || defined(__i386__) || defined(__386__) || defined(i386) +#if defined(__i386__) /* gcc */ || defined(_M_IX86) /* msvc */ || defined(_X86_) || defined(__386__) || defined(i386) #define PIPE_ARCH_X86 #endif -#if 0 /* FIXME */ +#if defined(__x86_64__) /* gcc */ || defined(_M_X64) /* msvc */ || defined(_M_AMD64) /* msvc */ #define PIPE_ARCH_X86_64 #endif diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index 0af635be57..05eca75201 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -59,6 +59,13 @@ extern "C" { #endif #endif + +/* MSVC bebore VC7 does not have the __FUNCTION__ macro */ +#if defined(_MSC_VER) && _MSC_VER < 1300 +#define __FUNCTION__ "???" +#endif + + void _debug_vprintf(const char *format, va_list ap); @@ -127,8 +134,8 @@ void _debug_break(void); #ifdef DEBUG #if (defined(__i386__) || defined(__386__)) && defined(__GNUC__) #define debug_break() __asm("int3") -#elif (defined(__i386__) || defined(__386__)) && defined(__MSC__) -#define debug_break() _asm {int 3} +#elif defined(_M_IX86) && defined(_MSC_VER) +#define debug_break() do { _asm {int 3} } while(0) #else #define debug_break() _debug_break() #endif -- cgit v1.2.3 From 4767c10cbb87a77e6cb24c53815c38bd5887c771 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 28 May 2008 21:47:25 +0900 Subject: translate: Mark functions as PIPE_CDECL. --- src/gallium/auxiliary/translate/translate_sse.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index 2fc8b9d3d0..69dd4d3dff 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -534,7 +534,7 @@ static void translate_sse_release( struct translate *translate ) FREE(p); } -static void translate_sse_run_elts( struct translate *translate, +static void PIPE_CDECL translate_sse_run_elts( struct translate *translate, const unsigned *elts, unsigned count, void *output_buffer ) @@ -547,7 +547,7 @@ static void translate_sse_run_elts( struct translate *translate, output_buffer ); } -static void translate_sse_run( struct translate *translate, +static void PIPE_CDECL translate_sse_run( struct translate *translate, unsigned start, unsigned count, void *output_buffer ) -- cgit v1.2.3 From 7a986792dabe6556c63b2f2a997c7c6217604e2d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 28 May 2008 21:48:30 +0900 Subject: tgsi: Observe constness. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 648afa2a51..b018ea9fa1 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -754,7 +754,7 @@ tgsi_dump_instruction( } for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; if( !first_reg ) { CHR( ',' ); @@ -812,7 +812,7 @@ tgsi_dump_instruction( } for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { - struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + const struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; if( !first_reg ) { CHR( ',' ); -- cgit v1.2.3 From ff44dd5cde47c81f90c5293b904e1c7edd006be5 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 28 May 2008 15:10:19 +0200 Subject: draw: Decorate callbacks with PIPE_CDECL. --- src/gallium/auxiliary/draw/draw_vs_varient.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 119a3a04b5..784ae41205 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -130,10 +130,10 @@ static void do_viewport( struct draw_vs_varient_generic *vsvg, } -static void vsvg_run_elts( struct draw_vs_varient *varient, - const unsigned *elts, - unsigned count, - void *output_buffer) +static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, + const unsigned *elts, + unsigned count, + void *output_buffer ) { struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; @@ -186,10 +186,10 @@ static void vsvg_run_elts( struct draw_vs_varient *varient, } -static void vsvg_run_linear( struct draw_vs_varient *varient, - unsigned start, - unsigned count, - void *output_buffer ) +static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, + unsigned start, + unsigned count, + void *output_buffer ) { struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; -- cgit v1.2.3 From e407e83966f2d7d6d9751fc0069ebacd4808d89d Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 28 May 2008 15:10:39 +0200 Subject: scons: List missing files for draw module. --- src/gallium/auxiliary/draw/SConscript | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 3b5d5ed492..96f1ae248f 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -15,7 +15,7 @@ draw = env.ConvenienceLibrary( 'draw_pipe_stipple.c', 'draw_pipe_twoside.c', 'draw_pipe_unfilled.c', - 'draw_pipe_util.c', + 'draw_pipe_util.c', 'draw_pipe_validate.c', 'draw_pipe_vbuf.c', 'draw_pipe_wide_line.c', @@ -25,8 +25,10 @@ draw = env.ConvenienceLibrary( 'draw_pt_emit.c', 'draw_pt_fetch.c', 'draw_pt_fetch_emit.c', + 'draw_pt_fetch_shade_emit.c', 'draw_pt_fetch_shade_pipeline.c', 'draw_pt_post_vs.c', + 'draw_pt_util.c', 'draw_pt_varray.c', 'draw_pt_vcache.c', 'draw_vertex.c', @@ -34,6 +36,7 @@ draw = env.ConvenienceLibrary( 'draw_vs_exec.c', 'draw_vs_llvm.c', 'draw_vs_sse.c', + 'draw_vs_varient.c' ]) auxiliaries.insert(0, draw) -- cgit v1.2.3 From 25cec212d05b613a83eb2bc2167e3dab88da0967 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 28 May 2008 22:19:26 +0900 Subject: scons: Add new files. --- src/gallium/auxiliary/draw/SConscript | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 96f1ae248f..26919a2298 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -31,8 +31,11 @@ draw = env.ConvenienceLibrary( 'draw_pt_util.c', 'draw_pt_varray.c', 'draw_pt_vcache.c', + 'draw_pt_util.c', 'draw_vertex.c', 'draw_vs.c', + 'draw_vs_aos.c', + 'draw_vs_aos_io.c', 'draw_vs_exec.c', 'draw_vs_llvm.c', 'draw_vs_sse.c', -- cgit v1.2.3 From 364f75d9dbc2b8e19c884b9cc74676ab6cbee60d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 28 May 2008 22:19:58 +0900 Subject: draw: Use PIPE_CDECL. --- src/gallium/auxiliary/draw/draw_vs_aos.c | 20 ++++++++++---------- src/gallium/auxiliary/draw/draw_vs_aos.h | 2 +- 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index d3770b2c53..0cd82ff599 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1189,7 +1189,7 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } -static PIPE_CDECL void do_lit( struct aos_machine *machine, +static void PIPE_CDECL do_lit( struct aos_machine *machine, float *result, const float *in, unsigned count ) @@ -1223,7 +1223,7 @@ static PIPE_CDECL void do_lit( struct aos_machine *machine, } -static PIPE_CDECL void do_lit_lut( struct aos_machine *machine, +static void PIPE_CDECL do_lit_lut( struct aos_machine *machine, float *result, const float *in, unsigned count ) @@ -2119,10 +2119,10 @@ static void vaos_destroy( struct draw_vs_varient *varient ) FREE(vaos); } -static void vaos_run_elts( struct draw_vs_varient *varient, - const unsigned *elts, - unsigned count, - void *output_buffer ) +static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, + const unsigned *elts, + unsigned count, + void *output_buffer ) { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; @@ -2133,10 +2133,10 @@ static void vaos_run_elts( struct draw_vs_varient *varient, output_buffer ); } -static void vaos_run_linear( struct draw_vs_varient *varient, - unsigned start, - unsigned count, - void *output_buffer ) +static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, + unsigned start, + unsigned count, + void *output_buffer ) { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index b47413ff43..837b32794f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -60,7 +60,7 @@ struct x86_function; #define FPU_RND_NEAREST 2 struct aos_machine; -typedef void PIPE_CDECL (*lit_func)( struct aos_machine *, +typedef void (PIPE_CDECL *lit_func)( struct aos_machine *, float *result, const float *in, unsigned count ); -- cgit v1.2.3 From 648da5158e5f418bf859aee6aa4532b6899b0d94 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 28 May 2008 16:36:45 +0100 Subject: rtasm: special case for [ebp] --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 2415b0156b..672d2ff554 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -328,7 +328,7 @@ struct x86_reg x86_make_disp( struct x86_reg reg, else reg.disp += disp; - if (reg.disp == 0) + if (reg.disp == 0 && reg.idx != reg_BP) reg.mod = mod_INDIRECT; else if (reg.disp <= 127 && reg.disp >= -128) reg.mod = mod_DISP8; -- cgit v1.2.3 From 728d1f7f43b6db9f4f42c2d16ba223c492d1147d Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 28 May 2008 23:54:18 +0100 Subject: draw: enable FSE by default --- src/gallium/auxiliary/draw/draw_pt.c | 11 ++- src/gallium/auxiliary/draw/draw_vs.h | 4 ++ src/gallium/auxiliary/draw/draw_vs_aos.c | 105 +++++++++++++++++++++------- src/gallium/auxiliary/draw/draw_vs_aos.h | 34 +++++---- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 25 ++++--- src/gallium/auxiliary/draw/draw_vs_sse.c | 14 ++-- 6 files changed, 129 insertions(+), 64 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 75f44d503e..d48c6c220d 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -75,7 +75,7 @@ draw_pt_arrays(struct draw_context *draw, if (opt == 0) middle = draw->pt.middle.fetch_emit; - else if (opt == PT_SHADE && draw->pt.test_fse) + else if (opt == PT_SHADE) middle = draw->pt.middle.fetch_shade_emit; else middle = draw->pt.middle.general; @@ -118,12 +118,9 @@ boolean draw_pt_init( struct draw_context *draw ) if (!draw->pt.middle.fetch_emit) return FALSE; - if (draw->pt.test_fse) { - draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw ); - if (!draw->pt.middle.fetch_shade_emit) - return FALSE; - } - + draw->pt.middle.fetch_shade_emit = draw_pt_middle_fse( draw ); + if (!draw->pt.middle.fetch_shade_emit) + return FALSE; draw->pt.middle.general = draw_pt_fetch_pipeline_or_emit( draw ); if (!draw->pt.middle.general) diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 01171bc23d..7aa0415baf 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -123,6 +123,10 @@ struct draw_vertex_shader { struct tgsi_shader_info info; + /* Extracted from shader: + */ + const float (*immediates)[4]; + /* */ struct draw_vs_varient *varient[16]; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 0cd82ff599..9056785e7a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -66,6 +66,37 @@ static INLINE boolean eq( struct x86_reg a, a.disp == b.disp); } +struct x86_reg aos_get_x86( struct aos_compilation *cp, + unsigned value ) +{ + if (cp->ebp != value) { + unsigned offset; + + switch (value) { + case X86_IMMEDIATES: + offset = Offset(struct aos_machine, immediates); + break; + case X86_CONSTANTS: + offset = Offset(struct aos_machine, constants); + break; + case X86_ATTRIBS: + offset = Offset(struct aos_machine, attrib); + break; + default: + assert(0); + offset = 0; + } + + x86_mov(cp->func, cp->temp_EBP, + x86_make_disp(cp->machine_EDX, offset)); + /* x86_deref(x86_make_disp(cp->machine_EDX, offset))); */ + + cp->ebp = value; + } + + return cp->temp_EBP; +} + static struct x86_reg get_reg_ptr(struct aos_compilation *cp, unsigned file, @@ -83,15 +114,15 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp, case TGSI_FILE_TEMPORARY: return x86_make_disp(ptr, Offset(struct aos_machine, temp[idx])); - case TGSI_FILE_IMMEDIATE: - return x86_make_disp(ptr, Offset(struct aos_machine, immediate[idx])); - - case TGSI_FILE_CONSTANT: - return x86_make_disp(ptr, Offset(struct aos_machine, constant[idx])); - case AOS_FILE_INTERNAL: return x86_make_disp(ptr, Offset(struct aos_machine, internal[idx])); + case TGSI_FILE_IMMEDIATE: + return x86_make_disp(aos_get_x86(cp, X86_IMMEDIATES), idx * 4 * sizeof(float)); + + case TGSI_FILE_CONSTANT: + return x86_make_disp(aos_get_x86(cp, X86_CONSTANTS), idx * 4 * sizeof(float)); + default: ERROR(cp, "unknown reg file"); return x86_make_reg(0,0); @@ -1865,6 +1896,7 @@ static boolean emit_rhw_viewport( struct aos_compilation *cp ) } +#if 0 static boolean note_immediate( struct aos_compilation *cp, struct tgsi_full_immediate *imm ) { @@ -1877,6 +1909,7 @@ static boolean note_immediate( struct aos_compilation *cp, return TRUE; } +#endif @@ -1939,6 +1972,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, cp.outbuf_ECX = x86_make_reg(file_REG32, reg_CX); cp.machine_EDX = x86_make_reg(file_REG32, reg_DX); cp.count_ESI = x86_make_reg(file_REG32, reg_SI); + cp.temp_EBP = x86_make_reg(file_REG32, reg_BP); x86_init_func(cp.func); @@ -1946,6 +1980,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, x86_push(cp.func, cp.idx_EBX); x86_push(cp.func, cp.count_ESI); + x86_push(cp.func, cp.temp_EBP); /* Load arguments into regs: @@ -1988,8 +2023,10 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, switch (parse.FullToken.Token.Type) { case TGSI_TOKEN_TYPE_IMMEDIATE: +#if 0 if (!note_immediate( &cp, &parse.FullToken.FullImmediate )) goto fail; +#endif break; case TGSI_TOKEN_TYPE_INSTRUCTION: @@ -2072,6 +2109,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, if (cp.func->need_emms) mmx_emms(cp.func); + x86_pop(cp.func, cp.temp_EBP); x86_pop(cp.func, cp.count_ESI); x86_pop(cp.func, cp.idx_EBX); @@ -2098,26 +2136,14 @@ static void vaos_set_buffer( struct draw_vs_varient *varient, for (i = 0; i < vaos->base.key.nr_inputs; i++) { if (vaos->base.key.element[i].in.buffer == buf) { - vaos->machine->attrib[i].input_ptr = ((char *)ptr + - vaos->base.key.element[i].in.offset); - vaos->machine->attrib[i].input_stride = stride; + vaos->attrib[i].input_ptr = ((char *)ptr + + vaos->base.key.element[i].in.offset); + vaos->attrib[i].input_stride = stride; } } } -static void vaos_destroy( struct draw_vs_varient *varient ) -{ - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; - - if (vaos->machine) - align_free( vaos->machine ); - - x86_release_func( &vaos->func[0] ); - x86_release_func( &vaos->func[1] ); - - FREE(vaos); -} static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, const unsigned *elts, @@ -2127,6 +2153,10 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; + vaos->machine->constants = vaos->draw->pt.user.constants; + vaos->machine->immediates = vaos->base.vs->immediates; + vaos->machine->attrib = vaos->attrib; + vaos->gen_run_elts( varient, elts, count, @@ -2141,6 +2171,10 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; + vaos->machine->constants = vaos->draw->pt.user.constants; + vaos->machine->immediates = vaos->base.vs->immediates; + vaos->machine->attrib = vaos->attrib; + vaos->gen_run_linear( varient, start, count, @@ -2153,10 +2187,6 @@ static void vaos_set_constants( struct draw_vs_varient *varient, { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; - memcpy(vaos->machine->constant, - constants, - (vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1) * 4 * sizeof(float)); - #if 0 unsigned i; for (i =0; i < vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1; i++) @@ -2187,6 +2217,21 @@ static void vaos_set_viewport( struct draw_vs_varient *varient, memcpy(vaos->machine->translate, viewport->translate, 4 * sizeof(float)); } +static void vaos_destroy( struct draw_vs_varient *varient ) +{ + struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + + if (vaos->machine) + align_free( vaos->machine ); + + FREE( vaos->attrib ); + + x86_release_func( &vaos->func[0] ); + x86_release_func( &vaos->func[1] ); + + FREE(vaos); +} + static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, @@ -2207,6 +2252,11 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, vaos->base.run_elts = vaos_run_elts; vaos->draw = vs->draw; + + vaos->attrib = MALLOC( key->nr_inputs * sizeof(vaos->attrib[0]) ); + if (!vaos->attrib) + goto fail; + vaos->machine = align_malloc( sizeof(struct aos_machine), 16 ); if (!vaos->machine) goto fail; @@ -2233,7 +2283,10 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, return &vaos->base; fail: - if (vaos->machine) + if (vaos && vaos->attrib) + FREE(vaos->attrib); + + if (vaos && vaos->machine) align_free( vaos->machine ); if (vaos) diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 837b32794f..295d2cb3fe 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -78,6 +78,14 @@ struct lit_info { #define MAX_SHINE_TAB 4 #define MAX_LIT_INFO 16 +struct aos_attrib { + const void *input_ptr; + unsigned input_stride; +}; + + + + /* This is the temporary storage used by all the aos_sse vs varients. * Create one per context and reuse by passing a pointer in at * vs_varient creation?? @@ -86,8 +94,6 @@ struct aos_machine { float input [MAX_INPUTS ][4]; float output [MAX_OUTPUTS ][4]; float temp [MAX_TEMPS ][4]; - float constant [MAX_CONSTANTS ][4]; /* fixme -- should just be a pointer */ - float immediate[MAX_IMMEDIATES][4]; /* fixme -- should just be a pointer */ float internal [MAX_INTERNALS ][4]; float scale[4]; /* viewport */ @@ -105,12 +111,10 @@ struct aos_machine { ushort fpu_restore; ushort fpucntl; /* one of FPU_* above */ - struct { - const void *input_ptr; - unsigned input_stride; + const float (*immediates)[4]; /* points to shader data */ + const float (*constants)[4]; /* points to draw data */ - unsigned output_offset; - } attrib[PIPE_MAX_ATTRIBS]; + const struct aos_attrib *attrib; /* points to ? */ }; @@ -132,6 +136,7 @@ struct aos_compilation { unsigned last_used; } xmm[8]; + unsigned ebp; /* one of X86_* */ boolean input_fetched[PIPE_MAX_ATTRIBS]; unsigned output_last_write[PIPE_MAX_ATTRIBS]; @@ -148,6 +153,7 @@ struct aos_compilation { struct x86_reg outbuf_ECX; struct x86_reg machine_EDX; struct x86_reg count_ESI; /* decrements to zero */ + struct x86_reg temp_EBP; }; struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp ); @@ -192,20 +198,20 @@ do { \ } while (0) +#define X86_NULL 0 +#define X86_IMMEDIATES 1 +#define X86_CONSTANTS 2 +#define X86_ATTRIBS 3 - +struct x86_reg aos_get_x86( struct aos_compilation *cp, + unsigned value ); struct draw_vs_varient_aos_sse { struct draw_vs_varient base; struct draw_context *draw; -#if 0 - struct { - const void *ptr; - unsigned stride; - } attrib[PIPE_MAX_ATTRIBS]; -#endif + struct aos_attrib *attrib; struct aos_machine *machine; /* XXX: temporarily unshared */ diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index 836110f382..45e2092209 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -91,25 +91,25 @@ static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp, -static void get_src_ptr( struct x86_function *func, +static void get_src_ptr( struct aos_compilation *cp, struct x86_reg src, - struct x86_reg machine, struct x86_reg elt, unsigned a ) { - struct x86_reg input_ptr = - x86_make_disp(machine, - Offset(struct aos_machine, attrib[a].input_ptr)); + struct x86_reg attrib = x86_make_disp(aos_get_x86( cp, X86_ATTRIBS ), + a * sizeof(struct aos_attrib)); - struct x86_reg input_stride = - x86_make_disp(machine, - Offset(struct aos_machine, attrib[a].input_stride)); + struct x86_reg input_ptr = x86_make_disp(attrib, + Offset(struct aos_attrib, input_ptr)); + + struct x86_reg input_stride = x86_make_disp(attrib, + Offset(struct aos_attrib, input_stride)); /* Calculate pointer to current attrib: */ - x86_mov(func, src, input_stride); - x86_imul(func, src, elt); - x86_add(func, src, input_ptr); + x86_mov(cp->func, src, input_stride); + x86_imul(cp->func, src, elt); + x86_add(cp->func, src, input_ptr); } @@ -134,9 +134,8 @@ static boolean load_input( struct aos_compilation *cp, /* Figure out source pointer address: */ - get_src_ptr(cp->func, + get_src_ptr(cp, src, - cp->machine_EDX, linear ? cp->idx_EBX : x86_deref(cp->idx_EBX), idx); diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 7781782ae8..24f619a278 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -68,8 +68,6 @@ struct draw_sse_vertex_shader { codegen_function func; struct tgsi_exec_machine *machine; - - float immediates[TGSI_EXEC_NUM_IMMEDIATES][4]; }; @@ -107,7 +105,7 @@ vs_sse_run_linear( struct draw_vertex_shader *base, machine->Outputs, (float (*)[4])constants, machine->Temps, - shader->immediates, + (float (*)[4])shader->base.immediates, input, base->info.num_inputs, input_stride, @@ -130,6 +128,8 @@ vs_sse_delete( struct draw_vertex_shader *base ) x86_release_func( &shader->sse2_program ); + align_free(shader->base.immediates); + FREE( (void*) shader->base.state.tokens ); FREE( shader ); } @@ -161,12 +161,18 @@ draw_create_vs_sse(struct draw_context *draw, vs->base.prepare = vs_sse_prepare; vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; + + vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * + sizeof(float), 16); + vs->machine = &draw->vs.machine; x86_init_func( &vs->sse2_program ); if (!tgsi_emit_sse2( (struct tgsi_token *) vs->base.state.tokens, - &vs->sse2_program, vs->immediates, TRUE )) + &vs->sse2_program, + (float (*)[4])vs->base.immediates, + TRUE )) goto fail; vs->func = (codegen_function) x86_get_func( &vs->sse2_program ); -- cgit v1.2.3 From 62628c4d3d497cbca73fde869c9069fa90e6453e Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 29 May 2008 00:17:53 +0100 Subject: draw: share machine --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/SConscript | 1 + src/gallium/auxiliary/draw/draw_context.c | 3 + src/gallium/auxiliary/draw/draw_private.h | 9 + .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 6 - src/gallium/auxiliary/draw/draw_vs.c | 43 ++- src/gallium/auxiliary/draw/draw_vs.h | 24 +- src/gallium/auxiliary/draw/draw_vs_aos.c | 282 ++----------------- src/gallium/auxiliary/draw/draw_vs_aos.h | 25 +- src/gallium/auxiliary/draw/draw_vs_aos_machine.c | 297 +++++++++++++++++++++ src/gallium/auxiliary/draw/draw_vs_varient.c | 49 +--- 11 files changed, 412 insertions(+), 328 deletions(-) create mode 100644 src/gallium/auxiliary/draw/draw_vs_aos_machine.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index 9a88ecc070..f2e36a89e9 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -37,6 +37,7 @@ C_SOURCES = \ draw_vs_varient.c \ draw_vs_aos.c \ draw_vs_aos_io.c \ + draw_vs_aos_machine.c \ draw_vs_exec.c \ draw_vs_llvm.c \ draw_vs_sse.c diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 26919a2298..925e668f22 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -36,6 +36,7 @@ draw = env.ConvenienceLibrary( 'draw_vs.c', 'draw_vs_aos.c', 'draw_vs_aos_io.c', + 'draw_vs_aos_machine.c', 'draw_vs_exec.c', 'draw_vs_llvm.c', 'draw_vs_sse.c', diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 2242074965..8509baf865 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -174,6 +174,8 @@ void draw_set_viewport_state( struct draw_context *draw, viewport->translate[1] == 0.0f && viewport->translate[2] == 0.0f && viewport->translate[3] == 0.0f); + + draw_vs_set_viewport( draw, viewport ); } @@ -218,6 +220,7 @@ draw_set_mapped_constant_buffer(struct draw_context *draw, const void *buffer) { draw->pt.user.constants = buffer; + draw_vs_set_constants( draw, (const float (*)[4])buffer ); } diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index c095bf3d7b..4cbccc8b5b 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -183,6 +183,10 @@ struct draw_context */ struct gallivm_cpu_engine *engine; + /* Here's another one: + */ + struct aos_machine *aos_machine; + struct translate *fetch; struct translate_cache *fetch_cache; @@ -215,6 +219,11 @@ struct draw_context boolean draw_vs_init( struct draw_context *draw ); void draw_vs_destroy( struct draw_context *draw ); +void draw_vs_set_viewport( struct draw_context *, + const struct pipe_viewport_state * ); + +void draw_vs_set_constants( struct draw_context *, + const float (*constants)[4] ); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 729c7db999..5265a13160 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -189,12 +189,6 @@ static void fse_prepare( struct draw_pt_middle_end *middle, draw->pt.vertex_buffer[buf].pitch ); } - fse->active->set_constants( fse->active, - (const float (*)[4])draw->pt.user.constants ); - - fse->active->set_viewport( fse->active, - &draw->viewport ); - //return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 9b899d404e..a8b6d0c90d 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -41,6 +41,22 @@ + +void draw_vs_set_constants( struct draw_context *draw, + const float (*constants)[4] ) +{ + draw_vs_aos_machine_constants( draw->vs.aos_machine, constants ); +} + + +void draw_vs_set_viewport( struct draw_context *draw, + const struct pipe_viewport_state *viewport ) +{ + draw_vs_aos_machine_viewport( draw->vs.aos_machine, viewport ); +} + + + struct draw_vertex_shader * draw_create_vertex_shader(struct draw_context *draw, const struct pipe_shader_state *shader) @@ -83,6 +99,13 @@ void draw_delete_vertex_shader(struct draw_context *draw, struct draw_vertex_shader *dvs) { + unsigned i; + + for (i = 0; i < dvs->nr_varients; i++) + dvs->varient[i]->destroy( dvs->varient[i] ); + + dvs->nr_varients = 0; + dvs->delete( dvs ); } @@ -110,6 +133,10 @@ draw_vs_init( struct draw_context *draw ) draw->vs.fetch_cache = translate_cache_create(); if (!draw->vs.fetch_cache) return FALSE; + + draw->vs.aos_machine = draw_vs_aos_machine(); + if (!draw->vs.aos_machine) + return FALSE; return TRUE; } @@ -129,6 +156,9 @@ draw_vs_destroy( struct draw_context *draw ) if (draw->vs.emit_cache) translate_cache_destroy(draw->vs.emit_cache); + if (draw->vs.aos_machine) + draw_vs_aos_machine_destroy(draw->vs.aos_machine); + tgsi_exec_machine_free_data(&draw->vs.machine); } @@ -153,10 +183,17 @@ draw_vs_lookup_varient( struct draw_vertex_shader *vs, if (varient == NULL) return NULL; - /* Add it to our list: + /* Add it to our list, could be smarter: */ - assert(vs->nr_varients < Elements(vs->varient)); - vs->varient[vs->nr_varients++] = varient; + if (vs->nr_varients < Elements(vs->varient)) { + vs->varient[vs->nr_varients++] = varient; + } + else { + vs->last_varient++; + vs->last_varient %= Elements(vs->varient); + vs->varient[vs->last_varient]->destroy(vs->varient[vs->last_varient]); + vs->varient[vs->last_varient] = varient; + } /* Done */ diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 7aa0415baf..08c6de8ba8 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -70,16 +70,6 @@ struct draw_vs_varient_key { struct draw_vs_varient; -typedef void (PIPE_CDECL *vsv_run_elts_func)( struct draw_vs_varient *, - const unsigned *elts, - unsigned count, - void *output_buffer); - -typedef void (PIPE_CDECL *vsv_run_linear_func)( struct draw_vs_varient *, - unsigned start, - unsigned count, - void *output_buffer); - struct draw_vs_varient { struct draw_vs_varient_key key; @@ -91,12 +81,6 @@ struct draw_vs_varient { const void *ptr, unsigned stride ); - void (*set_constants)( struct draw_vs_varient *, - const float (*constants)[4] ); - - void (*set_viewport)( struct draw_vs_varient *, - const struct pipe_viewport_state * ); - void (PIPE_CDECL *run_linear)( struct draw_vs_varient *shader, unsigned start, unsigned count, @@ -131,6 +115,7 @@ struct draw_vertex_shader { */ struct draw_vs_varient *varient[16]; unsigned nr_varients; + unsigned last_varient; struct draw_vs_varient *(*create_varient)( struct draw_vertex_shader *shader, const struct draw_vs_varient_key *key ); @@ -217,7 +202,14 @@ static INLINE int draw_vs_varient_key_compare( const struct draw_vs_varient_key } +struct aos_machine *draw_vs_aos_machine( void ); +void draw_vs_aos_machine_destroy( struct aos_machine *machine ); + +void draw_vs_aos_machine_constants( struct aos_machine *machine, + const float (*constants)[4] ); +void draw_vs_aos_machine_viewport( struct aos_machine *machine, + const struct pipe_viewport_state *viewport ); #define MAX_TGSI_VERTICES 4 diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 9056785e7a..b5e4e1e7b1 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -149,70 +149,7 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp, #define X87_CW_ROUND_MASK (3<<10) #define X87_CW_INFINITY (1<<12) -static void do_populate_lut( struct shine_tab *tab, - float unclamped_exponent ) -{ - const float epsilon = 1.0F / 256.0F; - float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon)); - unsigned i; - tab->exponent = unclamped_exponent; /* for later comparison */ - - tab->values[0] = 0; - if (exponent == 0) { - for (i = 1; i < 258; i++) { - tab->values[i] = 1.0; - } - } - else { - for (i = 1; i < 258; i++) { - tab->values[i] = powf((float)i * epsilon, exponent); - } - } -} - -static void init_internals( struct aos_machine *machine ) -{ - unsigned i; - float inv = 1.0f/255.0f; - float f255 = 255.0f; - - ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f); - *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff; - - ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f); - ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f); - ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f); - ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv); - ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255); - ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f); - - - machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP | - X87_CW_EXCEPTION_DENORM_OP | - X87_CW_EXCEPTION_ZERO_DIVIDE | - X87_CW_EXCEPTION_OVERFLOW | - X87_CW_EXCEPTION_UNDERFLOW | - X87_CW_EXCEPTION_PRECISION | - (1<<6) | - X87_CW_ROUND_NEAREST | - X87_CW_PRECISION_DOUBLE_EXT); - - assert(machine->fpu_rnd_nearest == 0x37f); - - machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP | - X87_CW_EXCEPTION_DENORM_OP | - X87_CW_EXCEPTION_ZERO_DIVIDE | - X87_CW_EXCEPTION_OVERFLOW | - X87_CW_EXCEPTION_UNDERFLOW | - X87_CW_EXCEPTION_PRECISION | - (1<<6) | - X87_CW_ROUND_DOWN | - X87_CW_PRECISION_DOUBLE_EXT); - - for (i = 0; i < MAX_SHINE_TAB; i++) - do_populate_lut( &machine->shine_tab[i], 1.0f ); -} static void spill( struct aos_compilation *cp, unsigned idx ) @@ -1220,136 +1157,6 @@ static boolean emit_FRC( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } -static void PIPE_CDECL do_lit( struct aos_machine *machine, - float *result, - const float *in, - unsigned count ) -{ - if (in[0] > 0) - { - if (in[1] <= 0.0) - { - result[0] = 1.0F; - result[1] = in[0]; - result[2] = 1.0; - result[3] = 1.0F; - } - else - { - const float epsilon = 1.0F / 256.0F; - float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon)); - result[0] = 1.0F; - result[1] = in[0]; - result[2] = powf(in[1], exponent); - result[3] = 1.0; - } - } - else - { - result[0] = 1.0F; - result[1] = 0.0; - result[2] = 0.0; - result[3] = 1.0F; - } -} - - -static void PIPE_CDECL do_lit_lut( struct aos_machine *machine, - float *result, - const float *in, - unsigned count ) -{ - if (in[0] > 0) - { - if (in[1] <= 0.0) - { - result[0] = 1.0F; - result[1] = in[0]; - result[2] = 1.0; - result[3] = 1.0F; - return; - } - - if (machine->lit_info[count].shine_tab->exponent != in[3]) { - machine->lit_info[count].func = do_lit; - goto no_luck; - } - - if (in[1] <= 1.0) - { - const float *tab = machine->lit_info[count].shine_tab->values; - float f = in[1] * 256; - int k = (int)f; - float frac = f - (float)k; - - result[0] = 1.0F; - result[1] = in[0]; - result[2] = tab[k] + frac*(tab[k+1]-tab[k]); - result[3] = 1.0; - return; - } - - no_luck: - { - const float epsilon = 1.0F / 256.0F; - float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon)); - result[0] = 1.0F; - result[1] = in[0]; - result[2] = powf(in[1], exponent); - result[3] = 1.0; - } - } - else - { - result[0] = 1.0F; - result[1] = 0.0; - result[2] = 0.0; - result[3] = 1.0F; - } -} - - - -static void PIPE_CDECL populate_lut( struct aos_machine *machine, - float *result, - const float *in, - unsigned count ) -{ - unsigned i, tab; - - /* Search for an existing table for this value. Note that without - * static analysis we don't really know if in[3] will be constant, - * but it usually is... - */ - for (tab = 0; tab < 4; tab++) { - if (machine->shine_tab[tab].exponent == in[3]) { - goto found; - } - } - - for (tab = 0, i = 1; i < 4; i++) { - if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used) - tab = i; - } - - if (machine->shine_tab[tab].last_used == machine->now) { - /* No unused tables (this is not a ffvertex program...). Just - * call pow each time: - */ - machine->lit_info[count].func = do_lit; - machine->lit_info[count].func( machine, result, in, count ); - return; - } - else { - do_populate_lut( &machine->shine_tab[tab], in[3] ); - } - - found: - machine->shine_tab[tab].last_used = machine->now; - machine->lit_info[count].shine_tab = &machine->shine_tab[tab]; - machine->lit_info[count].func = do_lit_lut; - machine->lit_info[count].func( machine, result, in, count ); -} @@ -1413,7 +1220,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst Offset(struct lit_info, func))); } else { - x86_mov_reg_imm( cp->func, ecx, (int)do_lit ); + x86_mov_reg_imm( cp->func, ecx, (int)aos_do_lit ); } x86_call( cp->func, ecx ); @@ -1434,7 +1241,7 @@ static boolean emit_LIT( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } - +#if 0 static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { struct x86_reg dst = get_dst_ptr(cp, &op->FullDstRegisters[0]); @@ -1495,6 +1302,7 @@ static boolean emit_inline_LIT( struct aos_compilation *cp, const struct tgsi_fu return TRUE; } +#endif @@ -1945,7 +1753,7 @@ static void find_last_write_outputs( struct aos_compilation *cp ) } -#define ARG_VARIENT 1 +#define ARG_MACHINE 1 #define ARG_START_ELTS 2 #define ARG_COUNT 3 #define ARG_OUTBUF 4 @@ -1985,7 +1793,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, /* Load arguments into regs: */ - x86_mov(cp.func, cp.machine_EDX, x86_fn_arg(cp.func, ARG_VARIENT)); + x86_mov(cp.func, cp.machine_EDX, x86_fn_arg(cp.func, ARG_MACHINE)); x86_mov(cp.func, cp.idx_EBX, x86_fn_arg(cp.func, ARG_START_ELTS)); x86_mov(cp.func, cp.count_ESI, x86_fn_arg(cp.func, ARG_COUNT)); x86_mov(cp.func, cp.outbuf_ECX, x86_fn_arg(cp.func, ARG_OUTBUF)); @@ -1997,11 +1805,6 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, x86_cmp(cp.func, cp.count_ESI, cp.tmp_EAX); fixup = x86_jcc_forward(cp.func, cc_E); - /* Dig out the machine pointer from inside the varient arg - */ - x86_mov(cp.func, cp.machine_EDX, - x86_make_disp(cp.machine_EDX, - Offset( struct draw_vs_varient_aos_sse, machine ))); save_fpu_state( &cp ); set_fpu_round_nearest( &cp ); @@ -2151,13 +1954,14 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, void *output_buffer ) { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct aos_machine *machine = vaos->draw->vs.aos_machine; - vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; - vaos->machine->constants = vaos->draw->pt.user.constants; - vaos->machine->immediates = vaos->base.vs->immediates; - vaos->machine->attrib = vaos->attrib; + machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; + machine->constants = (const float (*)[4])vaos->draw->pt.user.constants; + machine->immediates = vaos->base.vs->immediates; + machine->attrib = vaos->attrib; - vaos->gen_run_elts( varient, + vaos->gen_run_elts( machine, elts, count, output_buffer ); @@ -2169,61 +1973,25 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, void *output_buffer ) { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; + struct aos_machine *machine = vaos->draw->vs.aos_machine; - vaos->machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; - vaos->machine->constants = vaos->draw->pt.user.constants; - vaos->machine->immediates = vaos->base.vs->immediates; - vaos->machine->attrib = vaos->attrib; + machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; + machine->constants = (const float (*)[4])vaos->draw->pt.user.constants; + machine->immediates = vaos->base.vs->immediates; + machine->attrib = vaos->attrib; - vaos->gen_run_linear( varient, + vaos->gen_run_linear( machine, start, count, output_buffer ); } -static void vaos_set_constants( struct draw_vs_varient *varient, - const float (*constants)[4] ) -{ - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; - -#if 0 - unsigned i; - for (i =0; i < vaos->base.vs->info.file_max[TGSI_FILE_CONSTANT] + 1; i++) - debug_printf("state %d: %f %f %f %f\n", - i, - constants[i][0], - constants[i][1], - constants[i][2], - constants[i][3]); -#endif - - { - unsigned i; - for (i = 0; i < MAX_LIT_INFO; i++) { - vaos->machine->lit_info[i].func = populate_lut; - vaos->machine->now++; - } - } -} - - -static void vaos_set_viewport( struct draw_vs_varient *varient, - const struct pipe_viewport_state *viewport ) -{ - struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; - - memcpy(vaos->machine->scale, viewport->scale, 4 * sizeof(float)); - memcpy(vaos->machine->translate, viewport->translate, 4 * sizeof(float)); -} static void vaos_destroy( struct draw_vs_varient *varient ) { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; - if (vaos->machine) - align_free( vaos->machine ); - FREE( vaos->attrib ); x86_release_func( &vaos->func[0] ); @@ -2245,8 +2013,6 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, vaos->base.key = *key; vaos->base.vs = vs; vaos->base.set_input = vaos_set_buffer; - vaos->base.set_constants = vaos_set_constants; - vaos->base.set_viewport = vaos_set_viewport; vaos->base.destroy = vaos_destroy; vaos->base.run_linear = vaos_run_linear; vaos->base.run_elts = vaos_run_elts; @@ -2257,13 +2023,6 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, if (!vaos->attrib) goto fail; - vaos->machine = align_malloc( sizeof(struct aos_machine), 16 ); - if (!vaos->machine) - goto fail; - - memset(vaos->machine, 0, sizeof(struct aos_machine)); - init_internals(vaos->machine); - tgsi_dump(vs->state.tokens, 0); if (!build_vertex_program( vaos, TRUE )) @@ -2272,11 +2031,11 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, if (!build_vertex_program( vaos, FALSE )) goto fail; - vaos->gen_run_linear = (vsv_run_linear_func)x86_get_func(&vaos->func[0]); + vaos->gen_run_linear = (vaos_run_linear_func)x86_get_func(&vaos->func[0]); if (!vaos->gen_run_linear) goto fail; - vaos->gen_run_elts = (vsv_run_elts_func)x86_get_func(&vaos->func[1]); + vaos->gen_run_elts = (vaos_run_elts_func)x86_get_func(&vaos->func[1]); if (!vaos->gen_run_elts) goto fail; @@ -2286,9 +2045,6 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, if (vaos && vaos->attrib) FREE(vaos->attrib); - if (vaos && vaos->machine) - align_free( vaos->machine ); - if (vaos) x86_release_func( &vaos->func[0] ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 295d2cb3fe..89a9174151 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -60,10 +60,16 @@ struct x86_function; #define FPU_RND_NEAREST 2 struct aos_machine; -typedef void (PIPE_CDECL *lit_func)( struct aos_machine *, +typedef void PIPE_CDECL (*lit_func)( struct aos_machine *, float *result, const float *in, unsigned count ); + +PIPE_CDECL void aos_do_lit( struct aos_machine *machine, + float *result, + const float *in, + unsigned count ); + struct shine_tab { float exponent; float values[258]; @@ -207,16 +213,25 @@ struct x86_reg aos_get_x86( struct aos_compilation *cp, unsigned value ); +typedef void (PIPE_CDECL *vaos_run_elts_func)( struct aos_machine *, + const unsigned *elts, + unsigned count, + void *output_buffer); + +typedef void (PIPE_CDECL *vaos_run_linear_func)( struct aos_machine *, + unsigned start, + unsigned count, + void *output_buffer); + + struct draw_vs_varient_aos_sse { struct draw_vs_varient base; struct draw_context *draw; struct aos_attrib *attrib; - struct aos_machine *machine; /* XXX: temporarily unshared */ - - vsv_run_linear_func gen_run_linear; - vsv_run_elts_func gen_run_elts; + vaos_run_linear_func gen_run_linear; + vaos_run_elts_func gen_run_elts; struct x86_function func[2]; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c new file mode 100644 index 0000000000..53e999b191 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c @@ -0,0 +1,297 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_parse.h" +#include "tgsi/util/tgsi_util.h" +#include "tgsi/exec/tgsi_exec.h" +#include "draw_vs.h" +#include "draw_vs_aos.h" +#include "draw_vertex.h" + +#include "rtasm/rtasm_x86sse.h" + + +#define X87_CW_EXCEPTION_INV_OP (1<<0) +#define X87_CW_EXCEPTION_DENORM_OP (1<<1) +#define X87_CW_EXCEPTION_ZERO_DIVIDE (1<<2) +#define X87_CW_EXCEPTION_OVERFLOW (1<<3) +#define X87_CW_EXCEPTION_UNDERFLOW (1<<4) +#define X87_CW_EXCEPTION_PRECISION (1<<5) +#define X87_CW_PRECISION_SINGLE (0<<8) +#define X87_CW_PRECISION_RESERVED (1<<8) +#define X87_CW_PRECISION_DOUBLE (2<<8) +#define X87_CW_PRECISION_DOUBLE_EXT (3<<8) +#define X87_CW_PRECISION_MASK (3<<8) +#define X87_CW_ROUND_NEAREST (0<<10) +#define X87_CW_ROUND_DOWN (1<<10) +#define X87_CW_ROUND_UP (2<<10) +#define X87_CW_ROUND_ZERO (3<<10) +#define X87_CW_ROUND_MASK (3<<10) +#define X87_CW_INFINITY (1<<12) + + +PIPE_CDECL void aos_do_lit( struct aos_machine *machine, + float *result, + const float *in, + unsigned count ) +{ + if (in[0] > 0) + { + if (in[1] <= 0.0) + { + result[0] = 1.0F; + result[1] = in[0]; + result[2] = 1.0; + result[3] = 1.0F; + } + else + { + const float epsilon = 1.0F / 256.0F; + float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon)); + result[0] = 1.0F; + result[1] = in[0]; + result[2] = powf(in[1], exponent); + result[3] = 1.0; + } + } + else + { + result[0] = 1.0F; + result[1] = 0.0; + result[2] = 0.0; + result[3] = 1.0F; + } +} + + +static PIPE_CDECL void do_lit_lut( struct aos_machine *machine, + float *result, + const float *in, + unsigned count ) +{ + if (in[0] > 0) + { + if (in[1] <= 0.0) + { + result[0] = 1.0F; + result[1] = in[0]; + result[2] = 1.0; + result[3] = 1.0F; + return; + } + + if (machine->lit_info[count].shine_tab->exponent != in[3]) { + machine->lit_info[count].func = aos_do_lit; + goto no_luck; + } + + if (in[1] <= 1.0) + { + const float *tab = machine->lit_info[count].shine_tab->values; + float f = in[1] * 256; + int k = (int)f; + float frac = f - (float)k; + + result[0] = 1.0F; + result[1] = in[0]; + result[2] = tab[k] + frac*(tab[k+1]-tab[k]); + result[3] = 1.0; + return; + } + + no_luck: + { + const float epsilon = 1.0F / 256.0F; + float exponent = CLAMP(in[3], -(128.0F - epsilon), (128.0F - epsilon)); + result[0] = 1.0F; + result[1] = in[0]; + result[2] = powf(in[1], exponent); + result[3] = 1.0; + } + } + else + { + result[0] = 1.0F; + result[1] = 0.0; + result[2] = 0.0; + result[3] = 1.0F; + } +} + + +static void do_populate_lut( struct shine_tab *tab, + float unclamped_exponent ) +{ + const float epsilon = 1.0F / 256.0F; + float exponent = CLAMP(unclamped_exponent, -(128.0F - epsilon), (128.0F - epsilon)); + unsigned i; + + tab->exponent = unclamped_exponent; /* for later comparison */ + + tab->values[0] = 0; + if (exponent == 0) { + for (i = 1; i < 258; i++) { + tab->values[i] = 1.0; + } + } + else { + for (i = 1; i < 258; i++) { + tab->values[i] = powf((float)i * epsilon, exponent); + } + } +} + + + + +static void PIPE_CDECL populate_lut( struct aos_machine *machine, + float *result, + const float *in, + unsigned count ) +{ + unsigned i, tab; + + /* Search for an existing table for this value. Note that without + * static analysis we don't really know if in[3] will be constant, + * but it usually is... + */ + for (tab = 0; tab < 4; tab++) { + if (machine->shine_tab[tab].exponent == in[3]) { + goto found; + } + } + + for (tab = 0, i = 1; i < 4; i++) { + if (machine->shine_tab[i].last_used < machine->shine_tab[tab].last_used) + tab = i; + } + + if (machine->shine_tab[tab].last_used == machine->now) { + /* No unused tables (this is not a ffvertex program...). Just + * call pow each time: + */ + machine->lit_info[count].func = aos_do_lit; + machine->lit_info[count].func( machine, result, in, count ); + return; + } + else { + do_populate_lut( &machine->shine_tab[tab], in[3] ); + } + + found: + machine->shine_tab[tab].last_used = machine->now; + machine->lit_info[count].shine_tab = &machine->shine_tab[tab]; + machine->lit_info[count].func = do_lit_lut; + machine->lit_info[count].func( machine, result, in, count ); +} + + +void draw_vs_aos_machine_constants( struct aos_machine *machine, + const float (*constants)[4] ) +{ + machine->constants = constants; + + { + unsigned i; + for (i = 0; i < MAX_LIT_INFO; i++) { + machine->lit_info[i].func = populate_lut; + machine->now++; + } + } +} + + +void draw_vs_aos_machine_viewport( struct aos_machine *machine, + const struct pipe_viewport_state *viewport ) +{ + memcpy(machine->scale, viewport->scale, 4 * sizeof(float)); + memcpy(machine->translate, viewport->translate, 4 * sizeof(float)); +} + + + +void draw_vs_aos_machine_destroy( struct aos_machine *machine ) +{ + align_free(machine); +} + +struct aos_machine *draw_vs_aos_machine( void ) +{ + struct aos_machine *machine; + unsigned i; + float inv = 1.0f/255.0f; + float f255 = 255.0f; + + machine = align_malloc(sizeof(struct aos_machine), 16); + if (!machine) + return NULL; + + memset(machine, 0, sizeof(*machine)); + + ASSIGN_4V(machine->internal[IMM_SWZ], 1.0f, -1.0f, 0.0f, 1.0f); + *(unsigned *)&machine->internal[IMM_SWZ][3] = 0xffffffff; + + ASSIGN_4V(machine->internal[IMM_ONES], 1.0f, 1.0f, 1.0f, 1.0f); + ASSIGN_4V(machine->internal[IMM_NEGS], -1.0f, -1.0f, -1.0f, -1.0f); + ASSIGN_4V(machine->internal[IMM_IDENTITY], 0.0f, 0.0f, 0.0f, 1.0f); + ASSIGN_4V(machine->internal[IMM_INV_255], inv, inv, inv, inv); + ASSIGN_4V(machine->internal[IMM_255], f255, f255, f255, f255); + ASSIGN_4V(machine->internal[IMM_RSQ], -.5f, 1.5f, 0.0f, 0.0f); + + + machine->fpu_rnd_nearest = (X87_CW_EXCEPTION_INV_OP | + X87_CW_EXCEPTION_DENORM_OP | + X87_CW_EXCEPTION_ZERO_DIVIDE | + X87_CW_EXCEPTION_OVERFLOW | + X87_CW_EXCEPTION_UNDERFLOW | + X87_CW_EXCEPTION_PRECISION | + (1<<6) | + X87_CW_ROUND_NEAREST | + X87_CW_PRECISION_DOUBLE_EXT); + + assert(machine->fpu_rnd_nearest == 0x37f); + + machine->fpu_rnd_neg_inf = (X87_CW_EXCEPTION_INV_OP | + X87_CW_EXCEPTION_DENORM_OP | + X87_CW_EXCEPTION_ZERO_DIVIDE | + X87_CW_EXCEPTION_OVERFLOW | + X87_CW_EXCEPTION_UNDERFLOW | + X87_CW_EXCEPTION_PRECISION | + (1<<6) | + X87_CW_ROUND_DOWN | + X87_CW_PRECISION_DOUBLE_EXT); + + for (i = 0; i < MAX_SHINE_TAB; i++) + do_populate_lut( &machine->shine_tab[i], 1.0f ); + + return machine; +} + + diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 784ae41205..18cb06e374 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -44,8 +44,6 @@ struct draw_vs_varient_generic { struct draw_vs_varient base; - struct pipe_viewport_state viewport; - struct draw_vertex_shader *shader; struct draw_context *draw; @@ -57,21 +55,11 @@ struct draw_vs_varient_generic { */ struct translate *fetch; struct translate *emit; - - const float (*constants)[4]; }; -static void vsvg_set_constants( struct draw_vs_varient *varient, - const float (*constants)[4] ) -{ - struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; - - vsvg->constants = constants; -} - static void vsvg_set_input( struct draw_vs_varient *varient, unsigned buffer, @@ -94,8 +82,8 @@ static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg, void *output_buffer ) { char *ptr = (char *)output_buffer; - const float *scale = vsvg->viewport.scale; - const float *trans = vsvg->viewport.translate; + const float *scale = vsvg->base.vs->draw->viewport.scale; + const float *trans = vsvg->base.vs->draw->viewport.translate; unsigned stride = vsvg->base.key.output_stride; unsigned j; @@ -115,8 +103,8 @@ static void do_viewport( struct draw_vs_varient_generic *vsvg, void *output_buffer ) { char *ptr = (char *)output_buffer; - const float *scale = vsvg->viewport.scale; - const float *trans = vsvg->viewport.translate; + const float *scale = vsvg->base.vs->draw->viewport.scale; + const float *trans = vsvg->base.vs->draw->viewport.translate; unsigned stride = vsvg->base.key.output_stride; unsigned j; @@ -130,10 +118,10 @@ static void do_viewport( struct draw_vs_varient_generic *vsvg, } -static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, - const unsigned *elts, - unsigned count, - void *output_buffer ) +static void vsvg_run_elts( struct draw_vs_varient *varient, + const unsigned *elts, + unsigned count, + void *output_buffer) { struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; @@ -150,7 +138,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->base.vs->run_linear( vsvg->base.vs, output_buffer, output_buffer, - vsvg->constants, + (const float (*)[4])vsvg->base.vs->draw->pt.user.constants, count, vsvg->base.key.output_stride, vsvg->base.key.output_stride); @@ -186,10 +174,10 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, } -static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, - unsigned start, - unsigned count, - void *output_buffer ) +static void vsvg_run_linear( struct draw_vs_varient *varient, + unsigned start, + unsigned count, + void *output_buffer ) { struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; @@ -206,7 +194,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, vsvg->base.vs->run_linear( vsvg->base.vs, output_buffer, output_buffer, - vsvg->constants, + (const float (*)[4])vsvg->base.vs->draw->pt.user.constants, count, vsvg->base.key.output_stride, vsvg->base.key.output_stride); @@ -245,13 +233,6 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, -static void vsvg_set_viewport( struct draw_vs_varient *varient, - const struct pipe_viewport_state *viewport ) -{ - struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; - - vsvg->viewport = *viewport; -} static void vsvg_destroy( struct draw_vs_varient *varient ) { @@ -272,8 +253,6 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, vsvg->base.key = *key; vsvg->base.vs = vs; vsvg->base.set_input = vsvg_set_input; - vsvg->base.set_constants = vsvg_set_constants; - vsvg->base.set_viewport = vsvg_set_viewport; vsvg->base.run_elts = vsvg_run_elts; vsvg->base.run_linear = vsvg_run_linear; vsvg->base.destroy = vsvg_destroy; -- cgit v1.2.3 From 6945bcb89370501e0a218bc656e68e30e4dadcda Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 29 May 2008 18:28:02 +0900 Subject: draw: Put PIPE_CDECL in the right places. MSVC seems picky about this. --- src/gallium/auxiliary/draw/draw_vs_aos.h | 4 ++-- src/gallium/auxiliary/draw/draw_vs_aos_machine.c | 4 ++-- src/gallium/auxiliary/draw/draw_vs_varient.c | 16 ++++++++-------- 3 files changed, 12 insertions(+), 12 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 89a9174151..665b425e68 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -60,12 +60,12 @@ struct x86_function; #define FPU_RND_NEAREST 2 struct aos_machine; -typedef void PIPE_CDECL (*lit_func)( struct aos_machine *, +typedef void (PIPE_CDECL *lit_func)( struct aos_machine *, float *result, const float *in, unsigned count ); -PIPE_CDECL void aos_do_lit( struct aos_machine *machine, +void PIPE_CDECL aos_do_lit( struct aos_machine *machine, float *result, const float *in, unsigned count ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c index 53e999b191..b7864e9f2f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c @@ -57,7 +57,7 @@ #define X87_CW_INFINITY (1<<12) -PIPE_CDECL void aos_do_lit( struct aos_machine *machine, +void PIPE_CDECL aos_do_lit( struct aos_machine *machine, float *result, const float *in, unsigned count ) @@ -91,7 +91,7 @@ PIPE_CDECL void aos_do_lit( struct aos_machine *machine, } -static PIPE_CDECL void do_lit_lut( struct aos_machine *machine, +static void PIPE_CDECL do_lit_lut( struct aos_machine *machine, float *result, const float *in, unsigned count ) diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 18cb06e374..4a155251b5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -118,10 +118,10 @@ static void do_viewport( struct draw_vs_varient_generic *vsvg, } -static void vsvg_run_elts( struct draw_vs_varient *varient, - const unsigned *elts, - unsigned count, - void *output_buffer) +static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, + const unsigned *elts, + unsigned count, + void *output_buffer) { struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; @@ -174,10 +174,10 @@ static void vsvg_run_elts( struct draw_vs_varient *varient, } -static void vsvg_run_linear( struct draw_vs_varient *varient, - unsigned start, - unsigned count, - void *output_buffer ) +static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, + unsigned start, + unsigned count, + void *output_buffer ) { struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; -- cgit v1.2.3 From cb87d7e44a6d6b1b4239b4e38c76c6bb848d2ef6 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Thu, 29 May 2008 12:11:37 +0200 Subject: scons: Remove duplicate entry. --- src/gallium/auxiliary/draw/SConscript | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 925e668f22..544a04918b 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -31,7 +31,6 @@ draw = env.ConvenienceLibrary( 'draw_pt_util.c', 'draw_pt_varray.c', 'draw_pt_vcache.c', - 'draw_pt_util.c', 'draw_vertex.c', 'draw_vs.c', 'draw_vs_aos.c', -- cgit v1.2.3 From 82605d7bcd533d7c96cc619c45970efd7229dc3b Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 29 May 2008 11:46:43 +0100 Subject: draw: draw_range_elements trial --- src/gallium/auxiliary/draw/draw_context.c | 20 +++- src/gallium/auxiliary/draw/draw_context.h | 10 +- src/gallium/auxiliary/draw/draw_private.h | 2 + src/gallium/auxiliary/draw/draw_pt.h | 9 ++ src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 48 ++++++++ .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 49 ++++++++ .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 80 +++++++++++++- src/gallium/auxiliary/draw/draw_pt_vcache.c | 123 ++++++++++++++++++++- src/gallium/drivers/softpipe/sp_context.c | 1 + src/gallium/drivers/softpipe/sp_draw_arrays.c | 31 +++++- src/gallium/drivers/softpipe/sp_state.h | 7 ++ src/gallium/include/pipe/p_context.h | 14 +++ src/mesa/state_tracker/st_draw.c | 27 ++++- 13 files changed, 404 insertions(+), 17 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 8509baf865..bcec85c2ef 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -348,14 +348,30 @@ void draw_set_edgeflags( struct draw_context *draw, * \param elements the element buffer ptr */ void -draw_set_mapped_element_buffer( struct draw_context *draw, - unsigned eltSize, void *elements ) +draw_set_mapped_element_buffer_range( struct draw_context *draw, + unsigned eltSize, + unsigned min_index, + unsigned max_index, + void *elements ) { draw->pt.user.elts = elements; draw->pt.user.eltSize = eltSize; + draw->pt.user.min_index = min_index; + draw->pt.user.max_index = max_index; } +void +draw_set_mapped_element_buffer( struct draw_context *draw, + unsigned eltSize, + void *elements ) +{ + draw->pt.user.elts = elements; + draw->pt.user.eltSize = eltSize; + draw->pt.user.min_index = 0; + draw->pt.user.max_index = 0xffffffff; +} + /* Revamp me please: */ diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index c5c3d3b09e..8dd03cb79e 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -118,8 +118,16 @@ void draw_set_vertex_elements(struct draw_context *draw, unsigned count, const struct pipe_vertex_element *elements); +void +draw_set_mapped_element_buffer_range( struct draw_context *draw, + unsigned eltSize, + unsigned min_index, + unsigned max_index, + void *elements ); + void draw_set_mapped_element_buffer( struct draw_context *draw, - unsigned eltSize, void *elements ); + unsigned eltSize, + void *elements ); void draw_set_mapped_vertex_buffer(struct draw_context *draw, unsigned attr, const void *buffer); diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 4cbccc8b5b..40f1d978f2 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -147,6 +147,8 @@ struct draw_context const void *elts; /** bytes per index (0, 1, 2 or 4) */ unsigned eltSize; + unsigned min_index; + unsigned max_index; /** vertex arrays */ const void *vbuffer[PIPE_MAX_ATTRIBS]; diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index e03816ebbc..6b8ba1d171 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -96,6 +96,15 @@ struct draw_pt_middle_end { unsigned start, unsigned count); + /* Transform all vertices in a linear range and then draw them with + * the supplied element list. + */ + void (*run_linear_elts)( struct draw_pt_middle_end *, + unsigned fetch_start, + unsigned fetch_count, + const ushort *draw_elts, + unsigned draw_count ); + void (*finish)( struct draw_pt_middle_end * ); void (*destroy)( struct draw_pt_middle_end * ); }; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index a1d041a74f..09bdc5fb5e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -311,6 +311,53 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, } +static void fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; + struct draw_context *draw = feme->draw; + void *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = draw->render->allocate_vertices( draw->render, + (ushort)feme->translate->key.output_stride, + (ushort)count ); + if (!hw_verts) { + assert(0); + return; + } + + /* Single routine to fetch vertices and emit HW verts. + */ + feme->translate->run( feme->translate, + start, + count, + hw_verts ); + + /* XXX: Draw arrays path to avoid re-emitting index list again and + * again. + */ + draw->render->draw( draw->render, + draw_elts, + draw_count ); + + /* Done -- that was easy, wasn't it: + */ + draw->render->release_vertices( draw->render, + hw_verts, + feme->translate->key.output_stride, + count ); + +} + + + static void fetch_emit_finish( struct draw_pt_middle_end *middle ) { @@ -343,6 +390,7 @@ struct draw_pt_middle_end *draw_pt_fetch_emit( struct draw_context *draw ) fetch_emit->base.prepare = fetch_emit_prepare; fetch_emit->base.run = fetch_emit_run; fetch_emit->base.run_linear = fetch_emit_run_linear; + fetch_emit->base.run_linear_elts = fetch_emit_run_linear_elts; fetch_emit->base.finish = fetch_emit_finish; fetch_emit->base.destroy = fetch_emit_destroy; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 5265a13160..efa6dddbda 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -310,6 +310,54 @@ fse_run(struct draw_pt_middle_end *middle, } + +static void fse_run_linear_elts( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; + struct draw_context *draw = fse->draw; + unsigned alloc_count = align(count, 4); + char *hw_verts; + + /* XXX: need to flush to get prim_vbuf.c to release its allocation?? + */ + draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + + hw_verts = draw->render->allocate_vertices( draw->render, + (ushort)fse->key.output_stride, + (ushort)alloc_count ); + + if (!hw_verts) { + assert(0); + return; + } + + /* Single routine to fetch vertices, run shader and emit HW verts. + * Clipping is done elsewhere -- either by the API or on hardware, + * or for some other reason not required... + */ + fse->active->run_linear( fse->active, + start, count, + hw_verts ); + + + draw->render->draw( draw->render, + draw_elts, + draw_count ); + + + + draw->render->release_vertices( draw->render, + hw_verts, + fse->key.output_stride, + count ); +} + + + static void fse_finish( struct draw_pt_middle_end *middle ) { } @@ -330,6 +378,7 @@ struct draw_pt_middle_end *draw_pt_middle_fse( struct draw_context *draw ) fse->base.prepare = fse_prepare; fse->base.run = fse_run; fse->base.run_linear = fse_run_linear; + fse->base.run_linear_elts = fse_run_linear_elts; fse->base.finish = fse_finish; fse->base.destroy = fse_destroy; fse->draw = draw; diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 06718779a5..c58a900867 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -98,7 +98,6 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, - static void fetch_pipeline_run( struct draw_pt_middle_end *middle, const unsigned *fetch_elts, unsigned fetch_count, @@ -251,6 +250,84 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, +static void fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle, + unsigned start, + unsigned count, + const ushort *draw_elts, + unsigned draw_count ) +{ + struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; + struct draw_context *draw = fpme->draw; + struct draw_vertex_shader *shader = draw->vs.vertex_shader; + unsigned opt = fpme->opt; + unsigned alloc_count = align_int( count, 4 ); + + struct vertex_header *pipeline_verts = + (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); + + if (!pipeline_verts) { + /* Not much we can do here - just skip the rendering. + */ + assert(0); + return; + } + + /* Fetch into our vertex buffer + */ + draw_pt_fetch_run_linear( fpme->fetch, + start, + count, + (char *)pipeline_verts ); + + /* Run the shader, note that this overwrites the data[] parts of + * the pipeline verts. If there is no shader, ie a bypass shader, + * then the inputs == outputs, and are already in the correct + * place. + */ + if (opt & PT_SHADE) + { + shader->run_linear(shader, + (const float (*)[4])pipeline_verts->data, + ( float (*)[4])pipeline_verts->data, + (const float (*)[4])draw->pt.user.constants, + count, + fpme->vertex_size, + fpme->vertex_size); + } + + if (draw_pt_post_vs_run( fpme->post_vs, + pipeline_verts, + count, + fpme->vertex_size )) + { + opt |= PT_PIPELINE; + } + + /* Do we need to run the pipeline? + */ + if (opt & PT_PIPELINE) { + draw_pipeline_run( fpme->draw, + fpme->prim, + pipeline_verts, + count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + else { + draw_pt_emit( fpme->emit, + (const float (*)[4])pipeline_verts->data, + count, + fpme->vertex_size, + draw_elts, + draw_count ); + } + + FREE(pipeline_verts); +} + + + static void fetch_pipeline_finish( struct draw_pt_middle_end *middle ) { /* nothing to do */ @@ -282,6 +359,7 @@ struct draw_pt_middle_end *draw_pt_fetch_pipeline_or_emit( struct draw_context * fpme->base.prepare = fetch_pipeline_prepare; fpme->base.run = fetch_pipeline_run; fpme->base.run_linear = fetch_pipeline_linear_run; + fpme->base.run_linear_elts = fetch_pipeline_linear_run_elts; fpme->base.finish = fetch_pipeline_finish; fpme->base.destroy = fetch_pipeline_destroy; diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 96e02fbf3a..720b91b8e6 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -36,8 +36,8 @@ #include "draw/draw_pt.h" -#define CACHE_MAX 32 -#define FETCH_MAX 128 +#define CACHE_MAX 1024 +#define FETCH_MAX 4096 #define DRAW_MAX (16*1024) struct vcache_frontend { @@ -201,7 +201,124 @@ static void vcache_ef_quad( struct vcache_frontend *vcache, #define FUNC vcache_run #include "draw_pt_vcache_tmp.h" +static void translate_uint_elts( const unsigned *src, + unsigned count, + int delta, + ushort *dest ) +{ + unsigned i; + + for (i = 0; i < count; i++) + dest[i] = (ushort)(src[i] + delta); +} + +static void translate_ushort_elts( const ushort *src, + unsigned count, + int delta, + ushort *dest ) +{ + unsigned i; + + for (i = 0; i < count; i++) + dest[i] = (ushort)(src[i] + delta); +} +static void translate_ubyte_elts( const ubyte *src, + unsigned count, + int delta, + ushort *dest ) +{ + unsigned i; + + for (i = 0; i < count; i++) + dest[i] = (ushort)(src[i] + delta); +} + +#if 0 +static enum pipe_format format_from_get_elt( pt_elt_func get_elt ) +{ + switch (draw->pt.user.eltSize) { + case 1: return PIPE_FORMAT_R8_UNORM; + case 2: return PIPE_FORMAT_R16_UNORM; + case 4: return PIPE_FORMAT_R32_UNORM; + default: return PIPE_FORMAT_NONE; + } +} +#endif + +static void vcache_check_run( struct draw_pt_front_end *frontend, + pt_elt_func get_elt, + const void *elts, + unsigned draw_count ) +{ + struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; + struct draw_context *draw = vcache->draw; + unsigned min_index = draw->pt.user.min_index; + unsigned max_index = draw->pt.user.max_index; + unsigned index_size = draw->pt.user.eltSize; + unsigned fetch_count = MAX2(max_index, max_index + 1 - min_index); + const ushort *transformed_elts; + ushort *storage = NULL; + + printf("fetch_count %x\n", fetch_count); + + if (fetch_count >= FETCH_MAX || + fetch_count > draw_count) + goto fail; + + + if (min_index == 0 && + index_size == 2) + { + transformed_elts = (const ushort *)elts; + } + else + { + storage = MALLOC( draw_count * sizeof(ushort) ); + if (!storage) + goto fail; + + switch(index_size) { + case 1: + translate_ubyte_elts( (const ubyte *)elts, + draw_count, + 0 - (int)min_index, + storage ); + break; + + case 2: + translate_ushort_elts( (const ushort *)elts, + draw_count, + 0 - (int)min_index, + storage ); + break; + + case 4: + translate_uint_elts( (const uint *)elts, + draw_count, + 0 - (int)min_index, + storage ); + break; + + default: + assert(0); + return; + } + transformed_elts = storage; + } + + vcache->middle->run_linear_elts( vcache->middle, + min_index, /* start */ + fetch_count, + transformed_elts, + draw_count ); + + FREE(storage); + return; + + fail: + vcache_run( frontend, get_elt, elts, draw_count ); +} @@ -219,7 +336,7 @@ static void vcache_prepare( struct draw_pt_front_end *frontend, } else { - vcache->base.run = vcache_run; + vcache->base.run = vcache_check_run; } vcache->input_prim = prim; diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 045a1f74a9..1e0106b86c 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -179,6 +179,7 @@ softpipe_create( struct pipe_screen *screen, softpipe->pipe.draw_arrays = softpipe_draw_arrays; softpipe->pipe.draw_elements = softpipe_draw_elements; + softpipe->pipe.draw_range_elements = softpipe_draw_range_elements; softpipe->pipe.set_edgeflags = softpipe_set_edgeflags; diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 6c58f9909d..dbecf6865f 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -108,11 +108,14 @@ softpipe_draw_arrays(struct pipe_context *pipe, unsigned mode, * * XXX should the element buffer be specified/bound with a separate function? */ + boolean -softpipe_draw_elements(struct pipe_context *pipe, - struct pipe_buffer *indexBuffer, - unsigned indexSize, - unsigned mode, unsigned start, unsigned count) +softpipe_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count) { struct softpipe_context *sp = softpipe_context(pipe); struct draw_context *draw = sp->draw; @@ -141,11 +144,14 @@ softpipe_draw_elements(struct pipe_context *pipe, void *mapped_indexes = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); + draw_set_mapped_element_buffer_range(draw, indexSize, + min_index, + max_index, + mapped_indexes); } else { /* no index/element buffer */ - draw_set_mapped_element_buffer(draw, 0, NULL); + draw_set_mapped_element_buffer_range(draw, 0, start, start + count - 1, NULL); } @@ -171,6 +177,19 @@ softpipe_draw_elements(struct pipe_context *pipe, return TRUE; } +boolean +softpipe_draw_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned mode, unsigned start, unsigned count) +{ + return softpipe_draw_range_elements( pipe, indexBuffer, + indexSize, + 0, 0xffffffff, + mode, start, count ); +} + + void softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags) diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 452e51fa79..701e02b295 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -171,6 +171,13 @@ boolean softpipe_draw_elements(struct pipe_context *pipe, struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count); +boolean +softpipe_draw_range_elements(struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned min_index, + unsigned max_index, + unsigned mode, unsigned start, unsigned count); void softpipe_set_edgeflags(struct pipe_context *pipe, const unsigned *edgeflags); diff --git a/src/gallium/include/pipe/p_context.h b/src/gallium/include/pipe/p_context.h index 0f68f592f7..faf112c6d6 100644 --- a/src/gallium/include/pipe/p_context.h +++ b/src/gallium/include/pipe/p_context.h @@ -76,6 +76,20 @@ struct pipe_context { struct pipe_buffer *indexBuffer, unsigned indexSize, unsigned mode, unsigned start, unsigned count); + + /* XXX: this is (probably) a temporary entrypoint, as the range + * information should be available from the vertex_buffer state. + * Using this to quickly evaluate a specialized path in the draw + * module. + */ + boolean (*draw_range_elements)( struct pipe_context *pipe, + struct pipe_buffer *indexBuffer, + unsigned indexSize, + unsigned minIndex, + unsigned maxIndex, + unsigned mode, + unsigned start, + unsigned count); /*@}*/ diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index a3bffbfc95..551860452a 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -365,14 +365,33 @@ st_draw_vbo(GLcontext *ctx, } /* draw */ - for (i = 0; i < nr_prims; i++) { + if (nr_prims == 1 && pipe->draw_range_elements != NULL) { + i = 0; + + /* XXX: exercise temporary path to pass min/max directly + * through to driver & draw module. These interfaces still + * need a bit of work... + */ setup_edgeflags(ctx, prims[i].mode, prims[i].start + indexOffset, prims[i].count, arrays[VERT_ATTRIB_EDGEFLAG]); - pipe->draw_elements(pipe, indexBuf, indexSize, - prims[i].mode, - prims[i].start + indexOffset, prims[i].count); + pipe->draw_range_elements(pipe, indexBuf, indexSize, + min_index, + max_index, + prims[i].mode, + prims[i].start + indexOffset, prims[i].count); + } + else { + for (i = 0; i < nr_prims; i++) { + setup_edgeflags(ctx, prims[i].mode, + prims[i].start + indexOffset, prims[i].count, + arrays[VERT_ATTRIB_EDGEFLAG]); + + pipe->draw_elements(pipe, indexBuf, indexSize, + prims[i].mode, + prims[i].start + indexOffset, prims[i].count); + } } pipe_reference_buffer(pipe, &indexBuf, NULL); -- cgit v1.2.3 From bb2e13b9e82b68ec3b9fc56a4c35e7ead8fd138f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 29 May 2008 12:38:49 +0100 Subject: draw: make sure constant buffer data is aligned before passing to aos.c --- src/gallium/auxiliary/draw/draw_context.c | 5 +++-- src/gallium/auxiliary/draw/draw_context.h | 3 ++- src/gallium/auxiliary/draw/draw_private.h | 9 ++++++++- src/gallium/auxiliary/draw/draw_vs.c | 19 ++++++++++++++++++- src/gallium/auxiliary/draw/draw_vs_aos.c | 4 ++-- src/gallium/drivers/i915simple/i915_context.c | 4 +++- src/gallium/drivers/softpipe/sp_draw_arrays.c | 5 +++-- src/mesa/state_tracker/st_draw.c | 7 ++++--- 8 files changed, 43 insertions(+), 13 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index bcec85c2ef..2f263cf06a 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -217,10 +217,11 @@ draw_set_mapped_vertex_buffer(struct draw_context *draw, void draw_set_mapped_constant_buffer(struct draw_context *draw, - const void *buffer) + const void *buffer, + unsigned size ) { draw->pt.user.constants = buffer; - draw_vs_set_constants( draw, (const float (*)[4])buffer ); + draw_vs_set_constants( draw, (const float (*)[4])buffer, size ); } diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 8dd03cb79e..b8f2bfa332 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -133,7 +133,8 @@ void draw_set_mapped_vertex_buffer(struct draw_context *draw, unsigned attr, const void *buffer); void draw_set_mapped_constant_buffer(struct draw_context *draw, - const void *buffer); + const void *buffer, + unsigned size ); void draw_set_edgeflags( struct draw_context *draw, const unsigned *edgeflag ); diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 40f1d978f2..88a7224b62 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -190,6 +190,12 @@ struct draw_context struct aos_machine *aos_machine; + const float (*aligned_constants)[4]; + + const float (*aligned_constant_storage)[4]; + unsigned const_storage_size; + + struct translate *fetch; struct translate_cache *fetch_cache; struct translate *emit; @@ -225,7 +231,8 @@ void draw_vs_set_viewport( struct draw_context *, const struct pipe_viewport_state * ); void draw_vs_set_constants( struct draw_context *, - const float (*constants)[4] ); + const float (*constants)[4], + unsigned size ); diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index a8b6d0c90d..ce35112fc1 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -43,8 +43,22 @@ void draw_vs_set_constants( struct draw_context *draw, - const float (*constants)[4] ) + const float (*constants)[4], + unsigned size ) { + if (((unsigned)constants) & 0xf) { + if (size > draw->vs.const_storage_size) { + if (draw->vs.aligned_constant_storage) + align_free(draw->vs.aligned_constant_storage); + draw->vs.aligned_constant_storage = align_malloc( size, 16 ); + } + memcpy( draw->vs.aligned_constant_storage, + constants, + size ); + constants = draw->vs.aligned_constant_storage; + } + + draw->vs.aligned_constants = constants; draw_vs_aos_machine_constants( draw->vs.aos_machine, constants ); } @@ -159,6 +173,9 @@ draw_vs_destroy( struct draw_context *draw ) if (draw->vs.aos_machine) draw_vs_aos_machine_destroy(draw->vs.aos_machine); + if (draw->vs.aligned_constant_storage) + align_free(draw->vs.aligned_constant_storage); + tgsi_exec_machine_free_data(&draw->vs.machine); } diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index b5e4e1e7b1..55cabb6df9 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1957,7 +1957,7 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, struct aos_machine *machine = vaos->draw->vs.aos_machine; machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; - machine->constants = (const float (*)[4])vaos->draw->pt.user.constants; + machine->constants = vaos->draw->vs.aligned_constants; machine->immediates = vaos->base.vs->immediates; machine->attrib = vaos->attrib; @@ -1976,7 +1976,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, struct aos_machine *machine = vaos->draw->vs.aos_machine; machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; - machine->constants = (const float (*)[4])vaos->draw->pt.user.constants; + machine->constants = vaos->draw->vs.aligned_constants; machine->immediates = vaos->base.vs->immediates; machine->attrib = vaos->attrib; diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 4bef21619c..c609d16a5a 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -86,7 +86,9 @@ i915_draw_elements( struct pipe_context *pipe, draw_set_mapped_constant_buffer(draw, - i915->current.constants[PIPE_SHADER_VERTEX]); + i915->current.constants[PIPE_SHADER_VERTEX], + ( i915->current.num_user_constants[PIPE_SHADER_VERTEX] * + 4 * sizeof(float) )); /* draw! */ draw_arrays(i915->draw, prim, start, count); diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index dbecf6865f..d4d5fa744f 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -54,7 +54,8 @@ softpipe_map_constant_buffers(struct softpipe_context *sp) } draw_set_mapped_constant_buffer(sp->draw, - sp->mapped_constants[PIPE_SHADER_VERTEX]); + sp->mapped_constants[PIPE_SHADER_VERTEX], + sp->constants[i].size); } static void @@ -68,7 +69,7 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) */ draw_flush(sp->draw); - draw_set_mapped_constant_buffer(sp->draw, NULL); + draw_set_mapped_constant_buffer(sp->draw, NULL, 0); for (i = 0; i < 2; i++) { if (sp->constants[i].size) diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 551860452a..5300848ef6 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -596,9 +596,10 @@ st_feedback_draw_vbo(GLcontext *ctx, /* map constant buffers */ mapped_constants = pipe_buffer_map(pipe, - st->state.constants[PIPE_SHADER_VERTEX].buffer, - PIPE_BUFFER_USAGE_CPU_READ); - draw_set_mapped_constant_buffer(st->draw, mapped_constants); + st->state.constants[PIPE_SHADER_VERTEX].buffer, + PIPE_BUFFER_USAGE_CPU_READ); + draw_set_mapped_constant_buffer(st->draw, mapped_constants, + st->state.constants[PIPE_SHADER_VERTEX].buffer->size); /* draw here */ -- cgit v1.2.3 From 45eecb93920c5a33e71b3c152749273908cb62fd Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 29 May 2008 12:42:39 +0100 Subject: draw: remove printf --- src/gallium/auxiliary/draw/draw_pt_vcache.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 720b91b8e6..64fab61271 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -260,8 +260,6 @@ static void vcache_check_run( struct draw_pt_front_end *frontend, const ushort *transformed_elts; ushort *storage = NULL; - printf("fetch_count %x\n", fetch_count); - if (fetch_count >= FETCH_MAX || fetch_count > draw_count) goto fail; -- cgit v1.2.3 From a233f65f9b79734498c120e8052aa8d6255586e4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 29 May 2008 13:08:15 +0100 Subject: draw: better calculation of fetch_count --- src/gallium/auxiliary/draw/draw_pt_vcache.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 64fab61271..957b8edec2 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -256,13 +256,19 @@ static void vcache_check_run( struct draw_pt_front_end *frontend, unsigned min_index = draw->pt.user.min_index; unsigned max_index = draw->pt.user.max_index; unsigned index_size = draw->pt.user.eltSize; - unsigned fetch_count = MAX2(max_index, max_index + 1 - min_index); + unsigned fetch_count = max_index + 1 - min_index; const ushort *transformed_elts; ushort *storage = NULL; - if (fetch_count >= FETCH_MAX || - fetch_count > draw_count) + + if (0) debug_printf("fetch_count %d draw_count %d\n", fetch_count, draw_count); + + if (max_index == 0xffffffff || + fetch_count >= FETCH_MAX || + fetch_count > draw_count) { + debug_printf("fail\n"); goto fail; + } if (min_index == 0 && -- cgit v1.2.3 From 8f887b4252208e60e7e86217ec3b72fb639a4e82 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 29 May 2008 13:26:01 +0100 Subject: draw: michal's patch for calling powf... teapot still not quite right --- src/gallium/auxiliary/draw/draw_vs_aos.c | 46 ++++++++++++++++++++++++++++++++ src/gallium/auxiliary/draw/draw_vs_aos.h | 1 + 2 files changed, 47 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 55cabb6df9..725f36b502 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1372,11 +1372,20 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } +/* A wrapper for powf(). + * Makes sure it is cdecl and operates on floats. + */ +static float PIPE_CDECL _powerf( float x, float y ) +{ + return powf( x, y ); +} + /* Really not sufficient -- need to check for conditions that could * generate inf/nan values, which will slow things down hugely. */ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { +#if 0 x87_fld_src(cp, &op->FullSrcRegisters[1], 0); /* a1.x */ x87_fld_src(cp, &op->FullSrcRegisters[0], 0); /* a0.x a1.x */ x87_fyl2x(cp->func); /* a1*log2(a0) */ @@ -1384,6 +1393,42 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst x87_emit_ex2( cp ); /* 2^(a1*log2(a0)) */ x87_fstp_dest4(cp, &op->FullDstRegisters[0]); +#else + uint i; + + /* For absolute correctness, need to spill/invalidate all XMM regs + * too. + */ + for (i = 0; i < 8; i++) { + if (cp->xmm[i].dirty) + spill(cp, i); + aos_release_xmm_reg(cp, i); + } + + /* Push caller-save (ie scratch) regs. + */ + x86_cdecl_caller_push_regs( cp->func ); + + x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -8) ); + + x87_fld_src( cp, &op->FullSrcRegisters[1], 0 ); + x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 4 ) ); + x87_fld_src( cp, &op->FullSrcRegisters[0], 0 ); + x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) ); + + x86_mov_reg_imm( cp->func, cp->tmp_EAX, (unsigned long) _powerf ); + x86_call( cp->func, cp->tmp_EAX ); + + x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, 8) ); + + x86_cdecl_caller_pop_regs( cp->func ); + + /* Note retval on x87 stack: + */ + cp->func->x87_stack++; + + x87_fstp_dest4( cp, &op->FullDstRegisters[0] ); +#endif return TRUE; } @@ -1781,6 +1826,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, cp.machine_EDX = x86_make_reg(file_REG32, reg_DX); cp.count_ESI = x86_make_reg(file_REG32, reg_SI); cp.temp_EBP = x86_make_reg(file_REG32, reg_BP); + cp.stack_ESP = x86_make_reg( file_REG32, reg_SP ); x86_init_func(cp.func); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 665b425e68..fb6d43d32e 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -160,6 +160,7 @@ struct aos_compilation { struct x86_reg machine_EDX; struct x86_reg count_ESI; /* decrements to zero */ struct x86_reg temp_EBP; + struct x86_reg stack_ESP; }; struct x86_reg aos_get_xmm_reg( struct aos_compilation *cp ); -- cgit v1.2.3 From 4a7198fdcfe3256bdefff5d1d766327ae7f18b35 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 29 May 2008 22:20:51 +0900 Subject: psb: Make msvc happy. Conflicts: src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c --- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index c58a900867..eb6988ff03 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -81,9 +81,9 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, * but gl vs dx9 clip spaces. */ draw_pt_post_vs_prepare( fpme->post_vs, - draw->bypass_clipping, - draw->identity_viewport, - draw->rasterizer->gl_rasterization_rules ); + (boolean)draw->bypass_clipping, + (boolean)draw->identity_viewport, + (boolean)draw->rasterizer->gl_rasterization_rules ); if (!(opt & PT_PIPELINE)) -- cgit v1.2.3 From 8808d62f608d1397ee75d0087301d0b0a0278244 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 29 May 2008 22:26:56 +0900 Subject: gallium: MSVC warning fixes. Conflicts: src/gallium/auxiliary/draw/draw_pt_varray.c src/gallium/auxiliary/draw/draw_pt_varray_tmp.h src/gallium/auxiliary/draw/draw_pt_vcache.c --- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 2 +- src/gallium/auxiliary/draw/draw_pt_varray_tmp.h | 9 +++++---- src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h | 9 +++++---- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 2 +- src/gallium/auxiliary/translate/translate_sse.c | 2 +- 5 files changed, 13 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 67b9a9503d..d514e28b77 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -145,7 +145,7 @@ emit_vertex( struct vbuf_stage *vbuf, vertex->vertex_id = vbuf->nr_vertices++; } - return vertex->vertex_id; + return (ushort)vertex->vertex_id; } diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h index 6979f6b544..7c722457c3 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp.h @@ -10,7 +10,8 @@ static void FUNC(struct draw_pt_front_end *frontend, boolean flatfirst = (draw->rasterizer->flatshade && draw->rasterizer->flatshade_first); - unsigned i, j, flags; + unsigned i, j; + ushort flags; unsigned first, incr; varray->fetch_start = start; @@ -200,9 +201,9 @@ static void FUNC(struct draw_pt_front_end *frontend, /* These bitflags look a little odd because we submit the * vertices as (1,2,0) to satisfy flatshade requirements. */ - const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2; - const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0; - const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1; + const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2; + const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0; + const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1; flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; for (j = 0; j + first <= count; j += i) { diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h index cf9f394aa3..5c99a47e49 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h @@ -10,7 +10,8 @@ static void FUNC( struct draw_pt_front_end *frontend, boolean flatfirst = (draw->rasterizer->flatshade && draw->rasterizer->flatshade_first); - unsigned i, flags; + unsigned i; + ushort flags; switch (vcache->input_prim) { @@ -138,9 +139,9 @@ static void FUNC( struct draw_pt_front_end *frontend, /* These bitflags look a little odd because we submit the * vertices as (1,2,0) to satisfy flatshade requirements. */ - const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2; - const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0; - const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1; + const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2; + const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0; + const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1; flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 8018bd7fa4..189dc6024d 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -198,7 +198,7 @@ get_coef( static void emit_retw( struct x86_function *func, - unsigned size ) + unsigned short size ) { x86_retw( func, size ); } diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index 69dd4d3dff..be48a14737 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -309,7 +309,7 @@ static void get_src_ptr( struct translate_sse *p, static void emit_swizzle( struct translate_sse *p, struct x86_reg dest, struct x86_reg src, - unsigned shuffle ) + unsigned char shuffle ) { sse_shufps(p->func, dest, src, shuffle); } -- cgit v1.2.3 From 807e7c4ccfdaebf8e568357fb1fd8090ccae638c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 29 May 2008 14:35:30 +0100 Subject: draw: add more switches to turn FSE on/off --- src/gallium/auxiliary/draw/draw_private.h | 3 ++- src/gallium/auxiliary/draw/draw_pt.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 88a7224b62..4f8cceee1e 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -157,7 +157,8 @@ struct draw_context const void *constants; } user; - boolean test_fse; + boolean test_fse; /* enable FSE even though its not correct (eg for softpipe) */ + boolean no_fse; /* disable FSE even when it is correct */ } pt; struct { diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index d48c6c220d..723077159b 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -75,7 +75,7 @@ draw_pt_arrays(struct draw_context *draw, if (opt == 0) middle = draw->pt.middle.fetch_emit; - else if (opt == PT_SHADE) + else if (opt == PT_SHADE && !draw->pt.no_fse) middle = draw->pt.middle.fetch_shade_emit; else middle = draw->pt.middle.general; @@ -105,6 +105,7 @@ draw_pt_arrays(struct draw_context *draw, boolean draw_pt_init( struct draw_context *draw ) { draw->pt.test_fse = GETENV("DRAW_FSE") != NULL; + draw->pt.no_fse = GETENV("DRAW_NO_FSE") != NULL; draw->pt.front.vcache = draw_pt_vcache( draw ); if (!draw->pt.front.vcache) -- cgit v1.2.3 From 22be9ea4a63ff051604123de5e82cf4050792ef8 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 29 May 2008 18:18:56 +0100 Subject: draw: quieten some debug --- src/gallium/auxiliary/draw/draw_pt_vcache.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 957b8edec2..9ffc99abf9 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -266,7 +266,7 @@ static void vcache_check_run( struct draw_pt_front_end *frontend, if (max_index == 0xffffffff || fetch_count >= FETCH_MAX || fetch_count > draw_count) { - debug_printf("fail\n"); + if (0) debug_printf("fail\n"); goto fail; } -- cgit v1.2.3 From 2691c228701e0d3a048eaaa698ba2236c2650d08 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 29 May 2008 19:24:53 +0900 Subject: gallium: Replace getenv by placeholder code on WinCE. WinCE processes supposedly have environment, but it is not clear which API to use. --- src/gallium/auxiliary/util/p_debug.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index ce7fb58956..d1dfc377f8 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -153,7 +153,9 @@ const char * debug_get_option(const char *name, const char *dfault) { const char *result; -#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + /* EngMapFile creates the file if it does not exists, so it must either be + * disabled on release versions (or put in a less conspicuous place). */ #ifdef DEBUG ULONG_PTR iFile = 0; const void *pMap = NULL; @@ -161,9 +163,6 @@ debug_get_option(const char *name, const char *dfault) static char output[1024]; result = dfault; - /* XXX: this creates the file if it does not exists, so it must either be - * disabled on release versions, or put in a less conspicuous place. - */ pMap = EngMapFile(L"\\??\\c:\\gallium.cfg", 0, &iFile); if(pMap) { sol = (const char *)pMap; @@ -187,13 +186,15 @@ debug_get_option(const char *name, const char *dfault) #else result = dfault; #endif +#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) + /* TODO: implement */ + result = dfault; #else - result = getenv(name); if(!result) result = dfault; #endif - + debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? result : "(null)"); return result; -- cgit v1.2.3 From 1529a2c983f70ed7ff661ae6e8995cdfbca6d9cf Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 30 May 2008 14:51:09 +0100 Subject: draw: trim incoming primitives --- src/gallium/auxiliary/draw/draw_pt.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 723077159b..f0d7b51ad7 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -35,6 +35,10 @@ #include "draw/draw_private.h" #include "draw/draw_pt.h" +static unsigned trim( unsigned count, unsigned first, unsigned incr ) +{ + return count - (count - first) % incr; +} @@ -54,6 +58,17 @@ draw_pt_arrays(struct draw_context *draw, struct draw_pt_middle_end *middle = NULL; unsigned opt = 0; + /* Sanitize primitive length: + */ + { + unsigned first, incr; + draw_pt_split_prim(prim, &first, &incr); + count = trim(count, first, incr); + if (count < first) + return TRUE; + } + + if (!draw->render) { opt |= PT_PIPELINE; } -- cgit v1.2.3 From a9d6b1afa5b548a98c2b95db50236c4139d9f569 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 31 May 2008 15:18:55 +0900 Subject: gallium: Port timing functions to WinCE. --- src/gallium/auxiliary/util/u_time.c | 74 ++++++++++++++++++++++++++++--------- 1 file changed, 56 insertions(+), 18 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c index 9b97050d51..c6ce0afab0 100644 --- a/src/gallium/auxiliary/util/u_time.c +++ b/src/gallium/auxiliary/util/u_time.c @@ -33,26 +33,44 @@ */ -#include "util/u_time.h" +#include "pipe/p_config.h" #if defined(PIPE_OS_LINUX) #include #elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) #include #include -#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) +#include +extern VOID KeQuerySystemTime(PLARGE_INTEGER); +#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) #include #else #error Unsupported OS #endif +#include "util/u_time.h" + -#if defined(PIPE_OS_WINDOWS) -static LONGLONG frequency = 0; -#if !defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) -#define EngQueryPerformanceFrequency(p) QueryPerformanceFrequency((LARGE_INTEGER*)(p)) -#define EngQueryPerformanceCounter(p) QueryPerformanceCounter((LARGE_INTEGER*)(p)) +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) + +static int64_t frequency = 0; + +static INLINE void +util_time_get_frequency(void) +{ + if(!frequency) { +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + LONGLONG temp; + EngQueryPerformanceFrequency(&temp); + frequency = temp; +#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) + LARGE_INTEGER temp; + QueryPerformanceFrequency(&temp); + frequency = temp.QuadPart; #endif + } +} #endif @@ -61,8 +79,20 @@ util_time_get(struct util_time *t) { #if defined(PIPE_OS_LINUX) gettimeofday(&t->tv, NULL); -#elif defined(PIPE_OS_WINDOWS) - EngQueryPerformanceCounter(&t->counter); +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + LONGLONG temp; + EngQueryPerformanceCounter(&temp); + t->counter = temp; +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + /* Updated every 10 miliseconds, measured in units of 100 nanoseconds. + * http://msdn.microsoft.com/en-us/library/ms801642.aspx */ + LARGE_INTEGER temp; + KeQuerySystemTime(&temp); + t->counter = temp.QuadPart; +#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) + LARGE_INTEGER temp; + QueryPerformanceCounter(&temp); + t->counter = temp.QuadPart; #endif } @@ -75,10 +105,17 @@ util_time_add(const struct util_time *t1, #if defined(PIPE_OS_LINUX) t2->tv.tv_sec = t1->tv.tv_sec + usecs / 1000000; t2->tv.tv_usec = t1->tv.tv_usec + usecs % 1000000; -#elif defined(PIPE_OS_WINDOWS) - if(!frequency) - EngQueryPerformanceFrequency(&frequency); - t2->counter = t1->counter + (usecs * frequency + 999999LL)/1000000LL; +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) + util_time_get_frequency(); + t2->counter = t1->counter + (usecs * frequency + INT64_C(999999))/INT64_C(1000000); +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + /* 1 tick = 100 nano seconds. */ + t2->counter = t1->counter + usecs * 10; +#elif + LARGE_INTEGER temp; + LONGLONG freq; + freq = temp.QuadPart; + t2->counter = t1->counter + (usecs * freq)/1000000L; #endif } @@ -90,10 +127,11 @@ util_time_diff(const struct util_time *t1, #if defined(PIPE_OS_LINUX) return (t2->tv.tv_usec - t1->tv.tv_usec) + (t2->tv.tv_sec - t1->tv.tv_sec)*1000000; -#elif defined(PIPE_OS_WINDOWS) - if(!frequency) - EngQueryPerformanceFrequency(&frequency); - return (t2->counter - t1->counter)*1000000LL/frequency; +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) + util_time_get_frequency(); + return (t2->counter - t1->counter)*INT64_C(1000000)/frequency; +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + return (t2->counter - t1->counter)/10; #endif } @@ -142,7 +180,7 @@ util_time_timeout(const struct util_time *start, } -#if defined(PIPE_OS_WINDOWS) +#if defined(PIPE_SUBSYSYEM_WINDOWS_DISPLAY) void util_time_sleep(unsigned usecs) { LONGLONG start, curr, end; -- cgit v1.2.3 From 3b77f391db7827d1fb19a5dc3d8e8d3d705185ae Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 31 May 2008 18:51:44 +0900 Subject: draw: Eliminate stdio usage. --- src/gallium/auxiliary/draw/draw_vs_sse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 37cbe9bf9e..243d3eee23 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -185,7 +185,7 @@ draw_create_vs_sse(struct draw_context *draw, return &vs->base; fail: - fprintf(stderr, "tgsi_emit_sse2() failed, falling back to interpreter\n"); + debug_error("tgsi_emit_sse2() failed, falling back to interpreter\n"); x86_release_func( &vs->sse2_program ); -- cgit v1.2.3 From 13581958bd99396ab8ec314f10cf61f717b18a9b Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sat, 31 May 2008 18:19:21 +0200 Subject: draw: Remove const qualifier. --- src/gallium/auxiliary/draw/draw_private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 4f8cceee1e..423f64262b 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -193,7 +193,7 @@ struct draw_context const float (*aligned_constants)[4]; - const float (*aligned_constant_storage)[4]; + float (*aligned_constant_storage)[4]; unsigned const_storage_size; -- cgit v1.2.3 From a4abedc4f56e0442083aee9b39980900abf6ef40 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sat, 31 May 2008 18:40:39 +0200 Subject: draw: Remove const qualifier. --- src/gallium/auxiliary/draw/draw_vs_sse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 243d3eee23..f638208bf5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -130,7 +130,7 @@ vs_sse_delete( struct draw_vertex_shader *base ) x86_release_func( &shader->sse2_program ); - align_free(shader->base.immediates); + align_free( (void *) shader->base.immediates ); FREE( (void*) shader->base.state.tokens ); FREE( shader ); -- cgit v1.2.3 From c2ff3a66a1d9fe0b5303ded0503323a73a6a7391 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sat, 31 May 2008 19:40:36 +0200 Subject: draw: Fix build after TGSI declaration interface changes. --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 35 ++++++++++++------------- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 27 +++++++++---------- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 27 +++++++++---------- 3 files changed, 43 insertions(+), 46 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index fd48b224b4..634bf067f1 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -141,18 +141,18 @@ aa_transform_decl(struct tgsi_transform_context *ctx, if (decl->Declaration.File == TGSI_FILE_OUTPUT && decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && decl->Semantic.SemanticIndex == 0) { - aactx->colorOutput = decl->u.DeclarationRange.First; + aactx->colorOutput = decl->DeclarationRange.First; } else if (decl->Declaration.File == TGSI_FILE_SAMPLER) { uint i; - for (i = decl->u.DeclarationRange.First; - i <= decl->u.DeclarationRange.Last; i++) { + for (i = decl->DeclarationRange.First; + i <= decl->DeclarationRange.Last; i++) { aactx->samplersUsed |= 1 << i; } } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - if ((int) decl->u.DeclarationRange.Last > aactx->maxInput) - aactx->maxInput = decl->u.DeclarationRange.Last; + if ((int) decl->DeclarationRange.Last > aactx->maxInput) + aactx->maxInput = decl->DeclarationRange.Last; if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { aactx->maxGeneric = decl->Semantic.SemanticIndex; @@ -160,8 +160,8 @@ aa_transform_decl(struct tgsi_transform_context *ctx, } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { uint i; - for (i = decl->u.DeclarationRange.First; - i <= decl->u.DeclarationRange.Last; i++) { + for (i = decl->DeclarationRange.First; + i <= decl->DeclarationRange.Last; i++) { aactx->tempsUsed |= (1 << i); } } @@ -225,34 +225,33 @@ aa_transform_inst(struct tgsi_transform_context *ctx, /* declare new generic input/texcoord */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_INPUT; + /* XXX this could be linear... */ + decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; - decl.Declaration.Interpolate = 1; - /* XXX this could be linear... */ - decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = aactx->maxInput + 1; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = aactx->maxInput + 1; ctx->emit_declaration(ctx, &decl); /* declare new sampler */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = aactx->freeSampler; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = aactx->freeSampler; ctx->emit_declaration(ctx, &decl); /* declare new temp regs */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = aactx->texTemp; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = aactx->texTemp; ctx->emit_declaration(ctx, &decl); decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = aactx->colorTemp; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = aactx->colorTemp; ctx->emit_declaration(ctx, &decl); aactx->firstInstruction = FALSE; diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 97d74ad693..96dcdb43d5 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -131,11 +131,11 @@ aa_transform_decl(struct tgsi_transform_context *ctx, if (decl->Declaration.File == TGSI_FILE_OUTPUT && decl->Semantic.SemanticName == TGSI_SEMANTIC_COLOR && decl->Semantic.SemanticIndex == 0) { - aactx->colorOutput = decl->u.DeclarationRange.First; + aactx->colorOutput = decl->DeclarationRange.First; } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - if ((int) decl->u.DeclarationRange.Last > aactx->maxInput) - aactx->maxInput = decl->u.DeclarationRange.Last; + if ((int) decl->DeclarationRange.Last > aactx->maxInput) + aactx->maxInput = decl->DeclarationRange.Last; if (decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC && (int) decl->Semantic.SemanticIndex > aactx->maxGeneric) { aactx->maxGeneric = decl->Semantic.SemanticIndex; @@ -143,8 +143,8 @@ aa_transform_decl(struct tgsi_transform_context *ctx, } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { uint i; - for (i = decl->u.DeclarationRange.First; - i <= decl->u.DeclarationRange.Last; i++) { + for (i = decl->DeclarationRange.First; + i <= decl->DeclarationRange.Last; i++) { aactx->tempsUsed |= (1 << i); } } @@ -193,27 +193,26 @@ aa_transform_inst(struct tgsi_transform_context *ctx, /* declare new generic input/texcoord */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_INPUT; + /* XXX this could be linear... */ + decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; decl.Semantic.SemanticIndex = aactx->maxGeneric + 1; - decl.Declaration.Interpolate = 1; - /* XXX this could be linear... */ - decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = texInput; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = texInput; ctx->emit_declaration(ctx, &decl); /* declare new temp regs */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = tmp0; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = tmp0; ctx->emit_declaration(ctx, &decl); decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = aactx->colorTemp; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = aactx->colorTemp; ctx->emit_declaration(ctx, &decl); aactx->firstInstruction = FALSE; diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 4c92416eb1..4087cf7a49 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -132,20 +132,20 @@ pstip_transform_decl(struct tgsi_transform_context *ctx, if (decl->Declaration.File == TGSI_FILE_SAMPLER) { uint i; - for (i = decl->u.DeclarationRange.First; - i <= decl->u.DeclarationRange.Last; i++) { + for (i = decl->DeclarationRange.First; + i <= decl->DeclarationRange.Last; i++) { pctx->samplersUsed |= 1 << i; } } else if (decl->Declaration.File == TGSI_FILE_INPUT) { - pctx->maxInput = MAX2(pctx->maxInput, (int) decl->u.DeclarationRange.Last); + pctx->maxInput = MAX2(pctx->maxInput, (int) decl->DeclarationRange.Last); if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) - pctx->wincoordInput = (int) decl->u.DeclarationRange.First; + pctx->wincoordInput = (int) decl->DeclarationRange.First; } else if (decl->Declaration.File == TGSI_FILE_TEMPORARY) { uint i; - for (i = decl->u.DeclarationRange.First; - i <= decl->u.DeclarationRange.Last; i++) { + for (i = decl->DeclarationRange.First; + i <= decl->DeclarationRange.Last; i++) { pctx->tempsUsed |= (1 << i); } } @@ -223,28 +223,27 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, /* declare new position input reg */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_INPUT; + decl.Declaration.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? */ decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = TGSI_SEMANTIC_POSITION; decl.Semantic.SemanticIndex = 0; - decl.Declaration.Interpolate = 1; - decl.Interpolation.Interpolate = TGSI_INTERPOLATE_LINEAR; /* XXX? */ - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = wincoordInput; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = wincoordInput; ctx->emit_declaration(ctx, &decl); } /* declare new sampler */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = pctx->freeSampler; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = pctx->freeSampler; ctx->emit_declaration(ctx, &decl); /* declare new temp regs */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_TEMPORARY; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = pctx->texTemp; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = pctx->texTemp; ctx->emit_declaration(ctx, &decl); /* emit immediate = {1/32, 1/32, 1, 1} -- cgit v1.2.3 From 3de18c2ac3cf679875d22d7ae9e62a11f5ea03c9 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sat, 31 May 2008 19:41:29 +0200 Subject: gallivm: Fix build after TGSI declaration interface changes. --- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 9695358ab8..98014bdaa1 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -96,10 +96,8 @@ translate_declaration(struct gallivm_ir *prog, unsigned first, last, mask; uint interp_method; - assert(decl->Declaration.Declare == TGSI_DECLARE_RANGE); - - first = decl->u.DeclarationRange.First; - last = decl->u.DeclarationRange.Last; + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; mask = decl->Declaration.UsageMask; /* Do not touch WPOS.xy */ @@ -113,7 +111,7 @@ translate_declaration(struct gallivm_ir *prog, } } - interp_method = decl->Interpolation.Interpolate; + interp_method = decl->Declaration.Interpolate; if (mask == TGSI_WRITEMASK_XYZW) { unsigned i, j; @@ -153,7 +151,7 @@ translate_declarationir(struct gallivm_ir *, struct tgsi_full_declaration *) { if (decl->Declaration.File == TGSI_FILE_ADDRESS) { - int idx = decl->u.DeclarationRange.First; + int idx = decl->DeclarationRange.First; storage->addAddress(idx); } } -- cgit v1.2.3 From a49381587f73c67469ec7546419cfc41387f938c Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sat, 31 May 2008 19:48:13 +0200 Subject: tgsi: Fix build after TGSI declaration interface changes. --- src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 8 +- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 8 +- src/gallium/auxiliary/tgsi/util/tgsi_build.c | 126 ++++++--------------------- src/gallium/auxiliary/tgsi/util/tgsi_build.h | 19 +--- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 58 +++--------- src/gallium/auxiliary/tgsi/util/tgsi_parse.c | 17 +--- src/gallium/auxiliary/tgsi/util/tgsi_parse.h | 9 +- src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 4 +- 8 files changed, 52 insertions(+), 197 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index 826b432f09..6ba0949183 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -1411,13 +1411,11 @@ exec_declaration( unsigned first, last, mask; eval_coef_func eval; - assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); - - first = decl->u.DeclarationRange.First; - last = decl->u.DeclarationRange.Last; + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; mask = decl->Declaration.UsageMask; - switch( decl->Interpolation.Interpolate ) { + switch( decl->Declaration.Interpolate ) { case TGSI_INTERPOLATE_CONSTANT: eval = eval_constant_coef; break; diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index 189dc6024d..a59e22b6ea 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -1923,16 +1923,14 @@ emit_declaration( unsigned first, last, mask; unsigned i, j; - assert( decl->Declaration.Declare == TGSI_DECLARE_RANGE ); - - first = decl->u.DeclarationRange.First; - last = decl->u.DeclarationRange.Last; + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; mask = decl->Declaration.UsageMask; for( i = first; i <= last; i++ ) { for( j = 0; j < NUM_CHANNELS; j++ ) { if( mask & (1 << j) ) { - switch( decl->Interpolation.Interpolate ) { + switch( decl->Declaration.Interpolate ) { case TGSI_INTERPOLATE_CONSTANT: emit_coef_a0( func, 0, i, j ); emit_inputs( func, 0, i, j ); diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.c b/src/gallium/auxiliary/tgsi/util/tgsi_build.c index 9c883ab704..63cc27becc 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.c @@ -90,9 +90,8 @@ tgsi_default_declaration( void ) declaration.Type = TGSI_TOKEN_TYPE_DECLARATION; declaration.Size = 1; declaration.File = TGSI_FILE_NULL; - declaration.Declare = TGSI_DECLARE_RANGE; declaration.UsageMask = TGSI_WRITEMASK_XYZW; - declaration.Interpolate = 0; + declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT; declaration.Semantic = 0; declaration.Padding = 0; declaration.Extended = 0; @@ -103,7 +102,6 @@ tgsi_default_declaration( void ) struct tgsi_declaration tgsi_build_declaration( unsigned file, - unsigned declare, unsigned usage_mask, unsigned interpolate, unsigned semantic, @@ -112,11 +110,10 @@ tgsi_build_declaration( struct tgsi_declaration declaration; assert( file <= TGSI_FILE_IMMEDIATE ); - assert( declare <= TGSI_DECLARE_MASK ); + assert( interpolate <= TGSI_INTERPOLATE_PERSPECTIVE ); declaration = tgsi_default_declaration(); declaration.File = file; - declaration.Declare = declare; declaration.UsageMask = usage_mask; declaration.Interpolate = interpolate; declaration.Semantic = semantic; @@ -144,7 +141,7 @@ tgsi_default_full_declaration( void ) struct tgsi_full_declaration full_declaration; full_declaration.Declaration = tgsi_default_declaration(); - full_declaration.Interpolation = tgsi_default_declaration_interpolation(); + full_declaration.DeclarationRange = tgsi_default_declaration_range(); full_declaration.Semantic = tgsi_default_declaration_semantic(); return full_declaration; @@ -159,6 +156,7 @@ tgsi_build_full_declaration( { unsigned size = 0; struct tgsi_declaration *declaration; + struct tgsi_declaration_range *dr; if( maxsize <= size ) return 0; @@ -167,63 +165,21 @@ tgsi_build_full_declaration( *declaration = tgsi_build_declaration( full_decl->Declaration.File, - full_decl->Declaration.Declare, full_decl->Declaration.UsageMask, full_decl->Declaration.Interpolate, full_decl->Declaration.Semantic, header ); - switch( full_decl->Declaration.Declare ) { - case TGSI_DECLARE_RANGE: - { - struct tgsi_declaration_range *dr; - - if( maxsize <= size ) - return 0; - dr = (struct tgsi_declaration_range *) &tokens[size]; - size++; - - *dr = tgsi_build_declaration_range( - full_decl->u.DeclarationRange.First, - full_decl->u.DeclarationRange.Last, - declaration, - header ); - break; - } - - case TGSI_DECLARE_MASK: - { - struct tgsi_declaration_mask *dm; - - if( maxsize <= size ) - return 0; - dm = (struct tgsi_declaration_mask *) &tokens[size]; - size++; - - *dm = tgsi_build_declaration_mask( - full_decl->u.DeclarationMask.Mask, - declaration, - header ); - break; - } - - default: - assert( 0 ); - } - - if( full_decl->Declaration.Interpolate ) { - struct tgsi_declaration_interpolation *di; - - if( maxsize <= size ) - return 0; - di = (struct tgsi_declaration_interpolation *) &tokens[size]; - size++; + if (maxsize <= size) + return 0; + dr = (struct tgsi_declaration_range *) &tokens[size]; + size++; - *di = tgsi_build_declaration_interpolation( - full_decl->Interpolation.Interpolate, - declaration, - header ); - } + *dr = tgsi_build_declaration_range( + full_decl->DeclarationRange.First, + full_decl->DeclarationRange.Last, + declaration, + header ); if( full_decl->Declaration.Semantic ) { struct tgsi_declaration_semantic *ds; @@ -243,6 +199,17 @@ tgsi_build_full_declaration( return size; } +struct tgsi_declaration_range +tgsi_default_declaration_range( void ) +{ + struct tgsi_declaration_range dr; + + dr.First = 0; + dr.Last = 0; + + return dr; +} + struct tgsi_declaration_range tgsi_build_declaration_range( unsigned first, @@ -255,6 +222,7 @@ tgsi_build_declaration_range( assert( last >= first ); assert( last <= 0xFFFF ); + declaration_range = tgsi_default_declaration_range(); declaration_range.First = first; declaration_range.Last = last; @@ -263,50 +231,6 @@ tgsi_build_declaration_range( return declaration_range; } -struct tgsi_declaration_mask -tgsi_build_declaration_mask( - unsigned mask, - struct tgsi_declaration *declaration, - struct tgsi_header *header ) -{ - struct tgsi_declaration_mask declaration_mask; - - declaration_mask.Mask = mask; - - declaration_grow( declaration, header ); - - return declaration_mask; -} - -struct tgsi_declaration_interpolation -tgsi_default_declaration_interpolation( void ) -{ - struct tgsi_declaration_interpolation di; - - di.Interpolate = TGSI_INTERPOLATE_CONSTANT; - di.Padding = 0; - - return di; -} - -struct tgsi_declaration_interpolation -tgsi_build_declaration_interpolation( - unsigned interpolate, - struct tgsi_declaration *declaration, - struct tgsi_header *header ) -{ - struct tgsi_declaration_interpolation di; - - assert( interpolate <= TGSI_INTERPOLATE_PERSPECTIVE ); - - di = tgsi_default_declaration_interpolation(); - di.Interpolate = interpolate; - - declaration_grow( declaration, header ); - - return di; -} - struct tgsi_declaration_semantic tgsi_default_declaration_semantic( void ) { diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.h b/src/gallium/auxiliary/tgsi/util/tgsi_build.h index 80bffc4ae7..423cf141f5 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.h @@ -37,7 +37,6 @@ tgsi_default_declaration( void ); struct tgsi_declaration tgsi_build_declaration( unsigned file, - unsigned declare, unsigned usage_mask, unsigned interpolate, unsigned semantic, @@ -53,6 +52,9 @@ tgsi_build_full_declaration( struct tgsi_header *header, unsigned maxsize ); +struct tgsi_declaration_range +tgsi_default_declaration_range( void ); + struct tgsi_declaration_range tgsi_build_declaration_range( unsigned first, @@ -60,21 +62,6 @@ tgsi_build_declaration_range( struct tgsi_declaration *declaration, struct tgsi_header *header ); -struct tgsi_declaration_mask -tgsi_build_declaration_mask( - unsigned mask, - struct tgsi_declaration *declaration, - struct tgsi_header *header ); - -struct tgsi_declaration_interpolation -tgsi_default_declaration_interpolation( void ); - -struct tgsi_declaration_interpolation -tgsi_build_declaration_interpolation( - unsigned interpolate, - struct tgsi_declaration *declaration, - struct tgsi_header *header ); - struct tgsi_declaration_semantic tgsi_default_declaration_semantic( void ); diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index b018ea9fa1..d1a3dfd9c7 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -546,19 +546,13 @@ tgsi_dump_declaration( TXT( "\nDCL " ); ENM( decl->Declaration.File, TGSI_FILES_SHORT ); - switch( decl->Declaration.Declare ) { - case TGSI_DECLARE_RANGE: - CHR( '[' ); - UID( decl->u.DeclarationRange.First ); - if( decl->u.DeclarationRange.First != decl->u.DeclarationRange.Last ) { - TXT( ".." ); - UID( decl->u.DeclarationRange.Last ); - } - CHR( ']' ); - break; - default: - assert( 0 ); + CHR( '[' ); + UID( decl->DeclarationRange.First ); + if (decl->DeclarationRange.First != decl->DeclarationRange.Last) { + TXT( ".." ); + UID( decl->DeclarationRange.Last ); } + CHR( ']' ); if( decl->Declaration.UsageMask != TGSI_WRITEMASK_XYZW ) { CHR( '.' ); @@ -586,10 +580,8 @@ tgsi_dump_declaration( } } - if (decl->Declaration.Interpolate) { - TXT( ", " ); - ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES_SHORT ); - } + TXT( ", " ); + ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES_SHORT ); } static void @@ -601,8 +593,6 @@ dump_declaration_verbose( { TXT( "\nFile : " ); ENM( decl->Declaration.File, TGSI_FILES ); - TXT( "\nDeclare : " ); - ENM( decl->Declaration.Declare, TGSI_DECLARES ); if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) { TXT( "\nUsageMask : " ); if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) { @@ -620,7 +610,7 @@ dump_declaration_verbose( } if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) { TXT( "\nInterpolate: " ); - UID( decl->Declaration.Interpolate ); + ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES ); } if( deflt || fd->Declaration.Semantic != decl->Declaration.Semantic ) { TXT( "\nSemantic : " ); @@ -632,32 +622,10 @@ dump_declaration_verbose( } EOL(); - switch( decl->Declaration.Declare ) { - case TGSI_DECLARE_RANGE: - TXT( "\nFirst: " ); - UID( decl->u.DeclarationRange.First ); - TXT( "\nLast : " ); - UID( decl->u.DeclarationRange.Last ); - break; - - case TGSI_DECLARE_MASK: - TXT( "\nMask: " ); - UIX( decl->u.DeclarationMask.Mask ); - break; - - default: - assert( 0 ); - } - - if( decl->Declaration.Interpolate ) { - EOL(); - TXT( "\nInterpolate: " ); - ENM( decl->Interpolation.Interpolate, TGSI_INTERPOLATES ); - if( ignored ) { - TXT( "\nPadding : " ); - UIX( decl->Interpolation.Padding ); - } - } + TXT( "\nFirst: " ); + UID( decl->DeclarationRange.First ); + TXT( "\nLast : " ); + UID( decl->DeclarationRange.Last ); if( decl->Declaration.Semantic ) { EOL(); diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c index 5c0b0bfd61..d16f0cdcad 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c @@ -118,22 +118,7 @@ tgsi_parse_token( *decl = tgsi_default_full_declaration(); decl->Declaration = *(struct tgsi_declaration *) &token; - switch( decl->Declaration.Type ) { - case TGSI_DECLARE_RANGE: - next_token( ctx, &decl->u.DeclarationRange ); - break; - - case TGSI_DECLARE_MASK: - next_token( ctx, &decl->u.DeclarationMask ); - break; - - default: - assert (0); - } - - if( decl->Declaration.Interpolate ) { - next_token( ctx, &decl->Interpolation ); - } + next_token( ctx, &decl->DeclarationRange ); if( decl->Declaration.Semantic ) { next_token( ctx, &decl->Semantic ); diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h index 4102101093..054350712d 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h @@ -65,13 +65,8 @@ struct tgsi_full_src_register struct tgsi_full_declaration { struct tgsi_declaration Declaration; - union - { - struct tgsi_declaration_range DeclarationRange; - struct tgsi_declaration_mask DeclarationMask; - } u; - struct tgsi_declaration_interpolation Interpolation; - struct tgsi_declaration_semantic Semantic; + struct tgsi_declaration_range DeclarationRange; + struct tgsi_declaration_semantic Semantic; }; struct tgsi_full_immediate diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c index 65650ed22a..bda7bc2e2e 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c @@ -93,8 +93,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens, = &parse.FullToken.FullDeclaration; uint file = fulldecl->Declaration.File; uint i; - for (i = fulldecl->u.DeclarationRange.First; - i <= fulldecl->u.DeclarationRange.Last; + for (i = fulldecl->DeclarationRange.First; + i <= fulldecl->DeclarationRange.Last; i++) { /* only first 32 regs will appear in this bitfield */ -- cgit v1.2.3 From 56fc7690d791819d81ff1c6e6e22d22017c68919 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sat, 31 May 2008 19:50:58 +0200 Subject: util: Fix build after TGSI declaration interface changes. --- src/gallium/auxiliary/util/u_simple_shaders.c | 33 +++++++++++++-------------- 1 file changed, 16 insertions(+), 17 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index 5f8d12191d..505d93d727 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -92,8 +92,8 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe, decl.Semantic.SemanticName = semantic_names[i]; decl.Semantic.SemanticIndex = semantic_indexes[i]; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = i; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = i; ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, @@ -107,8 +107,8 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe, decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = semantic_names[i]; decl.Semantic.SemanticIndex = semantic_indexes[i]; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = i; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = i; ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, @@ -190,14 +190,13 @@ util_make_fragment_tex_shader(struct pipe_context *pipe, /* declare TEX[0] input */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_INPUT; + /* XXX this could be linear... */ + decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC; decl.Semantic.SemanticIndex = 0; - /* XXX this could be linear... */ - decl.Declaration.Interpolate = 1; - decl.Interpolation.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = 0; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = 0; ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, @@ -209,8 +208,8 @@ util_make_fragment_tex_shader(struct pipe_context *pipe, decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = 0; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = 0; ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, @@ -219,8 +218,8 @@ util_make_fragment_tex_shader(struct pipe_context *pipe, /* declare sampler */ decl = tgsi_default_full_declaration(); decl.Declaration.File = TGSI_FILE_SAMPLER; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = 0; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = 0; ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, @@ -303,8 +302,8 @@ util_make_fragment_passthrough_shader(struct pipe_context *pipe, decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = 0; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = 0; ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, @@ -316,8 +315,8 @@ util_make_fragment_passthrough_shader(struct pipe_context *pipe, decl.Declaration.Semantic = 1; decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR; decl.Semantic.SemanticIndex = 0; - decl.u.DeclarationRange.First = - decl.u.DeclarationRange.Last = 0; + decl.DeclarationRange.First = + decl.DeclarationRange.Last = 0; ti += tgsi_build_full_declaration(&decl, &tokens[ti], header, -- cgit v1.2.3 From c6ae627fdca417318d27a8c26e6d9bc23577aabe Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 2 Jun 2008 11:39:59 +0200 Subject: tgsi: SWZ no longer aliases to MOV. --- src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 2 +- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index 6ba0949183..6689c3f1fb 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -1477,7 +1477,7 @@ exec_instruction( break; case TGSI_OPCODE_MOV: - /* TGSI_OPCODE_SWZ */ + case TGSI_OPCODE_SWZ: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); STORE( &r[0], 0, chan_index ); diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index a59e22b6ea..cdce5ea9c5 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -1190,7 +1190,7 @@ emit_instruction( break; case TGSI_OPCODE_MOV: - /* TGSI_OPCODE_SWZ */ + case TGSI_OPCODE_SWZ: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); STORE( func, *inst, 0, 0, chan_index ); -- cgit v1.2.3 From 2c7ae3371b6058988f7f10bf031d630b649f3831 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 2 Jun 2008 11:59:04 +0200 Subject: tgsi: Add assertions to the new rule that when an extended swizzle is used, the simple swizzle must be set to identity. --- src/gallium/auxiliary/tgsi/util/tgsi_build.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.c b/src/gallium/auxiliary/tgsi/util/tgsi_build.c index 63cc27becc..18e44b38c2 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.c @@ -628,6 +628,14 @@ tgsi_build_full_instruction( tgsi_default_src_register_ext_swz() ) ) { struct tgsi_src_register_ext_swz *src_register_ext_swz; + /* Use of the extended swizzle requires the simple swizzle to be identity. + */ + assert( reg->SrcRegister.SwizzleX == TGSI_SWIZZLE_X ); + assert( reg->SrcRegister.SwizzleY == TGSI_SWIZZLE_Y ); + assert( reg->SrcRegister.SwizzleZ == TGSI_SWIZZLE_Z ); + assert( reg->SrcRegister.SwizzleW == TGSI_SWIZZLE_W ); + assert( reg->SrcRegister.Negate == FALSE ); + if( maxsize <= size ) return 0; src_register_ext_swz = -- cgit v1.2.3 From 5b86ae60fe339ae0b813d16ec328a68ccb2b9514 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 30 May 2008 01:46:59 +0900 Subject: draw: Fix MSVC warnings. --- src/gallium/auxiliary/draw/draw_pipe.c | 3 ++- src/gallium/auxiliary/draw/draw_pt_decompose.h | 6 +++--- src/gallium/auxiliary/draw/draw_pt_varray.c | 4 ++-- src/gallium/auxiliary/draw/draw_vs.c | 6 +++--- src/gallium/auxiliary/draw/draw_vs_aos.c | 4 ++-- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 2 +- 6 files changed, 13 insertions(+), 12 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 1d26706dee..3355c871ee 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -255,7 +255,8 @@ void draw_pipeline_run( struct draw_context *draw, char *verts = (char *)vertices; \ boolean flatfirst = (draw->rasterizer->flatshade && \ draw->rasterizer->flatshade_first); \ - unsigned i, flags + unsigned i; \ + ushort flags #define FLUSH diff --git a/src/gallium/auxiliary/draw/draw_pt_decompose.h b/src/gallium/auxiliary/draw/draw_pt_decompose.h index dccfde99dd..3fb0695687 100644 --- a/src/gallium/auxiliary/draw/draw_pt_decompose.h +++ b/src/gallium/auxiliary/draw/draw_pt_decompose.h @@ -118,9 +118,9 @@ static void FUNC( ARGS, /* These bitflags look a little odd because we submit the * vertices as (1,2,0) to satisfy flatshade requirements. */ - const unsigned edge_first = DRAW_PIPE_EDGE_FLAG_2; - const unsigned edge_middle = DRAW_PIPE_EDGE_FLAG_0; - const unsigned edge_last = DRAW_PIPE_EDGE_FLAG_1; + const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2; + const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0; + const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1; flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index 260f28f284..f19e8850b3 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -109,9 +109,9 @@ static INLINE void fetch_init(struct varray_frontend *varray, static INLINE void add_draw_el(struct varray_frontend *varray, - int idx) + unsigned idx) { - varray->draw_elts[varray->draw_count++] = idx; + varray->draw_elts[varray->draw_count++] = (ushort)idx; } diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index ce35112fc1..979f9864fd 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -49,10 +49,10 @@ void draw_vs_set_constants( struct draw_context *draw, if (((unsigned)constants) & 0xf) { if (size > draw->vs.const_storage_size) { if (draw->vs.aligned_constant_storage) - align_free(draw->vs.aligned_constant_storage); + align_free((void *)draw->vs.aligned_constant_storage); draw->vs.aligned_constant_storage = align_malloc( size, 16 ); } - memcpy( draw->vs.aligned_constant_storage, + memcpy( (void*)draw->vs.aligned_constant_storage, constants, size ); constants = draw->vs.aligned_constant_storage; @@ -174,7 +174,7 @@ draw_vs_destroy( struct draw_context *draw ) draw_vs_aos_machine_destroy(draw->vs.aos_machine); if (draw->vs.aligned_constant_storage) - align_free(draw->vs.aligned_constant_storage); + align_free((void*)draw->vs.aligned_constant_storage); tgsi_exec_machine_free_data(&draw->vs.machine); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 725f36b502..9e9f8bac1e 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -492,7 +492,7 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, src->SrcRegister.File, src->SrcRegister.Index); unsigned i; - unsigned swz = 0; + ubyte swz = 0; unsigned negs = 0; unsigned abs = 0; @@ -704,7 +704,7 @@ static void store_dest( struct aos_compilation *cp, static void inject_scalar( struct aos_compilation *cp, struct x86_reg dst, struct x86_reg result, - unsigned swizzle ) + ubyte swizzle ) { sse_shufps(cp->func, dst, dst, swizzle); sse_movss(cp->func, dst, result); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index 45e2092209..b720185709 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -118,7 +118,7 @@ static void get_src_ptr( struct aos_compilation *cp, static void emit_swizzle( struct aos_compilation *cp, struct x86_reg dest, struct x86_reg src, - unsigned shuffle ) + ubyte shuffle ) { sse_shufps(cp->func, dest, src, shuffle); } -- cgit v1.2.3 From aa1a39d1a742c1bb346ba14814d6bf7b44e646cb Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 2 Jun 2008 20:46:05 +0900 Subject: rtasm: Use enum sse_cc in sse_cmpps. --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 2 +- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 664a69a537..f4ca282dd9 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -888,7 +888,7 @@ void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg sr void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, - unsigned char cc) + enum sse_cc cc) { DUMP_RRI( dst, src, cc ); emit_2ub(p, X86_TWOB, 0xC2); diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index bd76e1729c..af94577aab 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -191,7 +191,7 @@ void sse_divss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ) void sse_andnps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_andps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_cmpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src, - unsigned char cc ); + enum sse_cc cc ); void sse_maxps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_maxss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_minps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -- cgit v1.2.3 From 8d9a96386a5be7f15968bed63ca8b3e5555bbeeb Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 2 Jun 2008 21:45:25 +0900 Subject: gallium: Port util_time functions to windows userspace. --- src/gallium/auxiliary/util/u_time.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c index c6ce0afab0..49dce75289 100644 --- a/src/gallium/auxiliary/util/u_time.c +++ b/src/gallium/auxiliary/util/u_time.c @@ -43,7 +43,7 @@ #elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) #include extern VOID KeQuerySystemTime(PLARGE_INTEGER); -#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) #include #else #error Unsupported OS @@ -52,7 +52,7 @@ extern VOID KeQuerySystemTime(PLARGE_INTEGER); #include "util/u_time.h" -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) static int64_t frequency = 0; @@ -64,7 +64,7 @@ util_time_get_frequency(void) LONGLONG temp; EngQueryPerformanceFrequency(&temp); frequency = temp; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) LARGE_INTEGER temp; QueryPerformanceFrequency(&temp); frequency = temp.QuadPart; @@ -89,7 +89,7 @@ util_time_get(struct util_time *t) LARGE_INTEGER temp; KeQuerySystemTime(&temp); t->counter = temp.QuadPart; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) LARGE_INTEGER temp; QueryPerformanceCounter(&temp); t->counter = temp.QuadPart; @@ -105,7 +105,7 @@ util_time_add(const struct util_time *t1, #if defined(PIPE_OS_LINUX) t2->tv.tv_sec = t1->tv.tv_sec + usecs / 1000000; t2->tv.tv_usec = t1->tv.tv_usec + usecs % 1000000; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) util_time_get_frequency(); t2->counter = t1->counter + (usecs * frequency + INT64_C(999999))/INT64_C(1000000); #elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) @@ -127,7 +127,7 @@ util_time_diff(const struct util_time *t1, #if defined(PIPE_OS_LINUX) return (t2->tv.tv_usec - t1->tv.tv_usec) + (t2->tv.tv_sec - t1->tv.tv_sec)*1000000; -#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) util_time_get_frequency(); return (t2->counter - t1->counter)*INT64_C(1000000)/frequency; #elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) -- cgit v1.2.3 From e0860518dfb5a5c6ba6584e3c1b5d7b203277dac Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 2 Jun 2008 22:31:02 +0900 Subject: gallium: Replace XSTDCALL by PIPE_CDECL. --- src/gallium/auxiliary/draw/draw_vs_sse.c | 2 +- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 38 +++++++---------------------- src/gallium/drivers/softpipe/sp_fs_sse.c | 2 +- src/gallium/include/pipe/p_compiler.h | 16 +----------- 4 files changed, 12 insertions(+), 46 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index f638208bf5..0aa0c9a76b 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -50,7 +50,7 @@ #define SSE_MAX_VERTICES 4 -typedef void (XSTDCALL *codegen_function) ( +typedef void (PIPE_CDECL *codegen_function) ( const struct tgsi_exec_vector *input, /* 1 */ struct tgsi_exec_vector *output, /* 2 */ float (*constant)[4], /* 3 */ diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c index cdce5ea9c5..cdbdf5c882 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c @@ -194,22 +194,12 @@ get_coef( } -#ifdef WIN32 -static void -emit_retw( - struct x86_function *func, - unsigned short size ) -{ - x86_retw( func, size ); -} -#else static void emit_ret( struct x86_function *func ) { x86_ret( func ); } -#endif /** @@ -475,7 +465,7 @@ static void emit_func_call_dst( struct x86_function *func, unsigned xmm_dst, - void (*code)() ) + void (PIPE_CDECL *code)() ) { sse_movaps( func, @@ -496,9 +486,7 @@ emit_func_call_dst( x86_push( func, ecx ); x86_mov_reg_imm( func, ecx, (unsigned long) code ); x86_call( func, ecx ); -#ifndef WIN32 x86_pop(func, ecx ); -#endif } @@ -516,7 +504,7 @@ emit_func_call_dst_src( struct x86_function *func, unsigned xmm_dst, unsigned xmm_src, - void (*code)() ) + void (PIPE_CDECL *code)() ) { sse_movaps( func, @@ -558,7 +546,7 @@ emit_add( make_xmm( xmm_src ) ); } -static void XSTDCALL +static void PIPE_CDECL cos4f( float *store ) { @@ -581,7 +569,7 @@ emit_cos( cos4f ); } -static void XSTDCALL +static void PIPE_CDECL ex24f( float *store ) { @@ -615,7 +603,7 @@ emit_f2it( make_xmm( xmm ) ); } -static void XSTDCALL +static void PIPE_CDECL flr4f( float *store ) { @@ -638,7 +626,7 @@ emit_flr( flr4f ); } -static void XSTDCALL +static void PIPE_CDECL frc4f( float *store ) { @@ -661,7 +649,7 @@ emit_frc( frc4f ); } -static void XSTDCALL +static void PIPE_CDECL lg24f( float *store ) { @@ -720,7 +708,7 @@ emit_neg( TGSI_EXEC_TEMP_80000000_C ) ); } -static void XSTDCALL +static void PIPE_CDECL pow4f( float *store ) { @@ -820,7 +808,7 @@ emit_setsign( TGSI_EXEC_TEMP_80000000_C ) ); } -static void XSTDCALL +static void PIPE_CDECL sin4f( float *store ) { @@ -1736,11 +1724,7 @@ emit_instruction( break; case TGSI_OPCODE_RET: -#ifdef WIN32 - emit_retw( func, 16 ); -#else emit_ret( func ); -#endif break; case TGSI_OPCODE_END: @@ -2281,11 +2265,7 @@ tgsi_emit_sse2( func, get_immediate_base() ); -#ifdef WIN32 - emit_retw( func, 16 ); -#else emit_ret( func ); -#endif tgsi_parse_free( &parse ); diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 55741cc1df..69f7f960aa 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -46,7 +46,7 @@ /* Surely this should be defined somewhere in a tgsi header: */ -typedef void (XSTDCALL *codegen_function)( +typedef void (PIPE_CDECL *codegen_function)( const struct tgsi_exec_vector *input, struct tgsi_exec_vector *output, const float (*constant)[4], diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index f6707385cd..b14260dd90 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -128,7 +128,7 @@ typedef unsigned char boolean; /* This should match linux gcc cdecl semantics everywhere, so that we * just codegen one calling convention on all platforms. */ -#ifdef WIN32 +#ifdef _MSC_VER #define PIPE_CDECL __cdecl #else #define PIPE_CDECL @@ -148,18 +148,4 @@ typedef unsigned char boolean; -/** - * For calling code-gen'd functions, phase out in favor of - * PIPE_CDECL, above, which really means cdecl on all platforms, not - * like the below... - */ -#if !defined(XSTDCALL) -#if defined(WIN32) -#define XSTDCALL __stdcall /* phase this out */ -#else -#define XSTDCALL /* XXX: NOTE! not STDCALL! */ -#endif -#endif - - #endif /* P_COMPILER_H */ -- cgit v1.2.3 From 6a39bcf3752df7c22cdd38b4645a885eb318add8 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 2 Jun 2008 14:36:27 +0100 Subject: draw: fast element translate path without delta --- src/gallium/auxiliary/draw/draw_pt_vcache.c | 124 +++++++++++++++++++++------- 1 file changed, 93 insertions(+), 31 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 9ffc99abf9..ad86ab4292 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -201,39 +201,74 @@ static void vcache_ef_quad( struct vcache_frontend *vcache, #define FUNC vcache_run #include "draw_pt_vcache_tmp.h" +static void rebase_uint_elts( const unsigned *src, + unsigned count, + int delta, + ushort *dest ) +{ + unsigned i; + + for (i = 0; i < count; i++) + dest[i] = (ushort)(src[i] + delta); +} + +static void rebase_ushort_elts( const ushort *src, + unsigned count, + int delta, + ushort *dest ) +{ + unsigned i; + + for (i = 0; i < count; i++) + dest[i] = (ushort)(src[i] + delta); +} + +static void rebase_ubyte_elts( const ubyte *src, + unsigned count, + int delta, + ushort *dest ) +{ + unsigned i; + + for (i = 0; i < count; i++) + dest[i] = (ushort)(src[i] + delta); +} + + + static void translate_uint_elts( const unsigned *src, unsigned count, - int delta, ushort *dest ) { unsigned i; for (i = 0; i < count; i++) - dest[i] = (ushort)(src[i] + delta); + dest[i] = (ushort)(src[i]); } static void translate_ushort_elts( const ushort *src, unsigned count, - int delta, ushort *dest ) { unsigned i; for (i = 0; i < count; i++) - dest[i] = (ushort)(src[i] + delta); + dest[i] = (ushort)(src[i]); } static void translate_ubyte_elts( const ubyte *src, unsigned count, - int delta, ushort *dest ) { unsigned i; for (i = 0; i < count; i++) - dest[i] = (ushort)(src[i] + delta); + dest[i] = (ushort)(src[i]); } + + + #if 0 static enum pipe_format format_from_get_elt( pt_elt_func get_elt ) { @@ -282,31 +317,58 @@ static void vcache_check_run( struct draw_pt_front_end *frontend, if (!storage) goto fail; - switch(index_size) { - case 1: - translate_ubyte_elts( (const ubyte *)elts, - draw_count, - 0 - (int)min_index, - storage ); - break; - - case 2: - translate_ushort_elts( (const ushort *)elts, - draw_count, - 0 - (int)min_index, - storage ); - break; - - case 4: - translate_uint_elts( (const uint *)elts, - draw_count, - 0 - (int)min_index, - storage ); - break; - - default: - assert(0); - return; + if (min_index == 0) { + switch(index_size) { + case 1: + translate_ubyte_elts( (const ubyte *)elts, + draw_count, + storage ); + break; + + case 2: + translate_ushort_elts( (const ushort *)elts, + draw_count, + storage ); + break; + + case 4: + translate_uint_elts( (const uint *)elts, + draw_count, + storage ); + break; + + default: + assert(0); + return; + } + } + else { + switch(index_size) { + case 1: + rebase_ubyte_elts( (const ubyte *)elts, + draw_count, + 0 - (int)min_index, + storage ); + break; + + case 2: + rebase_ushort_elts( (const ushort *)elts, + draw_count, + 0 - (int)min_index, + storage ); + break; + + case 4: + rebase_uint_elts( (const uint *)elts, + draw_count, + 0 - (int)min_index, + storage ); + break; + + default: + assert(0); + return; + } } transformed_elts = storage; } -- cgit v1.2.3 From 58cccc8d6b49c75eeabe9b61055e69de824ff757 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 2 Jun 2008 21:51:31 +0200 Subject: draw: Fix DP3 implementation by replacing SSE with x87 version. --- src/gallium/auxiliary/draw/draw_vs_aos.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 725f36b502..5bfcd96ac3 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -962,8 +962,35 @@ static boolean emit_COS( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } +#if 1 + +/* The x87 version. + */ +static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg st1 = x86_make_reg( file_x87, 1 ); + + x87_fld_src( cp, &op->FullSrcRegisters[0], 0 ); + x87_fld_src( cp, &op->FullSrcRegisters[1], 0 ); + x87_fmulp( cp->func, st1 ); + x87_fld_src( cp, &op->FullSrcRegisters[0], 1 ); + x87_fld_src( cp, &op->FullSrcRegisters[1], 1 ); + x87_fmulp( cp->func, st1 ); + x87_faddp( cp->func, st1 ); + x87_fld_src( cp, &op->FullSrcRegisters[0], 2 ); + x87_fld_src( cp, &op->FullSrcRegisters[1], 2 ); + x87_fmulp( cp->func, st1 ); + x87_faddp( cp->func, st1 ); + + x87_fstp_dest4( cp, &op->FullDstRegisters[0] ); + + return TRUE; +} + +#else /* The dotproduct instructions don't really do that well in sse: + * XXX: produces wrong results -- disabled. */ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { @@ -985,7 +1012,7 @@ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } - +#endif static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { -- cgit v1.2.3 From 183d490ab139483c88d0b0f541714919de86235c Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 3 Jun 2008 10:59:46 +0200 Subject: draw: Fix fetch_src(). Resurrect SSE version of DP3. --- src/gallium/auxiliary/draw/draw_vs_aos.c | 43 +++++--------------------------- 1 file changed, 6 insertions(+), 37 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 5bfcd96ac3..891f8c211a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -532,10 +532,11 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, if (swz != SSE_SWIZZLE_NOOP || negs != 0 || abs != 0) { struct x86_reg dst = aos_get_xmm_reg(cp); - if (swz != SSE_SWIZZLE_NOOP) { + if (swz != SSE_SWIZZLE_NOOP) emit_pshufd(cp, dst, arg0, swz); - arg0 = dst; - } + else + sse_movaps(cp->func, dst, arg0); + arg0 = dst; if (negs && negs != 0xf) { struct x86_reg imm_swz = aos_get_internal_xmm(cp, IMM_SWZ); @@ -550,15 +551,13 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, (negs & 2) ? 1 : 0, (negs & 4) ? 1 : 0, (negs & 8) ? 1 : 0)); - sse_mulps(cp->func, dst, arg0); + sse_mulps(cp->func, dst, tmp); aos_release_xmm_reg(cp, tmp.idx); - arg0 = dst; } else if (negs) { struct x86_reg imm_negs = aos_get_internal_xmm(cp, IMM_NEGS); sse_mulps(cp->func, dst, imm_negs); - arg0 = dst; } @@ -571,10 +570,9 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, sse_movaps(cp->func, tmp, arg0); sse_mulps(cp->func, tmp, neg); - sse_maxps(cp->func, dst, arg0); + sse_maxps(cp->func, dst, tmp); aos_release_xmm_reg(cp, tmp.idx); - arg0 = dst; } } @@ -962,33 +960,6 @@ static boolean emit_COS( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } -#if 1 - -/* The x87 version. - */ -static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) -{ - struct x86_reg st1 = x86_make_reg( file_x87, 1 ); - - x87_fld_src( cp, &op->FullSrcRegisters[0], 0 ); - x87_fld_src( cp, &op->FullSrcRegisters[1], 0 ); - x87_fmulp( cp->func, st1 ); - x87_fld_src( cp, &op->FullSrcRegisters[0], 1 ); - x87_fld_src( cp, &op->FullSrcRegisters[1], 1 ); - x87_fmulp( cp->func, st1 ); - x87_faddp( cp->func, st1 ); - x87_fld_src( cp, &op->FullSrcRegisters[0], 2 ); - x87_fld_src( cp, &op->FullSrcRegisters[1], 2 ); - x87_fmulp( cp->func, st1 ); - x87_faddp( cp->func, st1 ); - - x87_fstp_dest4( cp, &op->FullDstRegisters[0] ); - - return TRUE; -} - -#else - /* The dotproduct instructions don't really do that well in sse: * XXX: produces wrong results -- disabled. */ @@ -1012,8 +983,6 @@ static boolean emit_DP3( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } -#endif - static boolean emit_DP4( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); -- cgit v1.2.3 From 0a5df5bc7d711a766c9d0963fb2029d60cf70a8b Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 3 Jun 2008 11:22:00 +0200 Subject: draw: Use register names more consistently. --- src/gallium/auxiliary/draw/draw_vs_aos.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 891f8c211a..3cf4a4d125 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -536,7 +536,6 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, emit_pshufd(cp, dst, arg0, swz); else sse_movaps(cp->func, dst, arg0); - arg0 = dst; if (negs && negs != 0xf) { struct x86_reg imm_swz = aos_get_internal_xmm(cp, IMM_SWZ); @@ -568,12 +567,14 @@ static struct x86_reg fetch_src( struct aos_compilation *cp, struct x86_reg neg = aos_get_internal(cp, IMM_NEGS); struct x86_reg tmp = aos_get_xmm_reg(cp); - sse_movaps(cp->func, tmp, arg0); + sse_movaps(cp->func, tmp, dst); sse_mulps(cp->func, tmp, neg); sse_maxps(cp->func, dst, tmp); aos_release_xmm_reg(cp, tmp.idx); } + + return dst; } return arg0; -- cgit v1.2.3 From 0a4aea0e86a897d9afb9f2a0ec27f03faf8f1b21 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 2 Jun 2008 12:59:16 +0100 Subject: draw: respect driver's max vertex buffer size --- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 6 ++++++ src/gallium/auxiliary/draw/draw_private.h | 1 + src/gallium/auxiliary/draw/draw_pt.h | 8 ++++++-- src/gallium/auxiliary/draw/draw_pt_emit.c | 6 +++++- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 6 +++++- src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c | 17 ++++++++++------- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 16 ++++++++++++---- src/gallium/auxiliary/draw/draw_pt_varray.c | 5 ++++- src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h | 11 +++++++---- src/gallium/auxiliary/draw/draw_pt_vcache.c | 13 ++++++++----- 10 files changed, 64 insertions(+), 25 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index d514e28b77..9e5597c32c 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -387,6 +387,12 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf ) /* Allocate a new vertex buffer */ vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size; + + /* Must always succeed -- driver gives us a + * 'max_vertex_buffer_bytes' which it guarantees it can allocate, + * and it will flush itself if necessary to do so. If this does + * fail, we are basically without usable hardware. + */ vbuf->vertices = (uint *) vbuf->render->allocate_vertices(vbuf->render, (ushort) vbuf->vertex_size, (ushort) vbuf->max_vertices); diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 423f64262b..1865601cc0 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -265,6 +265,7 @@ void draw_pipeline_destroy( struct draw_context *draw ); * These flags expected at first vertex of lines & triangles when * unfilled and/or line stipple modes are operational. */ +#define DRAW_PIPE_MAX_VERTICES (0x1<<12) #define DRAW_PIPE_EDGE_FLAG_0 (0x1<<12) #define DRAW_PIPE_EDGE_FLAG_1 (0x2<<12) #define DRAW_PIPE_EDGE_FLAG_2 (0x4<<12) diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 6b8ba1d171..3d2a9c78b7 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -84,7 +84,8 @@ struct draw_pt_front_end { struct draw_pt_middle_end { void (*prepare)( struct draw_pt_middle_end *, unsigned prim, - unsigned opt ); + unsigned opt, + unsigned *max_vertices ); void (*run)( struct draw_pt_middle_end *, const unsigned *fetch_elts, @@ -105,6 +106,8 @@ struct draw_pt_middle_end { const ushort *draw_elts, unsigned draw_count ); + int (*get_max_vertex_count)( struct draw_pt_middle_end * ); + void (*finish)( struct draw_pt_middle_end * ); void (*destroy)( struct draw_pt_middle_end * ); }; @@ -158,7 +161,8 @@ boolean draw_pt_get_edgeflag( struct draw_context *draw, struct pt_emit; void draw_pt_emit_prepare( struct pt_emit *emit, - unsigned prim ); + unsigned prim, + unsigned *max_vertices ); void draw_pt_emit( struct pt_emit *emit, const float (*vertex_data)[4], diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index cf87cde996..a02f1f46fe 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -46,7 +46,8 @@ struct pt_emit { }; void draw_pt_emit_prepare( struct pt_emit *emit, - unsigned prim ) + unsigned prim, + unsigned *max_vertices ) { struct draw_context *draw = emit->draw; const struct vertex_info *vinfo; @@ -139,6 +140,9 @@ void draw_pt_emit_prepare( struct pt_emit *emit, translate_key_sanitize(&hw_key); emit->translate = translate_cache_find(emit->cache, &hw_key); } + + *max_vertices = (draw->render->max_vertex_buffer_bytes / + (vinfo->size * 4)); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 09bdc5fb5e..083ce105bf 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -90,7 +90,8 @@ struct fetch_emit_middle_end { static void fetch_emit_prepare( struct draw_pt_middle_end *middle, unsigned prim, - unsigned opt ) + unsigned opt, + unsigned *max_vertices ) { struct fetch_emit_middle_end *feme = (struct fetch_emit_middle_end *)middle; struct draw_context *draw = feme->draw; @@ -196,6 +197,9 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, draw->pt.vertex_buffer[i].buffer_offset), draw->pt.vertex_buffer[i].pitch ); } + + *max_vertices = (draw->render->max_vertex_buffer_bytes / + (vinfo->size * 4)); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index efa6dddbda..5a1d79d996 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -70,7 +70,8 @@ struct fetch_shade_emit { static void fse_prepare( struct draw_pt_middle_end *middle, unsigned prim, - unsigned opt ) + unsigned opt, + unsigned *max_vertices ) { struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; struct draw_context *draw = fse->draw; @@ -189,6 +190,11 @@ static void fse_prepare( struct draw_pt_middle_end *middle, draw->pt.vertex_buffer[buf].pitch ); } + *max_vertices = (draw->render->max_vertex_buffer_bytes / + (vinfo->size * 4)); + + + //return TRUE; } @@ -204,7 +210,6 @@ static void fse_run_linear( struct draw_pt_middle_end *middle, { struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; struct draw_context *draw = fse->draw; - unsigned alloc_count = align(count, 4); char *hw_verts; /* XXX: need to flush to get prim_vbuf.c to release its allocation?? @@ -213,7 +218,7 @@ static void fse_run_linear( struct draw_pt_middle_end *middle, hw_verts = draw->render->allocate_vertices( draw->render, (ushort)fse->key.output_stride, - (ushort)alloc_count ); + (ushort)count ); if (!hw_verts) { assert(0); @@ -264,7 +269,6 @@ fse_run(struct draw_pt_middle_end *middle, { struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; struct draw_context *draw = fse->draw; - unsigned alloc_count = align(fetch_count, 4); void *hw_verts; /* XXX: need to flush to get prim_vbuf.c to release its allocation?? @@ -273,7 +277,7 @@ fse_run(struct draw_pt_middle_end *middle, hw_verts = draw->render->allocate_vertices( draw->render, (ushort)fse->key.output_stride, - (ushort)alloc_count ); + (ushort)fetch_count ); if (!hw_verts) { assert(0); return; @@ -319,7 +323,6 @@ static void fse_run_linear_elts( struct draw_pt_middle_end *middle, { struct fetch_shade_emit *fse = (struct fetch_shade_emit *)middle; struct draw_context *draw = fse->draw; - unsigned alloc_count = align(count, 4); char *hw_verts; /* XXX: need to flush to get prim_vbuf.c to release its allocation?? @@ -328,7 +331,7 @@ static void fse_run_linear_elts( struct draw_pt_middle_end *middle, hw_verts = draw->render->allocate_vertices( draw->render, (ushort)fse->key.output_stride, - (ushort)alloc_count ); + (ushort)count ); if (!hw_verts) { assert(0); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index eb6988ff03..25118712a6 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -51,7 +51,8 @@ struct fetch_pipeline_middle_end { static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, unsigned prim, - unsigned opt ) + unsigned opt, + unsigned *max_vertices ) { struct fetch_pipeline_middle_end *fpme = (struct fetch_pipeline_middle_end *)middle; struct draw_context *draw = fpme->draw; @@ -86,14 +87,21 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, (boolean)draw->rasterizer->gl_rasterization_rules ); - if (!(opt & PT_PIPELINE)) + if (!(opt & PT_PIPELINE)) { draw_pt_emit_prepare( fpme->emit, - prim ); + prim, + max_vertices ); + + *max_vertices = MAX2( *max_vertices, + DRAW_PIPE_MAX_VERTICES ); + } + else { + *max_vertices = DRAW_PIPE_MAX_VERTICES; + } /* No need to prepare the shader. */ vs->prepare(vs, draw); - } diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index f19e8850b3..f5495a80c7 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -45,6 +45,9 @@ struct varray_frontend { unsigned fetch_start; + unsigned driver_fetch_max; + unsigned fetch_max; + struct draw_pt_middle_end *middle; unsigned input_prim; @@ -183,7 +186,7 @@ static void varray_prepare(struct draw_pt_front_end *frontend, varray->output_prim = decompose_prim[prim]; varray->middle = middle; - middle->prepare(middle, varray->output_prim, opt); + middle->prepare(middle, varray->output_prim, opt, &varray->fetch_max ); } diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h index 114ed371a0..6e5e30f38e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h @@ -40,7 +40,7 @@ static void FUNC(struct draw_pt_front_end *frontend, case PIPE_PRIM_QUAD_STRIP: for (j = 0; j < count;) { unsigned remaining = count - j; - unsigned nr = trim( MIN2(FETCH_MAX, remaining), first, incr ); + unsigned nr = trim( MIN2(varray->fetch_max, remaining), first, incr ); varray_flush_linear(varray, start + j, nr); j += nr; if (nr != remaining) @@ -50,8 +50,9 @@ static void FUNC(struct draw_pt_front_end *frontend, case PIPE_PRIM_LINE_LOOP: if (count >= 2) { + unsigned fetch_max = MIN2(FETCH_MAX, varray->fetch_max); for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(FETCH_MAX, count - j); + unsigned end = MIN2(fetch_max, count - j); end -= (end % incr); for (i = 1; i < end; i++) { LINE(varray, i - 1, i); @@ -66,9 +67,10 @@ static void FUNC(struct draw_pt_front_end *frontend, case PIPE_PRIM_POLYGON: - case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_TRIANGLE_FAN: { + unsigned fetch_max = MIN2(FETCH_MAX, varray->fetch_max); for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(FETCH_MAX, count - j); + unsigned end = MIN2(fetch_max, count - j); end -= (end % incr); for (i = 2; i < end; i++) { TRIANGLE(varray, 0, i - 1, i); @@ -78,6 +80,7 @@ static void FUNC(struct draw_pt_front_end *frontend, varray_flush(varray); } break; + } default: assert(0); diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index ad86ab4292..2eafe270bc 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -36,8 +36,8 @@ #include "draw/draw_pt.h" -#define CACHE_MAX 1024 -#define FETCH_MAX 4096 +#define CACHE_MAX 256 +#define FETCH_MAX 256 #define DRAW_MAX (16*1024) struct vcache_frontend { @@ -52,6 +52,7 @@ struct vcache_frontend { unsigned draw_count; unsigned fetch_count; + unsigned fetch_max; struct draw_pt_middle_end *middle; @@ -296,10 +297,12 @@ static void vcache_check_run( struct draw_pt_front_end *frontend, ushort *storage = NULL; - if (0) debug_printf("fetch_count %d draw_count %d\n", fetch_count, draw_count); + if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count, + vcache->fetch_max, + draw_count); if (max_index == 0xffffffff || - fetch_count >= FETCH_MAX || + fetch_count >= vcache->fetch_max || fetch_count > draw_count) { if (0) debug_printf("fail\n"); goto fail; @@ -409,7 +412,7 @@ static void vcache_prepare( struct draw_pt_front_end *frontend, vcache->output_prim = draw_pt_reduced_prim(prim); vcache->middle = middle; - middle->prepare( middle, vcache->output_prim, opt ); + middle->prepare( middle, vcache->output_prim, opt, &vcache->fetch_max ); } -- cgit v1.2.3 From b98ac1d47257bf7b2661ae7c1a8904b7bc5d623c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 4 Jun 2008 15:49:18 +0100 Subject: draw: init vsvg draw pointer --- src/gallium/auxiliary/draw/draw_vs_sse.c | 6 ++++-- src/gallium/auxiliary/draw/draw_vs_varient.c | 5 ++++- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 0aa0c9a76b..c3189c707d 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -158,8 +158,10 @@ draw_create_vs_sse(struct draw_context *draw, tgsi_scan_shader(templ->tokens, &vs->base.info); vs->base.draw = draw; - vs->base.create_varient = draw_vs_varient_aos_sse; -// vs->base.create_varient = draw_vs_varient_generic; + if (1) + vs->base.create_varient = draw_vs_varient_aos_sse; + else + vs->base.create_varient = draw_vs_varient_generic; vs->base.prepare = vs_sse_prepare; vs->base.run_linear = vs_sse_run_linear; vs->base.delete = vs_sse_delete; diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 4a155251b5..3ceca5e849 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -124,6 +124,8 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, void *output_buffer) { struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; + + if (0) debug_printf("%s %d \n", __FUNCTION__, count); /* Want to do this in small batches for cache locality? */ @@ -181,7 +183,7 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, { struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; - //debug_printf("%s %d %d\n", __FUNCTION__, start, count); + if (0) debug_printf("%s %d %d\n", __FUNCTION__, start, count); vsvg->fetch->run( vsvg->fetch, @@ -257,6 +259,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, vsvg->base.run_linear = vsvg_run_linear; vsvg->base.destroy = vsvg_destroy; + vsvg->draw = vs->draw; /* Build free-standing fetch and emit functions: -- cgit v1.2.3 From 8223add3304451d5e75737a6d1be1739e4517943 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 4 Jun 2008 08:56:06 -0600 Subject: gallium: added tgsi_is_passthrough_shader() function Checks if all instructions are of the form MOV OUT[n], IN[n] Untested at this time. --- src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 83 +++++++++++++++++++++++++++++ src/gallium/auxiliary/tgsi/util/tgsi_scan.h | 4 ++ 2 files changed, 87 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c index bda7bc2e2e..240aaaf362 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c @@ -141,3 +141,86 @@ tgsi_scan_shader(const struct tgsi_token *tokens, tgsi_parse_free (&parse); } + + + +/** + * Check if the given shader is a "passthrough" shader consisting of only + * MOV instructions of the form: MOV OUT[n], IN[n] + * + */ +boolean +tgsi_is_passthrough_shader(const struct tgsi_token *tokens) +{ + struct tgsi_parse_context parse; + + /** + ** Setup to begin parsing input shader + **/ + if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) { + debug_printf("tgsi_parse_init() failed in tgsi_is_passthrough_shader()!\n"); + return FALSE; + } + + /** + ** Loop over incoming program tokens/instructions + */ + while (!tgsi_parse_end_of_tokens(&parse)) { + + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *fullinst = + &parse.FullToken.FullInstruction; + const struct tgsi_full_src_register *src = + &fullinst->FullSrcRegisters[0]; + const struct tgsi_full_dst_register *dst = + &fullinst->FullDstRegisters[0]; + + /* Do a whole bunch of checks for a simple move */ + if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV || + src->SrcRegister.File != TGSI_FILE_INPUT || + dst->DstRegister.File != TGSI_FILE_OUTPUT || + src->SrcRegister.Index != dst->DstRegister.Index || + + src->SrcRegister.Negate || + src->SrcRegisterExtMod.Negate || + src->SrcRegisterExtMod.Absolute || + src->SrcRegisterExtMod.Scale2X || + src->SrcRegisterExtMod.Bias || + src->SrcRegisterExtMod.Complement || + + src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || + src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || + src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || + src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W || + + src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || + src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || + src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || + src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W || + + dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW) + { + tgsi_parse_free(&parse); + return FALSE; + } + } + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + /* fall-through */ + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* fall-through */ + default: + ; /* no-op */ + } + } + + tgsi_parse_free(&parse); + + /* if we get here, it's a pass-through shader */ + return TRUE; +} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h index 0530bc6b51..5cb6efb343 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h @@ -67,4 +67,8 @@ tgsi_scan_shader(const struct tgsi_token *tokens, struct tgsi_shader_info *info); +extern boolean +tgsi_is_passthrough_shader(const struct tgsi_token *tokens); + + #endif /* TGSI_SCAN_H */ -- cgit v1.2.3 From f27c7729a98937a761eacceabdfd03f9d694d257 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 6 Jun 2008 13:29:59 +0900 Subject: draw: Compile draw_vs_aos only on x86. --- src/gallium/auxiliary/draw/draw_vs_aos.h | 4 ++++ src/gallium/auxiliary/draw/draw_vs_aos_machine.c | 6 ++++++ 2 files changed, 10 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index fb6d43d32e..f6f6af2dd9 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -31,6 +31,9 @@ #ifndef DRAW_VS_AOS_H #define DRAW_VS_AOS_H +#include "pipe/p_config.h" + +#ifdef PIPE_ARCH_X86 struct tgsi_token; struct x86_function; @@ -239,6 +242,7 @@ struct draw_vs_varient_aos_sse { }; +#endif #endif diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c index b7864e9f2f..dcb6c2c8d4 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c @@ -26,6 +26,10 @@ **************************************************************************/ +#include "pipe/p_config.h" + +#ifdef PIPE_ARCH_X86 + #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi/util/tgsi_parse.h" @@ -295,3 +299,5 @@ struct aos_machine *draw_vs_aos_machine( void ) } +#endif + -- cgit v1.2.3 From 0931b421d67b8ce471f17d43c183017f1eb92e31 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Jun 2008 15:04:45 +0100 Subject: draw: make sure middle-end primitive is uptodate in vcache --- src/gallium/auxiliary/draw/draw_pt_vcache.c | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 2eafe270bc..cda2987c9e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -58,10 +58,21 @@ struct vcache_frontend { unsigned input_prim; unsigned output_prim; + + unsigned middle_prim; + unsigned opt; }; static void vcache_flush( struct vcache_frontend *vcache ) { + if (vcache->middle_prim != vcache->output_prim) { + vcache->middle_prim = vcache->output_prim; + vcache->middle->prepare( vcache->middle, + vcache->middle_prim, + vcache->opt, + &vcache->fetch_max ); + } + if (vcache->draw_count) { vcache->middle->run( vcache->middle, vcache->fetch_elts, @@ -308,6 +319,14 @@ static void vcache_check_run( struct draw_pt_front_end *frontend, goto fail; } + if (vcache->middle_prim != vcache->input_prim) { + vcache->middle_prim = vcache->input_prim; + vcache->middle->prepare( vcache->middle, + vcache->middle_prim, + vcache->opt, + &vcache->fetch_max ); + } + if (min_index == 0 && index_size == 2) @@ -412,7 +431,13 @@ static void vcache_prepare( struct draw_pt_front_end *frontend, vcache->output_prim = draw_pt_reduced_prim(prim); vcache->middle = middle; - middle->prepare( middle, vcache->output_prim, opt, &vcache->fetch_max ); + vcache->opt = opt; + + /* Have to run prepare here, but try and guess a good prim for + * doing so: + */ + vcache->middle_prim = (opt & PT_PIPELINE) ? vcache->output_prim : vcache->input_prim; + middle->prepare( middle, vcache->middle_prim, opt, &vcache->fetch_max ); } -- cgit v1.2.3 From 463a47bf59398e850d5a6537da1186d855bd2919 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Jun 2008 18:09:32 +0100 Subject: draw: fix intermediate buffer confusion in draw_vs_varient.c The final output buffer can't be used to hold intermediate results as the intermediate vertex size may be greater than the final vertex size, and in any case the output buffer may be uncached in hw drivers. --- .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 7 + src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h | 2 + src/gallium/auxiliary/draw/draw_vs_varient.c | 175 +++++++++++---------- 3 files changed, 101 insertions(+), 83 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 5a1d79d996..5ce3aba2a2 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -193,6 +193,13 @@ static void fse_prepare( struct draw_pt_middle_end *middle, *max_vertices = (draw->render->max_vertex_buffer_bytes / (vinfo->size * 4)); + /* Probably need to do this somewhere (or fix exec shader not to + * need it): + */ + if (1) { + struct draw_vertex_shader *vs = draw->vs.vertex_shader; + vs->prepare(vs, draw); + } //return TRUE; diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h index 5c99a47e49..ec05bbeab4 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h @@ -13,6 +13,8 @@ static void FUNC( struct draw_pt_front_end *frontend, unsigned i; ushort flags; + if (0) debug_printf("%s %d\n", __FUNCTION__, count); + switch (vcache->input_prim) { case PIPE_PRIM_POINTS: diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 3ceca5e849..d8317d3bbe 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -55,6 +55,8 @@ struct draw_vs_varient_generic { */ struct translate *fetch; struct translate *emit; + + unsigned temp_vertex_stride; }; @@ -84,7 +86,7 @@ static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg, char *ptr = (char *)output_buffer; const float *scale = vsvg->base.vs->draw->viewport.scale; const float *trans = vsvg->base.vs->draw->viewport.translate; - unsigned stride = vsvg->base.key.output_stride; + unsigned stride = vsvg->temp_vertex_stride; unsigned j; for (j = 0; j < count; j++, ptr += stride) { @@ -99,13 +101,13 @@ static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg, } static void do_viewport( struct draw_vs_varient_generic *vsvg, - unsigned count, - void *output_buffer ) + unsigned count, + void *output_buffer ) { char *ptr = (char *)output_buffer; const float *scale = vsvg->base.vs->draw->viewport.scale; const float *trans = vsvg->base.vs->draw->viewport.translate; - unsigned stride = vsvg->base.key.output_stride; + unsigned stride = vsvg->temp_vertex_stride; unsigned j; for (j = 0; j < count; j++, ptr += stride) { @@ -124,7 +126,9 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, void *output_buffer) { struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; - + unsigned temp_vertex_stride = vsvg->temp_vertex_stride; + void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride ); + if (0) debug_printf("%s %d \n", __FUNCTION__, count); /* Want to do this in small batches for cache locality? @@ -135,44 +139,45 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, count, output_buffer ); - //if (!vsvg->base.vs->is_passthrough) - { - vsvg->base.vs->run_linear( vsvg->base.vs, - output_buffer, - output_buffer, - (const float (*)[4])vsvg->base.vs->draw->pt.user.constants, - count, - vsvg->base.key.output_stride, - vsvg->base.key.output_stride); - - - if (vsvg->base.key.clip) { - /* not really handling clipping, just do the rhw so we can - * see the results... - */ - do_rhw_viewport( vsvg, - count, - output_buffer ); - } - else if (vsvg->base.key.viewport) { - do_viewport( vsvg, - count, - output_buffer ); - } + vsvg->base.vs->run_linear( vsvg->base.vs, + temp_buffer, + temp_buffer, + (const float (*)[4])vsvg->base.vs->draw->pt.user.constants, + count, + temp_vertex_stride, + temp_vertex_stride); + + + if (vsvg->base.key.clip) { + /* not really handling clipping, just do the rhw so we can + * see the results... + */ + do_rhw_viewport( vsvg, + count, + temp_buffer ); + } + else if (vsvg->base.key.viewport) { + do_viewport( vsvg, + count, + temp_buffer ); + } - //if (!vsvg->already_in_emit_format) + vsvg->emit->set_buffer( vsvg->emit, + 0, + temp_buffer, + temp_vertex_stride ); - vsvg->emit->set_buffer( vsvg->emit, - 0, - output_buffer, - vsvg->base.key.output_stride ); + vsvg->emit->set_buffer( vsvg->emit, + 1, + &vsvg->draw->rasterizer->point_size, + 0); + vsvg->emit->run( vsvg->emit, + 0, count, + output_buffer ); - vsvg->emit->run( vsvg->emit, - 0, count, - output_buffer ); - } + FREE(temp_buffer); } @@ -182,54 +187,55 @@ static void PIPE_CDECL vsvg_run_linear( struct draw_vs_varient *varient, void *output_buffer ) { struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; + unsigned temp_vertex_stride = vsvg->temp_vertex_stride; + void *temp_buffer = MALLOC( align(count,4) * temp_vertex_stride ); - if (0) debug_printf("%s %d %d\n", __FUNCTION__, start, count); - - + if (0) debug_printf("%s %d %d (sz %d, %d)\n", __FUNCTION__, start, count, + vsvg->base.key.output_stride, + temp_vertex_stride); + vsvg->fetch->run( vsvg->fetch, start, count, - output_buffer ); - - //if (!vsvg->base.vs->is_passthrough) - { - vsvg->base.vs->run_linear( vsvg->base.vs, - output_buffer, - output_buffer, - (const float (*)[4])vsvg->base.vs->draw->pt.user.constants, - count, - vsvg->base.key.output_stride, - vsvg->base.key.output_stride); - - if (vsvg->base.key.clip) { - /* not really handling clipping, just do the rhw so we can - * see the results... - */ - do_rhw_viewport( vsvg, - count, - output_buffer ); - } - else if (vsvg->base.key.viewport) { - do_viewport( vsvg, - count, - output_buffer ); - } - - //if (!vsvg->already_in_emit_format) - vsvg->emit->set_buffer( vsvg->emit, - 0, - output_buffer, - vsvg->base.key.output_stride ); + temp_buffer ); + + vsvg->base.vs->run_linear( vsvg->base.vs, + temp_buffer, + temp_buffer, + (const float (*)[4])vsvg->base.vs->draw->pt.user.constants, + count, + temp_vertex_stride, + temp_vertex_stride); + + if (vsvg->base.key.clip) { + /* not really handling clipping, just do the rhw so we can + * see the results... + */ + do_rhw_viewport( vsvg, + count, + temp_buffer ); + } + else if (vsvg->base.key.viewport) { + do_viewport( vsvg, + count, + temp_buffer ); + } - vsvg->emit->set_buffer( vsvg->emit, - 1, - &vsvg->draw->rasterizer->point_size, - 0); + vsvg->emit->set_buffer( vsvg->emit, + 0, + temp_buffer, + temp_vertex_stride ); + + vsvg->emit->set_buffer( vsvg->emit, + 1, + &vsvg->draw->rasterizer->point_size, + 0); + + vsvg->emit->run( vsvg->emit, + 0, count, + output_buffer ); - vsvg->emit->run( vsvg->emit, - 0, count, - output_buffer ); - } + FREE(temp_buffer); } @@ -261,18 +267,20 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, vsvg->draw = vs->draw; + vsvg->temp_vertex_stride = MAX2(key->nr_inputs, + vsvg->base.vs->info.num_outputs) * 4 * sizeof(float); /* Build free-standing fetch and emit functions: */ fetch.nr_elements = key->nr_inputs; - fetch.output_stride = 0; + fetch.output_stride = vsvg->temp_vertex_stride; for (i = 0; i < key->nr_inputs; i++) { fetch.element[i].input_format = key->element[i].in.format; fetch.element[i].input_buffer = key->element[i].in.buffer; fetch.element[i].input_offset = key->element[i].in.offset; fetch.element[i].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; - fetch.element[i].output_offset = fetch.output_stride; - fetch.output_stride += 4 * sizeof(float); + fetch.element[i].output_offset = i * 4 * sizeof(float); + assert(fetch.element[i].output_offset < fetch.output_stride); } @@ -286,6 +294,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float); emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format); emit.element[i].output_offset = key->element[i].out.offset; + assert(emit.element[i].input_offset < fetch.output_stride); } else { emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT; -- cgit v1.2.3 From a47c222803483c208f720e3fb5015050d4d0eaf4 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 7 Jun 2008 13:27:53 +0100 Subject: draw: fix temp vs output buffer typo in vsvg_run_elts --- src/gallium/auxiliary/draw/draw_vs_varient.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index d8317d3bbe..abe8c5ec2d 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -137,7 +137,7 @@ static void PIPE_CDECL vsvg_run_elts( struct draw_vs_varient *varient, vsvg->fetch->run_elts( vsvg->fetch, elts, count, - output_buffer ); + temp_buffer ); vsvg->base.vs->run_linear( vsvg->base.vs, temp_buffer, -- cgit v1.2.3 From 1c316f1e824b094977059145a1abcdb50a391f1c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 9 Jun 2008 09:27:52 -0600 Subject: gallium: disable a tgsi_dump() call --- src/gallium/auxiliary/draw/draw_vs_aos.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 45f36cff2a..458bbbe376 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -2066,7 +2066,9 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, if (!vaos->attrib) goto fail; +#if 0 tgsi_dump(vs->state.tokens, 0); +#endif if (!build_vertex_program( vaos, TRUE )) goto fail; -- cgit v1.2.3 From a70684bf256c3d5bc3a729bf9e9cf1a64cb2064a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 10 Jun 2008 08:32:52 +0900 Subject: gallium: Deprecate GETENV. Replace by debug_get_bool_option. debug_get_bool_option will interpret "n", "no", "0", "f", or "false" as FALSE; and everything else as TRUE. The default value (used when the variable is not set) is received as a parameter. --- src/gallium/auxiliary/draw/draw_pt.c | 4 ++-- src/gallium/drivers/i915simple/i915_context.c | 2 +- src/gallium/drivers/softpipe/sp_context.c | 4 ++-- src/gallium/include/pipe/p_util.h | 2 -- 4 files changed, 5 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index f0d7b51ad7..9140faeea9 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -119,8 +119,8 @@ draw_pt_arrays(struct draw_context *draw, boolean draw_pt_init( struct draw_context *draw ) { - draw->pt.test_fse = GETENV("DRAW_FSE") != NULL; - draw->pt.no_fse = GETENV("DRAW_NO_FSE") != NULL; + draw->pt.test_fse = debug_get_bool_option("DRAW_FSE", FALSE); + draw->pt.no_fse = debug_get_bool_option("DRAW_NO_FSE", FALSE); draw->pt.front.vcache = draw_pt_vcache( draw ); if (!draw->pt.front.vcache) diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 243b4a6852..4c01b8d5b1 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -161,7 +161,7 @@ struct pipe_context *i915_create_context( struct pipe_screen *screen, */ i915->draw = draw_create(); assert(i915->draw); - if (!GETENV("I915_NO_VBUF")) { + if (!debug_get_bool_option("I915_NO_VBUF", FALSE)) { draw_set_rasterize_stage(i915->draw, i915_draw_vbuf_stage(i915)); } else { diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 1e0106b86c..6e14f684f1 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -129,12 +129,12 @@ softpipe_create( struct pipe_screen *screen, uint i; #ifdef PIPE_ARCH_X86 - softpipe->use_sse = GETENV( "GALLIUM_NOSSE" ) == NULL; + softpipe->use_sse = !debug_get_bool_option( "GALLIUM_NOSSE", FALSE ); #else softpipe->use_sse = FALSE; #endif - softpipe->dump_fs = GETENV( "GALLIUM_DUMP_FS" ) != NULL; + softpipe->dump_fs = debug_get_bool_option( "GALLIUM_DUMP_FS", FALSE ); softpipe->pipe.winsys = pipe_winsys; softpipe->pipe.screen = screen; diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index 5547e57833..8b3003bcef 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -140,8 +140,6 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) #define CALLOC_STRUCT(T) (struct T *) CALLOC(1, sizeof(struct T)) -#define GETENV( X ) debug_get_option( X, NULL ) - /** * Return memory on given byte alignment -- cgit v1.2.3 From 4b52f4df1b37918a363d05e0b3db22125e801367 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 9 Jun 2008 18:49:13 +0900 Subject: pipebuffer: Be more lenient when matching cached buffer sizes. Reuse cached buffers up to twice as big a requested. --- src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 4bd3f94a6c..f1a457dde4 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -207,8 +207,11 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf, size_t size, const struct pb_desc *desc) { - /* TODO: be more lenient with size */ - if(buf->base.base.size != size) + if(buf->base.base.size < size) + return FALSE; + + /* be lenient with size */ + if(buf->base.base.size >= 2*size) return FALSE; if(!pb_check_alignment(desc->alignment, buf->base.base.alignment)) -- cgit v1.2.3 From 0c5b1a8ffb21f72fcde64a7daa13d5dab5b90425 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 10 Jun 2008 08:38:24 +0900 Subject: pipebuffer: Alternative buffer manager. For situations where one has a reserve memory pool, or a faster/slower pool. --- src/gallium/auxiliary/pipebuffer/Makefile | 1 + src/gallium/auxiliary/pipebuffer/SConscript | 1 + src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 5 ++ src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c | 101 +++++++++++++++++++++++ 4 files changed, 108 insertions(+) create mode 100644 src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile index ff09011b66..1d9b036c07 100644 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -6,6 +6,7 @@ LIBNAME = pipebuffer C_SOURCES = \ pb_buffer_fenced.c \ pb_buffer_malloc.c \ + pb_bufmgr_alt.c \ pb_bufmgr_cache.c \ pb_bufmgr_fenced.c \ pb_bufmgr_mm.c \ diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript index 9db0c0eae3..e52177bc79 100644 --- a/src/gallium/auxiliary/pipebuffer/SConscript +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -5,6 +5,7 @@ pipebuffer = env.ConvenienceLibrary( source = [ 'pb_buffer_fenced.c', 'pb_buffer_malloc.c', + 'pb_bufmgr_alt.c', 'pb_bufmgr_cache.c', 'pb_bufmgr_fenced.c', 'pb_bufmgr_mm.c', diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 4a922d16c1..00279f7010 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -171,6 +171,11 @@ fenced_bufmgr_create(struct pb_manager *provider, struct pipe_winsys *winsys); +struct pb_manager * +pb_alt_manager_create(struct pb_manager *provider1, + struct pb_manager *provider2); + + #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c new file mode 100644 index 0000000000..702bef1c04 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c @@ -0,0 +1,101 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Allocate buffers from two alternative buffer providers. + * + * \author Jose Fonseca + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_util.h" + +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +struct pb_alt_manager +{ + struct pb_manager base; + + struct pb_manager *provider1; + struct pb_manager *provider2; +}; + + +static INLINE struct pb_alt_manager * +pb_alt_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pb_alt_manager *)mgr; +} + + +static struct pb_buffer * +pb_alt_manager_create_buffer(struct pb_manager *_mgr, + size_t size, + const struct pb_desc *desc) +{ + struct pb_alt_manager *mgr = pb_alt_manager(_mgr); + struct pb_buffer *buf; + + buf = mgr->provider1->create_buffer(mgr->provider1, size, desc); + if(buf) + return buf; + + buf = mgr->provider2->create_buffer(mgr->provider2, size, desc); + return buf; +} + + +static void +pb_alt_manager_destroy(struct pb_manager *mgr) +{ + FREE(mgr); +} + + +struct pb_manager * +pb_alt_manager_create(struct pb_manager *provider1, + struct pb_manager *provider2) +{ + struct pb_alt_manager *mgr; + + mgr = CALLOC_STRUCT(pb_alt_manager); + if (!mgr) + return NULL; + + mgr->base.destroy = pb_alt_manager_destroy; + mgr->base.create_buffer = pb_alt_manager_create_buffer; + mgr->provider1 = provider1; + mgr->provider2 = provider2; + + return &mgr->base; +} -- cgit v1.2.3 From 3531c5284bb8dc772cd97c6be5bf589c160f9ae8 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 9 Jun 2008 19:56:52 +0900 Subject: gallium: Detect buffer overflows in the homegrown memory debugger. --- src/gallium/auxiliary/util/p_debug_mem.c | 49 ++++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/p_debug_mem.c index 78497c5f6a..51dbd0a710 100644 --- a/src/gallium/auxiliary/util/p_debug_mem.c +++ b/src/gallium/auxiliary/util/p_debug_mem.c @@ -75,6 +75,12 @@ struct debug_memory_header unsigned magic; }; +struct debug_memory_footer +{ + unsigned magic; +}; + + static struct list_head list = { &list, &list }; static unsigned long last_no = 0; @@ -98,14 +104,24 @@ data_from_header(struct debug_memory_header *hdr) return NULL; } +static INLINE struct debug_memory_footer * +footer_from_header(struct debug_memory_header *hdr) +{ + if(hdr) + return (struct debug_memory_footer *)((char *)hdr + sizeof(struct debug_memory_header) + hdr->size); + else + return NULL; +} + void * debug_malloc(const char *file, unsigned line, const char *function, size_t size) { struct debug_memory_header *hdr; + struct debug_memory_footer *ftr; - hdr = real_malloc(sizeof(*hdr) + size); + hdr = real_malloc(sizeof(*hdr) + size + sizeof(*ftr)); if(!hdr) return NULL; @@ -116,6 +132,9 @@ debug_malloc(const char *file, unsigned line, const char *function, hdr->size = size; hdr->magic = DEBUG_MEMORY_MAGIC; + ftr = footer_from_header(hdr); + ftr->magic = DEBUG_MEMORY_MAGIC; + LIST_ADDTAIL(&hdr->head, &list); return data_from_header(hdr); @@ -126,6 +145,7 @@ debug_free(const char *file, unsigned line, const char *function, void *ptr) { struct debug_memory_header *hdr; + struct debug_memory_footer *ftr; if(!ptr) return; @@ -139,8 +159,17 @@ debug_free(const char *file, unsigned line, const char *function, return; } + ftr = footer_from_header(hdr); + if(ftr->magic != DEBUG_MEMORY_MAGIC) { + debug_printf("%s:%u:%s: buffer overflow %p\n", + hdr->file, hdr->line, hdr->function, + ptr); + debug_assert(0); + } + LIST_DEL(&hdr->head); hdr->magic = 0; + ftr->magic = 0; real_free(hdr); } @@ -160,6 +189,7 @@ debug_realloc(const char *file, unsigned line, const char *function, void *old_ptr, size_t old_size, size_t new_size ) { struct debug_memory_header *old_hdr, *new_hdr; + struct debug_memory_footer *old_ftr, *new_ftr; void *new_ptr; if(!old_ptr) @@ -179,8 +209,16 @@ debug_realloc(const char *file, unsigned line, const char *function, return NULL; } + old_ftr = footer_from_header(old_hdr); + if(old_ftr->magic != DEBUG_MEMORY_MAGIC) { + debug_printf("%s:%u:%s: buffer overflow %p\n", + old_hdr->file, old_hdr->line, old_hdr->function, + old_ptr); + debug_assert(0); + } + /* alloc new */ - new_hdr = real_malloc(sizeof(*new_hdr) + new_size); + new_hdr = real_malloc(sizeof(*new_hdr) + new_size + sizeof(*new_ftr)); if(!new_hdr) return NULL; new_hdr->no = old_hdr->no; @@ -189,14 +227,19 @@ debug_realloc(const char *file, unsigned line, const char *function, new_hdr->function = old_hdr->function; new_hdr->size = new_size; new_hdr->magic = DEBUG_MEMORY_MAGIC; - LIST_REPLACE(&old_hdr->head, &new_hdr->head); + new_ftr = footer_from_header(new_hdr); + new_ftr->magic = DEBUG_MEMORY_MAGIC; + + LIST_REPLACE(&old_hdr->head, &new_hdr->head); + /* copy data */ new_ptr = data_from_header(new_hdr); memcpy( new_ptr, old_ptr, old_size < new_size ? old_size : new_size ); /* free old */ old_hdr->magic = 0; + old_ftr->magic = 0; real_free(old_hdr); return new_ptr; -- cgit v1.2.3 From 061e1c6c57703a92ac17b553f592c0c6114cb227 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 10 Jun 2008 09:16:34 +0100 Subject: draw: rework splitting of fan/loop prims in varray.c, fix flatshade issues --- src/gallium/auxiliary/draw/draw_pt_varray.c | 138 ++++++++------------- .../auxiliary/draw/draw_pt_varray_tmp_linear.h | 54 ++++---- 2 files changed, 76 insertions(+), 116 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index f5495a80c7..4479963db1 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -31,7 +31,7 @@ #include "draw/draw_pt.h" #define FETCH_MAX 256 -#define DRAW_MAX (16*FETCH_MAX) +#define DRAW_MAX (FETCH_MAX+8) struct varray_frontend { struct draw_pt_front_end base; @@ -40,11 +40,6 @@ struct varray_frontend { ushort draw_elts[DRAW_MAX]; unsigned fetch_elts[FETCH_MAX]; - unsigned draw_count; - unsigned fetch_count; - - unsigned fetch_start; - unsigned driver_fetch_max; unsigned fetch_max; @@ -54,121 +49,86 @@ struct varray_frontend { unsigned output_prim; }; -static void varray_flush(struct varray_frontend *varray) -{ - if (varray->draw_count) { -#if 0 - debug_printf("FLUSH fc = %d, dc = %d\n", - varray->fetch_count, - varray->draw_count); - debug_printf("\telt0 = %d, eltx = %d, draw0 = %d, drawx = %d\n", - varray->fetch_elts[0], - varray->fetch_elts[varray->fetch_count-1], - varray->draw_elts[0], - varray->draw_elts[varray->draw_count-1]); -#endif - varray->middle->run(varray->middle, - varray->fetch_elts, - varray->fetch_count, - varray->draw_elts, - varray->draw_count); - } - - varray->fetch_count = 0; - varray->draw_count = 0; -} static void varray_flush_linear(struct varray_frontend *varray, unsigned start, unsigned count) { if (count) { -#if 0 - debug_printf("FLUSH LINEAR start = %d, count = %d\n", - start, - count); -#endif assert(varray->middle->run_linear); varray->middle->run_linear(varray->middle, start, count); } } -static INLINE void fetch_init(struct varray_frontend *varray, - unsigned count) +static void varray_line_loop_segment(struct varray_frontend *varray, + unsigned start, + unsigned segment_start, + unsigned segment_count, + boolean end ) { - unsigned idx; -#if 0 - debug_printf("FETCH INIT c = %d, fs = %d\n", - count, - varray->fetch_start); -#endif - for (idx = 0; idx < count; ++idx) { - varray->fetch_elts[idx] = varray->fetch_start + idx; - } - varray->fetch_start += idx; - varray->fetch_count = idx; -} + assert(segment_count+1 < varray->fetch_max); + if (segment_count >= 1) { + unsigned nr = 0, i; + for (i = 0; i < segment_count; i++) + varray->fetch_elts[nr++] = start + segment_start + i; + if (end) + varray->fetch_elts[nr++] = start; + assert(nr < FETCH_MAX); -static INLINE void add_draw_el(struct varray_frontend *varray, - unsigned idx) -{ - varray->draw_elts[varray->draw_count++] = (ushort)idx; + varray->middle->run(varray->middle, + varray->fetch_elts, + nr, + varray->draw_elts, /* ie. linear */ + nr); + } } -static INLINE void varray_triangle( struct varray_frontend *varray, - unsigned i0, - unsigned i1, - unsigned i2 ) -{ - add_draw_el(varray, i0); - add_draw_el(varray, i1); - add_draw_el(varray, i2); -} -static INLINE void varray_line( struct varray_frontend *varray, - unsigned i0, - unsigned i1 ) +static void varray_fan_segment(struct varray_frontend *varray, + unsigned start, + unsigned segment_start, + unsigned segment_count ) { - add_draw_el(varray, i0); - add_draw_el(varray, i1); -} + assert(segment_count+1 < varray->fetch_max); + if (segment_count >= 2) { + unsigned nr = 0, i; + if (segment_start != 0) + varray->fetch_elts[nr++] = start; -static INLINE void varray_point( struct varray_frontend *varray, - unsigned i0 ) -{ - add_draw_el(varray, i0); + for (i = 0 ; i < segment_count; i++) + varray->fetch_elts[nr++] = start + segment_start + i; + + assert(nr < FETCH_MAX); + + varray->middle->run(varray->middle, + varray->fetch_elts, + nr, + varray->draw_elts, /* ie. linear */ + nr); + } } -#if 0 -#define TRIANGLE(flags,i0,i1,i2) varray_triangle(varray,i0,i1,i2) -#define LINE(flags,i0,i1) varray_line(varray,i0,i1) -#define POINT(i0) varray_point(varray,i0) -#define FUNC varray_decompose -#include "draw_pt_decompose.h" -#else -#define TRIANGLE(vc,i0,i1,i2) varray_triangle(vc,i0,i1,i2) -#define LINE(vc,i0,i1) varray_line(vc,i0,i1) -#define POINT(vc,i0) varray_point(vc,i0) + + #define FUNC varray_run #include "draw_pt_varray_tmp_linear.h" -#endif static unsigned decompose_prim[PIPE_PRIM_POLYGON + 1] = { PIPE_PRIM_POINTS, PIPE_PRIM_LINES, - PIPE_PRIM_LINES, /* decomposed LINELOOP */ + PIPE_PRIM_LINE_STRIP, /* decomposed LINELOOP */ PIPE_PRIM_LINE_STRIP, PIPE_PRIM_TRIANGLES, PIPE_PRIM_TRIANGLE_STRIP, - PIPE_PRIM_TRIANGLES, /* decomposed TRI_FAN */ + PIPE_PRIM_TRIANGLE_FAN, PIPE_PRIM_QUADS, PIPE_PRIM_QUAD_STRIP, - PIPE_PRIM_TRIANGLES /* decomposed POLYGON */ + PIPE_PRIM_POLYGON }; @@ -186,7 +146,8 @@ static void varray_prepare(struct draw_pt_front_end *frontend, varray->output_prim = decompose_prim[prim]; varray->middle = middle; - middle->prepare(middle, varray->output_prim, opt, &varray->fetch_max ); + middle->prepare(middle, varray->output_prim, opt, &varray->driver_fetch_max ); + varray->fetch_max = MIN2(FETCH_MAX, varray->driver_fetch_max); } @@ -207,6 +168,7 @@ static void varray_destroy(struct draw_pt_front_end *frontend) struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw) { + unsigned i; struct varray_frontend *varray = CALLOC_STRUCT(varray_frontend); if (varray == NULL) return NULL; @@ -217,5 +179,9 @@ struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw) varray->base.destroy = varray_destroy; varray->draw = draw; + for (i = 0; i < DRAW_MAX; i++) { + varray->draw_elts[i] = i; + } + return &varray->base; } diff --git a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h index 6e5e30f38e..55a8e6521d 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h +++ b/src/gallium/auxiliary/draw/draw_pt_varray_tmp_linear.h @@ -11,11 +11,9 @@ static void FUNC(struct draw_pt_front_end *frontend, struct varray_frontend *varray = (struct varray_frontend *)frontend; unsigned start = (unsigned)elts; - unsigned i, j; + unsigned j; unsigned first, incr; - varray->fetch_start = start; - draw_pt_split_prim(varray->input_prim, &first, &incr); /* Sanitize primitive length: @@ -40,7 +38,7 @@ static void FUNC(struct draw_pt_front_end *frontend, case PIPE_PRIM_QUAD_STRIP: for (j = 0; j < count;) { unsigned remaining = count - j; - unsigned nr = trim( MIN2(varray->fetch_max, remaining), first, incr ); + unsigned nr = trim( MIN2(varray->driver_fetch_max, remaining), first, incr ); varray_flush_linear(varray, start + j, nr); j += nr; if (nr != remaining) @@ -49,45 +47,41 @@ static void FUNC(struct draw_pt_front_end *frontend, break; case PIPE_PRIM_LINE_LOOP: - if (count >= 2) { - unsigned fetch_max = MIN2(FETCH_MAX, varray->fetch_max); - for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(fetch_max, count - j); - end -= (end % incr); - for (i = 1; i < end; i++) { - LINE(varray, i - 1, i); - } - LINE(varray, i - 1, 0); - i = end; - fetch_init(varray, end); - varray_flush(varray); - } + /* Always have to decompose as we've stated that this will be + * emitted as a line-strip. + */ + for (j = 0; j < count;) { + unsigned remaining = count - j; + unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr ); + varray_line_loop_segment(varray, start, j, nr, nr == remaining); + j += nr; + if (nr != remaining) + j -= (first - incr); } break; case PIPE_PRIM_POLYGON: - case PIPE_PRIM_TRIANGLE_FAN: { - unsigned fetch_max = MIN2(FETCH_MAX, varray->fetch_max); - for (j = 0; j + first <= count; j += i) { - unsigned end = MIN2(fetch_max, count - j); - end -= (end % incr); - for (i = 2; i < end; i++) { - TRIANGLE(varray, 0, i - 1, i); + case PIPE_PRIM_TRIANGLE_FAN: + if (count < varray->driver_fetch_max) { + varray_flush_linear(varray, start, count); + } + else { + for ( j = 0; j < count;) { + unsigned remaining = count - j; + unsigned nr = trim( MIN2(varray->fetch_max-1, remaining), first, incr ); + varray_fan_segment(varray, start, j, nr); + j += nr; + if (nr != remaining) + j -= (first - incr); } - i = end; - fetch_init(varray, end); - varray_flush(varray); } break; - } default: assert(0); break; } - - varray_flush(varray); } #undef TRIANGLE -- cgit v1.2.3 From 14a13e3767f080a48a4ae01f803dd0bc8754f441 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 10 Jun 2008 14:45:34 +0100 Subject: draw: fix edgeflags on clipped poly emit --- src/gallium/auxiliary/draw/draw_pipe_clip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index c11ed934a4..77ccddac4a 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -171,7 +171,7 @@ static void emit_poly( struct draw_stage *stage, header.flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; header.pad = 0; - for (i = 2; i < n; i++, header.flags = 0) { + for (i = 2; i < n; i++, header.flags = edge_middle) { header.v[0] = inlist[i-1]; header.v[1] = inlist[i]; header.v[2] = inlist[0]; /* keep in v[2] for flatshading */ -- cgit v1.2.3 From ad44e68706877ab06929747e7a82c718c1c27e02 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 10 Jun 2008 14:46:25 +0100 Subject: draw: no need to rearrange most primitives in vcache for flatshade-first The driver/pipeline will still be applying flatshade-first state to the triangles emitted from vcache, so there's no need to rotate the vertices of most primitives. The only exception is POLYGON, as explained in the code. --- src/gallium/auxiliary/draw/draw_pt_fetch.c | 5 +- src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h | 123 +++++++++++++----------- 2 files changed, 72 insertions(+), 56 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 07f4c99164..b8fd3aa1d8 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -114,7 +114,10 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, fetch->translate = translate_cache_find(fetch->cache, &key); { - static struct vertex_header vh = { 0, 1, 0, 0xffff }; + static struct vertex_header vh = { 0, + 1, /* edgeflag */ + 0, + 0xffff }; fetch->translate->set_buffer(fetch->translate, draw->pt.nr_vertex_buffers, &vh, diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h index ec05bbeab4..f2334a84f6 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h @@ -7,15 +7,18 @@ static void FUNC( struct draw_pt_front_end *frontend, { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; struct draw_context *draw = vcache->draw; - - boolean flatfirst = (draw->rasterizer->flatshade && - draw->rasterizer->flatshade_first); unsigned i; ushort flags; if (0) debug_printf("%s %d\n", __FUNCTION__, count); - + /* Note that no adjustment is made here for flatshade provoking + * vertex. This was previously the case, but was incorrect as the + * same logic would be applied twice both here & in the pipeline + * code or driver. The rule is now that we just preserve the + * original order in this code, and leave identification of the PV + * to the pipeline and driver. + */ switch (vcache->input_prim) { case PIPE_PRIM_POINTS: for (i = 0; i < count; i ++) { @@ -72,45 +75,23 @@ static void FUNC( struct draw_pt_front_end *frontend, break; case PIPE_PRIM_TRIANGLE_STRIP: - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 0), - get_elt(elts, i + 1 + (i&1)), - get_elt(elts, i + 2 - (i&1))); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 0 + (i&1)), - get_elt(elts, i + 1 - (i&1)), - get_elt(elts, i + 2 )); - } + for (i = 0; i+2 < count; i++) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, i + 0 + (i&1)), + get_elt(elts, i + 1 - (i&1)), + get_elt(elts, i + 2 )); } break; case PIPE_PRIM_TRIANGLE_FAN: if (count >= 3) { - if (flatfirst) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, 0 )); - } - } - else { - for (i = 0; i+2 < count; i++) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2 )); - } + for (i = 0; i+2 < count; i++) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, 0 )); } } break; @@ -138,26 +119,58 @@ static void FUNC( struct draw_pt_front_end *frontend, case PIPE_PRIM_POLYGON: { + boolean flatfirst = (draw->rasterizer->flatshade && + draw->rasterizer->flatshade_first); + /* These bitflags look a little odd because we submit the * vertices as (1,2,0) to satisfy flatshade requirements. + * + * Polygon is defined has having vertex 0 be the provoking + * flatshade vertex and all known API's match this usage. + * However, the PV's for the triangles we emit from this + * decomposition vary according to the API, and hence we have + * to choose between two ways of emitting the triangles. */ - const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2; - const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0; - const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1; - - flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; - - for (i = 0; i+2 < count; i++, flags = edge_middle) { - - if (i + 3 == count) - flags |= edge_last; - - TRIANGLE( vcache, - flags, - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, 0)); - } + if (flatfirst) { + const ushort edge_first = DRAW_PIPE_EDGE_FLAG_0; + const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_1; + const ushort edge_last = DRAW_PIPE_EDGE_FLAG_2; + + flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; + + for (i = 0; i+2 < count; i++, flags = edge_middle) { + + if (i + 3 == count) + flags |= edge_last; + + /* PV is first vertex */ + TRIANGLE( vcache, + flags, + get_elt(elts, 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2)); + } + } + else { + const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2; + const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0; + const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1; + + flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; + + for (i = 0; i+2 < count; i++, flags = edge_middle) { + + if (i + 3 == count) + flags |= edge_last; + + /* PV is third vertex */ + TRIANGLE( vcache, + flags, + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, 0)); + } + } } break; -- cgit v1.2.3 From aa7c21a45b90e6e502c4967f892f1691411d761f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 10 Jun 2008 16:56:32 +0100 Subject: Revert "draw: no need to rearrange most primitives in vcache for flatshade-first" This reverts commit ad44e68706877ab06929747e7a82c718c1c27e02. --- src/gallium/auxiliary/draw/draw_pt_fetch.c | 5 +- src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h | 123 +++++++++++------------- 2 files changed, 56 insertions(+), 72 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index b8fd3aa1d8..07f4c99164 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -114,10 +114,7 @@ void draw_pt_fetch_prepare( struct pt_fetch *fetch, fetch->translate = translate_cache_find(fetch->cache, &key); { - static struct vertex_header vh = { 0, - 1, /* edgeflag */ - 0, - 0xffff }; + static struct vertex_header vh = { 0, 1, 0, 0xffff }; fetch->translate->set_buffer(fetch->translate, draw->pt.nr_vertex_buffers, &vh, diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h index f2334a84f6..ec05bbeab4 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h +++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h @@ -7,18 +7,15 @@ static void FUNC( struct draw_pt_front_end *frontend, { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; struct draw_context *draw = vcache->draw; + + boolean flatfirst = (draw->rasterizer->flatshade && + draw->rasterizer->flatshade_first); unsigned i; ushort flags; if (0) debug_printf("%s %d\n", __FUNCTION__, count); - /* Note that no adjustment is made here for flatshade provoking - * vertex. This was previously the case, but was incorrect as the - * same logic would be applied twice both here & in the pipeline - * code or driver. The rule is now that we just preserve the - * original order in this code, and leave identification of the PV - * to the pipeline and driver. - */ + switch (vcache->input_prim) { case PIPE_PRIM_POINTS: for (i = 0; i < count; i ++) { @@ -75,23 +72,45 @@ static void FUNC( struct draw_pt_front_end *frontend, break; case PIPE_PRIM_TRIANGLE_STRIP: - for (i = 0; i+2 < count; i++) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 0 + (i&1)), - get_elt(elts, i + 1 - (i&1)), - get_elt(elts, i + 2 )); + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, i + 0), + get_elt(elts, i + 1 + (i&1)), + get_elt(elts, i + 2 - (i&1))); + } + } + else { + for (i = 0; i+2 < count; i++) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, i + 0 + (i&1)), + get_elt(elts, i + 1 - (i&1)), + get_elt(elts, i + 2 )); + } } break; case PIPE_PRIM_TRIANGLE_FAN: if (count >= 3) { - for (i = 0; i+2 < count; i++) { - TRIANGLE( vcache, - DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, 0 )); + if (flatfirst) { + for (i = 0; i+2 < count; i++) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, 0 )); + } + } + else { + for (i = 0; i+2 < count; i++) { + TRIANGLE( vcache, + DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL, + get_elt(elts, 0), + get_elt(elts, i + 1), + get_elt(elts, i + 2 )); + } } } break; @@ -119,58 +138,26 @@ static void FUNC( struct draw_pt_front_end *frontend, case PIPE_PRIM_POLYGON: { - boolean flatfirst = (draw->rasterizer->flatshade && - draw->rasterizer->flatshade_first); - /* These bitflags look a little odd because we submit the * vertices as (1,2,0) to satisfy flatshade requirements. - * - * Polygon is defined has having vertex 0 be the provoking - * flatshade vertex and all known API's match this usage. - * However, the PV's for the triangles we emit from this - * decomposition vary according to the API, and hence we have - * to choose between two ways of emitting the triangles. */ - if (flatfirst) { - const ushort edge_first = DRAW_PIPE_EDGE_FLAG_0; - const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_1; - const ushort edge_last = DRAW_PIPE_EDGE_FLAG_2; - - flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; - - for (i = 0; i+2 < count; i++, flags = edge_middle) { - - if (i + 3 == count) - flags |= edge_last; - - /* PV is first vertex */ - TRIANGLE( vcache, - flags, - get_elt(elts, 0), - get_elt(elts, i + 1), - get_elt(elts, i + 2)); - } - } - else { - const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2; - const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0; - const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1; - - flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; - - for (i = 0; i+2 < count; i++, flags = edge_middle) { - - if (i + 3 == count) - flags |= edge_last; - - /* PV is third vertex */ - TRIANGLE( vcache, - flags, - get_elt(elts, i + 1), - get_elt(elts, i + 2), - get_elt(elts, 0)); - } - } + const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2; + const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0; + const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1; + + flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle; + + for (i = 0; i+2 < count; i++, flags = edge_middle) { + + if (i + 3 == count) + flags |= edge_last; + + TRIANGLE( vcache, + flags, + get_elt(elts, i + 1), + get_elt(elts, i + 2), + get_elt(elts, 0)); + } } break; -- cgit v1.2.3 From ab399b555c99c46958c421d900109f78901ddc99 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 11 Jun 2008 09:15:54 +0100 Subject: draw: remove debug assert on failover to generic vs varient --- src/gallium/auxiliary/draw/draw_vs_aos.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 458bbbe376..c47647ea72 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -2108,7 +2108,6 @@ struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs, struct draw_vs_varient *varient = varient_aos_sse( vs, key ); if (varient == NULL) { - assert(0); varient = draw_vs_varient_generic( vs, key ); } -- cgit v1.2.3 From bd9264210097d08073a4ea3619ca25db56245280 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 11 Jun 2008 09:36:00 +0100 Subject: draw: remove another debug assert on failover to generic vs varient --- src/gallium/auxiliary/draw/draw_vs_aos.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index f6f6af2dd9..66944a4e33 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -202,9 +202,8 @@ struct x86_reg aos_get_internal_xmm( struct aos_compilation *cp, #define ERROR(cp, msg) \ do { \ - debug_printf("%s: x86 translation failed: %s\n", __FUNCTION__, msg); \ + if (0) debug_printf("%s: x86 translation failed: %s\n", __FUNCTION__, msg); \ cp->error = 1; \ - assert(0); \ } while (0) -- cgit v1.2.3 From 4dd1917e4be3ae48b436ed333bd2fcd37603d1ed Mon Sep 17 00:00:00 2001 From: Alan Hourihane Date: Wed, 11 Jun 2008 14:51:41 +0100 Subject: small optimization --- src/gallium/auxiliary/util/u_pack_color.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 655e2c8259..06abb34d5a 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -255,20 +255,28 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest) static INLINE uint util_pack_z(enum pipe_format format, double z) { + if (z == 0.0) + return 0; + switch (format) { case PIPE_FORMAT_Z16_UNORM: + if (z == 1.0) + return 0xffff; return (uint) (z * 0xffff); case PIPE_FORMAT_Z32_UNORM: /* special-case to avoid overflow */ if (z == 1.0) return 0xffffffff; - else - return (uint) (z * 0xffffffff); + return (uint) (z * 0xffffffff); case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: + if (z == 1.0) + return 0xffffff; return (uint) (z * 0xffffff); case PIPE_FORMAT_Z24S8_UNORM: case PIPE_FORMAT_Z24X8_UNORM: + if (z == 1.0) + return 0xffffff00; return ((uint) (z * 0xffffff)) << 8; default: debug_print_format("gallium: unhandled format in util_pack_z()", format); -- cgit v1.2.3 From 807f8f177b3a2833806d84a70e71019f8849239f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 11 Jun 2008 18:46:26 +0100 Subject: draw: preserve specular alpha when flatshading -- may be FOGC --- src/gallium/auxiliary/draw/draw_pipe_flatshade.c | 30 ++++++++++++++++++++++-- 1 file changed, 28 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c index 21a9c3b77f..4741b22d02 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c @@ -40,9 +40,19 @@ struct flat_stage struct draw_stage stage; uint num_color_attribs; - uint color_attribs[4]; /* front/back primary/secondary colors */ + uint color_attribs[2]; /* front/back primary colors */ + + uint num_spec_attribs; + uint spec_attribs[2]; /* front/back secondary colors */ }; +#define COPY_3FV( DST, SRC ) \ +do { \ + (DST)[0] = (SRC)[0]; \ + (DST)[1] = (SRC)[1]; \ + (DST)[2] = (SRC)[2]; \ +} while (0) + static INLINE struct flat_stage * flat_stage(struct draw_stage *stage) @@ -58,10 +68,16 @@ static INLINE void copy_colors( struct draw_stage *stage, { const struct flat_stage *flat = flat_stage(stage); uint i; + for (i = 0; i < flat->num_color_attribs; i++) { const uint attr = flat->color_attribs[i]; COPY_4FV(dst->data[attr], src->data[attr]); } + + for (i = 0; i < flat->num_spec_attribs; i++) { + const uint attr = flat->spec_attribs[i]; + COPY_3FV(dst->data[attr], src->data[attr]); + } } @@ -78,6 +94,12 @@ static INLINE void copy_colors2( struct draw_stage *stage, COPY_4FV(dst0->data[attr], src->data[attr]); COPY_4FV(dst1->data[attr], src->data[attr]); } + + for (i = 0; i < flat->num_spec_attribs; i++) { + const uint attr = flat->spec_attribs[i]; + COPY_3FV(dst0->data[attr], src->data[attr]); + COPY_3FV(dst1->data[attr], src->data[attr]); + } } @@ -164,10 +186,14 @@ static void flatshade_init_state( struct draw_stage *stage ) /* Find which vertex shader outputs are colors, make a list */ flat->num_color_attribs = 0; + flat->num_spec_attribs = 0; for (i = 0; i < vs->info.num_outputs; i++) { if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_COLOR || vs->info.output_semantic_name[i] == TGSI_SEMANTIC_BCOLOR) { - flat->color_attribs[flat->num_color_attribs++] = i; + if (vs->info.output_semantic_index[i] == 0) + flat->color_attribs[flat->num_color_attribs++] = i; + else + flat->spec_attribs[flat->num_spec_attribs++] = i; } } -- cgit v1.2.3 From 2161b0fafcdc16703162dd489d2ec1e7114cce4c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 11 Jun 2008 23:48:13 +0100 Subject: draw: don't assume vertex position is in data[0] --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 25 ++++++++------- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 12 ++++--- src/gallium/auxiliary/draw/draw_pipe_clip.c | 16 +++++----- src/gallium/auxiliary/draw/draw_pipe_cull.c | 8 +++-- src/gallium/auxiliary/draw/draw_pipe_offset.c | 7 +++-- src/gallium/auxiliary/draw/draw_pipe_stipple.c | 5 +-- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 2 ++ src/gallium/auxiliary/draw/draw_pipe_wide_line.c | 9 +++--- src/gallium/auxiliary/draw/draw_pipe_wide_point.c | 9 +++--- src/gallium/auxiliary/draw/draw_private.h | 2 +- src/gallium/auxiliary/draw/draw_pt_post_vs.c | 38 +++++++++++++---------- src/gallium/auxiliary/draw/draw_vs.c | 11 +++++++ src/gallium/auxiliary/draw/draw_vs.h | 1 + src/gallium/auxiliary/draw/draw_vs_aos.c | 8 ++--- src/gallium/auxiliary/draw/draw_vs_varient.c | 4 +++ 15 files changed, 97 insertions(+), 60 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 634bf067f1..ecdebca5f1 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -78,6 +78,8 @@ struct aaline_stage /** For AA lines, this is the vertex attrib slot for the new texcoords */ uint tex_slot; + /** position, not necessarily output zero */ + uint pos_slot; void *sampler_cso; struct pipe_texture *texture; @@ -520,9 +522,10 @@ aaline_line(struct draw_stage *stage, struct prim_header *header) struct prim_header tri; struct vertex_header *v[8]; uint texPos = aaline->tex_slot; + uint posPos = aaline->pos_slot; float *pos, *tex; - float dx = header->v[1]->data[0][0] - header->v[0]->data[0][0]; - float dy = header->v[1]->data[0][1] - header->v[0]->data[0][1]; + float dx = header->v[1]->data[posPos][0] - header->v[0]->data[posPos][0]; + float dy = header->v[1]->data[posPos][1] - header->v[0]->data[posPos][1]; double a = atan2(dy, dx); float c_a = (float) cos(a), s_a = (float) sin(a); uint i; @@ -549,35 +552,35 @@ aaline_line(struct draw_stage *stage, struct prim_header *header) */ /* new verts */ - pos = v[0]->data[0]; + pos = v[0]->data[posPos]; pos[0] += (-dx * c_a - dy * s_a); pos[1] += (-dx * s_a + dy * c_a); - pos = v[1]->data[0]; + pos = v[1]->data[posPos]; pos[0] += (-dx * c_a - -dy * s_a); pos[1] += (-dx * s_a + -dy * c_a); - pos = v[2]->data[0]; + pos = v[2]->data[posPos]; pos[0] += ( dx * c_a - dy * s_a); pos[1] += ( dx * s_a + dy * c_a); - pos = v[3]->data[0]; + pos = v[3]->data[posPos]; pos[0] += ( dx * c_a - -dy * s_a); pos[1] += ( dx * s_a + -dy * c_a); - pos = v[4]->data[0]; + pos = v[4]->data[posPos]; pos[0] += (-dx * c_a - dy * s_a); pos[1] += (-dx * s_a + dy * c_a); - pos = v[5]->data[0]; + pos = v[5]->data[posPos]; pos[0] += (-dx * c_a - -dy * s_a); pos[1] += (-dx * s_a + -dy * c_a); - pos = v[6]->data[0]; + pos = v[6]->data[posPos]; pos[0] += ( dx * c_a - dy * s_a); pos[1] += ( dx * s_a + dy * c_a); - pos = v[7]->data[0]; + pos = v[7]->data[posPos]; pos[0] += ( dx * c_a - -dy * s_a); pos[1] += ( dx * s_a + -dy * c_a); @@ -653,7 +656,7 @@ aaline_first_line(struct draw_stage *stage, struct prim_header *header) /* update vertex attrib info */ aaline->tex_slot = draw->vs.num_vs_outputs; - assert(aaline->tex_slot > 0); /* output[0] is vertex pos */ + aaline->pos_slot = draw->vs.position_output; /* advertise the extra post-transformed vertex attribute */ draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 96dcdb43d5..87fd303649 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -85,6 +85,7 @@ struct aapoint_stage /** this is the vertex attrib slot for the new texcoords */ uint tex_slot; + uint pos_slot; /* * Currently bound state @@ -570,6 +571,7 @@ aapoint_point(struct draw_stage *stage, struct prim_header *header) struct prim_header tri; struct vertex_header *v[4]; uint texPos = aapoint->tex_slot; + uint pos_slot = aapoint->pos_slot; float radius, *pos, *tex; uint i; float k; @@ -619,19 +621,19 @@ aapoint_point(struct draw_stage *stage, struct prim_header *header) } /* new verts */ - pos = v[0]->data[0]; + pos = v[0]->data[pos_slot]; pos[0] -= radius; pos[1] -= radius; - pos = v[1]->data[0]; + pos = v[1]->data[pos_slot]; pos[0] += radius; pos[1] -= radius; - pos = v[2]->data[0]; + pos = v[2]->data[pos_slot]; pos[0] += radius; pos[1] += radius; - pos = v[3]->data[0]; + pos = v[3]->data[pos_slot]; pos[0] -= radius; pos[1] += radius; @@ -683,6 +685,8 @@ aapoint_first_point(struct draw_stage *stage, struct prim_header *header) aapoint->tex_slot = draw->vs.num_vs_outputs; assert(aapoint->tex_slot > 0); /* output[0] is vertex pos */ + aapoint->pos_slot = draw->vs.position_output; + draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; draw->extra_vp_outputs.semantic_index = aapoint->fs->generic_attrib; draw->extra_vp_outputs.slot = aapoint->tex_slot; diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index 77ccddac4a..fa10f8efca 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -113,6 +113,7 @@ static void interp( const struct clipper *clip, const struct vertex_header *in ) { const unsigned nr_attrs = clip->stage.draw->vs.num_vs_outputs; + const unsigned pos_attr = clip->stage.draw->vs.position_output; unsigned j; /* Vertex header. @@ -138,18 +139,17 @@ static void interp( const struct clipper *clip, const float *trans = clip->stage.draw->viewport.translate; const float oow = 1.0f / pos[3]; - dst->data[0][0] = pos[0] * oow * scale[0] + trans[0]; - dst->data[0][1] = pos[1] * oow * scale[1] + trans[1]; - dst->data[0][2] = pos[2] * oow * scale[2] + trans[2]; - dst->data[0][3] = oow; + dst->data[pos_attr][0] = pos[0] * oow * scale[0] + trans[0]; + dst->data[pos_attr][1] = pos[1] * oow * scale[1] + trans[1]; + dst->data[pos_attr][2] = pos[2] * oow * scale[2] + trans[2]; + dst->data[pos_attr][3] = oow; } /* Other attributes - * Note: start at 1 to skip winpos (data[0]) since we just computed - * it above. */ - for (j = 1; j < nr_attrs; j++) { - interp_attr(dst->data[j], t, in->data[j], out->data[j]); + for (j = 0; j < nr_attrs; j++) { + if (j != pos_attr) + interp_attr(dst->data[j], t, in->data[j], out->data[j]); } } diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c index 87aaf1f85b..d0d22a38e0 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_cull.c +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -55,10 +55,12 @@ static INLINE struct cull_stage *cull_stage( struct draw_stage *stage ) static void cull_tri( struct draw_stage *stage, struct prim_header *header ) { + const unsigned pos = stage->draw->vs.position_output; + /* Window coords: */ - const float *v0 = header->v[0]->data[0]; - const float *v1 = header->v[1]->data[0]; - const float *v2 = header->v[2]->data[0]; + const float *v0 = header->v[0]->data[pos]; + const float *v1 = header->v[1]->data[pos]; + const float *v2 = header->v[2]->data[pos]; /* edge vectors e = v0 - v2, f = v1 - v2 */ const float ex = v0[0] - v2[0]; diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index ea6de8c571..8f1650e55c 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -62,14 +62,15 @@ static INLINE struct offset_stage *offset_stage( struct draw_stage *stage ) static void do_offset_tri( struct draw_stage *stage, struct prim_header *header ) { + const unsigned pos = stage->draw->vs.position_output; struct offset_stage *offset = offset_stage(stage); float inv_det = 1.0f / header->det; /* Window coords: */ - float *v0 = header->v[0]->data[0]; - float *v1 = header->v[1]->data[0]; - float *v2 = header->v[2]->data[0]; + float *v0 = header->v[0]->data[pos]; + float *v1 = header->v[1]->data[pos]; + float *v2 = header->v[2]->data[pos]; /* edge vectors e = v0 - v2, f = v1 - v2 */ float ex = v0[0] - v2[0]; diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index 9522b79582..bf0db18a68 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -119,8 +119,9 @@ stipple_line(struct draw_stage *stage, struct prim_header *header) struct stipple_stage *stipple = stipple_stage(stage); struct vertex_header *v0 = header->v[0]; struct vertex_header *v1 = header->v[1]; - const float *pos0 = v0->data[0]; - const float *pos1 = v1->data[0]; + const unsigned pos = stage->draw->vs.position_output; + const float *pos0 = v0->data[pos]; + const float *pos1 = v1->data[pos]; float start = 0; int state = 0; diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 9e5597c32c..10a1f7df79 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -136,6 +136,8 @@ emit_vertex( struct vbuf_stage *vbuf, * set_buffer is efficient. Consider a special one-shot mode for * translate. */ + /* Note: we really do want data[0] here, not data[pos]: + */ vbuf->translate->set_buffer(vbuf->translate, 0, vertex->data[0], 0); vbuf->translate->run(vbuf->translate, 0, 1, vbuf->vertex_ptr); diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c index 878c9c7169..29649f5787 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -58,6 +58,7 @@ static void wideline_line( struct draw_stage *stage, struct prim_header *header ) { /*const struct wideline_stage *wide = wideline_stage(stage);*/ + const unsigned pos = stage->draw->vs.position_output; const float half_width = 0.5f * stage->draw->rasterizer->line_width; struct prim_header tri; @@ -67,10 +68,10 @@ static void wideline_line( struct draw_stage *stage, struct vertex_header *v2 = dup_vert(stage, header->v[1], 2); struct vertex_header *v3 = dup_vert(stage, header->v[1], 3); - float *pos0 = v0->data[0]; - float *pos1 = v1->data[0]; - float *pos2 = v2->data[0]; - float *pos3 = v3->data[0]; + float *pos0 = v0->data[pos]; + float *pos1 = v1->data[pos]; + float *pos2 = v2->data[pos]; + float *pos3 = v3->data[pos]; const float dx = FABSF(pos0[0] - pos2[0]); const float dy = FABSF(pos0[1] - pos2[1]); diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index df92e3f2d0..d40a07f4ae 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -96,6 +96,7 @@ static void widepoint_point( struct draw_stage *stage, struct prim_header *header ) { const struct widepoint_stage *wide = widepoint_stage(stage); + const unsigned pos = stage->draw->vs.position_output; const boolean sprite = (boolean) stage->draw->rasterizer->point_sprite; float half_size; float left_adj, right_adj, bot_adj, top_adj; @@ -108,10 +109,10 @@ static void widepoint_point( struct draw_stage *stage, struct vertex_header *v2 = dup_vert(stage, header->v[0], 2); struct vertex_header *v3 = dup_vert(stage, header->v[0], 3); - float *pos0 = v0->data[0]; - float *pos1 = v1->data[0]; - float *pos2 = v2->data[0]; - float *pos3 = v3->data[0]; + float *pos0 = v0->data[pos]; + float *pos1 = v1->data[pos]; + float *pos2 = v2->data[pos]; + float *pos3 = v3->data[pos]; /* point size is either per-vertex or fixed size */ if (wide->psize_slot >= 0) { diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 1865601cc0..7bd1e670b4 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -177,7 +177,7 @@ struct draw_context struct { struct draw_vertex_shader *vertex_shader; uint num_vs_outputs; /**< convenience, from vertex_shader */ - + uint position_output; /** TGSI program interpreter runtime state */ struct tgsi_exec_machine machine; diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index c4a67c8289..af6306b1c6 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -100,16 +100,19 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, struct vertex_header *out = vertices; const float *scale = pvs->draw->viewport.scale; const float *trans = pvs->draw->viewport.translate; + const unsigned pos = pvs->draw->vs.position_output; unsigned clipped = 0; unsigned j; if (0) debug_printf("%s\n"); for (j = 0; j < count; j++) { - out->clip[0] = out->data[0][0]; - out->clip[1] = out->data[0][1]; - out->clip[2] = out->data[0][2]; - out->clip[3] = out->data[0][3]; + float *position = out->data[pos]; + + out->clip[0] = position[0]; + out->clip[1] = position[1]; + out->clip[2] = position[2]; + out->clip[3] = position[3]; out->vertex_id = 0xffff; out->clipmask = compute_clipmask_gl(out->clip, @@ -120,19 +123,19 @@ static boolean post_vs_cliptest_viewport_gl( struct pt_post_vs *pvs, if (out->clipmask == 0) { /* divide by w */ - float w = 1.0f / out->data[0][3]; + float w = 1.0f / position[3]; /* Viewport mapping */ - out->data[0][0] = out->data[0][0] * w * scale[0] + trans[0]; - out->data[0][1] = out->data[0][1] * w * scale[1] + trans[1]; - out->data[0][2] = out->data[0][2] * w * scale[2] + trans[2]; - out->data[0][3] = w; + position[0] = position[0] * w * scale[0] + trans[0]; + position[1] = position[1] * w * scale[1] + trans[1]; + position[2] = position[2] * w * scale[2] + trans[2]; + position[3] = w; #if 0 debug_printf("post viewport: %f %f %f %f\n", - out->data[0][0], - out->data[0][1], - out->data[0][2], - out->data[0][3]); + position[0], + position[1], + position[2], + position[3]); #endif } @@ -154,15 +157,18 @@ static boolean post_vs_viewport( struct pt_post_vs *pvs, struct vertex_header *out = vertices; const float *scale = pvs->draw->viewport.scale; const float *trans = pvs->draw->viewport.translate; + const unsigned pos = pvs->draw->vs.position_output; unsigned j; if (0) debug_printf("%s\n", __FUNCTION__); for (j = 0; j < count; j++) { + float *position = out->data[pos]; + /* Viewport mapping only, no cliptest/rhw divide */ - out->data[0][0] = out->data[0][0] * scale[0] + trans[0]; - out->data[0][1] = out->data[0][1] * scale[1] + trans[1]; - out->data[0][2] = out->data[0][2] * scale[2] + trans[2]; + position[0] = position[0] * scale[0] + trans[0]; + position[1] = position[1] * scale[1] + trans[1]; + position[2] = position[2] * scale[2] + trans[2]; out = (struct vertex_header *)((char *)out + stride); } diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 979f9864fd..978954e91c 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -85,6 +85,16 @@ draw_create_vertex_shader(struct draw_context *draw, } } + if (vs) + { + uint i; + for (i = 0; i < vs->info.num_outputs; i++) { + if (vs->info.output_semantic_name[i] == TGSI_SEMANTIC_POSITION && + vs->info.output_semantic_index[i] == 0) + vs->position_output = i; + } + } + assert(vs); return vs; } @@ -100,6 +110,7 @@ draw_bind_vertex_shader(struct draw_context *draw, { draw->vs.vertex_shader = dvs; draw->vs.num_vs_outputs = dvs->info.num_outputs; + draw->vs.position_output = dvs->position_output; dvs->prepare( dvs, draw ); } else { diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 08c6de8ba8..45992d1986 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -106,6 +106,7 @@ struct draw_vertex_shader { struct pipe_shader_state state; struct tgsi_shader_info info; + unsigned position_output; /* Extracted from shader: */ diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index c47647ea72..5d4a8b38c8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1686,7 +1686,7 @@ static boolean emit_viewport( struct aos_compilation *cp ) { struct x86_reg pos = aos_get_shader_reg_xmm(cp, TGSI_FILE_OUTPUT, - 0); + cp->vaos->draw->vs.position_output ); struct x86_reg scale = x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, scale)); @@ -1700,7 +1700,7 @@ static boolean emit_viewport( struct aos_compilation *cp ) aos_adopt_xmm_reg( cp, pos, TGSI_FILE_OUTPUT, - 0, + cp->vaos->draw->vs.position_output, TRUE ); return TRUE; } @@ -1715,7 +1715,7 @@ static boolean emit_rhw_viewport( struct aos_compilation *cp ) struct x86_reg tmp = aos_get_xmm_reg(cp); struct x86_reg pos = aos_get_shader_reg_xmm(cp, TGSI_FILE_OUTPUT, - 0); + cp->vaos->draw->vs.position_output); struct x86_reg scale = x86_make_disp(cp->machine_EDX, Offset(struct aos_machine, scale)); @@ -1740,7 +1740,7 @@ static boolean emit_rhw_viewport( struct aos_compilation *cp ) aos_adopt_xmm_reg( cp, pos, TGSI_FILE_OUTPUT, - 0, + cp->vaos->draw->vs.position_output, TRUE ); return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index abe8c5ec2d..ad0b829afa 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -89,6 +89,8 @@ static void do_rhw_viewport( struct draw_vs_varient_generic *vsvg, unsigned stride = vsvg->temp_vertex_stride; unsigned j; + ptr += vsvg->base.vs->position_output * 4 * sizeof(float); + for (j = 0; j < count; j++, ptr += stride) { float *data = (float *)ptr; float w = 1.0f / data[3]; @@ -110,6 +112,8 @@ static void do_viewport( struct draw_vs_varient_generic *vsvg, unsigned stride = vsvg->temp_vertex_stride; unsigned j; + ptr += vsvg->base.vs->position_output * 4 * sizeof(float); + for (j = 0; j < count; j++, ptr += stride) { float *data = (float *)ptr; -- cgit v1.2.3 From 23422d603ae002a1f368e20cd0f158e057876cb8 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 10 Jun 2008 23:22:12 +0900 Subject: gallium: Implement debug_get_num_option. For numeric options. --- src/gallium/auxiliary/util/p_debug.c | 30 ++++++++++++++++++++++++++++-- src/gallium/include/pipe/p_debug.h | 2 +- 2 files changed, 29 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index d1dfc377f8..a0ffb024e4 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -230,8 +230,34 @@ debug_get_bool_option(const char *name, boolean dfault) long debug_get_num_option(const char *name, long dfault) { - /* FIXME */ - return dfault; + long result; + const char *str; + + str = debug_get_option(name, NULL); + if(!str) + result = dfault; + else { + long sign; + char c; + c = *str++; + if(c == '-') { + sign = -1; + c = *str++; + } + else { + sign = 1; + } + result = 0; + while('0' <= c && c <= '9') { + result = result*10 + (c - '0'); + c = *str++; + } + result *= sign; + } + + debug_printf("%s: %s = %li\n", __FUNCTION__, name, result); + + return result; } diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index 05eca75201..9011557006 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -289,7 +289,7 @@ boolean debug_get_bool_option(const char *name, boolean dfault); long -debug_get_unsigned_option(const char *name, long dfault); +debug_get_num_option(const char *name, long dfault); unsigned long debug_get_flags_option(const char *name, -- cgit v1.2.3 From 0f552f500c05d041eda751867c779a8ecc11849c Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 11 Jun 2008 10:33:41 +0900 Subject: pipebuffer: Fix improper memory free. --- src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c index 6c3502eea7..e90d2e5623 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c @@ -120,7 +120,7 @@ pb_malloc_buffer_create(size_t size, buf->data = align_malloc(size, desc->alignment < sizeof(void*) ? sizeof(void*) : desc->alignment); if(!buf->data) { - align_free(buf); + FREE(buf); return NULL; } -- cgit v1.2.3 From 2fed8d8496af4aa9105de298c6f320a85eb01623 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 11 Jun 2008 12:25:47 +0900 Subject: gallium: Support L16 pixel format. --- src/gallium/auxiliary/util/p_debug.c | 1 + src/gallium/include/pipe/p_format.h | 1 + 2 files changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index a0ffb024e4..e4de12167a 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -370,6 +370,7 @@ static const struct debug_named_value pipe_format_names[] = { DEBUG_NAMED_VALUE(PIPE_FORMAT_A8_UNORM), DEBUG_NAMED_VALUE(PIPE_FORMAT_I8_UNORM), DEBUG_NAMED_VALUE(PIPE_FORMAT_A8L8_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_L16_UNORM), DEBUG_NAMED_VALUE(PIPE_FORMAT_YCBCR), DEBUG_NAMED_VALUE(PIPE_FORMAT_YCBCR_REV), DEBUG_NAMED_VALUE(PIPE_FORMAT_Z16_UNORM), diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index 9ba00f8d7b..3aad463049 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -242,6 +242,7 @@ enum pipe_format { PIPE_FORMAT_A8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_000R, 0, 0, 0, 1, PIPE_FORMAT_TYPE_UNORM ), /**< ubyte alpha */ PIPE_FORMAT_I8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRRR, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), /**< ubyte intensity */ PIPE_FORMAT_A8L8_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRRG, 1, 1, 1, 1, PIPE_FORMAT_TYPE_UNORM ), /**< ubyte alpha, luminance */ + PIPE_FORMAT_L16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRR1, 2, 2, 2, 0, PIPE_FORMAT_TYPE_UNORM ), /**< ushort luminance */ PIPE_FORMAT_YCBCR = _PIPE_FORMAT_YCBCR( 0 ), PIPE_FORMAT_YCBCR_REV = _PIPE_FORMAT_YCBCR( 1 ), PIPE_FORMAT_Z16_UNORM = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_Z000, 2, 0, 0, 0, PIPE_FORMAT_TYPE_UNORM ), -- cgit v1.2.3 From 23d340c9edbbe64a99478b922c008ae3e2ec7103 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 12 Jun 2008 14:39:57 +0100 Subject: pb: don't assert(0) on failure to allocate - this is a normal condition in many drivers --- src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index 66256f3fa7..0a1e8c83b1 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -184,7 +184,6 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr, mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0); if(!mm_buf->block) { - assert(0); FREE(mm_buf); _glthread_UNLOCK_MUTEX(mm->mutex); return NULL; -- cgit v1.2.3 From 68ef8e89a5f25cd9f80e2b9088604631a28edc3c Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Tue, 10 Jun 2008 16:59:44 -0400 Subject: glsl: implement variable array indexes --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 9 ++++++++- src/mesa/shader/arbprogparse.c | 2 +- src/mesa/shader/prog_print.c | 17 +++++++++++------ src/mesa/shader/slang/slang_emit.c | 16 +++++++++++----- src/mesa/shader/slang/slang_ir.h | 1 + src/mesa/state_tracker/st_mesa_to_tgsi.c | 4 ++-- 6 files changed, 34 insertions(+), 15 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index d1a3dfd9c7..92aff88925 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -803,7 +803,14 @@ tgsi_dump_instruction( ENM( src->SrcRegister.File, TGSI_FILES_SHORT ); CHR( '[' ); - SID( src->SrcRegister.Index ); + if (src->SrcRegister.Indirect) { + TXT( "addr" ); + if (src->SrcRegister.Index > 0) + CHR( '+' ); + SID( src->SrcRegister.Index ); + } + else + SID( src->SrcRegister.Index ); CHR( ']' ); if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || diff --git a/src/mesa/shader/arbprogparse.c b/src/mesa/shader/arbprogparse.c index b60b9656c6..a6bbdc64f1 100644 --- a/src/mesa/shader/arbprogparse.c +++ b/src/mesa/shader/arbprogparse.c @@ -3880,7 +3880,7 @@ _mesa_parse_arb_fragment_program(GLcontext* ctx, GLenum target, } #if DEBUG_FP - _mesa_printf("____________Fragment program %u ________\n", program->Base.ID); + _mesa_printf("____________Fragment program %u ________\n", program->Base.Id); _mesa_print_program(&program->Base); #endif } diff --git a/src/mesa/shader/prog_print.c b/src/mesa/shader/prog_print.c index 1c35ce3fec..09bf15f004 100644 --- a/src/mesa/shader/prog_print.c +++ b/src/mesa/shader/prog_print.c @@ -206,7 +206,7 @@ arb_output_attrib_string(GLint index, GLenum progType) */ static const char * reg_string(enum register_file f, GLint index, gl_prog_print_mode mode, - const struct gl_program *prog) + GLint relAddr, const struct gl_program *prog) { static char str[100]; @@ -214,7 +214,10 @@ reg_string(enum register_file f, GLint index, gl_prog_print_mode mode, switch (mode) { case PROG_PRINT_DEBUG: - sprintf(str, "%s[%d]", file_string(f, mode), index); + if (relAddr) + sprintf(str, "%s[ADDR%s%d]", file_string(f, mode), (index > 0) ? "+" : "", index); + else + sprintf(str, "%s[%d]", file_string(f, mode), index); break; case PROG_PRINT_ARB: @@ -401,7 +404,7 @@ print_dst_reg(const struct prog_dst_register *dstReg, gl_prog_print_mode mode, { _mesa_printf("%s%s", reg_string((enum register_file) dstReg->File, - dstReg->Index, mode, prog), + dstReg->Index, mode, GL_FALSE, prog), writemask_string(dstReg->WriteMask)); if (dstReg->CondMask != COND_TR) { @@ -424,9 +427,9 @@ print_src_reg(const struct prog_src_register *srcReg, gl_prog_print_mode mode, { _mesa_printf("%s%s", reg_string((enum register_file) srcReg->File, - srcReg->Index, mode, prog), + srcReg->Index, mode, srcReg->RelAddr, prog), _mesa_swizzle_string(srcReg->Swizzle, - srcReg->NegateBase, GL_FALSE)); + srcReg->NegateBase, GL_FALSE)); #if 0 _mesa_printf("%s[%d]%s", file_string((enum register_file) srcReg->File, mode), @@ -590,7 +593,9 @@ _mesa_print_instruction_opt(const struct prog_instruction *inst, GLint indent, break; case OPCODE_ARL: - _mesa_printf("ARL addr.x, "); + _mesa_printf("ARL "); + print_dst_reg(&inst->DstReg, mode, prog); + _mesa_printf(", "); print_src_reg(&inst->SrcReg[0], mode, prog); print_comment(inst); break; diff --git a/src/mesa/shader/slang/slang_emit.c b/src/mesa/shader/slang/slang_emit.c index ff63e05dd2..93256f8647 100644 --- a/src/mesa/shader/slang/slang_emit.c +++ b/src/mesa/shader/slang/slang_emit.c @@ -223,6 +223,7 @@ storage_to_src_reg(struct prog_src_register *src, const slang_ir_storage *st) assert(st->Size <= 4); src->File = st->File; src->Index = st->Index; + src->RelAddr = st->RelAddr; if (st->Swizzle != SWIZZLE_NOOP) src->Swizzle = st->Swizzle; else @@ -1488,11 +1489,16 @@ emit_array_element(slang_emit_info *emitInfo, slang_ir_node *n) n->Store->Index = arrayAddr + index; } else { - /* Variable index - PROBLEM */ - const GLint arrayAddr = n->Children[0]->Store->Index; - const GLint index = 0; - _mesa_problem(NULL, "variable array indexes not supported yet!"); - n->Store->Index = arrayAddr + index; + /* Variable index*/ + struct prog_instruction *inst; + inst = new_instruction(emitInfo, OPCODE_ARL); + storage_to_dst_reg(&inst->DstReg, n->Store, n->Writemask); + storage_to_src_reg(&inst->SrcReg[0], n->Children[1]->Store); + inst->DstReg.File = PROGRAM_ADDRESS; + inst->Comment = _mesa_strdup("ARL ADDR"); + n->Store->RelAddr = GL_TRUE; + n->Store->Index = inst->DstReg.Index;/*index of the array*/ + inst->DstReg.Index = 0; /*addr index is always 0*/ } return NULL; /* no instruction */ } diff --git a/src/mesa/shader/slang/slang_ir.h b/src/mesa/shader/slang/slang_ir.h index c7c0ddbf9a..ba0735d64d 100644 --- a/src/mesa/shader/slang/slang_ir.h +++ b/src/mesa/shader/slang/slang_ir.h @@ -146,6 +146,7 @@ struct _slang_ir_storage GLint Size; /**< number of floats */ GLuint Swizzle; GLint RefCount; /**< Used during IR tree delete */ + GLboolean RelAddr; }; typedef struct _slang_ir_storage slang_ir_storage; diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 12979de523..a8b6faad1c 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -68,8 +68,8 @@ map_register_file( case PROGRAM_STATE_VAR: case PROGRAM_NAMED_PARAM: case PROGRAM_UNIFORM: - if (immediateMapping[index] != ~0) - return TGSI_FILE_IMMEDIATE; + if (immediateMapping && immediateMapping[index] != ~0) + return TGSI_FILE_IMMEDIATE; else return TGSI_FILE_CONSTANT; case PROGRAM_CONSTANT: -- cgit v1.2.3 From 08f1b8ac709105d42ec34f8b8a81421e3b0fbc81 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 12 Jun 2008 16:01:05 -0600 Subject: gallium: fix SSE codegen for instructions that use both a CONSTANT and IMMEDIATE Fixes codegen for instructions like MUL dst, CONST[0], IMM[0]; the two operands would up getting aliased in the x86/sse code. Fixes glean/vertProg1/fogparams test. --- src/gallium/auxiliary/draw/draw_vs_aos.c | 26 +++++++++++++++++++------- src/gallium/auxiliary/draw/draw_vs_aos.h | 3 ++- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 2 +- 3 files changed, 22 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 5d4a8b38c8..388dd3fbee 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -67,19 +67,30 @@ static INLINE boolean eq( struct x86_reg a, } struct x86_reg aos_get_x86( struct aos_compilation *cp, + unsigned which_reg, /* quick hack */ unsigned value ) { - if (cp->ebp != value) { + struct x86_reg reg; + + if (which_reg == 0) + reg = cp->temp_EBP; + else + reg = cp->tmp_EAX; + + if (cp->x86_reg[which_reg] != value) { unsigned offset; switch (value) { case X86_IMMEDIATES: + assert(which_reg == 0); offset = Offset(struct aos_machine, immediates); break; case X86_CONSTANTS: + assert(which_reg == 1); offset = Offset(struct aos_machine, constants); break; case X86_ATTRIBS: + assert(which_reg == 0); offset = Offset(struct aos_machine, attrib); break; default: @@ -87,14 +98,14 @@ struct x86_reg aos_get_x86( struct aos_compilation *cp, offset = 0; } - x86_mov(cp->func, cp->temp_EBP, + + x86_mov(cp->func, reg, x86_make_disp(cp->machine_EDX, offset)); - /* x86_deref(x86_make_disp(cp->machine_EDX, offset))); */ - cp->ebp = value; + cp->x86_reg[which_reg] = value; } - return cp->temp_EBP; + return reg; } @@ -118,10 +129,10 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp, return x86_make_disp(ptr, Offset(struct aos_machine, internal[idx])); case TGSI_FILE_IMMEDIATE: - return x86_make_disp(aos_get_x86(cp, X86_IMMEDIATES), idx * 4 * sizeof(float)); + return x86_make_disp(aos_get_x86(cp, 0, X86_IMMEDIATES), idx * 4 * sizeof(float)); case TGSI_FILE_CONSTANT: - return x86_make_disp(aos_get_x86(cp, X86_CONSTANTS), idx * 4 * sizeof(float)); + return x86_make_disp(aos_get_x86(cp, 1, X86_CONSTANTS), idx * 4 * sizeof(float)); default: ERROR(cp, "unknown reg file"); @@ -1413,6 +1424,7 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst x87_fld_src( cp, &op->FullSrcRegisters[0], 0 ); x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) ); + /* tmp_EAX has been pushed & will be restored below */ x86_mov_reg_imm( cp->func, cp->tmp_EAX, (unsigned long) _powerf ); x86_call( cp->func, cp->tmp_EAX ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 66944a4e33..64e021ff6b 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -145,7 +145,7 @@ struct aos_compilation { unsigned last_used; } xmm[8]; - unsigned ebp; /* one of X86_* */ + unsigned x86_reg[2]; /* one of X86_* */ boolean input_fetched[PIPE_MAX_ATTRIBS]; unsigned output_last_write[PIPE_MAX_ATTRIBS]; @@ -213,6 +213,7 @@ do { \ #define X86_ATTRIBS 3 struct x86_reg aos_get_x86( struct aos_compilation *cp, + unsigned which_reg, unsigned value ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index b720185709..6b92811870 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -96,7 +96,7 @@ static void get_src_ptr( struct aos_compilation *cp, struct x86_reg elt, unsigned a ) { - struct x86_reg attrib = x86_make_disp(aos_get_x86( cp, X86_ATTRIBS ), + struct x86_reg attrib = x86_make_disp(aos_get_x86( cp, 0, X86_ATTRIBS ), a * sizeof(struct aos_attrib)); struct x86_reg input_ptr = x86_make_disp(attrib, -- cgit v1.2.3 From 20ee00754d432cf6c9aca2ba61e004a83795e160 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 12 Jun 2008 16:01:43 -0600 Subject: gallium: disable the codegen for TGSI_OPCODE_EXPBASE2 for now. The x86 code seems to fail for exponents of 4 or larger. See glean's vertProg1/EX2 test. --- src/gallium/auxiliary/draw/draw_vs_aos.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 388dd3fbee..1f926b3e85 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1662,7 +1662,14 @@ emit_instruction( struct aos_compilation *cp, return emit_RND(cp, inst); case TGSI_OPCODE_EXPBASE2: +#if 0 + /* this seems to fail for "larger" exponents. + * See glean tvertProg1's EX2 test. + */ return emit_EX2(cp, inst); +#else + return FALSE; +#endif case TGSI_OPCODE_LOGBASE2: return emit_LG2(cp, inst); -- cgit v1.2.3 From 64d854ebf7074f4a86bb9d45bb2e1003f86a86a1 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 13 Jun 2008 17:22:11 +0200 Subject: util: Use pf_get_size(). --- src/gallium/auxiliary/util/u_blit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 999a3e5099..7b9415d49a 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -281,7 +281,7 @@ util_blit_pixels(struct blit_state *ctx, texTemp.height[0] = srcH; texTemp.depth[0] = 1; texTemp.compressed = 0; - texTemp.cpp = pf_get_bits(src->format) / 8; + texTemp.cpp = pf_get_size(src->format); tex = screen->texture_create(screen, &texTemp); if (!tex) -- cgit v1.2.3 From 51f24ef5bae3fa1f1ae6172fd5c5e6235f3f6857 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 17 Jun 2008 16:58:22 -0600 Subject: gallium: remove duplicated u_mm.c in file list --- src/gallium/auxiliary/util/Makefile | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 05bc43131a..db3d810a2e 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -15,8 +15,7 @@ C_SOURCES = \ u_mm.c \ u_simple_shaders.c \ u_snprintf.c \ - u_time.c \ - u_mm.c + u_time.c include ../../Makefile.template -- cgit v1.2.3 From 7d7f3e2c9451b2233c196d82d523c50b5d2616cc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 18 Jun 2008 14:50:35 -0600 Subject: gallium: split long prims into chunks with an even number of vertices This fixes culling "parity" errors when splitting long tri strips. Splitting strips into chunks with an odd number of vertices causes front/back-face orientation to get reversed and upsets culling. --- src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c | 9 +++++++++ src/gallium/auxiliary/draw/draw_pt_varray.c | 4 ++++ 2 files changed, 13 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 5ce3aba2a2..fdf9b6fe6a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -193,6 +193,15 @@ static void fse_prepare( struct draw_pt_middle_end *middle, *max_vertices = (draw->render->max_vertex_buffer_bytes / (vinfo->size * 4)); + /* Return an even number of verts. + * This prevents "parity" errors when splitting long triangle strips which + * can lead to front/back culling mix-ups. + * Every other triangle in a strip has an alternate front/back orientation + * so splitting at an odd position can cause the orientation of subsequent + * triangles to get reversed. + */ + *max_vertices = *max_vertices & ~1; + /* Probably need to do this somewhere (or fix exec shader not to * need it): */ diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index 4479963db1..2cc08a9e93 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -147,6 +147,10 @@ static void varray_prepare(struct draw_pt_front_end *frontend, varray->middle = middle; middle->prepare(middle, varray->output_prim, opt, &varray->driver_fetch_max ); + + /* check that the max is even */ + assert((varray->driver_fetch_max & 1) == 0); + varray->fetch_max = MIN2(FETCH_MAX, varray->driver_fetch_max); } -- cgit v1.2.3 From f1401385587882bb9d18a5f5b01dcbb71ddf0a2f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 18 Jun 2008 15:08:19 -0600 Subject: gallium: additional fixes to ensure even number of vertices per buffer --- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 3 +++ src/gallium/auxiliary/draw/draw_pt_emit.c | 3 +++ src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 9 +++++++++ src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 3 +++ 4 files changed, 18 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 10a1f7df79..a6fde77a0e 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -390,6 +390,9 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf ) /* Allocate a new vertex buffer */ vbuf->max_vertices = vbuf->render->max_vertex_buffer_bytes / vbuf->vertex_size; + /* even number */ + vbuf->max_vertices = vbuf->max_vertices & ~1; + /* Must always succeed -- driver gives us a * 'max_vertex_buffer_bytes' which it guarantees it can allocate, * and it will flush itself if necessary to do so. If this does diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index a02f1f46fe..40f05cb9e0 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -143,6 +143,9 @@ void draw_pt_emit_prepare( struct pt_emit *emit, *max_vertices = (draw->render->max_vertex_buffer_bytes / (vinfo->size * 4)); + + /* even number */ + *max_vertices = *max_vertices & ~1; } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 083ce105bf..4a1f3b0953 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -200,6 +200,15 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, *max_vertices = (draw->render->max_vertex_buffer_bytes / (vinfo->size * 4)); + + /* Return an even number of verts. + * This prevents "parity" errors when splitting long triangle strips which + * can lead to front/back culling mix-ups. + * Every other triangle in a strip has an alternate front/back orientation + * so splitting at an odd position can cause the orientation of subsequent + * triangles to get reversed. + */ + *max_vertices = *max_vertices & ~1; } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 25118712a6..0aec4b71ba 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -99,6 +99,9 @@ static void fetch_pipeline_prepare( struct draw_pt_middle_end *middle, *max_vertices = DRAW_PIPE_MAX_VERTICES; } + /* return even number */ + *max_vertices = *max_vertices & ~1; + /* No need to prepare the shader. */ vs->prepare(vs, draw); -- cgit v1.2.3 From aa816d114ee90d0c5b861f622c0063b54f7eb612 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 12 Jun 2008 23:34:21 +0900 Subject: draw: Fix MSVC integer size conversion warning. --- src/gallium/auxiliary/draw/draw_pt_varray.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index 2cc08a9e93..46e722a154 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -172,7 +172,7 @@ static void varray_destroy(struct draw_pt_front_end *frontend) struct draw_pt_front_end *draw_pt_varray(struct draw_context *draw) { - unsigned i; + ushort i; struct varray_frontend *varray = CALLOC_STRUCT(varray_frontend); if (varray == NULL) return NULL; -- cgit v1.2.3 From b440cea343b007ca97edb2cd4e1e6150c989417b Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Thu, 19 Jun 2008 14:06:28 +0200 Subject: util: Add missing format names. --- src/gallium/auxiliary/util/p_debug.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index e4de12167a..9d56f8bfab 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -366,6 +366,7 @@ static const struct debug_named_value pipe_format_names[] = { DEBUG_NAMED_VALUE(PIPE_FORMAT_A1R5G5B5_UNORM), DEBUG_NAMED_VALUE(PIPE_FORMAT_A4R4G4B4_UNORM), DEBUG_NAMED_VALUE(PIPE_FORMAT_R5G6B5_UNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A2B10G10R10_UNORM), DEBUG_NAMED_VALUE(PIPE_FORMAT_L8_UNORM), DEBUG_NAMED_VALUE(PIPE_FORMAT_A8_UNORM), DEBUG_NAMED_VALUE(PIPE_FORMAT_I8_UNORM), @@ -436,6 +437,9 @@ static const struct debug_named_value pipe_format_names[] = { DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_SNORM), DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_SNORM), DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_B6G5R5_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A8B8G8R8_SNORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_X8B8G8R8_SNORM), DEBUG_NAMED_VALUE(PIPE_FORMAT_R8_SSCALED), DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8_SSCALED), DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_SSCALED), @@ -446,6 +450,8 @@ static const struct debug_named_value pipe_format_names[] = { DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_SRGB), DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_SRGB), DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_X8UB8UG8SR8S_NORM), + DEBUG_NAMED_VALUE(PIPE_FORMAT_B6UG5SR5S_NORM), DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_RGB), DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_RGBA), DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT3_RGBA), -- cgit v1.2.3 From c5bf215b1b955c8dafeae84a5f526052fd2e0256 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 19 Jun 2008 21:16:16 +0900 Subject: gallium: Add extra parenthesis as advised by gcc. --- src/gallium/auxiliary/util/p_debug_mem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/p_debug_mem.c index 51dbd0a710..ed18c6540e 100644 --- a/src/gallium/auxiliary/util/p_debug_mem.c +++ b/src/gallium/auxiliary/util/p_debug_mem.c @@ -263,8 +263,8 @@ debug_memory_end(unsigned long start_no) void *ptr; hdr = LIST_ENTRY(struct debug_memory_header, entry, head); ptr = data_from_header(hdr); - if(start_no <= hdr->no && hdr->no < last_no || - last_no < start_no && (hdr->no < last_no || start_no <= hdr->no)) { + if((start_no <= hdr->no && hdr->no < last_no) || + (last_no < start_no && (hdr->no < last_no || start_no <= hdr->no))) { debug_printf("%s:%u:%s: %u bytes at %p not freed\n", hdr->file, hdr->line, hdr->function, hdr->size, ptr); -- cgit v1.2.3 From 6fbfcf922210ddf29b73290557f9d40171b09d2e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 19 Jun 2008 22:57:33 +0900 Subject: gallium: Handle malloc failure. --- src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index 6689c3f1fb..13b8c2e5bf 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -165,9 +165,17 @@ tgsi_exec_machine_bind_shader( declarations = (struct tgsi_full_declaration *) MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); + if (!declarations) { + return; + } + instructions = (struct tgsi_full_instruction *) MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); + if (!instructions) { + FREE( declarations ); + return; + } while( !tgsi_parse_end_of_tokens( &parse ) ) { uint pointer = parse.Position; -- cgit v1.2.3 From e2c3f06e9649b5b87fc9adbca7d1f07841bba895 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sun, 22 Jun 2008 13:14:29 +0100 Subject: draw: fix non-i386 builds --- src/gallium/auxiliary/draw/draw_vs_aos_machine.c | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c index dcb6c2c8d4..6a54917ae3 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c @@ -28,7 +28,6 @@ #include "pipe/p_config.h" -#ifdef PIPE_ARCH_X86 #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" @@ -39,6 +38,8 @@ #include "draw_vs_aos.h" #include "draw_vertex.h" +#ifdef PIPE_ARCH_X86 + #include "rtasm/rtasm_x86sse.h" @@ -298,6 +299,25 @@ struct aos_machine *draw_vs_aos_machine( void ) return machine; } +#else + +void draw_vs_aos_machine_viewport( struct aos_machine *machine, + const struct pipe_viewport_state *viewport ) +{ +} + +void draw_vs_aos_machine_constants( struct aos_machine *machine, + const float (*constants)[4] ) +{ +} + +void draw_vs_aos_machine_destroy( struct aos_machine *machine ) +{ +} +struct aos_machine *draw_vs_aos_machine( void ) +{ + return NULL; +} #endif -- cgit v1.2.3 From f08da6b8214f1bf1d4a33e19dac4eeb155302481 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Mon, 23 Jun 2008 12:31:46 +0200 Subject: gallium: Fix warning in u_draw_quad.h --- src/gallium/auxiliary/util/u_draw_quad.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_draw_quad.h b/src/gallium/auxiliary/util/u_draw_quad.h index 5b6539a99c..ec4862ead3 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.h +++ b/src/gallium/auxiliary/util/u_draw_quad.h @@ -33,6 +33,7 @@ extern "C" { #endif +struct pipe_buffer; extern void util_draw_vertex_buffer(struct pipe_context *pipe, -- cgit v1.2.3 From 708bb35194e16bf7aaf5cd676f572b7200a156d1 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Mon, 23 Jun 2008 16:01:41 +0200 Subject: util: Blit can now copy from texture to surface --- src/gallium/auxiliary/util/u_blit.c | 101 ++++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_blit.h | 14 +++-- 2 files changed, 112 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 7b9415d49a..28513f54e6 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -362,3 +362,104 @@ util_blit_pixels(struct blit_state *ctx, screen->texture_release(screen, &tex); } +/** + * Copy pixel block from src texture to dst surface. + * Overlapping regions are acceptable. + * + * XXX Should support selection of level. + * XXX need some control over blitting Z and/or stencil. + */ +void +util_blit_pixels_tex(struct blit_state *ctx, + struct pipe_texture *tex, + int srcX0, int srcY0, + int srcX1, int srcY1, + struct pipe_surface *dst, + int dstX0, int dstY0, + int dstX1, int dstY1, + float z, uint filter) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + struct pipe_framebuffer_state fb; + const int srcLeft = MIN2(srcX0, srcX1); + const int srcTop = MIN2(srcY0, srcY1); + + assert(filter == PIPE_TEX_MIPFILTER_NEAREST || + filter == PIPE_TEX_MIPFILTER_LINEAR); + + if (srcLeft != srcX0) { + /* left-right flip */ + int tmp = dstX0; + dstX0 = dstX1; + dstX1 = tmp; + } + + if (srcTop != srcY0) { + /* up-down flip */ + int tmp = dstY0; + dstY0 = dstY1; + dstY1 = tmp; + } + + assert(screen->is_format_supported(screen, dst->format, PIPE_SURFACE)); + + /* save state (restored below) */ + cso_save_blend(ctx->cso); + cso_save_depth_stencil_alpha(ctx->cso); + cso_save_rasterizer(ctx->cso); + cso_save_samplers(ctx->cso); + cso_save_sampler_textures(ctx->cso); + cso_save_framebuffer(ctx->cso); + cso_save_fragment_shader(ctx->cso); + cso_save_vertex_shader(ctx->cso); + cso_save_viewport(ctx->cso); + + /* set misc state we care about */ + cso_set_blend(ctx->cso, &ctx->blend); + cso_set_depth_stencil_alpha(ctx->cso, &ctx->depthstencil); + cso_set_rasterizer(ctx->cso, &ctx->rasterizer); + cso_set_viewport(ctx->cso, &ctx->viewport); + + /* sampler */ + ctx->sampler.min_img_filter = filter; + ctx->sampler.mag_img_filter = filter; + cso_single_sampler(ctx->cso, 0, &ctx->sampler); + cso_single_sampler_done(ctx->cso); + + /* texture */ + cso_set_sampler_textures(ctx->cso, 1, &tex); + + /* shaders */ + cso_set_fragment_shader_handle(ctx->cso, ctx->fs); + cso_set_vertex_shader_handle(ctx->cso, ctx->vs); + + /* drawing dest */ + memset(&fb, 0, sizeof(fb)); + fb.width = dst->width; + fb.height = dst->height; + fb.num_cbufs = 1; + fb.cbufs[0] = dst; + cso_set_framebuffer(ctx->cso, &fb); + + /* draw quad */ + setup_vertex_data(ctx, + (float) dstX0, (float) dstY0, + (float) dstX1, (float) dstY1, z); + + util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, + PIPE_PRIM_TRIANGLE_FAN, + 4, /* verts */ + 2); /* attribs/vert */ + + /* restore state we changed */ + cso_restore_blend(ctx->cso); + cso_restore_depth_stencil_alpha(ctx->cso); + cso_restore_rasterizer(ctx->cso); + cso_restore_samplers(ctx->cso); + cso_restore_sampler_textures(ctx->cso); + cso_restore_framebuffer(ctx->cso); + cso_restore_fragment_shader(ctx->cso); + cso_restore_vertex_shader(ctx->cso); + cso_restore_viewport(ctx->cso); +} diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h index 0ce9732e62..7d0406c104 100644 --- a/src/gallium/auxiliary/util/u_blit.h +++ b/src/gallium/auxiliary/util/u_blit.h @@ -42,15 +42,13 @@ struct cso_context; struct blit_state; + extern struct blit_state * util_create_blit(struct pipe_context *pipe, struct cso_context *cso); - extern void util_destroy_blit(struct blit_state *ctx); - - extern void util_blit_pixels(struct blit_state *ctx, struct pipe_surface *src, @@ -61,6 +59,16 @@ util_blit_pixels(struct blit_state *ctx, int dstX1, int dstY1, float z, uint filter); +extern void +util_blit_pixels_tex(struct blit_state *ctx, + struct pipe_texture *tex, + int srcX0, int srcY0, + int srcX1, int srcY1, + struct pipe_surface *dst, + int dstX0, int dstY0, + int dstX1, int dstY1, + float z, uint filter); + #ifdef __cplusplus } -- cgit v1.2.3 From 016dbb0cf395702cfad046f827e3cc4541ae5818 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 23 Jun 2008 08:35:41 -0600 Subject: gallium: added some assertions --- src/gallium/auxiliary/util/p_util.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_util.c b/src/gallium/auxiliary/util/p_util.c index 2a92f8e408..0e3f082b3e 100644 --- a/src/gallium/auxiliary/util/p_util.c +++ b/src/gallium/auxiliary/util/p_util.c @@ -53,6 +53,12 @@ pipe_copy_rect(ubyte * dst, { unsigned i; + assert(cpp > 0); + assert(src_x >= 0); + assert(src_y >= 0); + assert(dst_x >= 0); + assert(dst_y >= 0); + dst_pitch *= cpp; src_pitch *= cpp; dst += dst_x * cpp; -- cgit v1.2.3 From f738c3acaca235c68a26c7b7d41903c64a36ae9f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 23 Jun 2008 09:47:12 -0600 Subject: gallium: fix Y-inverted copies Don't require the caller to pass a non-intuitive negative src_y coord anymore when doing a src-inverted copy. --- src/gallium/auxiliary/util/p_util.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_util.c b/src/gallium/auxiliary/util/p_util.c index 0e3f082b3e..4e60b1b841 100644 --- a/src/gallium/auxiliary/util/p_util.c +++ b/src/gallium/auxiliary/util/p_util.c @@ -37,6 +37,7 @@ /** * Copy 2D rect from one place to another. * Position and sizes are in pixels. + * src_pitch may be negative to do vertical flip of pixels from source. */ void pipe_copy_rect(ubyte * dst, @@ -52,6 +53,7 @@ pipe_copy_rect(ubyte * dst, int src_y) { unsigned i; + int src_pitch_pos = src_pitch < 0 ? -src_pitch : src_pitch; assert(cpp > 0); assert(src_x >= 0); @@ -61,10 +63,11 @@ pipe_copy_rect(ubyte * dst, dst_pitch *= cpp; src_pitch *= cpp; + src_pitch_pos *= cpp; dst += dst_x * cpp; src += src_x * cpp; dst += dst_y * dst_pitch; - src += src_y * src_pitch; + src += src_y * src_pitch_pos; width *= cpp; if (width == dst_pitch && width == src_pitch) -- cgit v1.2.3 From d9f38a2ad1477cc6aeb94c083ab439b8c704be2e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 24 Jun 2008 01:26:22 +0900 Subject: gallium: Use the more portable PIPE_ARCH_* PIPE_CC_* macros instead of ad-hoc ones. --- src/gallium/auxiliary/util/p_debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 9d56f8bfab..b1106a5e55 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -100,9 +100,9 @@ void debug_print_blob( const char *name, void _debug_break(void) { -#if (defined(__i386__) || defined(__386__)) && defined(__GNUC__) +#if defined(PIPE_ARCH_X86) && defined(PIPE_CC_GCC) __asm("int3"); -#elif (defined(__i386__) || defined(__386__)) && defined(__MSC__) +#elif defined(PIPE_ARCH_X86) && defined(PIPE_CC_MSVC) _asm {int 3}; #elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) EngDebugBreak(); -- cgit v1.2.3 From dc73d15a9a1cc830781f8f3ef81507bfff79b7f9 Mon Sep 17 00:00:00 2001 From: Jonathan White Date: Mon, 23 Jun 2008 16:24:58 -0600 Subject: gallium: code for PIPE_SUBSYSTEM_WINDOWS_USER --- src/gallium/auxiliary/util/p_debug.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index b1106a5e55..7b28900a25 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -30,14 +30,28 @@ #include + #ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY + #include #include + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) + +#ifndef WIN32_LEAN_AND_MEAN +#define WIN32_LEAN_AND_MEAN // Exclude rarely-used stuff from Windows headers +#endif +#include + #else + #include #include + #endif + + #include "pipe/p_compiler.h" #include "pipe/p_util.h" #include "pipe/p_debug.h" @@ -74,6 +88,17 @@ void _debug_vprintf(const char *format, va_list ap) #else /* TODO: Implement debug print for WINCE */ #endif +#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) + /* EngDebugPrint does not handle float point arguments, so we need to use + * our own vsnprintf implementation. It is also very slow, so buffer until + * we find a newline. */ + static char buf[512 + 1] = {'\0'}; + size_t len = strlen(buf); + int ret = util_vsnprintf(buf + len, sizeof(buf) - len, format, ap); + if(ret > (int)(sizeof(buf) - len - 1) || util_strchr(buf + len, '\n')) { + OutputDebugStringA(buf); + buf[0] = '\0'; + } #else vfprintf(stderr, format, ap); #endif -- cgit v1.2.3 From 89e9d6b6db933c870443714c3d7c9539d117cddf Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 23 Jun 2008 17:13:14 -0600 Subject: gallium: added support for fixed-point formats, drawing --- src/gallium/auxiliary/translate/translate_generic.c | 18 ++++++++++++++++++ src/gallium/include/pipe/p_format.h | 5 +++++ src/mesa/state_tracker/st_cb_texture.c | 3 ++- src/mesa/state_tracker/st_draw.c | 14 ++++++++++++-- 4 files changed, 37 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 3fec89b36e..17e37d1745 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -121,6 +121,8 @@ emit_##NAME(const float *attrib, void *ptr) \ #define FROM_16_SNORM(i) ((float) ((short *) ptr)[i] / 32767.0f) #define FROM_32_SNORM(i) ((float) ((int *) ptr)[i] / 2147483647.0f) +#define FROM_32_FIXED(i) (((int *) ptr)[i] / 65536.0) + #define TO_64_FLOAT(x) ((double) x) #define TO_32_FLOAT(x) (x) @@ -140,6 +142,8 @@ emit_##NAME(const float *attrib, void *ptr) \ #define TO_16_SNORM(x) ((short) (x * 32767.0f)) #define TO_32_SNORM(x) ((int) (x * 2147483647.0f)) +#define TO_32_FIXED(x) ((int) (x * 65536.0f)) + ATTRIB( R64G64B64A64_FLOAT, 4, double, FROM_64_FLOAT, TO_64_FLOAT ) @@ -215,6 +219,11 @@ ATTRIB( R8_SNORM, 1, char, FROM_8_SNORM, TO_8_SNORM ) ATTRIB( A8R8G8B8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM ) //ATTRIB( R8G8B8A8_UNORM, 4, ubyte, FROM_8_UNORM, TO_8_UNORM ) +ATTRIB( R32G32B32A32_FIXED, 4, int, FROM_32_FIXED, TO_32_FIXED ) +ATTRIB( R32G32B32_FIXED, 3, int, FROM_32_FIXED, TO_32_FIXED ) +ATTRIB( R32G32_FIXED, 2, int, FROM_32_FIXED, TO_32_FIXED ) +ATTRIB( R32_FIXED, 1, int, FROM_32_FIXED, TO_32_FIXED ) + static void @@ -386,6 +395,15 @@ static fetch_func get_fetch_func( enum pipe_format format ) case PIPE_FORMAT_B8G8R8A8_UNORM: return &fetch_B8G8R8A8_UNORM; + case PIPE_FORMAT_R32_FIXED: + return &fetch_R32_FIXED; + case PIPE_FORMAT_R32G32_FIXED: + return &fetch_R32G32_FIXED; + case PIPE_FORMAT_R32G32B32_FIXED: + return &fetch_R32G32B32_FIXED; + case PIPE_FORMAT_R32G32B32A32_FIXED: + return &fetch_R32G32B32A32_FIXED; + default: assert(0); return &fetch_NULL; diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index 579fdb2957..00aa02311c 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -83,6 +83,7 @@ static INLINE uint pf_layout(uint f) /**< PIPE_FORMAT_LAYOUT_ */ #define PIPE_FORMAT_TYPE_USCALED 4 /**< uints, not normalized */ #define PIPE_FORMAT_TYPE_SSCALED 5 /**< ints, not normalized */ #define PIPE_FORMAT_TYPE_SRGB 6 /**< sRGB colorspace */ +#define PIPE_FORMAT_TYPE_FIXED 7 /**< 16.16 fixed point */ /** @@ -353,6 +354,10 @@ enum pipe_format { PIPE_FORMAT_R8G8B8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SSCALED ), PIPE_FORMAT_R8G8B8A8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SSCALED ), PIPE_FORMAT_R8G8B8X8_SSCALED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SSCALED ), + PIPE_FORMAT_R32_FIXED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_R001, 4, 0, 0, 0, PIPE_FORMAT_TYPE_FIXED ), + PIPE_FORMAT_R32G32_FIXED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RG01, 4, 4, 0, 0, PIPE_FORMAT_TYPE_FIXED ), + PIPE_FORMAT_R32G32B32_FIXED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 4, 4, 4, 0, PIPE_FORMAT_TYPE_FIXED ), + PIPE_FORMAT_R32G32B32A32_FIXED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_FIXED ), /* sRGB formats */ PIPE_FORMAT_L8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRR1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SRGB ), PIPE_FORMAT_A8_L8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRRG, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index da45a5e321..7d52d1da1b 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -1161,7 +1161,8 @@ do_copy_texsubimage(GLcontext *ctx, (void) texImage; - /* XX need this? st_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL);*/ + /* XX need this?*/ + st_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); /* determine if copying depth or color data */ if (baseFormat == GL_DEPTH_COMPONENT) { diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 5300848ef6..6867d50c87 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -147,6 +147,14 @@ static GLuint byte_types_scale[4] = { PIPE_FORMAT_R8G8B8A8_SSCALED }; +static GLuint fixed_types[4] = { + PIPE_FORMAT_R32_FIXED, + PIPE_FORMAT_R32G32_FIXED, + PIPE_FORMAT_R32G32B32_FIXED, + PIPE_FORMAT_R32G32B32A32_FIXED +}; + + /** * Return a PIPE_FORMAT_x for the given GL datatype and size. @@ -154,8 +162,8 @@ static GLuint byte_types_scale[4] = { static GLuint pipe_vertex_format(GLenum type, GLuint size, GLboolean normalized) { - assert(type >= GL_BYTE); - assert(type <= GL_DOUBLE); + assert((type >= GL_BYTE && type <= GL_DOUBLE) || + type == GL_FIXED); assert(size >= 1); assert(size <= 4); @@ -169,6 +177,7 @@ pipe_vertex_format(GLenum type, GLuint size, GLboolean normalized) case GL_UNSIGNED_INT: return uint_types_norm[size-1]; case GL_UNSIGNED_SHORT: return ushort_types_norm[size-1]; case GL_UNSIGNED_BYTE: return ubyte_types_norm[size-1]; + case GL_FIXED: return fixed_types[size-1]; default: assert(0); return 0; } } @@ -182,6 +191,7 @@ pipe_vertex_format(GLenum type, GLuint size, GLboolean normalized) case GL_UNSIGNED_INT: return uint_types_scale[size-1]; case GL_UNSIGNED_SHORT: return ushort_types_scale[size-1]; case GL_UNSIGNED_BYTE: return ubyte_types_scale[size-1]; + case GL_FIXED: return fixed_types[size-1]; default: assert(0); return 0; } } -- cgit v1.2.3 From 9dfa6063be5ea506cc5a43d352fa2f7dcfa68dff Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 24 Jun 2008 14:22:09 +0900 Subject: gallium: Avoid double arithmetic. --- src/gallium/auxiliary/translate/translate_generic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 17e37d1745..4c8179ffa8 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -121,7 +121,7 @@ emit_##NAME(const float *attrib, void *ptr) \ #define FROM_16_SNORM(i) ((float) ((short *) ptr)[i] / 32767.0f) #define FROM_32_SNORM(i) ((float) ((int *) ptr)[i] / 2147483647.0f) -#define FROM_32_FIXED(i) (((int *) ptr)[i] / 65536.0) +#define FROM_32_FIXED(i) (((int *) ptr)[i] / 65536.0f) #define TO_64_FLOAT(x) ((double) x) #define TO_32_FLOAT(x) (x) -- cgit v1.2.3 From e8af160b247a4cf60c82a3d0c63ec952aef22d96 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Tue, 24 Jun 2008 13:00:42 +0200 Subject: gallium: Fix warning in u_blit.h --- src/gallium/auxiliary/util/u_blit.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h index 7d0406c104..308075698f 100644 --- a/src/gallium/auxiliary/util/u_blit.h +++ b/src/gallium/auxiliary/util/u_blit.h @@ -37,6 +37,7 @@ extern "C" { struct pipe_context; struct pipe_surface; +struct pipe_texture; struct cso_context; -- cgit v1.2.3 From 62fd280c524b282f2448995a353d94f973d904d4 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Tue, 24 Jun 2008 16:06:59 +0200 Subject: gallium: Fix whole source being used in u_blit --- src/gallium/auxiliary/util/u_blit.c | 70 ++++++++++++++++++++++++++++--------- 1 file changed, 54 insertions(+), 16 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 28513f54e6..6555dcd588 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -222,6 +222,49 @@ setup_vertex_data(struct blit_state *ctx, } +/** + * Setup vertex data for the textured quad we'll draw. + * Note: y=0=top + */ +static void +setup_vertex_data_tex(struct blit_state *ctx, + float x0, float y0, float x1, float y1, + float s0, float t0, float s1, float t1, + float z) +{ + void *buf; + + ctx->vertices[0][0][0] = x0; + ctx->vertices[0][0][1] = y0; + ctx->vertices[0][0][2] = z; + ctx->vertices[0][1][0] = s0; /*s*/ + ctx->vertices[0][1][1] = t0; /*t*/ + + ctx->vertices[1][0][0] = x1; + ctx->vertices[1][0][1] = y0; + ctx->vertices[1][0][2] = z; + ctx->vertices[1][1][0] = s1; /*s*/ + ctx->vertices[1][1][1] = t0; /*t*/ + + ctx->vertices[2][0][0] = x1; + ctx->vertices[2][0][1] = y1; + ctx->vertices[2][0][2] = z; + ctx->vertices[2][1][0] = s1; + ctx->vertices[2][1][1] = t1; + + ctx->vertices[3][0][0] = x0; + ctx->vertices[3][0][1] = y1; + ctx->vertices[3][0][2] = z; + ctx->vertices[3][1][0] = s0; + ctx->vertices[3][1][1] = t1; + + buf = ctx->pipe->winsys->buffer_map(ctx->pipe->winsys, ctx->vbuf, + PIPE_BUFFER_USAGE_CPU_WRITE); + + memcpy(buf, ctx->vertices, sizeof(ctx->vertices)); + + ctx->pipe->winsys->buffer_unmap(ctx->pipe->winsys, ctx->vbuf); +} /** * Copy pixel block from src surface to dst surface. * Overlapping regions are acceptable. @@ -382,25 +425,18 @@ util_blit_pixels_tex(struct blit_state *ctx, struct pipe_context *pipe = ctx->pipe; struct pipe_screen *screen = pipe->screen; struct pipe_framebuffer_state fb; - const int srcLeft = MIN2(srcX0, srcX1); - const int srcTop = MIN2(srcY0, srcY1); + float s0, t0, s1, t1; assert(filter == PIPE_TEX_MIPFILTER_NEAREST || filter == PIPE_TEX_MIPFILTER_LINEAR); - if (srcLeft != srcX0) { - /* left-right flip */ - int tmp = dstX0; - dstX0 = dstX1; - dstX1 = tmp; - } + assert(tex->width[0] != 0); + assert(tex->height[0] != 0); - if (srcTop != srcY0) { - /* up-down flip */ - int tmp = dstY0; - dstY0 = dstY1; - dstY1 = tmp; - } + s0 = srcX0 / (float)tex->width[0]; + s1 = srcX1 / (float)tex->width[0]; + t0 = srcY0 / (float)tex->height[0]; + t1 = srcY1 / (float)tex->height[0]; assert(screen->is_format_supported(screen, dst->format, PIPE_SURFACE)); @@ -443,9 +479,11 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_set_framebuffer(ctx->cso, &fb); /* draw quad */ - setup_vertex_data(ctx, + setup_vertex_data_tex(ctx, (float) dstX0, (float) dstY0, - (float) dstX1, (float) dstY1, z); + (float) dstX1, (float) dstY1, + s0, t0, s1, t1, + z); util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, PIPE_PRIM_TRIANGLE_FAN, -- cgit v1.2.3 From 4ddd65967915ca4846f2831bc676c878a29dae4a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 27 Jun 2008 19:37:56 +0900 Subject: gallium: Drop pipe_texture->cpp and pipe_surface->cpp. The chars-per-pixel concept falls apart with compressed and yuv images, where more than one pixel are coded in a single data block. --- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 4 +- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 6 +- src/gallium/auxiliary/util/p_tile.c | 98 ++++------- src/gallium/auxiliary/util/p_util.c | 100 +++++++++-- src/gallium/auxiliary/util/u_blit.c | 2 +- src/gallium/auxiliary/util/u_gen_mipmap.c | 8 +- src/gallium/drivers/i915simple/i915_blit.c | 2 - src/gallium/drivers/i915simple/i915_context.h | 6 +- src/gallium/drivers/i915simple/i915_state_emit.c | 4 +- .../drivers/i915simple/i915_state_sampler.c | 2 +- src/gallium/drivers/i915simple/i915_surface.c | 68 ++----- src/gallium/drivers/i915simple/i915_texture.c | 196 +++++++++++---------- src/gallium/drivers/i965simple/brw_context.h | 4 +- src/gallium/drivers/i965simple/brw_misc_state.c | 8 +- src/gallium/drivers/i965simple/brw_surface.c | 69 ++------ src/gallium/drivers/i965simple/brw_tex_layout.c | 117 ++++++------ .../drivers/i965simple/brw_wm_surface_state.c | 6 +- src/gallium/drivers/softpipe/sp_surface.c | 45 ++--- src/gallium/drivers/softpipe/sp_texture.c | 20 ++- src/gallium/drivers/softpipe/sp_texture.h | 2 +- src/gallium/include/pipe/p_format.h | 41 +++++ src/gallium/include/pipe/p_state.h | 11 +- src/gallium/include/pipe/p_util.h | 15 +- src/gallium/winsys/xlib/brw_aub.c | 9 +- src/gallium/winsys/xlib/xm_winsys.c | 24 +-- src/gallium/winsys/xlib/xm_winsys_aub.c | 31 ++-- src/mesa/state_tracker/st_cb_accum.c | 24 +-- src/mesa/state_tracker/st_cb_bitmap.c | 4 +- src/mesa/state_tracker/st_cb_drawpixels.c | 11 +- src/mesa/state_tracker/st_cb_fbo.c | 8 +- src/mesa/state_tracker/st_cb_readpixels.c | 6 +- src/mesa/state_tracker/st_cb_texture.c | 17 +- src/mesa/state_tracker/st_gen_mipmap.c | 4 +- src/mesa/state_tracker/st_texture.c | 26 ++- 34 files changed, 513 insertions(+), 485 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index ecdebca5f1..3dd7ee19fd 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -398,7 +398,7 @@ aaline_create_texture(struct aaline_stage *aaline) texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL; texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL; texTemp.depth[0] = 1; - texTemp.cpp = 1; + pf_get_block(texTemp.format, &texTemp.block); aaline->texture = screen->texture_create(screen, &texTemp); if (!aaline->texture) @@ -439,7 +439,7 @@ aaline_create_texture(struct aaline_stage *aaline) else { d = 255; } - data[i * surface->pitch + j] = d; + data[i * surface->stride + j] = d; } } diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 4087cf7a49..1f63f94365 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -394,11 +394,11 @@ pstip_update_texture(struct pstip_stage *pstip) for (j = 0; j < 32; j++) { if (stipple[i] & (bit31 >> j)) { /* fragment "on" */ - data[i * surface->pitch + j] = 0; + data[i * surface->stride + j] = 0; } else { /* fragment "off" */ - data[i * surface->pitch + j] = 255; + data[i * surface->stride + j] = 255; } } } @@ -426,7 +426,7 @@ pstip_create_texture(struct pstip_stage *pstip) texTemp.width[0] = 32; texTemp.height[0] = 32; texTemp.depth[0] = 1; - texTemp.cpp = 1; + pf_get_block(texTemp.format, &texTemp.block); pstip->texture = screen->texture_create(screen, &texTemp); if (pstip->texture == NULL) diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index 5728757d2f..ab603ff6e4 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -48,34 +48,23 @@ void pipe_get_tile_raw(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, - void *p, int dst_stride) + void *dst, int dst_stride) { struct pipe_screen *screen = pipe->screen; - const uint cpp = ps->cpp; - const ubyte *pSrc; - const uint src_stride = ps->pitch * cpp; - ubyte *pDest; - uint i; - - if (dst_stride == 0) { - dst_stride = w * cpp; - } + const void *src; if (pipe_clip_tile(x, y, &w, &h, ps)) return; - pSrc = (const ubyte *) screen->surface_map(screen, ps, - PIPE_BUFFER_USAGE_CPU_READ); - assert(pSrc); /* XXX: proper error handling! */ + if (dst_stride == 0) + dst_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; - pSrc += (y * ps->pitch + x) * cpp; - pDest = (ubyte *) p; + src = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_READ); + assert(src); + if(!src) + return; - for (i = 0; i < h; i++) { - memcpy(pDest, pSrc, w * cpp); - pDest += dst_stride; - pSrc += src_stride; - } + pipe_copy_rect(dst, &ps->block, dst_stride, 0, 0, w, h, src, ps->stride, x, y); screen->surface_unmap(screen, ps); } @@ -89,34 +78,23 @@ void pipe_put_tile_raw(struct pipe_context *pipe, struct pipe_surface *ps, uint x, uint y, uint w, uint h, - const void *p, int src_stride) + const void *src, int src_stride) { struct pipe_screen *screen = pipe->screen; - const uint cpp = ps->cpp; - const ubyte *pSrc; - const uint dst_stride = ps->pitch * cpp; - ubyte *pDest; - uint i; - - if (src_stride == 0) { - src_stride = w * cpp; - } + void *dst; if (pipe_clip_tile(x, y, &w, &h, ps)) return; - pSrc = (const ubyte *) p; + if (src_stride == 0) + src_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; - pDest = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_WRITE); - assert(pDest); /* XXX: proper error handling */ - - pDest += (y * ps->pitch + x) * cpp; + dst = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_WRITE); + assert(dst); + if(!dst) + return; - for (i = 0; i < h; i++) { - memcpy(pDest, pSrc, w * cpp); - pDest += dst_stride; - pSrc += src_stride; - } + pipe_copy_rect(dst, &ps->block, ps->stride, x, y, w, h, src, src_stride, 0, 0); screen->surface_unmap(screen, ps); } @@ -692,12 +670,12 @@ pipe_get_tile_rgba(struct pipe_context *pipe, if (pipe_clip_tile(x, y, &w, &h, ps)) return; - packed = MALLOC(h * w * ps->cpp); + packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size); if (!packed) return; - pipe_get_tile_raw(pipe, ps, x, y, w, h, packed, w * ps->cpp); + pipe_get_tile_raw(pipe, ps, x, y, w, h, packed, 0); switch (ps->format) { case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -774,7 +752,7 @@ pipe_put_tile_rgba(struct pipe_context *pipe, if (pipe_clip_tile(x, y, &w, &h, ps)) return; - packed = MALLOC(h * w * ps->cpp); + packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size); if (!packed) return; @@ -829,7 +807,7 @@ pipe_put_tile_rgba(struct pipe_context *pipe, assert(0); } - pipe_put_tile_raw(pipe, ps, x, y, w, h, packed, w * ps->cpp); + pipe_put_tile_raw(pipe, ps, x, y, w, h, packed, 0); FREE(packed); } @@ -846,14 +824,14 @@ pipe_get_tile_z(struct pipe_context *pipe, { struct pipe_screen *screen = pipe->screen; const uint dstStride = w; - void *map; + ubyte *map; uint *pDest = z; uint i, j; if (pipe_clip_tile(x, y, &w, &h, ps)) return; - map = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_READ); + map = (ubyte *)screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_READ); if (!map) { assert(0); return; @@ -863,11 +841,11 @@ pipe_get_tile_z(struct pipe_context *pipe, case PIPE_FORMAT_Z32_UNORM: { const uint *pSrc - = (const uint *)map + (y * ps->pitch + x); + = (const uint *)(map + y * ps->stride + x*4); for (i = 0; i < h; i++) { memcpy(pDest, pSrc, 4 * w); pDest += dstStride; - pSrc += ps->pitch; + pSrc += ps->stride/4; } } break; @@ -875,28 +853,28 @@ pipe_get_tile_z(struct pipe_context *pipe, case PIPE_FORMAT_X8Z24_UNORM: { const uint *pSrc - = (const uint *)map + (y * ps->pitch + x); + = (const uint *)(map + y * ps->stride + x*4); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 24-bit Z to 32-bit Z */ pDest[j] = (pSrc[j] << 8) | (pSrc[j] & 0xff); } pDest += dstStride; - pSrc += ps->pitch; + pSrc += ps->stride/4; } } break; case PIPE_FORMAT_Z16_UNORM: { const ushort *pSrc - = (const ushort *)map + (y * ps->pitch + x); + = (const ushort *)(map + y * ps->stride + x*2); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 16-bit Z to 32-bit Z */ pDest[j] = (pSrc[j] << 16) | pSrc[j]; } pDest += dstStride; - pSrc += ps->pitch; + pSrc += ps->stride/2; } } break; @@ -917,13 +895,13 @@ pipe_put_tile_z(struct pipe_context *pipe, struct pipe_screen *screen = pipe->screen; const uint srcStride = w; const uint *pSrc = zSrc; - void *map; + ubyte *map; uint i, j; if (pipe_clip_tile(x, y, &w, &h, ps)) return; - map = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_WRITE); + map = (ubyte *)screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_WRITE); if (!map) { assert(0); return; @@ -932,10 +910,10 @@ pipe_put_tile_z(struct pipe_context *pipe, switch (ps->format) { case PIPE_FORMAT_Z32_UNORM: { - uint *pDest = (uint *) map + (y * ps->pitch + x); + uint *pDest = (uint *) (map + y * ps->stride + x*4); for (i = 0; i < h; i++) { memcpy(pDest, pSrc, 4 * w); - pDest += ps->pitch; + pDest += ps->stride/4; pSrc += srcStride; } } @@ -943,26 +921,26 @@ pipe_put_tile_z(struct pipe_context *pipe, case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: { - uint *pDest = (uint *) map + (y * ps->pitch + x); + uint *pDest = (uint *) (map + y * ps->stride + x*4); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 24-bit Z (0 stencil) */ pDest[j] = pSrc[j] >> 8; } - pDest += ps->pitch; + pDest += ps->stride/4; pSrc += srcStride; } } break; case PIPE_FORMAT_Z16_UNORM: { - ushort *pDest = (ushort *) map + (y * ps->pitch + x); + ushort *pDest = (ushort *) (map + y * ps->stride + x*2); for (i = 0; i < h; i++) { for (j = 0; j < w; j++) { /* convert 32-bit Z to 16-bit Z */ pDest[j] = pSrc[j] >> 16; } - pDest += ps->pitch; + pDest += ps->stride/2; pSrc += srcStride; } } diff --git a/src/gallium/auxiliary/util/p_util.c b/src/gallium/auxiliary/util/p_util.c index 4e60b1b841..271be4edf1 100644 --- a/src/gallium/auxiliary/util/p_util.c +++ b/src/gallium/auxiliary/util/p_util.c @@ -32,6 +32,7 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" +#include "pipe/p_format.h" /** @@ -41,42 +42,109 @@ */ void pipe_copy_rect(ubyte * dst, - unsigned cpp, - unsigned dst_pitch, + const struct pipe_format_block *block, + unsigned dst_stride, unsigned dst_x, unsigned dst_y, unsigned width, unsigned height, const ubyte * src, - int src_pitch, + int src_stride, unsigned src_x, int src_y) { unsigned i; - int src_pitch_pos = src_pitch < 0 ? -src_pitch : src_pitch; + int src_stride_pos = src_stride < 0 ? -src_stride : src_stride; - assert(cpp > 0); + assert(block->size > 0); + assert(block->width > 0); + assert(block->height > 0); assert(src_x >= 0); assert(src_y >= 0); assert(dst_x >= 0); assert(dst_y >= 0); - dst_pitch *= cpp; - src_pitch *= cpp; - src_pitch_pos *= cpp; - dst += dst_x * cpp; - src += src_x * cpp; - dst += dst_y * dst_pitch; - src += src_y * src_pitch_pos; - width *= cpp; + dst_x /= block->width; + dst_y /= block->height; + width = (width + block->width - 1)/block->width; + height = (height + block->height - 1)/block->height; + src_x /= block->width; + src_y /= block->height; + + dst += dst_x * block->size; + src += src_x * block->size; + dst += dst_y * dst_stride; + src += src_y * src_stride_pos; + width *= block->size; - if (width == dst_pitch && width == src_pitch) + if (width == dst_stride && width == src_stride) memcpy(dst, src, height * width); else { for (i = 0; i < height; i++) { memcpy(dst, src, width); - dst += dst_pitch; - src += src_pitch; + dst += dst_stride; + src += src_stride; } } } + +void +pipe_fill_rect(ubyte * dst, + const struct pipe_format_block *block, + unsigned dst_stride, + unsigned dst_x, + unsigned dst_y, + unsigned width, + unsigned height, + uint32_t value) +{ + unsigned i, j; + unsigned width_size; + + assert(block->size > 0); + assert(block->width > 0); + assert(block->height > 0); + assert(dst_x >= 0); + assert(dst_y >= 0); + + dst_x /= block->width; + dst_y /= block->height; + width = (width + block->width - 1)/block->width; + height = (height + block->height - 1)/block->height; + + dst += dst_x * block->size; + dst += dst_y * dst_stride; + width_size = width * block->size; + + switch (block->size) { + case 1: + if(dst_stride == width_size) + memset(dst, (ubyte) value, height * width_size); + else { + for (i = 0; i < height; i++) { + memset(dst, (ubyte) value, width_size); + dst += dst_stride; + } + } + break; + case 2: + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst; + for (j = 0; j < width; j++) + *row++ = (uint16_t) value; + dst += dst_stride; + } + break; + case 4: + for (i = 0; i < height; i++) { + uint32_t *row = (uint32_t *)dst; + for (j = 0; j < width; j++) + *row++ = value; + dst += dst_stride; + } + break; + default: + assert(0); + break; + } +} diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 6555dcd588..ae779335dc 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -324,7 +324,7 @@ util_blit_pixels(struct blit_state *ctx, texTemp.height[0] = srcH; texTemp.depth[0] = 1; texTemp.compressed = 0; - texTemp.cpp = pf_get_size(src->format); + pf_get_block(src->format, &texTemp.block); tex = screen->texture_create(screen, &texTemp); if (!tex) diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 7d71aefda9..5313a8008a 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -625,7 +625,9 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, struct pipe_winsys *winsys = pipe->winsys; const uint zslice = 0; uint dstLevel; - const int bpt = pf_get_size(pt->format); + + assert(pt->block.width == 1); + assert(pt->block.height == 1); for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { const uint srcLevel = dstLevel - 1; @@ -646,9 +648,9 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, reduce_2d(pt->format, srcSurf->width, srcSurf->height, - srcSurf->pitch * bpt, srcMap, + srcSurf->stride, srcMap, dstSurf->width, dstSurf->height, - dstSurf->pitch * bpt, dstMap); + dstSurf->stride, dstMap); winsys->buffer_unmap(winsys, srcSurf->buffer); winsys->buffer_unmap(winsys, dstSurf->buffer); diff --git a/src/gallium/drivers/i915simple/i915_blit.c b/src/gallium/drivers/i915simple/i915_blit.c index 22f91fab92..45fae4c999 100644 --- a/src/gallium/drivers/i915simple/i915_blit.c +++ b/src/gallium/drivers/i915simple/i915_blit.c @@ -47,8 +47,6 @@ i915_fill_blit(struct i915_context *i915, { unsigned BR13, CMD; - dst_pitch *= (short) cpp; - switch (cpp) { case 1: case 2: diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index 5d411a6648..c8db4f608c 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -188,9 +188,9 @@ struct i915_texture { /* Derived from the above: */ - unsigned pitch; - unsigned depth_pitch; /* per-image on i945? */ - unsigned total_height; + unsigned stride; + unsigned depth_stride; /* per-image on i945? */ + unsigned total_nblocksy; unsigned tiled; diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c index 19d968fd8b..9bd6f92323 100644 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -211,7 +211,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) struct pipe_surface *depth_surface = i915->framebuffer.zsbuf; if (cbuf_surface) { - unsigned cpitch = (cbuf_surface->pitch * cbuf_surface->cpp); + unsigned cpitch = cbuf_surface->stride; unsigned ctile = BUF_3D_USE_FENCE; if (cbuf_surface->texture && ((struct i915_texture*)(cbuf_surface->texture))->tiled) { @@ -232,7 +232,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) /* What happens if no zbuf?? */ if (depth_surface) { - unsigned zpitch = (depth_surface->pitch * depth_surface->cpp); + unsigned zpitch = depth_surface->stride; unsigned ztile = BUF_3D_USE_FENCE; if (depth_surface->texture && ((struct i915_texture*)(depth_surface->texture))->tiled) { diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c index 379aff3846..7868f21ca6 100644 --- a/src/gallium/drivers/i915simple/i915_state_sampler.c +++ b/src/gallium/drivers/i915simple/i915_state_sampler.c @@ -242,7 +242,7 @@ i915_update_texture(struct i915_context *i915, assert(depth); format = translate_texture_format(pt->format); - pitch = tex->pitch * pt->cpp; + pitch = tex->stride; assert(format); assert(pitch); diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index cc55a0910e..0061b22f26 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -48,7 +48,9 @@ i915_surface_copy(struct pipe_context *pipe, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { assert( dst != src ); - assert( dst->cpp == src->cpp ); + assert( dst->block.size == src->block.size ); + assert( dst->block.width == src->block.height ); + assert( dst->block.height == src->block.height ); if (0) { void *dst_map = pipe->screen->surface_map( pipe->screen, @@ -60,38 +62,30 @@ i915_surface_copy(struct pipe_context *pipe, PIPE_BUFFER_USAGE_CPU_READ ); pipe_copy_rect(dst_map, - dst->cpp, - dst->pitch, + &dst->block, + dst->stride, dstx, dsty, width, height, src_map, - do_flip ? -(int) src->pitch : src->pitch, + do_flip ? -(int) src->stride : src->stride, srcx, do_flip ? height - 1 - srcy : srcy); pipe->screen->surface_unmap(pipe->screen, src); pipe->screen->surface_unmap(pipe->screen, dst); } else { + assert(dst->block.width == 1); + assert(dst->block.height == 1); i915_copy_blit( i915_context(pipe), do_flip, - dst->cpp, - (short) src->pitch, src->buffer, src->offset, - (short) dst->pitch, dst->buffer, dst->offset, + dst->block.size, + (short) src->stride/src->block.size, src->buffer, src->offset, + (short) dst->stride/dst->block.size, dst->buffer, dst->offset, (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height ); } } -/* Fill a rectangular sub-region. Need better logic about when to - * push buffers into AGP - will currently do so whenever possible. - */ -static void * -get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) -{ - return (char *)dst_map + (y * dst->pitch + x) * dst->cpp; -} - - static void i915_surface_fill(struct pipe_context *pipe, struct pipe_surface *dst, @@ -99,50 +93,20 @@ i915_surface_fill(struct pipe_context *pipe, unsigned width, unsigned height, unsigned value) { if (0) { - unsigned i, j; void *dst_map = pipe->screen->surface_map( pipe->screen, dst, PIPE_BUFFER_USAGE_CPU_WRITE ); - - switch (dst->cpp) { - case 1: { - ubyte *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - memset(row, value, width); - row += dst->pitch; - } - } - break; - case 2: { - ushort *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - row[j] = (ushort) value; - row += dst->pitch; - } - } - break; - case 4: { - unsigned *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - row[j] = value; - row += dst->pitch; - } - } - break; - default: - assert(0); - break; - } + pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value); pipe->screen->surface_unmap(pipe->screen, dst); } else { + assert(dst->block.width == 1); + assert(dst->block.height == 1); i915_fill_blit( i915_context(pipe), - dst->cpp, - (short) dst->pitch, + dst->block.size, + (short) dst->stride/dst->block.size, dst->buffer, dst->offset, (short) dstx, (short) dsty, (short) width, (short) height, diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index b2e490c7db..2815e61345 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -109,6 +109,9 @@ i915_miptree_set_level_info(struct i915_texture *tex, pt->width[level] = w; pt->height[level] = h; pt->depth[level] = d; + + pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w); + pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h); tex->nr_images[level] = nr_images; @@ -140,7 +143,7 @@ i915_miptree_set_image_offset(struct i915_texture *tex, assert(img < tex->nr_images[level]); - tex->image_offset[level][img] = (x + y * tex->pitch); + tex->image_offset[level][img] = y * tex->stride + x * tex->base.block.size; /* printf("%s level %d img %d pos %d,%d image_offset %x\n", @@ -162,7 +165,7 @@ i915_displaytarget_layout(struct i915_texture *tex) { struct pipe_texture *pt = &tex->base; - if (pt->last_level > 0 || pt->cpp != 4) + if (pt->last_level > 0 || pt->block.size != 4) return 0; i915_miptree_set_level_info( tex, 0, 1, @@ -172,18 +175,18 @@ i915_displaytarget_layout(struct i915_texture *tex) i915_miptree_set_image_offset( tex, 0, 0, 0, 0 ); if (tex->base.width[0] >= 128) { - tex->pitch = power_of_two(tex->base.width[0] * pt->cpp) / pt->cpp; - tex->total_height = round_up(tex->base.height[0], 8); + tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size); + tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8); tex->tiled = 1; } else { - tex->pitch = round_up(tex->base.width[0], 64 / pt->cpp); - tex->total_height = tex->base.height[0]; + tex->stride = round_up(tex->base.nblocksx[0] * pt->block.size, 64); + tex->total_nblocksy = tex->base.nblocksy[0]; } /* printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__, - tex->base.width[0], tex->base.height[0], pt->cpp, - tex->pitch, tex->total_height, tex->pitch * tex->total_height * 4); + tex->base.width[0], tex->base.height[0], pt->block.size, + tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy); */ return 1; @@ -193,12 +196,14 @@ static void i945_miptree_layout_2d( struct i915_texture *tex ) { struct pipe_texture *pt = &tex->base; - int align_h = 2, align_w = 4; + const int align_x = 2, align_y = 4; unsigned level; unsigned x = 0; unsigned y = 0; unsigned width = pt->width[0]; unsigned height = pt->height[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; #if 0 /* used for tiled display targets */ if (pt->last_level == 0 && pt->cpp == 4) @@ -206,7 +211,7 @@ i945_miptree_layout_2d( struct i915_texture *tex ) return; #endif - tex->pitch = pt->width[0]; + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); /* May need to adjust pitch to accomodate the placement of * the 2nd mipmap level. This occurs when the alignment @@ -214,47 +219,43 @@ i945_miptree_layout_2d( struct i915_texture *tex ) * 2nd mipmap level out past the width of its parent. */ if (pt->last_level > 0) { - unsigned mip1_width = align_int(minify(pt->width[0]), align_w) - + minify(minify(pt->width[0])); + unsigned mip1_nblocksx + = align_int(pf_get_nblocksx(&pt->block, minify(width)), align_x) + + pf_get_nblocksx(&pt->block, minify(minify(width))); - if (mip1_width > pt->width[0]) - tex->pitch = mip1_width; + if (mip1_nblocksx > nblocksx) + tex->stride = mip1_nblocksx * pt->block.size; } - /* Pitch must be a whole number of dwords, even though we - * express it in texels. + /* Pitch must be a whole number of dwords */ - tex->pitch = align_int(tex->pitch * pt->cpp, 64) / pt->cpp; - tex->total_height = 0; + tex->stride = align_int(tex->stride, 64); + tex->total_nblocksy = 0; for (level = 0; level <= pt->last_level; level++) { - unsigned img_height; - i915_miptree_set_level_info(tex, level, 1, width, height, 1); i915_miptree_set_image_offset(tex, level, 0, x, y); - if (pt->compressed) - img_height = MAX2(1, height/4); - else - img_height = align_int(height, align_h); - + nblocksy = align_int(nblocksy, align_y); /* Because the images are packed better, the final offset * might not be the maximal one: */ - tex->total_height = MAX2(tex->total_height, y + img_height); + tex->total_nblocksy = MAX2(tex->total_nblocksy, y + nblocksy); /* Layout_below: step right after second mipmap level. */ if (level == 1) { - x += align_int(width, align_w); + x += align_int(nblocksx, align_x); } else { - y += img_height; + y += nblocksy; } width = minify(width); height = minify(height); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); } } @@ -264,15 +265,16 @@ i945_miptree_layout_cube(struct i915_texture *tex) struct pipe_texture *pt = &tex->base; unsigned level; - const unsigned dim = pt->width[0]; + const unsigned nblocks = pt->nblocksx[0]; unsigned face; - unsigned lvlWidth = pt->width[0], lvlHeight = pt->height[0]; + unsigned width = pt->width[0]; + unsigned height = pt->height[0]; /* printf("%s %i, %i\n", __FUNCTION__, pt->width[0], pt->height[0]); */ - assert(lvlWidth == lvlHeight); /* cubemap images are square */ + assert(width == height); /* cubemap images are square */ /* * XXX Should only be used for compressed formats. But lets @@ -282,35 +284,32 @@ i945_miptree_layout_cube(struct i915_texture *tex) * determined either by the old-style packing of cubemap faces, * or the final row of 4x4, 2x2 and 1x1 faces below this. */ - if (dim > 32) - tex->pitch = ((dim * pt->cpp * 2 + 3) & ~3) / pt->cpp; + if (nblocks > 32) + tex->stride = round_up(nblocks * pt->block.size * 2, 4); else - tex->pitch = 14 * 8; + tex->stride = 14 * 8 * pt->block.size; - /* - * XXX The 4 is only needed for compressed formats. See above. - */ - tex->total_height = dim * 4 + 4; + tex->total_nblocksy = nblocks * 4; /* Set all the levels to effectively occupy the whole rectangular region. */ for (level = 0; level <= pt->last_level; level++) { - i915_miptree_set_level_info(tex, level, 6, lvlWidth, lvlHeight, 1); - lvlWidth /= 2; - lvlHeight /= 2; + i915_miptree_set_level_info(tex, level, 6, width, height, 1); + width /= 2; + height /= 2; } for (face = 0; face < 6; face++) { - unsigned x = initial_offsets[face][0] * dim; - unsigned y = initial_offsets[face][1] * dim; - unsigned d = dim; + unsigned x = initial_offsets[face][0] * nblocks; + unsigned y = initial_offsets[face][1] * nblocks; + unsigned d = nblocks; #if 0 /* Fix and enable this code for compressed formats */ - if (dim == 4 && face >= 4) { + if (nblocks == 4 && face >= 4) { y = tex->total_height - 4; x = (face - 4) * 8; } - else if (dim < 4 && (face > 0)) { + else if (nblocks < 4 && (face > 0)) { y = tex->total_height - 4; x = face * 8; } @@ -369,28 +368,28 @@ i915_miptree_layout(struct i915_texture * tex) switch (pt->target) { case PIPE_TEXTURE_CUBE: { - const unsigned dim = pt->width[0]; + const unsigned nblocks = pt->nblocksx[0]; unsigned face; - unsigned lvlWidth = pt->width[0], lvlHeight = pt->height[0]; + unsigned width = pt->width[0], height = pt->height[0]; - assert(lvlWidth == lvlHeight); /* cubemap images are square */ + assert(width == height); /* cubemap images are square */ /* double pitch for cube layouts */ - tex->pitch = ((dim * pt->cpp * 2 + 3) & ~3) / pt->cpp; - tex->total_height = dim * 4; + tex->stride = round_up(nblocks * pt->block.size * 2, 4); + tex->total_nblocksy = nblocks * 4; for (level = 0; level <= pt->last_level; level++) { i915_miptree_set_level_info(tex, level, 6, - lvlWidth, lvlHeight, + width, height, 1); - lvlWidth /= 2; - lvlHeight /= 2; + width /= 2; + height /= 2; } for (face = 0; face < 6; face++) { - unsigned x = initial_offsets[face][0] * dim; - unsigned y = initial_offsets[face][1] * dim; - unsigned d = dim; + unsigned x = initial_offsets[face][0] * nblocks; + unsigned y = initial_offsets[face][1] * nblocks; + unsigned d = nblocks; for (level = 0; level <= pt->last_level; level++) { i915_miptree_set_image_offset(tex, level, face, x, y); @@ -405,25 +404,29 @@ i915_miptree_layout(struct i915_texture * tex) unsigned width = pt->width[0]; unsigned height = pt->height[0]; unsigned depth = pt->depth[0]; - unsigned stack_height = 0; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; + unsigned stack_nblocksy = 0; /* Calculate the size of a single slice. */ - tex->pitch = ((pt->width[0] * pt->cpp + 3) & ~3) / pt->cpp; + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); /* XXX: hardware expects/requires 9 levels at minimum. */ for (level = 0; level <= MAX2(8, pt->last_level); level++) { i915_miptree_set_level_info(tex, level, depth, - width, height, depth); + width, height, depth); - stack_height += MAX2(2, height); + stack_nblocksy += MAX2(2, nblocksy); width = minify(width); height = minify(height); depth = minify(depth); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); } /* Fixup depth image_offsets: @@ -433,7 +436,7 @@ i915_miptree_layout(struct i915_texture * tex) unsigned i; for (i = 0; i < depth; i++) i915_miptree_set_image_offset(tex, level, i, - 0, i * stack_height); + 0, i * stack_nblocksy); depth = minify(depth); } @@ -443,33 +446,33 @@ i915_miptree_layout(struct i915_texture * tex) * remarkable how wasteful of memory the i915 texture layouts * are. They are largely fixed in the i945. */ - tex->total_height = stack_height * pt->depth[0]; + tex->total_nblocksy = stack_nblocksy * pt->depth[0]; break; } default:{ unsigned width = pt->width[0]; unsigned height = pt->height[0]; - unsigned img_height; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; - tex->pitch = ((pt->width[0] * pt->cpp + 3) & ~3) / pt->cpp; - tex->total_height = 0; + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + tex->total_nblocksy = 0; for (level = 0; level <= pt->last_level; level++) { i915_miptree_set_level_info(tex, level, 1, - width, height, 1); + width, height, 1); i915_miptree_set_image_offset(tex, level, 0, - 0, tex->total_height); + 0, tex->total_nblocksy); - if (pt->compressed) - img_height = MAX2(1, height / 4); - else - img_height = (MAX2(2, height) + 1) & ~1; + nblocksy = round_up(MAX2(2, nblocksy), 2); - tex->total_height += img_height; + tex->total_nblocksy += nblocksy; width = minify(width); height = minify(height); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); } break; } @@ -477,7 +480,7 @@ i915_miptree_layout(struct i915_texture * tex) /* DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, tex->pitch, - tex->total_height, pt->cpp, tex->pitch * tex->total_height * pt->cpp); + tex->total_nblocksy, pt->block.size, tex->stride * tex->total_nblocksy); */ return TRUE; @@ -498,14 +501,16 @@ i945_miptree_layout(struct i915_texture * tex) unsigned width = pt->width[0]; unsigned height = pt->height[0]; unsigned depth = pt->depth[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; unsigned pack_x_pitch, pack_x_nr; unsigned pack_y_pitch; - tex->pitch = ((pt->width[0] * pt->cpp + 3) & ~3) / pt->cpp; - tex->total_height = 0; + tex->stride = round_up(pt->nblocksx[0] * pt->block.size, 4); + tex->total_nblocksy = 0; - pack_y_pitch = MAX2(pt->height[0], 2); - pack_x_pitch = tex->pitch; + pack_y_pitch = MAX2(pt->nblocksy[0], 2); + pack_x_pitch = tex->stride / pt->block.size; pack_x_nr = 1; for (level = 0; level <= pt->last_level; level++) { @@ -515,11 +520,11 @@ i945_miptree_layout(struct i915_texture * tex) unsigned q, j; i915_miptree_set_level_info(tex, level, nr_images, - width, height, depth); + width, height, depth); for (q = 0; q < nr_images;) { for (j = 0; j < pack_x_nr && q < nr_images; j++, q++) { - i915_miptree_set_image_offset(tex, level, q, x, y + tex->total_height); + i915_miptree_set_image_offset(tex, level, q, x, y + tex->total_nblocksy); x += pack_x_pitch; } @@ -528,12 +533,12 @@ i945_miptree_layout(struct i915_texture * tex) } - tex->total_height += y; + tex->total_nblocksy += y; if (pack_x_pitch > 4) { pack_x_pitch >>= 1; pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr <= tex->pitch); + assert(pack_x_pitch * pack_x_nr * pt->block.size <= tex->stride); } if (pack_y_pitch > 2) { @@ -543,6 +548,8 @@ i945_miptree_layout(struct i915_texture * tex) width = minify(width); height = minify(height); depth = minify(depth); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); } break; } @@ -560,7 +567,7 @@ i945_miptree_layout(struct i915_texture * tex) /* DBG("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, tex->pitch, - tex->total_height, pt->cpp, tex->pitch * tex->total_height * pt->cpp); + tex->total_nblocksy, pt->block.size, tex->stride * tex->total_nblocksy); */ return TRUE; @@ -582,6 +589,9 @@ i915_texture_create(struct pipe_screen *screen, tex->base.refcount = 1; tex->base.screen = screen; + tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]); + tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]); + if (i915screen->is_i945) { if (!i945_miptree_layout(tex)) goto fail; @@ -592,8 +602,8 @@ i915_texture_create(struct pipe_screen *screen, tex->buffer = ws->buffer_create(ws, 64, PIPE_BUFFER_USAGE_PIXEL, - tex->pitch * tex->base.cpp * - tex->total_height); + tex->stride * + tex->total_nblocksy); if (!tex->buffer) goto fail; @@ -648,13 +658,13 @@ i915_get_tex_surface(struct pipe_screen *screen, unsigned offset; /* in bytes */ if (pt->target == PIPE_TEXTURE_CUBE) { - offset = tex->image_offset[level][face] * pt->cpp; + offset = tex->image_offset[level][face]; } else if (pt->target == PIPE_TEXTURE_3D) { - offset = tex->image_offset[level][zslice] * pt->cpp; + offset = tex->image_offset[level][zslice]; } else { - offset = tex->image_offset[level][0] * pt->cpp; + offset = tex->image_offset[level][0]; assert(face == 0); assert(zslice == 0); } @@ -666,10 +676,12 @@ i915_get_tex_surface(struct pipe_screen *screen, pipe_texture_reference(&ps->texture, pt); pipe_buffer_reference(ws, &ps->buffer, tex->buffer); ps->format = pt->format; - ps->cpp = pt->cpp; ps->width = pt->width[level]; ps->height = pt->height[level]; - ps->pitch = tex->pitch; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = tex->stride; ps->offset = offset; ps->usage = flags; ps->status = PIPE_SURFACE_STATUS_DEFINED; @@ -680,7 +692,7 @@ i915_get_tex_surface(struct pipe_screen *screen, static struct pipe_texture * i915_texture_blanket(struct pipe_screen * screen, const struct pipe_texture *base, - const unsigned *pitch, + const unsigned *stride, struct pipe_buffer *buffer) { struct i915_texture *tex; @@ -699,7 +711,7 @@ i915_texture_blanket(struct pipe_screen * screen, tex->base = *base; - tex->pitch = pitch[0]; + tex->stride = stride[0]; i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1); i915_miptree_set_image_offset(tex, 0, 0, 0, 0); diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h index 8ac6b4e689..2cae7665f7 100644 --- a/src/gallium/drivers/i965simple/brw_context.h +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -231,9 +231,9 @@ struct brw_texture { /* Derived from the above: */ - unsigned pitch; + unsigned stride; unsigned depth_pitch; /* per-image on i945? */ - unsigned total_height; + unsigned total_nblocksy; unsigned nr_images[PIPE_MAX_TEXTURE_LEVELS]; diff --git a/src/gallium/drivers/i965simple/brw_misc_state.c b/src/gallium/drivers/i965simple/brw_misc_state.c index 925049ecc1..be812c5da9 100644 --- a/src/gallium/drivers/i965simple/brw_misc_state.c +++ b/src/gallium/drivers/i965simple/brw_misc_state.c @@ -224,7 +224,9 @@ static void upload_depthbuffer(struct brw_context *brw) } else { unsigned int format; - switch (depth_surface->cpp) { + assert(depth_surface->block.width == 1); + assert(depth_surface->block.height == 1); + switch (depth_surface->block.size) { case 2: format = BRW_DEPTHFORMAT_D16_UNORM; break; @@ -239,7 +241,7 @@ static void upload_depthbuffer(struct brw_context *brw) return; } - OUT_BATCH(((depth_surface->pitch * depth_surface->cpp) - 1) | + OUT_BATCH((depth_surface->stride - 1) | (format << 18) | (BRW_TILEWALK_YMAJOR << 26) | // (depth_surface->region->tiled << 27) | @@ -247,7 +249,7 @@ static void upload_depthbuffer(struct brw_context *brw) OUT_RELOC(depth_surface->buffer, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE, 0); OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | - ((depth_surface->pitch - 1) << 6) | + ((depth_surface->stride/depth_surface->block.size - 1) << 6) | ((depth_surface->height - 1) << 19)); OUT_BATCH(0); } diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index 3d98a2bf19..0be3dfc743 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -47,8 +47,10 @@ brw_surface_copy(struct pipe_context *pipe, struct pipe_surface *src, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { - assert(dst != src); - assert(dst->cpp == src->cpp); + assert( dst != src ); + assert( dst->block.size == src->block.size ); + assert( dst->block.width == src->block.height ); + assert( dst->block.height == src->block.height ); if (0) { void *dst_map = pipe->screen->surface_map( pipe->screen, @@ -60,37 +62,30 @@ brw_surface_copy(struct pipe_context *pipe, PIPE_BUFFER_USAGE_CPU_READ ); pipe_copy_rect(dst_map, - dst->cpp, - dst->pitch, + &dst->block, + dst->stride, dstx, dsty, width, height, src_map, - do_flip ? -(int) src->pitch : src->pitch, + do_flip ? -(int) src->stride : src->stride, srcx, do_flip ? height - 1 - srcy : srcy); pipe->screen->surface_unmap(pipe->screen, src); pipe->screen->surface_unmap(pipe->screen, dst); } else { + assert(dst->block.width == 1); + assert(dst->block.height == 1); brw_copy_blit(brw_context(pipe), do_flip, - dst->cpp, - (short) src->pitch, src->buffer, src->offset, FALSE, - (short) dst->pitch, dst->buffer, dst->offset, FALSE, + dst->block.size, + (short) src->stride/src->block.size, src->buffer, src->offset, FALSE, + (short) dst->stride/dst->block.size, dst->buffer, dst->offset, FALSE, (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height, PIPE_LOGICOP_COPY); } } -/* Fill a rectangular sub-region. Need better logic about when to - * push buffers into AGP - will currently do so whenever possible. - */ -static void * -get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) -{ - return (char *)dst_map + (y * dst->pitch + x) * dst->cpp; -} - static void brw_surface_fill(struct pipe_context *pipe, @@ -99,50 +94,20 @@ brw_surface_fill(struct pipe_context *pipe, unsigned width, unsigned height, unsigned value) { if (0) { - unsigned i, j; void *dst_map = pipe->screen->surface_map( pipe->screen, dst, PIPE_BUFFER_USAGE_CPU_WRITE ); - - switch (dst->cpp) { - case 1: { - ubyte *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - memset(row, value, width); - row += dst->pitch; - } - } - break; - case 2: { - ushort *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - row[j] = (ushort) value; - row += dst->pitch; - } - } - break; - case 4: { - unsigned *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - row[j] = value; - row += dst->pitch; - } - } - break; - default: - assert(0); - break; - } + pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value); pipe->screen->surface_unmap(pipe->screen, dst); } else { + assert(dst->block.width == 1); + assert(dst->block.height == 1); brw_fill_blit(brw_context(pipe), - dst->cpp, - (short) dst->pitch, + dst->block.size, + (short) dst->stride/dst->block.size, dst->buffer, dst->offset, FALSE, (short) dstx, (short) dsty, (short) width, (short) height, diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 78ae0b1223..8c7725605b 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -81,7 +81,7 @@ static void intel_miptree_set_image_offset(struct brw_texture *tex, assert(x == 0 && y == 0); assert(img < tex->nr_images[level]); - tex->image_offset[level][img] = (x + y * tex->pitch) * pt->cpp; + tex->image_offset[level][img] = y * tex->stride + x * pt->block.size; } static void intel_miptree_set_level_info(struct brw_texture *tex, @@ -97,8 +97,11 @@ static void intel_miptree_set_level_info(struct brw_texture *tex, pt->width[level] = w; pt->height[level] = h; pt->depth[level] = d; + + pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w); + pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h); - tex->level_offset[level] = (x + y * tex->pitch) * pt->cpp; + tex->level_offset[level] = y * tex->stride + x * tex->base.block.size; tex->nr_images[level] = nr_images; /* @@ -123,77 +126,60 @@ static void intel_miptree_set_level_info(struct brw_texture *tex, static void i945_miptree_layout_2d(struct brw_texture *tex) { struct pipe_texture *pt = &tex->base; - unsigned align_h = 2, align_w = 4; + const int align_x = 2, align_y = 4; unsigned level; unsigned x = 0; unsigned y = 0; unsigned width = pt->width[0]; unsigned height = pt->height[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; - tex->pitch = pt->width[0]; - -#if 0 - if (pt->compressed) { - align_w = intel_compressed_alignment(pt->internal_format); - tex->pitch = ALIGN(pt->width[0], align_w); - } -#endif + tex->stride = align(pt->nblocksx[0] * pt->block.size, 4); /* May need to adjust pitch to accomodate the placement of - * the 2nd mipmap. This occurs when the alignment + * the 2nd mipmap level. This occurs when the alignment * constraints of mipmap placement push the right edge of the - * 2nd mipmap out past the width of its parent. + * 2nd mipmap level out past the width of its parent. */ if (pt->last_level > 0) { - unsigned mip1_width; - - if (pt->compressed) { - mip1_width = align(minify(pt->width[0]), align_w) - + align(minify(minify(pt->width[0])), align_w); - } else { - mip1_width = align(minify(pt->width[0]), align_w) - + minify(minify(pt->width[0])); - } + unsigned mip1_nblocksx + = align_int(pf_get_nblocksx(&pt->block, minify(width)), align_x) + + pf_get_nblocksx(&pt->block, minify(minify(width))); - if (mip1_width > tex->pitch) { - tex->pitch = mip1_width; - } + if (mip1_nblocksx > nblocksx) + tex->stride = mip1_nblocksx * pt->block.size; } - /* Pitch must be a whole number of dwords, even though we - * express it in texels. + /* Pitch must be a whole number of dwords */ - tex->pitch = align(tex->pitch * pt->cpp, 4) / pt->cpp; - tex->total_height = 0; + tex->stride = align_int(tex->stride, 64); + tex->total_nblocksy = 0; for (level = 0; level <= pt->last_level; level++) { - unsigned img_height; - intel_miptree_set_level_info(tex, level, 1, x, y, width, height, 1); - if (pt->compressed) - img_height = MAX2(1, height/4); - else - img_height = align(height, align_h); - + nblocksy = align_int(nblocksy, align_y); /* Because the images are packed better, the final offset * might not be the maximal one: */ - tex->total_height = MAX2(tex->total_height, y + img_height); + tex->total_nblocksy = MAX2(tex->total_nblocksy, y + nblocksy); - /* Layout_below: step right after second mipmap. + /* Layout_below: step right after second mipmap level. */ if (level == 1) { - x += align(width, align_w); + x += align_int(nblocksx, align_x); } else { - y += img_height; + y += nblocksy; } width = minify(width); height = minify(height); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); } } @@ -210,26 +196,20 @@ static boolean brw_miptree_layout(struct brw_texture *tex) unsigned width = pt->width[0]; unsigned height = pt->height[0]; unsigned depth = pt->depth[0]; + unsigned nblocksx = pt->nblocksx[0]; + unsigned nblocksy = pt->nblocksy[0]; unsigned pack_x_pitch, pack_x_nr; unsigned pack_y_pitch; unsigned level; unsigned align_h = 2; unsigned align_w = 4; - tex->total_height = 0; -#if 0 - if (pt->compressed) { - align_w = intel_compressed_alignment(pt->internal_format); - pt->pitch = align(width, align_w); - pack_y_pitch = (height + 3) / 4; - } else -#endif - { - tex->pitch = align(pt->width[0] * pt->cpp, 4) / pt->cpp; - pack_y_pitch = align(pt->height[0], align_h); - } + tex->total_nblocksy = 0; + + tex->stride = align(pt->nblocksx[0], 4); + pack_y_pitch = align(pt->nblocksy[0], align_h); - pack_x_pitch = tex->pitch; + pack_x_pitch = tex->stride / pt->block.size; pack_x_nr = 1; for (level = 0; level <= pt->last_level; level++) { @@ -239,7 +219,7 @@ static boolean brw_miptree_layout(struct brw_texture *tex) uint q, j; intel_miptree_set_level_info(tex, level, nr_images, - 0, tex->total_height, + 0, tex->total_nblocksy, width, height, depth); for (q = 0; q < nr_images;) { @@ -253,10 +233,12 @@ static boolean brw_miptree_layout(struct brw_texture *tex) } - tex->total_height += y; + tex->total_nblocksy += y; width = minify(width); height = minify(height); depth = minify(depth); + nblocksx = pf_get_nblocksx(&pt->block, width); + nblocksy = pf_get_nblocksy(&pt->block, height); if (pt->compressed) { pack_y_pitch = (height + 3) / 4; @@ -269,7 +251,7 @@ static boolean brw_miptree_layout(struct brw_texture *tex) if (pack_x_pitch > 4) { pack_x_pitch >>= 1; pack_x_nr <<= 1; - assert(pack_x_pitch * pack_x_nr <= tex->pitch); + assert(pack_x_pitch * pack_x_nr * pt->block.size <= tex->stride); } if (pack_y_pitch > 2) { @@ -289,9 +271,9 @@ static boolean brw_miptree_layout(struct brw_texture *tex) #if 0 PRINT("%s: %dx%dx%d - sz 0x%x\n", __FUNCTION__, pt->pitch, - pt->total_height, - pt->cpp, - pt->pitch * pt->total_height * pt->cpp ); + pt->total_nblocksy, + pt->block.size, + pt->stride * pt->total_nblocksy ); #endif return TRUE; @@ -309,11 +291,14 @@ brw_texture_create_screen(struct pipe_screen *screen, tex->base = *templat; tex->base.refcount = 1; + tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]); + tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]); + if (brw_miptree_layout(tex)) tex->buffer = ws->buffer_create(ws, 64, PIPE_BUFFER_USAGE_PIXEL, - tex->pitch * tex->base.cpp * - tex->total_height); + tex->stride * + tex->total_nblocksy); if (!tex->buffer) { FREE(tex); @@ -370,10 +355,10 @@ brw_get_tex_surface_screen(struct pipe_screen *screen, offset = tex->level_offset[level]; if (pt->target == PIPE_TEXTURE_CUBE) { - offset += tex->image_offset[level][face] * pt->cpp; + offset += tex->image_offset[level][face]; } else if (pt->target == PIPE_TEXTURE_3D) { - offset += tex->image_offset[level][zslice] * pt->cpp; + offset += tex->image_offset[level][zslice]; } else { assert(face == 0); @@ -386,10 +371,12 @@ brw_get_tex_surface_screen(struct pipe_screen *screen, assert(ps->refcount); pipe_buffer_reference(ws, &ps->buffer, tex->buffer); ps->format = pt->format; - ps->cpp = pt->cpp; ps->width = pt->width[level]; ps->height = pt->height[level]; - ps->pitch = tex->pitch; + ps->block = pt->block; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = tex->stride; ps->offset = offset; } return ps; diff --git a/src/gallium/drivers/i965simple/brw_wm_surface_state.c b/src/gallium/drivers/i965simple/brw_wm_surface_state.c index 69e56dc8bd..1a326f9918 100644 --- a/src/gallium/drivers/i965simple/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_surface_state.c @@ -160,7 +160,7 @@ void brw_update_texture_surface( struct brw_context *brw, surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; surf.ss3.tiled_surface = 0; /* always zero */ - surf.ss3.pitch = tObj->pitch - 1; + surf.ss3.pitch = tObj->stride - 1; surf.ss3.depth = tObj->base.depth[0] - 1; surf.ss4.min_lod = 0; @@ -197,7 +197,7 @@ static void upload_wm_surfaces(struct brw_context *brw ) memset(&surf, 0, sizeof(surf)); if (pipe_surface != NULL) { - if (pipe_surface->cpp == 4) + if (pipe_surface->block.size == 4) surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; else surf.ss0.surface_format = BRW_SURFACEFORMAT_B5G6R5_UNORM; @@ -210,7 +210,7 @@ static void upload_wm_surfaces(struct brw_context *brw ) surf.ss2.height = pipe_surface->height - 1; surf.ss3.tile_walk = BRW_TILEWALK_XMAJOR; surf.ss3.tiled_surface = 0; - surf.ss3.pitch = (pipe_surface->pitch * pipe_surface->cpp) - 1; + surf.ss3.pitch = pipe_surface->stride - 1; } else { surf.ss0.surface_format = BRW_SURFACEFORMAT_B8G8R8A8_UNORM; surf.ss0.surface_type = BRW_SURFACE_NULL; diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index 9fd48aeccc..7dc15c38d1 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -58,18 +58,20 @@ sp_surface_copy(struct pipe_context *pipe, src, PIPE_BUFFER_USAGE_CPU_READ ); - assert(dst->cpp == src->cpp); + assert(dst->block.size == src->block.size); + assert(dst->block.width == src->block.width); + assert(dst->block.height == src->block.height); assert(src_map); assert(dst_map); /* If do_flip, invert src_y position and pass negative src stride */ pipe_copy_rect(dst_map, - dst->cpp, - dst->pitch, + &dst->block, + dst->stride, dstx, dsty, width, height, src_map, - do_flip ? -(int) src->pitch : src->pitch, + do_flip ? -(int) src->stride : src->stride, srcx, do_flip ? src->height - 1 - srcy : srcy); pipe->screen->surface_unmap(pipe->screen, src); @@ -80,7 +82,7 @@ sp_surface_copy(struct pipe_context *pipe, static void * get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) { - return (char *)dst_map + (y * dst->pitch + x) * dst->cpp; + return (char *)dst_map + y / dst->block.height * dst->stride + x / dst->block.width * dst->block.size; } @@ -102,39 +104,14 @@ sp_surface_fill(struct pipe_context *pipe, dst, PIPE_BUFFER_USAGE_CPU_WRITE ); - assert(dst->pitch > 0); - assert(width <= dst->pitch); + assert(dst->stride > 0); - switch (dst->cpp) { + switch (dst->block.size) { case 1: - { - ubyte *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - memset(row, value, width); - row += dst->pitch; - } - } - break; case 2: - { - ushort *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - row[j] = (ushort) value; - row += dst->pitch; - } - } - break; case 4: - { - unsigned *row = get_pointer(dst, dst_map, dstx, dsty); - for (i = 0; i < height; i++) { - for (j = 0; j < width; j++) - row[j] = value; - row += dst->pitch; - } - } + pipe_fill_rect(dst_map, &dst->block, dst->stride, dstx, dsty, width, height, value); break; case 8: { @@ -155,7 +132,7 @@ sp_surface_fill(struct pipe_context *pipe, row[j*4+2] = val2; row[j*4+3] = val3; } - row += dst->pitch * 4; + row += dst->stride/2; } } break; diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 2ef17a220b..4db045cdc3 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -71,13 +71,15 @@ softpipe_texture_layout(struct pipe_screen *screen, pt->width[level] = width; pt->height[level] = height; pt->depth[level] = depth; - spt->pitch[level] = width; + pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width); + pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height); + spt->stride[level] = pt->nblocksx[level]*pt->block.size; spt->level_offset[level] = buffer_size; - buffer_size += (((pt->compressed) ? MAX2(1, height/4) : height) * + buffer_size += (pt->nblocksy[level] * ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) * - width * pt->cpp); + spt->stride[level]); width = minify(width); height = minify(height); @@ -121,7 +123,7 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, /* Now extract the goodies: */ spt->buffer = surf.buffer; - spt->pitch[0] = surf.pitch; + spt->stride[0] = surf.stride; return spt->buffer != NULL; } @@ -195,10 +197,12 @@ softpipe_get_tex_surface(struct pipe_screen *screen, assert(ps->winsys); pipe_buffer_reference(ws, &ps->buffer, spt->buffer); ps->format = pt->format; - ps->cpp = pt->cpp; + ps->block = pt->block; ps->width = pt->width[level]; ps->height = pt->height[level]; - ps->pitch = spt->pitch[level]; + ps->nblocksx = pt->nblocksx[level]; + ps->nblocksy = pt->nblocksy[level]; + ps->stride = spt->stride[level]; ps->offset = spt->level_offset[level]; ps->usage = usage; @@ -228,8 +232,8 @@ softpipe_get_tex_surface(struct pipe_screen *screen, if (pt->target == PIPE_TEXTURE_CUBE || pt->target == PIPE_TEXTURE_3D) { ps->offset += ((pt->target == PIPE_TEXTURE_CUBE) ? face : zslice) * - (pt->compressed ? ps->height/4 : ps->height) * - ps->width * ps->cpp; + ps->nblocksy * + ps->stride; } else { assert(face == 0); diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index 0e1017632c..bf437a7c61 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -42,7 +42,7 @@ struct softpipe_texture struct pipe_texture base; unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long pitch[PIPE_MAX_TEXTURE_LEVELS]; + unsigned long stride[PIPE_MAX_TEXTURE_LEVELS]; /* The data is held here: */ diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index d973fb357b..a2c6155d01 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -501,6 +501,47 @@ pf_get_block(enum pipe_format format, struct pipe_format_block *block) } } +static INLINE unsigned +pf_get_nblocksx(const struct pipe_format_block *block, unsigned x) +{ + return (x + block->width - 1)/block->width; +} + +static INLINE unsigned +pf_get_nblocksy(const struct pipe_format_block *block, unsigned y) +{ + return (y + block->height - 1)/block->height; +} + +static INLINE unsigned +pf_get_nblocks(const struct pipe_format_block *block, unsigned width, unsigned height) +{ + return pf_get_nblocksx(block, width)*pf_get_nblocksy(block, height); +} + +static INLINE void +pipe_rect_to_blocks(const struct pipe_format_block *block, + unsigned *width, unsigned *height, + unsigned *src_x, unsigned *src_y, + unsigned *dst_x, unsigned *dst_y) +{ + assert(block->size > 0); + assert(block->width > 0); + assert(block->height > 0); + if(width) + *width = pf_get_nblocksx(block, *width); + if(height) + *height = pf_get_nblocksy(block, *height); + if(src_x) + *src_x /= block->width; + if(src_y) + *src_y /= block->height; + if(dst_x) + *dst_x /= block->width; + if(dst_y) + *dst_y /= block->height; +} + #ifdef __cplusplus } #endif diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index e7ee8c97ed..2992e2f3b3 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -267,10 +267,12 @@ struct pipe_surface enum pipe_format format; /**< PIPE_FORMAT_x */ unsigned status; /**< PIPE_SURFACE_STATUS_x */ unsigned clear_value; /**< XXX may be temporary */ - unsigned cpp; /**< bytes per pixel */ unsigned width; unsigned height; - unsigned pitch; /**< in pixels */ + struct pipe_format_block block; + unsigned nblocksx; + unsigned nblocksy; + unsigned stride; /**< in bytes */ unsigned layout; /**< PIPE_SURFACE_LAYOUT_x */ unsigned offset; /**< offset from start of buffer, in bytes */ unsigned refcount; @@ -303,7 +305,10 @@ struct pipe_texture unsigned height[PIPE_MAX_TEXTURE_LEVELS]; unsigned depth[PIPE_MAX_TEXTURE_LEVELS]; - unsigned cpp:8; + struct pipe_format_block block; + unsigned nblocksx[PIPE_MAX_TEXTURE_LEVELS]; + unsigned nblocksy[PIPE_MAX_TEXTURE_LEVELS]; + unsigned last_level:8; /**< Index of last mipmap level present/defined */ unsigned compressed:1; diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index cf2447822a..7dcdd28287 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -31,6 +31,7 @@ #include "p_config.h" #include "p_compiler.h" #include "p_debug.h" +#include "p_format.h" #include "p_pointer.h" #include #include @@ -401,11 +402,15 @@ static INLINE int align(int value, int alignment) /* util/p_util.c */ -extern void pipe_copy_rect(ubyte * dst, unsigned cpp, unsigned dst_pitch, - unsigned dst_x, unsigned dst_y, unsigned width, - unsigned height, const ubyte * src, - int src_pitch, unsigned src_x, int src_y); - +extern void pipe_copy_rect(ubyte * dst, const struct pipe_format_block *block, + unsigned dst_stride, unsigned dst_x, unsigned dst_y, + unsigned width, unsigned height, const ubyte * src, + int src_stride, unsigned src_x, int src_y); + +extern void +pipe_fill_rect(ubyte * dst, const struct pipe_format_block *block, + unsigned dst_stride, unsigned dst_x, unsigned dst_y, + unsigned width, unsigned height, uint32_t value); #if defined(_MSC_VER) diff --git a/src/gallium/winsys/xlib/brw_aub.c b/src/gallium/winsys/xlib/brw_aub.c index 10eedd8402..6e814ce5d1 100644 --- a/src/gallium/winsys/xlib/brw_aub.c +++ b/src/gallium/winsys/xlib/brw_aub.c @@ -322,7 +322,10 @@ void brw_aub_dump_bmp( struct brw_aubfile *aubfile, struct aub_dump_bmp db; unsigned format; - if (surface->cpp == 4) + assert(surface->block.width == 1); + assert(surface->block.height == 1); + + if (surface->block.size == 4) format = 0x7; else format = 0x3; @@ -331,8 +334,8 @@ void brw_aub_dump_bmp( struct brw_aubfile *aubfile, db.xmin = 0; db.ymin = 0; db.format = format; - db.bpp = surface->cpp * 8; - db.pitch = surface->pitch; + db.bpp = surface->block.size * 8; + db.pitch = surface->stride/surface->block.size; db.xsize = surface->width; db.ysize = surface->height; db.addr = gtt_offset; diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index b14758f333..9225ee510d 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -364,9 +364,10 @@ xmesa_display_surface(XMesaBuffer b, const struct pipe_surface *surf) return; } - if (XSHM_ENABLED(xm_buf) && (xm_buf->tempImage == NULL)) { - alloc_shm_ximage(xm_buf, b, surf->pitch, surf->height); + assert(surf->block.width == 1); + assert(surf->block.height == 1); + alloc_shm_ximage(xm_buf, b, surf->stride/surf->block.size, surf->height); } ximage = (XSHM_ENABLED(xm_buf)) ? xm_buf->tempImage : b->tempImage; @@ -386,7 +387,7 @@ xmesa_display_surface(XMesaBuffer b, const struct pipe_surface *surf) /* update XImage's fields */ ximage->width = surf->width; ximage->height = surf->height; - ximage->bytes_per_line = surf->pitch * surf->cpp; + ximage->bytes_per_line = surf->stride; XPutImage(b->xm_visual->display, b->drawable, b->gc, ximage, 0, 0, 0, 0, surf->width, surf->height); @@ -497,18 +498,21 @@ xm_surface_alloc_storage(struct pipe_winsys *winsys, surf->width = width; surf->height = height; surf->format = format; - surf->cpp = pf_get_size(format); - surf->pitch = round_up(width, alignment / surf->cpp); + pf_get_block(format, &surf->block); + surf->nblocksx = pf_get_nblocksx(&surf->block, width); + surf->nblocksy = pf_get_nblocksy(&surf->block, height); + surf->stride = round_up(surf->nblocksx * surf->block.size, alignment); surf->usage = flags; -#ifdef GALLIUM_CELL /* XXX a bit of a hack */ - height = round_up(height, TILE_SIZE); -#endif - assert(!surf->buffer); surf->buffer = winsys->buffer_create(winsys, alignment, PIPE_BUFFER_USAGE_PIXEL, - surf->pitch * surf->cpp * height); +#ifdef GALLIUM_CELL /* XXX a bit of a hack */ + surf->stride * round_up(surf->nblocksy, TILE_SIZE)); +#else + surf->stride * surf->nblocksy); +#endif + if(!surf->buffer) return -1; diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.c b/src/gallium/winsys/xlib/xm_winsys_aub.c index 77376099f0..7fc9debdd5 100644 --- a/src/gallium/winsys/xlib/xm_winsys_aub.c +++ b/src/gallium/winsys/xlib/xm_winsys_aub.c @@ -279,22 +279,25 @@ aub_i915_surface_alloc_storage(struct pipe_winsys *winsys, unsigned flags, unsigned tex_usage) { - const unsigned alignment = 64; - - surf->width = width; - surf->height = height; - surf->format = format; - surf->cpp = pf_get_size(format); - surf->pitch = round_up(width, alignment / surf->cpp); - - assert(!surf->buffer); - surf->buffer = winsys->buffer_create(winsys, alignment, - PIPE_BUFFER_USAGE_PIXEL, - surf->pitch * surf->cpp * height); + const unsigned alignment = 64; + + surf->width = width; + surf->height = height; + surf->format = format; + pf_get_block(format, &surf->block); + surf->nblocksx = pf_get_nblocksx(&surf->block, width); + surf->nblocksy = pf_get_nblocksy(&surf->block, height); + surf->stride = round_up(surf->nblocksx * surf->block.size, alignment); + surf->usage = flags; + + assert(!surf->buffer); + surf->buffer = winsys->buffer_create(winsys, alignment, + PIPE_BUFFER_USAGE_PIXEL, + surf->stride * surf->nblocksy); if(!surf->buffer) - return -1; + return -1; - return 0; + return 0; } static void diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index 8098d75e18..809a906ba3 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -66,15 +66,17 @@ acc_get_tile_rgba(struct pipe_context *pipe, struct pipe_surface *acc_ps, uint x, uint y, uint w, uint h, float *p) { const enum pipe_format f = acc_ps->format; - const int cpp = acc_ps->cpp; + const struct pipe_format_block b = acc_ps->block; acc_ps->format = DEFAULT_ACCUM_PIPE_FORMAT; - acc_ps->cpp = 8; + acc_ps->block.size = 8; + acc_ps->block.width = 1; + acc_ps->block.height = 1; pipe_get_tile_rgba(pipe, acc_ps, x, y, w, h, p); acc_ps->format = f; - acc_ps->cpp = cpp; + acc_ps->block = b; } @@ -88,15 +90,17 @@ acc_put_tile_rgba(struct pipe_context *pipe, struct pipe_surface *acc_ps, uint x, uint y, uint w, uint h, const float *p) { enum pipe_format f = acc_ps->format; - const int cpp = acc_ps->cpp; + const struct pipe_format_block b = acc_ps->block; acc_ps->format = DEFAULT_ACCUM_PIPE_FORMAT; - acc_ps->cpp = 8; + acc_ps->block.size = 8; + acc_ps->block.width = 1; + acc_ps->block.height = 1; pipe_put_tile_rgba(pipe, acc_ps, x, y, w, h, p); acc_ps->format = f; - acc_ps->cpp = cpp; + acc_ps->block = b; } @@ -111,7 +115,7 @@ st_clear_accum_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) const GLint ypos = ctx->DrawBuffer->_Ymin; const GLint width = ctx->DrawBuffer->_Xmax - xpos; const GLint height = ctx->DrawBuffer->_Ymax - ypos; - GLvoid *map; + GLubyte *map; acc_ps = screen->get_tex_surface(screen, acc_strb->texture, 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE); @@ -128,8 +132,7 @@ st_clear_accum_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) GLshort a = FLOAT_TO_SHORT(ctx->Accum.ClearColor[3]); int i, j; for (i = 0; i < height; i++) { - GLshort *dst = ((GLshort *) map - + ((ypos + i) * acc_ps->pitch + xpos) * 4); + GLshort *dst = (GLshort *) (map + (ypos + i) * acc_ps->stride + xpos * 8); for (j = 0; j < width; j++) { dst[0] = r; dst[1] = g; @@ -168,8 +171,7 @@ accum_mad(GLcontext *ctx, GLfloat scale, GLfloat bias, { int i, j; for (i = 0; i < height; i++) { - GLshort *acc = ((GLshort *) map - + ((ypos + i) * acc_ps->pitch + xpos) * 4); + GLshort *acc = (GLshort *) (map + (ypos + i) * acc_ps->stride + xpos * 8); for (j = 0; j < width * 4; j++) { float val = SHORT_TO_FLOAT(acc[j]) * scale + bias; acc[j] = FLOAT_TO_SHORT(val); diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 6fa3cbd533..f3bc3b8584 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -341,9 +341,9 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height, dest = screen->surface_map(screen, surface, PIPE_BUFFER_USAGE_CPU_WRITE); /* Put image into texture surface */ - memset(dest, 0xff, height * surface->pitch); + memset(dest, 0xff, height * surface->stride); unpack_bitmap(ctx->st, 0, 0, width, height, unpack, bitmap, - dest, surface->pitch); + dest, surface->stride); _mesa_unmap_bitmap_pbo(ctx, unpack); diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index d8f1d2367c..a7781f3ab2 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -382,7 +382,7 @@ make_texture(struct st_context *st, mformat, /* gl_texture_format */ dest, /* dest */ 0, 0, 0, /* dstX/Y/Zoffset */ - surface->pitch * cpp, /* dstRowStride, bytes */ + surface->stride, /* dstRowStride, bytes */ &dstImageOffsets, /* dstImageOffsets */ width, height, 1, /* size */ format, type, /* src format/type */ @@ -786,13 +786,13 @@ draw_stencil_pixels(GLcontext *ctx, GLint x, GLint y, switch (ps->format) { case PIPE_FORMAT_S8_UNORM: { - ubyte *dest = stmap + spanY * ps->pitch + spanX; + ubyte *dest = stmap + spanY * ps->stride + spanX; memcpy(dest, values, spanWidth); } break; case PIPE_FORMAT_S8Z24_UNORM: { - uint *dest = (uint *) stmap + spanY * ps->pitch + spanX; + uint *dest = (uint *) (stmap + spanY * ps->stride + spanX*4); GLint k; for (k = 0; k < spanWidth; k++) { uint p = dest[k]; @@ -903,6 +903,9 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy, psDraw = screen->get_tex_surface(screen, rbDraw->texture, 0, 0, 0, PIPE_BUFFER_USAGE_CPU_WRITE); + assert(psDraw->block.width == 1); + assert(psDraw->block.height == 1); + /* map the stencil buffer */ drawMap = screen->surface_map(screen, psDraw, PIPE_BUFFER_USAGE_CPU_WRITE); @@ -919,7 +922,7 @@ copy_stencil_pixels(GLcontext *ctx, GLint srcx, GLint srcy, y = ctx->DrawBuffer->Height - y - 1; } - dst = drawMap + (y * psDraw->pitch + dstx) * psDraw->cpp; + dst = drawMap + y * psDraw->stride + dstx * psDraw->block.size; src = buffer + i * width; switch (psDraw->format) { diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index db25ddd615..1067caf9b3 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -113,7 +113,7 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, template.target = PIPE_TEXTURE_2D; template.compressed = 0; - template.cpp = pf_get_size(template.format); + pf_get_block(template.format, &template.block); template.width[0] = width; template.height[0] = height; template.depth[0] = 1; @@ -171,10 +171,12 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, assert(strb->surface->buffer); assert(strb->surface->format); - assert(strb->surface->cpp); + assert(strb->surface->block.size); + assert(strb->surface->block.width); + assert(strb->surface->block.height); assert(strb->surface->width == width); assert(strb->surface->height == height); - assert(strb->surface->pitch); + assert(strb->surface->stride); return strb->surface != NULL; diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index c5193631a7..09d9c29e44 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -94,13 +94,13 @@ st_read_stencil_pixels(GLcontext *ctx, GLint x, GLint y, switch (ps->format) { case PIPE_FORMAT_S8_UNORM: { - const ubyte *src = stmap + srcY * ps->pitch + x; + const ubyte *src = stmap + srcY * ps->stride + x; memcpy(values, src, width); } break; case PIPE_FORMAT_S8Z24_UNORM: { - const uint *src = (uint *) stmap + srcY * ps->pitch + x; + const uint *src = (uint *) (stmap + srcY * ps->stride + x*4); GLint k; for (k = 0; k < width; k++) { values[k] = src[k] >> 24; @@ -109,7 +109,7 @@ st_read_stencil_pixels(GLcontext *ctx, GLint x, GLint y, break; case PIPE_FORMAT_Z24S8_UNORM: { - const uint *src = (uint *) stmap + srcY * ps->pitch + x; + const uint *src = (uint *) (stmap + srcY * ps->stride + x*4); GLint k; for (k = 0; k < width; k++) { values[k] = src[k] & 0xff; diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 7d52d1da1b..b9aa513d72 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -658,7 +658,7 @@ st_TexImage(GLcontext * ctx, texImage->Data = st_texture_image_map(ctx->st, stImage, 0, PIPE_BUFFER_USAGE_CPU_WRITE); if (stImage->surface) - dstRowStride = stImage->surface->pitch * stImage->surface->cpp; + dstRowStride = stImage->surface->stride; } else { /* Allocate regular memory and store the image there temporarily. */ @@ -820,7 +820,7 @@ st_get_tex_image(GLcontext * ctx, GLenum target, GLint level, */ texImage->Data = st_texture_image_map(ctx->st, stImage, 0, PIPE_BUFFER_USAGE_CPU_READ); - texImage->RowStride = stImage->surface->pitch; + texImage->RowStride = stImage->surface->stride / stImage->pt->block.size; } else { /* Otherwise, the image should actually be stored in @@ -925,7 +925,7 @@ st_TexSubimage(GLcontext * ctx, texImage->Data = st_texture_image_map(ctx->st, stImage, zoffset, PIPE_BUFFER_USAGE_CPU_WRITE); if (stImage->surface) - dstRowStride = stImage->surface->pitch * stImage->surface->cpp; + dstRowStride = stImage->surface->stride; } if (!texImage->Data) { @@ -1425,9 +1425,11 @@ copy_image_data_to_texture(struct st_context *st, stImage->face, dstLevel, stImage->base.Data, - stImage->base.RowStride, + stImage->base.RowStride * + stObj->pt->block.size, stImage->base.RowStride * - stImage->base.Height); + stImage->base.Height * + stObj->pt->block.size); _mesa_align_free(stImage->base.Data); stImage->base.Data = NULL; } @@ -1477,6 +1479,7 @@ st_finalize_texture(GLcontext *ctx, pipe_texture_reference(&stObj->pt, firstImage->pt); } + /* FIXME: determine format block instead of cpp */ if (firstImage->base.IsCompressed) { comp_byte = compressed_num_bytes(firstImage->base.TexFormat->MesaFormat); cpp = comp_byte; @@ -1497,7 +1500,9 @@ st_finalize_texture(GLcontext *ctx, stObj->pt->width[0] != firstImage->base.Width2 || stObj->pt->height[0] != firstImage->base.Height2 || stObj->pt->depth[0] != firstImage->base.Depth2 || - stObj->pt->cpp != cpp || + stObj->pt->block.size != cpp || + stObj->pt->block.width != 1 || + stObj->pt->block.height != 1 || stObj->pt->compressed != firstImage->base.IsCompressed) { pipe_texture_release(&stObj->pt); ctx->st->dirty.st |= ST_NEW_FRAMEBUFFER; diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 851f17c3b4..2fc00df429 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -137,10 +137,10 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, _mesa_generate_mipmap_level(target, datatype, comps, 0 /*border*/, pt->width[srcLevel], pt->height[srcLevel], pt->depth[srcLevel], - srcSurf->pitch * srcSurf->cpp, /* stride in bytes */ + srcSurf->stride, /* stride in bytes */ srcData, pt->width[dstLevel], pt->height[dstLevel], pt->depth[dstLevel], - dstSurf->pitch * dstSurf->cpp, /* stride in bytes */ + dstSurf->stride, /* stride in bytes */ dstData); pipe_buffer_unmap(pipe, srcSurf->buffer); diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index 9553b34e31..8222826e7a 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -98,7 +98,7 @@ st_texture_create(struct st_context *st, pt.height[0] = height0; pt.depth[0] = depth0; pt.compressed = compress_byte ? 1 : 0; - pt.cpp = pt.compressed ? compress_byte : st_sizeof_format(format); + pf_get_block(format, &pt.block); pt.tex_usage = usage; newtex = screen->texture_create(screen, &pt); @@ -231,16 +231,19 @@ static void st_surface_data(struct pipe_context *pipe, struct pipe_surface *dst, unsigned dstx, unsigned dsty, - const void *src, unsigned src_pitch, + const void *src, unsigned src_stride, unsigned srcx, unsigned srcy, unsigned width, unsigned height) { struct pipe_screen *screen = pipe->screen; void *map = screen->surface_map(screen, dst, PIPE_BUFFER_USAGE_CPU_WRITE); pipe_copy_rect(map, - dst->cpp, - dst->pitch, - dstx, dsty, width, height, src, src_pitch, srcx, srcy); + &dst->block, + dst->stride, + dstx, dsty, + width, height, + src, src_stride, + srcx, srcy); screen->surface_unmap(screen, dst); } @@ -254,34 +257,29 @@ st_texture_image_data(struct pipe_context *pipe, GLuint face, GLuint level, void *src, - GLuint src_row_pitch, GLuint src_image_pitch) + GLuint src_row_stride, GLuint src_image_stride) { struct pipe_screen *screen = pipe->screen; GLuint depth = dst->depth[level]; GLuint i; - GLuint height = 0; const GLubyte *srcUB = src; struct pipe_surface *dst_surface; DBG("%s\n", __FUNCTION__); for (i = 0; i < depth; i++) { - height = dst->height[level]; - if(dst->compressed) - height /= 4; - dst_surface = screen->get_tex_surface(screen, dst, face, level, i, PIPE_BUFFER_USAGE_CPU_WRITE); st_surface_data(pipe, dst_surface, 0, 0, /* dstx, dsty */ srcUB, - src_row_pitch, + src_row_stride, 0, 0, /* source x, y */ - dst->width[level], height); /* width, height */ + dst->width[level], dst->height[level]); /* width, height */ screen->tex_surface_release(screen, &dst_surface); - srcUB += src_image_pitch * dst->cpp; + srcUB += src_image_stride; } } -- cgit v1.2.3 From ea4ca10b1bec67c8a60db0e4e5581318ce9f62f9 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 2 Jul 2008 12:22:51 +0900 Subject: pipebuffer: Verify usage flag consistency. Minor cleanups. --- src/gallium/auxiliary/pipebuffer/pb_buffer.h | 19 +++++++++++++++---- src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c | 3 +++ src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 10 +++++++--- src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 6 +++--- src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c | 9 ++++++--- src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c | 9 +++++++-- 8 files changed, 43 insertions(+), 17 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index 857ea53c78..83ea0be74b 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -204,13 +204,24 @@ pb_reference(struct pb_buffer **dst, /** - * Utility function to check whether a requested alignment is consistent with - * the provided alignment or not. + * Utility function to check whether the provided alignment is consistent with + * the requested or not. */ -static INLINE int +static INLINE boolean pb_check_alignment(size_t requested, size_t provided) { - return requested <= provided && (provided % requested) == 0; + return requested <= provided && (provided % requested) == 0 ? TRUE : FALSE; +} + + +/** + * Utility function to check whether the provided alignment is consistent with + * the requested or not. + */ +static INLINE boolean +pb_check_usage(unsigned requested, unsigned provided) +{ + return (requested & provided) == provided ? TRUE : FALSE; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 7f236887a9..0ffca298cd 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -406,7 +406,7 @@ fenced_buffer_list_create(struct pipe_winsys *winsys) { struct fenced_buffer_list *fenced_list; - fenced_list = (struct fenced_buffer_list *)CALLOC(1, sizeof(*fenced_list)); + fenced_list = CALLOC_STRUCT(fenced_buffer_list); if (!fenced_list) return NULL; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c index 702bef1c04..0d2d6c0c1b 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c @@ -88,6 +88,9 @@ pb_alt_manager_create(struct pb_manager *provider1, { struct pb_alt_manager *mgr; + if(!provider1 || !provider2) + return NULL; + mgr = CALLOC_STRUCT(pb_alt_manager); if (!mgr) return NULL; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index f1a457dde4..bed4bec4fe 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -217,7 +217,8 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf, if(!pb_check_alignment(desc->alignment, buf->base.base.alignment)) return FALSE; - /* XXX: check usage too? */ + if(!pb_check_usage(desc->usage, buf->base.base.usage)) + return FALSE; return TRUE; } @@ -282,7 +283,7 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, assert(buf->buffer->base.refcount >= 1); assert(pb_check_alignment(desc->alignment, buf->buffer->base.alignment)); - assert((buf->buffer->base.usage & desc->usage) == desc->usage); + assert(pb_check_usage(desc->usage, buf->buffer->base.usage)); assert(buf->buffer->base.size >= size); buf->base.base.refcount = 1; @@ -331,7 +332,10 @@ pb_cache_manager_create(struct pb_manager *provider, { struct pb_cache_manager *mgr; - mgr = (struct pb_cache_manager *)CALLOC(1, sizeof(*mgr)); + if(!provider) + return NULL; + + mgr = CALLOC_STRUCT(pb_cache_manager); if (!mgr) return NULL; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c index 9d809e2f9b..05efd8ce41 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c @@ -117,7 +117,7 @@ fenced_bufmgr_create(struct pb_manager *provider, if(!provider) return NULL; - fenced_mgr = (struct fenced_pb_manager *)CALLOC(1, sizeof(*fenced_mgr)); + fenced_mgr = CALLOC_STRUCT(fenced_pb_manager); if (!fenced_mgr) return NULL; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index 0a1e8c83b1..c51e582611 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -29,7 +29,7 @@ * \file * Buffer manager using the old texture memory manager. * - * \author José Fonseca + * \author José Fonseca */ @@ -271,8 +271,8 @@ mm_bufmgr_create(struct pb_manager *provider, struct pb_manager *mgr; struct pb_desc desc; - assert(provider); - assert(provider->create_buffer); + if(!provider) + return NULL; memset(&desc, 0, sizeof(desc)); desc.alignment = 1 << align2; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c index 528e9528f6..95af08929a 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -30,8 +30,8 @@ * \file * Batch buffer pool management. * - * \author José Fonseca - * \author Thomas Hellström + * \author José Fonseca + * \author Thomas Hellström */ @@ -229,7 +229,10 @@ pool_bufmgr_create(struct pb_manager *provider, struct pool_buffer *pool_buf; size_t i; - pool = (struct pool_pb_manager *)CALLOC(1, sizeof(*pool)); + if(!provider) + return NULL; + + pool = CALLOC_STRUCT(pool_pb_manager); if (!pool) return NULL; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index b9dff09804..598d9ce310 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -324,8 +324,10 @@ pb_slab_manager_create_buffer(struct pb_manager *_mgr, if(!pb_check_alignment(desc->alignment, mgr->bufSize)) return NULL; - /* XXX: check for compatible buffer usage too? */ - + assert(pb_check_usage(desc->usage, mgr->desc.usage)); + if(!pb_check_usage(desc->usage, mgr->desc.usage)) + return NULL; + _glthread_LOCK_MUTEX(mgr->mutex); if (mgr->slabs.next == &mgr->slabs) { (void) pb_slab_create(mgr); @@ -438,6 +440,9 @@ pb_slab_range_manager_create(struct pb_manager *provider, size_t bufSize; unsigned i; + if(!provider) + return NULL; + mgr = CALLOC_STRUCT(pb_slab_range_manager); if (!mgr) goto out_err0; -- cgit v1.2.3 From d16fcd07f876fe7fb29f5f4e3df4e83ff7de3422 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 2 Jul 2008 12:24:19 +0900 Subject: pipebuffer: Debug buffer manager to detect buffer under- and overflows. It should detect both cpu and gpu buffer overflows. --- src/gallium/auxiliary/pipebuffer/Makefile | 1 + src/gallium/auxiliary/pipebuffer/SConscript | 1 + src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 11 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c | 297 +++++++++++++++++++++ 4 files changed, 309 insertions(+), 1 deletion(-) create mode 100644 src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile index 1d9b036c07..f9b39d9ce0 100644 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -8,6 +8,7 @@ C_SOURCES = \ pb_buffer_malloc.c \ pb_bufmgr_alt.c \ pb_bufmgr_cache.c \ + pb_bufmgr_debug.c \ pb_bufmgr_fenced.c \ pb_bufmgr_mm.c \ pb_bufmgr_pool.c \ diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript index e52177bc79..56a40dda0d 100644 --- a/src/gallium/auxiliary/pipebuffer/SConscript +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -7,6 +7,7 @@ pipebuffer = env.ConvenienceLibrary( 'pb_buffer_malloc.c', 'pb_bufmgr_alt.c', 'pb_bufmgr_cache.c', + 'pb_bufmgr_debug.c', 'pb_bufmgr_fenced.c', 'pb_bufmgr_mm.c', 'pb_bufmgr_pool.c', diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 00279f7010..32867029ee 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -175,7 +175,16 @@ struct pb_manager * pb_alt_manager_create(struct pb_manager *provider1, struct pb_manager *provider2); - + +/** + * Debug buffer manager to detect buffer under- and overflows. + * + * Band size should be a multiple of the largest alignment + */ +struct pb_manager * +pb_debug_manager_create(struct pb_manager *provider, size_t band_size); + + #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c new file mode 100644 index 0000000000..acb9d7ad14 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -0,0 +1,297 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * Debug buffer manager to detect buffer under- and overflows. + * + * \author José Fonseca + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_winsys.h" +#include "pipe/p_thread.h" +#include "pipe/p_util.h" +#include "util/u_double_list.h" +#include "util/u_time.h" + +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +#ifdef DEBUG + + +/** + * Convenience macro (type safe). + */ +#define SUPER(__derived) (&(__derived)->base) + + +struct pb_debug_manager; + + +/** + * Wrapper around a pipe buffer which adds delayed destruction. + */ +struct pb_debug_buffer +{ + struct pb_buffer base; + + struct pb_buffer *buffer; + struct pb_debug_manager *mgr; + + size_t underflow_size; + size_t overflow_size; +}; + + +struct pb_debug_manager +{ + struct pb_manager base; + + struct pb_manager *provider; + + size_t band_size; +}; + + +static INLINE struct pb_debug_buffer * +pb_debug_buffer(struct pb_buffer *buf) +{ + assert(buf); + return (struct pb_debug_buffer *)buf; +} + + +static INLINE struct pb_debug_manager * +pb_debug_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pb_debug_manager *)mgr; +} + + +static const uint8_t random_pattern[32] = { + 0xaf, 0xcf, 0xa5, 0xa2, 0xc2, 0x63, 0x15, 0x1a, + 0x7e, 0xe2, 0x7e, 0x84, 0x15, 0x49, 0xa2, 0x1e, + 0x49, 0x63, 0xf5, 0x52, 0x74, 0x66, 0x9e, 0xc4, + 0x6d, 0xcf, 0x2c, 0x4a, 0x74, 0xe6, 0xfd, 0x94 +}; + + +static INLINE void +fill_random_pattern(uint8_t *dst, size_t size) +{ + unsigned i = 0; + while(size--) { + *dst++ = random_pattern[i++]; + i &= sizeof(random_pattern) - 1; + } +} + + +static INLINE boolean +check_random_pattern(const uint8_t *dst, size_t size) +{ + unsigned i = 0; + while(size--) { + if(*dst++ != random_pattern[i++]) + return FALSE; + i &= sizeof(random_pattern) - 1; + } + return TRUE; +} + + +static void +pb_debug_buffer_destroy(struct pb_buffer *_buf) +{ + struct pb_debug_buffer *buf = pb_debug_buffer(_buf); + uint8_t *map; + + assert(!buf->base.base.refcount); + + map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_READ); + assert(map); + if(map) { + if(!check_random_pattern(map, buf->underflow_size)) { + debug_printf("buffer underflow\n"); + debug_assert(0); + } + if(!check_random_pattern(map + buf->underflow_size + buf->base.base.size, + buf->overflow_size)) { + debug_printf("buffer overflow\n"); + debug_assert(0); + } + pb_unmap(buf->buffer); + } + + pb_reference(&buf->buffer, NULL); + FREE(buf); +} + + +static void * +pb_debug_buffer_map(struct pb_buffer *_buf, + unsigned flags) +{ + struct pb_debug_buffer *buf = pb_debug_buffer(_buf); + void *map = pb_map(buf->buffer, flags); + if(!map) + return NULL; + return (uint8_t *)map + buf->underflow_size; +} + + +static void +pb_debug_buffer_unmap(struct pb_buffer *_buf) +{ + struct pb_debug_buffer *buf = pb_debug_buffer(_buf); + pb_unmap(buf->buffer); +} + + +static void +pb_debug_buffer_get_base_buffer(struct pb_buffer *_buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + struct pb_debug_buffer *buf = pb_debug_buffer(_buf); + pb_get_base_buffer(buf->buffer, base_buf, offset); + *offset += buf->underflow_size; +} + + +const struct pb_vtbl +pb_debug_buffer_vtbl = { + pb_debug_buffer_destroy, + pb_debug_buffer_map, + pb_debug_buffer_unmap, + pb_debug_buffer_get_base_buffer +}; + + +static struct pb_buffer * +pb_debug_manager_create_buffer(struct pb_manager *_mgr, + size_t size, + const struct pb_desc *desc) +{ + struct pb_debug_manager *mgr = pb_debug_manager(_mgr); + struct pb_debug_buffer *buf; + struct pb_desc real_desc; + size_t real_size; + uint8_t *map; + + buf = CALLOC_STRUCT(pb_debug_buffer); + if(!buf) + return NULL; + + real_size = size + 2*mgr->band_size; + real_desc = *desc; + real_desc.usage |= PIPE_BUFFER_USAGE_CPU_WRITE; + real_desc.usage |= PIPE_BUFFER_USAGE_CPU_READ; + + buf->buffer = mgr->provider->create_buffer(mgr->provider, + real_size, + &real_desc); + if(!buf->buffer) { + FREE(buf); + return NULL; + } + + assert(buf->buffer->base.refcount >= 1); + assert(pb_check_alignment(real_desc.alignment, buf->buffer->base.alignment)); + assert(pb_check_usage(real_desc.usage, buf->buffer->base.usage)); + assert(buf->buffer->base.size >= real_size); + + buf->base.base.refcount = 1; + buf->base.base.alignment = desc->alignment; + buf->base.base.usage = desc->usage; + buf->base.base.size = size; + + buf->base.vtbl = &pb_debug_buffer_vtbl; + buf->mgr = mgr; + + buf->underflow_size = mgr->band_size; + buf->overflow_size = buf->buffer->base.size - buf->underflow_size - size; + + map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + assert(map); + if(map) { + fill_random_pattern(map, buf->underflow_size); + fill_random_pattern(map + buf->underflow_size + size, buf->overflow_size); + pb_unmap(buf->buffer); + } + + return &buf->base; +} + + +static void +pb_debug_manager_destroy(struct pb_manager *_mgr) +{ + struct pb_debug_manager *mgr = pb_debug_manager(_mgr); + mgr->provider->destroy(mgr->provider); + FREE(mgr); +} + + +struct pb_manager * +pb_debug_manager_create(struct pb_manager *provider, size_t band_size) +{ + struct pb_debug_manager *mgr; + + if(!provider) + return NULL; + + mgr = CALLOC_STRUCT(pb_debug_manager); + if (!mgr) + return NULL; + + mgr->base.destroy = pb_debug_manager_destroy; + mgr->base.create_buffer = pb_debug_manager_create_buffer; + mgr->provider = provider; + mgr->band_size = band_size; + + return &mgr->base; +} + + +#else /* !DEBUG */ + + +struct pb_manager * +pb_debug_manager_create(struct pb_manager *provider, size_t band_size) +{ + return provider; +} + + +#endif /* !DEBUG */ -- cgit v1.2.3 From cc31eecbcb90dabacabac3e6be7c01cfe3a7a2a6 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Wed, 2 Jul 2008 12:10:15 +0200 Subject: gallium: Allow draw module to work on non-x86 platforms again. --- src/gallium/auxiliary/draw/draw_vs.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 978954e91c..f798b20492 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -160,8 +160,10 @@ draw_vs_init( struct draw_context *draw ) return FALSE; draw->vs.aos_machine = draw_vs_aos_machine(); +#ifdef PIPE_ARCH_X86 if (!draw->vs.aos_machine) return FALSE; +#endif return TRUE; } -- cgit v1.2.3 From 019ad5e284db08b46b484b409c1a54c302afd50b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 2 Jul 2008 12:41:18 -0600 Subject: gallium: replace 128 with MAX_LABELS --- src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 2 +- src/gallium/auxiliary/tgsi/exec/tgsi_exec.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index 13b8c2e5bf..9649396c9b 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -214,7 +214,7 @@ tgsi_exec_machine_bind_shader( break; case TGSI_TOKEN_TYPE_INSTRUCTION: - assert( labels->count < 128 ); + assert( labels->count < MAX_LABELS ); labels->labels[labels->count][0] = instno; labels->labels[labels->count][1] = pointer; diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h index 19bd78df3d..ea182bc454 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h @@ -34,6 +34,8 @@ extern "C" { #endif +#define MAX_LABELS 1024 + #define NUM_CHANNELS 4 /* R,G,B,A */ #define QUAD_SIZE 4 /* 4 pixel/quad */ @@ -93,7 +95,7 @@ struct tgsi_sampler */ struct tgsi_exec_labels { - unsigned labels[128][2]; + unsigned labels[MAX_LABELS][2]; unsigned count; }; -- cgit v1.2.3 From 8ccab313561d8cca3caf8d76504119bf5eb2e9ae Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 3 Jul 2008 15:02:56 +0900 Subject: gallium: Use surface_copy for 1:1 blits. --- src/gallium/auxiliary/util/u_blit.c | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index ae779335dc..c9e3abc88b 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -308,6 +308,16 @@ util_blit_pixels(struct blit_state *ctx, } assert(screen->is_format_supported(screen, src->format, PIPE_TEXTURE)); + assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE)); + + if(dst->format == src->format && (dstX1 - dstX0) == srcW && (dstY1 - dstY0) == srcH) { + pipe->surface_copy(pipe, FALSE, + dst, dstX0, dstY0, /* dest */ + src, srcX0, srcY0, /* src */ + srcW, srcH); /* size */ + return; + } + assert(screen->is_format_supported(screen, dst->format, PIPE_SURFACE)); /* -- cgit v1.2.3 From c193cc506f74443c805a0df42e3364b39245d265 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 3 Jul 2008 19:52:05 +0900 Subject: gallium: Don't forget to get overllaping blits working again. --- src/gallium/auxiliary/util/u_blit.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index c9e3abc88b..3dc9fdd11e 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -311,6 +311,7 @@ util_blit_pixels(struct blit_state *ctx, assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE)); if(dst->format == src->format && (dstX1 - dstX0) == srcW && (dstY1 - dstY0) == srcH) { + /* FIXME: this will most surely fail for overlapping rectangles */ pipe->surface_copy(pipe, FALSE, dst, dstX0, dstY0, /* dest */ src, srcX0, srcY0, /* src */ -- cgit v1.2.3 From ade03755bcaec2dedb5cd4d13160ba366ee804cd Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 3 Jul 2008 21:28:56 +0900 Subject: pipebuffer: Silent warnings. Although rarely hit in normal apps, they are too noisy with test suites. --- src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 0ffca298cd..f599bee07e 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -174,7 +174,9 @@ _fenced_buffer_finish(struct fenced_buffer *fenced_buf) struct fenced_buffer_list *fenced_list = fenced_buf->list; struct pipe_winsys *winsys = fenced_list->winsys; +#if 0 debug_warning("waiting for GPU"); +#endif assert(fenced_buf->fence); if(fenced_buf->fence) { @@ -278,11 +280,13 @@ fenced_buffer_map(struct pb_buffer *buf, _fenced_buffer_finish(fenced_buf); } +#if 0 /* Check for CPU write access (read is OK) */ if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { /* this is legal -- just for debugging */ debug_warning("concurrent CPU writes"); } +#endif map = pb_map(fenced_buf->buffer, flags); if(map) { -- cgit v1.2.3 From 3c128748579c637ba7c777ba91ff4287a03190f6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Jul 2008 10:42:31 -0600 Subject: gallium: increase TGSI_EXEC_MAX_COND_NESTING, etc --- src/gallium/auxiliary/tgsi/exec/tgsi_exec.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h index ea182bc454..c89dfcb167 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h @@ -147,9 +147,9 @@ struct tgsi_exec_labels #define TGSI_EXEC_NUM_ADDRS 1 #define TGSI_EXEC_NUM_IMMEDIATES 256 -#define TGSI_EXEC_MAX_COND_NESTING 10 -#define TGSI_EXEC_MAX_LOOP_NESTING 10 -#define TGSI_EXEC_MAX_CALL_NESTING 10 +#define TGSI_EXEC_MAX_COND_NESTING 20 +#define TGSI_EXEC_MAX_LOOP_NESTING 20 +#define TGSI_EXEC_MAX_CALL_NESTING 20 /** * Run-time virtual machine state for executing TGSI shader. -- cgit v1.2.3 From f042d662e2cec4315ddaae1ee536f593139f703d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Jul 2008 12:56:33 -0600 Subject: gallium: increase TGSI interpreter's number of temp registers to 64 Also, clean up the definitions of the misc/extra temp regs. A few new assertions too. --- src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 4 ++- src/gallium/auxiliary/tgsi/exec/tgsi_exec.h | 48 +++++++++++++++-------------- 2 files changed, 28 insertions(+), 24 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index 9649396c9b..46949661af 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -262,7 +262,7 @@ tgsi_exec_machine_init( uint i; mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps); - mach->Addrs = &mach->Temps[TGSI_EXEC_NUM_TEMPS]; + mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; /* Setup constants. */ for( i = 0; i < 4; i++ ) { @@ -941,6 +941,7 @@ fetch_src_file_channel( break; case TGSI_FILE_TEMPORARY: + assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; @@ -1127,6 +1128,7 @@ store_dest( break; case TGSI_FILE_TEMPORARY: + assert(reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS); dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; break; diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h index c89dfcb167..18abdd9ac0 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h @@ -99,53 +99,57 @@ struct tgsi_exec_labels unsigned count; }; + +#define TGSI_EXEC_NUM_TEMPS 64 +#define TGSI_EXEC_NUM_TEMP_EXTRAS 6 +#define TGSI_EXEC_NUM_IMMEDIATES 256 + /* * Locations of various utility registers (_I = Index, _C = Channel) */ -#define TGSI_EXEC_TEMP_00000000_I 32 +#define TGSI_EXEC_TEMP_00000000_I (TGSI_EXEC_NUM_TEMPS + 0) #define TGSI_EXEC_TEMP_00000000_C 0 -#define TGSI_EXEC_TEMP_7FFFFFFF_I 32 +#define TGSI_EXEC_TEMP_7FFFFFFF_I (TGSI_EXEC_NUM_TEMPS + 0) #define TGSI_EXEC_TEMP_7FFFFFFF_C 1 -#define TGSI_EXEC_TEMP_80000000_I 32 +#define TGSI_EXEC_TEMP_80000000_I (TGSI_EXEC_NUM_TEMPS + 0) #define TGSI_EXEC_TEMP_80000000_C 2 -#define TGSI_EXEC_TEMP_FFFFFFFF_I 32 +#define TGSI_EXEC_TEMP_FFFFFFFF_I (TGSI_EXEC_NUM_TEMPS + 0) #define TGSI_EXEC_TEMP_FFFFFFFF_C 3 -#define TGSI_EXEC_TEMP_ONE_I 33 +#define TGSI_EXEC_TEMP_ONE_I (TGSI_EXEC_NUM_TEMPS + 1) #define TGSI_EXEC_TEMP_ONE_C 0 -#define TGSI_EXEC_TEMP_TWO_I 33 +#define TGSI_EXEC_TEMP_TWO_I (TGSI_EXEC_NUM_TEMPS + 1) #define TGSI_EXEC_TEMP_TWO_C 1 -#define TGSI_EXEC_TEMP_128_I 33 +#define TGSI_EXEC_TEMP_128_I (TGSI_EXEC_NUM_TEMPS + 1) #define TGSI_EXEC_TEMP_128_C 2 -#define TGSI_EXEC_TEMP_MINUS_128_I 33 +#define TGSI_EXEC_TEMP_MINUS_128_I (TGSI_EXEC_NUM_TEMPS + 1) #define TGSI_EXEC_TEMP_MINUS_128_C 3 -#define TGSI_EXEC_TEMP_KILMASK_I 34 +#define TGSI_EXEC_TEMP_KILMASK_I (TGSI_EXEC_NUM_TEMPS + 2) #define TGSI_EXEC_TEMP_KILMASK_C 0 -#define TGSI_EXEC_TEMP_OUTPUT_I 34 +#define TGSI_EXEC_TEMP_OUTPUT_I (TGSI_EXEC_NUM_TEMPS + 2) #define TGSI_EXEC_TEMP_OUTPUT_C 1 -#define TGSI_EXEC_TEMP_PRIMITIVE_I 34 +#define TGSI_EXEC_TEMP_PRIMITIVE_I (TGSI_EXEC_NUM_TEMPS + 2) #define TGSI_EXEC_TEMP_PRIMITIVE_C 2 -#define TGSI_EXEC_TEMP_THREE_I 34 +#define TGSI_EXEC_TEMP_THREE_I (TGSI_EXEC_NUM_TEMPS + 2) #define TGSI_EXEC_TEMP_THREE_C 3 -#define TGSI_EXEC_TEMP_HALF_I 35 +#define TGSI_EXEC_TEMP_HALF_I (TGSI_EXEC_NUM_TEMPS + 3) #define TGSI_EXEC_TEMP_HALF_C 0 -#define TGSI_EXEC_TEMP_R0 36 +#define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4) + +#define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 5) -#define TGSI_EXEC_NUM_TEMPS (32 + 5) -#define TGSI_EXEC_NUM_ADDRS 1 -#define TGSI_EXEC_NUM_IMMEDIATES 256 #define TGSI_EXEC_MAX_COND_NESTING 20 #define TGSI_EXEC_MAX_LOOP_NESTING 20 @@ -156,13 +160,11 @@ struct tgsi_exec_labels */ struct tgsi_exec_machine { - /* - * 32 program temporaries - * 4 internal temporaries - * 1 address - * 1 temporary of padding to align to 16 bytes + /* Total = program temporaries + internal temporaries + * + 1 padding to align to 16 bytes */ - struct tgsi_exec_vector _Temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_ADDRS + 1]; + struct tgsi_exec_vector _Temps[TGSI_EXEC_NUM_TEMPS + + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]; /* * This will point to _Temps after aligning to 16B boundary. -- cgit v1.2.3 From 51abbdd227224c596ae2232ff3137dc3f346d563 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 3 Jul 2008 14:55:14 -0600 Subject: gallium: added a4r4g4b4_put_tile_rgba() --- src/gallium/auxiliary/util/p_tile.c | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index ab603ff6e4..93abef9879 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -298,6 +298,33 @@ a4r4g4b4_get_tile_rgba(ushort *src, } +static void +a4r4g4b4_put_tile_rgba(ushort *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]); + UNCLAMPED_FLOAT_TO_UBYTE(g, pRow[1]); + UNCLAMPED_FLOAT_TO_UBYTE(b, pRow[2]); + UNCLAMPED_FLOAT_TO_UBYTE(a, pRow[3]); + r >>= 4; + g >>= 4; + b >>= 4; + a >>= 4; + *dst++ = (a << 12) | (r << 16) | (g << 4) | b; + } + p += src_stride; + } +} + + /*** PIPE_FORMAT_R5G6B5_UNORM ***/ static void @@ -774,6 +801,10 @@ pipe_put_tile_rgba(struct pipe_context *pipe, r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); break; case PIPE_FORMAT_R8G8B8A8_UNORM: + assert(0); + break; + case PIPE_FORMAT_A4R4G4B4_UNORM: + a4r4g4b4_put_tile_rgba((ushort *) packed, w, h, p, src_stride); break; case PIPE_FORMAT_L8_UNORM: /*l8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/ -- cgit v1.2.3 From ba9e6339028c36269cb50bb8535e415fa8e6e4c9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 4 Jul 2008 09:59:43 -0600 Subject: gallium: fix trim() function bug when count < first If the user called glDrawArrays(GL_TRIANGLES, count=1), trim() returned a very large integer because of the unsigned arithmetic. --- src/gallium/auxiliary/draw/draw_pt.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 9140faeea9..85a75525c8 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -37,6 +37,8 @@ static unsigned trim( unsigned count, unsigned first, unsigned incr ) { + if (count < first) + return 0; return count - (count - first) % incr; } -- cgit v1.2.3 From 7cbc244c52610eb59b0fe1fc7275f307b560c281 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 8 Jul 2008 15:00:11 -0600 Subject: gallium: tweak printing of generic declarations --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 92aff88925..a395c630cc 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -573,7 +573,8 @@ tgsi_dump_declaration( if (decl->Declaration.Semantic) { TXT( ", " ); ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS_SHORT ); - if (decl->Semantic.SemanticIndex != 0) { + if (decl->Semantic.SemanticIndex != 0 || + decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC) { CHR( '[' ); UID( decl->Semantic.SemanticIndex ); CHR( ']' ); -- cgit v1.2.3 From d25709df1d5d9dccdeb173b33a57018004fe849c Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Wed, 9 Jul 2008 09:29:49 -0400 Subject: draw: remove some debug output --- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index 6b92811870..8e834501a4 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -184,7 +184,6 @@ boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ) if (!load_input( cp, i, linear )) return FALSE; cp->insn_counter++; - debug_printf("\n"); } return TRUE; @@ -316,7 +315,6 @@ boolean aos_emit_outputs( struct aos_compilation *cp ) aos_release_xmm_reg( cp, data.idx ); cp->insn_counter++; - debug_printf("\n"); } return TRUE; -- cgit v1.2.3 From 93ff702b4f1c6016e249c4d326e71cdc4932f57c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 9 Jul 2008 11:46:16 -0600 Subject: gallium: fix logic in pb_check_usage() --- src/gallium/auxiliary/pipebuffer/pb_buffer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index 83ea0be74b..8505d333bd 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -221,7 +221,7 @@ pb_check_alignment(size_t requested, size_t provided) static INLINE boolean pb_check_usage(unsigned requested, unsigned provided) { - return (requested & provided) == provided ? TRUE : FALSE; + return (requested & provided) == requested ? TRUE : FALSE; } -- cgit v1.2.3 From 7279d663e984ae8a243f56c010f175fee9ffccb3 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sat, 12 Jul 2008 11:16:01 +0200 Subject: tgsi: Add missing copyright headers. --- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h | 30 +++++++++++++++++++++++++-- src/gallium/auxiliary/tgsi/util/tgsi_build.c | 28 ++++++++++++++++++++++++- src/gallium/auxiliary/tgsi/util/tgsi_build.h | 30 +++++++++++++++++++++++++-- src/gallium/auxiliary/tgsi/util/tgsi_dump.h | 31 +++++++++++++++++++++++++--- src/gallium/auxiliary/tgsi/util/tgsi_util.c | 28 ++++++++++++++++++++++++- src/gallium/auxiliary/tgsi/util/tgsi_util.h | 30 +++++++++++++++++++++++++-- 6 files changed, 166 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h index e66d115283..af838b2a25 100755 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h @@ -1,4 +1,31 @@ -#if !defined TGSI_SSE2_H +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_SSE2_H #define TGSI_SSE2_H #if defined __cplusplus @@ -20,4 +47,3 @@ tgsi_emit_sse2( #endif #endif /* TGSI_SSE2_H */ - diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.c b/src/gallium/auxiliary/tgsi/util/tgsi_build.c index 18e44b38c2..742ef14c35 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include "pipe/p_debug.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" @@ -1295,4 +1322,3 @@ tgsi_build_dst_register_ext_modulate( return dst_register_ext_modulate; } - diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.h b/src/gallium/auxiliary/tgsi/util/tgsi_build.h index 423cf141f5..ed25830248 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_build.h @@ -1,4 +1,31 @@ -#if !defined TGSI_BUILD_H +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_BUILD_H #define TGSI_BUILD_H #if defined __cplusplus @@ -303,4 +330,3 @@ tgsi_build_dst_register_ext_modulate( #endif #endif /* TGSI_BUILD_H */ - diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h index ca83bdef20..ba7692b511 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h @@ -1,4 +1,31 @@ -#if !defined TGSI_DUMP_H +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_DUMP_H #define TGSI_DUMP_H #if defined __cplusplus @@ -31,10 +58,8 @@ void tgsi_dump_declaration( const struct tgsi_full_declaration *decl ); - #if defined __cplusplus } #endif #endif /* TGSI_DUMP_H */ - diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.c b/src/gallium/auxiliary/tgsi/util/tgsi_util.c index 56a50d3b21..10762b6c1a 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_util.c @@ -1,3 +1,30 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #include "pipe/p_debug.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" @@ -271,4 +298,3 @@ tgsi_util_set_full_src_register_sign_mode( assert( 0 ); } } - diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.h b/src/gallium/auxiliary/tgsi/util/tgsi_util.h index 45f5f0be0e..7877f34558 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_util.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_util.h @@ -1,4 +1,31 @@ -#if !defined TGSI_UTIL_H +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_UTIL_H #define TGSI_UTIL_H #if defined __cplusplus @@ -67,4 +94,3 @@ tgsi_util_set_full_src_register_sign_mode( #endif #endif /* TGSI_UTIL_H */ - -- cgit v1.2.3 From 9ea485f8865404e3e2e10cdb3b1627e7194c27fe Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sat, 12 Jul 2008 17:03:30 +0200 Subject: tgsi: Fix dumping of indirect addressing. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index a395c630cc..6356b6de06 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -805,10 +805,12 @@ tgsi_dump_instruction( CHR( '[' ); if (src->SrcRegister.Indirect) { - TXT( "addr" ); - if (src->SrcRegister.Index > 0) - CHR( '+' ); - SID( src->SrcRegister.Index ); + TXT( "ADDR[0]" ); + if (src->SrcRegister.Index != 0) { + if (src->SrcRegister.Index > 0) + CHR( '+' ); + SID( src->SrcRegister.Index ); + } } else SID( src->SrcRegister.Index ); -- cgit v1.2.3 From d0386d55ff257ab09475178d058ddcd9f1e37c2d Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sat, 12 Jul 2008 17:06:37 +0200 Subject: tgsi: Add tgsi_text utility module. Translates textual shader into a binary token stream. The syntax matches the tgsi_dump module, so it's possible to simply copy-paste the shader dump and transform it back to a binary form. --- src/gallium/auxiliary/tgsi/util/tgsi_text.c | 580 ++++++++++++++++++++++++++++ src/gallium/auxiliary/tgsi/util/tgsi_text.h | 47 +++ 2 files changed, 627 insertions(+) create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_text.c create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_text.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c new file mode 100644 index 0000000000..bb365d1efe --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c @@ -0,0 +1,580 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "tgsi_text.h" +#include "tgsi_build.h" +#include "tgsi_parse.h" +#include "tgsi_util.h" + +static boolean is_alpha_underscore( const char *cur ) +{ + return + (*cur >= 'a' && *cur <= 'z') || + (*cur >= 'A' && *cur <= 'Z') || + *cur == '_'; +} + +static boolean is_digit( const char *cur ) +{ + return *cur >= '0' && *cur <= '9'; +} + +static boolean is_digit_alpha_underscore( const char *cur ) +{ + return is_digit( cur ) || is_alpha_underscore( cur ); +} + +/* Eat zero or more whitespaces. + */ +static void eat_opt_white( const char **pcur ) +{ + while (**pcur == ' ' || **pcur == '\t' || **pcur == '\n') + (*pcur)++; +} + +/* Eat one or more whitespaces. + * Return TRUE if at least one whitespace eaten. + */ +static boolean eat_white( const char **pcur ) +{ + const char *cur = *pcur; + + eat_opt_white( pcur ); + return *pcur > cur; +} + +/* Parse unsigned integer. + * No checks for overflow. + */ +static boolean parse_uint( const char **pcur, uint *val ) +{ + const char *cur = *pcur; + + if (is_digit( cur )) { + *val = *cur++ - '0'; + while (is_digit( cur )) + *val = *val * 10 + *cur++ - '0'; + *pcur = cur; + return TRUE; + } + return FALSE; +} + +struct translate_ctx +{ + const char *text; + const char *cur; + struct tgsi_token *tokens; + struct tgsi_token *tokens_cur; + struct tgsi_token *tokens_end; + struct tgsi_header *header; +}; + +static void report_error( struct translate_ctx *ctx, const char *msg ) +{ + debug_printf( "\nError: %s", msg ); +} + +/* Parse shader header. + * Return TRUE for one of the following headers. + * FRAG1.1 + * GEOM1.1 + * VERT1.1 + */ +static boolean parse_header( struct translate_ctx *ctx ) +{ + uint processor; + + if (ctx->cur[0] == 'F' && ctx->cur[1] == 'R' && ctx->cur[2] == 'A' && ctx->cur[3] == 'G') { + ctx->cur += 4; + processor = TGSI_PROCESSOR_FRAGMENT; + } + else if (ctx->cur[0] == 'V' && ctx->cur[1] == 'E' && ctx->cur[2] == 'R' && ctx->cur[3] == 'T') { + ctx->cur += 4; + processor = TGSI_PROCESSOR_VERTEX; + } + else if (ctx->cur[0] == 'G' && ctx->cur[1] == 'E' && ctx->cur[2] == 'O' && ctx->cur[3] == 'M') { + ctx->cur += 4; + processor = TGSI_PROCESSOR_GEOMETRY; + } + else { + report_error( ctx, "Unknown processor type" ); + return FALSE; + } + + if (ctx->cur[0] == '1' && ctx->cur[1] == '.' && ctx->cur[2] == '1') { + ctx->cur += 3; + } + else { + report_error( ctx, "Unknown version" ); + return FALSE; + } + + if (ctx->tokens_cur >= ctx->tokens_end) + return FALSE; + *(struct tgsi_version *) ctx->tokens_cur++ = tgsi_build_version(); + + if (ctx->tokens_cur >= ctx->tokens_end) + return FALSE; + ctx->header = (struct tgsi_header *) ctx->tokens_cur++; + *ctx->header = tgsi_build_header(); + + if (ctx->tokens_cur >= ctx->tokens_end) + return FALSE; + *(struct tgsi_processor *) ctx->tokens_cur++ = tgsi_build_processor( processor, ctx->header ); + + return TRUE; +} + +static boolean parse_label( struct translate_ctx *ctx, uint *val ) +{ + const char *cur = ctx->cur; + + if (parse_uint( &cur, val )) { + eat_opt_white( &cur ); + if (*cur == ':') { + cur++; + ctx->cur = cur; + return TRUE; + } + } + return FALSE; +} + +static const char *file_names[TGSI_FILE_COUNT] = +{ + "NULL", + "CONST", + "IN", + "OUT", + "TEMP", + "SAMP", + "ADDR", + "IMM" +}; + +static boolean +parse_file( + struct translate_ctx *ctx, + uint *file ) +{ + uint i; + + for (i = 0; i < TGSI_FILE_COUNT; i++) { + const char *cur = ctx->cur; + const char *name = file_names[i]; + + while (*name != '\0' && *name == toupper( *cur )) { + name++; + cur++; + } + if (*name == '\0' && !is_digit_alpha_underscore( cur )) { + ctx->cur = cur; + *file = i; + return TRUE; + } + } + report_error( ctx, "Unknown register file" ); + return FALSE; +} + +static boolean +parse_register( + struct translate_ctx *ctx, + uint *file, + uint *index ) +{ + if (!parse_file( ctx, file )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '[') { + report_error( ctx, "Expected `['" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, index )) { + report_error( ctx, "Expected literal integer" ); + return FALSE; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + ctx->cur++; + /* TODO: Modulate suffix */ + return TRUE; +} + +static boolean +parse_dst_operand( + struct translate_ctx *ctx, + struct tgsi_full_dst_register *dst ) +{ + const char *cur; + uint file; + uint index; + + if (!parse_register( ctx, &file, &index )) + return FALSE; + dst->DstRegister.File = file; + dst->DstRegister.Index = index; + + /* Parse optional write mask. + */ + cur = ctx->cur; + eat_opt_white( &cur ); + if (*cur == '.') { + uint writemask = TGSI_WRITEMASK_NONE; + + cur++; + eat_opt_white( &cur ); + if (toupper( *cur ) == 'X') { + cur++; + writemask |= TGSI_WRITEMASK_X; + } + if (toupper( *cur ) == 'Y') { + cur++; + writemask |= TGSI_WRITEMASK_Y; + } + if (toupper( *cur ) == 'Z') { + cur++; + writemask |= TGSI_WRITEMASK_Z; + } + if (toupper( *cur ) == 'W') { + cur++; + writemask |= TGSI_WRITEMASK_W; + } + + if (writemask == TGSI_WRITEMASK_NONE) { + report_error( ctx, "Writemask expected" ); + return FALSE; + } + + dst->DstRegister.WriteMask = writemask; + ctx->cur = cur; + } + return TRUE; +} + +static boolean +parse_src_operand( + struct translate_ctx *ctx, + struct tgsi_full_src_register *src ) +{ + const char *cur; + uint file; + uint index; + + /* TODO: Extended register modifiers */ + if (*ctx->cur == '-') { + ctx->cur++; + src->SrcRegister.Negate = 1; + eat_opt_white( &ctx->cur ); + } + + if (!parse_register( ctx, &file, &index )) + return FALSE; + src->SrcRegister.File = file; + src->SrcRegister.Index = index; + + /* Parse optional swizzle + */ + cur = ctx->cur; + eat_opt_white( &cur ); + if (*cur == '.') { + uint i; + + cur++; + eat_opt_white( &cur ); + for (i = 0; i < 4; i++) { + uint swizzle; + + if (toupper( *cur ) == 'X') + swizzle = TGSI_SWIZZLE_X; + else if (toupper( *cur ) == 'Y') + swizzle = TGSI_SWIZZLE_Y; + else if (toupper( *cur ) == 'Z') + swizzle = TGSI_SWIZZLE_Z; + else if (toupper( *cur ) == 'W') + swizzle = TGSI_SWIZZLE_W; + else { + report_error( ctx, "Expected register swizzle component either `x', `y', `z' or `w'" ); + return FALSE; + } + cur++; + tgsi_util_set_src_register_swizzle( &src->SrcRegister, swizzle, i ); + } + + ctx->cur = cur; + } + return TRUE; +} + +struct opcode_info +{ + uint num_dst; + uint num_src; + const char *mnemonic; +}; + +static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] = +{ +{ 1, 1, "ARL" }, + { 1, 1, "MOV" }, +{ 1, 1, "LIT" }, + { 1, 1, "RCP" }, + { 1, 1, "RSQ" }, + { 1, 1, "EXP" }, + { 1, 1, "LOG" }, + { 1, 2, "MUL" }, + { 1, 2, "ADD" }, + { 1, 2, "DP3" }, + { 1, 2, "DP4" }, +{ 1, 2, "DST" }, + { 1, 2, "MIN" }, + { 1, 2, "MAX" }, +{ 1, 2, "SLT" }, +{ 1, 2, "SGE" }, + { 1, 3, "MAD" }, + { 1, 2, "SUB" }, +{ 1, 3, "LERP" }, +{ 1, 2, "CND" }, +{ 1, 2, "CND0" }, +{ 1, 2, "DOT2ADD" }, +{ 1, 2, "INDEX" }, +{ 1, 2, "NEGATE" }, +{ 1, 2, "FRAC" }, +{ 1, 2, "CLAMP" }, +{ 1, 2, "FLOOR" }, +{ 1, 2, "ROUND" }, +{ 1, 2, "EXPBASE2" }, +{ 1, 2, "LOGBASE2" }, +{ 1, 2, "POWER" }, +{ 1, 2, "CROSSPRODUCT" }, +{ 1, 2, "MULTIPLYMATRIX" }, + { 1, 2, "ABS" }, +{ 1, 2, "RCC" }, +{ 1, 2, "DPH" }, +{ 1, 2, "COS" }, +{ 1, 2, "DDX" }, +{ 1, 2, "DDY" }, +{ 1, 2, "KILP" }, +{ 1, 2, "PK2H" }, +{ 1, 2, "PK2US" }, +{ 1, 2, "PK4B" }, +{ 1, 2, "PK4UB" }, +{ 1, 2, "RFL" }, +{ 1, 2, "SEQ" }, +{ 1, 2, "SFL" }, +{ 1, 2, "SGT" }, +{ 1, 2, "SIN" }, +{ 1, 2, "SLE" }, +{ 1, 2, "SNE" }, +{ 1, 2, "STR" }, +{ 1, 2, "TEX" }, +{ 1, 2, "TXD" }, +{ 1, 2, "TXP" }, +{ 1, 2, "UP2H" }, +{ 1, 2, "UP2US" }, +{ 1, 2, "UP4B" }, +{ 1, 2, "UP4UB" }, +{ 1, 2, "X2D" }, +{ 1, 2, "ARA" }, +{ 1, 2, "ARR" }, +{ 1, 2, "BRA" }, +{ 1, 2, "CAL" }, +{ 1, 2, "RET" }, +{ 1, 2, "SSG" }, +{ 1, 2, "CMP" }, +{ 1, 2, "SCS" }, +{ 1, 2, "TXB" }, +{ 1, 2, "NRM" }, +{ 1, 2, "DIV" }, +{ 1, 2, "DP2" }, +{ 1, 2, "TXL" }, +{ 1, 2, "BRK" }, +{ 1, 2, "IF" }, +{ 1, 2, "LOOP" }, +{ 1, 2, "REP" }, +{ 1, 2, "ELSE" }, +{ 1, 2, "ENDIF" }, +{ 1, 2, "ENDLOOP" }, +{ 1, 2, "ENDREP" }, +{ 1, 2, "PUSHA" }, +{ 1, 2, "POPA" }, +{ 1, 2, "CEIL" }, +{ 1, 2, "I2F" }, +{ 1, 2, "NOT" }, +{ 1, 2, "TRUNC" }, +{ 1, 2, "SHL" }, +{ 1, 2, "SHR" }, +{ 1, 2, "AND" }, +{ 1, 2, "OR" }, +{ 1, 2, "MOD" }, +{ 1, 2, "XOR" }, +{ 1, 2, "SAD" }, +{ 1, 2, "TXF" }, +{ 1, 2, "TXQ" }, +{ 1, 2, "CONT" }, +{ 1, 2, "EMIT" }, +{ 1, 2, "ENDPRIM" }, +{ 1, 2, "BGNLOOP2" }, +{ 1, 2, "BGNSUB" }, +{ 1, 2, "ENDLOOP2" }, +{ 1, 2, "ENDSUB" }, +{ 1, 2, "NOISE1" }, +{ 1, 2, "NOISE2" }, +{ 1, 2, "NOISE3" }, +{ 1, 2, "NOISE4" }, +{ 1, 2, "NOP" }, +{ 1, 2, "M4X3" }, +{ 1, 2, "M3X4" }, +{ 1, 2, "M3X3" }, +{ 1, 2, "M3X2" }, +{ 1, 2, "NRM4" }, +{ 1, 2, "CALLNZ" }, +{ 1, 2, "IFC" }, +{ 1, 2, "BREAKC" }, +{ 1, 2, "KIL" }, + { 0, 0, "END" } +}; + +static boolean parse_instruction( struct translate_ctx *ctx ) +{ + uint i; + const struct opcode_info *info; + struct tgsi_full_instruction inst; + uint advance; + + /* Parse instruction name. + */ + eat_opt_white( &ctx->cur ); + for (i = 0; i < TGSI_OPCODE_LAST; i++) { + const char *cur = ctx->cur; + const char *op = opcode_info[i].mnemonic; + + while (*op != '\0' && *op == toupper( *cur )) { + op++; + cur++; + } + if (*op == '\0') { + /* TODO: _SAT suffix */ + if (*cur == '\0' || eat_white( &cur )) { + ctx->cur = cur; + break; + } + } + } + if (i == TGSI_OPCODE_LAST) { + report_error( ctx, "Unknown opcode" ); + return FALSE; + } + info = &opcode_info[i]; + + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = i; + inst.Instruction.NumDstRegs = info->num_dst; + inst.Instruction.NumSrcRegs = info->num_src; + + /* Parse instruction operands. + */ + for (i = 0; i < info->num_dst + info->num_src; i++) { + if (i > 0) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ',') { + report_error( ctx, "Expected `,'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + } + + if (i < info->num_dst) { + if (!parse_dst_operand( ctx, &inst.FullDstRegisters[i] )) + return FALSE; + } + else { + if (!parse_src_operand( ctx, &inst.FullSrcRegisters[i - info->num_dst] )) + return FALSE; + } + } + eat_opt_white( &ctx->cur ); + + advance = tgsi_build_full_instruction( + &inst, + ctx->tokens_cur, + ctx->header, + (uint) (ctx->tokens_end - ctx->tokens_cur) ); + if (advance == 0) + return FALSE; + ctx->tokens_cur += advance; + + return TRUE; +} + +static boolean translate( struct translate_ctx *ctx ) +{ + eat_opt_white( &ctx->cur ); + if (!parse_header( ctx )) + return FALSE; + + eat_white( &ctx->cur ); + while (*ctx->cur != '\0') { + uint label_val = 0; + + if (parse_label( ctx, &label_val )) { + if (!parse_instruction( ctx )) + return FALSE; + } + else { + report_error( ctx, "Instruction expected" ); + return FALSE; + } + } + + return TRUE; +} + +boolean +tgsi_text_translate( + const char *text, + struct tgsi_token *tokens, + uint num_tokens ) +{ + struct translate_ctx ctx; + + ctx.text = text; + ctx.cur = text; + ctx.tokens = tokens; + ctx.tokens_cur = tokens; + ctx.tokens_end = tokens + num_tokens; + + return translate( &ctx ); +} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.h b/src/gallium/auxiliary/tgsi/util/tgsi_text.h new file mode 100644 index 0000000000..8eeeeef140 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.h @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_TEXT_H +#define TGSI_TEXT_H + +#include "pipe/p_shader_tokens.h" + +#if defined __cplusplus +extern "C" { +#endif + +boolean +tgsi_text_translate( + const char *text, + struct tgsi_token *tokens, + uint num_tokens ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_TEXT_H */ -- cgit v1.2.3 From c415de5e251eb4004b1ef5bc57299032d95c4842 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sat, 12 Jul 2008 17:10:21 +0200 Subject: scons: List `util/tgsi_text.c'. --- src/gallium/auxiliary/tgsi/SConscript | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript index 4632dcc072..b62c8efe02 100644 --- a/src/gallium/auxiliary/tgsi/SConscript +++ b/src/gallium/auxiliary/tgsi/SConscript @@ -9,6 +9,7 @@ tgsi = env.ConvenienceLibrary( 'util/tgsi_dump.c', 'util/tgsi_parse.c', 'util/tgsi_scan.c', + 'util/tgsi_text.c', 'util/tgsi_transform.c', 'util/tgsi_util.c', ]) -- cgit v1.2.3 From 92d711e9e6c1934e1cec774bfa4581869530cda6 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin Date: Sun, 13 Jul 2008 11:33:41 +0200 Subject: llvm: build fixes. --- src/gallium/auxiliary/draw/draw_vs_llvm.c | 10 +++++----- src/gallium/auxiliary/gallivm/instructions.h | 2 +- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 621472ec7c..c63bd51a10 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -119,7 +119,7 @@ draw_create_vs_llvm(struct draw_context *draw, vs->base.create_varient = draw_vs_varient_generic; vs->base.run_linear = vs_llvm_run_linear; vs->base.delete = vs_llvm_delete; - vs->machine = &draw->machine; + vs->machine = &draw->vs.machine; { struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS); @@ -130,16 +130,16 @@ draw_create_vs_llvm(struct draw_context *draw, gallivm_ir_delete(ir); } - draw->engine = gallivm_global_cpu_engine(); + draw->vs.engine = gallivm_global_cpu_engine(); /* XXX: Why are there two versions of this? Shouldn't creating the * engine be a separate operation to compiling a shader? */ - if (!draw->engine) { - draw->engine = gallivm_cpu_engine_create(vs->llvm_prog); + if (!draw->vs.engine) { + draw->vs.engine = gallivm_cpu_engine_create(vs->llvm_prog); } else { - gallivm_cpu_jit_compile(draw->engine, vs->llvm_prog); + gallivm_cpu_jit_compile(draw->vs.engine, vs->llvm_prog); } return &vs->base; diff --git a/src/gallium/auxiliary/gallivm/instructions.h b/src/gallium/auxiliary/gallivm/instructions.h index 19ca84ddc6..3a476928b6 100644 --- a/src/gallium/auxiliary/gallivm/instructions.h +++ b/src/gallium/auxiliary/gallivm/instructions.h @@ -85,7 +85,7 @@ public: llvm::Value *lit(llvm::Value *in); llvm::Value *lg2(llvm::Value *in); llvm::Value *madd(llvm::Value *in1, llvm::Value *in2, - llvm::Value *in2); + llvm::Value *in3); llvm::Value *min(llvm::Value *in1, llvm::Value *in2); llvm::Value *max(llvm::Value *in1, llvm::Value *in2); llvm::Value *mul(llvm::Value *in1, llvm::Value *in2); -- cgit v1.2.3 From bd3b47590e2a8e91a8f5545d7c8872b26879c228 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 13 Jul 2008 11:30:02 +0200 Subject: tgsi: Remove depricated ATTRIB interpolate mode. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 3 +-- src/gallium/include/pipe/p_shader_tokens.h | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 6356b6de06..59e7aaeceb 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -119,8 +119,7 @@ static const char *TGSI_INTERPOLATES_SHORT[] = { "CONSTANT", "LINEAR", - "PERSPECTIVE", - "ATTRIB" + "PERSPECTIVE" }; static const char *TGSI_SEMANTICS[] = diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 84e5096418..33268553ff 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -75,6 +75,7 @@ enum tgsi_file_type { #define TGSI_INTERPOLATE_CONSTANT 0 #define TGSI_INTERPOLATE_LINEAR 1 #define TGSI_INTERPOLATE_PERSPECTIVE 2 +#define TGSI_INTERPOLATE_COUNT 3 struct tgsi_declaration { -- cgit v1.2.3 From ee647b9020b5e16b9b6d399edfa4a2c99f491863 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 13 Jul 2008 11:37:36 +0200 Subject: tgsi: Parse DCL statements. --- src/gallium/auxiliary/tgsi/util/tgsi_text.c | 330 +++++++++++++++++++++------- 1 file changed, 256 insertions(+), 74 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c index bb365d1efe..03c9217243 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c @@ -49,6 +49,21 @@ static boolean is_digit_alpha_underscore( const char *cur ) return is_digit( cur ) || is_alpha_underscore( cur ); } +static boolean str_match_no_case( const char **pcur, const char *str ) +{ + const char *cur = *pcur; + + while (*str != '\0' && *str == toupper( *cur )) { + str++; + cur++; + } + if (*str == '\0') { + *pcur = cur; + return TRUE; + } + return FALSE; +} + /* Eat zero or more whitespaces. */ static void eat_opt_white( const char **pcur ) @@ -110,28 +125,14 @@ static boolean parse_header( struct translate_ctx *ctx ) { uint processor; - if (ctx->cur[0] == 'F' && ctx->cur[1] == 'R' && ctx->cur[2] == 'A' && ctx->cur[3] == 'G') { - ctx->cur += 4; + if (str_match_no_case( &ctx->cur, "FRAG1.1" )) processor = TGSI_PROCESSOR_FRAGMENT; - } - else if (ctx->cur[0] == 'V' && ctx->cur[1] == 'E' && ctx->cur[2] == 'R' && ctx->cur[3] == 'T') { - ctx->cur += 4; + else if (str_match_no_case( &ctx->cur, "VERT1.1" )) processor = TGSI_PROCESSOR_VERTEX; - } - else if (ctx->cur[0] == 'G' && ctx->cur[1] == 'E' && ctx->cur[2] == 'O' && ctx->cur[3] == 'M') { - ctx->cur += 4; + else if (str_match_no_case( &ctx->cur, "GEOM1.1" )) processor = TGSI_PROCESSOR_GEOMETRY; - } - else { - report_error( ctx, "Unknown processor type" ); - return FALSE; - } - - if (ctx->cur[0] == '1' && ctx->cur[1] == '.' && ctx->cur[2] == '1') { - ctx->cur += 3; - } else { - report_error( ctx, "Unknown version" ); + report_error( ctx, "Unknown header" ); return FALSE; } @@ -187,16 +188,13 @@ parse_file( for (i = 0; i < TGSI_FILE_COUNT; i++) { const char *cur = ctx->cur; - const char *name = file_names[i]; - while (*name != '\0' && *name == toupper( *cur )) { - name++; - cur++; - } - if (*name == '\0' && !is_digit_alpha_underscore( cur )) { - ctx->cur = cur; - *file = i; - return TRUE; + if (str_match_no_case( &cur, file_names[i] )) { + if (!is_digit_alpha_underscore( cur )) { + ctx->cur = cur; + *file = i; + return TRUE; + } } } report_error( ctx, "Unknown register file" ); @@ -204,7 +202,53 @@ parse_file( } static boolean -parse_register( +parse_opt_writemask( + struct translate_ctx *ctx, + uint *writemask ) +{ + const char *cur; + + cur = ctx->cur; + eat_opt_white( &cur ); + if (*cur == '.') { + cur++; + *writemask = TGSI_WRITEMASK_NONE; + eat_opt_white( &cur ); + if (toupper( *cur ) == 'X') { + cur++; + *writemask |= TGSI_WRITEMASK_X; + } + if (toupper( *cur ) == 'Y') { + cur++; + *writemask |= TGSI_WRITEMASK_Y; + } + if (toupper( *cur ) == 'Z') { + cur++; + *writemask |= TGSI_WRITEMASK_Z; + } + if (toupper( *cur ) == 'W') { + cur++; + *writemask |= TGSI_WRITEMASK_W; + } + + if (*writemask == TGSI_WRITEMASK_NONE) { + report_error( ctx, "Writemask expected" ); + return FALSE; + } + + ctx->cur = cur; + } + else { + *writemask = TGSI_WRITEMASK_XYZW; + } + return TRUE; +} + +/* Parse register part common for decls and operands. + * ::= `[' + */ +static boolean +parse_register_prefix( struct translate_ctx *ctx, uint *file, uint *index ) @@ -222,6 +266,20 @@ parse_register( report_error( ctx, "Expected literal integer" ); return FALSE; } + return TRUE; +} + +/* Parse register operand. + * ::= `]' + */ +static boolean +parse_register( + struct translate_ctx *ctx, + uint *file, + uint *index ) +{ + if (!parse_register_prefix( ctx, file, index )) + return FALSE; eat_opt_white( &ctx->cur ); if (*ctx->cur != ']') { report_error( ctx, "Expected `]'" ); @@ -232,54 +290,55 @@ parse_register( return TRUE; } +/* Parse register declaration. + * ::= `]' | `..' `]' + */ +static boolean +parse_register_dcl( + struct translate_ctx *ctx, + uint *file, + uint *first, + uint *last ) +{ + if (!parse_register_prefix( ctx, file, first )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (ctx->cur[0] == '.' && ctx->cur[1] == '.') { + ctx->cur += 2; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, last )) { + report_error( ctx, "Expected literal integer" ); + return FALSE; + } + eat_opt_white( &ctx->cur ); + } + else { + *last = *first; + } + if (*ctx->cur != ']') { + report_error( ctx, "Expected `]' or `..'" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + static boolean parse_dst_operand( struct translate_ctx *ctx, struct tgsi_full_dst_register *dst ) { - const char *cur; uint file; uint index; + uint writemask; if (!parse_register( ctx, &file, &index )) return FALSE; + if (!parse_opt_writemask( ctx, &writemask )) + return FALSE; dst->DstRegister.File = file; dst->DstRegister.Index = index; - - /* Parse optional write mask. - */ - cur = ctx->cur; - eat_opt_white( &cur ); - if (*cur == '.') { - uint writemask = TGSI_WRITEMASK_NONE; - - cur++; - eat_opt_white( &cur ); - if (toupper( *cur ) == 'X') { - cur++; - writemask |= TGSI_WRITEMASK_X; - } - if (toupper( *cur ) == 'Y') { - cur++; - writemask |= TGSI_WRITEMASK_Y; - } - if (toupper( *cur ) == 'Z') { - cur++; - writemask |= TGSI_WRITEMASK_Z; - } - if (toupper( *cur ) == 'W') { - cur++; - writemask |= TGSI_WRITEMASK_W; - } - - if (writemask == TGSI_WRITEMASK_NONE) { - report_error( ctx, "Writemask expected" ); - return FALSE; - } - - dst->DstRegister.WriteMask = writemask; - ctx->cur = cur; - } + dst->DstRegister.WriteMask = writemask; return TRUE; } @@ -478,15 +537,10 @@ static boolean parse_instruction( struct translate_ctx *ctx ) eat_opt_white( &ctx->cur ); for (i = 0; i < TGSI_OPCODE_LAST; i++) { const char *cur = ctx->cur; - const char *op = opcode_info[i].mnemonic; - while (*op != '\0' && *op == toupper( *cur )) { - op++; - cur++; - } - if (*op == '\0') { + if (str_match_no_case( &cur, opcode_info[i].mnemonic )) { /* TODO: _SAT suffix */ - if (*cur == '\0' || eat_white( &cur )) { + if (eat_white( &cur )) { ctx->cur = cur; break; } @@ -525,7 +579,6 @@ static boolean parse_instruction( struct translate_ctx *ctx ) return FALSE; } } - eat_opt_white( &ctx->cur ); advance = tgsi_build_full_instruction( &inst, @@ -539,22 +592,151 @@ static boolean parse_instruction( struct translate_ctx *ctx ) return TRUE; } +static const char *semantic_names[TGSI_SEMANTIC_COUNT] = +{ + "POSITION", + "COLOR", + "BCOLOR", + "FOG", + "PSIZE", + "GENERIC", + "NORMAL" +}; + +static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] = +{ + "CONSTANT", + "LINEAR", + "PERSPECTIVE" +}; + +static boolean parse_declaration( struct translate_ctx *ctx ) +{ + struct tgsi_full_declaration decl; + uint file; + uint first; + uint last; + uint writemask; + const char *cur; + uint advance; + + if (!eat_white( &ctx->cur )) { + report_error( ctx, "Syntax error" ); + return FALSE; + } + if (!parse_register_dcl( ctx, &file, &first, &last )) + return FALSE; + if (!parse_opt_writemask( ctx, &writemask )) + return FALSE; + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = file; + decl.Declaration.UsageMask = writemask; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + cur = ctx->cur; + eat_opt_white( &cur ); + if (*cur == ',') { + uint i; + + cur++; + eat_opt_white( &cur ); + for (i = 0; i < TGSI_SEMANTIC_COUNT; i++) { + if (str_match_no_case( &cur, semantic_names[i] )) { + const char *cur2 = cur; + uint index; + + if (is_digit_alpha_underscore( cur )) + continue; + eat_opt_white( &cur2 ); + if (*cur2 == '[') { + cur2++; + eat_opt_white( &cur2 ); + if (!parse_uint( &cur2, &index )) { + report_error( ctx, "Expected literal integer" ); + return FALSE; + } + eat_opt_white( &cur2 ); + if (*cur2 != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + cur2++; + + decl.Semantic.SemanticIndex = index; + + cur = cur2; + } + + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = i; + + ctx->cur = cur; + break; + } + } + } + + cur = ctx->cur; + eat_opt_white( &cur ); + if (*cur == ',') { + uint i; + + cur++; + eat_opt_white( &cur ); + for (i = 0; i < TGSI_INTERPOLATE_COUNT; i++) { + if (str_match_no_case( &cur, interpolate_names[i] )) { + if (is_digit_alpha_underscore( cur )) + continue; + decl.Declaration.Interpolate = i; + + ctx->cur = cur; + break; + } + } + if (i == TGSI_INTERPOLATE_COUNT) { + report_error( ctx, "Expected semantic or interpolate attribute" ); + return FALSE; + } + } + + advance = tgsi_build_full_declaration( + &decl, + ctx->tokens_cur, + ctx->header, + (uint) (ctx->tokens_end - ctx->tokens_cur) ); + if (advance == 0) + return FALSE; + ctx->tokens_cur += advance; + + return TRUE; +} + static boolean translate( struct translate_ctx *ctx ) { eat_opt_white( &ctx->cur ); if (!parse_header( ctx )) return FALSE; - eat_white( &ctx->cur ); while (*ctx->cur != '\0') { uint label_val = 0; + if (!eat_white( &ctx->cur )) { + report_error( ctx, "Syntax error" ); + return FALSE; + } + if (parse_label( ctx, &label_val )) { if (!parse_instruction( ctx )) return FALSE; } + else if (str_match_no_case( &ctx->cur, "DCL" )) { + if (!parse_declaration( ctx )) + return FALSE; + } else { - report_error( ctx, "Instruction expected" ); + report_error( ctx, "Expected `DCL' or a label" ); return FALSE; } } -- cgit v1.2.3 From 625034104aacaca793ff373414eff4ee53afc1fe Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 13 Jul 2008 11:42:33 +0200 Subject: tgsi: Add missing SWZ opcode. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 3 ++- src/gallium/auxiliary/tgsi/util/tgsi_text.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 59e7aaeceb..cae74a9267 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -395,7 +395,8 @@ static const char *TGSI_OPCODES_SHORT[TGSI_OPCODE_LAST] = "IFC", "BREAKC", "KIL", - "END" + "END", + "SWZ" }; static const char *TGSI_SATS[] = diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c index 03c9217243..3415b38f9c 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c @@ -522,7 +522,8 @@ static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 2, "IFC" }, { 1, 2, "BREAKC" }, { 1, 2, "KIL" }, - { 0, 0, "END" } + { 0, 0, "END" }, + { 1, 1, "SWZ" } }; static boolean parse_instruction( struct translate_ctx *ctx ) -- cgit v1.2.3 From cfd2bf9fa127a7f0b1a89650fde34a23f318f90c Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 13 Jul 2008 11:43:30 +0200 Subject: tgsi: Fix instruction opcode parsing. --- src/gallium/auxiliary/tgsi/util/tgsi_text.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c index 3415b38f9c..7848f3d55f 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c @@ -541,7 +541,7 @@ static boolean parse_instruction( struct translate_ctx *ctx ) if (str_match_no_case( &cur, opcode_info[i].mnemonic )) { /* TODO: _SAT suffix */ - if (eat_white( &cur )) { + if (*cur == '\0' || eat_white( &cur )) { ctx->cur = cur; break; } -- cgit v1.2.3 From 46a7843099f02b6dcff56a52c9247c11d5c4aa8b Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 13 Jul 2008 12:16:16 +0200 Subject: tgsi: Fix instruction operand counts. --- src/gallium/auxiliary/tgsi/util/tgsi_text.c | 208 ++++++++++++++-------------- 1 file changed, 104 insertions(+), 104 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c index 7848f3d55f..3c7a4688c9 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c @@ -405,9 +405,9 @@ struct opcode_info static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] = { -{ 1, 1, "ARL" }, + { 1, 1, "ARL" }, { 1, 1, "MOV" }, -{ 1, 1, "LIT" }, + { 1, 1, "LIT" }, { 1, 1, "RCP" }, { 1, 1, "RSQ" }, { 1, 1, "EXP" }, @@ -416,112 +416,112 @@ static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 2, "ADD" }, { 1, 2, "DP3" }, { 1, 2, "DP4" }, -{ 1, 2, "DST" }, + { 1, 2, "DST" }, { 1, 2, "MIN" }, { 1, 2, "MAX" }, -{ 1, 2, "SLT" }, -{ 1, 2, "SGE" }, + { 1, 2, "SLT" }, + { 1, 2, "SGE" }, { 1, 3, "MAD" }, { 1, 2, "SUB" }, -{ 1, 3, "LERP" }, -{ 1, 2, "CND" }, -{ 1, 2, "CND0" }, -{ 1, 2, "DOT2ADD" }, -{ 1, 2, "INDEX" }, -{ 1, 2, "NEGATE" }, -{ 1, 2, "FRAC" }, -{ 1, 2, "CLAMP" }, -{ 1, 2, "FLOOR" }, -{ 1, 2, "ROUND" }, -{ 1, 2, "EXPBASE2" }, -{ 1, 2, "LOGBASE2" }, -{ 1, 2, "POWER" }, -{ 1, 2, "CROSSPRODUCT" }, -{ 1, 2, "MULTIPLYMATRIX" }, - { 1, 2, "ABS" }, -{ 1, 2, "RCC" }, -{ 1, 2, "DPH" }, -{ 1, 2, "COS" }, -{ 1, 2, "DDX" }, -{ 1, 2, "DDY" }, -{ 1, 2, "KILP" }, -{ 1, 2, "PK2H" }, -{ 1, 2, "PK2US" }, -{ 1, 2, "PK4B" }, -{ 1, 2, "PK4UB" }, -{ 1, 2, "RFL" }, -{ 1, 2, "SEQ" }, -{ 1, 2, "SFL" }, -{ 1, 2, "SGT" }, -{ 1, 2, "SIN" }, -{ 1, 2, "SLE" }, -{ 1, 2, "SNE" }, -{ 1, 2, "STR" }, -{ 1, 2, "TEX" }, -{ 1, 2, "TXD" }, -{ 1, 2, "TXP" }, -{ 1, 2, "UP2H" }, -{ 1, 2, "UP2US" }, -{ 1, 2, "UP4B" }, -{ 1, 2, "UP4UB" }, -{ 1, 2, "X2D" }, -{ 1, 2, "ARA" }, -{ 1, 2, "ARR" }, -{ 1, 2, "BRA" }, -{ 1, 2, "CAL" }, -{ 1, 2, "RET" }, -{ 1, 2, "SSG" }, -{ 1, 2, "CMP" }, -{ 1, 2, "SCS" }, -{ 1, 2, "TXB" }, -{ 1, 2, "NRM" }, -{ 1, 2, "DIV" }, -{ 1, 2, "DP2" }, -{ 1, 2, "TXL" }, -{ 1, 2, "BRK" }, -{ 1, 2, "IF" }, -{ 1, 2, "LOOP" }, -{ 1, 2, "REP" }, -{ 1, 2, "ELSE" }, -{ 1, 2, "ENDIF" }, -{ 1, 2, "ENDLOOP" }, -{ 1, 2, "ENDREP" }, -{ 1, 2, "PUSHA" }, -{ 1, 2, "POPA" }, -{ 1, 2, "CEIL" }, -{ 1, 2, "I2F" }, -{ 1, 2, "NOT" }, -{ 1, 2, "TRUNC" }, -{ 1, 2, "SHL" }, -{ 1, 2, "SHR" }, -{ 1, 2, "AND" }, -{ 1, 2, "OR" }, -{ 1, 2, "MOD" }, -{ 1, 2, "XOR" }, -{ 1, 2, "SAD" }, -{ 1, 2, "TXF" }, -{ 1, 2, "TXQ" }, -{ 1, 2, "CONT" }, -{ 1, 2, "EMIT" }, -{ 1, 2, "ENDPRIM" }, -{ 1, 2, "BGNLOOP2" }, -{ 1, 2, "BGNSUB" }, -{ 1, 2, "ENDLOOP2" }, -{ 1, 2, "ENDSUB" }, -{ 1, 2, "NOISE1" }, -{ 1, 2, "NOISE2" }, -{ 1, 2, "NOISE3" }, -{ 1, 2, "NOISE4" }, -{ 1, 2, "NOP" }, -{ 1, 2, "M4X3" }, -{ 1, 2, "M3X4" }, -{ 1, 2, "M3X3" }, -{ 1, 2, "M3X2" }, -{ 1, 2, "NRM4" }, -{ 1, 2, "CALLNZ" }, -{ 1, 2, "IFC" }, -{ 1, 2, "BREAKC" }, -{ 1, 2, "KIL" }, + { 1, 3, "LERP" }, + { 1, 3, "CND" }, + { 1, 3, "CND0" }, + { 1, 3, "DOT2ADD" }, + { 1, 2, "INDEX" }, + { 1, 1, "NEGATE" }, + { 1, 1, "FRAC" }, + { 1, 3, "CLAMP" }, + { 1, 1, "FLOOR" }, + { 1, 1, "ROUND" }, + { 1, 1, "EXPBASE2" }, + { 1, 1, "LOGBASE2" }, + { 1, 2, "POWER" }, + { 1, 2, "CROSSPRODUCT" }, + { 1, 2, "MULTIPLYMATRIX" }, + { 1, 1, "ABS" }, + { 1, 1, "RCC" }, + { 1, 2, "DPH" }, + { 1, 1, "COS" }, + { 1, 1, "DDX" }, + { 1, 1, "DDY" }, + { 0, 1, "KILP" }, + { 1, 1, "PK2H" }, + { 1, 1, "PK2US" }, + { 1, 1, "PK4B" }, + { 1, 1, "PK4UB" }, + { 1, 2, "RFL" }, + { 1, 2, "SEQ" }, + { 1, 2, "SFL" }, + { 1, 2, "SGT" }, + { 1, 1, "SIN" }, + { 1, 2, "SLE" }, + { 1, 2, "SNE" }, + { 1, 2, "STR" }, + { 1, 1, "TEX" }, + { 1, 3, "TXD" }, + { 1, 1, "TXP" }, + { 1, 1, "UP2H" }, + { 1, 1, "UP2US" }, + { 1, 1, "UP4B" }, + { 1, 1, "UP4UB" }, + { 1, 3, "X2D" }, + { 1, 1, "ARA" }, + { 1, 1, "ARR" }, + { 0, 1, "BRA" }, + { 0, 0, "CAL" }, + { 0, 0, "RET" }, + { 1, 1, "SSG" }, + { 1, 3, "CMP" }, + { 1, 1, "SCS" }, + { 1, 1, "TXB" }, + { 1, 1, "NRM" }, + { 1, 2, "DIV" }, + { 1, 2, "DP2" }, + { 1, 1, "TXL" }, + { 0, 0, "BRK" }, + { 0, 1, "IF" }, + { 0, 0, "LOOP" }, + { 0, 1, "REP" }, + { 0, 0, "ELSE" }, + { 0, 0, "ENDIF" }, + { 0, 0, "ENDLOOP" }, + { 0, 0, "ENDREP" }, + { 0, 1, "PUSHA" }, + { 1, 0, "POPA" }, + { 1, 1, "CEIL" }, + { 1, 1, "I2F" }, + { 1, 1, "NOT" }, + { 1, 1, "TRUNC" }, + { 1, 2, "SHL" }, + { 1, 2, "SHR" }, + { 1, 2, "AND" }, + { 1, 2, "OR" }, + { 1, 2, "MOD" }, + { 1, 2, "XOR" }, + { 1, 3, "SAD" }, + { 1, 1, "TXF" }, + { 1, 1, "TXQ" }, + { 0, 0, "CONT" }, + { 0, 0, "EMIT" }, + { 0, 0, "ENDPRIM" }, + { 0, 0, "BGNLOOP2" }, + { 0, 0, "BGNSUB" }, + { 0, 0, "ENDLOOP2" }, + { 0, 0, "ENDSUB" }, + { 1, 1, "NOISE1" }, + { 1, 1, "NOISE2" }, + { 1, 1, "NOISE3" }, + { 1, 1, "NOISE4" }, + { 0, 0, "NOP" }, + { 1, 2, "M4X3" }, + { 1, 2, "M3X4" }, + { 1, 2, "M3X3" }, + { 1, 2, "M3X2" }, + { 1, 1, "NRM4" }, + { 0, 1, "CALLNZ" }, + { 0, 1, "IFC" }, + { 0, 1, "BREAKC" }, + { 0, 0, "KIL" }, { 0, 0, "END" }, { 1, 1, "SWZ" } }; -- cgit v1.2.3 From 3d5dcc2203f5018863ad51958871542696e1ed1b Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 13 Jul 2008 13:13:39 +0200 Subject: tgsi: Parse texture instructions correctly. --- src/gallium/auxiliary/tgsi/util/tgsi_text.c | 272 +++++++++++++++------------- src/gallium/include/pipe/p_shader_tokens.h | 1 + 2 files changed, 152 insertions(+), 121 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c index 3c7a4688c9..7485002862 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c @@ -400,130 +400,144 @@ struct opcode_info { uint num_dst; uint num_src; + uint is_tex; const char *mnemonic; }; static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] = { - { 1, 1, "ARL" }, - { 1, 1, "MOV" }, - { 1, 1, "LIT" }, - { 1, 1, "RCP" }, - { 1, 1, "RSQ" }, - { 1, 1, "EXP" }, - { 1, 1, "LOG" }, - { 1, 2, "MUL" }, - { 1, 2, "ADD" }, - { 1, 2, "DP3" }, - { 1, 2, "DP4" }, - { 1, 2, "DST" }, - { 1, 2, "MIN" }, - { 1, 2, "MAX" }, - { 1, 2, "SLT" }, - { 1, 2, "SGE" }, - { 1, 3, "MAD" }, - { 1, 2, "SUB" }, - { 1, 3, "LERP" }, - { 1, 3, "CND" }, - { 1, 3, "CND0" }, - { 1, 3, "DOT2ADD" }, - { 1, 2, "INDEX" }, - { 1, 1, "NEGATE" }, - { 1, 1, "FRAC" }, - { 1, 3, "CLAMP" }, - { 1, 1, "FLOOR" }, - { 1, 1, "ROUND" }, - { 1, 1, "EXPBASE2" }, - { 1, 1, "LOGBASE2" }, - { 1, 2, "POWER" }, - { 1, 2, "CROSSPRODUCT" }, - { 1, 2, "MULTIPLYMATRIX" }, - { 1, 1, "ABS" }, - { 1, 1, "RCC" }, - { 1, 2, "DPH" }, - { 1, 1, "COS" }, - { 1, 1, "DDX" }, - { 1, 1, "DDY" }, - { 0, 1, "KILP" }, - { 1, 1, "PK2H" }, - { 1, 1, "PK2US" }, - { 1, 1, "PK4B" }, - { 1, 1, "PK4UB" }, - { 1, 2, "RFL" }, - { 1, 2, "SEQ" }, - { 1, 2, "SFL" }, - { 1, 2, "SGT" }, - { 1, 1, "SIN" }, - { 1, 2, "SLE" }, - { 1, 2, "SNE" }, - { 1, 2, "STR" }, - { 1, 1, "TEX" }, - { 1, 3, "TXD" }, - { 1, 1, "TXP" }, - { 1, 1, "UP2H" }, - { 1, 1, "UP2US" }, - { 1, 1, "UP4B" }, - { 1, 1, "UP4UB" }, - { 1, 3, "X2D" }, - { 1, 1, "ARA" }, - { 1, 1, "ARR" }, - { 0, 1, "BRA" }, - { 0, 0, "CAL" }, - { 0, 0, "RET" }, - { 1, 1, "SSG" }, - { 1, 3, "CMP" }, - { 1, 1, "SCS" }, - { 1, 1, "TXB" }, - { 1, 1, "NRM" }, - { 1, 2, "DIV" }, - { 1, 2, "DP2" }, - { 1, 1, "TXL" }, - { 0, 0, "BRK" }, - { 0, 1, "IF" }, - { 0, 0, "LOOP" }, - { 0, 1, "REP" }, - { 0, 0, "ELSE" }, - { 0, 0, "ENDIF" }, - { 0, 0, "ENDLOOP" }, - { 0, 0, "ENDREP" }, - { 0, 1, "PUSHA" }, - { 1, 0, "POPA" }, - { 1, 1, "CEIL" }, - { 1, 1, "I2F" }, - { 1, 1, "NOT" }, - { 1, 1, "TRUNC" }, - { 1, 2, "SHL" }, - { 1, 2, "SHR" }, - { 1, 2, "AND" }, - { 1, 2, "OR" }, - { 1, 2, "MOD" }, - { 1, 2, "XOR" }, - { 1, 3, "SAD" }, - { 1, 1, "TXF" }, - { 1, 1, "TXQ" }, - { 0, 0, "CONT" }, - { 0, 0, "EMIT" }, - { 0, 0, "ENDPRIM" }, - { 0, 0, "BGNLOOP2" }, - { 0, 0, "BGNSUB" }, - { 0, 0, "ENDLOOP2" }, - { 0, 0, "ENDSUB" }, - { 1, 1, "NOISE1" }, - { 1, 1, "NOISE2" }, - { 1, 1, "NOISE3" }, - { 1, 1, "NOISE4" }, - { 0, 0, "NOP" }, - { 1, 2, "M4X3" }, - { 1, 2, "M3X4" }, - { 1, 2, "M3X3" }, - { 1, 2, "M3X2" }, - { 1, 1, "NRM4" }, - { 0, 1, "CALLNZ" }, - { 0, 1, "IFC" }, - { 0, 1, "BREAKC" }, - { 0, 0, "KIL" }, - { 0, 0, "END" }, - { 1, 1, "SWZ" } + { 1, 1, 0, "ARL" }, + { 1, 1, 0, "MOV" }, + { 1, 1, 0, "LIT" }, + { 1, 1, 0, "RCP" }, + { 1, 1, 0, "RSQ" }, + { 1, 1, 0, "EXP" }, + { 1, 1, 0, "LOG" }, + { 1, 2, 0, "MUL" }, + { 1, 2, 0, "ADD" }, + { 1, 2, 0, "DP3" }, + { 1, 2, 0, "DP4" }, + { 1, 2, 0, "DST" }, + { 1, 2, 0, "MIN" }, + { 1, 2, 0, "MAX" }, + { 1, 2, 0, "SLT" }, + { 1, 2, 0, "SGE" }, + { 1, 3, 0, "MAD" }, + { 1, 2, 0, "SUB" }, + { 1, 3, 0, "LERP" }, + { 1, 3, 0, "CND" }, + { 1, 3, 0, "CND0" }, + { 1, 3, 0, "DOT2ADD" }, + { 1, 2, 0, "INDEX" }, + { 1, 1, 0, "NEGATE" }, + { 1, 1, 0, "FRAC" }, + { 1, 3, 0, "CLAMP" }, + { 1, 1, 0, "FLOOR" }, + { 1, 1, 0, "ROUND" }, + { 1, 1, 0, "EXPBASE2" }, + { 1, 1, 0, "LOGBASE2" }, + { 1, 2, 0, "POWER" }, + { 1, 2, 0, "CROSSPRODUCT" }, + { 1, 2, 0, "MULTIPLYMATRIX" }, + { 1, 1, 0, "ABS" }, + { 1, 1, 0, "RCC" }, + { 1, 2, 0, "DPH" }, + { 1, 1, 0, "COS" }, + { 1, 1, 0, "DDX" }, + { 1, 1, 0, "DDY" }, + { 0, 1, 0, "KILP" }, + { 1, 1, 0, "PK2H" }, + { 1, 1, 0, "PK2US" }, + { 1, 1, 0, "PK4B" }, + { 1, 1, 0, "PK4UB" }, + { 1, 2, 0, "RFL" }, + { 1, 2, 0, "SEQ" }, + { 1, 2, 0, "SFL" }, + { 1, 2, 0, "SGT" }, + { 1, 1, 0, "SIN" }, + { 1, 2, 0, "SLE" }, + { 1, 2, 0, "SNE" }, + { 1, 2, 0, "STR" }, + { 1, 2, 1, "TEX" }, + { 1, 4, 1, "TXD" }, + { 1, 2, 1, "TXP" }, + { 1, 1, 0, "UP2H" }, + { 1, 1, 0, "UP2US" }, + { 1, 1, 0, "UP4B" }, + { 1, 1, 0, "UP4UB" }, + { 1, 3, 0, "X2D" }, + { 1, 1, 0, "ARA" }, + { 1, 1, 0, "ARR" }, + { 0, 1, 0, "BRA" }, + { 0, 0, 0, "CAL" }, + { 0, 0, 0, "RET" }, + { 1, 1, 0, "SSG" }, + { 1, 3, 0, "CMP" }, + { 1, 1, 0, "SCS" }, + { 1, 2, 1, "TXB" }, + { 1, 1, 0, "NRM" }, + { 1, 2, 0, "DIV" }, + { 1, 2, 0, "DP2" }, + { 1, 2, 1, "TXL" }, + { 0, 0, 0, "BRK" }, + { 0, 1, 0, "IF" }, + { 0, 0, 0, "LOOP" }, + { 0, 1, 0, "REP" }, + { 0, 0, 0, "ELSE" }, + { 0, 0, 0, "ENDIF" }, + { 0, 0, 0, "ENDLOOP" }, + { 0, 0, 0, "ENDREP" }, + { 0, 1, 0, "PUSHA" }, + { 1, 0, 0, "POPA" }, + { 1, 1, 0, "CEIL" }, + { 1, 1, 0, "I2F" }, + { 1, 1, 0, "NOT" }, + { 1, 1, 0, "TRUNC" }, + { 1, 2, 0, "SHL" }, + { 1, 2, 0, "SHR" }, + { 1, 2, 0, "AND" }, + { 1, 2, 0, "OR" }, + { 1, 2, 0, "MOD" }, + { 1, 2, 0, "XOR" }, + { 1, 3, 0, "SAD" }, + { 1, 2, 1, "TXF" }, + { 1, 2, 1, "TXQ" }, + { 0, 0, 0, "CONT" }, + { 0, 0, 0, "EMIT" }, + { 0, 0, 0, "ENDPRIM" }, + { 0, 0, 0, "BGNLOOP2" }, + { 0, 0, 0, "BGNSUB" }, + { 0, 0, 0, "ENDLOOP2" }, + { 0, 0, 0, "ENDSUB" }, + { 1, 1, 0, "NOISE1" }, + { 1, 1, 0, "NOISE2" }, + { 1, 1, 0, "NOISE3" }, + { 1, 1, 0, "NOISE4" }, + { 0, 0, 0, "NOP" }, + { 1, 2, 0, "M4X3" }, + { 1, 2, 0, "M3X4" }, + { 1, 2, 0, "M3X3" }, + { 1, 2, 0, "M3X2" }, + { 1, 1, 0, "NRM4" }, + { 0, 1, 0, "CALLNZ" }, + { 0, 1, 0, "IFC" }, + { 0, 1, 0, "BREAKC" }, + { 0, 0, 0, "KIL" }, + { 0, 0, 0, "END" }, + { 1, 1, 0, "SWZ" } +}; + +static const char *texture_names[TGSI_TEXTURE_COUNT] = +{ + "UNKNOWN", + "1D", + "2D", + "3D", + "CUBE", + "RECT", + "SHADOW1D", + "SHADOW2D", + "SHADOWRECT" }; static boolean parse_instruction( struct translate_ctx *ctx ) @@ -560,7 +574,7 @@ static boolean parse_instruction( struct translate_ctx *ctx ) /* Parse instruction operands. */ - for (i = 0; i < info->num_dst + info->num_src; i++) { + for (i = 0; i < info->num_dst + info->num_src + info->is_tex; i++) { if (i > 0) { eat_opt_white( &ctx->cur ); if (*ctx->cur != ',') { @@ -575,10 +589,26 @@ static boolean parse_instruction( struct translate_ctx *ctx ) if (!parse_dst_operand( ctx, &inst.FullDstRegisters[i] )) return FALSE; } - else { + else if (i < info->num_dst + info->num_src) { if (!parse_src_operand( ctx, &inst.FullSrcRegisters[i - info->num_dst] )) return FALSE; } + else { + uint j; + + for (j = 0; j < TGSI_TEXTURE_COUNT; j++) { + if (str_match_no_case( &ctx->cur, texture_names[j] )) { + if (!is_digit_alpha_underscore( ctx->cur )) { + inst.InstructionExtTexture.Texture = j; + break; + } + } + } + if (j == TGSI_TEXTURE_COUNT) { + report_error( ctx, "Expected texture target" ); + return FALSE; + } + } } advance = tgsi_build_full_instruction( diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 33268553ff..c8e2830516 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -529,6 +529,7 @@ struct tgsi_instruction_ext_label #define TGSI_TEXTURE_SHADOW1D 6 #define TGSI_TEXTURE_SHADOW2D 7 #define TGSI_TEXTURE_SHADOWRECT 8 +#define TGSI_TEXTURE_COUNT 9 struct tgsi_instruction_ext_texture { -- cgit v1.2.3 From a7d8eed61c1fd0bd3d99ebcd10c24bc70f8e3293 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 13 Jul 2008 14:03:21 +0200 Subject: tgsi: Parse IMM statements. --- src/gallium/auxiliary/tgsi/util/tgsi_text.c | 111 +++++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c index 7485002862..d5a9480f4a 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c @@ -100,6 +100,51 @@ static boolean parse_uint( const char **pcur, uint *val ) return FALSE; } +/* Parse floating point. + */ +static boolean parse_float( const char **pcur, float *val ) +{ + const char *cur = *pcur; + boolean integral_part = FALSE; + boolean fractional_part = FALSE; + + *val = (float) atof( cur ); + + if (*cur == '-' || *cur == '+') + cur++; + if (is_digit( cur )) { + cur++; + integral_part = TRUE; + while (is_digit( cur )) + cur++; + } + if (*cur == '.') { + cur++; + if (is_digit( cur )) { + cur++; + fractional_part = TRUE; + while (is_digit( cur )) + cur++; + } + } + if (!integral_part && !fractional_part) + return FALSE; + if (toupper( *cur ) == 'E') { + cur++; + if (*cur == '-' || *cur == '+') + cur++; + if (is_digit( cur )) { + cur++; + while (is_digit( cur )) + cur++; + } + else + return FALSE; + } + *pcur = cur; + return TRUE; +} + struct translate_ctx { const char *text; @@ -744,6 +789,66 @@ static boolean parse_declaration( struct translate_ctx *ctx ) return TRUE; } +static boolean parse_immediate( struct translate_ctx *ctx ) +{ + struct tgsi_full_immediate imm; + uint i; + float values[4]; + uint advance; + + if (!eat_white( &ctx->cur )) { + report_error( ctx, "Syntax error" ); + return FALSE; + } + if (!str_match_no_case( &ctx->cur, "FLT32" ) || is_digit_alpha_underscore( ctx->cur )) { + report_error( ctx, "Expected `FLT32'" ); + return FALSE; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '{') { + report_error( ctx, "Expected `{'" ); + return FALSE; + } + ctx->cur++; + for (i = 0; i < 4; i++) { + eat_opt_white( &ctx->cur ); + if (i > 0) { + if (*ctx->cur != ',') { + report_error( ctx, "Expected `,'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + } + if (!parse_float( &ctx->cur, &values[i] )) { + report_error( ctx, "Expected literal floating point" ); + return FALSE; + } + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '}') { + report_error( ctx, "Expected `}'" ); + return FALSE; + } + ctx->cur++; + + imm = tgsi_default_full_immediate(); + imm.Immediate.Size += 4; + imm.Immediate.DataType = TGSI_IMM_FLOAT32; + imm.u.Pointer = values; + + advance = tgsi_build_full_immediate( + &imm, + ctx->tokens_cur, + ctx->header, + (uint) (ctx->tokens_end - ctx->tokens_cur) ); + if (advance == 0) + return FALSE; + ctx->tokens_cur += advance; + + return TRUE; +} + static boolean translate( struct translate_ctx *ctx ) { eat_opt_white( &ctx->cur ); @@ -766,8 +871,12 @@ static boolean translate( struct translate_ctx *ctx ) if (!parse_declaration( ctx )) return FALSE; } + else if (str_match_no_case( &ctx->cur, "IMM" )) { + if (!parse_immediate( ctx )) + return FALSE; + } else { - report_error( ctx, "Expected `DCL' or a label" ); + report_error( ctx, "Expected `DCL', `IMM' or a label" ); return FALSE; } } -- cgit v1.2.3 From 47a45aaa0fd1dbc0de45de2ed2995f81a0154baf Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 13 Jul 2008 14:50:12 +0200 Subject: tgsi: Parse _SAT and _SAT opcode suffix. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 2 +- src/gallium/auxiliary/tgsi/util/tgsi_text.c | 8 +++++++- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index cae74a9267..8220e09199 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -716,7 +716,7 @@ tgsi_dump_instruction( TXT( "_SAT" ); break; case TGSI_SAT_MINUS_PLUS_ONE: - TXT( "_SAT[-1,1]" ); + TXT( "_SATNV" ); break; default: assert( 0 ); diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c index d5a9480f4a..f3fc675807 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c @@ -588,6 +588,7 @@ static const char *texture_names[TGSI_TEXTURE_COUNT] = static boolean parse_instruction( struct translate_ctx *ctx ) { uint i; + uint saturate = TGSI_SAT_NONE; const struct opcode_info *info; struct tgsi_full_instruction inst; uint advance; @@ -599,7 +600,11 @@ static boolean parse_instruction( struct translate_ctx *ctx ) const char *cur = ctx->cur; if (str_match_no_case( &cur, opcode_info[i].mnemonic )) { - /* TODO: _SAT suffix */ + if (str_match_no_case( &cur, "_SATNV" )) + saturate = TGSI_SAT_MINUS_PLUS_ONE; + else if (str_match_no_case( &cur, "_SAT" )) + saturate = TGSI_SAT_ZERO_ONE; + if (*cur == '\0' || eat_white( &cur )) { ctx->cur = cur; break; @@ -614,6 +619,7 @@ static boolean parse_instruction( struct translate_ctx *ctx ) inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = i; + inst.Instruction.Saturate = saturate; inst.Instruction.NumDstRegs = info->num_dst; inst.Instruction.NumSrcRegs = info->num_src; -- cgit v1.2.3 From 94013b66b91112f31802c3d935e8d918ba12be62 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 13 Jul 2008 15:14:31 +0200 Subject: tgsi: Parse extended source register modifiers. --- src/gallium/auxiliary/tgsi/util/tgsi_text.c | 127 +++++++++++++++++++++++++++- 1 file changed, 126 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c index f3fc675807..c618a50fb9 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c @@ -393,14 +393,78 @@ parse_src_operand( struct tgsi_full_src_register *src ) { const char *cur; + float value; uint file; uint index; - /* TODO: Extended register modifiers */ + if (*ctx->cur == '-') { + cur = ctx->cur; + cur++; + eat_opt_white( &cur ); + if (*cur == '(') { + cur++; + src->SrcRegisterExtMod.Negate = 1; + eat_opt_white( &cur ); + ctx->cur = cur; + } + } + + if (*ctx->cur == '|') { + ctx->cur++; + eat_opt_white( &ctx->cur ); + src->SrcRegisterExtMod.Absolute = 1; + } + if (*ctx->cur == '-') { ctx->cur++; + eat_opt_white( &ctx->cur ); src->SrcRegister.Negate = 1; + } + + cur = ctx->cur; + if (parse_float( &cur, &value )) { + if (value == 2.0f) { + eat_opt_white( &cur ); + if (*cur != '*') { + report_error( ctx, "Expected `*'" ); + return FALSE; + } + cur++; + if (*cur != '(') { + report_error( ctx, "Expected `('" ); + return FALSE; + } + cur++; + src->SrcRegisterExtMod.Scale2X = 1; + eat_opt_white( &cur ); + ctx->cur = cur; + } + } + + if (*ctx->cur == '(') { + ctx->cur++; eat_opt_white( &ctx->cur ); + src->SrcRegisterExtMod.Bias = 1; + } + + cur = ctx->cur; + if (parse_float( &cur, &value )) { + if (value == 1.0f) { + eat_opt_white( &cur ); + if (*cur != '-') { + report_error( ctx, "Expected `-'" ); + return FALSE; + } + cur++; + if (*cur != '(') { + report_error( ctx, "Expected `('" ); + return FALSE; + } + cur++; + src->SrcRegisterExtMod.Complement = 1; + eat_opt_white( &cur ); + ctx->cur = cur; + } } if (!parse_register( ctx, &file, &index )) @@ -438,6 +502,67 @@ parse_src_operand( ctx->cur = cur; } + + if (src->SrcRegisterExtMod.Complement) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ')') { + report_error( ctx, "Expected `)'" ); + return FALSE; + } + ctx->cur++; + } + + if (src->SrcRegisterExtMod.Bias) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ')') { + report_error( ctx, "Expected `)'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '-') { + report_error( ctx, "Expected `-'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + if (!parse_float( &ctx->cur, &value )) { + report_error( ctx, "Expected literal floating point" ); + return FALSE; + } + if (value != 0.5f) { + report_error( ctx, "Expected 0.5" ); + return FALSE; + } + } + + if (src->SrcRegisterExtMod.Scale2X) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ')') { + report_error( ctx, "Expected `)'" ); + return FALSE; + } + ctx->cur++; + } + + if (src->SrcRegisterExtMod.Absolute) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '|') { + report_error( ctx, "Expected `|'" ); + return FALSE; + } + ctx->cur++; + } + + if (src->SrcRegisterExtMod.Negate) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ')') { + report_error( ctx, "Expected `)'" ); + return FALSE; + } + ctx->cur++; + } + return TRUE; } -- cgit v1.2.3 From f5c51ebd2afdfc87de40dca115526a5e0f6ab115 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 13 Jul 2008 15:23:14 +0200 Subject: tgsi: Parse destination operand modulate modifier. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 36 ++++++++++------------------- src/gallium/auxiliary/tgsi/util/tgsi_text.c | 30 ++++++++++++++++++++++++ src/gallium/include/pipe/p_shader_tokens.h | 1 + 3 files changed, 43 insertions(+), 24 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 8220e09199..0cf4454f25 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -539,6 +539,17 @@ static const char *TGSI_MODULATES[] = "MODULATE_EIGHTH" }; +static const char *TGSI_MODULATES_SHORT[TGSI_MODULATE_COUNT] = +{ + "", + "_2X", + "_4X", + "_8X", + "_D2", + "_D4", + "_D8" +}; + void tgsi_dump_declaration( const struct tgsi_full_declaration *decl ) @@ -736,30 +747,7 @@ tgsi_dump_instruction( SID( dst->DstRegister.Index ); CHR( ']' ); - switch (dst->DstRegisterExtModulate.Modulate) { - case TGSI_MODULATE_1X: - break; - case TGSI_MODULATE_2X: - TXT( "_2X" ); - break; - case TGSI_MODULATE_4X: - TXT( "_4X" ); - break; - case TGSI_MODULATE_8X: - TXT( "_8X" ); - break; - case TGSI_MODULATE_HALF: - TXT( "_D2" ); - break; - case TGSI_MODULATE_QUARTER: - TXT( "_D4" ); - break; - case TGSI_MODULATE_EIGHTH: - TXT( "_D8" ); - break; - default: - assert( 0 ); - } + ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES_SHORT ); if( dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW ) { CHR( '.' ); diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c index c618a50fb9..1b283feb4c 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c @@ -368,6 +368,17 @@ parse_register_dcl( return TRUE; } +static const char *modulate_names[TGSI_MODULATE_COUNT] = +{ + "_1X", + "_2X", + "_4X", + "_8X", + "_D2", + "_D4", + "_D8" +}; + static boolean parse_dst_operand( struct translate_ctx *ctx, @@ -376,11 +387,30 @@ parse_dst_operand( uint file; uint index; uint writemask; + const char *cur; if (!parse_register( ctx, &file, &index )) return FALSE; + + cur = ctx->cur; + eat_opt_white( &cur ); + if (*cur == '_') { + uint i; + + for (i = 0; i < TGSI_MODULATE_COUNT; i++) { + if (str_match_no_case( &cur, modulate_names[i] )) { + if (!is_digit_alpha_underscore( cur )) { + dst->DstRegisterExtModulate.Modulate = i; + ctx->cur = cur; + break; + } + } + } + } + if (!parse_opt_writemask( ctx, &writemask )) return FALSE; + dst->DstRegister.File = file; dst->DstRegister.Index = index; dst->DstRegister.WriteMask = writemask; diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index c8e2830516..ed931d24b9 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -743,6 +743,7 @@ struct tgsi_dst_register_ext_concode #define TGSI_MODULATE_HALF 4 #define TGSI_MODULATE_QUARTER 5 #define TGSI_MODULATE_EIGHTH 6 +#define TGSI_MODULATE_COUNT 7 struct tgsi_dst_register_ext_modulate { -- cgit v1.2.3 From 17af66fc1a141920969ddf404bd7ffb52a94fb31 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 13 Jul 2008 22:37:47 +0900 Subject: pb: buffer over/underflows are errors. --- src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index acb9d7ad14..affa6aa85c 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -142,12 +142,12 @@ pb_debug_buffer_destroy(struct pb_buffer *_buf) assert(map); if(map) { if(!check_random_pattern(map, buf->underflow_size)) { - debug_printf("buffer underflow\n"); + debug_error("buffer underflow detected\n"); debug_assert(0); } if(!check_random_pattern(map + buf->underflow_size + buf->base.base.size, buf->overflow_size)) { - debug_printf("buffer overflow\n"); + debug_error("buffer overflow detected\n"); debug_assert(0); } pb_unmap(buf->buffer); -- cgit v1.2.3 From 36dd89c8a7f2a911e8f7f18d1edcaf982a75a438 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 13 Jul 2008 22:39:58 +0900 Subject: util: Eliminate pipe from the arguments to pipe_get/put_tile_xxx functions. You don't need a pipe_context * for this, and all other necessary info is already inside pipe_surface. --- src/gallium/auxiliary/util/p_tile.c | 42 +++++++++++----------------- src/gallium/auxiliary/util/p_tile.h | 19 ++++--------- src/gallium/drivers/softpipe/sp_tile_cache.c | 16 +++++------ src/mesa/drivers/x11/xm_surface.c | 4 +-- src/mesa/state_tracker/st_cb_accum.c | 12 ++++---- src/mesa/state_tracker/st_cb_drawpixels.c | 8 +++--- src/mesa/state_tracker/st_cb_readpixels.c | 10 +++---- src/mesa/state_tracker/st_cb_texture.c | 8 +++--- 8 files changed, 51 insertions(+), 68 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index 93abef9879..1a1a2d96cc 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -45,12 +45,10 @@ * This should be usable by any hw driver that has mappable surfaces. */ void -pipe_get_tile_raw(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_get_tile_raw(struct pipe_surface *ps, uint x, uint y, uint w, uint h, void *dst, int dst_stride) { - struct pipe_screen *screen = pipe->screen; const void *src; if (pipe_clip_tile(x, y, &w, &h, ps)) @@ -59,14 +57,14 @@ pipe_get_tile_raw(struct pipe_context *pipe, if (dst_stride == 0) dst_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; - src = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_READ); + src = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ); assert(src); if(!src) return; pipe_copy_rect(dst, &ps->block, dst_stride, 0, 0, w, h, src, ps->stride, x, y); - screen->surface_unmap(screen, ps); + pipe_surface_unmap(ps); } @@ -75,12 +73,10 @@ pipe_get_tile_raw(struct pipe_context *pipe, * This should be usable by any hw driver that has mappable surfaces. */ void -pipe_put_tile_raw(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_put_tile_raw(struct pipe_surface *ps, uint x, uint y, uint w, uint h, const void *src, int src_stride) { - struct pipe_screen *screen = pipe->screen; void *dst; if (pipe_clip_tile(x, y, &w, &h, ps)) @@ -89,14 +85,14 @@ pipe_put_tile_raw(struct pipe_context *pipe, if (src_stride == 0) src_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; - dst = screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_WRITE); + dst = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE); assert(dst); if(!dst) return; pipe_copy_rect(dst, &ps->block, ps->stride, x, y, w, h, src, src_stride, 0, 0); - screen->surface_unmap(screen, ps); + pipe_surface_unmap(ps); } @@ -686,8 +682,7 @@ ycbcr_get_tile_rgba(ushort *src, void -pipe_get_tile_rgba(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_get_tile_rgba(struct pipe_surface *ps, uint x, uint y, uint w, uint h, float *p) { @@ -702,7 +697,7 @@ pipe_get_tile_rgba(struct pipe_context *pipe, if (!packed) return; - pipe_get_tile_raw(pipe, ps, x, y, w, h, packed, 0); + pipe_get_tile_raw(ps, x, y, w, h, packed, 0); switch (ps->format) { case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -768,8 +763,7 @@ pipe_get_tile_rgba(struct pipe_context *pipe, void -pipe_put_tile_rgba(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_put_tile_rgba(struct pipe_surface *ps, uint x, uint y, uint w, uint h, const float *p) { @@ -838,7 +832,7 @@ pipe_put_tile_rgba(struct pipe_context *pipe, assert(0); } - pipe_put_tile_raw(pipe, ps, x, y, w, h, packed, 0); + pipe_put_tile_raw(ps, x, y, w, h, packed, 0); FREE(packed); } @@ -848,12 +842,10 @@ pipe_put_tile_rgba(struct pipe_context *pipe, * Get a block of Z values, converted to 32-bit range. */ void -pipe_get_tile_z(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_get_tile_z(struct pipe_surface *ps, uint x, uint y, uint w, uint h, uint *z) { - struct pipe_screen *screen = pipe->screen; const uint dstStride = w; ubyte *map; uint *pDest = z; @@ -862,7 +854,7 @@ pipe_get_tile_z(struct pipe_context *pipe, if (pipe_clip_tile(x, y, &w, &h, ps)) return; - map = (ubyte *)screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_READ); + map = (ubyte *)pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ); if (!map) { assert(0); return; @@ -913,17 +905,15 @@ pipe_get_tile_z(struct pipe_context *pipe, assert(0); } - screen->surface_unmap(screen, ps); + pipe_surface_unmap(ps); } void -pipe_put_tile_z(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_put_tile_z(struct pipe_surface *ps, uint x, uint y, uint w, uint h, const uint *zSrc) { - struct pipe_screen *screen = pipe->screen; const uint srcStride = w; const uint *pSrc = zSrc; ubyte *map; @@ -932,7 +922,7 @@ pipe_put_tile_z(struct pipe_context *pipe, if (pipe_clip_tile(x, y, &w, &h, ps)) return; - map = (ubyte *)screen->surface_map(screen, ps, PIPE_BUFFER_USAGE_CPU_WRITE); + map = (ubyte *)pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE); if (!map) { assert(0); return; @@ -980,7 +970,7 @@ pipe_put_tile_z(struct pipe_context *pipe, assert(0); } - screen->surface_unmap(screen, ps); + pipe_surface_unmap(ps); } diff --git a/src/gallium/auxiliary/util/p_tile.h b/src/gallium/auxiliary/util/p_tile.h index fdc80a13b3..adfec8bcee 100644 --- a/src/gallium/auxiliary/util/p_tile.h +++ b/src/gallium/auxiliary/util/p_tile.h @@ -30,7 +30,6 @@ #include "pipe/p_compiler.h" -struct pipe_context; struct pipe_surface; @@ -57,40 +56,34 @@ extern "C" { #endif void -pipe_get_tile_raw(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_get_tile_raw(struct pipe_surface *ps, uint x, uint y, uint w, uint h, void *p, int dst_stride); void -pipe_put_tile_raw(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_put_tile_raw(struct pipe_surface *ps, uint x, uint y, uint w, uint h, const void *p, int src_stride); void -pipe_get_tile_rgba(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_get_tile_rgba(struct pipe_surface *ps, uint x, uint y, uint w, uint h, float *p); void -pipe_put_tile_rgba(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_put_tile_rgba(struct pipe_surface *ps, uint x, uint y, uint w, uint h, const float *p); void -pipe_get_tile_z(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_get_tile_z(struct pipe_surface *ps, uint x, uint y, uint w, uint h, uint *z); void -pipe_put_tile_z(struct pipe_context *pipe, - struct pipe_surface *ps, +pipe_put_tile_z(struct pipe_surface *ps, uint x, uint y, uint w, uint h, const uint *z); diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 2d5d2b50f5..bfdaaa6b8f 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -339,7 +339,7 @@ sp_tile_cache_flush_clear(struct pipe_context *pipe, for (y = 0; y < h; y += TILE_SIZE) { for (x = 0; x < w; x += TILE_SIZE) { if (is_clear_flag_set(tc->clear_flags, x, y)) { - pipe_put_tile_raw(pipe, ps, + pipe_put_tile_raw(ps, x, y, TILE_SIZE, TILE_SIZE, tc->tile.data.color32, 0/*STRIDE*/); @@ -374,12 +374,12 @@ sp_flush_tile_cache(struct softpipe_context *softpipe, struct softpipe_cached_tile *tile = tc->entries + pos; if (tile->x >= 0) { if (tc->depth_stencil) { - pipe_put_tile_raw(pipe, ps, + pipe_put_tile_raw(ps, tile->x, tile->y, TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { - pipe_put_tile_rgba(pipe, ps, + pipe_put_tile_rgba(ps, tile->x, tile->y, TILE_SIZE, TILE_SIZE, (float *) tile->data.color); } @@ -431,12 +431,12 @@ sp_get_cached_tile(struct softpipe_context *softpipe, if (tile->x != -1) { /* put dirty tile back in framebuffer */ if (tc->depth_stencil) { - pipe_put_tile_raw(pipe, ps, + pipe_put_tile_raw(ps, tile->x, tile->y, TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { - pipe_put_tile_rgba(pipe, ps, + pipe_put_tile_rgba(ps, tile->x, tile->y, TILE_SIZE, TILE_SIZE, (float *) tile->data.color); } @@ -458,12 +458,12 @@ sp_get_cached_tile(struct softpipe_context *softpipe, else { /* get new tile data from surface */ if (tc->depth_stencil) { - pipe_get_tile_raw(pipe, ps, + pipe_get_tile_raw(ps, tile->x, tile->y, TILE_SIZE, TILE_SIZE, tile->data.depth32, 0/*STRIDE*/); } else { - pipe_get_tile_rgba(pipe, ps, + pipe_get_tile_rgba(ps, tile->x, tile->y, TILE_SIZE, TILE_SIZE, (float *) tile->data.color); } @@ -544,7 +544,7 @@ sp_get_cached_tile_tex(struct pipe_context *pipe, } /* get tile from the surface (view into texture) */ - pipe_get_tile_rgba(pipe, tc->tex_surf, + pipe_get_tile_rgba(tc->tex_surf, tile_x, tile_y, TILE_SIZE, TILE_SIZE, (float *) tile->data.color); tile->x = tile_x; diff --git a/src/mesa/drivers/x11/xm_surface.c b/src/mesa/drivers/x11/xm_surface.c index 81616b92d9..a3f2fe7d68 100644 --- a/src/mesa/drivers/x11/xm_surface.c +++ b/src/mesa/drivers/x11/xm_surface.c @@ -106,7 +106,7 @@ xmesa_get_tile_rgba(struct pipe_context *pipe, struct pipe_surface *ps, } else { /* other softpipe surface */ - softpipe_get_tile_rgba(pipe, ps, x, y, w, h, p); + softpipe_get_tile_rgba(ps, x, y, w, h, p); } } @@ -142,7 +142,7 @@ xmesa_put_tile_rgba(struct pipe_context *pipe, struct pipe_surface *ps, } else { /* other softpipe surface */ - softpipe_put_tile_rgba(pipe, ps, x, y, w, h, p); + softpipe_put_tile_rgba(ps, x, y, w, h, p); } } diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index 2283905662..c0e8c6bf33 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -73,7 +73,7 @@ acc_get_tile_rgba(struct pipe_context *pipe, struct pipe_surface *acc_ps, acc_ps->block.width = 1; acc_ps->block.height = 1; - pipe_get_tile_rgba(pipe, acc_ps, x, y, w, h, p); + pipe_get_tile_rgba(acc_ps, x, y, w, h, p); acc_ps->format = f; acc_ps->block = b; @@ -97,7 +97,7 @@ acc_put_tile_rgba(struct pipe_context *pipe, struct pipe_surface *acc_ps, acc_ps->block.width = 1; acc_ps->block.height = 1; - pipe_put_tile_rgba(pipe, acc_ps, x, y, w, h, p); + pipe_put_tile_rgba(acc_ps, x, y, w, h, p); acc_ps->format = f; acc_ps->block = b; @@ -208,7 +208,7 @@ accum_accum(struct pipe_context *pipe, GLfloat value, colorBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); accBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); - pipe_get_tile_rgba(pipe, color_surf, xpos, ypos, width, height, colorBuf); + pipe_get_tile_rgba(color_surf, xpos, ypos, width, height, colorBuf); acc_get_tile_rgba(pipe, acc_surf, xpos, ypos, width, height, accBuf); for (i = 0; i < 4 * width * height; i++) { @@ -243,7 +243,7 @@ accum_load(struct pipe_context *pipe, GLfloat value, buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); - pipe_get_tile_rgba(pipe, color_surf, xpos, ypos, width, height, buf); + pipe_get_tile_rgba(color_surf, xpos, ypos, width, height, buf); for (i = 0; i < 4 * width * height; i++) { buf[i] = buf[i] * value; @@ -283,7 +283,7 @@ accum_return(GLcontext *ctx, GLfloat value, if (!colormask[0] || !colormask[1] || !colormask[2] || !colormask[3]) { cbuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); - pipe_get_tile_rgba(pipe, color_surf, xpos, ypos, width, height, cbuf); + pipe_get_tile_rgba(color_surf, xpos, ypos, width, height, cbuf); } for (i = 0; i < width * height; i++) { @@ -298,7 +298,7 @@ accum_return(GLcontext *ctx, GLfloat value, } } - pipe_put_tile_rgba(pipe, color_surf, xpos, ypos, width, height, abuf); + pipe_put_tile_rgba(color_surf, xpos, ypos, width, height, abuf); free(abuf); if (cbuf) diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index a7781f3ab2..6b0137dcf8 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -1049,16 +1049,16 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, /* alternate path using get/put_tile() */ GLfloat *buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); - pipe_get_tile_rgba(pipe, psRead, srcx, srcy, width, height, buf); - pipe_put_tile_rgba(pipe, psTex, 0, 0, width, height, buf); + pipe_get_tile_rgba(psRead, srcx, srcy, width, height, buf); + pipe_put_tile_rgba(psTex, 0, 0, width, height, buf); free(buf); } else { /* GL_DEPTH */ GLuint *buf = (GLuint *) malloc(width * height * sizeof(GLuint)); - pipe_get_tile_z(pipe, psRead, srcx, srcy, width, height, buf); - pipe_put_tile_z(pipe, psTex, 0, 0, width, height, buf); + pipe_get_tile_z(psRead, srcx, srcy, width, height, buf); + pipe_put_tile_z(psTex, 0, 0, width, height, buf); free(buf); } pipe_surface_reference(&psRead, NULL); diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 09d9c29e44..eb71779cc9 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -262,7 +262,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLuint ztemp[MAX_WIDTH]; GLfloat zfloat[MAX_WIDTH]; const double scale = 1.0 / ((1 << 24) - 1); - pipe_get_tile_raw(pipe, surf, x, y, width, 1, ztemp, 0); + pipe_get_tile_raw(surf, x, y, width, 1, ztemp, 0); y += yStep; for (j = 0; j < width; j++) { zfloat[j] = (float) (scale * (ztemp[j] & 0xffffff)); @@ -276,7 +276,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, /* untested, but simple: */ assert(format == GL_DEPTH_STENCIL_EXT); for (i = 0; i < height; i++) { - pipe_get_tile_raw(pipe, surf, x, y, width, 1, dst, 0); + pipe_get_tile_raw(surf, x, y, width, 1, dst, 0); y += yStep; dst += dstStride; } @@ -287,7 +287,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLushort ztemp[MAX_WIDTH]; GLfloat zfloat[MAX_WIDTH]; const double scale = 1.0 / 0xffff; - pipe_get_tile_raw(pipe, surf, x, y, width, 1, ztemp, 0); + pipe_get_tile_raw(surf, x, y, width, 1, ztemp, 0); y += yStep; for (j = 0; j < width; j++) { zfloat[j] = (float) (scale * ztemp[j]); @@ -302,7 +302,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, GLuint ztemp[MAX_WIDTH]; GLfloat zfloat[MAX_WIDTH]; const double scale = 1.0 / 0xffffffff; - pipe_get_tile_raw(pipe, surf, x, y, width, 1, ztemp, 0); + pipe_get_tile_raw(surf, x, y, width, 1, ztemp, 0); y += yStep; for (j = 0; j < width; j++) { zfloat[j] = (float) (scale * ztemp[j]); @@ -316,7 +316,7 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, /* RGBA format */ /* Do a row at a time to flip image data vertically */ for (i = 0; i < height; i++) { - pipe_get_tile_rgba(pipe, surf, x, y, width, 1, df); + pipe_get_tile_rgba(surf, x, y, width, 1, df); y += yStep; df += dfStride; if (!dfStride) { diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index b9aa513d72..73f8b8f788 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -1100,25 +1100,25 @@ fallback_copy_texsubimage(GLcontext *ctx, for (row = 0; row < height; row++, srcY++, destY += yStep) { uint data[MAX_WIDTH]; - pipe_get_tile_z(pipe, src_surf, srcX, srcY, width, 1, data); + pipe_get_tile_z(src_surf, srcX, srcY, width, 1, data); if (scaleOrBias) { _mesa_scale_and_bias_depth_uint(ctx, width, data); } - pipe_put_tile_z(pipe, dest_surf, destX, destY, width, 1, data); + pipe_put_tile_z(dest_surf, destX, destY, width, 1, data); } } else { /* RGBA format */ for (row = 0; row < height; row++, srcY++, destY += yStep) { float data[4 * MAX_WIDTH]; - pipe_get_tile_rgba(pipe, src_surf, srcX, srcY, width, 1, data); + pipe_get_tile_rgba(src_surf, srcX, srcY, width, 1, data); /* XXX we're ignoring convolution for now */ if (ctx->_ImageTransferState) { _mesa_apply_rgba_transfer_ops(ctx, ctx->_ImageTransferState & ~IMAGE_CONVOLUTION_BIT, width, (GLfloat (*)[4]) data); } - pipe_put_tile_rgba(pipe, dest_surf, destX, destY, width, 1, data); + pipe_put_tile_rgba(dest_surf, destX, destY, width, 1, data); } } -- cgit v1.2.3 From 6eb7f763fbbbb7a32640760cd5d122020866fea1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 14 Jul 2008 18:08:52 -0600 Subject: tgsi: fix bug in execution of loops inside of conditionals. Fixes infinite loop bug. --- src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c index 46949661af..001a4c4b15 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c @@ -2400,7 +2400,8 @@ exec_instruction( /* Restore ContMask, but don't pop */ assert(mach->ContStackTop > 0); mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; - if (mach->LoopMask) { + UPDATE_EXEC_MASK(mach); + if (mach->ExecMask) { /* repeat loop: jump to instruction just past BGNLOOP */ *pc = inst->InstructionExtLabel.Label + 1; } -- cgit v1.2.3 From c60e009a9138a573d2219ee2ad85e3203b09a7ae Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 15 Jul 2008 10:56:30 +0200 Subject: tgsi: Numerical label before an instruction is optional. --- src/gallium/auxiliary/tgsi/util/tgsi_text.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c index 1b283feb4c..6cab475b88 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c @@ -740,7 +740,10 @@ static const char *texture_names[TGSI_TEXTURE_COUNT] = "SHADOWRECT" }; -static boolean parse_instruction( struct translate_ctx *ctx ) +static boolean +parse_instruction( + struct translate_ctx *ctx, + boolean has_label ) { uint i; uint saturate = TGSI_SAT_NONE; @@ -767,7 +770,10 @@ static boolean parse_instruction( struct translate_ctx *ctx ) } } if (i == TGSI_OPCODE_LAST) { - report_error( ctx, "Unknown opcode" ); + if (has_label) + report_error( ctx, "Unknown opcode" ); + else + report_error( ctx, "Expected `DCL', `IMM' or a label" ); return FALSE; } info = &opcode_info[i]; @@ -1025,7 +1031,7 @@ static boolean translate( struct translate_ctx *ctx ) } if (parse_label( ctx, &label_val )) { - if (!parse_instruction( ctx )) + if (!parse_instruction( ctx, TRUE )) return FALSE; } else if (str_match_no_case( &ctx->cur, "DCL" )) { @@ -1036,8 +1042,7 @@ static boolean translate( struct translate_ctx *ctx ) if (!parse_immediate( ctx )) return FALSE; } - else { - report_error( ctx, "Expected `DCL', `IMM' or a label" ); + else if (!parse_instruction( ctx, FALSE )) { return FALSE; } } -- cgit v1.2.3 From 78d18bb690d47eba6d13f8f154e64f1c33ef29fd Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 16 Jul 2008 19:36:36 +0900 Subject: gallium: ycbcr_get_tile_rgba allow reading an uneven number of pixels from yuv surfaces. --- src/gallium/auxiliary/util/p_tile.c | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index 1a1a2d96cc..580a95568e 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -632,13 +632,10 @@ ycbcr_get_tile_rgba(ushort *src, const float scale = 1.0f / 255.0f; unsigned i, j; - /* we're assuming we're being asked for an even number of texels */ - assert((w & 1) == 0); - for (i = 0; i < h; i++) { float *pRow = p; /* do two texels at a time */ - for (j = 0; j < w; j += 2, src += 2) { + for (j = 0; j < (w & ~1); j += 2, src += 2) { const ushort t0 = src[0]; const ushort t1 = src[1]; const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */ @@ -676,6 +673,33 @@ ycbcr_get_tile_rgba(ushort *src, pRow += 4; } + /* do the last texel */ + if (w & 1) { + const ushort t0 = src[0]; + const ushort t1 = src[1]; + const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */ + ubyte cb, cr; + float r, g, b; + + if (rev) { + cb = t1 & 0xff; /* chroma U */ + cr = t0 & 0xff; /* chroma V */ + } + else { + cb = t0 & 0xff; /* chroma U */ + cr = t1 & 0xff; /* chroma V */ + } + + /* even pixel: y0,cr,cb */ + r = 1.164f * (y0-16) + 1.596f * (cr-128); + g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128); + b = 1.164f * (y0-16) + 2.018f * (cb-128); + pRow[0] = r * scale; + pRow[1] = g * scale; + pRow[2] = b * scale; + pRow[3] = 1.0f; + pRow += 4; + } p += dst_stride; } } -- cgit v1.2.3 From 6a65a0d9efb82b11cafe5b411abddd57a4fb838a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 17 Jul 2008 11:26:53 +0900 Subject: gallium: Be less verbose with debug options messages. --- src/gallium/auxiliary/util/p_debug.c | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 7b28900a25..b0240ad737 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -174,20 +174,19 @@ copy(char *dst, const char *start, const char *end, size_t n) #endif -const char * -debug_get_option(const char *name, const char *dfault) +static INLINE const char * +_debug_get_option(const char *name) { - const char *result; #if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) /* EngMapFile creates the file if it does not exists, so it must either be * disabled on release versions (or put in a less conspicuous place). */ #ifdef DEBUG + const char *result = NULL; ULONG_PTR iFile = 0; const void *pMap = NULL; const char *sol, *eol, *sep; static char output[1024]; - result = dfault; pMap = EngMapFile(L"\\??\\c:\\gallium.cfg", 0, &iFile); if(pMap) { sol = (const char *)pMap; @@ -208,18 +207,27 @@ debug_get_option(const char *name, const char *dfault) } EngUnmapFile(iFile); } + return result; #else - result = dfault; + return NULL; #endif #elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) /* TODO: implement */ - result = dfault; + return NULL; #else - result = getenv(name); - if(!result) - result = dfault; + return getenv(name); #endif +} +const char * +debug_get_option(const char *name, const char *dfault) +{ + const char *result; + + result = _debug_get_option(name); + if(!result) + result = dfault; + debug_printf("%s: %s = %s\n", __FUNCTION__, name, result ? result : "(null)"); return result; @@ -228,7 +236,7 @@ debug_get_option(const char *name, const char *dfault) boolean debug_get_bool_option(const char *name, boolean dfault) { - const char *str = debug_get_option(name, NULL); + const char *str = _debug_get_option(name); boolean result; if(str == NULL) @@ -258,7 +266,7 @@ debug_get_num_option(const char *name, long dfault) long result; const char *str; - str = debug_get_option(name, NULL); + str = _debug_get_option(name); if(!str) result = dfault; else { @@ -294,7 +302,7 @@ debug_get_flags_option(const char *name, unsigned long result; const char *str; - str = debug_get_option(name, NULL); + str = _debug_get_option(name); if(!str) result = dfault; else { -- cgit v1.2.3 From 174c6912d68a954ff5eddf8b953b813e2eac3deb Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 17 Jul 2008 22:54:14 +0900 Subject: gallium: Expose the ability to get a tile outside a surface. --- src/gallium/auxiliary/util/p_tile.c | 83 +++++++++++++++++++++---------------- src/gallium/auxiliary/util/p_tile.h | 7 ++++ 2 files changed, 54 insertions(+), 36 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index 580a95568e..1daae6b640 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -706,81 +706,92 @@ ycbcr_get_tile_rgba(ushort *src, void -pipe_get_tile_rgba(struct pipe_surface *ps, - uint x, uint y, uint w, uint h, - float *p) +pipe_tile_raw_to_rgba(enum pipe_format format, + void *src, + uint w, uint h, + float *dst, unsigned dst_stride) { - unsigned dst_stride = w * 4; - void *packed; - - if (pipe_clip_tile(x, y, &w, &h, ps)) - return; - - packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size); - - if (!packed) - return; - - pipe_get_tile_raw(ps, x, y, w, h, packed, 0); - - switch (ps->format) { + switch (format) { case PIPE_FORMAT_A8R8G8B8_UNORM: - a8r8g8b8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); + a8r8g8b8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; case PIPE_FORMAT_X8R8G8B8_UNORM: - x8r8g8b8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); + x8r8g8b8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; case PIPE_FORMAT_B8G8R8A8_UNORM: - b8g8r8a8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); + b8g8r8a8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; case PIPE_FORMAT_A1R5G5B5_UNORM: - a1r5g5b5_get_tile_rgba((ushort *) packed, w, h, p, dst_stride); + a1r5g5b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); break; case PIPE_FORMAT_A4R4G4B4_UNORM: - a4r4g4b4_get_tile_rgba((ushort *) packed, w, h, p, dst_stride); + a4r4g4b4_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); break; case PIPE_FORMAT_R5G6B5_UNORM: - r5g6b5_get_tile_rgba((ushort *) packed, w, h, p, dst_stride); + r5g6b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); break; case PIPE_FORMAT_L8_UNORM: - l8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride); + l8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); break; case PIPE_FORMAT_A8_UNORM: - a8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride); + a8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); break; case PIPE_FORMAT_I8_UNORM: - i8_get_tile_rgba((ubyte *) packed, w, h, p, dst_stride); + i8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); break; case PIPE_FORMAT_A8L8_UNORM: - a8_l8_get_tile_rgba((ushort *) packed, w, h, p, dst_stride); + a8_l8_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); break; case PIPE_FORMAT_R16G16B16A16_SNORM: - r16g16b16a16_get_tile_rgba((short *) packed, w, h, p, dst_stride); + r16g16b16a16_get_tile_rgba((short *) src, w, h, dst, dst_stride); break; case PIPE_FORMAT_Z16_UNORM: - z16_get_tile_rgba((ushort *) packed, w, h, p, dst_stride); + z16_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); break; case PIPE_FORMAT_Z32_UNORM: - z32_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); + z32_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; case PIPE_FORMAT_S8Z24_UNORM: case PIPE_FORMAT_X8Z24_UNORM: - s8z24_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); + s8z24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; case PIPE_FORMAT_Z24S8_UNORM: - z24s8_get_tile_rgba((unsigned *) packed, w, h, p, dst_stride); + z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; case PIPE_FORMAT_YCBCR: - assert((x & 1) == 0); - ycbcr_get_tile_rgba((ushort *) packed, w, h, p, dst_stride, FALSE); + ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, FALSE); break; case PIPE_FORMAT_YCBCR_REV: - assert((x & 1) == 0); - ycbcr_get_tile_rgba((ushort *) packed, w, h, p, dst_stride, TRUE); + ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, TRUE); break; default: assert(0); } +} + + +void +pipe_get_tile_rgba(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + float *p) +{ + unsigned dst_stride = w * 4; + void *packed; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size); + + if (!packed) + return; + + if(ps->format == PIPE_FORMAT_YCBCR || ps->format == PIPE_FORMAT_YCBCR_REV) + assert((x & 1) == 0); + + pipe_get_tile_raw(ps, x, y, w, h, packed, 0); + + pipe_tile_raw_to_rgba(ps->format, packed, w, h, p, dst_stride); FREE(packed); } diff --git a/src/gallium/auxiliary/util/p_tile.h b/src/gallium/auxiliary/util/p_tile.h index adfec8bcee..a8ac805308 100644 --- a/src/gallium/auxiliary/util/p_tile.h +++ b/src/gallium/auxiliary/util/p_tile.h @@ -87,6 +87,13 @@ pipe_put_tile_z(struct pipe_surface *ps, uint x, uint y, uint w, uint h, const uint *z); +void +pipe_tile_raw_to_rgba(enum pipe_format format, + void *src, + uint w, uint h, + float *dst, unsigned dst_stride); + + #ifdef __cplusplus } #endif -- cgit v1.2.3 From 2727702b1731a478de8806481416080d02af5862 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Thu, 17 Jul 2008 20:19:40 +0200 Subject: tgsi: New file. --- src/gallium/auxiliary/tgsi/util/tgsi_sanity.c | 258 ++++++++++++++++++++++++++ src/gallium/auxiliary/tgsi/util/tgsi_sanity.h | 49 +++++ 2 files changed, 307 insertions(+) create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_sanity.c create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_sanity.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c new file mode 100644 index 0000000000..377309c3fd --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c @@ -0,0 +1,258 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "tgsi_sanity.h" +#include "tgsi_transform.h" + +#define MAX_REGISTERS 256 + +typedef uint reg_flag; + +#define BITS_IN_REG_FLAG (sizeof( reg_flag ) * 8) + +struct sanity_check_ctx +{ + struct tgsi_transform_context xform; + + reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REGISTERS / sizeof( uint ) / 8]; + reg_flag regs_used[TGSI_FILE_COUNT][MAX_REGISTERS / sizeof( uint ) / 8]; + uint num_imms; + uint num_instructions; + uint index_of_END; + + uint errors; + uint warnings; +}; + +static void +report_error( + struct sanity_check_ctx *ctx, + const char *msg ) +{ + debug_printf( "\nError: %s", msg ); + ctx->errors++; +} + +static void +report_warning( + struct sanity_check_ctx *ctx, + const char *msg ) +{ + debug_printf( "\nWarning: %s", msg ); + ctx->warnings++; +} + +static boolean +check_file_name( + struct sanity_check_ctx *ctx, + uint file ) +{ + if (file <= TGSI_FILE_NULL || file >= TGSI_FILE_COUNT) { + report_error( ctx, "Invalid file name" ); + return FALSE; + } + return TRUE; +} + +static boolean +is_register_declared( + struct sanity_check_ctx *ctx, + uint file, + uint index ) +{ + assert( index < MAX_REGISTERS ); + + return (ctx->regs_decl[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; +} + +static boolean +is_register_used( + struct sanity_check_ctx *ctx, + uint file, + uint index ) +{ + assert( index < MAX_REGISTERS ); + + return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; +} + +static void xform_instruction( + struct tgsi_transform_context *xform, + struct tgsi_full_instruction *inst ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) xform; + uint i; + + /* There must be no other instructions after END. + */ + if (ctx->index_of_END != ~0) { + report_error( ctx, "Unexpected instruction after END" ); + } + else if (inst->Instruction.Opcode == TGSI_OPCODE_END) { + ctx->index_of_END = ctx->num_instructions; + } + + /* Check destination and source registers' validity. + * Mark the registers as used. + */ + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + uint file; + uint index; + + file = inst->FullDstRegisters[i].DstRegister.File; + if (!check_file_name( ctx, file )) + return; + index = inst->FullDstRegisters[i].DstRegister.Index; + if (!is_register_declared( ctx, file, index )) + report_error( ctx, "Undeclared destination register" ); + ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG)); + } + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + uint file; + uint index; + + file = inst->FullSrcRegisters[i].SrcRegister.File; + if (!check_file_name( ctx, file )) + return; + index = inst->FullSrcRegisters[i].SrcRegister.Index; + if (!is_register_declared( ctx, file, index )) + report_error( ctx, "Undeclared source register" ); + ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG)); + } + + ctx->num_instructions++; +} + +static void xform_declaration( + struct tgsi_transform_context *xform, + struct tgsi_full_declaration *decl ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) xform; + uint file; + uint i; + + /* No declarations allowed after the first instruction. + */ + if (ctx->num_instructions > 0) + report_error( ctx, "Instruction expected but declaration found" ); + + /* Check registers' validity. + * Mark the registers as declared. + */ + file = decl->Declaration.File; + if (!check_file_name( ctx, file )) + return; + for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) { + if (is_register_declared( ctx, file, i )) + report_error( ctx, "The same register declared twice" ); + ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG)); + } +} + +static void xform_immediate( + struct tgsi_transform_context *xform, + struct tgsi_full_immediate *imm ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) xform; + + assert( ctx->num_imms < MAX_REGISTERS ); + + /* No immediates allowed after the first instruction. + */ + if (ctx->num_instructions > 0) + report_error( ctx, "Instruction expected but immediate found" ); + + /* Mark the register as declared. + */ + ctx->num_imms++; + ctx->regs_decl[TGSI_FILE_IMMEDIATE][ctx->num_imms / BITS_IN_REG_FLAG] |= (1 << (ctx->num_imms % BITS_IN_REG_FLAG)); + + /* Check data type validity. + */ + if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) { + report_error( ctx, "Invalid immediate data type" ); + return; + } +} + +static void epilog( + struct tgsi_transform_context *xform ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) xform; + uint file; + + /* There must be an END instruction at the end. + */ + if (ctx->index_of_END == ~0 || ctx->index_of_END != ctx->num_instructions - 1) { + report_error( ctx, "Expected END at end of instruction sequence" ); + } + + /* Check if all declared registers were used. + */ + for (file = TGSI_FILE_NULL; file < TGSI_FILE_COUNT; file++) { + uint i; + + for (i = 0; i < MAX_REGISTERS; i++) { + if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i )) { + report_warning( ctx, "Register never used" ); + } + } + } + + /* Print totals, if any. + */ + if (ctx->errors || ctx->warnings) + debug_printf( "\n%u errors, %u warnings", ctx->errors, ctx->warnings ); +} + +boolean +tgsi_sanity_check( + struct tgsi_token *tokens ) +{ + struct sanity_check_ctx ctx; + struct tgsi_token dummy_tokens[16]; + + ctx.xform.transform_instruction = xform_instruction; + ctx.xform.transform_declaration = xform_declaration; + ctx.xform.transform_immediate = xform_immediate; + ctx.xform.epilog = epilog; + + memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) ); + memset( ctx.regs_used, 0, sizeof( ctx.regs_used ) ); + ctx.num_imms = 0; + ctx.num_instructions = 0; + ctx.index_of_END = ~0; + + ctx.errors = 0; + ctx.warnings = 0; + + if (tgsi_transform_shader( tokens, dummy_tokens, 16, &ctx.xform ) == -1) + return FALSE; + + return ctx.errors > 0; +} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.h new file mode 100644 index 0000000000..ca45e94c7a --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.h @@ -0,0 +1,49 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_SANITY_H +#define TGSI_SANITY_H + +#include "pipe/p_shader_tokens.h" + +#if defined __cplusplus +extern "C" { +#endif + +/* Check the given token stream for errors and common mistakes. + * Diagnostic messages are printed out to the debug output. + * Returns TRUE if there are no errors, even though there could be some warnings. + */ +boolean +tgsi_sanity_check( + struct tgsi_token *tokens ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_SANITY_H */ -- cgit v1.2.3 From 3c5ec98e45880d6a896f3c094f8004000f50a149 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Thu, 17 Jul 2008 20:22:47 +0200 Subject: scons: List util/tgsi_sanity.c. --- src/gallium/auxiliary/tgsi/SConscript | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript index b62c8efe02..67230f6ce0 100644 --- a/src/gallium/auxiliary/tgsi/SConscript +++ b/src/gallium/auxiliary/tgsi/SConscript @@ -8,6 +8,7 @@ tgsi = env.ConvenienceLibrary( 'util/tgsi_build.c', 'util/tgsi_dump.c', 'util/tgsi_parse.c', + 'util/tgsi_sanity.c', 'util/tgsi_scan.c', 'util/tgsi_text.c', 'util/tgsi_transform.c', -- cgit v1.2.3 From 10d1dc68a413eaf642bf1bda2e5452835f7b7050 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Thu, 17 Jul 2008 20:23:26 +0200 Subject: tgsi: Perform a sanity check after reading a shader from text. --- src/gallium/auxiliary/tgsi/util/tgsi_text.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c index 6cab475b88..803f7a23b8 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c @@ -29,6 +29,7 @@ #include "tgsi_text.h" #include "tgsi_build.h" #include "tgsi_parse.h" +#include "tgsi_sanity.h" #include "tgsi_util.h" static boolean is_alpha_underscore( const char *cur ) @@ -1064,5 +1065,8 @@ tgsi_text_translate( ctx.tokens_cur = tokens; ctx.tokens_end = tokens + num_tokens; - return translate( &ctx ); + if (!translate( &ctx )) + return FALSE; + + return tgsi_sanity_check( tokens ); } -- cgit v1.2.3 From f2053cfa1ac4e4e2e0083670aac5df766adad5f9 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Thu, 17 Jul 2008 20:40:13 +0200 Subject: tgsi: Fix parsing an instruction with no operands. --- src/gallium/auxiliary/tgsi/util/tgsi_text.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c index 803f7a23b8..9ed0f52fc7 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c @@ -758,13 +758,20 @@ parse_instruction( for (i = 0; i < TGSI_OPCODE_LAST; i++) { const char *cur = ctx->cur; - if (str_match_no_case( &cur, opcode_info[i].mnemonic )) { + info = &opcode_info[i]; + if (str_match_no_case( &cur, info->mnemonic )) { if (str_match_no_case( &cur, "_SATNV" )) saturate = TGSI_SAT_MINUS_PLUS_ONE; else if (str_match_no_case( &cur, "_SAT" )) saturate = TGSI_SAT_ZERO_ONE; - if (*cur == '\0' || eat_white( &cur )) { + if (info->num_dst + info->num_src + info->is_tex == 0) { + if (!is_digit_alpha_underscore( cur )) { + ctx->cur = cur; + break; + } + } + else if (*cur == '\0' || eat_white( &cur )) { ctx->cur = cur; break; } @@ -777,7 +784,6 @@ parse_instruction( report_error( ctx, "Expected `DCL', `IMM' or a label" ); return FALSE; } - info = &opcode_info[i]; inst = tgsi_default_full_instruction(); inst.Instruction.Opcode = i; @@ -1031,6 +1037,9 @@ static boolean translate( struct translate_ctx *ctx ) return FALSE; } + if (*ctx->cur == '\0') + break; + if (parse_label( ctx, &label_val )) { if (!parse_instruction( ctx, TRUE )) return FALSE; -- cgit v1.2.3 From 638ecbda3eed8319ce82be9c119ca1f96f21d976 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 18 Jul 2008 00:39:23 +0200 Subject: tgsi: Add tgsi_iterate_shader utility. Walks the token stream and invokes callbacks. --- src/gallium/auxiliary/tgsi/util/tgsi_iterate.c | 82 ++++++++++++++++++++++++++ src/gallium/auxiliary/tgsi/util/tgsi_iterate.h | 73 +++++++++++++++++++++++ 2 files changed, 155 insertions(+) create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_iterate.c create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_iterate.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c new file mode 100644 index 0000000000..43d7a6b1d5 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c @@ -0,0 +1,82 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "tgsi_iterate.h" + +boolean +tgsi_iterate_shader( + const struct tgsi_token *tokens, + struct tgsi_iterate_context *ctx ) +{ + struct tgsi_parse_context parse; + + if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) + return FALSE; + + if (ctx->prolog) + if (!ctx->prolog( ctx )) + goto fail; + + while (!tgsi_parse_end_of_tokens( &parse )) { + tgsi_parse_token( &parse ); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (ctx->iterate_instruction) + if (!ctx->iterate_instruction( ctx, &parse.FullToken.FullInstruction )) + goto fail; + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + if (ctx->iterate_declaration) + if (!ctx->iterate_declaration( ctx, &parse.FullToken.FullDeclaration )) + goto fail; + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + if (ctx->iterate_immediate) + if (!ctx->iterate_immediate( ctx, &parse.FullToken.FullImmediate )) + goto fail; + break; + + default: + assert( 0 ); + } + } + + if (ctx->epilog) + if (!ctx->epilog( ctx )) + goto fail; + + tgsi_parse_free( &parse ); + return TRUE; + +fail: + tgsi_parse_free( &parse ); + return FALSE; +} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h new file mode 100644 index 0000000000..18729c2d1a --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h @@ -0,0 +1,73 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_ITERATE_H +#define TGSI_ITERATE_H + +#include "pipe/p_shader_tokens.h" +#include "tgsi/util/tgsi_parse.h" + +#if defined __cplusplus +extern "C" { +#endif + +struct tgsi_iterate_context +{ + boolean + (* prolog)( + struct tgsi_iterate_context *ctx ); + + boolean + (* iterate_instruction)( + struct tgsi_iterate_context *ctx, + struct tgsi_full_instruction *inst ); + + boolean + (* iterate_declaration)( + struct tgsi_iterate_context *ctx, + struct tgsi_full_declaration *decl ); + + boolean + (* iterate_immediate)( + struct tgsi_iterate_context *ctx, + struct tgsi_full_immediate *imm ); + + boolean + (* epilog)( + struct tgsi_iterate_context *ctx ); +}; + +boolean +tgsi_iterate_shader( + const struct tgsi_token *tokens, + struct tgsi_iterate_context *ctx ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_ITERATE_H */ -- cgit v1.2.3 From e1fd3ea415db1bc80d0bc6d94645eaac60bb4121 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 18 Jul 2008 00:40:28 +0200 Subject: scons: List util/tgsi_iterate.c. --- src/gallium/auxiliary/tgsi/SConscript | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript index 67230f6ce0..54f5c75db4 100644 --- a/src/gallium/auxiliary/tgsi/SConscript +++ b/src/gallium/auxiliary/tgsi/SConscript @@ -7,6 +7,7 @@ tgsi = env.ConvenienceLibrary( 'exec/tgsi_sse2.c', 'util/tgsi_build.c', 'util/tgsi_dump.c', + 'util/tgsi_iterate.c', 'util/tgsi_parse.c', 'util/tgsi_sanity.c', 'util/tgsi_scan.c', -- cgit v1.2.3 From 1e5419fa3061386413a98b75d0908cb3e3c6894e Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 18 Jul 2008 00:41:45 +0200 Subject: tgsi: Express tgsi_sanity in terms of tgsi_iterate. --- src/gallium/auxiliary/tgsi/util/tgsi_sanity.c | 60 ++++++++++++++++----------- 1 file changed, 36 insertions(+), 24 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c index 377309c3fd..933127e661 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c @@ -27,7 +27,7 @@ #include "pipe/p_debug.h" #include "tgsi_sanity.h" -#include "tgsi_transform.h" +#include "tgsi_iterate.h" #define MAX_REGISTERS 256 @@ -37,7 +37,7 @@ typedef uint reg_flag; struct sanity_check_ctx { - struct tgsi_transform_context xform; + struct tgsi_iterate_context iter; reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REGISTERS / sizeof( uint ) / 8]; reg_flag regs_used[TGSI_FILE_COUNT][MAX_REGISTERS / sizeof( uint ) / 8]; @@ -101,11 +101,12 @@ is_register_used( return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; } -static void xform_instruction( - struct tgsi_transform_context *xform, +static boolean +iter_instruction( + struct tgsi_iterate_context *iter, struct tgsi_full_instruction *inst ) { - struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) xform; + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; uint i; /* There must be no other instructions after END. @@ -126,7 +127,7 @@ static void xform_instruction( file = inst->FullDstRegisters[i].DstRegister.File; if (!check_file_name( ctx, file )) - return; + return TRUE; index = inst->FullDstRegisters[i].DstRegister.Index; if (!is_register_declared( ctx, file, index )) report_error( ctx, "Undeclared destination register" ); @@ -138,7 +139,7 @@ static void xform_instruction( file = inst->FullSrcRegisters[i].SrcRegister.File; if (!check_file_name( ctx, file )) - return; + return TRUE; index = inst->FullSrcRegisters[i].SrcRegister.Index; if (!is_register_declared( ctx, file, index )) report_error( ctx, "Undeclared source register" ); @@ -146,13 +147,16 @@ static void xform_instruction( } ctx->num_instructions++; + + return TRUE; } -static void xform_declaration( - struct tgsi_transform_context *xform, +static boolean +iter_declaration( + struct tgsi_iterate_context *iter, struct tgsi_full_declaration *decl ) { - struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) xform; + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; uint file; uint i; @@ -166,19 +170,22 @@ static void xform_declaration( */ file = decl->Declaration.File; if (!check_file_name( ctx, file )) - return; + return TRUE; for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) { if (is_register_declared( ctx, file, i )) report_error( ctx, "The same register declared twice" ); ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG)); } + + return TRUE; } -static void xform_immediate( - struct tgsi_transform_context *xform, +static boolean +iter_immediate( + struct tgsi_iterate_context *iter, struct tgsi_full_immediate *imm ) { - struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) xform; + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; assert( ctx->num_imms < MAX_REGISTERS ); @@ -196,14 +203,17 @@ static void xform_immediate( */ if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) { report_error( ctx, "Invalid immediate data type" ); - return; + return TRUE; } + + return TRUE; } -static void epilog( - struct tgsi_transform_context *xform ) +static boolean +epilog( + struct tgsi_iterate_context *iter ) { - struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) xform; + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; uint file; /* There must be an END instruction at the end. @@ -228,6 +238,8 @@ static void epilog( */ if (ctx->errors || ctx->warnings) debug_printf( "\n%u errors, %u warnings", ctx->errors, ctx->warnings ); + + return TRUE; } boolean @@ -235,12 +247,12 @@ tgsi_sanity_check( struct tgsi_token *tokens ) { struct sanity_check_ctx ctx; - struct tgsi_token dummy_tokens[16]; - ctx.xform.transform_instruction = xform_instruction; - ctx.xform.transform_declaration = xform_declaration; - ctx.xform.transform_immediate = xform_immediate; - ctx.xform.epilog = epilog; + ctx.iter.prolog = NULL; + ctx.iter.iterate_instruction = iter_instruction; + ctx.iter.iterate_declaration = iter_declaration; + ctx.iter.iterate_immediate = iter_immediate; + ctx.iter.epilog = epilog; memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) ); memset( ctx.regs_used, 0, sizeof( ctx.regs_used ) ); @@ -251,7 +263,7 @@ tgsi_sanity_check( ctx.errors = 0; ctx.warnings = 0; - if (tgsi_transform_shader( tokens, dummy_tokens, 16, &ctx.xform ) == -1) + if (tgsi_iterate_shader( tokens, &ctx.iter ) == -1) return FALSE; return ctx.errors > 0; -- cgit v1.2.3 From ff26c50153b3a348b35843262ceb27062ab37214 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 19 Jul 2008 11:52:41 +0900 Subject: tgsi: Make tgsi_sanity return TRUE on success as documented. --- src/gallium/auxiliary/tgsi/util/tgsi_sanity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c index 933127e661..7fc4c29c68 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c @@ -266,5 +266,5 @@ tgsi_sanity_check( if (tgsi_iterate_shader( tokens, &ctx.iter ) == -1) return FALSE; - return ctx.errors > 0; + return ctx.errors == 0; } -- cgit v1.2.3 From 8aafc03b260ab8923f1b373f7effa75bcdb40a72 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 19 Jul 2008 12:04:37 +0900 Subject: gallium: Finer grained is_format_supported. --- src/gallium/auxiliary/util/u_blit.c | 12 ++- src/gallium/auxiliary/util/u_gen_mipmap.c | 3 +- src/gallium/drivers/cell/ppu/cell_screen.c | 24 ++--- src/gallium/drivers/i915simple/i915_screen.c | 18 ++-- src/gallium/drivers/i965simple/brw_screen.c | 5 +- src/gallium/drivers/softpipe/sp_screen.c | 26 +++--- src/gallium/include/pipe/p_defines.h | 7 +- src/gallium/include/pipe/p_screen.h | 9 +- src/gallium/state_trackers/python/gallium.i | 15 ++- src/gallium/state_trackers/python/samples/tri.py | 8 +- src/gallium/state_trackers/python/tests/texture.py | 13 ++- src/mesa/state_tracker/st_atom_pixeltransfer.c | 2 +- src/mesa/state_tracker/st_cb_bitmap.c | 3 +- src/mesa/state_tracker/st_cb_drawpixels.c | 9 +- src/mesa/state_tracker/st_cb_texture.c | 8 +- src/mesa/state_tracker/st_extensions.c | 14 ++- src/mesa/state_tracker/st_format.c | 101 +++++++++++---------- src/mesa/state_tracker/st_format.h | 2 +- src/mesa/state_tracker/st_gen_mipmap.c | 3 +- src/mesa/state_tracker/st_texture.c | 3 +- 20 files changed, 158 insertions(+), 127 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 3dc9fdd11e..ae087df4cf 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -307,8 +307,10 @@ util_blit_pixels(struct blit_state *ctx, dstY1 = tmp; } - assert(screen->is_format_supported(screen, src->format, PIPE_TEXTURE)); - assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE)); + assert(screen->is_format_supported(screen, src->format, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)); + assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)); if(dst->format == src->format && (dstX1 - dstX0) == srcW && (dstY1 - dstY0) == srcH) { /* FIXME: this will most surely fail for overlapping rectangles */ @@ -319,7 +321,8 @@ util_blit_pixels(struct blit_state *ctx, return; } - assert(screen->is_format_supported(screen, dst->format, PIPE_SURFACE)); + assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)); /* * XXX for now we're always creating a temporary texture. @@ -449,7 +452,8 @@ util_blit_pixels_tex(struct blit_state *ctx, t0 = srcY0 / (float)tex->height[0]; t1 = srcY1 / (float)tex->height[0]; - assert(screen->is_format_supported(screen, dst->format, PIPE_SURFACE)); + assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)); /* save state (restored below) */ cso_save_blend(ctx->cso); diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 5313a8008a..4999822068 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -858,7 +858,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, uint zslice = 0; /* check if we can render in the texture's format */ - if (!screen->is_format_supported(screen, pt->format, PIPE_SURFACE)) { + if (!screen->is_format_supported(screen, pt->format, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) { fallback_gen_mipmap(ctx, pt, face, baseLevel, lastLevel); return; } diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index 5198b51441..cf9b68b695 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -115,23 +115,17 @@ cell_get_paramf(struct pipe_screen *screen, int param) static boolean cell_is_format_supported( struct pipe_screen *screen, - enum pipe_format format, uint type ) + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) { - switch (type) { - case PIPE_TEXTURE: - /* cell supports most texture formats, XXX for now anyway */ - if (format == PIPE_FORMAT_DXT5_RGBA || - format == PIPE_FORMAT_R8G8B8A8_SRGB) - return FALSE; - else - return TRUE; - case PIPE_SURFACE: - /* cell supports all (off-screen) surface formats, XXX for now */ - return TRUE; - default: - assert(0); + /* cell supports most formats, XXX for now anyway */ + if (format == PIPE_FORMAT_DXT5_RGBA || + format == PIPE_FORMAT_R8G8B8A8_SRGB) return FALSE; - } + else + return TRUE; } diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index ba8f183bdf..4b1b8af7da 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -148,7 +148,10 @@ i915_get_paramf(struct pipe_screen *screen, int param) static boolean i915_is_format_supported( struct pipe_screen *screen, - enum pipe_format format, uint type ) + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) { static const enum pipe_format tex_supported[] = { PIPE_FORMAT_R8G8B8A8_UNORM, @@ -173,17 +176,10 @@ i915_is_format_supported( struct pipe_screen *screen, const enum pipe_format *list; uint i; - switch (type) { - case PIPE_TEXTURE: - list = tex_supported; - break; - case PIPE_SURFACE: + if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) list = surface_supported; - break; - default: - assert(0); - return FALSE; - } + else + list = tex_supported; for (i = 0; list[i] != PIPE_FORMAT_NONE; i++) { if (list[i] == format) diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c index b700f7e4f5..6d8f24d1c4 100644 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -136,7 +136,10 @@ brw_get_paramf(struct pipe_screen *screen, int param) static boolean brw_is_format_supported( struct pipe_screen *screen, - enum pipe_format format, uint type ) + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) { #if 0 /* XXX: This is broken -- rewrite if still needed. */ diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index f6193bfaf9..3f9d4b0ed3 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -115,23 +115,19 @@ softpipe_get_paramf(struct pipe_screen *screen, int param) */ static boolean softpipe_is_format_supported( struct pipe_screen *screen, - enum pipe_format format, uint type ) + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) { - switch (type) { - case PIPE_TEXTURE: - case PIPE_SURFACE: - switch(format) { - case PIPE_FORMAT_DXT1_RGB: - case PIPE_FORMAT_DXT1_RGBA: - case PIPE_FORMAT_DXT3_RGBA: - case PIPE_FORMAT_DXT5_RGBA: - return FALSE; - default: - return TRUE; - } - default: - assert(0); + switch(format) { + case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_RGBA: + case PIPE_FORMAT_DXT3_RGBA: + case PIPE_FORMAT_DXT5_RGBA: return FALSE; + default: + return TRUE; } } diff --git a/src/gallium/include/pipe/p_defines.h b/src/gallium/include/pipe/p_defines.h index bc4d7c845a..b1d100ef53 100644 --- a/src/gallium/include/pipe/p_defines.h +++ b/src/gallium/include/pipe/p_defines.h @@ -172,11 +172,8 @@ enum pipe_texture_target { #define PIPE_TEXTURE_USAGE_DEPTH_STENCIL 0x8 #define PIPE_TEXTURE_USAGE_SAMPLER 0x10 -/** - * Surfaces, textures, etc. (others may be added) - */ -#define PIPE_TEXTURE 1 -#define PIPE_SURFACE 2 /**< user-created surfaces */ +#define PIPE_TEXTURE_GEOM_NON_SQUARE 0x1 +#define PIPE_TEXTURE_GEOM_NON_POWER_OF_TWO 0x2 /** diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index cc8430dae1..b15affef7a 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -77,11 +77,14 @@ struct pipe_screen { /** * Check if the given pipe_format is supported as a texture or * drawing surface. - * \param type one of PIPE_TEXTURE, PIPE_SURFACE + * \param tex_usage bitmask of PIPE_TEXTURE_USAGE_* + * \param flags bitmask of PIPE_TEXTURE_GEOM_* */ boolean (*is_format_supported)( struct pipe_screen *, - enum pipe_format format, - uint type ); + enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ); /** * Create a new texture object, using the given template info. diff --git a/src/gallium/state_trackers/python/gallium.i b/src/gallium/state_trackers/python/gallium.i index c08ac87aca..8d8b762ea5 100644 --- a/src/gallium/state_trackers/python/gallium.i +++ b/src/gallium/state_trackers/python/gallium.i @@ -158,8 +158,15 @@ struct st_context { * drawing surface. * \param type one of PIPE_TEXTURE, PIPE_SURFACE */ - int is_format_supported( enum pipe_format format, unsigned type ) { - return $self->screen->is_format_supported( $self->screen, format, type); + int is_format_supported( enum pipe_format format, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags ) { + return $self->screen->is_format_supported( $self->screen, + format, + target, + tex_usage, + geom_flags ); } struct st_context * @@ -175,7 +182,7 @@ struct st_context { unsigned depth = 1, unsigned last_level = 0, enum pipe_texture_target target = PIPE_TEXTURE_2D, - unsigned usage = 0 + unsigned tex_usage = 0 ) { struct pipe_texture templat; memset(&templat, 0, sizeof(templat)); @@ -186,7 +193,7 @@ struct st_context { templat.depth[0] = depth; templat.last_level = last_level; templat.target = target; - templat.tex_usage = usage; + templat.tex_usage = tex_usage; return $self->screen->texture_create($self->screen, &templat); } diff --git a/src/gallium/state_trackers/python/samples/tri.py b/src/gallium/state_trackers/python/samples/tri.py index 3665922929..1271c67627 100644 --- a/src/gallium/state_trackers/python/samples/tri.py +++ b/src/gallium/state_trackers/python/samples/tri.py @@ -140,9 +140,11 @@ def test(dev): ctx.set_clip(clip) # framebuffer - cbuf = dev.texture_create(PIPE_FORMAT_X8R8G8B8_UNORM, - width, height, - usage=PIPE_TEXTURE_USAGE_DISPLAY_TARGET) + cbuf = dev.texture_create( + PIPE_FORMAT_X8R8G8B8_UNORM, + width, height, + tex_usage=PIPE_TEXTURE_USAGE_DISPLAY_TARGET, + ) _cbuf = cbuf.get_surface(usage = PIPE_BUFFER_USAGE_GPU_READ|PIPE_BUFFER_USAGE_GPU_WRITE) fb = Framebuffer() fb.width = width diff --git a/src/gallium/state_trackers/python/tests/texture.py b/src/gallium/state_trackers/python/tests/texture.py index 16ad78c8aa..b2ca9f416f 100644 --- a/src/gallium/state_trackers/python/tests/texture.py +++ b/src/gallium/state_trackers/python/tests/texture.py @@ -136,7 +136,7 @@ class TextureTest(TestCase): level = self.level zslice = self.zslice - if not dev.is_format_supported(format, PIPE_TEXTURE): + if not dev.is_format_supported(format, PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_SAMPLER, 0): raise TestSkip ctx = self.dev.context_create() @@ -199,6 +199,7 @@ class TextureTest(TestCase): height = height, depth = depth, last_level = last_level, + tex_usage = PIPE_TEXTURE_USAGE_SAMPLER, ) expected_rgba = FloatArray(height*width*4) @@ -212,10 +213,12 @@ class TextureTest(TestCase): ctx.set_sampler_texture(0, texture) # framebuffer - cbuf_tex = dev.texture_create(PIPE_FORMAT_A8R8G8B8_UNORM, - width, - height, - usage = PIPE_TEXTURE_USAGE_RENDER_TARGET) + cbuf_tex = dev.texture_create( + PIPE_FORMAT_A8R8G8B8_UNORM, + width, + height, + tex_usage = PIPE_TEXTURE_USAGE_RENDER_TARGET, + ) cbuf = cbuf_tex.get_surface(usage = PIPE_BUFFER_USAGE_GPU_WRITE|PIPE_BUFFER_USAGE_GPU_READ) fb = Framebuffer() diff --git a/src/mesa/state_tracker/st_atom_pixeltransfer.c b/src/mesa/state_tracker/st_atom_pixeltransfer.c index e4de875e8c..a357b71677 100644 --- a/src/mesa/state_tracker/st_atom_pixeltransfer.c +++ b/src/mesa/state_tracker/st_atom_pixeltransfer.c @@ -122,7 +122,7 @@ create_color_map_texture(GLcontext *ctx) const uint texSize = 256; /* simple, and usually perfect */ /* find an RGBA texture format */ - format = st_choose_format(pipe, GL_RGBA, PIPE_TEXTURE); + format = st_choose_format(pipe, GL_RGBA, PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_SAMPLER); /* create texture for color map/table */ pt = st_texture_create(ctx->st, PIPE_TEXTURE_2D, format, 0, diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index de86832342..d5696a909f 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -746,7 +746,8 @@ st_init_bitmap(struct st_context *st) st->bitmap.rasterizer.bypass_vs = 1; /* find a usable texture format */ - if (screen->is_format_supported(screen, PIPE_FORMAT_I8_UNORM, PIPE_TEXTURE)) { + if (screen->is_format_supported(screen, PIPE_FORMAT_I8_UNORM, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)) { st->bitmap.tex_format = PIPE_FORMAT_I8_UNORM; } else { diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 2ebfcaf82b..db0c9fbd09 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -995,18 +995,21 @@ st_CopyPixels(GLcontext *ctx, GLint srcx, GLint srcy, srcFormat = rbRead->texture->format; - if (screen->is_format_supported(screen, srcFormat, PIPE_TEXTURE)) { + if (screen->is_format_supported(screen, srcFormat, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)) { texFormat = srcFormat; } else { /* srcFormat can't be used as a texture format */ if (type == GL_DEPTH) { - texFormat = st_choose_format(pipe, GL_DEPTH_COMPONENT, PIPE_TEXTURE); + texFormat = st_choose_format(pipe, GL_DEPTH_COMPONENT, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_DEPTH_STENCIL); assert(texFormat != PIPE_FORMAT_NONE); /* XXX no depth texture formats??? */ } else { /* default color format */ - texFormat = st_choose_format(pipe, GL_RGBA, PIPE_TEXTURE); + texFormat = st_choose_format(pipe, GL_RGBA, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER); assert(texFormat != PIPE_FORMAT_NONE); } } diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index de782e8232..1f94a0b9ef 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -1210,9 +1210,13 @@ do_copy_texsubimage(GLcontext *ctx, use_fallback = GL_FALSE; } else if (screen->is_format_supported(screen, strb->surface->format, - PIPE_TEXTURE) && + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, + 0) && screen->is_format_supported(screen, dest_surface->format, - PIPE_SURFACE)) { + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_RENDER_TARGET, + 0)) { boolean do_flip = (st_fb_orientation(ctx->ReadBuffer) == Y_0_TOP); int srcY0, srcY1; if (do_flip) { diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index d804d2b453..cacf972a1b 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -213,18 +213,24 @@ void st_init_extensions(struct st_context *st) } if (screen->is_format_supported(screen, PIPE_FORMAT_R8G8B8A8_SRGB, - PIPE_TEXTURE)) { + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)) { ctx->Extensions.EXT_texture_sRGB = GL_TRUE; } #if 01 if (screen->is_format_supported(screen, PIPE_FORMAT_DXT5_RGBA, - PIPE_TEXTURE)) { + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)) { ctx->Extensions.EXT_texture_compression_s3tc = GL_TRUE; } #endif - if (screen->is_format_supported(screen, PIPE_FORMAT_YCBCR, PIPE_TEXTURE) || - screen->is_format_supported(screen, PIPE_FORMAT_YCBCR_REV, PIPE_TEXTURE)) { + if (screen->is_format_supported(screen, PIPE_FORMAT_YCBCR, + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0) || + screen->is_format_supported(screen, PIPE_FORMAT_YCBCR_REV, + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0)) { ctx->Extensions.MESA_ycbcr_texture = GL_TRUE; } diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index 50a06868df..b6d97ef659 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -281,7 +281,10 @@ st_mesa_format_to_pipe_format(GLuint mesaFormat) * Find an RGBA format supported by the context/winsys. */ static enum pipe_format -default_rgba_format(struct pipe_screen *screen, uint type) +default_rgba_format(struct pipe_screen *screen, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags) { static const enum pipe_format colorFormats[] = { PIPE_FORMAT_A8R8G8B8_UNORM, @@ -291,7 +294,7 @@ default_rgba_format(struct pipe_screen *screen, uint type) }; uint i; for (i = 0; i < Elements(colorFormats); i++) { - if (screen->is_format_supported( screen, colorFormats[i], type )) { + if (screen->is_format_supported( screen, colorFormats[i], target, tex_usage, geom_flags )) { return colorFormats[i]; } } @@ -303,13 +306,16 @@ default_rgba_format(struct pipe_screen *screen, uint type) * Search list of formats for first RGBA format with >8 bits/channel. */ static enum pipe_format -default_deep_rgba_format(struct pipe_screen *screen, uint type) +default_deep_rgba_format(struct pipe_screen *screen, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags) { - if (screen->is_format_supported(screen, PIPE_FORMAT_R16G16B16A16_SNORM, type)) { + if (screen->is_format_supported(screen, PIPE_FORMAT_R16G16B16A16_SNORM, target, tex_usage, geom_flags)) { return PIPE_FORMAT_R16G16B16A16_SNORM; } - if (type == PIPE_TEXTURE) - return default_rgba_format(screen, type); + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) + return default_rgba_format(screen, target, tex_usage, geom_flags); else return PIPE_FORMAT_NONE; } @@ -319,7 +325,10 @@ default_deep_rgba_format(struct pipe_screen *screen, uint type) * Find an Z format supported by the context/winsys. */ static enum pipe_format -default_depth_format(struct pipe_screen *screen, uint type) +default_depth_format(struct pipe_screen *screen, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags) { static const enum pipe_format zFormats[] = { PIPE_FORMAT_Z16_UNORM, @@ -329,7 +338,7 @@ default_depth_format(struct pipe_screen *screen, uint type) }; uint i; for (i = 0; i < Elements(zFormats); i++) { - if (screen->is_format_supported( screen, zFormats[i], type )) { + if (screen->is_format_supported( screen, zFormats[i], target, tex_usage, geom_flags )) { return zFormats[i]; } } @@ -343,12 +352,10 @@ default_depth_format(struct pipe_screen *screen, uint type) */ enum pipe_format st_choose_format(struct pipe_context *pipe, GLint internalFormat, - uint surfType) + enum pipe_texture_target target, unsigned tex_usage) { struct pipe_screen *screen = pipe->screen; - - assert(surfType == PIPE_SURFACE || - surfType == PIPE_TEXTURE); + unsigned geom_flags = 0; switch (internalFormat) { case 4: @@ -360,38 +367,38 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, case GL_RGBA8: case GL_RGB10_A2: case GL_RGBA12: - return default_rgba_format( screen, surfType ); + return default_rgba_format( screen, target, tex_usage, geom_flags ); case GL_RGBA16: - if (surfType == PIPE_SURFACE) - return default_deep_rgba_format( screen, surfType ); + if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) + return default_deep_rgba_format( screen, target, tex_usage, geom_flags ); else - return default_rgba_format( screen, surfType ); + return default_rgba_format( screen, target, tex_usage, geom_flags ); case GL_RGBA4: case GL_RGBA2: - if (screen->is_format_supported( screen, PIPE_FORMAT_A4R4G4B4_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_A4R4G4B4_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_A4R4G4B4_UNORM; - return default_rgba_format( screen, surfType ); + return default_rgba_format( screen, target, tex_usage, geom_flags ); case GL_RGB5_A1: - if (screen->is_format_supported( screen, PIPE_FORMAT_A1R5G5B5_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_A1R5G5B5_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_A1R5G5B5_UNORM; - return default_rgba_format( screen, surfType ); + return default_rgba_format( screen, target, tex_usage, geom_flags ); case GL_RGB8: case GL_RGB10: case GL_RGB12: case GL_RGB16: - return default_rgba_format( screen, surfType ); + return default_rgba_format( screen, target, tex_usage, geom_flags ); case GL_RGB5: case GL_RGB4: case GL_R3_G3_B2: - if (screen->is_format_supported( screen, PIPE_FORMAT_A1R5G5B5_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_A1R5G5B5_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_A1R5G5B5_UNORM; - if (screen->is_format_supported( screen, PIPE_FORMAT_R5G6B5_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_R5G6B5_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_R5G6B5_UNORM; - return default_rgba_format( screen, surfType ); + return default_rgba_format( screen, target, tex_usage, geom_flags ); case GL_ALPHA: case GL_ALPHA4: @@ -399,9 +406,9 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, case GL_ALPHA12: case GL_ALPHA16: case GL_COMPRESSED_ALPHA: - if (screen->is_format_supported( screen, PIPE_FORMAT_A8_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_A8_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_A8_UNORM; - return default_rgba_format( screen, surfType ); + return default_rgba_format( screen, target, tex_usage, geom_flags ); case 1: case GL_LUMINANCE: @@ -410,9 +417,9 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, case GL_LUMINANCE12: case GL_LUMINANCE16: case GL_COMPRESSED_LUMINANCE: - if (screen->is_format_supported( screen, PIPE_FORMAT_L8_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_L8_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_L8_UNORM; - return default_rgba_format( screen, surfType ); + return default_rgba_format( screen, target, tex_usage, geom_flags ); case 2: case GL_LUMINANCE_ALPHA: @@ -423,9 +430,9 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, case GL_LUMINANCE12_ALPHA12: case GL_LUMINANCE16_ALPHA16: case GL_COMPRESSED_LUMINANCE_ALPHA: - if (screen->is_format_supported( screen, PIPE_FORMAT_A8L8_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_A8L8_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_A8L8_UNORM; - return default_rgba_format( screen, surfType ); + return default_rgba_format( screen, target, tex_usage, geom_flags ); case GL_INTENSITY: case GL_INTENSITY4: @@ -433,17 +440,17 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, case GL_INTENSITY12: case GL_INTENSITY16: case GL_COMPRESSED_INTENSITY: - if (screen->is_format_supported( screen, PIPE_FORMAT_I8_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_I8_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_I8_UNORM; - return default_rgba_format( screen, surfType ); + return default_rgba_format( screen, target, tex_usage, geom_flags ); case GL_YCBCR_MESA: if (screen->is_format_supported(screen, PIPE_FORMAT_YCBCR, - PIPE_TEXTURE)) { + target, tex_usage, geom_flags)) { return PIPE_FORMAT_YCBCR; } if (screen->is_format_supported(screen, PIPE_FORMAT_YCBCR_REV, - PIPE_TEXTURE)) { + target, tex_usage, geom_flags)) { return PIPE_FORMAT_YCBCR_REV; } return PIPE_FORMAT_NONE; @@ -472,40 +479,40 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, #endif case GL_DEPTH_COMPONENT16: - if (screen->is_format_supported( screen, PIPE_FORMAT_Z16_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_Z16_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_Z16_UNORM; /* fall-through */ case GL_DEPTH_COMPONENT24: - if (screen->is_format_supported( screen, PIPE_FORMAT_S8Z24_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_S8Z24_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_S8Z24_UNORM; - if (screen->is_format_supported( screen, PIPE_FORMAT_Z24S8_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_Z24S8_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_Z24S8_UNORM; /* fall-through */ case GL_DEPTH_COMPONENT32: - if (screen->is_format_supported( screen, PIPE_FORMAT_Z32_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_Z32_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_Z32_UNORM; /* fall-through */ case GL_DEPTH_COMPONENT: - return default_depth_format( screen, surfType ); + return default_depth_format( screen, target, tex_usage, geom_flags ); case GL_STENCIL_INDEX: case GL_STENCIL_INDEX1_EXT: case GL_STENCIL_INDEX4_EXT: case GL_STENCIL_INDEX8_EXT: case GL_STENCIL_INDEX16_EXT: - if (screen->is_format_supported( screen, PIPE_FORMAT_S8_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_S8_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_S8_UNORM; - if (screen->is_format_supported( screen, PIPE_FORMAT_S8Z24_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_S8Z24_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_S8Z24_UNORM; - if (screen->is_format_supported( screen, PIPE_FORMAT_Z24S8_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_Z24S8_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_Z24S8_UNORM; return PIPE_FORMAT_NONE; case GL_DEPTH_STENCIL_EXT: case GL_DEPTH24_STENCIL8_EXT: - if (screen->is_format_supported( screen, PIPE_FORMAT_S8Z24_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_S8Z24_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_S8Z24_UNORM; - if (screen->is_format_supported( screen, PIPE_FORMAT_Z24S8_UNORM, surfType )) + if (screen->is_format_supported( screen, PIPE_FORMAT_Z24S8_UNORM, target, tex_usage, geom_flags )) return PIPE_FORMAT_Z24S8_UNORM; return PIPE_FORMAT_NONE; @@ -521,7 +528,8 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, enum pipe_format st_choose_renderbuffer_format(struct pipe_context *pipe, GLint internalFormat) { - return st_choose_format(pipe, internalFormat, PIPE_SURFACE); + return st_choose_format(pipe, internalFormat, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_RENDER_TARGET); } @@ -587,7 +595,8 @@ st_ChooseTextureFormat(GLcontext *ctx, GLint internalFormat, (void) format; (void) type; - pFormat = st_choose_format(ctx->st->pipe, internalFormat, PIPE_TEXTURE); + pFormat = st_choose_format(ctx->st->pipe, internalFormat, PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER); if (pFormat == PIPE_FORMAT_NONE) return NULL; diff --git a/src/mesa/state_tracker/st_format.h b/src/mesa/state_tracker/st_format.h index ff0fd042db..3f5ac3201b 100644 --- a/src/mesa/state_tracker/st_format.h +++ b/src/mesa/state_tracker/st_format.h @@ -65,7 +65,7 @@ st_mesa_format_to_pipe_format(GLuint mesaFormat); extern enum pipe_format st_choose_format(struct pipe_context *pipe, GLint internalFormat, - uint surfType); + enum pipe_texture_target target, unsigned tex_usage); extern enum pipe_format st_choose_renderbuffer_format(struct pipe_context *pipe, GLint internalFormat); diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 2fc00df429..6db9bc0dd5 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -86,7 +86,8 @@ st_render_mipmap(struct st_context *st, assert(target != GL_TEXTURE_3D); /* not done yet */ /* check if we can render in the texture's format */ - if (!screen->is_format_supported(screen, pt->format, PIPE_SURFACE)) { + if (!screen->is_format_supported(screen, pt->format, target, + PIPE_TEXTURE_USAGE_RENDER_TARGET, 0)) { return FALSE; } diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index 8222826e7a..3e5054ecd2 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -88,7 +88,8 @@ st_texture_create(struct st_context *st, _mesa_lookup_enum_by_nr(format), last_level); assert(format); - assert(screen->is_format_supported(screen, format, PIPE_TEXTURE)); + assert(screen->is_format_supported(screen, format, target, + PIPE_TEXTURE_USAGE_SAMPLER, 0)); memset(&pt, 0, sizeof(pt)); pt.target = target; -- cgit v1.2.3 From 43df2fe2d9fe242174aba58b5de191c3d1fff212 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 20 Jul 2008 09:27:48 +0900 Subject: gallium: Fix typo in function name. --- src/gallium/auxiliary/util/p_tile.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index 1daae6b640..1bf0d72733 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -346,7 +346,7 @@ r5g6b5_get_tile_rgba(ushort *src, static void -r5g5b5_put_tile_rgba(ushort *dst, +r5g6b5_put_tile_rgba(ushort *dst, unsigned w, unsigned h, const float *p, unsigned src_stride) @@ -827,7 +827,7 @@ pipe_put_tile_rgba(struct pipe_surface *ps, /*a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/ break; case PIPE_FORMAT_R5G6B5_UNORM: - r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); + r5g6b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); break; case PIPE_FORMAT_R8G8B8A8_UNORM: assert(0); -- cgit v1.2.3 From 2170ec9048eab751828700728c1cc8264c860229 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 20 Jul 2008 18:11:43 +0900 Subject: pipebuffer: More detailed description of bufer over-/undereflows. --- src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c | 49 +++++++++++++++------- 1 file changed, 34 insertions(+), 15 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index affa6aa85c..d02e3500ff 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -109,7 +109,7 @@ static const uint8_t random_pattern[32] = { static INLINE void fill_random_pattern(uint8_t *dst, size_t size) { - unsigned i = 0; + size_t i = 0; while(size--) { *dst++ = random_pattern[i++]; i &= sizeof(random_pattern) - 1; @@ -118,15 +118,21 @@ fill_random_pattern(uint8_t *dst, size_t size) static INLINE boolean -check_random_pattern(const uint8_t *dst, size_t size) +check_random_pattern(const uint8_t *dst, size_t size, + size_t *min_ofs, size_t *max_ofs) { - unsigned i = 0; - while(size--) { - if(*dst++ != random_pattern[i++]) - return FALSE; - i &= sizeof(random_pattern) - 1; + boolean result = TRUE; + size_t i; + *min_ofs = size; + *max_ofs = 0; + for(i = 0; i < size; ++i) { + if(*dst++ != random_pattern[i % sizeof(random_pattern)]) { + *min_ofs = MIN2(*min_ofs, i); + *max_ofs = MIN2(*max_ofs, i); + result = FALSE; + } } - return TRUE; + return result; } @@ -141,15 +147,28 @@ pb_debug_buffer_destroy(struct pb_buffer *_buf) map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_READ); assert(map); if(map) { - if(!check_random_pattern(map, buf->underflow_size)) { - debug_error("buffer underflow detected\n"); - debug_assert(0); + boolean underflow, overflow; + size_t min_ofs, max_ofs; + + underflow = !check_random_pattern(map, buf->underflow_size, + &min_ofs, &max_ofs); + if(underflow) { + debug_printf("buffer underflow (%u of %u bytes) detected\n", + buf->underflow_size - min_ofs, + buf->underflow_size); } - if(!check_random_pattern(map + buf->underflow_size + buf->base.base.size, - buf->overflow_size)) { - debug_error("buffer overflow detected\n"); - debug_assert(0); + + overflow = !check_random_pattern(map + buf->underflow_size + buf->base.base.size, + buf->overflow_size, + &min_ofs, &max_ofs); + if(overflow) { + debug_printf("buffer overflow (%u of %u bytes) detected\n", + max_ofs, + buf->overflow_size); } + + debug_assert(!underflow && !overflow); + pb_unmap(buf->buffer); } -- cgit v1.2.3 From 5ded4ffc506eb051b151d3e8b1e71b13576e951a Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 20 Jul 2008 16:01:50 +0200 Subject: tgsi: Split tgsi_dump into two modules. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 791 +----------------------- src/gallium/auxiliary/tgsi/util/tgsi_dump.h | 17 +- src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c | 853 ++++++++++++++++++++++++++ src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h | 49 ++ 4 files changed, 916 insertions(+), 794 deletions(-) create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c create mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 0cf4454f25..5a84b99166 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -27,7 +27,6 @@ #include "pipe/p_debug.h" #include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" #include "util/u_string.h" #include "tgsi_dump.h" #include "tgsi_parse.h" @@ -56,13 +55,6 @@ dump_enum( #define FLT(F) debug_printf( "%10.4f", F ) #define ENM(E,ENUMS) dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) -static const char *TGSI_PROCESSOR_TYPES[] = -{ - "PROCESSOR_FRAGMENT", - "PROCESSOR_VERTEX", - "PROCESSOR_GEOMETRY" -}; - static const char *TGSI_PROCESSOR_TYPES_SHORT[] = { "FRAG", @@ -70,25 +62,6 @@ static const char *TGSI_PROCESSOR_TYPES_SHORT[] = "GEOM" }; -static const char *TGSI_TOKEN_TYPES[] = -{ - "TOKEN_TYPE_DECLARATION", - "TOKEN_TYPE_IMMEDIATE", - "TOKEN_TYPE_INSTRUCTION" -}; - -static const char *TGSI_FILES[] = -{ - "FILE_NULL", - "FILE_CONSTANT", - "FILE_INPUT", - "FILE_OUTPUT", - "FILE_TEMPORARY", - "FILE_SAMPLER", - "FILE_ADDRESS", - "FILE_IMMEDIATE" -}; - static const char *TGSI_FILES_SHORT[] = { "NULL", @@ -101,20 +74,6 @@ static const char *TGSI_FILES_SHORT[] = "IMM" }; -static const char *TGSI_DECLARES[] = -{ - "DECLARE_RANGE", - "DECLARE_MASK" -}; - -static const char *TGSI_INTERPOLATES[] = -{ - "INTERPOLATE_CONSTANT", - "INTERPOLATE_LINEAR", - "INTERPOLATE_PERSPECTIVE", - "INTERPOLATE_ATTRIB" -}; - static const char *TGSI_INTERPOLATES_SHORT[] = { "CONSTANT", @@ -122,17 +81,6 @@ static const char *TGSI_INTERPOLATES_SHORT[] = "PERSPECTIVE" }; -static const char *TGSI_SEMANTICS[] = -{ - "SEMANTIC_POSITION", - "SEMANTIC_COLOR", - "SEMANTIC_BCOLOR", - "SEMANTIC_FOG", - "SEMANTIC_PSIZE", - "SEMANTIC_GENERIC", - "SEMANTIC_NORMAL" -}; - static const char *TGSI_SEMANTICS_SHORT[] = { "POSITION", @@ -144,138 +92,11 @@ static const char *TGSI_SEMANTICS_SHORT[] = "NORMAL" }; -static const char *TGSI_IMMS[] = -{ - "IMM_FLOAT32" -}; - static const char *TGSI_IMMS_SHORT[] = { "FLT32" }; -static const char *TGSI_OPCODES[TGSI_OPCODE_LAST] = -{ - "OPCODE_ARL", - "OPCODE_MOV", - "OPCODE_LIT", - "OPCODE_RCP", - "OPCODE_RSQ", - "OPCODE_EXP", - "OPCODE_LOG", - "OPCODE_MUL", - "OPCODE_ADD", - "OPCODE_DP3", - "OPCODE_DP4", - "OPCODE_DST", - "OPCODE_MIN", - "OPCODE_MAX", - "OPCODE_SLT", - "OPCODE_SGE", - "OPCODE_MAD", - "OPCODE_SUB", - "OPCODE_LERP", - "OPCODE_CND", - "OPCODE_CND0", - "OPCODE_DOT2ADD", - "OPCODE_INDEX", - "OPCODE_NEGATE", - "OPCODE_FRAC", - "OPCODE_CLAMP", - "OPCODE_FLOOR", - "OPCODE_ROUND", - "OPCODE_EXPBASE2", - "OPCODE_LOGBASE2", - "OPCODE_POWER", - "OPCODE_CROSSPRODUCT", - "OPCODE_MULTIPLYMATRIX", - "OPCODE_ABS", - "OPCODE_RCC", - "OPCODE_DPH", - "OPCODE_COS", - "OPCODE_DDX", - "OPCODE_DDY", - "OPCODE_KILP", - "OPCODE_PK2H", - "OPCODE_PK2US", - "OPCODE_PK4B", - "OPCODE_PK4UB", - "OPCODE_RFL", - "OPCODE_SEQ", - "OPCODE_SFL", - "OPCODE_SGT", - "OPCODE_SIN", - "OPCODE_SLE", - "OPCODE_SNE", - "OPCODE_STR", - "OPCODE_TEX", - "OPCODE_TXD", - "OPCODE_TXP", - "OPCODE_UP2H", - "OPCODE_UP2US", - "OPCODE_UP4B", - "OPCODE_UP4UB", - "OPCODE_X2D", - "OPCODE_ARA", - "OPCODE_ARR", - "OPCODE_BRA", - "OPCODE_CAL", - "OPCODE_RET", - "OPCODE_SSG", - "OPCODE_CMP", - "OPCODE_SCS", - "OPCODE_TXB", - "OPCODE_NRM", - "OPCODE_DIV", - "OPCODE_DP2", - "OPCODE_TXL", - "OPCODE_BRK", - "OPCODE_IF", - "OPCODE_LOOP", - "OPCODE_REP", - "OPCODE_ELSE", - "OPCODE_ENDIF", - "OPCODE_ENDLOOP", - "OPCODE_ENDREP", - "OPCODE_PUSHA", - "OPCODE_POPA", - "OPCODE_CEIL", - "OPCODE_I2F", - "OPCODE_NOT", - "OPCODE_TRUNC", - "OPCODE_SHL", - "OPCODE_SHR", - "OPCODE_AND", - "OPCODE_OR", - "OPCODE_MOD", - "OPCODE_XOR", - "OPCODE_SAD", - "OPCODE_TXF", - "OPCODE_TXQ", - "OPCODE_CONT", - "OPCODE_EMIT", - "OPCODE_ENDPRIM", - "OPCODE_BGNLOOP2", - "OPCODE_BGNSUB", - "OPCODE_ENDLOOP2", - "OPCODE_ENDSUB", - "OPCODE_NOISE1", - "OPCODE_NOISE2", - "OPCODE_NOISE3", - "OPCODE_NOISE4", - "OPCODE_NOP", - "OPCODE_M4X3", - "OPCODE_M3X4", - "OPCODE_M3X3", - "OPCODE_M3X2", - "OPCODE_NRM4", - "OPCODE_CALLNZ", - "OPCODE_IFC", - "OPCODE_BREAKC", - "OPCODE_KIL", - "OPCODE_END" -}; - static const char *TGSI_OPCODES_SHORT[TGSI_OPCODE_LAST] = { "ARL", @@ -399,49 +220,6 @@ static const char *TGSI_OPCODES_SHORT[TGSI_OPCODE_LAST] = "SWZ" }; -static const char *TGSI_SATS[] = -{ - "SAT_NONE", - "SAT_ZERO_ONE", - "SAT_MINUS_PLUS_ONE" -}; - -static const char *TGSI_INSTRUCTION_EXTS[] = -{ - "INSTRUCTION_EXT_TYPE_NV", - "INSTRUCTION_EXT_TYPE_LABEL", - "INSTRUCTION_EXT_TYPE_TEXTURE" -}; - -static const char *TGSI_PRECISIONS[] = -{ - "PRECISION_DEFAULT", - "TGSI_PRECISION_FLOAT32", - "TGSI_PRECISION_FLOAT16", - "TGSI_PRECISION_FIXED12" -}; - -static const char *TGSI_CCS[] = -{ - "CC_GT", - "CC_EQ", - "CC_LT", - "CC_UN", - "CC_GE", - "CC_LE", - "CC_NE", - "CC_TR", - "CC_FL" -}; - -static const char *TGSI_SWIZZLES[] = -{ - "SWIZZLE_X", - "SWIZZLE_Y", - "SWIZZLE_Z", - "SWIZZLE_W" -}; - static const char *TGSI_SWIZZLES_SHORT[] = { "x", @@ -450,19 +228,6 @@ static const char *TGSI_SWIZZLES_SHORT[] = "w" }; -static const char *TGSI_TEXTURES[] = -{ - "TEXTURE_UNKNOWN", - "TEXTURE_1D", - "TEXTURE_2D", - "TEXTURE_3D", - "TEXTURE_CUBE", - "TEXTURE_RECT", - "TEXTURE_SHADOW1D", - "TEXTURE_SHADOW2D", - "TEXTURE_SHADOWRECT" -}; - static const char *TGSI_TEXTURES_SHORT[] = { "UNKNOWN", @@ -476,22 +241,6 @@ static const char *TGSI_TEXTURES_SHORT[] = "SHADOWRECT" }; -static const char *TGSI_SRC_REGISTER_EXTS[] = -{ - "SRC_REGISTER_EXT_TYPE_SWZ", - "SRC_REGISTER_EXT_TYPE_MOD" -}; - -static const char *TGSI_EXTSWIZZLES[] = -{ - "EXTSWIZZLE_X", - "EXTSWIZZLE_Y", - "EXTSWIZZLE_Z", - "EXTSWIZZLE_W", - "EXTSWIZZLE_ZERO", - "EXTSWIZZLE_ONE" -}; - static const char *TGSI_EXTSWIZZLES_SHORT[] = { "x", @@ -502,43 +251,6 @@ static const char *TGSI_EXTSWIZZLES_SHORT[] = "1" }; -static const char *TGSI_WRITEMASKS[] = -{ - "0", - "WRITEMASK_X", - "WRITEMASK_Y", - "WRITEMASK_XY", - "WRITEMASK_Z", - "WRITEMASK_XZ", - "WRITEMASK_YZ", - "WRITEMASK_XYZ", - "WRITEMASK_W", - "WRITEMASK_XW", - "WRITEMASK_YW", - "WRITEMASK_XYW", - "WRITEMASK_ZW", - "WRITEMASK_XZW", - "WRITEMASK_YZW", - "WRITEMASK_XYZW" -}; - -static const char *TGSI_DST_REGISTER_EXTS[] = -{ - "DST_REGISTER_EXT_TYPE_CONDCODE", - "DST_REGISTER_EXT_TYPE_MODULATE" -}; - -static const char *TGSI_MODULATES[] = -{ - "MODULATE_1X", - "MODULATE_2X", - "MODULATE_4X", - "MODULATE_8X", - "MODULATE_HALF", - "MODULATE_QUARTER", - "MODULATE_EIGHTH" -}; - static const char *TGSI_MODULATES_SHORT[TGSI_MODULATE_COUNT] = { "", @@ -552,7 +264,7 @@ static const char *TGSI_MODULATES_SHORT[TGSI_MODULATE_COUNT] = void tgsi_dump_declaration( - const struct tgsi_full_declaration *decl ) + const struct tgsi_full_declaration *decl ) { TXT( "\nDCL " ); ENM( decl->Declaration.File, TGSI_FILES_SHORT ); @@ -596,62 +308,6 @@ tgsi_dump_declaration( ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES_SHORT ); } -static void -dump_declaration_verbose( - struct tgsi_full_declaration *decl, - unsigned ignored, - unsigned deflt, - struct tgsi_full_declaration *fd ) -{ - TXT( "\nFile : " ); - ENM( decl->Declaration.File, TGSI_FILES ); - if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) { - TXT( "\nUsageMask : " ); - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) { - CHR( 'X' ); - } - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) { - CHR( 'Y' ); - } - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) { - CHR( 'Z' ); - } - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) { - CHR( 'W' ); - } - } - if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) { - TXT( "\nInterpolate: " ); - ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES ); - } - if( deflt || fd->Declaration.Semantic != decl->Declaration.Semantic ) { - TXT( "\nSemantic : " ); - UID( decl->Declaration.Semantic ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( decl->Declaration.Padding ); - } - - EOL(); - TXT( "\nFirst: " ); - UID( decl->DeclarationRange.First ); - TXT( "\nLast : " ); - UID( decl->DeclarationRange.Last ); - - if( decl->Declaration.Semantic ) { - EOL(); - TXT( "\nSemanticName : " ); - ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS ); - TXT( "\nSemanticIndex: " ); - UID( decl->Semantic.SemanticIndex ); - if( ignored ) { - TXT( "\nPadding : " ); - UIX( decl->Semantic.Padding ); - } - } -} - void tgsi_dump_immediate( const struct tgsi_full_immediate *imm ) @@ -679,38 +335,10 @@ tgsi_dump_immediate( TXT( " }" ); } -static void -dump_immediate_verbose( - struct tgsi_full_immediate *imm, - unsigned ignored ) -{ - unsigned i; - - TXT( "\nDataType : " ); - ENM( imm->Immediate.DataType, TGSI_IMMS ); - if( ignored ) { - TXT( "\nPadding : " ); - UIX( imm->Immediate.Padding ); - } - - for( i = 0; i < imm->Immediate.Size - 1; i++ ) { - EOL(); - switch( imm->Immediate.DataType ) { - case TGSI_IMM_FLOAT32: - TXT( "\nFloat: " ); - FLT( imm->u.ImmediateFloat32[i].Float ); - break; - - default: - assert( 0 ); - } - } -} - void tgsi_dump_instruction( - const struct tgsi_full_instruction *inst, - unsigned instno ) + const struct tgsi_full_instruction *inst, + uint instno ) { unsigned i; boolean first_reg = TRUE; @@ -856,389 +484,28 @@ tgsi_dump_instruction( } } -static void -dump_instruction_verbose( - struct tgsi_full_instruction *inst, - unsigned ignored, - unsigned deflt, - struct tgsi_full_instruction *fi ) -{ - unsigned i; - - TXT( "\nOpcode : " ); - ENM( inst->Instruction.Opcode, TGSI_OPCODES ); - if( deflt || fi->Instruction.Saturate != inst->Instruction.Saturate ) { - TXT( "\nSaturate : " ); - ENM( inst->Instruction.Saturate, TGSI_SATS ); - } - if( deflt || fi->Instruction.NumDstRegs != inst->Instruction.NumDstRegs ) { - TXT( "\nNumDstRegs : " ); - UID( inst->Instruction.NumDstRegs ); - } - if( deflt || fi->Instruction.NumSrcRegs != inst->Instruction.NumSrcRegs ) { - TXT( "\nNumSrcRegs : " ); - UID( inst->Instruction.NumSrcRegs ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( inst->Instruction.Padding ); - } - - if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) { - EOL(); - TXT( "\nType : " ); - ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS ); - if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) { - TXT( "\nPrecision : " ); - ENM( inst->InstructionExtNv.Precision, TGSI_PRECISIONS ); - } - if( deflt || fi->InstructionExtNv.CondDstIndex != inst->InstructionExtNv.CondDstIndex ) { - TXT( "\nCondDstIndex : " ); - UID( inst->InstructionExtNv.CondDstIndex ); - } - if( deflt || fi->InstructionExtNv.CondFlowIndex != inst->InstructionExtNv.CondFlowIndex ) { - TXT( "\nCondFlowIndex : " ); - UID( inst->InstructionExtNv.CondFlowIndex ); - } - if( deflt || fi->InstructionExtNv.CondMask != inst->InstructionExtNv.CondMask ) { - TXT( "\nCondMask : " ); - ENM( inst->InstructionExtNv.CondMask, TGSI_CCS ); - } - if( deflt || fi->InstructionExtNv.CondSwizzleX != inst->InstructionExtNv.CondSwizzleX ) { - TXT( "\nCondSwizzleX : " ); - ENM( inst->InstructionExtNv.CondSwizzleX, TGSI_SWIZZLES ); - } - if( deflt || fi->InstructionExtNv.CondSwizzleY != inst->InstructionExtNv.CondSwizzleY ) { - TXT( "\nCondSwizzleY : " ); - ENM( inst->InstructionExtNv.CondSwizzleY, TGSI_SWIZZLES ); - } - if( deflt || fi->InstructionExtNv.CondSwizzleZ != inst->InstructionExtNv.CondSwizzleZ ) { - TXT( "\nCondSwizzleZ : " ); - ENM( inst->InstructionExtNv.CondSwizzleZ, TGSI_SWIZZLES ); - } - if( deflt || fi->InstructionExtNv.CondSwizzleW != inst->InstructionExtNv.CondSwizzleW ) { - TXT( "\nCondSwizzleW : " ); - ENM( inst->InstructionExtNv.CondSwizzleW, TGSI_SWIZZLES ); - } - if( deflt || fi->InstructionExtNv.CondDstUpdate != inst->InstructionExtNv.CondDstUpdate ) { - TXT( "\nCondDstUpdate : " ); - UID( inst->InstructionExtNv.CondDstUpdate ); - } - if( deflt || fi->InstructionExtNv.CondFlowEnable != inst->InstructionExtNv.CondFlowEnable ) { - TXT( "\nCondFlowEnable: " ); - UID( inst->InstructionExtNv.CondFlowEnable ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( inst->InstructionExtNv.Padding ); - if( deflt || fi->InstructionExtNv.Extended != inst->InstructionExtNv.Extended ) { - TXT( "\nExtended : " ); - UID( inst->InstructionExtNv.Extended ); - } - } - } - - if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) { - EOL(); - TXT( "\nType : " ); - ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS ); - if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) { - TXT( "\nLabel : " ); - UID( inst->InstructionExtLabel.Label ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( inst->InstructionExtLabel.Padding ); - if( deflt || fi->InstructionExtLabel.Extended != inst->InstructionExtLabel.Extended ) { - TXT( "\nExtended: " ); - UID( inst->InstructionExtLabel.Extended ); - } - } - } - - if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) { - EOL(); - TXT( "\nType : " ); - ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS ); - if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) { - TXT( "\nTexture : " ); - ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( inst->InstructionExtTexture.Padding ); - if( deflt || fi->InstructionExtTexture.Extended != inst->InstructionExtTexture.Extended ) { - TXT( "\nExtended: " ); - UID( inst->InstructionExtTexture.Extended ); - } - } - } - - for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i]; - - EOL(); - TXT( "\nFile : " ); - ENM( dst->DstRegister.File, TGSI_FILES ); - if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) { - TXT( "\nWriteMask: " ); - ENM( dst->DstRegister.WriteMask, TGSI_WRITEMASKS ); - } - if( ignored ) { - if( deflt || fd->DstRegister.Indirect != dst->DstRegister.Indirect ) { - TXT( "\nIndirect : " ); - UID( dst->DstRegister.Indirect ); - } - if( deflt || fd->DstRegister.Dimension != dst->DstRegister.Dimension ) { - TXT( "\nDimension: " ); - UID( dst->DstRegister.Dimension ); - } - } - if( deflt || fd->DstRegister.Index != dst->DstRegister.Index ) { - TXT( "\nIndex : " ); - SID( dst->DstRegister.Index ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( dst->DstRegister.Padding ); - if( deflt || fd->DstRegister.Extended != dst->DstRegister.Extended ) { - TXT( "\nExtended : " ); - UID( dst->DstRegister.Extended ); - } - } - - if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) { - EOL(); - TXT( "\nType : " ); - ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS ); - if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) { - TXT( "\nCondMask : " ); - ENM( dst->DstRegisterExtConcode.CondMask, TGSI_CCS ); - } - if( deflt || fd->DstRegisterExtConcode.CondSwizzleX != dst->DstRegisterExtConcode.CondSwizzleX ) { - TXT( "\nCondSwizzleX: " ); - ENM( dst->DstRegisterExtConcode.CondSwizzleX, TGSI_SWIZZLES ); - } - if( deflt || fd->DstRegisterExtConcode.CondSwizzleY != dst->DstRegisterExtConcode.CondSwizzleY ) { - TXT( "\nCondSwizzleY: " ); - ENM( dst->DstRegisterExtConcode.CondSwizzleY, TGSI_SWIZZLES ); - } - if( deflt || fd->DstRegisterExtConcode.CondSwizzleZ != dst->DstRegisterExtConcode.CondSwizzleZ ) { - TXT( "\nCondSwizzleZ: " ); - ENM( dst->DstRegisterExtConcode.CondSwizzleZ, TGSI_SWIZZLES ); - } - if( deflt || fd->DstRegisterExtConcode.CondSwizzleW != dst->DstRegisterExtConcode.CondSwizzleW ) { - TXT( "\nCondSwizzleW: " ); - ENM( dst->DstRegisterExtConcode.CondSwizzleW, TGSI_SWIZZLES ); - } - if( deflt || fd->DstRegisterExtConcode.CondSrcIndex != dst->DstRegisterExtConcode.CondSrcIndex ) { - TXT( "\nCondSrcIndex: " ); - UID( dst->DstRegisterExtConcode.CondSrcIndex ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( dst->DstRegisterExtConcode.Padding ); - if( deflt || fd->DstRegisterExtConcode.Extended != dst->DstRegisterExtConcode.Extended ) { - TXT( "\nExtended : " ); - UID( dst->DstRegisterExtConcode.Extended ); - } - } - } - - if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) { - EOL(); - TXT( "\nType : " ); - ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS ); - if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) { - TXT( "\nModulate: " ); - ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( dst->DstRegisterExtModulate.Padding ); - if( deflt || fd->DstRegisterExtModulate.Extended != dst->DstRegisterExtModulate.Extended ) { - TXT( "\nExtended: " ); - UID( dst->DstRegisterExtModulate.Extended ); - } - } - } - } - - for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { - struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; - struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i]; - - EOL(); - TXT( "\nFile : "); - ENM( src->SrcRegister.File, TGSI_FILES ); - if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) { - TXT( "\nSwizzleX : " ); - ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES ); - } - if( deflt || fs->SrcRegister.SwizzleY != src->SrcRegister.SwizzleY ) { - TXT( "\nSwizzleY : " ); - ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES ); - } - if( deflt || fs->SrcRegister.SwizzleZ != src->SrcRegister.SwizzleZ ) { - TXT( "\nSwizzleZ : " ); - ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES ); - } - if( deflt || fs->SrcRegister.SwizzleW != src->SrcRegister.SwizzleW ) { - TXT( "\nSwizzleW : " ); - ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES ); - } - if( deflt || fs->SrcRegister.Negate != src->SrcRegister.Negate ) { - TXT( "\nNegate : " ); - UID( src->SrcRegister.Negate ); - } - if( ignored ) { - if( deflt || fs->SrcRegister.Indirect != src->SrcRegister.Indirect ) { - TXT( "\nIndirect : " ); - UID( src->SrcRegister.Indirect ); - } - if( deflt || fs->SrcRegister.Dimension != src->SrcRegister.Dimension ) { - TXT( "\nDimension: " ); - UID( src->SrcRegister.Dimension ); - } - } - if( deflt || fs->SrcRegister.Index != src->SrcRegister.Index ) { - TXT( "\nIndex : " ); - SID( src->SrcRegister.Index ); - } - if( ignored ) { - if( deflt || fs->SrcRegister.Extended != src->SrcRegister.Extended ) { - TXT( "\nExtended : " ); - UID( src->SrcRegister.Extended ); - } - } - - if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) { - EOL(); - TXT( "\nType : " ); - ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS ); - if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) { - TXT( "\nExtSwizzleX: " ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES ); - } - if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleY != src->SrcRegisterExtSwz.ExtSwizzleY ) { - TXT( "\nExtSwizzleY: " ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES ); - } - if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleZ != src->SrcRegisterExtSwz.ExtSwizzleZ ) { - TXT( "\nExtSwizzleZ: " ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES ); - } - if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleW != src->SrcRegisterExtSwz.ExtSwizzleW ) { - TXT( "\nExtSwizzleW: " ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES ); - } - if( deflt || fs->SrcRegisterExtSwz.NegateX != src->SrcRegisterExtSwz.NegateX ) { - TXT( "\nNegateX : " ); - UID( src->SrcRegisterExtSwz.NegateX ); - } - if( deflt || fs->SrcRegisterExtSwz.NegateY != src->SrcRegisterExtSwz.NegateY ) { - TXT( "\nNegateY : " ); - UID( src->SrcRegisterExtSwz.NegateY ); - } - if( deflt || fs->SrcRegisterExtSwz.NegateZ != src->SrcRegisterExtSwz.NegateZ ) { - TXT( "\nNegateZ : " ); - UID( src->SrcRegisterExtSwz.NegateZ ); - } - if( deflt || fs->SrcRegisterExtSwz.NegateW != src->SrcRegisterExtSwz.NegateW ) { - TXT( "\nNegateW : " ); - UID( src->SrcRegisterExtSwz.NegateW ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( src->SrcRegisterExtSwz.Padding ); - if( deflt || fs->SrcRegisterExtSwz.Extended != src->SrcRegisterExtSwz.Extended ) { - TXT( "\nExtended : " ); - UID( src->SrcRegisterExtSwz.Extended ); - } - } - } - - if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) { - EOL(); - TXT( "\nType : " ); - ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS ); - if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) { - TXT( "\nComplement: " ); - UID( src->SrcRegisterExtMod.Complement ); - } - if( deflt || fs->SrcRegisterExtMod.Bias != src->SrcRegisterExtMod.Bias ) { - TXT( "\nBias : " ); - UID( src->SrcRegisterExtMod.Bias ); - } - if( deflt || fs->SrcRegisterExtMod.Scale2X != src->SrcRegisterExtMod.Scale2X ) { - TXT( "\nScale2X : " ); - UID( src->SrcRegisterExtMod.Scale2X ); - } - if( deflt || fs->SrcRegisterExtMod.Absolute != src->SrcRegisterExtMod.Absolute ) { - TXT( "\nAbsolute : " ); - UID( src->SrcRegisterExtMod.Absolute ); - } - if( deflt || fs->SrcRegisterExtMod.Negate != src->SrcRegisterExtMod.Negate ) { - TXT( "\nNegate : " ); - UID( src->SrcRegisterExtMod.Negate ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( src->SrcRegisterExtMod.Padding ); - if( deflt || fs->SrcRegisterExtMod.Extended != src->SrcRegisterExtMod.Extended ) { - TXT( "\nExtended : " ); - UID( src->SrcRegisterExtMod.Extended ); - } - } - } - } -} - void tgsi_dump( - const struct tgsi_token *tokens, - unsigned flags ) + const struct tgsi_token *tokens ) { struct tgsi_parse_context parse; struct tgsi_full_instruction fi; struct tgsi_full_declaration fd; - unsigned verbose = flags & TGSI_DUMP_VERBOSE; - unsigned ignored = !(flags & TGSI_DUMP_NO_IGNORED); - unsigned deflt = !(flags & TGSI_DUMP_NO_DEFAULT); unsigned instno = 0; /* sanity checks */ - assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0); - assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0); + assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0); + assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "OPCODE_END") == 0); assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "END") == 0); tgsi_parse_init( &parse, tokens ); - TXT( "tgsi-dump begin -----------------" ); - EOL(); ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES_SHORT ); UID( parse.FullVersion.Version.MajorVersion ); CHR( '.' ); UID( parse.FullVersion.Version.MinorVersion ); - if( verbose ) { - TXT( "\nMajorVersion: " ); - UID( parse.FullVersion.Version.MajorVersion ); - TXT( "\nMinorVersion: " ); - UID( parse.FullVersion.Version.MinorVersion ); - EOL(); - - TXT( "\nHeaderSize: " ); - UID( parse.FullHeader.Header.HeaderSize ); - TXT( "\nBodySize : " ); - UID( parse.FullHeader.Header.BodySize ); - TXT( "\nProcessor : " ); - ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES ); - EOL(); - } - fi = tgsi_default_full_instruction(); fd = tgsi_default_full_declaration(); @@ -1266,51 +533,7 @@ tgsi_dump( default: assert( 0 ); } - - if( verbose ) { - TXT( "\nType : " ); - ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES ); - if( ignored ) { - TXT( "\nSize : " ); - UID( parse.FullToken.Token.Size ); - if( deflt || parse.FullToken.Token.Extended ) { - TXT( "\nExtended : " ); - UID( parse.FullToken.Token.Extended ); - } - } - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - dump_declaration_verbose( - &parse.FullToken.FullDeclaration, - ignored, - deflt, - &fd ); - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - dump_immediate_verbose( - &parse.FullToken.FullImmediate, - ignored ); - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - dump_instruction_verbose( - &parse.FullToken.FullInstruction, - ignored, - deflt, - &fi ); - break; - - default: - assert( 0 ); - } - - EOL(); - } } - TXT( "\ntgsi-dump end -------------------\n" ); - tgsi_parse_free( &parse ); } diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h index ba7692b511..c130b9f1b7 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -28,18 +28,15 @@ #ifndef TGSI_DUMP_H #define TGSI_DUMP_H +#include "pipe/p_shader_tokens.h" + #if defined __cplusplus extern "C" { #endif -#define TGSI_DUMP_VERBOSE 1 -#define TGSI_DUMP_NO_IGNORED 2 -#define TGSI_DUMP_NO_DEFAULT 4 - void tgsi_dump( - const struct tgsi_token *tokens, - unsigned flags ); + const struct tgsi_token *tokens ); struct tgsi_full_immediate; struct tgsi_full_instruction; @@ -51,12 +48,12 @@ tgsi_dump_immediate( void tgsi_dump_instruction( - const struct tgsi_full_instruction *inst, - unsigned instno ); + const struct tgsi_full_instruction *inst, + uint instno ); void tgsi_dump_declaration( - const struct tgsi_full_declaration *decl ); + const struct tgsi_full_declaration *decl ); #if defined __cplusplus } diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c new file mode 100644 index 0000000000..27b085dc07 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c @@ -0,0 +1,853 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "pipe/p_util.h" +#include "util/u_string.h" +#include "tgsi_dump_c.h" +#include "tgsi_parse.h" +#include "tgsi_build.h" + +static void +dump_enum( + const unsigned e, + const char **enums, + const unsigned enums_count ) +{ + if (e >= enums_count) { + debug_printf( "%u", e ); + } + else { + debug_printf( "%s", enums[e] ); + } +} + +#define EOL() debug_printf( "\n" ) +#define TXT(S) debug_printf( "%s", S ) +#define CHR(C) debug_printf( "%c", C ) +#define UIX(I) debug_printf( "0x%x", I ) +#define UID(I) debug_printf( "%u", I ) +#define SID(I) debug_printf( "%d", I ) +#define FLT(F) debug_printf( "%10.4f", F ) +#define ENM(E,ENUMS) dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) + +static const char *TGSI_PROCESSOR_TYPES[] = +{ + "PROCESSOR_FRAGMENT", + "PROCESSOR_VERTEX", + "PROCESSOR_GEOMETRY" +}; + +static const char *TGSI_TOKEN_TYPES[] = +{ + "TOKEN_TYPE_DECLARATION", + "TOKEN_TYPE_IMMEDIATE", + "TOKEN_TYPE_INSTRUCTION" +}; + +static const char *TGSI_FILES[] = +{ + "FILE_NULL", + "FILE_CONSTANT", + "FILE_INPUT", + "FILE_OUTPUT", + "FILE_TEMPORARY", + "FILE_SAMPLER", + "FILE_ADDRESS", + "FILE_IMMEDIATE" +}; + +static const char *TGSI_DECLARES[] = +{ + "DECLARE_RANGE", + "DECLARE_MASK" +}; + +static const char *TGSI_INTERPOLATES[] = +{ + "INTERPOLATE_CONSTANT", + "INTERPOLATE_LINEAR", + "INTERPOLATE_PERSPECTIVE", + "INTERPOLATE_ATTRIB" +}; + +static const char *TGSI_SEMANTICS[] = +{ + "SEMANTIC_POSITION", + "SEMANTIC_COLOR", + "SEMANTIC_BCOLOR", + "SEMANTIC_FOG", + "SEMANTIC_PSIZE", + "SEMANTIC_GENERIC", + "SEMANTIC_NORMAL" +}; + +static const char *TGSI_IMMS[] = +{ + "IMM_FLOAT32" +}; + +static const char *TGSI_OPCODES[TGSI_OPCODE_LAST] = +{ + "OPCODE_ARL", + "OPCODE_MOV", + "OPCODE_LIT", + "OPCODE_RCP", + "OPCODE_RSQ", + "OPCODE_EXP", + "OPCODE_LOG", + "OPCODE_MUL", + "OPCODE_ADD", + "OPCODE_DP3", + "OPCODE_DP4", + "OPCODE_DST", + "OPCODE_MIN", + "OPCODE_MAX", + "OPCODE_SLT", + "OPCODE_SGE", + "OPCODE_MAD", + "OPCODE_SUB", + "OPCODE_LERP", + "OPCODE_CND", + "OPCODE_CND0", + "OPCODE_DOT2ADD", + "OPCODE_INDEX", + "OPCODE_NEGATE", + "OPCODE_FRAC", + "OPCODE_CLAMP", + "OPCODE_FLOOR", + "OPCODE_ROUND", + "OPCODE_EXPBASE2", + "OPCODE_LOGBASE2", + "OPCODE_POWER", + "OPCODE_CROSSPRODUCT", + "OPCODE_MULTIPLYMATRIX", + "OPCODE_ABS", + "OPCODE_RCC", + "OPCODE_DPH", + "OPCODE_COS", + "OPCODE_DDX", + "OPCODE_DDY", + "OPCODE_KILP", + "OPCODE_PK2H", + "OPCODE_PK2US", + "OPCODE_PK4B", + "OPCODE_PK4UB", + "OPCODE_RFL", + "OPCODE_SEQ", + "OPCODE_SFL", + "OPCODE_SGT", + "OPCODE_SIN", + "OPCODE_SLE", + "OPCODE_SNE", + "OPCODE_STR", + "OPCODE_TEX", + "OPCODE_TXD", + "OPCODE_TXP", + "OPCODE_UP2H", + "OPCODE_UP2US", + "OPCODE_UP4B", + "OPCODE_UP4UB", + "OPCODE_X2D", + "OPCODE_ARA", + "OPCODE_ARR", + "OPCODE_BRA", + "OPCODE_CAL", + "OPCODE_RET", + "OPCODE_SSG", + "OPCODE_CMP", + "OPCODE_SCS", + "OPCODE_TXB", + "OPCODE_NRM", + "OPCODE_DIV", + "OPCODE_DP2", + "OPCODE_TXL", + "OPCODE_BRK", + "OPCODE_IF", + "OPCODE_LOOP", + "OPCODE_REP", + "OPCODE_ELSE", + "OPCODE_ENDIF", + "OPCODE_ENDLOOP", + "OPCODE_ENDREP", + "OPCODE_PUSHA", + "OPCODE_POPA", + "OPCODE_CEIL", + "OPCODE_I2F", + "OPCODE_NOT", + "OPCODE_TRUNC", + "OPCODE_SHL", + "OPCODE_SHR", + "OPCODE_AND", + "OPCODE_OR", + "OPCODE_MOD", + "OPCODE_XOR", + "OPCODE_SAD", + "OPCODE_TXF", + "OPCODE_TXQ", + "OPCODE_CONT", + "OPCODE_EMIT", + "OPCODE_ENDPRIM", + "OPCODE_BGNLOOP2", + "OPCODE_BGNSUB", + "OPCODE_ENDLOOP2", + "OPCODE_ENDSUB", + "OPCODE_NOISE1", + "OPCODE_NOISE2", + "OPCODE_NOISE3", + "OPCODE_NOISE4", + "OPCODE_NOP", + "OPCODE_M4X3", + "OPCODE_M3X4", + "OPCODE_M3X3", + "OPCODE_M3X2", + "OPCODE_NRM4", + "OPCODE_CALLNZ", + "OPCODE_IFC", + "OPCODE_BREAKC", + "OPCODE_KIL", + "OPCODE_END" +}; + +static const char *TGSI_SATS[] = +{ + "SAT_NONE", + "SAT_ZERO_ONE", + "SAT_MINUS_PLUS_ONE" +}; + +static const char *TGSI_INSTRUCTION_EXTS[] = +{ + "INSTRUCTION_EXT_TYPE_NV", + "INSTRUCTION_EXT_TYPE_LABEL", + "INSTRUCTION_EXT_TYPE_TEXTURE" +}; + +static const char *TGSI_PRECISIONS[] = +{ + "PRECISION_DEFAULT", + "TGSI_PRECISION_FLOAT32", + "TGSI_PRECISION_FLOAT16", + "TGSI_PRECISION_FIXED12" +}; + +static const char *TGSI_CCS[] = +{ + "CC_GT", + "CC_EQ", + "CC_LT", + "CC_UN", + "CC_GE", + "CC_LE", + "CC_NE", + "CC_TR", + "CC_FL" +}; + +static const char *TGSI_SWIZZLES[] = +{ + "SWIZZLE_X", + "SWIZZLE_Y", + "SWIZZLE_Z", + "SWIZZLE_W" +}; + +static const char *TGSI_TEXTURES[] = +{ + "TEXTURE_UNKNOWN", + "TEXTURE_1D", + "TEXTURE_2D", + "TEXTURE_3D", + "TEXTURE_CUBE", + "TEXTURE_RECT", + "TEXTURE_SHADOW1D", + "TEXTURE_SHADOW2D", + "TEXTURE_SHADOWRECT" +}; + +static const char *TGSI_SRC_REGISTER_EXTS[] = +{ + "SRC_REGISTER_EXT_TYPE_SWZ", + "SRC_REGISTER_EXT_TYPE_MOD" +}; + +static const char *TGSI_EXTSWIZZLES[] = +{ + "EXTSWIZZLE_X", + "EXTSWIZZLE_Y", + "EXTSWIZZLE_Z", + "EXTSWIZZLE_W", + "EXTSWIZZLE_ZERO", + "EXTSWIZZLE_ONE" +}; + +static const char *TGSI_WRITEMASKS[] = +{ + "0", + "WRITEMASK_X", + "WRITEMASK_Y", + "WRITEMASK_XY", + "WRITEMASK_Z", + "WRITEMASK_XZ", + "WRITEMASK_YZ", + "WRITEMASK_XYZ", + "WRITEMASK_W", + "WRITEMASK_XW", + "WRITEMASK_YW", + "WRITEMASK_XYW", + "WRITEMASK_ZW", + "WRITEMASK_XZW", + "WRITEMASK_YZW", + "WRITEMASK_XYZW" +}; + +static const char *TGSI_DST_REGISTER_EXTS[] = +{ + "DST_REGISTER_EXT_TYPE_CONDCODE", + "DST_REGISTER_EXT_TYPE_MODULATE" +}; + +static const char *TGSI_MODULATES[] = +{ + "MODULATE_1X", + "MODULATE_2X", + "MODULATE_4X", + "MODULATE_8X", + "MODULATE_HALF", + "MODULATE_QUARTER", + "MODULATE_EIGHTH" +}; + +static void +dump_declaration_verbose( + struct tgsi_full_declaration *decl, + unsigned ignored, + unsigned deflt, + struct tgsi_full_declaration *fd ) +{ + TXT( "\nFile : " ); + ENM( decl->Declaration.File, TGSI_FILES ); + if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) { + TXT( "\nUsageMask : " ); + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) { + CHR( 'X' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) { + CHR( 'Y' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) { + CHR( 'Z' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) { + CHR( 'W' ); + } + } + if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) { + TXT( "\nInterpolate: " ); + ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES ); + } + if( deflt || fd->Declaration.Semantic != decl->Declaration.Semantic ) { + TXT( "\nSemantic : " ); + UID( decl->Declaration.Semantic ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( decl->Declaration.Padding ); + } + + EOL(); + TXT( "\nFirst: " ); + UID( decl->DeclarationRange.First ); + TXT( "\nLast : " ); + UID( decl->DeclarationRange.Last ); + + if( decl->Declaration.Semantic ) { + EOL(); + TXT( "\nSemanticName : " ); + ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS ); + TXT( "\nSemanticIndex: " ); + UID( decl->Semantic.SemanticIndex ); + if( ignored ) { + TXT( "\nPadding : " ); + UIX( decl->Semantic.Padding ); + } + } +} + +static void +dump_immediate_verbose( + struct tgsi_full_immediate *imm, + unsigned ignored ) +{ + unsigned i; + + TXT( "\nDataType : " ); + ENM( imm->Immediate.DataType, TGSI_IMMS ); + if( ignored ) { + TXT( "\nPadding : " ); + UIX( imm->Immediate.Padding ); + } + + for( i = 0; i < imm->Immediate.Size - 1; i++ ) { + EOL(); + switch( imm->Immediate.DataType ) { + case TGSI_IMM_FLOAT32: + TXT( "\nFloat: " ); + FLT( imm->u.ImmediateFloat32[i].Float ); + break; + + default: + assert( 0 ); + } + } +} + +static void +dump_instruction_verbose( + struct tgsi_full_instruction *inst, + unsigned ignored, + unsigned deflt, + struct tgsi_full_instruction *fi ) +{ + unsigned i; + + TXT( "\nOpcode : " ); + ENM( inst->Instruction.Opcode, TGSI_OPCODES ); + if( deflt || fi->Instruction.Saturate != inst->Instruction.Saturate ) { + TXT( "\nSaturate : " ); + ENM( inst->Instruction.Saturate, TGSI_SATS ); + } + if( deflt || fi->Instruction.NumDstRegs != inst->Instruction.NumDstRegs ) { + TXT( "\nNumDstRegs : " ); + UID( inst->Instruction.NumDstRegs ); + } + if( deflt || fi->Instruction.NumSrcRegs != inst->Instruction.NumSrcRegs ) { + TXT( "\nNumSrcRegs : " ); + UID( inst->Instruction.NumSrcRegs ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( inst->Instruction.Padding ); + } + + if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) { + EOL(); + TXT( "\nType : " ); + ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS ); + if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) { + TXT( "\nPrecision : " ); + ENM( inst->InstructionExtNv.Precision, TGSI_PRECISIONS ); + } + if( deflt || fi->InstructionExtNv.CondDstIndex != inst->InstructionExtNv.CondDstIndex ) { + TXT( "\nCondDstIndex : " ); + UID( inst->InstructionExtNv.CondDstIndex ); + } + if( deflt || fi->InstructionExtNv.CondFlowIndex != inst->InstructionExtNv.CondFlowIndex ) { + TXT( "\nCondFlowIndex : " ); + UID( inst->InstructionExtNv.CondFlowIndex ); + } + if( deflt || fi->InstructionExtNv.CondMask != inst->InstructionExtNv.CondMask ) { + TXT( "\nCondMask : " ); + ENM( inst->InstructionExtNv.CondMask, TGSI_CCS ); + } + if( deflt || fi->InstructionExtNv.CondSwizzleX != inst->InstructionExtNv.CondSwizzleX ) { + TXT( "\nCondSwizzleX : " ); + ENM( inst->InstructionExtNv.CondSwizzleX, TGSI_SWIZZLES ); + } + if( deflt || fi->InstructionExtNv.CondSwizzleY != inst->InstructionExtNv.CondSwizzleY ) { + TXT( "\nCondSwizzleY : " ); + ENM( inst->InstructionExtNv.CondSwizzleY, TGSI_SWIZZLES ); + } + if( deflt || fi->InstructionExtNv.CondSwizzleZ != inst->InstructionExtNv.CondSwizzleZ ) { + TXT( "\nCondSwizzleZ : " ); + ENM( inst->InstructionExtNv.CondSwizzleZ, TGSI_SWIZZLES ); + } + if( deflt || fi->InstructionExtNv.CondSwizzleW != inst->InstructionExtNv.CondSwizzleW ) { + TXT( "\nCondSwizzleW : " ); + ENM( inst->InstructionExtNv.CondSwizzleW, TGSI_SWIZZLES ); + } + if( deflt || fi->InstructionExtNv.CondDstUpdate != inst->InstructionExtNv.CondDstUpdate ) { + TXT( "\nCondDstUpdate : " ); + UID( inst->InstructionExtNv.CondDstUpdate ); + } + if( deflt || fi->InstructionExtNv.CondFlowEnable != inst->InstructionExtNv.CondFlowEnable ) { + TXT( "\nCondFlowEnable: " ); + UID( inst->InstructionExtNv.CondFlowEnable ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( inst->InstructionExtNv.Padding ); + if( deflt || fi->InstructionExtNv.Extended != inst->InstructionExtNv.Extended ) { + TXT( "\nExtended : " ); + UID( inst->InstructionExtNv.Extended ); + } + } + } + + if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) { + EOL(); + TXT( "\nType : " ); + ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS ); + if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) { + TXT( "\nLabel : " ); + UID( inst->InstructionExtLabel.Label ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( inst->InstructionExtLabel.Padding ); + if( deflt || fi->InstructionExtLabel.Extended != inst->InstructionExtLabel.Extended ) { + TXT( "\nExtended: " ); + UID( inst->InstructionExtLabel.Extended ); + } + } + } + + if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) { + EOL(); + TXT( "\nType : " ); + ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS ); + if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) { + TXT( "\nTexture : " ); + ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( inst->InstructionExtTexture.Padding ); + if( deflt || fi->InstructionExtTexture.Extended != inst->InstructionExtTexture.Extended ) { + TXT( "\nExtended: " ); + UID( inst->InstructionExtTexture.Extended ); + } + } + } + + for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i]; + + EOL(); + TXT( "\nFile : " ); + ENM( dst->DstRegister.File, TGSI_FILES ); + if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) { + TXT( "\nWriteMask: " ); + ENM( dst->DstRegister.WriteMask, TGSI_WRITEMASKS ); + } + if( ignored ) { + if( deflt || fd->DstRegister.Indirect != dst->DstRegister.Indirect ) { + TXT( "\nIndirect : " ); + UID( dst->DstRegister.Indirect ); + } + if( deflt || fd->DstRegister.Dimension != dst->DstRegister.Dimension ) { + TXT( "\nDimension: " ); + UID( dst->DstRegister.Dimension ); + } + } + if( deflt || fd->DstRegister.Index != dst->DstRegister.Index ) { + TXT( "\nIndex : " ); + SID( dst->DstRegister.Index ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( dst->DstRegister.Padding ); + if( deflt || fd->DstRegister.Extended != dst->DstRegister.Extended ) { + TXT( "\nExtended : " ); + UID( dst->DstRegister.Extended ); + } + } + + if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) { + EOL(); + TXT( "\nType : " ); + ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS ); + if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) { + TXT( "\nCondMask : " ); + ENM( dst->DstRegisterExtConcode.CondMask, TGSI_CCS ); + } + if( deflt || fd->DstRegisterExtConcode.CondSwizzleX != dst->DstRegisterExtConcode.CondSwizzleX ) { + TXT( "\nCondSwizzleX: " ); + ENM( dst->DstRegisterExtConcode.CondSwizzleX, TGSI_SWIZZLES ); + } + if( deflt || fd->DstRegisterExtConcode.CondSwizzleY != dst->DstRegisterExtConcode.CondSwizzleY ) { + TXT( "\nCondSwizzleY: " ); + ENM( dst->DstRegisterExtConcode.CondSwizzleY, TGSI_SWIZZLES ); + } + if( deflt || fd->DstRegisterExtConcode.CondSwizzleZ != dst->DstRegisterExtConcode.CondSwizzleZ ) { + TXT( "\nCondSwizzleZ: " ); + ENM( dst->DstRegisterExtConcode.CondSwizzleZ, TGSI_SWIZZLES ); + } + if( deflt || fd->DstRegisterExtConcode.CondSwizzleW != dst->DstRegisterExtConcode.CondSwizzleW ) { + TXT( "\nCondSwizzleW: " ); + ENM( dst->DstRegisterExtConcode.CondSwizzleW, TGSI_SWIZZLES ); + } + if( deflt || fd->DstRegisterExtConcode.CondSrcIndex != dst->DstRegisterExtConcode.CondSrcIndex ) { + TXT( "\nCondSrcIndex: " ); + UID( dst->DstRegisterExtConcode.CondSrcIndex ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( dst->DstRegisterExtConcode.Padding ); + if( deflt || fd->DstRegisterExtConcode.Extended != dst->DstRegisterExtConcode.Extended ) { + TXT( "\nExtended : " ); + UID( dst->DstRegisterExtConcode.Extended ); + } + } + } + + if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) { + EOL(); + TXT( "\nType : " ); + ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS ); + if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) { + TXT( "\nModulate: " ); + ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( dst->DstRegisterExtModulate.Padding ); + if( deflt || fd->DstRegisterExtModulate.Extended != dst->DstRegisterExtModulate.Extended ) { + TXT( "\nExtended: " ); + UID( dst->DstRegisterExtModulate.Extended ); + } + } + } + } + + for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { + struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i]; + + EOL(); + TXT( "\nFile : "); + ENM( src->SrcRegister.File, TGSI_FILES ); + if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) { + TXT( "\nSwizzleX : " ); + ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES ); + } + if( deflt || fs->SrcRegister.SwizzleY != src->SrcRegister.SwizzleY ) { + TXT( "\nSwizzleY : " ); + ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES ); + } + if( deflt || fs->SrcRegister.SwizzleZ != src->SrcRegister.SwizzleZ ) { + TXT( "\nSwizzleZ : " ); + ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES ); + } + if( deflt || fs->SrcRegister.SwizzleW != src->SrcRegister.SwizzleW ) { + TXT( "\nSwizzleW : " ); + ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES ); + } + if( deflt || fs->SrcRegister.Negate != src->SrcRegister.Negate ) { + TXT( "\nNegate : " ); + UID( src->SrcRegister.Negate ); + } + if( ignored ) { + if( deflt || fs->SrcRegister.Indirect != src->SrcRegister.Indirect ) { + TXT( "\nIndirect : " ); + UID( src->SrcRegister.Indirect ); + } + if( deflt || fs->SrcRegister.Dimension != src->SrcRegister.Dimension ) { + TXT( "\nDimension: " ); + UID( src->SrcRegister.Dimension ); + } + } + if( deflt || fs->SrcRegister.Index != src->SrcRegister.Index ) { + TXT( "\nIndex : " ); + SID( src->SrcRegister.Index ); + } + if( ignored ) { + if( deflt || fs->SrcRegister.Extended != src->SrcRegister.Extended ) { + TXT( "\nExtended : " ); + UID( src->SrcRegister.Extended ); + } + } + + if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) { + EOL(); + TXT( "\nType : " ); + ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS ); + if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) { + TXT( "\nExtSwizzleX: " ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES ); + } + if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleY != src->SrcRegisterExtSwz.ExtSwizzleY ) { + TXT( "\nExtSwizzleY: " ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES ); + } + if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleZ != src->SrcRegisterExtSwz.ExtSwizzleZ ) { + TXT( "\nExtSwizzleZ: " ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES ); + } + if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleW != src->SrcRegisterExtSwz.ExtSwizzleW ) { + TXT( "\nExtSwizzleW: " ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES ); + } + if( deflt || fs->SrcRegisterExtSwz.NegateX != src->SrcRegisterExtSwz.NegateX ) { + TXT( "\nNegateX : " ); + UID( src->SrcRegisterExtSwz.NegateX ); + } + if( deflt || fs->SrcRegisterExtSwz.NegateY != src->SrcRegisterExtSwz.NegateY ) { + TXT( "\nNegateY : " ); + UID( src->SrcRegisterExtSwz.NegateY ); + } + if( deflt || fs->SrcRegisterExtSwz.NegateZ != src->SrcRegisterExtSwz.NegateZ ) { + TXT( "\nNegateZ : " ); + UID( src->SrcRegisterExtSwz.NegateZ ); + } + if( deflt || fs->SrcRegisterExtSwz.NegateW != src->SrcRegisterExtSwz.NegateW ) { + TXT( "\nNegateW : " ); + UID( src->SrcRegisterExtSwz.NegateW ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( src->SrcRegisterExtSwz.Padding ); + if( deflt || fs->SrcRegisterExtSwz.Extended != src->SrcRegisterExtSwz.Extended ) { + TXT( "\nExtended : " ); + UID( src->SrcRegisterExtSwz.Extended ); + } + } + } + + if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) { + EOL(); + TXT( "\nType : " ); + ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS ); + if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) { + TXT( "\nComplement: " ); + UID( src->SrcRegisterExtMod.Complement ); + } + if( deflt || fs->SrcRegisterExtMod.Bias != src->SrcRegisterExtMod.Bias ) { + TXT( "\nBias : " ); + UID( src->SrcRegisterExtMod.Bias ); + } + if( deflt || fs->SrcRegisterExtMod.Scale2X != src->SrcRegisterExtMod.Scale2X ) { + TXT( "\nScale2X : " ); + UID( src->SrcRegisterExtMod.Scale2X ); + } + if( deflt || fs->SrcRegisterExtMod.Absolute != src->SrcRegisterExtMod.Absolute ) { + TXT( "\nAbsolute : " ); + UID( src->SrcRegisterExtMod.Absolute ); + } + if( deflt || fs->SrcRegisterExtMod.Negate != src->SrcRegisterExtMod.Negate ) { + TXT( "\nNegate : " ); + UID( src->SrcRegisterExtMod.Negate ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( src->SrcRegisterExtMod.Padding ); + if( deflt || fs->SrcRegisterExtMod.Extended != src->SrcRegisterExtMod.Extended ) { + TXT( "\nExtended : " ); + UID( src->SrcRegisterExtMod.Extended ); + } + } + } + } +} + +void +tgsi_dump_c( + const struct tgsi_token *tokens, + uint flags ) +{ + struct tgsi_parse_context parse; + struct tgsi_full_instruction fi; + struct tgsi_full_declaration fd; + uint ignored = flags & TGSI_DUMP_C_IGNORED; + uint deflt = flags & TGSI_DUMP_C_DEFAULT; + uint instno = 0; + + /* sanity checks */ + assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0); + assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0); + assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "END") == 0); + + tgsi_parse_init( &parse, tokens ); + + TXT( "tgsi-dump begin -----------------" ); + + TXT( "\nMajorVersion: " ); + UID( parse.FullVersion.Version.MajorVersion ); + TXT( "\nMinorVersion: " ); + UID( parse.FullVersion.Version.MinorVersion ); + EOL(); + + TXT( "\nHeaderSize: " ); + UID( parse.FullHeader.Header.HeaderSize ); + TXT( "\nBodySize : " ); + UID( parse.FullHeader.Header.BodySize ); + TXT( "\nProcessor : " ); + ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES ); + EOL(); + + fi = tgsi_default_full_instruction(); + fd = tgsi_default_full_declaration(); + + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + + TXT( "\nType : " ); + ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES ); + if( ignored ) { + TXT( "\nSize : " ); + UID( parse.FullToken.Token.Size ); + if( deflt || parse.FullToken.Token.Extended ) { + TXT( "\nExtended : " ); + UID( parse.FullToken.Token.Extended ); + } + } + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + dump_declaration_verbose( + &parse.FullToken.FullDeclaration, + ignored, + deflt, + &fd ); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + dump_immediate_verbose( + &parse.FullToken.FullImmediate, + ignored ); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + dump_instruction_verbose( + &parse.FullToken.FullInstruction, + ignored, + deflt, + &fi ); + break; + + default: + assert( 0 ); + } + + EOL(); + } + + TXT( "\ntgsi-dump end -------------------\n" ); + + tgsi_parse_free( &parse ); +} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h new file mode 100644 index 0000000000..d91cd35b3b --- /dev/null +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h @@ -0,0 +1,49 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_DUMP_C_H +#define TGSI_DUMP_C_H + +#include "pipe/p_shader_tokens.h" + +#if defined __cplusplus +extern "C" { +#endif + +#define TGSI_DUMP_C_IGNORED 1 +#define TGSI_DUMP_C_DEFAULT 2 + +void +tgsi_dump_c( + const struct tgsi_token *tokens, + uint flags ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_DUMP_C_H */ -- cgit v1.2.3 From 5e9ea9d938e9734524707e46be29e243e96f32a9 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 20 Jul 2008 16:07:06 +0200 Subject: scons: List util/tgsi_dump_c.c. --- src/gallium/auxiliary/tgsi/SConscript | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript index 54f5c75db4..3bbfa1be54 100644 --- a/src/gallium/auxiliary/tgsi/SConscript +++ b/src/gallium/auxiliary/tgsi/SConscript @@ -7,6 +7,7 @@ tgsi = env.ConvenienceLibrary( 'exec/tgsi_sse2.c', 'util/tgsi_build.c', 'util/tgsi_dump.c', + 'util/tgsi_dump_c.c', 'util/tgsi_iterate.c', 'util/tgsi_parse.c', 'util/tgsi_sanity.c', -- cgit v1.2.3 From 15c902455fe1b4572e614bf30912d92fe9c7bb28 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 20 Jul 2008 16:08:36 +0200 Subject: tgsi: Preserve flags parameter for tgsi_dump(). --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 3 ++- src/gallium/auxiliary/tgsi/util/tgsi_dump.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 5a84b99166..1056951bc8 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -486,7 +486,8 @@ tgsi_dump_instruction( void tgsi_dump( - const struct tgsi_token *tokens ) + const struct tgsi_token *tokens, + uint flags ) { struct tgsi_parse_context parse; struct tgsi_full_instruction fi; diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h index c130b9f1b7..51c230b5db 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h @@ -36,7 +36,8 @@ extern "C" { void tgsi_dump( - const struct tgsi_token *tokens ); + const struct tgsi_token *tokens, + uint flags ); struct tgsi_full_immediate; struct tgsi_full_instruction; -- cgit v1.2.3 From 307a252d1529e8e9ffa10c88cc8f5d5412f587f5 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 20 Jul 2008 16:12:44 +0200 Subject: tgsi: Fix dump enums. --- src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c index 27b085dc07..acb1246b7a 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c @@ -81,18 +81,11 @@ static const char *TGSI_FILES[] = "FILE_IMMEDIATE" }; -static const char *TGSI_DECLARES[] = -{ - "DECLARE_RANGE", - "DECLARE_MASK" -}; - static const char *TGSI_INTERPOLATES[] = { "INTERPOLATE_CONSTANT", "INTERPOLATE_LINEAR", - "INTERPOLATE_PERSPECTIVE", - "INTERPOLATE_ATTRIB" + "INTERPOLATE_PERSPECTIVE" }; static const char *TGSI_SEMANTICS[] = @@ -250,9 +243,9 @@ static const char *TGSI_INSTRUCTION_EXTS[] = static const char *TGSI_PRECISIONS[] = { "PRECISION_DEFAULT", - "TGSI_PRECISION_FLOAT32", - "TGSI_PRECISION_FLOAT16", - "TGSI_PRECISION_FIXED12" + "PRECISION_FLOAT32", + "PRECISION_FLOAT16", + "PRECISION_FIXED12" }; static const char *TGSI_CCS[] = -- cgit v1.2.3 From 82f11f7e7c0bbe0452da65f08195c2a346f820e9 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 20 Jul 2008 16:28:59 +0200 Subject: tgsi: Tidy up and fix tgsi_dump. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 215 ++++++++++++-------------- src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c | 2 +- 2 files changed, 101 insertions(+), 116 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 1056951bc8..fe8742dc02 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -34,16 +34,14 @@ static void dump_enum( - const unsigned e, - const char **enums, - const unsigned enums_count ) + uint e, + const char **enums, + uint enum_count ) { - if (e >= enums_count) { + if (e >= enum_count) debug_printf( "%u", e ); - } - else { + else debug_printf( "%s", enums[e] ); - } } #define EOL() debug_printf( "\n" ) @@ -55,14 +53,14 @@ dump_enum( #define FLT(F) debug_printf( "%10.4f", F ) #define ENM(E,ENUMS) dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) -static const char *TGSI_PROCESSOR_TYPES_SHORT[] = +static const char *processor_type_names[] = { "FRAG", "VERT", "GEOM" }; -static const char *TGSI_FILES_SHORT[] = +static const char *file_names[] = { "NULL", "CONST", @@ -74,14 +72,14 @@ static const char *TGSI_FILES_SHORT[] = "IMM" }; -static const char *TGSI_INTERPOLATES_SHORT[] = +static const char *interpolate_names[] = { "CONSTANT", "LINEAR", "PERSPECTIVE" }; -static const char *TGSI_SEMANTICS_SHORT[] = +static const char *semantic_names[] = { "POSITION", "COLOR", @@ -92,12 +90,12 @@ static const char *TGSI_SEMANTICS_SHORT[] = "NORMAL" }; -static const char *TGSI_IMMS_SHORT[] = +static const char *immediate_type_names[] = { "FLT32" }; -static const char *TGSI_OPCODES_SHORT[TGSI_OPCODE_LAST] = +static const char *opcode_names[TGSI_OPCODE_LAST] = { "ARL", "MOV", @@ -220,7 +218,7 @@ static const char *TGSI_OPCODES_SHORT[TGSI_OPCODE_LAST] = "SWZ" }; -static const char *TGSI_SWIZZLES_SHORT[] = +static const char *swizzle_names[] = { "x", "y", @@ -228,7 +226,7 @@ static const char *TGSI_SWIZZLES_SHORT[] = "w" }; -static const char *TGSI_TEXTURES_SHORT[] = +static const char *texture_names[] = { "UNKNOWN", "1D", @@ -241,7 +239,7 @@ static const char *TGSI_TEXTURES_SHORT[] = "SHADOWRECT" }; -static const char *TGSI_EXTSWIZZLES_SHORT[] = +static const char *extswizzle_names[] = { "x", "y", @@ -251,7 +249,7 @@ static const char *TGSI_EXTSWIZZLES_SHORT[] = "1" }; -static const char *TGSI_MODULATES_SHORT[TGSI_MODULATE_COUNT] = +static const char *modulate_names[TGSI_MODULATE_COUNT] = { "", "_2X", @@ -262,40 +260,55 @@ static const char *TGSI_MODULATES_SHORT[TGSI_MODULATE_COUNT] = "_D8" }; -void -tgsi_dump_declaration( - const struct tgsi_full_declaration *decl ) +static void +_dump_register( + uint file, + uint first, + uint last ) { - TXT( "\nDCL " ); - ENM( decl->Declaration.File, TGSI_FILES_SHORT ); - + ENM( file, file_names ); CHR( '[' ); - UID( decl->DeclarationRange.First ); - if (decl->DeclarationRange.First != decl->DeclarationRange.Last) { + UID( first ); + if (first != last) { TXT( ".." ); - UID( decl->DeclarationRange.Last ); + UID( last ); } CHR( ']' ); +} - if( decl->Declaration.UsageMask != TGSI_WRITEMASK_XYZW ) { +static void +_dump_writemask( + uint writemask ) +{ + if (writemask != TGSI_WRITEMASK_XYZW) { CHR( '.' ); - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) { + if (writemask & TGSI_WRITEMASK_X) CHR( 'x' ); - } - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) { + if (writemask & TGSI_WRITEMASK_Y) CHR( 'y' ); - } - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) { + if (writemask & TGSI_WRITEMASK_Z) CHR( 'z' ); - } - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) { + if (writemask & TGSI_WRITEMASK_W) CHR( 'w' ); - } } +} + +void +tgsi_dump_declaration( + const struct tgsi_full_declaration *decl ) +{ + TXT( "\nDCL " ); + + _dump_register( + decl->Declaration.File, + decl->DeclarationRange.First, + decl->DeclarationRange.Last ); + _dump_writemask( + decl->Declaration.UsageMask ); if (decl->Declaration.Semantic) { TXT( ", " ); - ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS_SHORT ); + ENM( decl->Semantic.SemanticName, semantic_names ); if (decl->Semantic.SemanticIndex != 0 || decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC) { CHR( '[' ); @@ -305,32 +318,30 @@ tgsi_dump_declaration( } TXT( ", " ); - ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES_SHORT ); + ENM( decl->Declaration.Interpolate, interpolate_names ); } void tgsi_dump_immediate( const struct tgsi_full_immediate *imm ) { - unsigned i; + uint i; TXT( "\nIMM " ); - ENM( imm->Immediate.DataType, TGSI_IMMS_SHORT ); + ENM( imm->Immediate.DataType, immediate_type_names ); TXT( " { " ); - for( i = 0; i < imm->Immediate.Size - 1; i++ ) { - switch( imm->Immediate.DataType ) { + for (i = 0; i < imm->Immediate.Size - 1; i++) { + switch (imm->Immediate.DataType) { case TGSI_IMM_FLOAT32: FLT( imm->u.ImmediateFloat32[i].Float ); break; - default: assert( 0 ); } - if( i < imm->Immediate.Size - 2 ) { + if (i < imm->Immediate.Size - 2) TXT( ", " ); - } } TXT( " }" ); } @@ -340,15 +351,15 @@ tgsi_dump_instruction( const struct tgsi_full_instruction *inst, uint instno ) { - unsigned i; - boolean first_reg = TRUE; + uint i; + boolean first_reg = TRUE; EOL(); UID( instno ); CHR( ':' ); - ENM( inst->Instruction.Opcode, TGSI_OPCODES_SHORT ); + ENM( inst->Instruction.Opcode, opcode_names ); - switch( inst->Instruction.Saturate ) { + switch (inst->Instruction.Saturate) { case TGSI_SAT_NONE: break; case TGSI_SAT_ZERO_ONE: @@ -361,47 +372,28 @@ tgsi_dump_instruction( assert( 0 ); } - for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - if( !first_reg ) { + if (!first_reg) CHR( ',' ); - } CHR( ' ' ); - ENM( dst->DstRegister.File, TGSI_FILES_SHORT ); - - CHR( '[' ); - SID( dst->DstRegister.Index ); - CHR( ']' ); - - ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES_SHORT ); - - if( dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW ) { - CHR( '.' ); - if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_X ) { - CHR( 'x' ); - } - if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_Y ) { - CHR( 'y' ); - } - if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_Z ) { - CHR( 'z' ); - } - if( dst->DstRegister.WriteMask & TGSI_WRITEMASK_W ) { - CHR( 'w' ); - } - } + _dump_register( + dst->DstRegister.File, + dst->DstRegister.Index, + dst->DstRegister.Index ); + ENM( dst->DstRegisterExtModulate.Modulate, modulate_names ); + _dump_writemask( dst->DstRegister.WriteMask ); first_reg = FALSE; } - for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { const struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; - if( !first_reg ) { + if (!first_reg) CHR( ',' ); - } CHR( ' ' ); if (src->SrcRegisterExtMod.Negate) @@ -417,40 +409,45 @@ tgsi_dump_instruction( if (src->SrcRegister.Negate) CHR( '-' ); - ENM( src->SrcRegister.File, TGSI_FILES_SHORT ); - - CHR( '[' ); if (src->SrcRegister.Indirect) { + /* TODO: Tidy up + */ + ENM( src->SrcRegister.File, file_names ); + CHR( '[' ); TXT( "ADDR[0]" ); if (src->SrcRegister.Index != 0) { if (src->SrcRegister.Index > 0) CHR( '+' ); SID( src->SrcRegister.Index ); } + CHR( ']' ); + } + else { + _dump_register( + src->SrcRegister.File, + src->SrcRegister.Index, + src->SrcRegister.Index ); } - else - SID( src->SrcRegister.Index ); - CHR( ']' ); if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W) { CHR( '.' ); - ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES_SHORT ); - ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES_SHORT ); - ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES_SHORT ); - ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES_SHORT ); + ENM( src->SrcRegister.SwizzleX, swizzle_names ); + ENM( src->SrcRegister.SwizzleY, swizzle_names ); + ENM( src->SrcRegister.SwizzleZ, swizzle_names ); + ENM( src->SrcRegister.SwizzleW, swizzle_names ); } if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) { CHR( '.' ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES_SHORT ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES_SHORT ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES_SHORT ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES_SHORT ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleX, extswizzle_names ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleY, extswizzle_names ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, extswizzle_names ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleW, extswizzle_names ); } if (src->SrcRegisterExtMod.Complement) @@ -469,10 +466,10 @@ tgsi_dump_instruction( if (inst->InstructionExtTexture.Texture != TGSI_TEXTURE_UNKNOWN) { TXT( ", " ); - ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES_SHORT ); + ENM( inst->InstructionExtTexture.Texture, texture_names ); } - switch( inst->Instruction.Opcode ) { + switch (inst->Instruction.Opcode) { case TGSI_OPCODE_IF: case TGSI_OPCODE_ELSE: case TGSI_OPCODE_BGNLOOP2: @@ -490,47 +487,35 @@ tgsi_dump( uint flags ) { struct tgsi_parse_context parse; - struct tgsi_full_instruction fi; - struct tgsi_full_declaration fd; - unsigned instno = 0; + uint instno = 0; /* sanity checks */ - assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0); - assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "OPCODE_END") == 0); - assert(strcmp(TGSI_OPCODES_SHORT[TGSI_OPCODE_END], "END") == 0); + assert( strcmp( opcode_names[TGSI_OPCODE_CONT], "CONT" ) == 0 ); + assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 ); + assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 ); tgsi_parse_init( &parse, tokens ); EOL(); - ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES_SHORT ); + ENM( parse.FullHeader.Processor.Processor, processor_type_names ); UID( parse.FullVersion.Version.MajorVersion ); CHR( '.' ); UID( parse.FullVersion.Version.MinorVersion ); - fi = tgsi_default_full_instruction(); - fd = tgsi_default_full_declaration(); - - while( !tgsi_parse_end_of_tokens( &parse ) ) { + while (!tgsi_parse_end_of_tokens( &parse )) { tgsi_parse_token( &parse ); - switch( parse.FullToken.Token.Type ) { + switch (parse.FullToken.Token.Type) { case TGSI_TOKEN_TYPE_DECLARATION: - tgsi_dump_declaration( - &parse.FullToken.FullDeclaration ); + tgsi_dump_declaration( &parse.FullToken.FullDeclaration ); break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - tgsi_dump_immediate( - &parse.FullToken.FullImmediate ); + tgsi_dump_immediate( &parse.FullToken.FullImmediate ); break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - tgsi_dump_instruction( - &parse.FullToken.FullInstruction, - instno ); + tgsi_dump_instruction( &parse.FullToken.FullInstruction, instno ); instno++; break; - default: assert( 0 ); } diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c index acb1246b7a..e7d1079588 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c @@ -773,7 +773,7 @@ tgsi_dump_c( /* sanity checks */ assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0); assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0); - assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "END") == 0); + assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0); tgsi_parse_init( &parse, tokens ); -- cgit v1.2.3 From fef7f2b070ab797f78ebc210bb40a24685c6d4b7 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 20 Jul 2008 16:42:43 +0200 Subject: tgsi: Fix error handling. --- src/gallium/auxiliary/tgsi/util/tgsi_sanity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c index 7fc4c29c68..a4edbb6d08 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c @@ -263,7 +263,7 @@ tgsi_sanity_check( ctx.errors = 0; ctx.warnings = 0; - if (tgsi_iterate_shader( tokens, &ctx.iter ) == -1) + if (!tgsi_iterate_shader( tokens, &ctx.iter )) return FALSE; return ctx.errors == 0; -- cgit v1.2.3 From 57482e1549e131fa1aebd442a677a95909b22508 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 20 Jul 2008 16:42:57 +0200 Subject: tgsi: Keep version and processor info in iterate_ctx. --- src/gallium/auxiliary/tgsi/util/tgsi_iterate.c | 3 +++ src/gallium/auxiliary/tgsi/util/tgsi_iterate.h | 3 +++ 2 files changed, 6 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c index 43d7a6b1d5..5371a88b96 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c @@ -38,6 +38,9 @@ tgsi_iterate_shader( if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) return FALSE; + ctx->processor = parse.FullHeader.Processor; + ctx->version = parse.FullVersion.Version; + if (ctx->prolog) if (!ctx->prolog( ctx )) goto fail; diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h index 18729c2d1a..f5bebf89b8 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h +++ b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h @@ -59,6 +59,9 @@ struct tgsi_iterate_context boolean (* epilog)( struct tgsi_iterate_context *ctx ); + + struct tgsi_processor processor; + struct tgsi_version version; }; boolean -- cgit v1.2.3 From 83f245bd242cd2c5f59f072095dcc47aa6153b21 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 20 Jul 2008 16:43:26 +0200 Subject: tgsi: Use tgsi_iterate in tgsi_dump. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 89 ++++++++++++++++++----------- 1 file changed, 57 insertions(+), 32 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index fe8742dc02..08d0d5200c 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -26,11 +26,15 @@ **************************************************************************/ #include "pipe/p_debug.h" -#include "pipe/p_util.h" -#include "util/u_string.h" #include "tgsi_dump.h" -#include "tgsi_parse.h" -#include "tgsi_build.h" +#include "tgsi_iterate.h" + +struct dump_ctx +{ + struct tgsi_iterate_context iter; + + uint instno; +}; static void dump_enum( @@ -321,6 +325,15 @@ tgsi_dump_declaration( ENM( decl->Declaration.Interpolate, interpolate_names ); } +static boolean +iter_declaration( + struct tgsi_iterate_context *iter, + struct tgsi_full_declaration *decl ) +{ + tgsi_dump_declaration( decl ); + return TRUE; +} + void tgsi_dump_immediate( const struct tgsi_full_immediate *imm ) @@ -346,6 +359,15 @@ tgsi_dump_immediate( TXT( " }" ); } +static boolean +iter_immediate( + struct tgsi_iterate_context *iter, + struct tgsi_full_immediate *imm ) +{ + tgsi_dump_immediate( imm ); + return TRUE; +} + void tgsi_dump_instruction( const struct tgsi_full_instruction *inst, @@ -481,45 +503,48 @@ tgsi_dump_instruction( } } +static boolean +iter_instruction( + struct tgsi_iterate_context *iter, + struct tgsi_full_instruction *inst ) +{ + struct dump_ctx *ctx = (struct dump_ctx *) iter; + + tgsi_dump_instruction( inst, ctx->instno++ ); + return TRUE; +} + +static boolean +prolog( + struct tgsi_iterate_context *ctx ) +{ + EOL(); + ENM( ctx->processor.Processor, processor_type_names ); + UID( ctx->version.MajorVersion ); + CHR( '.' ); + UID( ctx->version.MinorVersion ); + return TRUE; +} + void tgsi_dump( const struct tgsi_token *tokens, uint flags ) { - struct tgsi_parse_context parse; - uint instno = 0; + struct dump_ctx ctx; /* sanity checks */ assert( strcmp( opcode_names[TGSI_OPCODE_CONT], "CONT" ) == 0 ); assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 ); assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 ); - tgsi_parse_init( &parse, tokens ); - - EOL(); - ENM( parse.FullHeader.Processor.Processor, processor_type_names ); - UID( parse.FullVersion.Version.MajorVersion ); - CHR( '.' ); - UID( parse.FullVersion.Version.MinorVersion ); - - while (!tgsi_parse_end_of_tokens( &parse )) { - tgsi_parse_token( &parse ); + ctx.iter.prolog = prolog; + ctx.iter.iterate_instruction = iter_instruction; + ctx.iter.iterate_declaration = iter_declaration; + ctx.iter.iterate_immediate = iter_immediate; + ctx.iter.epilog = NULL; - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_DECLARATION: - tgsi_dump_declaration( &parse.FullToken.FullDeclaration ); - break; - case TGSI_TOKEN_TYPE_IMMEDIATE: - tgsi_dump_immediate( &parse.FullToken.FullImmediate ); - break; - case TGSI_TOKEN_TYPE_INSTRUCTION: - tgsi_dump_instruction( &parse.FullToken.FullInstruction, instno ); - instno++; - break; - default: - assert( 0 ); - } - } + ctx.instno = 0; - tgsi_parse_free( &parse ); + tgsi_iterate_shader( tokens, &ctx.iter ); } -- cgit v1.2.3 From 613f0df64dd2c2db71dd73b595225016ae596576 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 20 Jul 2008 19:29:52 +0200 Subject: tgsi: Remove redundant code. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 1 - src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c | 1 - 2 files changed, 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 08d0d5200c..a9d500c8cf 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -536,7 +536,6 @@ tgsi_dump( /* sanity checks */ assert( strcmp( opcode_names[TGSI_OPCODE_CONT], "CONT" ) == 0 ); assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 ); - assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 ); ctx.iter.prolog = prolog; ctx.iter.iterate_instruction = iter_instruction; diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c index e7d1079588..eabd74bd6d 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c @@ -773,7 +773,6 @@ tgsi_dump_c( /* sanity checks */ assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0); assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0); - assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0); tgsi_parse_init( &parse, tokens ); -- cgit v1.2.3 From 73e1d0be756537376495547bc1e798805884b8ef Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 20 Jul 2008 21:23:04 +0200 Subject: tgsi: Add support for indirect addressing in dump, sanity and text modules. --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 53 +++++++--- src/gallium/auxiliary/tgsi/util/tgsi_sanity.c | 86 ++++++++++----- src/gallium/auxiliary/tgsi/util/tgsi_text.c | 147 ++++++++++++++++++++------ 3 files changed, 215 insertions(+), 71 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index a9d500c8cf..94180f7e50 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -265,17 +265,48 @@ static const char *modulate_names[TGSI_MODULATE_COUNT] = }; static void -_dump_register( +_dump_register_prefix( uint file, uint first, uint last ) { + + +} + +static void +_dump_register( + uint file, + int first, + int last ) +{ ENM( file, file_names ); CHR( '[' ); - UID( first ); + SID( first ); if (first != last) { TXT( ".." ); - UID( last ); + SID( last ); + } + CHR( ']' ); +} + +static void +_dump_register_ind( + uint file, + int index, + uint ind_file, + int ind_index ) +{ + ENM( file, file_names ); + CHR( '[' ); + ENM( ind_file, file_names ); + CHR( '[' ); + SID( ind_index ); + CHR( ']' ); + if (index != 0) { + if (index > 0) + CHR( '+' ); + SID( index ); } CHR( ']' ); } @@ -432,17 +463,11 @@ tgsi_dump_instruction( CHR( '-' ); if (src->SrcRegister.Indirect) { - /* TODO: Tidy up - */ - ENM( src->SrcRegister.File, file_names ); - CHR( '[' ); - TXT( "ADDR[0]" ); - if (src->SrcRegister.Index != 0) { - if (src->SrcRegister.Index > 0) - CHR( '+' ); - SID( src->SrcRegister.Index ); - } - CHR( ']' ); + _dump_register_ind( + src->SrcRegister.File, + src->SrcRegister.Index, + src->SrcRegisterInd.File, + src->SrcRegisterInd.Index ); } else { _dump_register( diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c index a4edbb6d08..f11de815b0 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c @@ -39,8 +39,9 @@ struct sanity_check_ctx { struct tgsi_iterate_context iter; - reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REGISTERS / sizeof( uint ) / 8]; - reg_flag regs_used[TGSI_FILE_COUNT][MAX_REGISTERS / sizeof( uint ) / 8]; + reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG]; + reg_flag regs_used[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG]; + boolean regs_ind_used[TGSI_FILE_COUNT]; uint num_imms; uint num_instructions; uint index_of_END; @@ -83,24 +84,59 @@ static boolean is_register_declared( struct sanity_check_ctx *ctx, uint file, - uint index ) + int index ) { - assert( index < MAX_REGISTERS ); + assert( index >= 0 && index < MAX_REGISTERS ); return (ctx->regs_decl[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; } +static boolean +is_any_register_declared( + struct sanity_check_ctx *ctx, + uint file ) +{ + uint i; + + for (i = 0; i < MAX_REGISTERS / BITS_IN_REG_FLAG; i++) + if (ctx->regs_decl[file][i]) + return TRUE; + return FALSE; +} + static boolean is_register_used( struct sanity_check_ctx *ctx, uint file, - uint index ) + int index ) { assert( index < MAX_REGISTERS ); return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; } +static boolean +check_register_usage( + struct sanity_check_ctx *ctx, + uint file, + int index, + boolean indirect ) +{ + if (!check_file_name( ctx, file )) + return FALSE; + if (indirect) { + if (!is_any_register_declared( ctx, file )) + report_error( ctx, "Undeclared source register" ); + ctx->regs_ind_used[file] = TRUE; + } + else { + if (!is_register_declared( ctx, file, index )) + report_error( ctx, "Undeclared destination register" ); + ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG)); + } + return TRUE; +} + static boolean iter_instruction( struct tgsi_iterate_context *iter, @@ -122,28 +158,25 @@ iter_instruction( * Mark the registers as used. */ for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - uint file; - uint index; - - file = inst->FullDstRegisters[i].DstRegister.File; - if (!check_file_name( ctx, file )) - return TRUE; - index = inst->FullDstRegisters[i].DstRegister.Index; - if (!is_register_declared( ctx, file, index )) - report_error( ctx, "Undeclared destination register" ); - ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG)); + check_register_usage( + ctx, + inst->FullDstRegisters[i].DstRegister.File, + inst->FullDstRegisters[i].DstRegister.Index, + FALSE ); } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - uint file; - uint index; - - file = inst->FullSrcRegisters[i].SrcRegister.File; - if (!check_file_name( ctx, file )) - return TRUE; - index = inst->FullSrcRegisters[i].SrcRegister.Index; - if (!is_register_declared( ctx, file, index )) - report_error( ctx, "Undeclared source register" ); - ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG)); + check_register_usage( + ctx, + inst->FullSrcRegisters[i].SrcRegister.File, + inst->FullSrcRegisters[i].SrcRegister.Index, + inst->FullSrcRegisters[i].SrcRegister.Indirect ); + if (inst->FullSrcRegisters[i].SrcRegister.Indirect) { + check_register_usage( + ctx, + inst->FullSrcRegisters[i].SrcRegisterInd.File, + inst->FullSrcRegisters[i].SrcRegisterInd.Index, + FALSE ); + } } ctx->num_instructions++; @@ -228,7 +261,7 @@ epilog( uint i; for (i = 0; i < MAX_REGISTERS; i++) { - if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i )) { + if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i ) && !ctx->regs_ind_used[file]) { report_warning( ctx, "Register never used" ); } } @@ -256,6 +289,7 @@ tgsi_sanity_check( memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) ); memset( ctx.regs_used, 0, sizeof( ctx.regs_used ) ); + memset( ctx.regs_ind_used, 0, sizeof( ctx.regs_ind_used ) ); ctx.num_imms = 0; ctx.num_instructions = 0; ctx.index_of_END = ~0; diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c index 9ed0f52fc7..2a50315463 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c @@ -226,24 +226,21 @@ static const char *file_names[TGSI_FILE_COUNT] = }; static boolean -parse_file( - struct translate_ctx *ctx, - uint *file ) +parse_file( const char **pcur, uint *file ) { uint i; for (i = 0; i < TGSI_FILE_COUNT; i++) { - const char *cur = ctx->cur; + const char *cur = *pcur; if (str_match_no_case( &cur, file_names[i] )) { if (!is_digit_alpha_underscore( cur )) { - ctx->cur = cur; + *pcur = cur; *file = i; return TRUE; } } } - report_error( ctx, "Unknown register file" ); return FALSE; } @@ -290,41 +287,57 @@ parse_opt_writemask( return TRUE; } -/* Parse register part common for decls and operands. - * ::= `[' +/* ::= `[' */ static boolean -parse_register_prefix( +parse_register_file_bracket( struct translate_ctx *ctx, - uint *file, - uint *index ) + uint *file ) { - if (!parse_file( ctx, file )) + if (!parse_file( &ctx->cur, file )) { + report_error( ctx, "Unknown register file" ); return FALSE; + } eat_opt_white( &ctx->cur ); if (*ctx->cur != '[') { report_error( ctx, "Expected `['" ); return FALSE; } ctx->cur++; + return TRUE; +} + +/* ::= + */ +static boolean +parse_register_file_bracket_index( + struct translate_ctx *ctx, + uint *file, + int *index ) +{ + uint uindex; + + if (!parse_register_file_bracket( ctx, file )) + return FALSE; eat_opt_white( &ctx->cur ); - if (!parse_uint( &ctx->cur, index )) { - report_error( ctx, "Expected literal integer" ); + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal unsigned integer" ); return FALSE; } + *index = (int) uindex; return TRUE; } -/* Parse register operand. - * ::= `]' +/* Parse destination register operand. + * ::= `]' */ static boolean -parse_register( +parse_register_dst( struct translate_ctx *ctx, uint *file, - uint *index ) + int *index ) { - if (!parse_register_prefix( ctx, file, index )) + if (!parse_register_file_bracket_index( ctx, file, index )) return FALSE; eat_opt_white( &ctx->cur ); if (*ctx->cur != ']') { @@ -332,30 +345,95 @@ parse_register( return FALSE; } ctx->cur++; - /* TODO: Modulate suffix */ + return TRUE; +} + +/* Parse source register operand. + * ::= `]' | + * `]' | + * `+' `]' | + * `-' `]' + */ +static boolean +parse_register_src( + struct translate_ctx *ctx, + uint *file, + int *index, + uint *ind_file, + int *ind_index ) +{ + const char *cur; + uint uindex; + + if (!parse_register_file_bracket( ctx, file )) + return FALSE; + eat_opt_white( &ctx->cur ); + cur = ctx->cur; + if (parse_file( &cur, ind_file )) { + if (!parse_register_dst( ctx, ind_file, ind_index )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (*ctx->cur == '+' || *ctx->cur == '-') { + boolean negate; + + negate = *ctx->cur == '-'; + ctx->cur++; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal unsigned integer" ); + return FALSE; + } + if (negate) + *index = -(int) uindex; + else + *index = (int) uindex; + } + else { + *index = 0; + } + } + else { + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal unsigned integer" ); + return FALSE; + } + *index = (int) uindex; + *ind_file = TGSI_FILE_NULL; + *ind_index = 0; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + ctx->cur++; return TRUE; } /* Parse register declaration. - * ::= `]' | `..' `]' + * ::= `]' | + * `..' `]' */ static boolean parse_register_dcl( struct translate_ctx *ctx, uint *file, - uint *first, - uint *last ) + int *first, + int *last ) { - if (!parse_register_prefix( ctx, file, first )) + if (!parse_register_file_bracket_index( ctx, file, first )) return FALSE; eat_opt_white( &ctx->cur ); if (ctx->cur[0] == '.' && ctx->cur[1] == '.') { + uint uindex; + ctx->cur += 2; eat_opt_white( &ctx->cur ); - if (!parse_uint( &ctx->cur, last )) { + if (!parse_uint( &ctx->cur, &uindex )) { report_error( ctx, "Expected literal integer" ); return FALSE; } + *last = (int) uindex; eat_opt_white( &ctx->cur ); } else { @@ -386,11 +464,11 @@ parse_dst_operand( struct tgsi_full_dst_register *dst ) { uint file; - uint index; + int index; uint writemask; const char *cur; - if (!parse_register( ctx, &file, &index )) + if (!parse_register_dst( ctx, &file, &index )) return FALSE; cur = ctx->cur; @@ -426,7 +504,9 @@ parse_src_operand( const char *cur; float value; uint file; - uint index; + int index; + uint ind_file; + int ind_index; if (*ctx->cur == '-') { cur = ctx->cur; @@ -498,10 +578,15 @@ parse_src_operand( } } - if (!parse_register( ctx, &file, &index )) + if (!parse_register_src( ctx, &file, &index, &ind_file, &ind_index )) return FALSE; src->SrcRegister.File = file; src->SrcRegister.Index = index; + if (ind_file != TGSI_FILE_NULL) { + src->SrcRegister.Indirect = 1; + src->SrcRegisterInd.File = ind_file; + src->SrcRegisterInd.Index = ind_index; + } /* Parse optional swizzle */ @@ -864,8 +949,8 @@ static boolean parse_declaration( struct translate_ctx *ctx ) { struct tgsi_full_declaration decl; uint file; - uint first; - uint last; + int first; + int last; uint writemask; const char *cur; uint advance; -- cgit v1.2.3 From 25a7f422b4e307dce966220d47794fb056d04aac Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 20 Jul 2008 21:28:28 +0200 Subject: tgsi: Warn if an indirect register not ADDR[0]. --- src/gallium/auxiliary/tgsi/util/tgsi_sanity.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c index f11de815b0..08f3995b13 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c @@ -171,11 +171,14 @@ iter_instruction( inst->FullSrcRegisters[i].SrcRegister.Index, inst->FullSrcRegisters[i].SrcRegister.Indirect ); if (inst->FullSrcRegisters[i].SrcRegister.Indirect) { - check_register_usage( - ctx, - inst->FullSrcRegisters[i].SrcRegisterInd.File, - inst->FullSrcRegisters[i].SrcRegisterInd.Index, - FALSE ); + uint file; + int index; + + file = inst->FullSrcRegisters[i].SrcRegisterInd.File; + index = inst->FullSrcRegisters[i].SrcRegisterInd.Index; + check_register_usage( ctx, file, index, FALSE ); + if (file != TGSI_FILE_ADDRESS || index != 0) + report_warning( ctx, "Indirect register not ADDR[0]" ); } } -- cgit v1.2.3 From 9d068d4b90813d5afa243d669e8437b8922ac656 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 20 Jul 2008 21:38:20 +0200 Subject: tgsi: Add support for branch instructions with target labels. --- src/gallium/auxiliary/tgsi/util/tgsi_text.c | 256 +++++++++++++++------------- 1 file changed, 137 insertions(+), 119 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c index 2a50315463..442016104b 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c @@ -687,130 +687,131 @@ struct opcode_info uint num_dst; uint num_src; uint is_tex; + uint is_branch; const char *mnemonic; }; static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] = { - { 1, 1, 0, "ARL" }, - { 1, 1, 0, "MOV" }, - { 1, 1, 0, "LIT" }, - { 1, 1, 0, "RCP" }, - { 1, 1, 0, "RSQ" }, - { 1, 1, 0, "EXP" }, - { 1, 1, 0, "LOG" }, - { 1, 2, 0, "MUL" }, - { 1, 2, 0, "ADD" }, - { 1, 2, 0, "DP3" }, - { 1, 2, 0, "DP4" }, - { 1, 2, 0, "DST" }, - { 1, 2, 0, "MIN" }, - { 1, 2, 0, "MAX" }, - { 1, 2, 0, "SLT" }, - { 1, 2, 0, "SGE" }, - { 1, 3, 0, "MAD" }, - { 1, 2, 0, "SUB" }, - { 1, 3, 0, "LERP" }, - { 1, 3, 0, "CND" }, - { 1, 3, 0, "CND0" }, - { 1, 3, 0, "DOT2ADD" }, - { 1, 2, 0, "INDEX" }, - { 1, 1, 0, "NEGATE" }, - { 1, 1, 0, "FRAC" }, - { 1, 3, 0, "CLAMP" }, - { 1, 1, 0, "FLOOR" }, - { 1, 1, 0, "ROUND" }, - { 1, 1, 0, "EXPBASE2" }, - { 1, 1, 0, "LOGBASE2" }, - { 1, 2, 0, "POWER" }, - { 1, 2, 0, "CROSSPRODUCT" }, - { 1, 2, 0, "MULTIPLYMATRIX" }, - { 1, 1, 0, "ABS" }, - { 1, 1, 0, "RCC" }, - { 1, 2, 0, "DPH" }, - { 1, 1, 0, "COS" }, - { 1, 1, 0, "DDX" }, - { 1, 1, 0, "DDY" }, - { 0, 1, 0, "KILP" }, - { 1, 1, 0, "PK2H" }, - { 1, 1, 0, "PK2US" }, - { 1, 1, 0, "PK4B" }, - { 1, 1, 0, "PK4UB" }, - { 1, 2, 0, "RFL" }, - { 1, 2, 0, "SEQ" }, - { 1, 2, 0, "SFL" }, - { 1, 2, 0, "SGT" }, - { 1, 1, 0, "SIN" }, - { 1, 2, 0, "SLE" }, - { 1, 2, 0, "SNE" }, - { 1, 2, 0, "STR" }, - { 1, 2, 1, "TEX" }, - { 1, 4, 1, "TXD" }, - { 1, 2, 1, "TXP" }, - { 1, 1, 0, "UP2H" }, - { 1, 1, 0, "UP2US" }, - { 1, 1, 0, "UP4B" }, - { 1, 1, 0, "UP4UB" }, - { 1, 3, 0, "X2D" }, - { 1, 1, 0, "ARA" }, - { 1, 1, 0, "ARR" }, - { 0, 1, 0, "BRA" }, - { 0, 0, 0, "CAL" }, - { 0, 0, 0, "RET" }, - { 1, 1, 0, "SSG" }, - { 1, 3, 0, "CMP" }, - { 1, 1, 0, "SCS" }, - { 1, 2, 1, "TXB" }, - { 1, 1, 0, "NRM" }, - { 1, 2, 0, "DIV" }, - { 1, 2, 0, "DP2" }, - { 1, 2, 1, "TXL" }, - { 0, 0, 0, "BRK" }, - { 0, 1, 0, "IF" }, - { 0, 0, 0, "LOOP" }, - { 0, 1, 0, "REP" }, - { 0, 0, 0, "ELSE" }, - { 0, 0, 0, "ENDIF" }, - { 0, 0, 0, "ENDLOOP" }, - { 0, 0, 0, "ENDREP" }, - { 0, 1, 0, "PUSHA" }, - { 1, 0, 0, "POPA" }, - { 1, 1, 0, "CEIL" }, - { 1, 1, 0, "I2F" }, - { 1, 1, 0, "NOT" }, - { 1, 1, 0, "TRUNC" }, - { 1, 2, 0, "SHL" }, - { 1, 2, 0, "SHR" }, - { 1, 2, 0, "AND" }, - { 1, 2, 0, "OR" }, - { 1, 2, 0, "MOD" }, - { 1, 2, 0, "XOR" }, - { 1, 3, 0, "SAD" }, - { 1, 2, 1, "TXF" }, - { 1, 2, 1, "TXQ" }, - { 0, 0, 0, "CONT" }, - { 0, 0, 0, "EMIT" }, - { 0, 0, 0, "ENDPRIM" }, - { 0, 0, 0, "BGNLOOP2" }, - { 0, 0, 0, "BGNSUB" }, - { 0, 0, 0, "ENDLOOP2" }, - { 0, 0, 0, "ENDSUB" }, - { 1, 1, 0, "NOISE1" }, - { 1, 1, 0, "NOISE2" }, - { 1, 1, 0, "NOISE3" }, - { 1, 1, 0, "NOISE4" }, - { 0, 0, 0, "NOP" }, - { 1, 2, 0, "M4X3" }, - { 1, 2, 0, "M3X4" }, - { 1, 2, 0, "M3X3" }, - { 1, 2, 0, "M3X2" }, - { 1, 1, 0, "NRM4" }, - { 0, 1, 0, "CALLNZ" }, - { 0, 1, 0, "IFC" }, - { 0, 1, 0, "BREAKC" }, - { 0, 0, 0, "KIL" }, - { 0, 0, 0, "END" }, - { 1, 1, 0, "SWZ" } + { 1, 1, 0, 0, "ARL" }, + { 1, 1, 0, 0, "MOV" }, + { 1, 1, 0, 0, "LIT" }, + { 1, 1, 0, 0, "RCP" }, + { 1, 1, 0, 0, "RSQ" }, + { 1, 1, 0, 0, "EXP" }, + { 1, 1, 0, 0, "LOG" }, + { 1, 2, 0, 0, "MUL" }, + { 1, 2, 0, 0, "ADD" }, + { 1, 2, 0, 0, "DP3" }, + { 1, 2, 0, 0, "DP4" }, + { 1, 2, 0, 0, "DST" }, + { 1, 2, 0, 0, "MIN" }, + { 1, 2, 0, 0, "MAX" }, + { 1, 2, 0, 0, "SLT" }, + { 1, 2, 0, 0, "SGE" }, + { 1, 3, 0, 0, "MAD" }, + { 1, 2, 0, 0, "SUB" }, + { 1, 3, 0, 0, "LERP" }, + { 1, 3, 0, 0, "CND" }, + { 1, 3, 0, 0, "CND0" }, + { 1, 3, 0, 0, "DOT2ADD" }, + { 1, 2, 0, 0, "INDEX" }, + { 1, 1, 0, 0, "NEGATE" }, + { 1, 1, 0, 0, "FRAC" }, + { 1, 3, 0, 0, "CLAMP" }, + { 1, 1, 0, 0, "FLOOR" }, + { 1, 1, 0, 0, "ROUND" }, + { 1, 1, 0, 0, "EXPBASE2" }, + { 1, 1, 0, 0, "LOGBASE2" }, + { 1, 2, 0, 0, "POWER" }, + { 1, 2, 0, 0, "CROSSPRODUCT" }, + { 1, 2, 0, 0, "MULTIPLYMATRIX" }, + { 1, 1, 0, 0, "ABS" }, + { 1, 1, 0, 0, "RCC" }, + { 1, 2, 0, 0, "DPH" }, + { 1, 1, 0, 0, "COS" }, + { 1, 1, 0, 0, "DDX" }, + { 1, 1, 0, 0, "DDY" }, + { 0, 1, 0, 0, "KILP" }, + { 1, 1, 0, 0, "PK2H" }, + { 1, 1, 0, 0, "PK2US" }, + { 1, 1, 0, 0, "PK4B" }, + { 1, 1, 0, 0, "PK4UB" }, + { 1, 2, 0, 0, "RFL" }, + { 1, 2, 0, 0, "SEQ" }, + { 1, 2, 0, 0, "SFL" }, + { 1, 2, 0, 0, "SGT" }, + { 1, 1, 0, 0, "SIN" }, + { 1, 2, 0, 0, "SLE" }, + { 1, 2, 0, 0, "SNE" }, + { 1, 2, 0, 0, "STR" }, + { 1, 2, 1, 0, "TEX" }, + { 1, 4, 1, 0, "TXD" }, + { 1, 2, 1, 0, "TXP" }, + { 1, 1, 0, 0, "UP2H" }, + { 1, 1, 0, 0, "UP2US" }, + { 1, 1, 0, 0, "UP4B" }, + { 1, 1, 0, 0, "UP4UB" }, + { 1, 3, 0, 0, "X2D" }, + { 1, 1, 0, 0, "ARA" }, + { 1, 1, 0, 0, "ARR" }, + { 0, 1, 0, 0, "BRA" }, + { 0, 0, 0, 1, "CAL" }, + { 0, 0, 0, 0, "RET" }, + { 1, 1, 0, 0, "SSG" }, + { 1, 3, 0, 0, "CMP" }, + { 1, 1, 0, 0, "SCS" }, + { 1, 2, 1, 0, "TXB" }, + { 1, 1, 0, 0, "NRM" }, + { 1, 2, 0, 0, "DIV" }, + { 1, 2, 0, 0, "DP2" }, + { 1, 2, 1, 0, "TXL" }, + { 0, 0, 0, 0, "BRK" }, + { 0, 1, 0, 1, "IF" }, + { 0, 0, 0, 0, "LOOP" }, + { 0, 1, 0, 0, "REP" }, + { 0, 0, 0, 1, "ELSE" }, + { 0, 0, 0, 0, "ENDIF" }, + { 0, 0, 0, 0, "ENDLOOP" }, + { 0, 0, 0, 0, "ENDREP" }, + { 0, 1, 0, 0, "PUSHA" }, + { 1, 0, 0, 0, "POPA" }, + { 1, 1, 0, 0, "CEIL" }, + { 1, 1, 0, 0, "I2F" }, + { 1, 1, 0, 0, "NOT" }, + { 1, 1, 0, 0, "TRUNC" }, + { 1, 2, 0, 0, "SHL" }, + { 1, 2, 0, 0, "SHR" }, + { 1, 2, 0, 0, "AND" }, + { 1, 2, 0, 0, "OR" }, + { 1, 2, 0, 0, "MOD" }, + { 1, 2, 0, 0, "XOR" }, + { 1, 3, 0, 0, "SAD" }, + { 1, 2, 1, 0, "TXF" }, + { 1, 2, 1, 0, "TXQ" }, + { 0, 0, 0, 0, "CONT" }, + { 0, 0, 0, 0, "EMIT" }, + { 0, 0, 0, 0, "ENDPRIM" }, + { 0, 0, 0, 1, "BGNLOOP2" }, + { 0, 0, 0, 0, "BGNSUB" }, + { 0, 0, 0, 1, "ENDLOOP2" }, + { 0, 0, 0, 0, "ENDSUB" }, + { 1, 1, 0, 0, "NOISE1" }, + { 1, 1, 0, 0, "NOISE2" }, + { 1, 1, 0, 0, "NOISE3" }, + { 1, 1, 0, 0, "NOISE4" }, + { 0, 0, 0, 0, "NOP" }, + { 1, 2, 0, 0, "M4X3" }, + { 1, 2, 0, 0, "M3X4" }, + { 1, 2, 0, 0, "M3X3" }, + { 1, 2, 0, 0, "M3X2" }, + { 1, 1, 0, 0, "NRM4" }, + { 0, 1, 0, 0, "CALLNZ" }, + { 0, 1, 0, 0, "IFC" }, + { 0, 1, 0, 0, "BREAKC" }, + { 0, 0, 0, 0, "KIL" }, + { 0, 0, 0, 0, "END" }, + { 1, 1, 0, 0, "SWZ" } }; static const char *texture_names[TGSI_TEXTURE_COUNT] = @@ -915,6 +916,23 @@ parse_instruction( } } + if (info->is_branch) { + uint target; + + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ':') { + report_error( ctx, "Expected `:'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &target )) { + report_error( ctx, "Expected a label" ); + return FALSE; + } + inst.InstructionExtLabel.Label = target; + } + advance = tgsi_build_full_instruction( &inst, ctx->tokens_cur, -- cgit v1.2.3 From b3c8d0c348c5d894e4df0314f060361893bdd38e Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 20 Jul 2008 21:58:07 +0200 Subject: tgsi: Parse source register extended swizzle. --- src/gallium/auxiliary/tgsi/util/tgsi_text.c | 91 ++++++++++++++++++++--------- 1 file changed, 64 insertions(+), 27 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c index 442016104b..35cb3055bb 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_text.c @@ -496,6 +496,52 @@ parse_dst_operand( return TRUE; } +static boolean +parse_optional_swizzle( + struct translate_ctx *ctx, + uint swizzle[4], + boolean *parsed_swizzle, + boolean *parsed_extswizzle ) +{ + const char *cur = ctx->cur; + + *parsed_swizzle = FALSE; + *parsed_extswizzle = FALSE; + + eat_opt_white( &cur ); + if (*cur == '.') { + uint i; + + cur++; + eat_opt_white( &cur ); + for (i = 0; i < 4; i++) { + if (toupper( *cur ) == 'X') + swizzle[i] = TGSI_SWIZZLE_X; + else if (toupper( *cur ) == 'Y') + swizzle[i] = TGSI_SWIZZLE_Y; + else if (toupper( *cur ) == 'Z') + swizzle[i] = TGSI_SWIZZLE_Z; + else if (toupper( *cur ) == 'W') + swizzle[i] = TGSI_SWIZZLE_W; + else { + if (*cur == '0') + swizzle[i] = TGSI_EXTSWIZZLE_ZERO; + else if (*cur == '1') + swizzle[i] = TGSI_EXTSWIZZLE_ONE; + else { + report_error( ctx, "Expected register swizzle component `x', `y', `z', `w', `0' or `1'" ); + return FALSE; + } + *parsed_extswizzle = TRUE; + } + cur++; + } + *parsed_swizzle = TRUE; + ctx->cur = cur; + } + return TRUE; +} + static boolean parse_src_operand( struct translate_ctx *ctx, @@ -507,6 +553,9 @@ parse_src_operand( int index; uint ind_file; int ind_index; + uint swizzle[4]; + boolean parsed_swizzle; + boolean parsed_extswizzle; if (*ctx->cur == '-') { cur = ctx->cur; @@ -588,35 +637,23 @@ parse_src_operand( src->SrcRegisterInd.Index = ind_index; } - /* Parse optional swizzle + /* Parse optional swizzle. */ - cur = ctx->cur; - eat_opt_white( &cur ); - if (*cur == '.') { - uint i; - - cur++; - eat_opt_white( &cur ); - for (i = 0; i < 4; i++) { - uint swizzle; - - if (toupper( *cur ) == 'X') - swizzle = TGSI_SWIZZLE_X; - else if (toupper( *cur ) == 'Y') - swizzle = TGSI_SWIZZLE_Y; - else if (toupper( *cur ) == 'Z') - swizzle = TGSI_SWIZZLE_Z; - else if (toupper( *cur ) == 'W') - swizzle = TGSI_SWIZZLE_W; - else { - report_error( ctx, "Expected register swizzle component either `x', `y', `z' or `w'" ); - return FALSE; - } - cur++; - tgsi_util_set_src_register_swizzle( &src->SrcRegister, swizzle, i ); + if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle, &parsed_extswizzle )) { + if (parsed_extswizzle) { + assert( parsed_swizzle ); + + src->SrcRegisterExtSwz.ExtSwizzleX = swizzle[0]; + src->SrcRegisterExtSwz.ExtSwizzleY = swizzle[1]; + src->SrcRegisterExtSwz.ExtSwizzleZ = swizzle[2]; + src->SrcRegisterExtSwz.ExtSwizzleW = swizzle[3]; + } + else if (parsed_swizzle) { + src->SrcRegister.SwizzleX = swizzle[0]; + src->SrcRegister.SwizzleY = swizzle[1]; + src->SrcRegister.SwizzleZ = swizzle[2]; + src->SrcRegister.SwizzleW = swizzle[3]; } - - ctx->cur = cur; } if (src->SrcRegisterExtMod.Complement) { -- cgit v1.2.3 From 1753ff9872562bc05aff2472fc1256539085bbfc Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Mon, 21 Jul 2008 14:38:57 +0200 Subject: tgsi: Update Makefile to include tgsi_iterate.c --- src/gallium/auxiliary/tgsi/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index 5555639b70..9c4b967651 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -6,6 +6,7 @@ LIBNAME = tgsi C_SOURCES = \ exec/tgsi_exec.c \ exec/tgsi_sse2.c \ + util/tgsi_iterate.c \ util/tgsi_build.c \ util/tgsi_dump.c \ util/tgsi_parse.c \ -- cgit v1.2.3 From 4db631a3994ed2f5b7cf6e4ac6c08c0a4c091730 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 21 Jul 2008 19:38:25 +0900 Subject: gallium: Open a new file when existing profile memory map is exhausted. --- src/gallium/auxiliary/util/p_debug_prof.c | 147 +++++++++++++++++++++++------- 1 file changed, 113 insertions(+), 34 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug_prof.c b/src/gallium/auxiliary/util/p_debug_prof.c index 958f99c327..69c914c780 100644 --- a/src/gallium/auxiliary/util/p_debug_prof.c +++ b/src/gallium/auxiliary/util/p_debug_prof.c @@ -49,12 +49,97 @@ #define PROFILE_FILE_SIZE 4*1024*1024 #define FILE_NAME_SIZE 256 -static WCHAR wFileName[FILE_NAME_SIZE]; +static WCHAR wFileName[FILE_NAME_SIZE] = L"\\??\\c:\\00000000.trace"; static ULONG_PTR iFile = 0; -static void *pMap = NULL; -static void *pMapEnd = NULL; +static BYTE *pMap = NULL; +static BYTE *pMapBegin = NULL; +static BYTE *pMapEnd = NULL; +void __declspec(naked) __cdecl +debug_profile_close(void) +{ + _asm { + push eax + push ebx + push ecx + push edx + push ebp + push edi + push esi + } + + if(iFile) { + EngUnmapFile(iFile); + /* Truncate the file */ + pMap = EngMapFile(wFileName, pMap - pMapBegin, &iFile); + if(pMap) + EngUnmapFile(iFile); + } + iFile = 0; + pMapBegin = pMapEnd = pMap = NULL; + + _asm { + pop esi + pop edi + pop ebp + pop edx + pop ecx + pop ebx + pop eax + ret + } +} + + +void __declspec(naked) __cdecl +debug_profile_open(void) +{ + WCHAR *p; + + _asm { + push eax + push ebx + push ecx + push edx + push ebp + push edi + push esi + } + + debug_profile_close(); + + // increment starting from the less significant digit + p = &wFileName[14]; + while(1) { + if(*p == '9') { + *p-- = '0'; + } + else { + *p += 1; + break; + } + } + + pMap = EngMapFile(wFileName, PROFILE_FILE_SIZE, &iFile); + if(pMap) { + pMapBegin = pMap; + pMapEnd = pMap + PROFILE_FILE_SIZE; + } + + _asm { + pop esi + pop edi + pop ebp + pop edx + pop ecx + pop ebx + pop eax + ret + } +} + + /** * Called at the start of every method or function. * @@ -64,11 +149,15 @@ void __declspec(naked) __cdecl _penter(void) { _asm { push ebx +retry: mov ebx, [pMap] test ebx, ebx jz done cmp ebx, [pMapEnd] - je done + jne ready + call debug_profile_open + jmp retry +ready: push eax push edx mov eax, [esp+12] @@ -97,11 +186,15 @@ void __declspec(naked) __cdecl _pexit(void) { _asm { push ebx +retry: mov ebx, [pMap] test ebx, ebx jz done cmp ebx, [pMapEnd] - je done + jne ready + call debug_profile_open + jmp retry +ready: push eax push edx mov eax, [esp+12] @@ -121,21 +214,13 @@ done: } +/** + * Reference function for calibration. + */ void __declspec(naked) -__debug_profile_reference1(void) { - _asm { - call _penter - call _pexit - ret - } -} - - -void __declspec(naked) -__debug_profile_reference2(void) { +__debug_profile_reference(void) { _asm { call _penter - call __debug_profile_reference1 call _pexit ret } @@ -145,31 +230,25 @@ __debug_profile_reference2(void) { void debug_profile_start(void) { - static unsigned no = 0; - char filename[FILE_NAME_SIZE]; - unsigned i; - - util_snprintf(filename, sizeof(filename), "\\??\\c:\\%03u.prof", ++no); - for(i = 0; i < FILE_NAME_SIZE; ++i) - wFileName[i] = (WCHAR)filename[i]; + debug_profile_open(); - pMap = EngMapFile(wFileName, PROFILE_FILE_SIZE, &iFile); if(pMap) { - pMapEnd = (unsigned char*)pMap + PROFILE_FILE_SIZE; - /* reference functions for calibration purposes */ - __debug_profile_reference2(); + unsigned i; + for(i = 0; i < 8; ++i) { + _asm { + call _penter + call __debug_profile_reference + call _pexit + } + } } } + void debug_profile_stop(void) { - if(iFile) { - EngUnmapFile(iFile); - /* TODO: truncate file */ - } - iFile = 0; - pMapEnd = pMap = NULL; + debug_profile_close(); } #endif /* PROFILE */ -- cgit v1.2.3 From 883097053d1d3550cab92362a07f413e110520ae Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 22 Jul 2008 09:45:10 +0900 Subject: win32kprof: Store the profile data as an caller->callee hash table, instead of a trace. --- bin/win32kprof.py | 124 +++++------- src/gallium/auxiliary/util/p_debug_prof.c | 308 ++++++++++++++++++------------ 2 files changed, 233 insertions(+), 199 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/bin/win32kprof.py b/bin/win32kprof.py index 4ffa4cc694..c36317d23a 100755 --- a/bin/win32kprof.py +++ b/bin/win32kprof.py @@ -117,15 +117,7 @@ class Reader: self.symbols = [] self.symbol_cache = {} self.base_addr = None - self.last_stamp = 0 - self.stamp_base = 0 - def unwrap_stamp(self, stamp): - if stamp < self.last_stamp: - self.stamp_base += 1 << 32 - self.last_stamp = stamp - return self.stamp_base + stamp - def read_map(self, mapfile): # See http://msdn.microsoft.com/en-us/library/k7xkk3e2.aspx last_addr = 0 @@ -140,10 +132,6 @@ class Reader: continue section, offset = section_offset.split(':') addr = int(offset, 16) - if last_addr == addr: - # TODO: handle collapsed functions - #assert last_name == name - continue self.symbols.append((addr, name)) last_addr = addr last_name = name @@ -170,12 +158,12 @@ class Reader: e = i continue if addr == end_addr: - return next_name + return next_name, addr - start_addr if addr > end_addr: s = i continue return name, addr - start_addr - return "0x%08x" % addr, 0 + raise ValueError def lookup_symbol(self, name): for symbol_addr, symbol_name in self.symbols: @@ -187,96 +175,76 @@ class Reader: profile = Profile() fp = file(data, "rb") - entry_format = "II" + entry_format = "IIII" entry_size = struct.calcsize(entry_format) caller = None caller_stack = [] - last_stamp = 0 - delta = 0 while True: entry = fp.read(entry_size) if len(entry) < entry_size: break - addr_exit, stamp = struct.unpack(entry_format, entry) - if addr_exit == 0 and stamp == 0: - break - addr = addr_exit & 0xfffffffe - exit = addr_exit & 0x00000001 + caller_addr, callee_addr, samples_lo, samples_hi = struct.unpack(entry_format, entry) + if caller_addr == 0 and callee_addr == 0: + continue if self.base_addr is None: - ref_addr = self.lookup_symbol('___debug_profile_reference2@0') + ref_addr = self.lookup_symbol('___debug_profile_reference@0') if ref_addr: - self.base_addr = (addr - ref_addr) & ~(options.align - 1) + self.base_addr = (caller_addr - ref_addr) & ~(options.align - 1) else: self.base_addr = 0 - #print hex(self.base_addr) - rel_addr = addr - self.base_addr - #print hex(addr - self.base_addr) - - symbol, offset = self.lookup_addr(rel_addr) - stamp = self.unwrap_stamp(stamp) - delta = stamp - last_stamp - - if not exit: - if options.verbose >= 2: - sys.stderr.write("%08x >> 0x%08x\n" % (stamp, addr)) - if options.verbose: - sys.stderr.write("%+8u >> %s+%u\n" % (delta, symbol, offset)) + sys.stderr.write('Base addr: %08x\n' % self.base_addr) + + samples = (samples_hi << 32) | samples_lo + + try: + caller_raddr = caller_addr - self.base_addr + caller_sym, caller_ofs = self.lookup_addr(caller_raddr) + + try: + caller = profile.functions[caller_sym] + except KeyError: + caller_name = demangle(caller_sym) + caller = Function(caller_sym, caller_name) + profile.add_function(caller) + caller[CALLS] = 0 + caller[SAMPLES] = 0 + except ValueError: + caller = None + + if not callee_addr: + if caller: + caller[SAMPLES] += samples else: - if options.verbose >= 2: - sys.stderr.write("%08x << 0x%08x\n" % (stamp, addr)) - if options.verbose: - sys.stderr.write("%+8u << %s+%u\n" % (delta, symbol, offset)) - - # Eliminate outliers - if exit and delta > 0x1000000: - # TODO: Use a statistic test instead of a threshold - sys.stderr.write("warning: ignoring excessive delta of +%u in function %s\n" % (delta, symbol)) - delta = 0 - - # Remove overhead - # TODO: compute the overhead automatically - delta = max(0, delta - 84) - - if caller is not None: - caller[SAMPLES] += delta - - if not exit: - # Function call + callee_raddr = callee_addr - self.base_addr + callee_sym, callee_ofs = self.lookup_addr(callee_raddr) + try: - callee = profile.functions[symbol] + callee = profile.functions[callee_sym] except KeyError: - name = demangle(symbol) - callee = Function(symbol, name) + callee_name = demangle(callee_sym) + callee = Function(callee_sym, callee_name) profile.add_function(callee) - callee[CALLS] = 1 + callee[CALLS] = samples callee[SAMPLES] = 0 else: - callee[CALLS] += 1 + callee[CALLS] += samples if caller is not None: try: call = caller.calls[callee.id] except KeyError: call = Call(callee.id) - call[CALLS] = 1 + call[CALLS] = samples caller.add_call(call) else: - call[CALLS] += 1 - caller_stack.append(caller) - - caller = callee - - else: - # Function return - if caller is not None: - assert caller.id == symbol - try: - caller = caller_stack.pop() - except IndexError: - caller = None - - last_stamp = stamp + call[CALLS] += samples + + if options.verbose: + if not callee_addr: + sys.stderr.write('%s+%u: %u\n' % (caller_sym, caller_ofs, samples)) + else: + sys.stderr.write('%s+%u -> %s+%u: %u\n' % (caller_sym, caller_ofs, callee_sym, callee_ofs, samples)) # compute derived data profile.validate() diff --git a/src/gallium/auxiliary/util/p_debug_prof.c b/src/gallium/auxiliary/util/p_debug_prof.c index 69c914c780..5f9772ef91 100644 --- a/src/gallium/auxiliary/util/p_debug_prof.c +++ b/src/gallium/auxiliary/util/p_debug_prof.c @@ -46,97 +46,83 @@ #include "util/u_string.h" -#define PROFILE_FILE_SIZE 4*1024*1024 +#define PROFILE_TABLE_SIZE (1024*1024) #define FILE_NAME_SIZE 256 -static WCHAR wFileName[FILE_NAME_SIZE] = L"\\??\\c:\\00000000.trace"; -static ULONG_PTR iFile = 0; -static BYTE *pMap = NULL; -static BYTE *pMapBegin = NULL; -static BYTE *pMapEnd = NULL; +struct debug_profile_entry +{ + uintptr_t caller; + uintptr_t callee; + uint64_t samples; +}; +static unsigned long enabled = 0; -void __declspec(naked) __cdecl -debug_profile_close(void) -{ - _asm { - push eax - push ebx - push ecx - push edx - push ebp - push edi - push esi - } +static WCHAR wFileName[FILE_NAME_SIZE] = L"\\??\\c:\\00000000.prof"; +static ULONG_PTR iFile = 0; - if(iFile) { - EngUnmapFile(iFile); - /* Truncate the file */ - pMap = EngMapFile(wFileName, pMap - pMapBegin, &iFile); - if(pMap) - EngUnmapFile(iFile); - } - iFile = 0; - pMapBegin = pMapEnd = pMap = NULL; - - _asm { - pop esi - pop edi - pop ebp - pop edx - pop ecx - pop ebx - pop eax - ret - } -} +static struct debug_profile_entry *table = NULL; +static unsigned long free_table_entries = 0; +static unsigned long max_table_entries = 0; +uint64_t start_stamp = 0; +uint64_t end_stamp = 0; -void __declspec(naked) __cdecl -debug_profile_open(void) -{ - WCHAR *p; - - _asm { - push eax - push ebx - push ecx - push edx - push ebp - push edi - push esi - } - debug_profile_close(); +static void +debug_profile_entry(uintptr_t caller, uintptr_t callee, uint64_t samples) +{ + unsigned hash = ( caller + callee ) & PROFILE_TABLE_SIZE - 1; - // increment starting from the less significant digit - p = &wFileName[14]; while(1) { - if(*p == '9') { - *p-- = '0'; + if(table[hash].caller == 0 && table[hash].callee == 0) { + table[hash].caller = caller; + table[hash].callee = callee; + table[hash].samples = samples; + --free_table_entries; + break; } - else { - *p += 1; + else if(table[hash].caller == caller && table[hash].callee == callee) { + table[hash].samples += samples; break; } + else { + ++hash; + } } +} - pMap = EngMapFile(wFileName, PROFILE_FILE_SIZE, &iFile); - if(pMap) { - pMapBegin = pMap; - pMapEnd = pMap + PROFILE_FILE_SIZE; - } - - _asm { - pop esi - pop edi - pop ebp - pop edx - pop ecx - pop ebx - pop eax - ret - } + +static uintptr_t caller_stack[1024]; +static unsigned last_caller = 0; + + +static int64_t delta(void) { + int64_t result = end_stamp - start_stamp; + if(result > UINT64_C(0xffffffff)) + result = 0; + return result; +} + + +static void __cdecl +debug_profile_enter(uintptr_t callee) +{ + uintptr_t caller = last_caller ? caller_stack[last_caller - 1] : 0; + + if (caller) + debug_profile_entry(caller, 0, delta()); + debug_profile_entry(caller, callee, 1); + caller_stack[last_caller++] = callee; +} + + +static void __cdecl +debug_profile_exit(uintptr_t callee) +{ + debug_profile_entry(callee, 0, delta()); + if(last_caller) + --last_caller; } @@ -148,31 +134,49 @@ debug_profile_open(void) void __declspec(naked) __cdecl _penter(void) { _asm { - push ebx -retry: - mov ebx, [pMap] - test ebx, ebx - jz done - cmp ebx, [pMapEnd] - jne ready - call debug_profile_open - jmp retry -ready: push eax + mov eax, [enabled] + test eax, eax + jz skip + push edx - mov eax, [esp+12] - and eax, 0xfffffffe - mov [ebx], eax - add ebx, 4 + rdtsc - mov [ebx], eax - add ebx, 4 - mov [pMap], ebx + mov dword ptr [end_stamp], eax + mov dword ptr [end_stamp+4], edx + + xor eax, eax + mov [enabled], eax + + mov eax, [esp+8] + + push ebx + push ecx + push ebp + push edi + push esi + + push eax + call debug_profile_enter + add esp, 4 + + pop esi + pop edi + pop ebp + pop ecx + pop ebx + + mov eax, 1 + mov [enabled], eax + + rdtsc + mov dword ptr [start_stamp], eax + mov dword ptr [start_stamp+4], edx + pop edx +skip: pop eax -done: - pop ebx - ret + ret } } @@ -185,30 +189,48 @@ done: void __declspec(naked) __cdecl _pexit(void) { _asm { - push ebx -retry: - mov ebx, [pMap] - test ebx, ebx - jz done - cmp ebx, [pMapEnd] - jne ready - call debug_profile_open - jmp retry -ready: push eax + mov eax, [enabled] + test eax, eax + jz skip + push edx - mov eax, [esp+12] - or eax, 0x00000001 - mov [ebx], eax - add ebx, 4 + rdtsc - mov [ebx], eax - add ebx, 4 - mov [pMap], ebx + mov dword ptr [end_stamp], eax + mov dword ptr [end_stamp+4], edx + + xor eax, eax + mov [enabled], eax + + mov eax, [esp+8] + + push ebx + push ecx + push ebp + push edi + push esi + + push eax + call debug_profile_exit + add esp, 4 + + pop esi + pop edi + pop ebp + pop ecx + pop ebx + + mov eax, 1 + mov [enabled], eax + + rdtsc + mov dword ptr [start_stamp], eax + mov dword ptr [start_stamp+4], edx + pop edx +skip: pop eax -done: - pop ebx ret } } @@ -230,15 +252,53 @@ __debug_profile_reference(void) { void debug_profile_start(void) { - debug_profile_open(); - - if(pMap) { + WCHAR *p; + + // increment starting from the less significant digit + p = &wFileName[14]; + while(1) { + if(*p == '9') { + *p-- = '0'; + } + else { + *p += 1; + break; + } + } + + table = EngMapFile(wFileName, + PROFILE_TABLE_SIZE*sizeof(struct debug_profile_entry), + &iFile); + if(table) { unsigned i; + + free_table_entries = max_table_entries = PROFILE_TABLE_SIZE; + memset(table, 0, PROFILE_TABLE_SIZE*sizeof(struct debug_profile_entry)); + + table[0].caller = (uintptr_t)&__debug_profile_reference; + table[0].callee = 0; + table[0].samples = 0; + --free_table_entries; + + _asm { + push edx + push eax + + rdtsc + mov dword ptr [start_stamp], eax + mov dword ptr [start_stamp+4], edx + + pop edx + pop eax + } + + last_caller = 0; + + enabled = 1; + for(i = 0; i < 8; ++i) { _asm { - call _penter call __debug_profile_reference - call _pexit } } } @@ -248,7 +308,13 @@ debug_profile_start(void) void debug_profile_stop(void) { - debug_profile_close(); + enabled = 0; + + if(iFile) + EngUnmapFile(iFile); + iFile = 0; + table = NULL; + free_table_entries = max_table_entries = 0; } #endif /* PROFILE */ -- cgit v1.2.3 From 51d219dbfe6852d348755574184639940af444e3 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 22 Jul 2008 17:14:54 +0200 Subject: tgsi: Fix immediate usage checks. Provide more info for register usage errors/warnings. --- src/gallium/auxiliary/tgsi/util/tgsi_sanity.c | 56 +++++++++++++++++++++------ 1 file changed, 45 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c index 08f3995b13..9673f061ce 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c @@ -53,18 +53,32 @@ struct sanity_check_ctx static void report_error( struct sanity_check_ctx *ctx, - const char *msg ) + const char *format, + ... ) { - debug_printf( "\nError: %s", msg ); + va_list args; + + debug_printf( "Error : " ); + va_start( args, format ); + _debug_vprintf( format, args ); + va_end( args ); + debug_printf( "\n" ); ctx->errors++; } static void report_warning( struct sanity_check_ctx *ctx, - const char *msg ) + const char *format, + ... ) { - debug_printf( "\nWarning: %s", msg ); + va_list args; + + debug_printf( "Warning: " ); + va_start( args, format ); + _debug_vprintf( format, args ); + va_end( args ); + debug_printf( "\n" ); ctx->warnings++; } @@ -74,7 +88,7 @@ check_file_name( uint file ) { if (file <= TGSI_FILE_NULL || file >= TGSI_FILE_COUNT) { - report_error( ctx, "Invalid file name" ); + report_error( ctx, "Invalid register file name" ); return FALSE; } return TRUE; @@ -115,23 +129,36 @@ is_register_used( return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; } +static const char *file_names[] = +{ + "NULL", + "CONST", + "IN", + "OUT", + "TEMP", + "SAMP", + "ADDR", + "IMM" +}; + static boolean check_register_usage( struct sanity_check_ctx *ctx, uint file, int index, - boolean indirect ) + const char *name, + boolean indirect_access ) { if (!check_file_name( ctx, file )) return FALSE; - if (indirect) { + if (indirect_access) { if (!is_any_register_declared( ctx, file )) - report_error( ctx, "Undeclared source register" ); + report_error( ctx, "%s: Undeclared %s register", file_names[file], name ); ctx->regs_ind_used[file] = TRUE; } else { if (!is_register_declared( ctx, file, index )) - report_error( ctx, "Undeclared destination register" ); + report_error( ctx, "%s[%d]: Undeclared %s register", file_names[file], index, name ); ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG)); } return TRUE; @@ -162,6 +189,7 @@ iter_instruction( ctx, inst->FullDstRegisters[i].DstRegister.File, inst->FullDstRegisters[i].DstRegister.Index, + "destination", FALSE ); } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { @@ -169,6 +197,7 @@ iter_instruction( ctx, inst->FullSrcRegisters[i].SrcRegister.File, inst->FullSrcRegisters[i].SrcRegister.Index, + "source", inst->FullSrcRegisters[i].SrcRegister.Indirect ); if (inst->FullSrcRegisters[i].SrcRegister.Indirect) { uint file; @@ -176,7 +205,12 @@ iter_instruction( file = inst->FullSrcRegisters[i].SrcRegisterInd.File; index = inst->FullSrcRegisters[i].SrcRegisterInd.Index; - check_register_usage( ctx, file, index, FALSE ); + check_register_usage( + ctx, + file, + index, + "indirect", + FALSE ); if (file != TGSI_FILE_ADDRESS || index != 0) report_warning( ctx, "Indirect register not ADDR[0]" ); } @@ -232,8 +266,8 @@ iter_immediate( /* Mark the register as declared. */ - ctx->num_imms++; ctx->regs_decl[TGSI_FILE_IMMEDIATE][ctx->num_imms / BITS_IN_REG_FLAG] |= (1 << (ctx->num_imms % BITS_IN_REG_FLAG)); + ctx->num_imms++; /* Check data type validity. */ -- cgit v1.2.3 From 0aa0141e0cd7b5231140805b1cda821bb3d32a86 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 23 Jul 2008 17:52:56 +0200 Subject: tgsi: Fix tgsi_util_get_full_src_register_extswizzle(). --- src/gallium/auxiliary/tgsi/util/tgsi_util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.c b/src/gallium/auxiliary/tgsi/util/tgsi_util.c index 10762b6c1a..09486e649e 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_util.c @@ -119,7 +119,7 @@ tgsi_util_get_full_src_register_extswizzle( if( swizzle <= TGSI_SWIZZLE_W ) { swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegister, - component ); + swizzle ); } return swizzle; -- cgit v1.2.3 From 5f2a5f6164ace6e12c1a3ba95f1103dc8fafa68f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Jul 2008 09:56:44 -0600 Subject: gallium: print extended swizzle negation flags --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c index 94180f7e50..d2e6375212 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c @@ -491,9 +491,17 @@ tgsi_dump_instruction( src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) { CHR( '.' ); + if (src->SrcRegisterExtSwz.NegateX) + TXT("-"); ENM( src->SrcRegisterExtSwz.ExtSwizzleX, extswizzle_names ); + if (src->SrcRegisterExtSwz.NegateY) + TXT("-"); ENM( src->SrcRegisterExtSwz.ExtSwizzleY, extswizzle_names ); + if (src->SrcRegisterExtSwz.NegateZ) + TXT("-"); ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, extswizzle_names ); + if (src->SrcRegisterExtSwz.NegateW) + TXT("-"); ENM( src->SrcRegisterExtSwz.ExtSwizzleW, extswizzle_names ); } -- cgit v1.2.3 From f7be39ea105aa951d0f6e1d8ffbea63412e30801 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 23 Jul 2008 16:28:15 -0600 Subject: gallium: bump TGSI_EXEC_NUM_TEMPS to 128 --- src/gallium/auxiliary/tgsi/exec/tgsi_exec.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h index 18abdd9ac0..4f30650b07 100644 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h @@ -100,7 +100,7 @@ struct tgsi_exec_labels }; -#define TGSI_EXEC_NUM_TEMPS 64 +#define TGSI_EXEC_NUM_TEMPS 128 #define TGSI_EXEC_NUM_TEMP_EXTRAS 6 #define TGSI_EXEC_NUM_IMMEDIATES 256 -- cgit v1.2.3 From 83869ceab5b3faed8569a5ca752d4dc426db1aec Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 22 Jul 2008 22:07:22 +0900 Subject: tgsi: Silent msvc warning. Rather stupid warning: msvc is warning that converting from a 1bit structure bitfield to a unsigned char looses precision... /WX makes this an error. --- src/gallium/auxiliary/tgsi/util/tgsi_sanity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c index 9673f061ce..2e3ec96b5b 100644 --- a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c @@ -198,7 +198,7 @@ iter_instruction( inst->FullSrcRegisters[i].SrcRegister.File, inst->FullSrcRegisters[i].SrcRegister.Index, "source", - inst->FullSrcRegisters[i].SrcRegister.Indirect ); + (boolean)inst->FullSrcRegisters[i].SrcRegister.Indirect ); if (inst->FullSrcRegisters[i].SrcRegister.Indirect) { uint file; int index; -- cgit v1.2.3 From ff7a7031caa0ac592f210aca696a20c9de6dc0d4 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 26 Jul 2008 09:17:10 +0900 Subject: gallium: Windows miniport portability fixes. --- src/gallium/auxiliary/util/p_debug.c | 12 +++---- src/gallium/auxiliary/util/u_snprintf.c | 10 ++++++ src/gallium/include/pipe/p_util.h | 63 +++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index b0240ad737..cdc7e66361 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -73,8 +73,7 @@ _EngDebugPrint(const char *format, ...) void _debug_vprintf(const char *format, va_list ap) { -#ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY -#ifndef WINCE +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) /* EngDebugPrint does not handle float point arguments, so we need to use * our own vsnprintf implementation. It is also very slow, so buffer until * we find a newline. */ @@ -85,9 +84,6 @@ void _debug_vprintf(const char *format, va_list ap) _EngDebugPrint("%s", buf); buf[0] = '\0'; } -#else - /* TODO: Implement debug print for WINCE */ -#endif #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) /* EngDebugPrint does not handle float point arguments, so we need to use * our own vsnprintf implementation. It is also very slow, so buffer until @@ -99,7 +95,9 @@ void _debug_vprintf(const char *format, va_list ap) OutputDebugStringA(buf); buf[0] = '\0'; } -#else +#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) || defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + /* TODO */ +#else /* !PIPE_SUBSYSTEM_WINDOWS */ vfprintf(stderr, format, ap); #endif } @@ -211,7 +209,7 @@ _debug_get_option(const char *name) #else return NULL; #endif -#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) +#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) || defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) /* TODO: implement */ return NULL; #else diff --git a/src/gallium/auxiliary/util/u_snprintf.c b/src/gallium/auxiliary/util/u_snprintf.c index c4f4bbd30c..7fa84d8bec 100644 --- a/src/gallium/auxiliary/util/u_snprintf.c +++ b/src/gallium/auxiliary/util/u_snprintf.c @@ -162,6 +162,8 @@ * . */ +#include "pipe/p_config.h" + #if HAVE_CONFIG_H #include #else @@ -1102,7 +1104,11 @@ again: * Factor of ten with the number of digits needed for the fractional * part. For example, if the precision is 3, the mask will be 1000. */ +#if defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + mask = (unsigned long)mypow10(precision); +#else mask = (UINTMAX_T)mypow10(precision); +#endif /* * We "cheat" by converting the fractional part to integer by * multiplying by a factor of ten. @@ -1354,7 +1360,11 @@ cast(LDOUBLE value) if (value >= UINTMAX_MAX) return UINTMAX_MAX; +#if defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + result = (unsigned long)value; +#else result = (UINTMAX_T)value; +#endif /* * At least on NetBSD/sparc64 3.0.2 and 4.99.30, casting long double to * an integer type converts e.g. 1.9 to 2 instead of 1 (which violates diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index 7dcdd28287..892bd4bf8a 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -33,8 +33,71 @@ #include "p_debug.h" #include "p_format.h" #include "p_pointer.h" + +#if defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) +__inline double ceil(double val) +{ + double ceil_val; + + if((val - (long) val) == 0) { + ceil_val = val; + } else { + if(val > 0) { + ceil_val = (long) val + 1; + } else { + ceil_val = (long) val; + } + } + + return ceil_val; +} + +#ifndef PIPE_SUBSYSTEM_WINDOWS_CE +__inline double floor(double val) +{ + double floor_val; + + if((val - (long) val) == 0) { + floor_val = val; + } else { + if(val > 0) { + floor_val = (long) val; + } else { + floor_val = (long) val - 1; + } + } + + return floor_val; +} +#endif + +#pragma function(pow) +__inline double __cdecl pow(double val, double exponent) +{ + /* XXX */ + assert(0); + return 0; +} + +#pragma function(log) +__inline double __cdecl log(double val) +{ + /* XXX */ + assert(0); + return 0; +} + +#pragma function(atan2) +__inline double __cdecl atan2(double val) +{ + /* XXX */ + assert(0); + return 0; +} +#else #include #include +#endif #ifdef __cplusplus -- cgit v1.2.3 From c208a2c791fa24c7c5887fc496738cbddbfafc72 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 28 Jul 2008 12:42:13 +0900 Subject: Merge tgsi/exec and tgsi/util directories. --- src/gallium/auxiliary/cso_cache/cso_context.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_aaline.c | 4 +- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 4 +- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 4 +- src/gallium/auxiliary/draw/draw_private.h | 4 +- src/gallium/auxiliary/draw/draw_vs_aos.c | 8 +- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 6 +- src/gallium/auxiliary/draw/draw_vs_aos_machine.c | 6 +- src/gallium/auxiliary/draw/draw_vs_exec.c | 4 +- src/gallium/auxiliary/draw/draw_vs_llvm.c | 2 +- src/gallium/auxiliary/draw/draw_vs_sse.c | 4 +- src/gallium/auxiliary/gallivm/gallivm.cpp | 4 +- src/gallium/auxiliary/gallivm/gallivm_cpu.cpp | 4 +- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 10 +- src/gallium/auxiliary/tgsi/Makefile | 18 +- src/gallium/auxiliary/tgsi/SConscript | 24 +- src/gallium/auxiliary/tgsi/exec/Makefile | 2 - src/gallium/auxiliary/tgsi/exec/tgsi_exec.c | 2522 -------------------- src/gallium/auxiliary/tgsi/exec/tgsi_exec.h | 253 -- src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c | 2275 ------------------ src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h | 49 - src/gallium/auxiliary/tgsi/tgsi_build.c | 1324 ++++++++++ src/gallium/auxiliary/tgsi/tgsi_build.h | 332 +++ src/gallium/auxiliary/tgsi/tgsi_dump.c | 582 +++++ src/gallium/auxiliary/tgsi/tgsi_dump.h | 63 + src/gallium/auxiliary/tgsi/tgsi_dump_c.c | 845 +++++++ src/gallium/auxiliary/tgsi/tgsi_dump_c.h | 49 + src/gallium/auxiliary/tgsi/tgsi_exec.c | 2522 ++++++++++++++++++++ src/gallium/auxiliary/tgsi/tgsi_exec.h | 253 ++ src/gallium/auxiliary/tgsi/tgsi_iterate.c | 85 + src/gallium/auxiliary/tgsi/tgsi_iterate.h | 76 + src/gallium/auxiliary/tgsi/tgsi_parse.c | 332 +++ src/gallium/auxiliary/tgsi/tgsi_parse.h | 151 ++ src/gallium/auxiliary/tgsi/tgsi_sanity.c | 341 +++ src/gallium/auxiliary/tgsi/tgsi_sanity.h | 49 + src/gallium/auxiliary/tgsi/tgsi_scan.c | 226 ++ src/gallium/auxiliary/tgsi/tgsi_scan.h | 74 + src/gallium/auxiliary/tgsi/tgsi_sse2.c | 2275 ++++++++++++++++++ src/gallium/auxiliary/tgsi/tgsi_sse2.h | 49 + src/gallium/auxiliary/tgsi/tgsi_text.c | 1221 ++++++++++ src/gallium/auxiliary/tgsi/tgsi_text.h | 47 + src/gallium/auxiliary/tgsi/tgsi_transform.c | 199 ++ src/gallium/auxiliary/tgsi/tgsi_transform.h | 93 + src/gallium/auxiliary/tgsi/tgsi_util.c | 300 +++ src/gallium/auxiliary/tgsi/tgsi_util.h | 96 + src/gallium/auxiliary/tgsi/util/tgsi_build.c | 1324 ---------- src/gallium/auxiliary/tgsi/util/tgsi_build.h | 332 --- src/gallium/auxiliary/tgsi/util/tgsi_dump.c | 582 ----- src/gallium/auxiliary/tgsi/util/tgsi_dump.h | 63 - src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c | 845 ------- src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h | 49 - src/gallium/auxiliary/tgsi/util/tgsi_iterate.c | 85 - src/gallium/auxiliary/tgsi/util/tgsi_iterate.h | 76 - src/gallium/auxiliary/tgsi/util/tgsi_parse.c | 332 --- src/gallium/auxiliary/tgsi/util/tgsi_parse.h | 151 -- src/gallium/auxiliary/tgsi/util/tgsi_sanity.c | 341 --- src/gallium/auxiliary/tgsi/util/tgsi_sanity.h | 49 - src/gallium/auxiliary/tgsi/util/tgsi_scan.c | 226 -- src/gallium/auxiliary/tgsi/util/tgsi_scan.h | 74 - src/gallium/auxiliary/tgsi/util/tgsi_text.c | 1221 ---------- src/gallium/auxiliary/tgsi/util/tgsi_text.h | 47 - src/gallium/auxiliary/tgsi/util/tgsi_transform.c | 199 -- src/gallium/auxiliary/tgsi/util/tgsi_transform.h | 93 - src/gallium/auxiliary/tgsi/util/tgsi_util.c | 300 --- src/gallium/auxiliary/tgsi/util/tgsi_util.h | 96 - src/gallium/auxiliary/util/u_gen_mipmap.c | 6 +- src/gallium/auxiliary/util/u_simple_shaders.c | 6 +- src/gallium/drivers/cell/ppu/cell_context.h | 2 +- src/gallium/drivers/cell/ppu/cell_state_shader.c | 2 +- src/gallium/drivers/cell/spu/spu_exec.c | 4 +- src/gallium/drivers/cell/spu/spu_exec.h | 2 +- src/gallium/drivers/cell/spu/spu_util.c | 4 +- src/gallium/drivers/i915simple/i915_context.h | 2 +- .../drivers/i915simple/i915_fpc_translate.c | 4 +- src/gallium/drivers/i915simple/i915_state.c | 2 +- src/gallium/drivers/i965simple/brw_context.h | 2 +- src/gallium/drivers/i965simple/brw_sf.c | 2 +- src/gallium/drivers/i965simple/brw_shader_info.c | 2 +- src/gallium/drivers/i965simple/brw_state.c | 4 +- src/gallium/drivers/i965simple/brw_vs_emit.c | 2 +- src/gallium/drivers/i965simple/brw_wm_decl.c | 2 +- src/gallium/drivers/i965simple/brw_wm_glsl.c | 2 +- src/gallium/drivers/softpipe/sp_fs_exec.c | 4 +- src/gallium/drivers/softpipe/sp_fs_llvm.c | 2 +- src/gallium/drivers/softpipe/sp_fs_sse.c | 4 +- src/gallium/drivers/softpipe/sp_headers.h | 2 +- src/gallium/drivers/softpipe/sp_state.h | 2 +- src/gallium/drivers/softpipe/sp_state_fs.c | 4 +- src/gallium/drivers/softpipe/sp_tex_sample.c | 2 +- src/gallium/state_trackers/python/gallium.i | 4 +- src/mesa/state_tracker/st_debug.c | 2 +- src/mesa/state_tracker/st_mesa_to_tgsi.c | 6 +- src/mesa/state_tracker/st_program.c | 2 +- 93 files changed, 11680 insertions(+), 11682 deletions(-) delete mode 100644 src/gallium/auxiliary/tgsi/exec/Makefile delete mode 100644 src/gallium/auxiliary/tgsi/exec/tgsi_exec.c delete mode 100644 src/gallium/auxiliary/tgsi/exec/tgsi_exec.h delete mode 100755 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c delete mode 100755 src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_build.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_build.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_dump.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_dump.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_dump_c.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_dump_c.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_exec.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_exec.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_iterate.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_iterate.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_parse.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_parse.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_sanity.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_sanity.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_scan.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_scan.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_sse2.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_sse2.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_text.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_text.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_transform.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_transform.h create mode 100644 src/gallium/auxiliary/tgsi/tgsi_util.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_util.h delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_build.c delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_build.h delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_dump.c delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_dump.h delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_iterate.c delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_iterate.h delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_parse.c delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_parse.h delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_sanity.c delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_sanity.h delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_scan.c delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_scan.h delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_text.c delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_text.h delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_transform.c delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_transform.h delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_util.c delete mode 100644 src/gallium/auxiliary/tgsi/util/tgsi_util.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index af4af8ac1d..86e4d46a20 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -38,7 +38,7 @@ #include "pipe/p_state.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" #include "cso_cache/cso_context.h" #include "cso_cache/cso_cache.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 3dd7ee19fd..991304b2c8 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -38,8 +38,8 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_transform.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_transform.h" +#include "tgsi/tgsi_dump.h" #include "draw_context.h" #include "draw_private.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 87fd303649..13b4401521 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -44,8 +44,8 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_transform.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_transform.h" +#include "tgsi/tgsi_dump.h" #include "draw_context.h" #include "draw_vs.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index 1f63f94365..d3bd9baddd 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -40,8 +40,8 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_transform.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_transform.h" +#include "tgsi/tgsi_dump.h" #include "draw_context.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 7bd1e670b4..626a2e3e30 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -44,8 +44,8 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "tgsi/exec/tgsi_exec.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_scan.h" struct pipe_context; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 1f926b3e85..441877d46f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -31,10 +31,10 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" -#include "tgsi/exec/tgsi_exec.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_dump.h" #include "draw_vs.h" #include "draw_vs_aos.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index 8e834501a4..eda677cc62 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -28,9 +28,9 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" -#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_exec.h" #include "draw_vs.h" #include "draw_vs_aos.h" #include "draw_vertex.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c index 6a54917ae3..e029b7b4bb 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c @@ -31,9 +31,9 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" -#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_exec.h" #include "draw_vs.h" #include "draw_vs_aos.h" #include "draw_vertex.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 4501877efc..e26903d8cc 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -38,8 +38,8 @@ #include "draw_context.h" #include "draw_vs.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" struct exec_vertex_shader { diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index c63bd51a10..fc03473b91 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -38,7 +38,7 @@ #include "draw_context.h" #include "draw_vs.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" #ifdef MESA_LLVM diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index c3189c707d..61f0c084c3 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -45,8 +45,8 @@ #include "rtasm/rtasm_cpu.h" #include "rtasm/rtasm_x86sse.h" -#include "tgsi/exec/tgsi_sse2.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_sse2.h" +#include "tgsi/tgsi_parse.h" #define SSE_MAX_VERTICES 4 diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp index 77900e342b..29adeea47d 100644 --- a/src/gallium/auxiliary/gallivm/gallivm.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm.cpp @@ -42,8 +42,8 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/exec/tgsi_exec.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_dump.h" #include #include diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp index 857c190f7b..cf5b978837 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp @@ -43,8 +43,8 @@ #include "pipe/p_shader_tokens.h" #include "pipe/p_util.h" -#include "tgsi/exec/tgsi_exec.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_dump.h" #include #include diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 98014bdaa1..b14e2affd6 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -10,11 +10,11 @@ #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/exec/tgsi_exec.h" -#include "tgsi/util/tgsi_util.h" -#include "tgsi/util/tgsi_build.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_util.h" +#include "tgsi/tgsi_build.h" +#include "tgsi/tgsi_dump.h" #include diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index 9c4b967651..bbeff1304d 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -4,15 +4,15 @@ include $(TOP)/configs/current LIBNAME = tgsi C_SOURCES = \ - exec/tgsi_exec.c \ - exec/tgsi_sse2.c \ - util/tgsi_iterate.c \ - util/tgsi_build.c \ - util/tgsi_dump.c \ - util/tgsi_parse.c \ - util/tgsi_scan.c \ - util/tgsi_transform.c \ - util/tgsi_util.c + tgsi_build.c \ + tgsi_dump.c \ + tgsi_exec.c \ + tgsi_iterate.c \ + tgsi_parse.c \ + tgsi_scan.c \ + tgsi_sse2.c \ + tgsi_transform.c \ + tgsi_util.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript index 3bbfa1be54..03982e2194 100644 --- a/src/gallium/auxiliary/tgsi/SConscript +++ b/src/gallium/auxiliary/tgsi/SConscript @@ -3,18 +3,18 @@ Import('*') tgsi = env.ConvenienceLibrary( target = 'tgsi', source = [ - 'exec/tgsi_exec.c', - 'exec/tgsi_sse2.c', - 'util/tgsi_build.c', - 'util/tgsi_dump.c', - 'util/tgsi_dump_c.c', - 'util/tgsi_iterate.c', - 'util/tgsi_parse.c', - 'util/tgsi_sanity.c', - 'util/tgsi_scan.c', - 'util/tgsi_text.c', - 'util/tgsi_transform.c', - 'util/tgsi_util.c', + 'tgsi_build.c', + 'tgsi_dump.c', + 'tgsi_dump_c.c', + 'tgsi_exec.c', + 'tgsi_iterate.c', + 'tgsi_parse.c', + 'tgsi_sanity.c', + 'tgsi_scan.c', + 'tgsi_sse2.c', + 'tgsi_text.c', + 'tgsi_transform.c', + 'tgsi_util.c', ]) auxiliaries.insert(0, tgsi) diff --git a/src/gallium/auxiliary/tgsi/exec/Makefile b/src/gallium/auxiliary/tgsi/exec/Makefile deleted file mode 100644 index 451911a354..0000000000 --- a/src/gallium/auxiliary/tgsi/exec/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -default: - cd .. ; make diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c deleted file mode 100644 index 001a4c4b15..0000000000 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.c +++ /dev/null @@ -1,2522 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * TGSI interpretor/executor. - * - * Flow control information: - * - * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) - * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special - * care since a condition may be true for some quad components but false - * for other components. - * - * We basically execute all statements (even if they're in the part of - * an IF/ELSE clause that's "not taken") and use a special mask to - * control writing to destination registers. This is the ExecMask. - * See store_dest(). - * - * The ExecMask is computed from three other masks (CondMask, LoopMask and - * ContMask) which are controlled by the flow control instructions (namely: - * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). - * - * - * Authors: - * Michal Krol - * Brian Paul - */ - -#include "pipe/p_compiler.h" -#include "pipe/p_state.h" -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" -#include "tgsi_exec.h" - -#define TILE_TOP_LEFT 0 -#define TILE_TOP_RIGHT 1 -#define TILE_BOTTOM_LEFT 2 -#define TILE_BOTTOM_RIGHT 3 - -/* - * Shorthand locations of various utility registers (_I = Index, _C = Channel) - */ -#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I -#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C -#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I -#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C -#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I -#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C -#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I -#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C -#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I -#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C -#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I -#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C -#define TEMP_128_I TGSI_EXEC_TEMP_128_I -#define TEMP_128_C TGSI_EXEC_TEMP_128_C -#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I -#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C -#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I -#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C -#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I -#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C -#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I -#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C -#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I -#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C -#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I -#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C -#define TEMP_R0 TGSI_EXEC_TEMP_R0 - -#define FOR_EACH_CHANNEL(CHAN)\ - for (CHAN = 0; CHAN < 4; CHAN++) - -#define IS_CHANNEL_ENABLED(INST, CHAN)\ - ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) - -#define IS_CHANNEL_ENABLED2(INST, CHAN)\ - ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) - -#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ - FOR_EACH_CHANNEL( CHAN )\ - if (IS_CHANNEL_ENABLED( INST, CHAN )) - -#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ - FOR_EACH_CHANNEL( CHAN )\ - if (IS_CHANNEL_ENABLED2( INST, CHAN )) - - -/** The execution mask depends on the conditional mask and the loop mask */ -#define UPDATE_EXEC_MASK(MACH) \ - MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask - - -#define CHAN_X 0 -#define CHAN_Y 1 -#define CHAN_Z 2 -#define CHAN_W 3 - - - -/** - * Initialize machine state by expanding tokens to full instructions, - * allocating temporary storage, setting up constants, etc. - * After this, we can call tgsi_exec_machine_run() many times. - */ -void -tgsi_exec_machine_bind_shader( - struct tgsi_exec_machine *mach, - const struct tgsi_token *tokens, - uint numSamplers, - struct tgsi_sampler *samplers) -{ - uint k; - struct tgsi_parse_context parse; - struct tgsi_exec_labels *labels = &mach->Labels; - struct tgsi_full_instruction *instructions; - struct tgsi_full_declaration *declarations; - uint maxInstructions = 10, numInstructions = 0; - uint maxDeclarations = 10, numDeclarations = 0; - uint instno = 0; - -#if 0 - tgsi_dump(tokens, 0); -#endif - - mach->Tokens = tokens; - mach->Samplers = samplers; - - k = tgsi_parse_init (&parse, mach->Tokens); - if (k != TGSI_PARSE_OK) { - debug_printf( "Problem parsing!\n" ); - return; - } - - mach->Processor = parse.FullHeader.Processor.Processor; - mach->ImmLimit = 0; - labels->count = 0; - - declarations = (struct tgsi_full_declaration *) - MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); - - if (!declarations) { - return; - } - - instructions = (struct tgsi_full_instruction *) - MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); - - if (!instructions) { - FREE( declarations ); - return; - } - - while( !tgsi_parse_end_of_tokens( &parse ) ) { - uint pointer = parse.Position; - uint i; - - tgsi_parse_token( &parse ); - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - /* save expanded declaration */ - if (numDeclarations == maxDeclarations) { - declarations = REALLOC(declarations, - maxDeclarations - * sizeof(struct tgsi_full_declaration), - (maxDeclarations + 10) - * sizeof(struct tgsi_full_declaration)); - maxDeclarations += 10; - } - memcpy(declarations + numDeclarations, - &parse.FullToken.FullDeclaration, - sizeof(declarations[0])); - numDeclarations++; - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; - assert( size % 4 == 0 ); - assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES ); - - for( i = 0; i < size; i++ ) { - mach->Imms[mach->ImmLimit + i / 4][i % 4] = - parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; - } - mach->ImmLimit += size / 4; - } - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - assert( labels->count < MAX_LABELS ); - - labels->labels[labels->count][0] = instno; - labels->labels[labels->count][1] = pointer; - labels->count++; - - /* save expanded instruction */ - if (numInstructions == maxInstructions) { - instructions = REALLOC(instructions, - maxInstructions - * sizeof(struct tgsi_full_instruction), - (maxInstructions + 10) - * sizeof(struct tgsi_full_instruction)); - maxInstructions += 10; - } - memcpy(instructions + numInstructions, - &parse.FullToken.FullInstruction, - sizeof(instructions[0])); - numInstructions++; - break; - - default: - assert( 0 ); - } - } - tgsi_parse_free (&parse); - - if (mach->Declarations) { - FREE( mach->Declarations ); - } - mach->Declarations = declarations; - mach->NumDeclarations = numDeclarations; - - if (mach->Instructions) { - FREE( mach->Instructions ); - } - mach->Instructions = instructions; - mach->NumInstructions = numInstructions; -} - - -void -tgsi_exec_machine_init( - struct tgsi_exec_machine *mach ) -{ - uint i; - - mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps); - mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; - - /* Setup constants. */ - for( i = 0; i < 4; i++ ) { - mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; - mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; - mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; - mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; - mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; - mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; - mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; - mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; - mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; - mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; - } -} - - -void -tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach) -{ - if (mach->Instructions) { - FREE(mach->Instructions); - mach->Instructions = NULL; - mach->NumInstructions = 0; - } - if (mach->Declarations) { - FREE(mach->Declarations); - mach->Declarations = NULL; - mach->NumDeclarations = 0; - } -} - - -static void -micro_abs( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = fabsf( src->f[0] ); - dst->f[1] = fabsf( src->f[1] ); - dst->f[2] = fabsf( src->f[2] ); - dst->f[3] = fabsf( src->f[3] ); -} - -static void -micro_add( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->f[0] = src0->f[0] + src1->f[0]; - dst->f[1] = src0->f[1] + src1->f[1]; - dst->f[2] = src0->f[2] + src1->f[2]; - dst->f[3] = src0->f[3] + src1->f[3]; -} - -static void -micro_iadd( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] + src1->i[0]; - dst->i[1] = src0->i[1] + src1->i[1]; - dst->i[2] = src0->i[2] + src1->i[2]; - dst->i[3] = src0->i[3] + src1->i[3]; -} - -static void -micro_and( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] & src1->u[0]; - dst->u[1] = src0->u[1] & src1->u[1]; - dst->u[2] = src0->u[2] & src1->u[2]; - dst->u[3] = src0->u[3] & src1->u[3]; -} - -static void -micro_ceil( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = ceilf( src->f[0] ); - dst->f[1] = ceilf( src->f[1] ); - dst->f[2] = ceilf( src->f[2] ); - dst->f[3] = ceilf( src->f[3] ); -} - -static void -micro_cos( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = cosf( src->f[0] ); - dst->f[1] = cosf( src->f[1] ); - dst->f[2] = cosf( src->f[2] ); - dst->f[3] = cosf( src->f[3] ); -} - -static void -micro_ddx( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = - dst->f[1] = - dst->f[2] = - dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; -} - -static void -micro_ddy( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = - dst->f[1] = - dst->f[2] = - dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; -} - -static void -micro_div( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->f[0] = src0->f[0] / src1->f[0]; - dst->f[1] = src0->f[1] / src1->f[1]; - dst->f[2] = src0->f[2] / src1->f[2]; - dst->f[3] = src0->f[3] / src1->f[3]; -} - -static void -micro_udiv( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] / src1->u[0]; - dst->u[1] = src0->u[1] / src1->u[1]; - dst->u[2] = src0->u[2] / src1->u[2]; - dst->u[3] = src0->u[3] / src1->u[3]; -} - -static void -micro_eq( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; -} - -static void -micro_ieq( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; - dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; - dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; - dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; -} - -static void -micro_exp2( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src) -{ - dst->f[0] = powf( 2.0f, src->f[0] ); - dst->f[1] = powf( 2.0f, src->f[1] ); - dst->f[2] = powf( 2.0f, src->f[2] ); - dst->f[3] = powf( 2.0f, src->f[3] ); -} - -static void -micro_f2it( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->i[0] = (int) src->f[0]; - dst->i[1] = (int) src->f[1]; - dst->i[2] = (int) src->f[2]; - dst->i[3] = (int) src->f[3]; -} - -static void -micro_f2ut( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->u[0] = (uint) src->f[0]; - dst->u[1] = (uint) src->f[1]; - dst->u[2] = (uint) src->f[2]; - dst->u[3] = (uint) src->f[3]; -} - -static void -micro_flr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = floorf( src->f[0] ); - dst->f[1] = floorf( src->f[1] ); - dst->f[2] = floorf( src->f[2] ); - dst->f[3] = floorf( src->f[3] ); -} - -static void -micro_frc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = src->f[0] - floorf( src->f[0] ); - dst->f[1] = src->f[1] - floorf( src->f[1] ); - dst->f[2] = src->f[2] - floorf( src->f[2] ); - dst->f[3] = src->f[3] - floorf( src->f[3] ); -} - -static void -micro_ge( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3]; -} - -static void -micro_i2f( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = (float) src->i[0]; - dst->f[1] = (float) src->i[1]; - dst->f[2] = (float) src->i[2]; - dst->f[3] = (float) src->i[3]; -} - -static void -micro_lg2( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = logf( src->f[0] ) * 1.442695f; - dst->f[1] = logf( src->f[1] ) * 1.442695f; - dst->f[2] = logf( src->f[2] ) * 1.442695f; - dst->f[3] = logf( src->f[3] ) * 1.442695f; -} - -static void -micro_le( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; -} - -static void -micro_lt( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; - dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; - dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; - dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; -} - -static void -micro_ilt( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; - dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; - dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; - dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; -} - -static void -micro_ult( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2, - const union tgsi_exec_channel *src3 ) -{ - dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; - dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; - dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; - dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; -} - -static void -micro_max( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; - dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; - dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; - dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; -} - -static void -micro_imax( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; - dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; - dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; - dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; -} - -static void -micro_umax( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; - dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; - dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; - dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; -} - -static void -micro_min( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; - dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; - dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; - dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; -} - -static void -micro_imin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; - dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; - dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; - dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; -} - -static void -micro_umin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; - dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; - dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; - dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; -} - -static void -micro_umod( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] % src1->u[0]; - dst->u[1] = src0->u[1] % src1->u[1]; - dst->u[2] = src0->u[2] % src1->u[2]; - dst->u[3] = src0->u[3] % src1->u[3]; -} - -static void -micro_mul( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->f[0] = src0->f[0] * src1->f[0]; - dst->f[1] = src0->f[1] * src1->f[1]; - dst->f[2] = src0->f[2] * src1->f[2]; - dst->f[3] = src0->f[3] * src1->f[3]; -} - -static void -micro_imul( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] * src1->i[0]; - dst->i[1] = src0->i[1] * src1->i[1]; - dst->i[2] = src0->i[2] * src1->i[2]; - dst->i[3] = src0->i[3] * src1->i[3]; -} - -static void -micro_imul64( - union tgsi_exec_channel *dst0, - union tgsi_exec_channel *dst1, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst1->i[0] = src0->i[0] * src1->i[0]; - dst1->i[1] = src0->i[1] * src1->i[1]; - dst1->i[2] = src0->i[2] * src1->i[2]; - dst1->i[3] = src0->i[3] * src1->i[3]; - dst0->i[0] = 0; - dst0->i[1] = 0; - dst0->i[2] = 0; - dst0->i[3] = 0; -} - -static void -micro_umul64( - union tgsi_exec_channel *dst0, - union tgsi_exec_channel *dst1, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst1->u[0] = src0->u[0] * src1->u[0]; - dst1->u[1] = src0->u[1] * src1->u[1]; - dst1->u[2] = src0->u[2] * src1->u[2]; - dst1->u[3] = src0->u[3] * src1->u[3]; - dst0->u[0] = 0; - dst0->u[1] = 0; - dst0->u[2] = 0; - dst0->u[3] = 0; -} - -static void -micro_movc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1, - const union tgsi_exec_channel *src2 ) -{ - dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; - dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; - dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; - dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; -} - -static void -micro_neg( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = -src->f[0]; - dst->f[1] = -src->f[1]; - dst->f[2] = -src->f[2]; - dst->f[3] = -src->f[3]; -} - -static void -micro_ineg( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->i[0] = -src->i[0]; - dst->i[1] = -src->i[1]; - dst->i[2] = -src->i[2]; - dst->i[3] = -src->i[3]; -} - -static void -micro_not( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->u[0] = ~src->u[0]; - dst->u[1] = ~src->u[1]; - dst->u[2] = ~src->u[2]; - dst->u[3] = ~src->u[3]; -} - -static void -micro_or( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] | src1->u[0]; - dst->u[1] = src0->u[1] | src1->u[1]; - dst->u[2] = src0->u[2] | src1->u[2]; - dst->u[3] = src0->u[3] | src1->u[3]; -} - -static void -micro_pow( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->f[0] = powf( src0->f[0], src1->f[0] ); - dst->f[1] = powf( src0->f[1], src1->f[1] ); - dst->f[2] = powf( src0->f[2], src1->f[2] ); - dst->f[3] = powf( src0->f[3], src1->f[3] ); -} - -static void -micro_rnd( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = floorf( src->f[0] + 0.5f ); - dst->f[1] = floorf( src->f[1] + 0.5f ); - dst->f[2] = floorf( src->f[2] + 0.5f ); - dst->f[3] = floorf( src->f[3] + 0.5f ); -} - -static void -micro_shl( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] << src1->i[0]; - dst->i[1] = src0->i[1] << src1->i[1]; - dst->i[2] = src0->i[2] << src1->i[2]; - dst->i[3] = src0->i[3] << src1->i[3]; -} - -static void -micro_ishr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->i[0] = src0->i[0] >> src1->i[0]; - dst->i[1] = src0->i[1] >> src1->i[1]; - dst->i[2] = src0->i[2] >> src1->i[2]; - dst->i[3] = src0->i[3] >> src1->i[3]; -} - -static void -micro_trunc( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0 ) -{ - dst->f[0] = (float) (int) src0->f[0]; - dst->f[1] = (float) (int) src0->f[1]; - dst->f[2] = (float) (int) src0->f[2]; - dst->f[3] = (float) (int) src0->f[3]; -} - -static void -micro_ushr( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] >> src1->u[0]; - dst->u[1] = src0->u[1] >> src1->u[1]; - dst->u[2] = src0->u[2] >> src1->u[2]; - dst->u[3] = src0->u[3] >> src1->u[3]; -} - -static void -micro_sin( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = sinf( src->f[0] ); - dst->f[1] = sinf( src->f[1] ); - dst->f[2] = sinf( src->f[2] ); - dst->f[3] = sinf( src->f[3] ); -} - -static void -micro_sqrt( union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = sqrtf( src->f[0] ); - dst->f[1] = sqrtf( src->f[1] ); - dst->f[2] = sqrtf( src->f[2] ); - dst->f[3] = sqrtf( src->f[3] ); -} - -static void -micro_sub( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->f[0] = src0->f[0] - src1->f[0]; - dst->f[1] = src0->f[1] - src1->f[1]; - dst->f[2] = src0->f[2] - src1->f[2]; - dst->f[3] = src0->f[3] - src1->f[3]; -} - -static void -micro_u2f( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->f[0] = (float) src->u[0]; - dst->f[1] = (float) src->u[1]; - dst->f[2] = (float) src->u[2]; - dst->f[3] = (float) src->u[3]; -} - -static void -micro_xor( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src0, - const union tgsi_exec_channel *src1 ) -{ - dst->u[0] = src0->u[0] ^ src1->u[0]; - dst->u[1] = src0->u[1] ^ src1->u[1]; - dst->u[2] = src0->u[2] ^ src1->u[2]; - dst->u[3] = src0->u[3] ^ src1->u[3]; -} - -static void -fetch_src_file_channel( - const struct tgsi_exec_machine *mach, - const uint file, - const uint swizzle, - const union tgsi_exec_channel *index, - union tgsi_exec_channel *chan ) -{ - switch( swizzle ) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: - switch( file ) { - case TGSI_FILE_CONSTANT: - chan->f[0] = mach->Consts[index->i[0]][swizzle]; - chan->f[1] = mach->Consts[index->i[1]][swizzle]; - chan->f[2] = mach->Consts[index->i[2]][swizzle]; - chan->f[3] = mach->Consts[index->i[3]][swizzle]; - break; - - case TGSI_FILE_INPUT: - chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; - break; - - case TGSI_FILE_TEMPORARY: - assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); - chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; - break; - - case TGSI_FILE_IMMEDIATE: - assert( index->i[0] < (int) mach->ImmLimit ); - chan->f[0] = mach->Imms[index->i[0]][swizzle]; - assert( index->i[1] < (int) mach->ImmLimit ); - chan->f[1] = mach->Imms[index->i[1]][swizzle]; - assert( index->i[2] < (int) mach->ImmLimit ); - chan->f[2] = mach->Imms[index->i[2]][swizzle]; - assert( index->i[3] < (int) mach->ImmLimit ); - chan->f[3] = mach->Imms[index->i[3]][swizzle]; - break; - - case TGSI_FILE_ADDRESS: - chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; - break; - - case TGSI_FILE_OUTPUT: - /* vertex/fragment output vars can be read too */ - chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; - chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; - chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; - chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; - break; - - default: - assert( 0 ); - } - break; - - case TGSI_EXTSWIZZLE_ZERO: - *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; - break; - - case TGSI_EXTSWIZZLE_ONE: - *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; - break; - - default: - assert( 0 ); - } -} - -static void -fetch_source( - const struct tgsi_exec_machine *mach, - union tgsi_exec_channel *chan, - const struct tgsi_full_src_register *reg, - const uint chan_index ) -{ - union tgsi_exec_channel index; - uint swizzle; - - index.i[0] = - index.i[1] = - index.i[2] = - index.i[3] = reg->SrcRegister.Index; - - if (reg->SrcRegister.Indirect) { - union tgsi_exec_channel index2; - union tgsi_exec_channel indir_index; - - index2.i[0] = - index2.i[1] = - index2.i[2] = - index2.i[3] = reg->SrcRegisterInd.Index; - - swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); - fetch_src_file_channel( - mach, - reg->SrcRegisterInd.File, - swizzle, - &index2, - &indir_index ); - - index.i[0] += indir_index.i[0]; - index.i[1] += indir_index.i[1]; - index.i[2] += indir_index.i[2]; - index.i[3] += indir_index.i[3]; - } - - if( reg->SrcRegister.Dimension ) { - switch( reg->SrcRegister.File ) { - case TGSI_FILE_INPUT: - index.i[0] *= 17; - index.i[1] *= 17; - index.i[2] *= 17; - index.i[3] *= 17; - break; - case TGSI_FILE_CONSTANT: - index.i[0] *= 4096; - index.i[1] *= 4096; - index.i[2] *= 4096; - index.i[3] *= 4096; - break; - default: - assert( 0 ); - } - - index.i[0] += reg->SrcRegisterDim.Index; - index.i[1] += reg->SrcRegisterDim.Index; - index.i[2] += reg->SrcRegisterDim.Index; - index.i[3] += reg->SrcRegisterDim.Index; - - if (reg->SrcRegisterDim.Indirect) { - union tgsi_exec_channel index2; - union tgsi_exec_channel indir_index; - - index2.i[0] = - index2.i[1] = - index2.i[2] = - index2.i[3] = reg->SrcRegisterDimInd.Index; - - swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); - fetch_src_file_channel( - mach, - reg->SrcRegisterDimInd.File, - swizzle, - &index2, - &indir_index ); - - index.i[0] += indir_index.i[0]; - index.i[1] += indir_index.i[1]; - index.i[2] += indir_index.i[2]; - index.i[3] += indir_index.i[3]; - } - } - - swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); - fetch_src_file_channel( - mach, - reg->SrcRegister.File, - swizzle, - &index, - chan ); - - switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { - case TGSI_UTIL_SIGN_CLEAR: - micro_abs( chan, chan ); - break; - - case TGSI_UTIL_SIGN_SET: - micro_abs( chan, chan ); - micro_neg( chan, chan ); - break; - - case TGSI_UTIL_SIGN_TOGGLE: - micro_neg( chan, chan ); - break; - - case TGSI_UTIL_SIGN_KEEP: - break; - } - - if (reg->SrcRegisterExtMod.Complement) { - micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); - } -} - -static void -store_dest( - struct tgsi_exec_machine *mach, - const union tgsi_exec_channel *chan, - const struct tgsi_full_dst_register *reg, - const struct tgsi_full_instruction *inst, - uint chan_index ) -{ - union tgsi_exec_channel *dst; - - switch( reg->DstRegister.File ) { - case TGSI_FILE_NULL: - return; - - case TGSI_FILE_OUTPUT: - dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] - + reg->DstRegister.Index].xyzw[chan_index]; - break; - - case TGSI_FILE_TEMPORARY: - assert(reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS); - dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; - break; - - case TGSI_FILE_ADDRESS: - dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; - break; - - default: - assert( 0 ); - return; - } - - switch (inst->Instruction.Saturate) - { - case TGSI_SAT_NONE: - if (mach->ExecMask & 0x1) - dst->i[0] = chan->i[0]; - if (mach->ExecMask & 0x2) - dst->i[1] = chan->i[1]; - if (mach->ExecMask & 0x4) - dst->i[2] = chan->i[2]; - if (mach->ExecMask & 0x8) - dst->i[3] = chan->i[3]; - break; - - case TGSI_SAT_ZERO_ONE: - /* XXX need to obey ExecMask here */ - micro_max(dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - micro_min(dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); - break; - - case TGSI_SAT_MINUS_PLUS_ONE: - assert( 0 ); - break; - - default: - assert( 0 ); - } -} - -#define FETCH(VAL,INDEX,CHAN)\ - fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) - -#define STORE(VAL,INDEX,CHAN)\ - store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) - - -/** - * Execute ARB-style KIL which is predicated by a src register. - * Kill fragment if any of the four values is less than zero. - */ -static void -exec_kilp(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) -{ - uint uniquemask; - uint chan_index; - uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ - union tgsi_exec_channel r[1]; - - /* This mask stores component bits that were already tested. Note that - * we test if the value is less than zero, so 1.0 and 0.0 need not to be - * tested. */ - uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); - - for (chan_index = 0; chan_index < 4; chan_index++) - { - uint swizzle; - uint i; - - /* unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_extswizzle ( - &inst->FullSrcRegisters[0], - chan_index); - - /* check if the component has not been already tested */ - if (uniquemask & (1 << swizzle)) - continue; - uniquemask |= 1 << swizzle; - - FETCH(&r[0], 0, chan_index); - for (i = 0; i < 4; i++) - if (r[0].f[i] < 0.0f) - kilmask |= 1 << i; - } - - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; -} - - -/* - * Fetch a texel using STR texture coordinates. - */ -static void -fetch_texel( struct tgsi_sampler *sampler, - const union tgsi_exec_channel *s, - const union tgsi_exec_channel *t, - const union tgsi_exec_channel *p, - float lodbias, /* XXX should be float[4] */ - union tgsi_exec_channel *r, - union tgsi_exec_channel *g, - union tgsi_exec_channel *b, - union tgsi_exec_channel *a ) -{ - uint j; - float rgba[NUM_CHANNELS][QUAD_SIZE]; - - sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); - - for (j = 0; j < 4; j++) { - r->f[j] = rgba[0][j]; - g->f[j] = rgba[1][j]; - b->f[j] = rgba[2][j]; - a->f[j] = rgba[3][j]; - } -} - - -static void -exec_tex(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst, - boolean biasLod, - boolean projected) -{ - const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; - union tgsi_exec_channel r[8]; - uint chan_index; - float lodBias; - - /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ - - switch (inst->InstructionExtTexture.Texture) { - case TGSI_TEXTURE_1D: - - FETCH(&r[0], 0, CHAN_X); - - if (projected) { - FETCH(&r[1], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[1] ); - } - - if (biasLod) { - FETCH(&r[1], 0, CHAN_W); - lodBias = r[2].f[0]; - } - else - lodBias = 0.0; - - fetch_texel(&mach->Samplers[unit], - &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ - &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ - break; - - case TGSI_TEXTURE_2D: - case TGSI_TEXTURE_RECT: - - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 0, CHAN_Z); - - if (projected) { - FETCH(&r[3], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[3] ); - micro_div( &r[1], &r[1], &r[3] ); - micro_div( &r[2], &r[2], &r[3] ); - } - - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; - } - else - lodBias = 0.0; - - fetch_texel(&mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, /* inputs */ - &r[0], &r[1], &r[2], &r[3]); /* outputs */ - break; - - case TGSI_TEXTURE_3D: - case TGSI_TEXTURE_CUBE: - - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 0, CHAN_Z); - - if (projected) { - FETCH(&r[3], 0, CHAN_W); - micro_div( &r[0], &r[0], &r[3] ); - micro_div( &r[1], &r[1], &r[3] ); - micro_div( &r[2], &r[2], &r[3] ); - } - - if (biasLod) { - FETCH(&r[3], 0, CHAN_W); - lodBias = r[3].f[0]; - } - else - lodBias = 0.0; - - fetch_texel(&mach->Samplers[unit], - &r[0], &r[1], &r[2], lodBias, - &r[0], &r[1], &r[2], &r[3]); - break; - - default: - assert (0); - } - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[chan_index], 0, chan_index ); - } -} - - -/** - * Evaluate a constant-valued coefficient at the position of the - * current quad. - */ -static void -eval_constant_coef( - struct tgsi_exec_machine *mach, - unsigned attrib, - unsigned chan ) -{ - unsigned i; - - for( i = 0; i < QUAD_SIZE; i++ ) { - mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; - } -} - -/** - * Evaluate a linear-valued coefficient at the position of the - * current quad. - */ -static void -eval_linear_coef( - struct tgsi_exec_machine *mach, - unsigned attrib, - unsigned chan ) -{ - const float x = mach->QuadPos.xyzw[0].f[0]; - const float y = mach->QuadPos.xyzw[1].f[0]; - const float dadx = mach->InterpCoefs[attrib].dadx[chan]; - const float dady = mach->InterpCoefs[attrib].dady[chan]; - const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; - mach->Inputs[attrib].xyzw[chan].f[0] = a0; - mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; - mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; - mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; -} - -/** - * Evaluate a perspective-valued coefficient at the position of the - * current quad. - */ -static void -eval_perspective_coef( - struct tgsi_exec_machine *mach, - unsigned attrib, - unsigned chan ) -{ - const float x = mach->QuadPos.xyzw[0].f[0]; - const float y = mach->QuadPos.xyzw[1].f[0]; - const float dadx = mach->InterpCoefs[attrib].dadx[chan]; - const float dady = mach->InterpCoefs[attrib].dady[chan]; - const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; - const float *w = mach->QuadPos.xyzw[3].f; - /* divide by W here */ - mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; - mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; - mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; - mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; -} - - -typedef void (* eval_coef_func)( - struct tgsi_exec_machine *mach, - unsigned attrib, - unsigned chan ); - -static void -exec_declaration( - struct tgsi_exec_machine *mach, - const struct tgsi_full_declaration *decl ) -{ - if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { - if( decl->Declaration.File == TGSI_FILE_INPUT ) { - unsigned first, last, mask; - eval_coef_func eval; - - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; - mask = decl->Declaration.UsageMask; - - switch( decl->Declaration.Interpolate ) { - case TGSI_INTERPOLATE_CONSTANT: - eval = eval_constant_coef; - break; - - case TGSI_INTERPOLATE_LINEAR: - eval = eval_linear_coef; - break; - - case TGSI_INTERPOLATE_PERSPECTIVE: - eval = eval_perspective_coef; - break; - - default: - assert( 0 ); - } - - if( mask == TGSI_WRITEMASK_XYZW ) { - unsigned i, j; - - for( i = first; i <= last; i++ ) { - for( j = 0; j < NUM_CHANNELS; j++ ) { - eval( mach, i, j ); - } - } - } - else { - unsigned i, j; - - for( j = 0; j < NUM_CHANNELS; j++ ) { - if( mask & (1 << j) ) { - for( i = first; i <= last; i++ ) { - eval( mach, i, j ); - } - } - } - } - } - } -} - -static void -exec_instruction( - struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst, - int *pc ) -{ - uint chan_index; - union tgsi_exec_channel r[8]; - - (*pc)++; - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ARL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_f2it( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_LIT: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_X ); - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, CHAN_Y ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[1], 0, CHAN_Y ); - micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - - FETCH( &r[2], 0, CHAN_W ); - micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); - micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); - micro_pow( &r[1], &r[1], &r[2] ); - micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, CHAN_Z ); - } - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ - FETCH( &r[0], 0, CHAN_X ); - micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT */ - FETCH( &r[0], 0, CHAN_X ); - micro_sqrt( &r[0], &r[0] ); - micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_EXP: - FETCH( &r[0], 0, CHAN_X ); - micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ - STORE( &r[2], 0, CHAN_X ); /* store r2 */ - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ - STORE( &r[2], 0, CHAN_Y ); /* store r2 */ - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ - STORE( &r[2], 0, CHAN_Z ); /* store r2 */ - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_LOG: - FETCH( &r[0], 0, CHAN_X ); - micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ - micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ - micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &r[0], 0, CHAN_X ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ - micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ - STORE( &r[0], 0, CHAN_Y ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - STORE( &r[1], 0, CHAN_Z ); - } - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_MUL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) - { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - micro_mul( &r[0], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_ADD: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_add( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DP3: - /* TGSI_OPCODE_DOT3 */ - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Z ); - FETCH( &r[2], 1, CHAN_Z ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DP4: - /* TGSI_OPCODE_DOT4 */ - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_W); - FETCH(&r[2], 1, CHAN_W); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DST: - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - FETCH( &r[0], 0, CHAN_Y ); - FETCH( &r[1], 1, CHAN_Y); - micro_mul( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, CHAN_Y ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( &r[0], 0, CHAN_Z ); - STORE( &r[0], 0, CHAN_Z ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - FETCH( &r[0], 1, CHAN_W ); - STORE( &r[0], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_MIN: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - /* XXX use micro_min()?? */ - micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_MAX: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - /* XXX use micro_max()?? */ - micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); - - STORE(&r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SLT: - /* TGSI_OPCODE_SETLT */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_mul( &r[0], &r[0], &r[1] ); - FETCH( &r[1], 2, chan_index ); - micro_add( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SUB: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - - micro_sub( &r[0], &r[0], &r[1] ); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_LERP: - /* TGSI_OPCODE_LRP */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - - micro_sub( &r[1], &r[1], &r[2] ); - micro_mul( &r[0], &r[0], &r[1] ); - micro_add( &r[0], &r[0], &r[2] ); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_CND: - assert (0); - break; - - case TGSI_OPCODE_CND0: - assert (0); - break; - - case TGSI_OPCODE_DOT2ADD: - /* TGSI_OPCODE_DP2A */ - assert (0); - break; - - case TGSI_OPCODE_INDEX: - assert (0); - break; - - case TGSI_OPCODE_NEGATE: - assert (0); - break; - - case TGSI_OPCODE_FRAC: - /* TGSI_OPCODE_FRC */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_frc( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_CLAMP: - assert (0); - break; - - case TGSI_OPCODE_FLOOR: - /* TGSI_OPCODE_FLR */ - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_flr( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_ROUND: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_rnd( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_EXPBASE2: - /* TGSI_OPCODE_EX2 */ - FETCH(&r[0], 0, CHAN_X); - - micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_LOGBASE2: - /* TGSI_OPCODE_LG2 */ - FETCH( &r[0], 0, CHAN_X ); - micro_lg2( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_POWER: - /* TGSI_OPCODE_POW */ - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - micro_pow( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_CROSSPRODUCT: - /* TGSI_OPCODE_XPD */ - FETCH(&r[0], 0, CHAN_Y); - FETCH(&r[1], 1, CHAN_Z); - - micro_mul( &r[2], &r[0], &r[1] ); - - FETCH(&r[3], 0, CHAN_Z); - FETCH(&r[4], 1, CHAN_Y); - - micro_mul( &r[5], &r[3], &r[4] ); - micro_sub( &r[2], &r[2], &r[5] ); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &r[2], 0, CHAN_X ); - } - - FETCH(&r[2], 1, CHAN_X); - - micro_mul( &r[3], &r[3], &r[2] ); - - FETCH(&r[5], 0, CHAN_X); - - micro_mul( &r[1], &r[1], &r[5] ); - micro_sub( &r[3], &r[3], &r[1] ); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { - STORE( &r[3], 0, CHAN_Y ); - } - - micro_mul( &r[5], &r[5], &r[4] ); - micro_mul( &r[0], &r[0], &r[2] ); - micro_sub( &r[5], &r[5], &r[0] ); - - if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - STORE( &r[5], 0, CHAN_Z ); - } - - if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_MULTIPLYMATRIX: - assert (0); - break; - - case TGSI_OPCODE_ABS: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - - micro_abs( &r[0], &r[0] ); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_RCC: - assert (0); - break; - - case TGSI_OPCODE_DPH: - FETCH(&r[0], 0, CHAN_X); - FETCH(&r[1], 1, CHAN_X); - - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Y); - FETCH(&r[2], 1, CHAN_Y); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 0, CHAN_Z); - FETCH(&r[2], 1, CHAN_Z); - - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FETCH(&r[1], 1, CHAN_W); - - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_COS: - FETCH(&r[0], 0, CHAN_X); - - micro_cos( &r[0], &r[0] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DDX: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ddx( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_DDY: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ddy( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_KILP: - exec_kilp (mach, inst); - break; - - case TGSI_OPCODE_KIL: - /* for enabled ExecMask bits, set the killed bit */ - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask; - break; - - case TGSI_OPCODE_PK2H: - assert (0); - break; - - case TGSI_OPCODE_PK2US: - assert (0); - break; - - case TGSI_OPCODE_PK4B: - assert (0); - break; - - case TGSI_OPCODE_PK4UB: - assert (0); - break; - - case TGSI_OPCODE_RFL: - assert (0); - break; - - case TGSI_OPCODE_SEQ: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_eq( &r[0], &r[0], &r[1], - &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], - &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SFL: - assert (0); - break; - - case TGSI_OPCODE_SGT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SIN: - FETCH( &r[0], 0, CHAN_X ); - micro_sin( &r[0], &r[0] ); - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SLE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SNE: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_STR: - assert (0); - break; - - case TGSI_OPCODE_TEX: - /* simple texture lookup */ - /* src[0] = texcoord */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE, FALSE); - break; - - case TGSI_OPCODE_TXB: - /* Texture lookup with lod bias */ - /* src[0] = texcoord (src[0].w = LOD bias) */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE, FALSE); - break; - - case TGSI_OPCODE_TXD: - /* Texture lookup with explict partial derivatives */ - /* src[0] = texcoord */ - /* src[1] = d[strq]/dx */ - /* src[2] = d[strq]/dy */ - /* src[3] = sampler unit */ - assert (0); - break; - - case TGSI_OPCODE_TXL: - /* Texture lookup with explit LOD */ - /* src[0] = texcoord (src[0].w = LOD) */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, TRUE, FALSE); - break; - - case TGSI_OPCODE_TXP: - /* Texture lookup with projection */ - /* src[0] = texcoord (src[0].w = projection) */ - /* src[1] = sampler unit */ - exec_tex(mach, inst, FALSE, TRUE); - break; - - case TGSI_OPCODE_UP2H: - assert (0); - break; - - case TGSI_OPCODE_UP2US: - assert (0); - break; - - case TGSI_OPCODE_UP4B: - assert (0); - break; - - case TGSI_OPCODE_UP4UB: - assert (0); - break; - - case TGSI_OPCODE_X2D: - assert (0); - break; - - case TGSI_OPCODE_ARA: - assert (0); - break; - - case TGSI_OPCODE_ARR: - assert (0); - break; - - case TGSI_OPCODE_BRA: - assert (0); - break; - - case TGSI_OPCODE_CAL: - /* skip the call if no execution channels are enabled */ - if (mach->ExecMask) { - /* do the call */ - - /* push the Cond, Loop, Cont stacks */ - assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); - mach->CondStack[mach->CondStackTop++] = mach->CondMask; - assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; - assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; - - assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); - mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; - - /* note that PC was already incremented above */ - mach->CallStack[mach->CallStackTop++] = *pc; - *pc = inst->InstructionExtLabel.Label; - } - break; - - case TGSI_OPCODE_RET: - mach->FuncMask &= ~mach->ExecMask; - UPDATE_EXEC_MASK(mach); - - if (mach->ExecMask == 0x0) { - /* really return now (otherwise, keep executing */ - - if (mach->CallStackTop == 0) { - /* returning from main() */ - *pc = -1; - return; - } - *pc = mach->CallStack[--mach->CallStackTop]; - - /* pop the Cond, Loop, Cont stacks */ - assert(mach->CondStackTop > 0); - mach->CondMask = mach->CondStack[--mach->CondStackTop]; - assert(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; - assert(mach->FuncStackTop > 0); - mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; - - UPDATE_EXEC_MASK(mach); - } - break; - - case TGSI_OPCODE_SSG: - assert (0); - break; - - case TGSI_OPCODE_CMP: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH(&r[0], 0, chan_index); - FETCH(&r[1], 1, chan_index); - FETCH(&r[2], 2, chan_index); - - micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); - - STORE(&r[0], 0, chan_index); - } - break; - - case TGSI_OPCODE_SCS: - if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - FETCH( &r[0], 0, CHAN_X ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { - micro_cos( &r[1], &r[0] ); - STORE( &r[1], 0, CHAN_X ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - micro_sin( &r[1], &r[0] ); - STORE( &r[1], 0, CHAN_Y ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); - } - if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_NRM: - assert (0); - break; - - case TGSI_OPCODE_DIV: - assert( 0 ); - break; - - case TGSI_OPCODE_DP2: - FETCH( &r[0], 0, CHAN_X ); - FETCH( &r[1], 1, CHAN_X ); - micro_mul( &r[0], &r[0], &r[1] ); - - FETCH( &r[1], 0, CHAN_Y ); - FETCH( &r[2], 1, CHAN_Y ); - micro_mul( &r[1], &r[1], &r[2] ); - micro_add( &r[0], &r[0], &r[1] ); - - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_IF: - /* push CondMask */ - assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); - mach->CondStack[mach->CondStackTop++] = mach->CondMask; - FETCH( &r[0], 0, CHAN_X ); - /* update CondMask */ - if( ! r[0].u[0] ) { - mach->CondMask &= ~0x1; - } - if( ! r[0].u[1] ) { - mach->CondMask &= ~0x2; - } - if( ! r[0].u[2] ) { - mach->CondMask &= ~0x4; - } - if( ! r[0].u[3] ) { - mach->CondMask &= ~0x8; - } - UPDATE_EXEC_MASK(mach); - /* Todo: If CondMask==0, jump to ELSE */ - break; - - case TGSI_OPCODE_ELSE: - /* invert CondMask wrt previous mask */ - { - uint prevMask; - assert(mach->CondStackTop > 0); - prevMask = mach->CondStack[mach->CondStackTop - 1]; - mach->CondMask = ~mach->CondMask & prevMask; - UPDATE_EXEC_MASK(mach); - /* Todo: If CondMask==0, jump to ENDIF */ - } - break; - - case TGSI_OPCODE_ENDIF: - /* pop CondMask */ - assert(mach->CondStackTop > 0); - mach->CondMask = mach->CondStack[--mach->CondStackTop]; - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_END: - /* halt execution */ - *pc = -1; - break; - - case TGSI_OPCODE_REP: - assert (0); - break; - - case TGSI_OPCODE_ENDREP: - assert (0); - break; - - case TGSI_OPCODE_PUSHA: - assert (0); - break; - - case TGSI_OPCODE_POPA: - assert (0); - break; - - case TGSI_OPCODE_CEIL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_ceil( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_I2F: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_i2f( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_NOT: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_not( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_TRUNC: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_trunc( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SHL: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_shl( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SHR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_ishr( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_AND: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_and( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_OR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_or( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_MOD: - assert (0); - break; - - case TGSI_OPCODE_XOR: - FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - FETCH( &r[1], 1, chan_index ); - micro_xor( &r[0], &r[0], &r[1] ); - STORE( &r[0], 0, chan_index ); - } - break; - - case TGSI_OPCODE_SAD: - assert (0); - break; - - case TGSI_OPCODE_TXF: - assert (0); - break; - - case TGSI_OPCODE_TXQ: - assert (0); - break; - - case TGSI_OPCODE_EMIT: - mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; - break; - - case TGSI_OPCODE_ENDPRIM: - mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; - mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; - break; - - case TGSI_OPCODE_LOOP: - /* fall-through (for now) */ - case TGSI_OPCODE_BGNLOOP2: - /* push LoopMask and ContMasks */ - assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; - assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); - mach->ContStack[mach->ContStackTop++] = mach->ContMask; - break; - - case TGSI_OPCODE_ENDLOOP: - /* fall-through (for now at least) */ - case TGSI_OPCODE_ENDLOOP2: - /* Restore ContMask, but don't pop */ - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; - UPDATE_EXEC_MASK(mach); - if (mach->ExecMask) { - /* repeat loop: jump to instruction just past BGNLOOP */ - *pc = inst->InstructionExtLabel.Label + 1; - } - else { - /* exit loop: pop LoopMask */ - assert(mach->LoopStackTop > 0); - mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; - /* pop ContMask */ - assert(mach->ContStackTop > 0); - mach->ContMask = mach->ContStack[--mach->ContStackTop]; - } - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_BRK: - /* turn off loop channels for each enabled exec channel */ - mach->LoopMask &= ~mach->ExecMask; - /* Todo: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_CONT: - /* turn off cont channels for each enabled exec channel */ - mach->ContMask &= ~mach->ExecMask; - /* Todo: if mach->LoopMask == 0, jump to end of loop */ - UPDATE_EXEC_MASK(mach); - break; - - case TGSI_OPCODE_BGNSUB: - /* no-op */ - break; - - case TGSI_OPCODE_ENDSUB: - /* no-op */ - break; - - case TGSI_OPCODE_NOISE1: - assert( 0 ); - break; - - case TGSI_OPCODE_NOISE2: - assert( 0 ); - break; - - case TGSI_OPCODE_NOISE3: - assert( 0 ); - break; - - case TGSI_OPCODE_NOISE4: - assert( 0 ); - break; - - case TGSI_OPCODE_NOP: - break; - - default: - assert( 0 ); - } -} - - -/** - * Run TGSI interpreter. - * \return bitmask of "alive" quad components - */ -uint -tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) -{ - uint i; - int pc = 0; - - mach->CondMask = 0xf; - mach->LoopMask = 0xf; - mach->ContMask = 0xf; - mach->FuncMask = 0xf; - mach->ExecMask = 0xf; - - mach->CondStackTop = 0; /* temporarily subvert this assertion */ - assert(mach->CondStackTop == 0); - assert(mach->LoopStackTop == 0); - assert(mach->ContStackTop == 0); - assert(mach->CallStackTop == 0); - - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; - mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; - - if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { - mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; - mach->Primitives[0] = 0; - } - - - /* execute declarations (interpolants) */ - for (i = 0; i < mach->NumDeclarations; i++) { - exec_declaration( mach, mach->Declarations+i ); - } - - /* execute instructions, until pc is set to -1 */ - while (pc != -1) { - assert(pc < (int) mach->NumInstructions); - exec_instruction( mach, mach->Instructions + pc, &pc ); - } - -#if 0 - /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ - if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { - /* - * Scale back depth component. - */ - for (i = 0; i < 4; i++) - mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; - } -#endif - - return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; -} - - diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h b/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h deleted file mode 100644 index 4f30650b07..0000000000 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_exec.h +++ /dev/null @@ -1,253 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#if !defined TGSI_EXEC_H -#define TGSI_EXEC_H - -#include "pipe/p_compiler.h" - -#if defined __cplusplus -extern "C" { -#endif - -#define MAX_LABELS 1024 - -#define NUM_CHANNELS 4 /* R,G,B,A */ -#define QUAD_SIZE 4 /* 4 pixel/quad */ - -/** - * Registers may be treated as float, signed int or unsigned int. - */ -union tgsi_exec_channel -{ - float f[QUAD_SIZE]; - int i[QUAD_SIZE]; - unsigned u[QUAD_SIZE]; -}; - -/** - * A vector[RGBA] of channels[4 pixels] - */ -struct tgsi_exec_vector -{ - union tgsi_exec_channel xyzw[NUM_CHANNELS]; -}; - -/** - * For fragment programs, information for computing fragment input - * values from plane equation of the triangle/line. - */ -struct tgsi_interp_coef -{ - float a0[NUM_CHANNELS]; /* in an xyzw layout */ - float dadx[NUM_CHANNELS]; - float dady[NUM_CHANNELS]; -}; - - -struct softpipe_tile_cache; /**< Opaque to TGSI */ - -/** - * Information for sampling textures, which must be implemented - * by code outside the TGSI executor. - */ -struct tgsi_sampler -{ - const struct pipe_sampler_state *state; - struct pipe_texture *texture; - /** Get samples for four fragments in a quad */ - void (*get_samples)(struct tgsi_sampler *sampler, - const float s[QUAD_SIZE], - const float t[QUAD_SIZE], - const float p[QUAD_SIZE], - float lodbias, - float rgba[NUM_CHANNELS][QUAD_SIZE]); - void *pipe; /*XXX temporary*/ - struct softpipe_tile_cache *cache; -}; - -/** - * For branching/calling subroutines. - */ -struct tgsi_exec_labels -{ - unsigned labels[MAX_LABELS][2]; - unsigned count; -}; - - -#define TGSI_EXEC_NUM_TEMPS 128 -#define TGSI_EXEC_NUM_TEMP_EXTRAS 6 -#define TGSI_EXEC_NUM_IMMEDIATES 256 - -/* - * Locations of various utility registers (_I = Index, _C = Channel) - */ -#define TGSI_EXEC_TEMP_00000000_I (TGSI_EXEC_NUM_TEMPS + 0) -#define TGSI_EXEC_TEMP_00000000_C 0 - -#define TGSI_EXEC_TEMP_7FFFFFFF_I (TGSI_EXEC_NUM_TEMPS + 0) -#define TGSI_EXEC_TEMP_7FFFFFFF_C 1 - -#define TGSI_EXEC_TEMP_80000000_I (TGSI_EXEC_NUM_TEMPS + 0) -#define TGSI_EXEC_TEMP_80000000_C 2 - -#define TGSI_EXEC_TEMP_FFFFFFFF_I (TGSI_EXEC_NUM_TEMPS + 0) -#define TGSI_EXEC_TEMP_FFFFFFFF_C 3 - -#define TGSI_EXEC_TEMP_ONE_I (TGSI_EXEC_NUM_TEMPS + 1) -#define TGSI_EXEC_TEMP_ONE_C 0 - -#define TGSI_EXEC_TEMP_TWO_I (TGSI_EXEC_NUM_TEMPS + 1) -#define TGSI_EXEC_TEMP_TWO_C 1 - -#define TGSI_EXEC_TEMP_128_I (TGSI_EXEC_NUM_TEMPS + 1) -#define TGSI_EXEC_TEMP_128_C 2 - -#define TGSI_EXEC_TEMP_MINUS_128_I (TGSI_EXEC_NUM_TEMPS + 1) -#define TGSI_EXEC_TEMP_MINUS_128_C 3 - -#define TGSI_EXEC_TEMP_KILMASK_I (TGSI_EXEC_NUM_TEMPS + 2) -#define TGSI_EXEC_TEMP_KILMASK_C 0 - -#define TGSI_EXEC_TEMP_OUTPUT_I (TGSI_EXEC_NUM_TEMPS + 2) -#define TGSI_EXEC_TEMP_OUTPUT_C 1 - -#define TGSI_EXEC_TEMP_PRIMITIVE_I (TGSI_EXEC_NUM_TEMPS + 2) -#define TGSI_EXEC_TEMP_PRIMITIVE_C 2 - -#define TGSI_EXEC_TEMP_THREE_I (TGSI_EXEC_NUM_TEMPS + 2) -#define TGSI_EXEC_TEMP_THREE_C 3 - -#define TGSI_EXEC_TEMP_HALF_I (TGSI_EXEC_NUM_TEMPS + 3) -#define TGSI_EXEC_TEMP_HALF_C 0 - -#define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4) - -#define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 5) - - -#define TGSI_EXEC_MAX_COND_NESTING 20 -#define TGSI_EXEC_MAX_LOOP_NESTING 20 -#define TGSI_EXEC_MAX_CALL_NESTING 20 - -/** - * Run-time virtual machine state for executing TGSI shader. - */ -struct tgsi_exec_machine -{ - /* Total = program temporaries + internal temporaries - * + 1 padding to align to 16 bytes - */ - struct tgsi_exec_vector _Temps[TGSI_EXEC_NUM_TEMPS + - TGSI_EXEC_NUM_TEMP_EXTRAS + 1]; - - /* - * This will point to _Temps after aligning to 16B boundary. - */ - struct tgsi_exec_vector *Temps; - struct tgsi_exec_vector *Addrs; - - struct tgsi_sampler *Samplers; - - float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; - unsigned ImmLimit; - const float (*Consts)[4]; - struct tgsi_exec_vector *Inputs; - struct tgsi_exec_vector *Outputs; - const struct tgsi_token *Tokens; - unsigned Processor; - - /* GEOMETRY processor only. */ - unsigned *Primitives; - - /* FRAGMENT processor only. */ - const struct tgsi_interp_coef *InterpCoefs; - struct tgsi_exec_vector QuadPos; - - /* Conditional execution masks */ - uint CondMask; /**< For IF/ELSE/ENDIF */ - uint LoopMask; /**< For BGNLOOP/ENDLOOP */ - uint ContMask; /**< For loop CONT statements */ - uint FuncMask; /**< For function calls */ - uint ExecMask; /**< = CondMask & LoopMask */ - - /** Condition mask stack (for nested conditionals) */ - uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; - int CondStackTop; - - /** Loop mask stack (for nested loops) */ - uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; - int LoopStackTop; - - /** Loop continue mask stack (see comments in tgsi_exec.c) */ - uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; - int ContStackTop; - - /** Function execution mask stack (for executing subroutine code) */ - uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; - int FuncStackTop; - - /** Function call stack for saving/restoring the program counter */ - uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; - int CallStackTop; - - struct tgsi_full_instruction *Instructions; - uint NumInstructions; - - struct tgsi_full_declaration *Declarations; - uint NumDeclarations; - - struct tgsi_exec_labels Labels; -}; - -void -tgsi_exec_machine_init( - struct tgsi_exec_machine *mach ); - - -void -tgsi_exec_machine_bind_shader( - struct tgsi_exec_machine *mach, - const struct tgsi_token *tokens, - uint numSamplers, - struct tgsi_sampler *samplers); - -uint -tgsi_exec_machine_run( - struct tgsi_exec_machine *mach ); - - -void -tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach); - - -#if defined __cplusplus -} /* extern "C" */ -#endif - -#endif /* TGSI_EXEC_H */ diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c deleted file mode 100755 index cdbdf5c882..0000000000 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.c +++ /dev/null @@ -1,2275 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" -#include "tgsi_exec.h" -#include "tgsi_sse2.h" - -#include "rtasm/rtasm_x86sse.h" - -#ifdef PIPE_ARCH_X86 - -/* for 1/sqrt() - * - * This costs about 100fps (close to 10%) in gears: - */ -#define HIGH_PRECISION 1 - - -#define FOR_EACH_CHANNEL( CHAN )\ - for( CHAN = 0; CHAN < 4; CHAN++ ) - -#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) - -#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - if( IS_DST0_CHANNEL_ENABLED( INST, CHAN )) - -#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ - FOR_EACH_CHANNEL( CHAN )\ - IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) - -#define CHAN_X 0 -#define CHAN_Y 1 -#define CHAN_Z 2 -#define CHAN_W 3 - -#define TEMP_R0 TGSI_EXEC_TEMP_R0 - -/** - * X86 utility functions. - */ - -static struct x86_reg -make_xmm( - unsigned xmm ) -{ - return x86_make_reg( - file_XMM, - (enum x86_reg_name) xmm ); -} - -/** - * X86 register mapping helpers. - */ - -static struct x86_reg -get_const_base( void ) -{ - return x86_make_reg( - file_REG32, - reg_CX ); -} - -static struct x86_reg -get_input_base( void ) -{ - return x86_make_reg( - file_REG32, - reg_AX ); -} - -static struct x86_reg -get_output_base( void ) -{ - return x86_make_reg( - file_REG32, - reg_DX ); -} - -static struct x86_reg -get_temp_base( void ) -{ - return x86_make_reg( - file_REG32, - reg_BX ); -} - -static struct x86_reg -get_coef_base( void ) -{ - return get_output_base(); -} - -static struct x86_reg -get_immediate_base( void ) -{ - return x86_make_reg( - file_REG32, - reg_DI ); -} - - -/** - * Data access helpers. - */ - - -static struct x86_reg -get_immediate( - unsigned vec, - unsigned chan ) -{ - return x86_make_disp( - get_immediate_base(), - (vec * 4 + chan) * 4 ); -} - -static struct x86_reg -get_const( - unsigned vec, - unsigned chan ) -{ - return x86_make_disp( - get_const_base(), - (vec * 4 + chan) * 4 ); -} - -static struct x86_reg -get_input( - unsigned vec, - unsigned chan ) -{ - return x86_make_disp( - get_input_base(), - (vec * 4 + chan) * 16 ); -} - -static struct x86_reg -get_output( - unsigned vec, - unsigned chan ) -{ - return x86_make_disp( - get_output_base(), - (vec * 4 + chan) * 16 ); -} - -static struct x86_reg -get_temp( - unsigned vec, - unsigned chan ) -{ - return x86_make_disp( - get_temp_base(), - (vec * 4 + chan) * 16 ); -} - -static struct x86_reg -get_coef( - unsigned vec, - unsigned chan, - unsigned member ) -{ - return x86_make_disp( - get_coef_base(), - ((vec * 3 + member) * 4 + chan) * 4 ); -} - - -static void -emit_ret( - struct x86_function *func ) -{ - x86_ret( func ); -} - - -/** - * Data fetch helpers. - */ - -/** - * Copy a shader constant to xmm register - * \param xmm the destination xmm register - * \param vec the src const buffer index - * \param chan src channel to fetch (X, Y, Z or W) - */ -static void -emit_const( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movss( - func, - make_xmm( xmm ), - get_const( vec, chan ) ); - sse_shufps( - func, - make_xmm( xmm ), - make_xmm( xmm ), - SHUF( 0, 0, 0, 0 ) ); -} - -static void -emit_immediate( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movss( - func, - make_xmm( xmm ), - get_immediate( vec, chan ) ); - sse_shufps( - func, - make_xmm( xmm ), - make_xmm( xmm ), - SHUF( 0, 0, 0, 0 ) ); -} - - -/** - * Copy a shader input to xmm register - * \param xmm the destination xmm register - * \param vec the src input attrib - * \param chan src channel to fetch (X, Y, Z or W) - */ -static void -emit_inputf( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movups( - func, - make_xmm( xmm ), - get_input( vec, chan ) ); -} - -/** - * Store an xmm register to a shader output - * \param xmm the source xmm register - * \param vec the dest output attrib - * \param chan src dest channel to store (X, Y, Z or W) - */ -static void -emit_output( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movups( - func, - get_output( vec, chan ), - make_xmm( xmm ) ); -} - -/** - * Copy a shader temporary to xmm register - * \param xmm the destination xmm register - * \param vec the src temp register - * \param chan src channel to fetch (X, Y, Z or W) - */ -static void -emit_tempf( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movaps( - func, - make_xmm( xmm ), - get_temp( vec, chan ) ); -} - -/** - * Load an xmm register with an input attrib coefficient (a0, dadx or dady) - * \param xmm the destination xmm register - * \param vec the src input/attribute coefficient index - * \param chan src channel to fetch (X, Y, Z or W) - * \param member 0=a0, 1=dadx, 2=dady - */ -static void -emit_coef( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan, - unsigned member ) -{ - sse_movss( - func, - make_xmm( xmm ), - get_coef( vec, chan, member ) ); - sse_shufps( - func, - make_xmm( xmm ), - make_xmm( xmm ), - SHUF( 0, 0, 0, 0 ) ); -} - -/** - * Data store helpers. - */ - -static void -emit_inputs( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movups( - func, - get_input( vec, chan ), - make_xmm( xmm ) ); -} - -static void -emit_temps( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movaps( - func, - get_temp( vec, chan ), - make_xmm( xmm ) ); -} - -static void -emit_addrs( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - emit_temps( - func, - xmm, - vec + TGSI_EXEC_NUM_TEMPS, - chan ); -} - -/** - * Coefficent fetch helpers. - */ - -static void -emit_coef_a0( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - emit_coef( - func, - xmm, - vec, - chan, - 0 ); -} - -static void -emit_coef_dadx( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - emit_coef( - func, - xmm, - vec, - chan, - 1 ); -} - -static void -emit_coef_dady( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - emit_coef( - func, - xmm, - vec, - chan, - 2 ); -} - -/** - * Function call helpers. - */ - -static void -emit_push_gp( - struct x86_function *func ) -{ - x86_push( - func, - x86_make_reg( file_REG32, reg_AX) ); - x86_push( - func, - x86_make_reg( file_REG32, reg_CX) ); - x86_push( - func, - x86_make_reg( file_REG32, reg_DX) ); -} - -static void -x86_pop_gp( - struct x86_function *func ) -{ - /* Restore GP registers in a reverse order. - */ - x86_pop( - func, - x86_make_reg( file_REG32, reg_DX) ); - x86_pop( - func, - x86_make_reg( file_REG32, reg_CX) ); - x86_pop( - func, - x86_make_reg( file_REG32, reg_AX) ); -} - -static void -emit_func_call_dst( - struct x86_function *func, - unsigned xmm_dst, - void (PIPE_CDECL *code)() ) -{ - sse_movaps( - func, - get_temp( TEMP_R0, 0 ), - make_xmm( xmm_dst ) ); - - emit_push_gp( - func ); - - { - struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); - - x86_lea( - func, - ecx, - get_temp( TEMP_R0, 0 ) ); - - x86_push( func, ecx ); - x86_mov_reg_imm( func, ecx, (unsigned long) code ); - x86_call( func, ecx ); - x86_pop(func, ecx ); - } - - - x86_pop_gp( - func ); - - sse_movaps( - func, - make_xmm( xmm_dst ), - get_temp( TEMP_R0, 0 ) ); -} - -static void -emit_func_call_dst_src( - struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src, - void (PIPE_CDECL *code)() ) -{ - sse_movaps( - func, - get_temp( TEMP_R0, 1 ), - make_xmm( xmm_src ) ); - - emit_func_call_dst( - func, - xmm_dst, - code ); -} - -/** - * Low-level instruction translators. - */ - -static void -emit_abs( - struct x86_function *func, - unsigned xmm ) -{ - sse_andps( - func, - make_xmm( xmm ), - get_temp( - TGSI_EXEC_TEMP_7FFFFFFF_I, - TGSI_EXEC_TEMP_7FFFFFFF_C ) ); -} - -static void -emit_add( - struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src ) -{ - sse_addps( - func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); -} - -static void PIPE_CDECL -cos4f( - float *store ) -{ - const unsigned X = 0; - - store[X + 0] = cosf( store[X + 0] ); - store[X + 1] = cosf( store[X + 1] ); - store[X + 2] = cosf( store[X + 2] ); - store[X + 3] = cosf( store[X + 3] ); -} - -static void -emit_cos( - struct x86_function *func, - unsigned xmm_dst ) -{ - emit_func_call_dst( - func, - xmm_dst, - cos4f ); -} - -static void PIPE_CDECL -ex24f( - float *store ) -{ - const unsigned X = 0; - - store[X + 0] = powf( 2.0f, store[X + 0] ); - store[X + 1] = powf( 2.0f, store[X + 1] ); - store[X + 2] = powf( 2.0f, store[X + 2] ); - store[X + 3] = powf( 2.0f, store[X + 3] ); -} - -static void -emit_ex2( - struct x86_function *func, - unsigned xmm_dst ) -{ - emit_func_call_dst( - func, - xmm_dst, - ex24f ); -} - -static void -emit_f2it( - struct x86_function *func, - unsigned xmm ) -{ - sse2_cvttps2dq( - func, - make_xmm( xmm ), - make_xmm( xmm ) ); -} - -static void PIPE_CDECL -flr4f( - float *store ) -{ - const unsigned X = 0; - - store[X + 0] = floorf( store[X + 0] ); - store[X + 1] = floorf( store[X + 1] ); - store[X + 2] = floorf( store[X + 2] ); - store[X + 3] = floorf( store[X + 3] ); -} - -static void -emit_flr( - struct x86_function *func, - unsigned xmm_dst ) -{ - emit_func_call_dst( - func, - xmm_dst, - flr4f ); -} - -static void PIPE_CDECL -frc4f( - float *store ) -{ - const unsigned X = 0; - - store[X + 0] -= floorf( store[X + 0] ); - store[X + 1] -= floorf( store[X + 1] ); - store[X + 2] -= floorf( store[X + 2] ); - store[X + 3] -= floorf( store[X + 3] ); -} - -static void -emit_frc( - struct x86_function *func, - unsigned xmm_dst ) -{ - emit_func_call_dst( - func, - xmm_dst, - frc4f ); -} - -static void PIPE_CDECL -lg24f( - float *store ) -{ - const unsigned X = 0; - - store[X + 0] = LOG2( store[X + 0] ); - store[X + 1] = LOG2( store[X + 1] ); - store[X + 2] = LOG2( store[X + 2] ); - store[X + 3] = LOG2( store[X + 3] ); -} - -static void -emit_lg2( - struct x86_function *func, - unsigned xmm_dst ) -{ - emit_func_call_dst( - func, - xmm_dst, - lg24f ); -} - -static void -emit_MOV( - struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src ) -{ - sse_movups( - func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); -} - -static void -emit_mul (struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src) -{ - sse_mulps( - func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); -} - -static void -emit_neg( - struct x86_function *func, - unsigned xmm ) -{ - sse_xorps( - func, - make_xmm( xmm ), - get_temp( - TGSI_EXEC_TEMP_80000000_I, - TGSI_EXEC_TEMP_80000000_C ) ); -} - -static void PIPE_CDECL -pow4f( - float *store ) -{ - const unsigned X = 0; - - store[X + 0] = powf( store[X + 0], store[X + 4] ); - store[X + 1] = powf( store[X + 1], store[X + 5] ); - store[X + 2] = powf( store[X + 2], store[X + 6] ); - store[X + 3] = powf( store[X + 3], store[X + 7] ); -} - -static void -emit_pow( - struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src ) -{ - emit_func_call_dst_src( - func, - xmm_dst, - xmm_src, - pow4f ); -} - -static void -emit_rcp ( - struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src ) -{ - /* On Intel CPUs at least, this is only accurate to 12 bits -- not - * good enough. Need to either emit a proper divide or use the - * iterative technique described below in emit_rsqrt(). - */ - sse2_rcpps( - func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); -} - -static void -emit_rsqrt( - struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src ) -{ -#if HIGH_PRECISION - /* Although rsqrtps() and rcpps() are low precision on some/all SSE - * implementations, it is possible to improve its precision at - * fairly low cost, using a newton/raphson step, as below: - * - * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a) - * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)] - * - * See: http://softwarecommunity.intel.com/articles/eng/1818.htm - */ - { - struct x86_reg dst = make_xmm( xmm_dst ); - struct x86_reg src = make_xmm( xmm_src ); - struct x86_reg tmp0 = make_xmm( 2 ); - struct x86_reg tmp1 = make_xmm( 3 ); - - assert( xmm_dst != xmm_src ); - assert( xmm_dst != 2 && xmm_dst != 3 ); - assert( xmm_src != 2 && xmm_src != 3 ); - - sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) ); - sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) ); - sse_rsqrtps( func, tmp1, src ); - sse_mulps( func, src, tmp1 ); - sse_mulps( func, dst, tmp1 ); - sse_mulps( func, src, tmp1 ); - sse_subps( func, tmp0, src ); - sse_mulps( func, dst, tmp0 ); - } -#else - /* On Intel CPUs at least, this is only accurate to 12 bits -- not - * good enough. - */ - sse_rsqrtps( - func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); -#endif -} - -static void -emit_setsign( - struct x86_function *func, - unsigned xmm ) -{ - sse_orps( - func, - make_xmm( xmm ), - get_temp( - TGSI_EXEC_TEMP_80000000_I, - TGSI_EXEC_TEMP_80000000_C ) ); -} - -static void PIPE_CDECL -sin4f( - float *store ) -{ - const unsigned X = 0; - - store[X + 0] = sinf( store[X + 0] ); - store[X + 1] = sinf( store[X + 1] ); - store[X + 2] = sinf( store[X + 2] ); - store[X + 3] = sinf( store[X + 3] ); -} - -static void -emit_sin (struct x86_function *func, - unsigned xmm_dst) -{ - emit_func_call_dst( - func, - xmm_dst, - sin4f ); -} - -static void -emit_sub( - struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src ) -{ - sse_subps( - func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); -} - -/** - * Register fetch. - */ - -static void -emit_fetch( - struct x86_function *func, - unsigned xmm, - const struct tgsi_full_src_register *reg, - const unsigned chan_index ) -{ - unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); - - switch( swizzle ) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: - switch( reg->SrcRegister.File ) { - case TGSI_FILE_CONSTANT: - emit_const( - func, - xmm, - reg->SrcRegister.Index, - swizzle ); - break; - - case TGSI_FILE_IMMEDIATE: - emit_immediate( - func, - xmm, - reg->SrcRegister.Index, - swizzle ); - break; - - case TGSI_FILE_INPUT: - emit_inputf( - func, - xmm, - reg->SrcRegister.Index, - swizzle ); - break; - - case TGSI_FILE_TEMPORARY: - emit_tempf( - func, - xmm, - reg->SrcRegister.Index, - swizzle ); - break; - - default: - assert( 0 ); - } - break; - - case TGSI_EXTSWIZZLE_ZERO: - emit_tempf( - func, - xmm, - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ); - break; - - case TGSI_EXTSWIZZLE_ONE: - emit_tempf( - func, - xmm, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ); - break; - - default: - assert( 0 ); - } - - switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { - case TGSI_UTIL_SIGN_CLEAR: - emit_abs( func, xmm ); - break; - - case TGSI_UTIL_SIGN_SET: - emit_setsign( func, xmm ); - break; - - case TGSI_UTIL_SIGN_TOGGLE: - emit_neg( func, xmm ); - break; - - case TGSI_UTIL_SIGN_KEEP: - break; - } -} - -#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\ - emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN ) - -/** - * Register store. - */ - -static void -emit_store( - struct x86_function *func, - unsigned xmm, - const struct tgsi_full_dst_register *reg, - const struct tgsi_full_instruction *inst, - unsigned chan_index ) -{ - switch( reg->DstRegister.File ) { - case TGSI_FILE_OUTPUT: - emit_output( - func, - xmm, - reg->DstRegister.Index, - chan_index ); - break; - - case TGSI_FILE_TEMPORARY: - emit_temps( - func, - xmm, - reg->DstRegister.Index, - chan_index ); - break; - - case TGSI_FILE_ADDRESS: - emit_addrs( - func, - xmm, - reg->DstRegister.Index, - chan_index ); - break; - - default: - assert( 0 ); - } - - switch( inst->Instruction.Saturate ) { - case TGSI_SAT_NONE: - break; - - case TGSI_SAT_ZERO_ONE: - /* assert( 0 ); */ - break; - - case TGSI_SAT_MINUS_PLUS_ONE: - assert( 0 ); - break; - } -} - -#define STORE( FUNC, INST, XMM, INDEX, CHAN )\ - emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) - -/** - * High-level instruction translators. - */ - -static void -emit_kil( - struct x86_function *func, - const struct tgsi_full_src_register *reg ) -{ - unsigned uniquemask; - unsigned registers[4]; - unsigned nextregister = 0; - unsigned firstchan = ~0; - unsigned chan_index; - - /* This mask stores component bits that were already tested. Note that - * we test if the value is less than zero, so 1.0 and 0.0 need not to be - * tested. */ - uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); - - FOR_EACH_CHANNEL( chan_index ) { - unsigned swizzle; - - /* unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_extswizzle( - reg, - chan_index ); - - /* check if the component has not been already tested */ - if( !(uniquemask & (1 << swizzle)) ) { - uniquemask |= 1 << swizzle; - - /* allocate register */ - registers[chan_index] = nextregister; - emit_fetch( - func, - nextregister, - reg, - chan_index ); - nextregister++; - - /* mark the first channel used */ - if( firstchan == ~0 ) { - firstchan = chan_index; - } - } - } - - x86_push( - func, - x86_make_reg( file_REG32, reg_AX ) ); - x86_push( - func, - x86_make_reg( file_REG32, reg_DX ) ); - - FOR_EACH_CHANNEL( chan_index ) { - if( uniquemask & (1 << chan_index) ) { - sse_cmpps( - func, - make_xmm( registers[chan_index] ), - get_temp( - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ), - cc_LessThan ); - - if( chan_index == firstchan ) { - sse_pmovmskb( - func, - x86_make_reg( file_REG32, reg_AX ), - make_xmm( registers[chan_index] ) ); - } - else { - sse_pmovmskb( - func, - x86_make_reg( file_REG32, reg_DX ), - make_xmm( registers[chan_index] ) ); - x86_or( - func, - x86_make_reg( file_REG32, reg_AX ), - x86_make_reg( file_REG32, reg_DX ) ); - } - } - } - - x86_or( - func, - get_temp( - TGSI_EXEC_TEMP_KILMASK_I, - TGSI_EXEC_TEMP_KILMASK_C ), - x86_make_reg( file_REG32, reg_AX ) ); - - x86_pop( - func, - x86_make_reg( file_REG32, reg_DX ) ); - x86_pop( - func, - x86_make_reg( file_REG32, reg_AX ) ); -} - -static void -emit_setcc( - struct x86_function *func, - struct tgsi_full_instruction *inst, - enum sse_cc cc ) -{ - unsigned chan_index; - - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - sse_cmpps( - func, - make_xmm( 0 ), - make_xmm( 1 ), - cc ); - sse_andps( - func, - make_xmm( 0 ), - get_temp( - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ) ); - STORE( func, *inst, 0, 0, chan_index ); - } -} - -static void -emit_cmp( - struct x86_function *func, - struct tgsi_full_instruction *inst ) -{ - unsigned chan_index; - - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - FETCH( func, *inst, 2, 2, chan_index ); - sse_cmpps( - func, - make_xmm( 0 ), - get_temp( - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ), - cc_LessThan ); - sse_andps( - func, - make_xmm( 1 ), - make_xmm( 0 ) ); - sse_andnps( - func, - make_xmm( 0 ), - make_xmm( 2 ) ); - sse_orps( - func, - make_xmm( 0 ), - make_xmm( 1 ) ); - STORE( func, *inst, 0, 0, chan_index ); - } -} - -static int -emit_instruction( - struct x86_function *func, - struct tgsi_full_instruction *inst ) -{ - unsigned chan_index; - - switch( inst->Instruction.Opcode ) { - case TGSI_OPCODE_ARL: -#if 0 - /* XXX this isn't working properly (see glean vertProg1 test) */ - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - emit_f2it( func, 0 ); - STORE( func, *inst, 0, 0, chan_index ); - } -#else - return 0; -#endif - break; - - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_LIT: - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { - emit_tempf( - func, - 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C); - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) { - STORE( func, *inst, 0, 0, CHAN_X ); - } - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { - STORE( func, *inst, 0, 0, CHAN_W ); - } - } - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - FETCH( func, *inst, 0, 0, CHAN_X ); - sse_maxps( - func, - make_xmm( 0 ), - get_temp( - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ) ); - STORE( func, *inst, 0, 0, CHAN_Y ); - } - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - /* XMM[1] = SrcReg[0].yyyy */ - FETCH( func, *inst, 1, 0, CHAN_Y ); - /* XMM[1] = max(XMM[1], 0) */ - sse_maxps( - func, - make_xmm( 1 ), - get_temp( - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ) ); - /* XMM[2] = SrcReg[0].wwww */ - FETCH( func, *inst, 2, 0, CHAN_W ); - /* XMM[2] = min(XMM[2], 128.0) */ - sse_minps( - func, - make_xmm( 2 ), - get_temp( - TGSI_EXEC_TEMP_128_I, - TGSI_EXEC_TEMP_128_C ) ); - /* XMM[2] = max(XMM[2], -128.0) */ - sse_maxps( - func, - make_xmm( 2 ), - get_temp( - TGSI_EXEC_TEMP_MINUS_128_I, - TGSI_EXEC_TEMP_MINUS_128_C ) ); - emit_pow( func, 1, 2 ); - FETCH( func, *inst, 0, 0, CHAN_X ); - sse_xorps( - func, - make_xmm( 2 ), - make_xmm( 2 ) ); - sse_cmpps( - func, - make_xmm( 2 ), - make_xmm( 0 ), - cc_LessThanEqual ); - sse_andps( - func, - make_xmm( 2 ), - make_xmm( 1 ) ); - STORE( func, *inst, 2, 0, CHAN_Z ); - } - } - break; - - case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_rcp( func, 0, 0 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT */ - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_rsqrt( func, 1, 0 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 1, 0, chan_index ); - } - break; - - case TGSI_OPCODE_EXP: - return 0; - break; - - case TGSI_OPCODE_LOG: - return 0; - break; - - case TGSI_OPCODE_MUL: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - emit_mul( func, 0, 1 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_ADD: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - emit_add( func, 0, 1 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_DP3: - /* TGSI_OPCODE_DOT3 */ - FETCH( func, *inst, 0, 0, CHAN_X ); - FETCH( func, *inst, 1, 1, CHAN_X ); - emit_mul( func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_Y ); - FETCH( func, *inst, 2, 1, CHAN_Y ); - emit_mul( func, 1, 2 ); - emit_add( func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_Z ); - FETCH( func, *inst, 2, 1, CHAN_Z ); - emit_mul( func, 1, 2 ); - emit_add( func, 0, 1 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_DP4: - /* TGSI_OPCODE_DOT4 */ - FETCH( func, *inst, 0, 0, CHAN_X ); - FETCH( func, *inst, 1, 1, CHAN_X ); - emit_mul( func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_Y ); - FETCH( func, *inst, 2, 1, CHAN_Y ); - emit_mul( func, 1, 2 ); - emit_add( func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_Z ); - FETCH( func, *inst, 2, 1, CHAN_Z ); - emit_mul(func, 1, 2 ); - emit_add(func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_W ); - FETCH( func, *inst, 2, 1, CHAN_W ); - emit_mul( func, 1, 2 ); - emit_add( func, 0, 1 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_DST: - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { - emit_tempf( - func, - 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ); - STORE( func, *inst, 0, 0, CHAN_X ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { - FETCH( func, *inst, 0, 0, CHAN_Y ); - FETCH( func, *inst, 1, 1, CHAN_Y ); - emit_mul( func, 0, 1 ); - STORE( func, *inst, 0, 0, CHAN_Y ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { - FETCH( func, *inst, 0, 0, CHAN_Z ); - STORE( func, *inst, 0, 0, CHAN_Z ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { - FETCH( func, *inst, 0, 1, CHAN_W ); - STORE( func, *inst, 0, 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_MIN: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - sse_minps( - func, - make_xmm( 0 ), - make_xmm( 1 ) ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_MAX: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - sse_maxps( - func, - make_xmm( 0 ), - make_xmm( 1 ) ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_SLT: - /* TGSI_OPCODE_SETLT */ - emit_setcc( func, inst, cc_LessThan ); - break; - - case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ - emit_setcc( func, inst, cc_NotLessThan ); - break; - - case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - FETCH( func, *inst, 2, 2, chan_index ); - emit_mul( func, 0, 1 ); - emit_add( func, 0, 2 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_SUB: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - emit_sub( func, 0, 1 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_LERP: - /* TGSI_OPCODE_LRP */ - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - FETCH( func, *inst, 2, 2, chan_index ); - emit_sub( func, 1, 2 ); - emit_mul( func, 0, 1 ); - emit_add( func, 0, 2 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_CND: - return 0; - break; - - case TGSI_OPCODE_CND0: - return 0; - break; - - case TGSI_OPCODE_DOT2ADD: - /* TGSI_OPCODE_DP2A */ - return 0; - break; - - case TGSI_OPCODE_INDEX: - return 0; - break; - - case TGSI_OPCODE_NEGATE: - return 0; - break; - - case TGSI_OPCODE_FRAC: - /* TGSI_OPCODE_FRC */ - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - emit_frc( func, 0 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_CLAMP: - return 0; - break; - - case TGSI_OPCODE_FLOOR: - /* TGSI_OPCODE_FLR */ - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - emit_flr( func, 0 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_ROUND: - return 0; - break; - - case TGSI_OPCODE_EXPBASE2: - /* TGSI_OPCODE_EX2 */ - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_ex2( func, 0 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_LOGBASE2: - /* TGSI_OPCODE_LG2 */ - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_lg2( func, 0 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_POWER: - /* TGSI_OPCODE_POW */ - FETCH( func, *inst, 0, 0, CHAN_X ); - FETCH( func, *inst, 1, 1, CHAN_X ); - emit_pow( func, 0, 1 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_CROSSPRODUCT: - /* TGSI_OPCODE_XPD */ - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - FETCH( func, *inst, 1, 1, CHAN_Z ); - FETCH( func, *inst, 3, 0, CHAN_Z ); - } - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - FETCH( func, *inst, 0, 0, CHAN_Y ); - FETCH( func, *inst, 4, 1, CHAN_Y ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { - emit_MOV( func, 2, 0 ); - emit_mul( func, 2, 1 ); - emit_MOV( func, 5, 3 ); - emit_mul( func, 5, 4 ); - emit_sub( func, 2, 5 ); - STORE( func, *inst, 2, 0, CHAN_X ); - } - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - FETCH( func, *inst, 2, 1, CHAN_X ); - FETCH( func, *inst, 5, 0, CHAN_X ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { - emit_mul( func, 3, 2 ); - emit_mul( func, 1, 5 ); - emit_sub( func, 3, 1 ); - STORE( func, *inst, 3, 0, CHAN_Y ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { - emit_mul( func, 5, 4 ); - emit_mul( func, 0, 2 ); - emit_sub( func, 5, 0 ); - STORE( func, *inst, 5, 0, CHAN_Z ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { - emit_tempf( - func, - 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ); - STORE( func, *inst, 0, 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_MULTIPLYMATRIX: - return 0; - break; - - case TGSI_OPCODE_ABS: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - emit_abs( func, 0) ; - - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_RCC: - return 0; - break; - - case TGSI_OPCODE_DPH: - FETCH( func, *inst, 0, 0, CHAN_X ); - FETCH( func, *inst, 1, 1, CHAN_X ); - emit_mul( func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_Y ); - FETCH( func, *inst, 2, 1, CHAN_Y ); - emit_mul( func, 1, 2 ); - emit_add( func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_Z ); - FETCH( func, *inst, 2, 1, CHAN_Z ); - emit_mul( func, 1, 2 ); - emit_add( func, 0, 1 ); - FETCH( func, *inst, 1, 1, CHAN_W ); - emit_add( func, 0, 1 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_COS: - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_cos( func, 0 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_DDX: - return 0; - break; - - case TGSI_OPCODE_DDY: - return 0; - break; - - case TGSI_OPCODE_KIL: - emit_kil( func, &inst->FullSrcRegisters[0] ); - break; - - case TGSI_OPCODE_PK2H: - return 0; - break; - - case TGSI_OPCODE_PK2US: - return 0; - break; - - case TGSI_OPCODE_PK4B: - return 0; - break; - - case TGSI_OPCODE_PK4UB: - return 0; - break; - - case TGSI_OPCODE_RFL: - return 0; - break; - - case TGSI_OPCODE_SEQ: - return 0; - break; - - case TGSI_OPCODE_SFL: - return 0; - break; - - case TGSI_OPCODE_SGT: - return 0; - break; - - case TGSI_OPCODE_SIN: - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_sin( func, 0 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_SLE: - return 0; - break; - - case TGSI_OPCODE_SNE: - return 0; - break; - - case TGSI_OPCODE_STR: - return 0; - break; - - case TGSI_OPCODE_TEX: - if (0) { - /* Disable dummy texture code: - */ - emit_tempf( - func, - 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - } - else { - return 0; - } - break; - - case TGSI_OPCODE_TXD: - return 0; - break; - - case TGSI_OPCODE_UP2H: - return 0; - break; - - case TGSI_OPCODE_UP2US: - return 0; - break; - - case TGSI_OPCODE_UP4B: - return 0; - break; - - case TGSI_OPCODE_UP4UB: - return 0; - break; - - case TGSI_OPCODE_X2D: - return 0; - break; - - case TGSI_OPCODE_ARA: - return 0; - break; - - case TGSI_OPCODE_ARR: - return 0; - break; - - case TGSI_OPCODE_BRA: - return 0; - break; - - case TGSI_OPCODE_CAL: - return 0; - break; - - case TGSI_OPCODE_RET: - emit_ret( func ); - break; - - case TGSI_OPCODE_END: - break; - - case TGSI_OPCODE_SSG: - return 0; - break; - - case TGSI_OPCODE_CMP: - emit_cmp (func, inst); - break; - - case TGSI_OPCODE_SCS: - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_cos( func, 0 ); - STORE( func, *inst, 0, 0, CHAN_X ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_sin( func, 0 ); - STORE( func, *inst, 0, 0, CHAN_Y ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { - emit_tempf( - func, - 0, - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ); - STORE( func, *inst, 0, 0, CHAN_Z ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { - emit_tempf( - func, - 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ); - STORE( func, *inst, 0, 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_TXB: - return 0; - break; - - case TGSI_OPCODE_NRM: - return 0; - break; - - case TGSI_OPCODE_DIV: - return 0; - break; - - case TGSI_OPCODE_DP2: - return 0; - break; - - case TGSI_OPCODE_TXL: - return 0; - break; - - case TGSI_OPCODE_BRK: - return 0; - break; - - case TGSI_OPCODE_IF: - return 0; - break; - - case TGSI_OPCODE_LOOP: - return 0; - break; - - case TGSI_OPCODE_REP: - return 0; - break; - - case TGSI_OPCODE_ELSE: - return 0; - break; - - case TGSI_OPCODE_ENDIF: - return 0; - break; - - case TGSI_OPCODE_ENDLOOP: - return 0; - break; - - case TGSI_OPCODE_ENDREP: - return 0; - break; - - case TGSI_OPCODE_PUSHA: - return 0; - break; - - case TGSI_OPCODE_POPA: - return 0; - break; - - case TGSI_OPCODE_CEIL: - return 0; - break; - - case TGSI_OPCODE_I2F: - return 0; - break; - - case TGSI_OPCODE_NOT: - return 0; - break; - - case TGSI_OPCODE_TRUNC: - return 0; - break; - - case TGSI_OPCODE_SHL: - return 0; - break; - - case TGSI_OPCODE_SHR: - return 0; - break; - - case TGSI_OPCODE_AND: - return 0; - break; - - case TGSI_OPCODE_OR: - return 0; - break; - - case TGSI_OPCODE_MOD: - return 0; - break; - - case TGSI_OPCODE_XOR: - return 0; - break; - - case TGSI_OPCODE_SAD: - return 0; - break; - - case TGSI_OPCODE_TXF: - return 0; - break; - - case TGSI_OPCODE_TXQ: - return 0; - break; - - case TGSI_OPCODE_CONT: - return 0; - break; - - case TGSI_OPCODE_EMIT: - return 0; - break; - - case TGSI_OPCODE_ENDPRIM: - return 0; - break; - - default: - return 0; - } - - return 1; -} - -static void -emit_declaration( - struct x86_function *func, - struct tgsi_full_declaration *decl ) -{ - if( decl->Declaration.File == TGSI_FILE_INPUT ) { - unsigned first, last, mask; - unsigned i, j; - - first = decl->DeclarationRange.First; - last = decl->DeclarationRange.Last; - mask = decl->Declaration.UsageMask; - - for( i = first; i <= last; i++ ) { - for( j = 0; j < NUM_CHANNELS; j++ ) { - if( mask & (1 << j) ) { - switch( decl->Declaration.Interpolate ) { - case TGSI_INTERPOLATE_CONSTANT: - emit_coef_a0( func, 0, i, j ); - emit_inputs( func, 0, i, j ); - break; - - case TGSI_INTERPOLATE_LINEAR: - emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); - emit_coef_dadx( func, 1, i, j ); - emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); - emit_coef_dady( func, 3, i, j ); - emit_mul( func, 0, 1 ); /* x * dadx */ - emit_coef_a0( func, 4, i, j ); - emit_mul( func, 2, 3 ); /* y * dady */ - emit_add( func, 0, 4 ); /* x * dadx + a0 */ - emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ - emit_inputs( func, 0, i, j ); - break; - - case TGSI_INTERPOLATE_PERSPECTIVE: - emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); - emit_coef_dadx( func, 1, i, j ); - emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); - emit_coef_dady( func, 3, i, j ); - emit_mul( func, 0, 1 ); /* x * dadx */ - emit_tempf( func, 4, 0, TGSI_SWIZZLE_W ); - emit_coef_a0( func, 5, i, j ); - emit_rcp( func, 4, 4 ); /* 1.0 / w */ - emit_mul( func, 2, 3 ); /* y * dady */ - emit_add( func, 0, 5 ); /* x * dadx + a0 */ - emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ - emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */ - emit_inputs( func, 0, i, j ); - break; - - default: - assert( 0 ); - break; - } - } - } - } - } -} - -static void aos_to_soa( struct x86_function *func, - uint arg_aos, - uint arg_soa, - uint arg_num, - uint arg_stride ) -{ - struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX ); - struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX ); - struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX ); - struct x86_reg stride = x86_make_reg( file_REG32, reg_DX ); - int inner_loop; - - - /* Save EBX */ - x86_push( func, x86_make_reg( file_REG32, reg_BX ) ); - - x86_mov( func, aos_input, x86_fn_arg( func, arg_aos ) ); - x86_mov( func, soa_input, x86_fn_arg( func, arg_soa ) ); - x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) ); - x86_mov( func, stride, x86_fn_arg( func, arg_stride ) ); - - /* do */ - inner_loop = x86_get_label( func ); - { - x86_push( func, aos_input ); - sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); - sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); - x86_add( func, aos_input, stride ); - sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); - sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); - x86_add( func, aos_input, stride ); - sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); - sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); - x86_add( func, aos_input, stride ); - sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); - sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); - x86_pop( func, aos_input ); - - sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); - sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); - sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 ); - sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd ); - sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 ); - sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd ); - - sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) ); - sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) ); - sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) ); - sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) ); - - /* Advance to next input */ - x86_lea( func, aos_input, x86_make_disp(aos_input, 16) ); - x86_lea( func, soa_input, x86_make_disp(soa_input, 64) ); - } - /* while --num_inputs */ - x86_dec( func, num_inputs ); - x86_jcc( func, cc_NE, inner_loop ); - - /* Restore EBX */ - x86_pop( func, aos_input ); -} - -static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride ) -{ - struct x86_reg soa_output; - struct x86_reg aos_output; - struct x86_reg num_outputs; - struct x86_reg temp; - int inner_loop; - - soa_output = x86_make_reg( file_REG32, reg_AX ); - aos_output = x86_make_reg( file_REG32, reg_BX ); - num_outputs = x86_make_reg( file_REG32, reg_CX ); - temp = x86_make_reg( file_REG32, reg_DX ); - - /* Save EBX */ - x86_push( func, aos_output ); - - x86_mov( func, soa_output, x86_fn_arg( func, soa ) ); - x86_mov( func, aos_output, x86_fn_arg( func, aos ) ); - x86_mov( func, num_outputs, x86_fn_arg( func, num ) ); - - /* do */ - inner_loop = x86_get_label( func ); - { - sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) ); - sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) ); - sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) ); - sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) ); - - sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); - sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); - sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) ); - sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) ); - sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) ); - sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) ); - - x86_mov( func, temp, x86_fn_arg( func, stride ) ); - x86_push( func, aos_output ); - sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); - sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); - x86_add( func, aos_output, temp ); - sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); - sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); - x86_add( func, aos_output, temp ); - sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); - sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); - x86_add( func, aos_output, temp ); - sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); - sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); - x86_pop( func, aos_output ); - - /* Advance to next output */ - x86_lea( func, aos_output, x86_make_disp(aos_output, 16) ); - x86_lea( func, soa_output, x86_make_disp(soa_output, 64) ); - } - /* while --num_outputs */ - x86_dec( func, num_outputs ); - x86_jcc( func, cc_NE, inner_loop ); - - /* Restore EBX */ - x86_pop( func, aos_output ); -} - -/** - * Translate a TGSI vertex/fragment shader to SSE2 code. - * Slightly different things are done for vertex vs. fragment shaders. - * - * Note that fragment shaders are responsible for interpolating shader - * inputs. Because on x86 we have only 4 GP registers, and here we - * have 5 shader arguments (input, output, const, temp and coef), the - * code is split into two phases -- DECLARATION and INSTRUCTION phase. - * GP register holding the output argument is aliased with the coeff - * argument, as outputs are not needed in the DECLARATION phase. - * - * \param tokens the TGSI input shader - * \param func the output SSE code/function - * \param immediates buffer to place immediates, later passed to SSE func - * \param return 1 for success, 0 if translation failed - */ -unsigned -tgsi_emit_sse2( - const struct tgsi_token *tokens, - struct x86_function *func, - float (*immediates)[4], - boolean do_swizzles ) -{ - struct tgsi_parse_context parse; - boolean instruction_phase = FALSE; - unsigned ok = 1; - uint num_immediates = 0; - - func->csr = func->store; - - tgsi_parse_init( &parse, tokens ); - - /* Can't just use EDI, EBX without save/restoring them: - */ - x86_push( - func, - get_immediate_base() ); - - x86_push( - func, - get_temp_base() ); - - - /* - * Different function args for vertex/fragment shaders: - */ - if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { - /* DECLARATION phase, do not load output argument. */ - x86_mov( - func, - get_input_base(), - x86_fn_arg( func, 1 ) ); - /* skipping outputs argument here */ - x86_mov( - func, - get_const_base(), - x86_fn_arg( func, 3 ) ); - x86_mov( - func, - get_temp_base(), - x86_fn_arg( func, 4 ) ); - x86_mov( - func, - get_coef_base(), - x86_fn_arg( func, 5 ) ); - x86_mov( - func, - get_immediate_base(), - x86_fn_arg( func, 6 ) ); - } - else { - assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX); - - if (do_swizzles) - aos_to_soa( func, - 6, /* aos_input */ - 1, /* machine->input */ - 7, /* num_inputs */ - 8 ); /* input_stride */ - - x86_mov( - func, - get_input_base(), - x86_fn_arg( func, 1 ) ); - x86_mov( - func, - get_output_base(), - x86_fn_arg( func, 2 ) ); - x86_mov( - func, - get_const_base(), - x86_fn_arg( func, 3 ) ); - x86_mov( - func, - get_temp_base(), - x86_fn_arg( func, 4 ) ); - x86_mov( - func, - get_immediate_base(), - x86_fn_arg( func, 5 ) ); - } - - while( !tgsi_parse_end_of_tokens( &parse ) && ok ) { - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { - emit_declaration( - func, - &parse.FullToken.FullDeclaration ); - } - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { - if( !instruction_phase ) { - /* INSTRUCTION phase, overwrite coeff with output. */ - instruction_phase = TRUE; - x86_mov( - func, - get_output_base(), - x86_fn_arg( func, 2 ) ); - } - } - - ok = emit_instruction( - func, - &parse.FullToken.FullInstruction ); - - if (!ok) { - debug_printf("failed to translate tgsi opcode %d to SSE (%s)\n", - parse.FullToken.FullInstruction.Instruction.Opcode, - parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? - "vertex shader" : "fragment shader"); - } - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - /* simply copy the immediate values into the next immediates[] slot */ - { - const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; - uint i; - assert(size <= 4); - assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); - for( i = 0; i < size; i++ ) { - immediates[num_immediates][i] = - parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; - } -#if 0 - debug_printf("SSE FS immediate[%d] = %f %f %f %f\n", - num_immediates, - immediates[num_immediates][0], - immediates[num_immediates][1], - immediates[num_immediates][2], - immediates[num_immediates][3]); -#endif - num_immediates++; - } - break; - - default: - ok = 0; - assert( 0 ); - } - } - - if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) { - if (do_swizzles) - soa_to_aos( func, 9, 2, 10, 11 ); - } - - /* Can't just use EBX, EDI without save/restoring them: - */ - x86_pop( - func, - get_temp_base() ); - - x86_pop( - func, - get_immediate_base() ); - - emit_ret( func ); - - tgsi_parse_free( &parse ); - - return ok; -} - -#endif /* PIPE_ARCH_X86 */ diff --git a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h deleted file mode 100755 index af838b2a25..0000000000 --- a/src/gallium/auxiliary/tgsi/exec/tgsi_sse2.h +++ /dev/null @@ -1,49 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_SSE2_H -#define TGSI_SSE2_H - -#if defined __cplusplus -extern "C" { -#endif - -struct tgsi_token; -struct x86_function; - -unsigned -tgsi_emit_sse2( - const struct tgsi_token *tokens, - struct x86_function *function, - float (*immediates)[4], - boolean do_swizzles ); - -#if defined __cplusplus -} -#endif - -#endif /* TGSI_SSE2_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c new file mode 100644 index 0000000000..742ef14c35 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -0,0 +1,1324 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi_build.h" +#include "tgsi_parse.h" + +/* + * version + */ + +struct tgsi_version +tgsi_build_version( void ) +{ + struct tgsi_version version; + + version.MajorVersion = 1; + version.MinorVersion = 1; + version.Padding = 0; + + return version; +} + +/* + * header + */ + +struct tgsi_header +tgsi_build_header( void ) +{ + struct tgsi_header header; + + header.HeaderSize = 1; + header.BodySize = 0; + + return header; +} + +static void +header_headersize_grow( struct tgsi_header *header ) +{ + assert( header->HeaderSize < 0xFF ); + assert( header->BodySize == 0 ); + + header->HeaderSize++; +} + +static void +header_bodysize_grow( struct tgsi_header *header ) +{ + assert( header->BodySize < 0xFFFFFF ); + + header->BodySize++; +} + +struct tgsi_processor +tgsi_default_processor( void ) +{ + struct tgsi_processor processor; + + processor.Processor = TGSI_PROCESSOR_FRAGMENT; + processor.Padding = 0; + + return processor; +} + +struct tgsi_processor +tgsi_build_processor( + unsigned type, + struct tgsi_header *header ) +{ + struct tgsi_processor processor; + + processor = tgsi_default_processor(); + processor.Processor = type; + + header_headersize_grow( header ); + + return processor; +} + +/* + * declaration + */ + +struct tgsi_declaration +tgsi_default_declaration( void ) +{ + struct tgsi_declaration declaration; + + declaration.Type = TGSI_TOKEN_TYPE_DECLARATION; + declaration.Size = 1; + declaration.File = TGSI_FILE_NULL; + declaration.UsageMask = TGSI_WRITEMASK_XYZW; + declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT; + declaration.Semantic = 0; + declaration.Padding = 0; + declaration.Extended = 0; + + return declaration; +} + +struct tgsi_declaration +tgsi_build_declaration( + unsigned file, + unsigned usage_mask, + unsigned interpolate, + unsigned semantic, + struct tgsi_header *header ) +{ + struct tgsi_declaration declaration; + + assert( file <= TGSI_FILE_IMMEDIATE ); + assert( interpolate <= TGSI_INTERPOLATE_PERSPECTIVE ); + + declaration = tgsi_default_declaration(); + declaration.File = file; + declaration.UsageMask = usage_mask; + declaration.Interpolate = interpolate; + declaration.Semantic = semantic; + + header_bodysize_grow( header ); + + return declaration; +} + +static void +declaration_grow( + struct tgsi_declaration *declaration, + struct tgsi_header *header ) +{ + assert( declaration->Size < 0xFF ); + + declaration->Size++; + + header_bodysize_grow( header ); +} + +struct tgsi_full_declaration +tgsi_default_full_declaration( void ) +{ + struct tgsi_full_declaration full_declaration; + + full_declaration.Declaration = tgsi_default_declaration(); + full_declaration.DeclarationRange = tgsi_default_declaration_range(); + full_declaration.Semantic = tgsi_default_declaration_semantic(); + + return full_declaration; +} + +unsigned +tgsi_build_full_declaration( + const struct tgsi_full_declaration *full_decl, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ) +{ + unsigned size = 0; + struct tgsi_declaration *declaration; + struct tgsi_declaration_range *dr; + + if( maxsize <= size ) + return 0; + declaration = (struct tgsi_declaration *) &tokens[size]; + size++; + + *declaration = tgsi_build_declaration( + full_decl->Declaration.File, + full_decl->Declaration.UsageMask, + full_decl->Declaration.Interpolate, + full_decl->Declaration.Semantic, + header ); + + if (maxsize <= size) + return 0; + dr = (struct tgsi_declaration_range *) &tokens[size]; + size++; + + *dr = tgsi_build_declaration_range( + full_decl->DeclarationRange.First, + full_decl->DeclarationRange.Last, + declaration, + header ); + + if( full_decl->Declaration.Semantic ) { + struct tgsi_declaration_semantic *ds; + + if( maxsize <= size ) + return 0; + ds = (struct tgsi_declaration_semantic *) &tokens[size]; + size++; + + *ds = tgsi_build_declaration_semantic( + full_decl->Semantic.SemanticName, + full_decl->Semantic.SemanticIndex, + declaration, + header ); + } + + return size; +} + +struct tgsi_declaration_range +tgsi_default_declaration_range( void ) +{ + struct tgsi_declaration_range dr; + + dr.First = 0; + dr.Last = 0; + + return dr; +} + +struct tgsi_declaration_range +tgsi_build_declaration_range( + unsigned first, + unsigned last, + struct tgsi_declaration *declaration, + struct tgsi_header *header ) +{ + struct tgsi_declaration_range declaration_range; + + assert( last >= first ); + assert( last <= 0xFFFF ); + + declaration_range = tgsi_default_declaration_range(); + declaration_range.First = first; + declaration_range.Last = last; + + declaration_grow( declaration, header ); + + return declaration_range; +} + +struct tgsi_declaration_semantic +tgsi_default_declaration_semantic( void ) +{ + struct tgsi_declaration_semantic ds; + + ds.SemanticName = TGSI_SEMANTIC_POSITION; + ds.SemanticIndex = 0; + ds.Padding = 0; + + return ds; +} + +struct tgsi_declaration_semantic +tgsi_build_declaration_semantic( + unsigned semantic_name, + unsigned semantic_index, + struct tgsi_declaration *declaration, + struct tgsi_header *header ) +{ + struct tgsi_declaration_semantic ds; + + assert( semantic_name <= TGSI_SEMANTIC_COUNT ); + assert( semantic_index <= 0xFFFF ); + + ds = tgsi_default_declaration_semantic(); + ds.SemanticName = semantic_name; + ds.SemanticIndex = semantic_index; + + declaration_grow( declaration, header ); + + return ds; +} + +/* + * immediate + */ + +struct tgsi_immediate +tgsi_default_immediate( void ) +{ + struct tgsi_immediate immediate; + + immediate.Type = TGSI_TOKEN_TYPE_IMMEDIATE; + immediate.Size = 1; + immediate.DataType = TGSI_IMM_FLOAT32; + immediate.Padding = 0; + immediate.Extended = 0; + + return immediate; +} + +struct tgsi_immediate +tgsi_build_immediate( + struct tgsi_header *header ) +{ + struct tgsi_immediate immediate; + + immediate = tgsi_default_immediate(); + + header_bodysize_grow( header ); + + return immediate; +} + +struct tgsi_full_immediate +tgsi_default_full_immediate( void ) +{ + struct tgsi_full_immediate fullimm; + + fullimm.Immediate = tgsi_default_immediate(); + fullimm.u.Pointer = (void *) 0; + + return fullimm; +} + +static void +immediate_grow( + struct tgsi_immediate *immediate, + struct tgsi_header *header ) +{ + assert( immediate->Size < 0xFF ); + + immediate->Size++; + + header_bodysize_grow( header ); +} + +struct tgsi_immediate_float32 +tgsi_build_immediate_float32( + float value, + struct tgsi_immediate *immediate, + struct tgsi_header *header ) +{ + struct tgsi_immediate_float32 immediate_float32; + + immediate_float32.Float = value; + + immediate_grow( immediate, header ); + + return immediate_float32; +} + +unsigned +tgsi_build_full_immediate( + const struct tgsi_full_immediate *full_imm, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ) +{ + unsigned size = 0, i; + struct tgsi_immediate *immediate; + + if( maxsize <= size ) + return 0; + immediate = (struct tgsi_immediate *) &tokens[size]; + size++; + + *immediate = tgsi_build_immediate( header ); + + for( i = 0; i < full_imm->Immediate.Size - 1; i++ ) { + struct tgsi_immediate_float32 *if32; + + if( maxsize <= size ) + return 0; + if32 = (struct tgsi_immediate_float32 *) &tokens[size]; + size++; + + *if32 = tgsi_build_immediate_float32( + full_imm->u.ImmediateFloat32[i].Float, + immediate, + header ); + } + + return size; +} + +/* + * instruction + */ + +struct tgsi_instruction +tgsi_default_instruction( void ) +{ + struct tgsi_instruction instruction; + + instruction.Type = TGSI_TOKEN_TYPE_INSTRUCTION; + instruction.Size = 1; + instruction.Opcode = TGSI_OPCODE_MOV; + instruction.Saturate = TGSI_SAT_NONE; + instruction.NumDstRegs = 1; + instruction.NumSrcRegs = 1; + instruction.Padding = 0; + instruction.Extended = 0; + + return instruction; +} + +struct tgsi_instruction +tgsi_build_instruction( + unsigned opcode, + unsigned saturate, + unsigned num_dst_regs, + unsigned num_src_regs, + struct tgsi_header *header ) +{ + struct tgsi_instruction instruction; + + assert (opcode <= TGSI_OPCODE_LAST); + assert (saturate <= TGSI_SAT_MINUS_PLUS_ONE); + assert (num_dst_regs <= 3); + assert (num_src_regs <= 15); + + instruction = tgsi_default_instruction(); + instruction.Opcode = opcode; + instruction.Saturate = saturate; + instruction.NumDstRegs = num_dst_regs; + instruction.NumSrcRegs = num_src_regs; + + header_bodysize_grow( header ); + + return instruction; +} + +static void +instruction_grow( + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + assert (instruction->Size < 0xFF); + + instruction->Size++; + + header_bodysize_grow( header ); +} + +struct tgsi_full_instruction +tgsi_default_full_instruction( void ) +{ + struct tgsi_full_instruction full_instruction; + unsigned i; + + full_instruction.Instruction = tgsi_default_instruction(); + full_instruction.InstructionExtNv = tgsi_default_instruction_ext_nv(); + full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label(); + full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture(); + for( i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) { + full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register(); + } + for( i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++ ) { + full_instruction.FullSrcRegisters[i] = tgsi_default_full_src_register(); + } + + return full_instruction; +} + +unsigned +tgsi_build_full_instruction( + const struct tgsi_full_instruction *full_inst, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ) +{ + unsigned size = 0; + unsigned i; + struct tgsi_instruction *instruction; + struct tgsi_token *prev_token; + + if( maxsize <= size ) + return 0; + instruction = (struct tgsi_instruction *) &tokens[size]; + size++; + + *instruction = tgsi_build_instruction( + full_inst->Instruction.Opcode, + full_inst->Instruction.Saturate, + full_inst->Instruction.NumDstRegs, + full_inst->Instruction.NumSrcRegs, + header ); + prev_token = (struct tgsi_token *) instruction; + + if( tgsi_compare_instruction_ext_nv( + full_inst->InstructionExtNv, + tgsi_default_instruction_ext_nv() ) ) { + struct tgsi_instruction_ext_nv *instruction_ext_nv; + + if( maxsize <= size ) + return 0; + instruction_ext_nv = + (struct tgsi_instruction_ext_nv *) &tokens[size]; + size++; + + *instruction_ext_nv = tgsi_build_instruction_ext_nv( + full_inst->InstructionExtNv.Precision, + full_inst->InstructionExtNv.CondDstIndex, + full_inst->InstructionExtNv.CondFlowIndex, + full_inst->InstructionExtNv.CondMask, + full_inst->InstructionExtNv.CondSwizzleX, + full_inst->InstructionExtNv.CondSwizzleY, + full_inst->InstructionExtNv.CondSwizzleZ, + full_inst->InstructionExtNv.CondSwizzleW, + full_inst->InstructionExtNv.CondDstUpdate, + full_inst->InstructionExtNv.CondFlowEnable, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) instruction_ext_nv; + } + + if( tgsi_compare_instruction_ext_label( + full_inst->InstructionExtLabel, + tgsi_default_instruction_ext_label() ) ) { + struct tgsi_instruction_ext_label *instruction_ext_label; + + if( maxsize <= size ) + return 0; + instruction_ext_label = + (struct tgsi_instruction_ext_label *) &tokens[size]; + size++; + + *instruction_ext_label = tgsi_build_instruction_ext_label( + full_inst->InstructionExtLabel.Label, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) instruction_ext_label; + } + + if( tgsi_compare_instruction_ext_texture( + full_inst->InstructionExtTexture, + tgsi_default_instruction_ext_texture() ) ) { + struct tgsi_instruction_ext_texture *instruction_ext_texture; + + if( maxsize <= size ) + return 0; + instruction_ext_texture = + (struct tgsi_instruction_ext_texture *) &tokens[size]; + size++; + + *instruction_ext_texture = tgsi_build_instruction_ext_texture( + full_inst->InstructionExtTexture.Texture, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) instruction_ext_texture; + } + + for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) { + const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i]; + struct tgsi_dst_register *dst_register; + struct tgsi_token *prev_token; + + if( maxsize <= size ) + return 0; + dst_register = (struct tgsi_dst_register *) &tokens[size]; + size++; + + *dst_register = tgsi_build_dst_register( + reg->DstRegister.File, + reg->DstRegister.WriteMask, + reg->DstRegister.Index, + instruction, + header ); + prev_token = (struct tgsi_token *) dst_register; + + if( tgsi_compare_dst_register_ext_concode( + reg->DstRegisterExtConcode, + tgsi_default_dst_register_ext_concode() ) ) { + struct tgsi_dst_register_ext_concode *dst_register_ext_concode; + + if( maxsize <= size ) + return 0; + dst_register_ext_concode = + (struct tgsi_dst_register_ext_concode *) &tokens[size]; + size++; + + *dst_register_ext_concode = tgsi_build_dst_register_ext_concode( + reg->DstRegisterExtConcode.CondMask, + reg->DstRegisterExtConcode.CondSwizzleX, + reg->DstRegisterExtConcode.CondSwizzleY, + reg->DstRegisterExtConcode.CondSwizzleZ, + reg->DstRegisterExtConcode.CondSwizzleW, + reg->DstRegisterExtConcode.CondSrcIndex, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) dst_register_ext_concode; + } + + if( tgsi_compare_dst_register_ext_modulate( + reg->DstRegisterExtModulate, + tgsi_default_dst_register_ext_modulate() ) ) { + struct tgsi_dst_register_ext_modulate *dst_register_ext_modulate; + + if( maxsize <= size ) + return 0; + dst_register_ext_modulate = + (struct tgsi_dst_register_ext_modulate *) &tokens[size]; + size++; + + *dst_register_ext_modulate = tgsi_build_dst_register_ext_modulate( + reg->DstRegisterExtModulate.Modulate, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) dst_register_ext_modulate; + } + } + + for( i = 0; i < full_inst->Instruction.NumSrcRegs; i++ ) { + const struct tgsi_full_src_register *reg = &full_inst->FullSrcRegisters[i]; + struct tgsi_src_register *src_register; + struct tgsi_token *prev_token; + + if( maxsize <= size ) + return 0; + src_register = (struct tgsi_src_register *) &tokens[size]; + size++; + + *src_register = tgsi_build_src_register( + reg->SrcRegister.File, + reg->SrcRegister.SwizzleX, + reg->SrcRegister.SwizzleY, + reg->SrcRegister.SwizzleZ, + reg->SrcRegister.SwizzleW, + reg->SrcRegister.Negate, + reg->SrcRegister.Indirect, + reg->SrcRegister.Dimension, + reg->SrcRegister.Index, + instruction, + header ); + prev_token = (struct tgsi_token *) src_register; + + if( tgsi_compare_src_register_ext_swz( + reg->SrcRegisterExtSwz, + tgsi_default_src_register_ext_swz() ) ) { + struct tgsi_src_register_ext_swz *src_register_ext_swz; + + /* Use of the extended swizzle requires the simple swizzle to be identity. + */ + assert( reg->SrcRegister.SwizzleX == TGSI_SWIZZLE_X ); + assert( reg->SrcRegister.SwizzleY == TGSI_SWIZZLE_Y ); + assert( reg->SrcRegister.SwizzleZ == TGSI_SWIZZLE_Z ); + assert( reg->SrcRegister.SwizzleW == TGSI_SWIZZLE_W ); + assert( reg->SrcRegister.Negate == FALSE ); + + if( maxsize <= size ) + return 0; + src_register_ext_swz = + (struct tgsi_src_register_ext_swz *) &tokens[size]; + size++; + + *src_register_ext_swz = tgsi_build_src_register_ext_swz( + reg->SrcRegisterExtSwz.ExtSwizzleX, + reg->SrcRegisterExtSwz.ExtSwizzleY, + reg->SrcRegisterExtSwz.ExtSwizzleZ, + reg->SrcRegisterExtSwz.ExtSwizzleW, + reg->SrcRegisterExtSwz.NegateX, + reg->SrcRegisterExtSwz.NegateY, + reg->SrcRegisterExtSwz.NegateZ, + reg->SrcRegisterExtSwz.NegateW, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) src_register_ext_swz; + } + + if( tgsi_compare_src_register_ext_mod( + reg->SrcRegisterExtMod, + tgsi_default_src_register_ext_mod() ) ) { + struct tgsi_src_register_ext_mod *src_register_ext_mod; + + if( maxsize <= size ) + return 0; + src_register_ext_mod = + (struct tgsi_src_register_ext_mod *) &tokens[size]; + size++; + + *src_register_ext_mod = tgsi_build_src_register_ext_mod( + reg->SrcRegisterExtMod.Complement, + reg->SrcRegisterExtMod.Bias, + reg->SrcRegisterExtMod.Scale2X, + reg->SrcRegisterExtMod.Absolute, + reg->SrcRegisterExtMod.Negate, + prev_token, + instruction, + header ); + prev_token = (struct tgsi_token *) src_register_ext_mod; + } + + if( reg->SrcRegister.Indirect ) { + struct tgsi_src_register *ind; + + if( maxsize <= size ) + return 0; + ind = (struct tgsi_src_register *) &tokens[size]; + size++; + + *ind = tgsi_build_src_register( + reg->SrcRegisterInd.File, + reg->SrcRegisterInd.SwizzleX, + reg->SrcRegisterInd.SwizzleY, + reg->SrcRegisterInd.SwizzleZ, + reg->SrcRegisterInd.SwizzleW, + reg->SrcRegisterInd.Negate, + reg->SrcRegisterInd.Indirect, + reg->SrcRegisterInd.Dimension, + reg->SrcRegisterInd.Index, + instruction, + header ); + } + + if( reg->SrcRegister.Dimension ) { + struct tgsi_dimension *dim; + + assert( !reg->SrcRegisterDim.Dimension ); + + if( maxsize <= size ) + return 0; + dim = (struct tgsi_dimension *) &tokens[size]; + size++; + + *dim = tgsi_build_dimension( + reg->SrcRegisterDim.Indirect, + reg->SrcRegisterDim.Index, + instruction, + header ); + + if( reg->SrcRegisterDim.Indirect ) { + struct tgsi_src_register *ind; + + if( maxsize <= size ) + return 0; + ind = (struct tgsi_src_register *) &tokens[size]; + size++; + + *ind = tgsi_build_src_register( + reg->SrcRegisterDimInd.File, + reg->SrcRegisterDimInd.SwizzleX, + reg->SrcRegisterDimInd.SwizzleY, + reg->SrcRegisterDimInd.SwizzleZ, + reg->SrcRegisterDimInd.SwizzleW, + reg->SrcRegisterDimInd.Negate, + reg->SrcRegisterDimInd.Indirect, + reg->SrcRegisterDimInd.Dimension, + reg->SrcRegisterDimInd.Index, + instruction, + header ); + } + } + } + + return size; +} + +struct tgsi_instruction_ext_nv +tgsi_default_instruction_ext_nv( void ) +{ + struct tgsi_instruction_ext_nv instruction_ext_nv; + + instruction_ext_nv.Type = TGSI_INSTRUCTION_EXT_TYPE_NV; + instruction_ext_nv.Precision = TGSI_PRECISION_DEFAULT; + instruction_ext_nv.CondDstIndex = 0; + instruction_ext_nv.CondFlowIndex = 0; + instruction_ext_nv.CondMask = TGSI_CC_TR; + instruction_ext_nv.CondSwizzleX = TGSI_SWIZZLE_X; + instruction_ext_nv.CondSwizzleY = TGSI_SWIZZLE_Y; + instruction_ext_nv.CondSwizzleZ = TGSI_SWIZZLE_Z; + instruction_ext_nv.CondSwizzleW = TGSI_SWIZZLE_W; + instruction_ext_nv.CondDstUpdate = 0; + instruction_ext_nv.CondFlowEnable = 0; + instruction_ext_nv.Padding = 0; + instruction_ext_nv.Extended = 0; + + return instruction_ext_nv; +} + +union token_u32 +{ + unsigned u32; +}; + +unsigned +tgsi_compare_instruction_ext_nv( + struct tgsi_instruction_ext_nv a, + struct tgsi_instruction_ext_nv b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_instruction_ext_nv +tgsi_build_instruction_ext_nv( + unsigned precision, + unsigned cond_dst_index, + unsigned cond_flow_index, + unsigned cond_mask, + unsigned cond_swizzle_x, + unsigned cond_swizzle_y, + unsigned cond_swizzle_z, + unsigned cond_swizzle_w, + unsigned cond_dst_update, + unsigned cond_flow_update, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_instruction_ext_nv instruction_ext_nv; + + instruction_ext_nv = tgsi_default_instruction_ext_nv(); + instruction_ext_nv.Precision = precision; + instruction_ext_nv.CondDstIndex = cond_dst_index; + instruction_ext_nv.CondFlowIndex = cond_flow_index; + instruction_ext_nv.CondMask = cond_mask; + instruction_ext_nv.CondSwizzleX = cond_swizzle_x; + instruction_ext_nv.CondSwizzleY = cond_swizzle_y; + instruction_ext_nv.CondSwizzleZ = cond_swizzle_z; + instruction_ext_nv.CondSwizzleW = cond_swizzle_w; + instruction_ext_nv.CondDstUpdate = cond_dst_update; + instruction_ext_nv.CondFlowEnable = cond_flow_update; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return instruction_ext_nv; +} + +struct tgsi_instruction_ext_label +tgsi_default_instruction_ext_label( void ) +{ + struct tgsi_instruction_ext_label instruction_ext_label; + + instruction_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL; + instruction_ext_label.Label = 0; + instruction_ext_label.Padding = 0; + instruction_ext_label.Extended = 0; + + return instruction_ext_label; +} + +unsigned +tgsi_compare_instruction_ext_label( + struct tgsi_instruction_ext_label a, + struct tgsi_instruction_ext_label b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_instruction_ext_label +tgsi_build_instruction_ext_label( + unsigned label, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_instruction_ext_label instruction_ext_label; + + instruction_ext_label = tgsi_default_instruction_ext_label(); + instruction_ext_label.Label = label; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return instruction_ext_label; +} + +struct tgsi_instruction_ext_texture +tgsi_default_instruction_ext_texture( void ) +{ + struct tgsi_instruction_ext_texture instruction_ext_texture; + + instruction_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE; + instruction_ext_texture.Texture = TGSI_TEXTURE_UNKNOWN; + instruction_ext_texture.Padding = 0; + instruction_ext_texture.Extended = 0; + + return instruction_ext_texture; +} + +unsigned +tgsi_compare_instruction_ext_texture( + struct tgsi_instruction_ext_texture a, + struct tgsi_instruction_ext_texture b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_instruction_ext_texture +tgsi_build_instruction_ext_texture( + unsigned texture, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_instruction_ext_texture instruction_ext_texture; + + instruction_ext_texture = tgsi_default_instruction_ext_texture(); + instruction_ext_texture.Texture = texture; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return instruction_ext_texture; +} + +struct tgsi_src_register +tgsi_default_src_register( void ) +{ + struct tgsi_src_register src_register; + + src_register.File = TGSI_FILE_NULL; + src_register.SwizzleX = TGSI_SWIZZLE_X; + src_register.SwizzleY = TGSI_SWIZZLE_Y; + src_register.SwizzleZ = TGSI_SWIZZLE_Z; + src_register.SwizzleW = TGSI_SWIZZLE_W; + src_register.Negate = 0; + src_register.Indirect = 0; + src_register.Dimension = 0; + src_register.Index = 0; + src_register.Extended = 0; + + return src_register; +} + +struct tgsi_src_register +tgsi_build_src_register( + unsigned file, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w, + unsigned negate, + unsigned indirect, + unsigned dimension, + int index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_src_register src_register; + + assert( file <= TGSI_FILE_IMMEDIATE ); + assert( swizzle_x <= TGSI_SWIZZLE_W ); + assert( swizzle_y <= TGSI_SWIZZLE_W ); + assert( swizzle_z <= TGSI_SWIZZLE_W ); + assert( swizzle_w <= TGSI_SWIZZLE_W ); + assert( negate <= 1 ); + assert( index >= -0x8000 && index <= 0x7FFF ); + + src_register = tgsi_default_src_register(); + src_register.File = file; + src_register.SwizzleX = swizzle_x; + src_register.SwizzleY = swizzle_y; + src_register.SwizzleZ = swizzle_z; + src_register.SwizzleW = swizzle_w; + src_register.Negate = negate; + src_register.Indirect = indirect; + src_register.Dimension = dimension; + src_register.Index = index; + + instruction_grow( instruction, header ); + + return src_register; +} + +struct tgsi_full_src_register +tgsi_default_full_src_register( void ) +{ + struct tgsi_full_src_register full_src_register; + + full_src_register.SrcRegister = tgsi_default_src_register(); + full_src_register.SrcRegisterExtSwz = tgsi_default_src_register_ext_swz(); + full_src_register.SrcRegisterExtMod = tgsi_default_src_register_ext_mod(); + full_src_register.SrcRegisterInd = tgsi_default_src_register(); + full_src_register.SrcRegisterDim = tgsi_default_dimension(); + full_src_register.SrcRegisterDimInd = tgsi_default_src_register(); + + return full_src_register; +} + +struct tgsi_src_register_ext_swz +tgsi_default_src_register_ext_swz( void ) +{ + struct tgsi_src_register_ext_swz src_register_ext_swz; + + src_register_ext_swz.Type = TGSI_SRC_REGISTER_EXT_TYPE_SWZ; + src_register_ext_swz.ExtSwizzleX = TGSI_EXTSWIZZLE_X; + src_register_ext_swz.ExtSwizzleY = TGSI_EXTSWIZZLE_Y; + src_register_ext_swz.ExtSwizzleZ = TGSI_EXTSWIZZLE_Z; + src_register_ext_swz.ExtSwizzleW = TGSI_EXTSWIZZLE_W; + src_register_ext_swz.NegateX = 0; + src_register_ext_swz.NegateY = 0; + src_register_ext_swz.NegateZ = 0; + src_register_ext_swz.NegateW = 0; + src_register_ext_swz.Padding = 0; + src_register_ext_swz.Extended = 0; + + return src_register_ext_swz; +} + +unsigned +tgsi_compare_src_register_ext_swz( + struct tgsi_src_register_ext_swz a, + struct tgsi_src_register_ext_swz b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_src_register_ext_swz +tgsi_build_src_register_ext_swz( + unsigned ext_swizzle_x, + unsigned ext_swizzle_y, + unsigned ext_swizzle_z, + unsigned ext_swizzle_w, + unsigned negate_x, + unsigned negate_y, + unsigned negate_z, + unsigned negate_w, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_src_register_ext_swz src_register_ext_swz; + + assert( ext_swizzle_x <= TGSI_EXTSWIZZLE_ONE ); + assert( ext_swizzle_y <= TGSI_EXTSWIZZLE_ONE ); + assert( ext_swizzle_z <= TGSI_EXTSWIZZLE_ONE ); + assert( ext_swizzle_w <= TGSI_EXTSWIZZLE_ONE ); + assert( negate_x <= 1 ); + assert( negate_y <= 1 ); + assert( negate_z <= 1 ); + assert( negate_w <= 1 ); + + src_register_ext_swz = tgsi_default_src_register_ext_swz(); + src_register_ext_swz.ExtSwizzleX = ext_swizzle_x; + src_register_ext_swz.ExtSwizzleY = ext_swizzle_y; + src_register_ext_swz.ExtSwizzleZ = ext_swizzle_z; + src_register_ext_swz.ExtSwizzleW = ext_swizzle_w; + src_register_ext_swz.NegateX = negate_x; + src_register_ext_swz.NegateY = negate_y; + src_register_ext_swz.NegateZ = negate_z; + src_register_ext_swz.NegateW = negate_w; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return src_register_ext_swz; +} + +struct tgsi_src_register_ext_mod +tgsi_default_src_register_ext_mod( void ) +{ + struct tgsi_src_register_ext_mod src_register_ext_mod; + + src_register_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD; + src_register_ext_mod.Complement = 0; + src_register_ext_mod.Bias = 0; + src_register_ext_mod.Scale2X = 0; + src_register_ext_mod.Absolute = 0; + src_register_ext_mod.Negate = 0; + src_register_ext_mod.Padding = 0; + src_register_ext_mod.Extended = 0; + + return src_register_ext_mod; +} + +unsigned +tgsi_compare_src_register_ext_mod( + struct tgsi_src_register_ext_mod a, + struct tgsi_src_register_ext_mod b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_src_register_ext_mod +tgsi_build_src_register_ext_mod( + unsigned complement, + unsigned bias, + unsigned scale_2x, + unsigned absolute, + unsigned negate, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_src_register_ext_mod src_register_ext_mod; + + assert( complement <= 1 ); + assert( bias <= 1 ); + assert( scale_2x <= 1 ); + assert( absolute <= 1 ); + assert( negate <= 1 ); + + src_register_ext_mod = tgsi_default_src_register_ext_mod(); + src_register_ext_mod.Complement = complement; + src_register_ext_mod.Bias = bias; + src_register_ext_mod.Scale2X = scale_2x; + src_register_ext_mod.Absolute = absolute; + src_register_ext_mod.Negate = negate; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return src_register_ext_mod; +} + +struct tgsi_dimension +tgsi_default_dimension( void ) +{ + struct tgsi_dimension dimension; + + dimension.Indirect = 0; + dimension.Dimension = 0; + dimension.Padding = 0; + dimension.Index = 0; + dimension.Extended = 0; + + return dimension; +} + +struct tgsi_dimension +tgsi_build_dimension( + unsigned indirect, + unsigned index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_dimension dimension; + + dimension = tgsi_default_dimension(); + dimension.Indirect = indirect; + dimension.Index = index; + + instruction_grow( instruction, header ); + + return dimension; +} + +struct tgsi_dst_register +tgsi_default_dst_register( void ) +{ + struct tgsi_dst_register dst_register; + + dst_register.File = TGSI_FILE_NULL; + dst_register.WriteMask = TGSI_WRITEMASK_XYZW; + dst_register.Indirect = 0; + dst_register.Dimension = 0; + dst_register.Index = 0; + dst_register.Padding = 0; + dst_register.Extended = 0; + + return dst_register; +} + +struct tgsi_dst_register +tgsi_build_dst_register( + unsigned file, + unsigned mask, + int index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_dst_register dst_register; + + assert( file <= TGSI_FILE_IMMEDIATE ); + assert( mask <= TGSI_WRITEMASK_XYZW ); + assert( index >= -32768 && index <= 32767 ); + + dst_register = tgsi_default_dst_register(); + dst_register.File = file; + dst_register.WriteMask = mask; + dst_register.Index = index; + + instruction_grow( instruction, header ); + + return dst_register; +} + +struct tgsi_full_dst_register +tgsi_default_full_dst_register( void ) +{ + struct tgsi_full_dst_register full_dst_register; + + full_dst_register.DstRegister = tgsi_default_dst_register(); + full_dst_register.DstRegisterExtConcode = + tgsi_default_dst_register_ext_concode(); + full_dst_register.DstRegisterExtModulate = + tgsi_default_dst_register_ext_modulate(); + + return full_dst_register; +} + +struct tgsi_dst_register_ext_concode +tgsi_default_dst_register_ext_concode( void ) +{ + struct tgsi_dst_register_ext_concode dst_register_ext_concode; + + dst_register_ext_concode.Type = TGSI_DST_REGISTER_EXT_TYPE_CONDCODE; + dst_register_ext_concode.CondMask = TGSI_CC_TR; + dst_register_ext_concode.CondSwizzleX = TGSI_SWIZZLE_X; + dst_register_ext_concode.CondSwizzleY = TGSI_SWIZZLE_Y; + dst_register_ext_concode.CondSwizzleZ = TGSI_SWIZZLE_Z; + dst_register_ext_concode.CondSwizzleW = TGSI_SWIZZLE_W; + dst_register_ext_concode.CondSrcIndex = 0; + dst_register_ext_concode.Padding = 0; + dst_register_ext_concode.Extended = 0; + + return dst_register_ext_concode; +} + +unsigned +tgsi_compare_dst_register_ext_concode( + struct tgsi_dst_register_ext_concode a, + struct tgsi_dst_register_ext_concode b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_dst_register_ext_concode +tgsi_build_dst_register_ext_concode( + unsigned cc, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w, + int index, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_dst_register_ext_concode dst_register_ext_concode; + + assert( cc <= TGSI_CC_FL ); + assert( swizzle_x <= TGSI_SWIZZLE_W ); + assert( swizzle_y <= TGSI_SWIZZLE_W ); + assert( swizzle_z <= TGSI_SWIZZLE_W ); + assert( swizzle_w <= TGSI_SWIZZLE_W ); + assert( index >= -32768 && index <= 32767 ); + + dst_register_ext_concode = tgsi_default_dst_register_ext_concode(); + dst_register_ext_concode.CondMask = cc; + dst_register_ext_concode.CondSwizzleX = swizzle_x; + dst_register_ext_concode.CondSwizzleY = swizzle_y; + dst_register_ext_concode.CondSwizzleZ = swizzle_z; + dst_register_ext_concode.CondSwizzleW = swizzle_w; + dst_register_ext_concode.CondSrcIndex = index; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return dst_register_ext_concode; +} + +struct tgsi_dst_register_ext_modulate +tgsi_default_dst_register_ext_modulate( void ) +{ + struct tgsi_dst_register_ext_modulate dst_register_ext_modulate; + + dst_register_ext_modulate.Type = TGSI_DST_REGISTER_EXT_TYPE_MODULATE; + dst_register_ext_modulate.Modulate = TGSI_MODULATE_1X; + dst_register_ext_modulate.Padding = 0; + dst_register_ext_modulate.Extended = 0; + + return dst_register_ext_modulate; +} + +unsigned +tgsi_compare_dst_register_ext_modulate( + struct tgsi_dst_register_ext_modulate a, + struct tgsi_dst_register_ext_modulate b ) +{ + a.Padding = b.Padding = 0; + a.Extended = b.Extended = 0; + return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; +} + +struct tgsi_dst_register_ext_modulate +tgsi_build_dst_register_ext_modulate( + unsigned modulate, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ) +{ + struct tgsi_dst_register_ext_modulate dst_register_ext_modulate; + + assert( modulate <= TGSI_MODULATE_EIGHTH ); + + dst_register_ext_modulate = tgsi_default_dst_register_ext_modulate(); + dst_register_ext_modulate.Modulate = modulate; + + prev_token->Extended = 1; + instruction_grow( instruction, header ); + + return dst_register_ext_modulate; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h new file mode 100644 index 0000000000..ed25830248 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -0,0 +1,332 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_BUILD_H +#define TGSI_BUILD_H + +#if defined __cplusplus +extern "C" { +#endif + +/* + * version + */ + +struct tgsi_version +tgsi_build_version( void ); + +/* + * header + */ + +struct tgsi_header +tgsi_build_header( void ); + +struct tgsi_processor +tgsi_default_processor( void ); + +struct tgsi_processor +tgsi_build_processor( + unsigned processor, + struct tgsi_header *header ); + +/* + * declaration + */ + +struct tgsi_declaration +tgsi_default_declaration( void ); + +struct tgsi_declaration +tgsi_build_declaration( + unsigned file, + unsigned usage_mask, + unsigned interpolate, + unsigned semantic, + struct tgsi_header *header ); + +struct tgsi_full_declaration +tgsi_default_full_declaration( void ); + +unsigned +tgsi_build_full_declaration( + const struct tgsi_full_declaration *full_decl, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ); + +struct tgsi_declaration_range +tgsi_default_declaration_range( void ); + +struct tgsi_declaration_range +tgsi_build_declaration_range( + unsigned first, + unsigned last, + struct tgsi_declaration *declaration, + struct tgsi_header *header ); + +struct tgsi_declaration_semantic +tgsi_default_declaration_semantic( void ); + +struct tgsi_declaration_semantic +tgsi_build_declaration_semantic( + unsigned semantic_name, + unsigned semantic_index, + struct tgsi_declaration *declaration, + struct tgsi_header *header ); + +/* + * immediate + */ + +struct tgsi_immediate +tgsi_default_immediate( void ); + +struct tgsi_immediate +tgsi_build_immediate( + struct tgsi_header *header ); + +struct tgsi_full_immediate +tgsi_default_full_immediate( void ); + +struct tgsi_immediate_float32 +tgsi_build_immediate_float32( + float value, + struct tgsi_immediate *immediate, + struct tgsi_header *header ); + +unsigned +tgsi_build_full_immediate( + const struct tgsi_full_immediate *full_imm, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ); + +/* + * instruction + */ + +struct tgsi_instruction +tgsi_default_instruction( void ); + +struct tgsi_instruction +tgsi_build_instruction( + unsigned opcode, + unsigned saturate, + unsigned num_dst_regs, + unsigned num_src_regs, + struct tgsi_header *header ); + +struct tgsi_full_instruction +tgsi_default_full_instruction( void ); + +unsigned +tgsi_build_full_instruction( + const struct tgsi_full_instruction *full_inst, + struct tgsi_token *tokens, + struct tgsi_header *header, + unsigned maxsize ); + +struct tgsi_instruction_ext_nv +tgsi_default_instruction_ext_nv( void ); + +unsigned +tgsi_compare_instruction_ext_nv( + struct tgsi_instruction_ext_nv a, + struct tgsi_instruction_ext_nv b ); + +struct tgsi_instruction_ext_nv +tgsi_build_instruction_ext_nv( + unsigned precision, + unsigned cond_dst_index, + unsigned cond_flow_index, + unsigned cond_mask, + unsigned cond_swizzle_x, + unsigned cond_swizzle_y, + unsigned cond_swizzle_z, + unsigned cond_swizzle_w, + unsigned cond_dst_update, + unsigned cond_flow_update, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_instruction_ext_label +tgsi_default_instruction_ext_label( void ); + +unsigned +tgsi_compare_instruction_ext_label( + struct tgsi_instruction_ext_label a, + struct tgsi_instruction_ext_label b ); + +struct tgsi_instruction_ext_label +tgsi_build_instruction_ext_label( + unsigned label, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_instruction_ext_texture +tgsi_default_instruction_ext_texture( void ); + +unsigned +tgsi_compare_instruction_ext_texture( + struct tgsi_instruction_ext_texture a, + struct tgsi_instruction_ext_texture b ); + +struct tgsi_instruction_ext_texture +tgsi_build_instruction_ext_texture( + unsigned texture, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_src_register +tgsi_default_src_register( void ); + +struct tgsi_src_register +tgsi_build_src_register( + unsigned file, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w, + unsigned negate, + unsigned indirect, + unsigned dimension, + int index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_full_src_register +tgsi_default_full_src_register( void ); + +struct tgsi_src_register_ext_swz +tgsi_default_src_register_ext_swz( void ); + +unsigned +tgsi_compare_src_register_ext_swz( + struct tgsi_src_register_ext_swz a, + struct tgsi_src_register_ext_swz b ); + +struct tgsi_src_register_ext_swz +tgsi_build_src_register_ext_swz( + unsigned ext_swizzle_x, + unsigned ext_swizzle_y, + unsigned ext_swizzle_z, + unsigned ext_swizzle_w, + unsigned negate_x, + unsigned negate_y, + unsigned negate_z, + unsigned negate_w, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_src_register_ext_mod +tgsi_default_src_register_ext_mod( void ); + +unsigned +tgsi_compare_src_register_ext_mod( + struct tgsi_src_register_ext_mod a, + struct tgsi_src_register_ext_mod b ); + +struct tgsi_src_register_ext_mod +tgsi_build_src_register_ext_mod( + unsigned complement, + unsigned bias, + unsigned scale_2x, + unsigned absolute, + unsigned negate, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_dimension +tgsi_default_dimension( void ); + +struct tgsi_dimension +tgsi_build_dimension( + unsigned indirect, + unsigned index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_dst_register +tgsi_default_dst_register( void ); + +struct tgsi_dst_register +tgsi_build_dst_register( + unsigned file, + unsigned mask, + int index, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_full_dst_register +tgsi_default_full_dst_register( void ); + +struct tgsi_dst_register_ext_concode +tgsi_default_dst_register_ext_concode( void ); + +unsigned +tgsi_compare_dst_register_ext_concode( + struct tgsi_dst_register_ext_concode a, + struct tgsi_dst_register_ext_concode b ); + +struct tgsi_dst_register_ext_concode +tgsi_build_dst_register_ext_concode( + unsigned cc, + unsigned swizzle_x, + unsigned swizzle_y, + unsigned swizzle_z, + unsigned swizzle_w, + int index, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +struct tgsi_dst_register_ext_modulate +tgsi_default_dst_register_ext_modulate( void ); + +unsigned +tgsi_compare_dst_register_ext_modulate( + struct tgsi_dst_register_ext_modulate a, + struct tgsi_dst_register_ext_modulate b ); + +struct tgsi_dst_register_ext_modulate +tgsi_build_dst_register_ext_modulate( + unsigned modulate, + struct tgsi_token *prev_token, + struct tgsi_instruction *instruction, + struct tgsi_header *header ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_BUILD_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c new file mode 100644 index 0000000000..d2e6375212 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -0,0 +1,582 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "tgsi_dump.h" +#include "tgsi_iterate.h" + +struct dump_ctx +{ + struct tgsi_iterate_context iter; + + uint instno; +}; + +static void +dump_enum( + uint e, + const char **enums, + uint enum_count ) +{ + if (e >= enum_count) + debug_printf( "%u", e ); + else + debug_printf( "%s", enums[e] ); +} + +#define EOL() debug_printf( "\n" ) +#define TXT(S) debug_printf( "%s", S ) +#define CHR(C) debug_printf( "%c", C ) +#define UIX(I) debug_printf( "0x%x", I ) +#define UID(I) debug_printf( "%u", I ) +#define SID(I) debug_printf( "%d", I ) +#define FLT(F) debug_printf( "%10.4f", F ) +#define ENM(E,ENUMS) dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) + +static const char *processor_type_names[] = +{ + "FRAG", + "VERT", + "GEOM" +}; + +static const char *file_names[] = +{ + "NULL", + "CONST", + "IN", + "OUT", + "TEMP", + "SAMP", + "ADDR", + "IMM" +}; + +static const char *interpolate_names[] = +{ + "CONSTANT", + "LINEAR", + "PERSPECTIVE" +}; + +static const char *semantic_names[] = +{ + "POSITION", + "COLOR", + "BCOLOR", + "FOG", + "PSIZE", + "GENERIC", + "NORMAL" +}; + +static const char *immediate_type_names[] = +{ + "FLT32" +}; + +static const char *opcode_names[TGSI_OPCODE_LAST] = +{ + "ARL", + "MOV", + "LIT", + "RCP", + "RSQ", + "EXP", + "LOG", + "MUL", + "ADD", + "DP3", + "DP4", + "DST", + "MIN", + "MAX", + "SLT", + "SGE", + "MAD", + "SUB", + "LERP", + "CND", + "CND0", + "DOT2ADD", + "INDEX", + "NEGATE", + "FRAC", + "CLAMP", + "FLOOR", + "ROUND", + "EXPBASE2", + "LOGBASE2", + "POWER", + "CROSSPRODUCT", + "MULTIPLYMATRIX", + "ABS", + "RCC", + "DPH", + "COS", + "DDX", + "DDY", + "KILP", + "PK2H", + "PK2US", + "PK4B", + "PK4UB", + "RFL", + "SEQ", + "SFL", + "SGT", + "SIN", + "SLE", + "SNE", + "STR", + "TEX", + "TXD", + "TXP", + "UP2H", + "UP2US", + "UP4B", + "UP4UB", + "X2D", + "ARA", + "ARR", + "BRA", + "CAL", + "RET", + "SSG", + "CMP", + "SCS", + "TXB", + "NRM", + "DIV", + "DP2", + "TXL", + "BRK", + "IF", + "LOOP", + "REP", + "ELSE", + "ENDIF", + "ENDLOOP", + "ENDREP", + "PUSHA", + "POPA", + "CEIL", + "I2F", + "NOT", + "TRUNC", + "SHL", + "SHR", + "AND", + "OR", + "MOD", + "XOR", + "SAD", + "TXF", + "TXQ", + "CONT", + "EMIT", + "ENDPRIM", + "BGNLOOP2", + "BGNSUB", + "ENDLOOP2", + "ENDSUB", + "NOISE1", + "NOISE2", + "NOISE3", + "NOISE4", + "NOP", + "M4X3", + "M3X4", + "M3X3", + "M3X2", + "NRM4", + "CALLNZ", + "IFC", + "BREAKC", + "KIL", + "END", + "SWZ" +}; + +static const char *swizzle_names[] = +{ + "x", + "y", + "z", + "w" +}; + +static const char *texture_names[] = +{ + "UNKNOWN", + "1D", + "2D", + "3D", + "CUBE", + "RECT", + "SHADOW1D", + "SHADOW2D", + "SHADOWRECT" +}; + +static const char *extswizzle_names[] = +{ + "x", + "y", + "z", + "w", + "0", + "1" +}; + +static const char *modulate_names[TGSI_MODULATE_COUNT] = +{ + "", + "_2X", + "_4X", + "_8X", + "_D2", + "_D4", + "_D8" +}; + +static void +_dump_register_prefix( + uint file, + uint first, + uint last ) +{ + + +} + +static void +_dump_register( + uint file, + int first, + int last ) +{ + ENM( file, file_names ); + CHR( '[' ); + SID( first ); + if (first != last) { + TXT( ".." ); + SID( last ); + } + CHR( ']' ); +} + +static void +_dump_register_ind( + uint file, + int index, + uint ind_file, + int ind_index ) +{ + ENM( file, file_names ); + CHR( '[' ); + ENM( ind_file, file_names ); + CHR( '[' ); + SID( ind_index ); + CHR( ']' ); + if (index != 0) { + if (index > 0) + CHR( '+' ); + SID( index ); + } + CHR( ']' ); +} + +static void +_dump_writemask( + uint writemask ) +{ + if (writemask != TGSI_WRITEMASK_XYZW) { + CHR( '.' ); + if (writemask & TGSI_WRITEMASK_X) + CHR( 'x' ); + if (writemask & TGSI_WRITEMASK_Y) + CHR( 'y' ); + if (writemask & TGSI_WRITEMASK_Z) + CHR( 'z' ); + if (writemask & TGSI_WRITEMASK_W) + CHR( 'w' ); + } +} + +void +tgsi_dump_declaration( + const struct tgsi_full_declaration *decl ) +{ + TXT( "\nDCL " ); + + _dump_register( + decl->Declaration.File, + decl->DeclarationRange.First, + decl->DeclarationRange.Last ); + _dump_writemask( + decl->Declaration.UsageMask ); + + if (decl->Declaration.Semantic) { + TXT( ", " ); + ENM( decl->Semantic.SemanticName, semantic_names ); + if (decl->Semantic.SemanticIndex != 0 || + decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC) { + CHR( '[' ); + UID( decl->Semantic.SemanticIndex ); + CHR( ']' ); + } + } + + TXT( ", " ); + ENM( decl->Declaration.Interpolate, interpolate_names ); +} + +static boolean +iter_declaration( + struct tgsi_iterate_context *iter, + struct tgsi_full_declaration *decl ) +{ + tgsi_dump_declaration( decl ); + return TRUE; +} + +void +tgsi_dump_immediate( + const struct tgsi_full_immediate *imm ) +{ + uint i; + + TXT( "\nIMM " ); + ENM( imm->Immediate.DataType, immediate_type_names ); + + TXT( " { " ); + for (i = 0; i < imm->Immediate.Size - 1; i++) { + switch (imm->Immediate.DataType) { + case TGSI_IMM_FLOAT32: + FLT( imm->u.ImmediateFloat32[i].Float ); + break; + default: + assert( 0 ); + } + + if (i < imm->Immediate.Size - 2) + TXT( ", " ); + } + TXT( " }" ); +} + +static boolean +iter_immediate( + struct tgsi_iterate_context *iter, + struct tgsi_full_immediate *imm ) +{ + tgsi_dump_immediate( imm ); + return TRUE; +} + +void +tgsi_dump_instruction( + const struct tgsi_full_instruction *inst, + uint instno ) +{ + uint i; + boolean first_reg = TRUE; + + EOL(); + UID( instno ); + CHR( ':' ); + ENM( inst->Instruction.Opcode, opcode_names ); + + switch (inst->Instruction.Saturate) { + case TGSI_SAT_NONE: + break; + case TGSI_SAT_ZERO_ONE: + TXT( "_SAT" ); + break; + case TGSI_SAT_MINUS_PLUS_ONE: + TXT( "_SATNV" ); + break; + default: + assert( 0 ); + } + + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + + if (!first_reg) + CHR( ',' ); + CHR( ' ' ); + + _dump_register( + dst->DstRegister.File, + dst->DstRegister.Index, + dst->DstRegister.Index ); + ENM( dst->DstRegisterExtModulate.Modulate, modulate_names ); + _dump_writemask( dst->DstRegister.WriteMask ); + + first_reg = FALSE; + } + + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + + if (!first_reg) + CHR( ',' ); + CHR( ' ' ); + + if (src->SrcRegisterExtMod.Negate) + TXT( "-(" ); + if (src->SrcRegisterExtMod.Absolute) + CHR( '|' ); + if (src->SrcRegisterExtMod.Scale2X) + TXT( "2*(" ); + if (src->SrcRegisterExtMod.Bias) + CHR( '(' ); + if (src->SrcRegisterExtMod.Complement) + TXT( "1-(" ); + if (src->SrcRegister.Negate) + CHR( '-' ); + + if (src->SrcRegister.Indirect) { + _dump_register_ind( + src->SrcRegister.File, + src->SrcRegister.Index, + src->SrcRegisterInd.File, + src->SrcRegisterInd.Index ); + } + else { + _dump_register( + src->SrcRegister.File, + src->SrcRegister.Index, + src->SrcRegister.Index ); + } + + if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || + src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || + src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || + src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W) { + CHR( '.' ); + ENM( src->SrcRegister.SwizzleX, swizzle_names ); + ENM( src->SrcRegister.SwizzleY, swizzle_names ); + ENM( src->SrcRegister.SwizzleZ, swizzle_names ); + ENM( src->SrcRegister.SwizzleW, swizzle_names ); + } + if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || + src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || + src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || + src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) { + CHR( '.' ); + if (src->SrcRegisterExtSwz.NegateX) + TXT("-"); + ENM( src->SrcRegisterExtSwz.ExtSwizzleX, extswizzle_names ); + if (src->SrcRegisterExtSwz.NegateY) + TXT("-"); + ENM( src->SrcRegisterExtSwz.ExtSwizzleY, extswizzle_names ); + if (src->SrcRegisterExtSwz.NegateZ) + TXT("-"); + ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, extswizzle_names ); + if (src->SrcRegisterExtSwz.NegateW) + TXT("-"); + ENM( src->SrcRegisterExtSwz.ExtSwizzleW, extswizzle_names ); + } + + if (src->SrcRegisterExtMod.Complement) + CHR( ')' ); + if (src->SrcRegisterExtMod.Bias) + TXT( ")-.5" ); + if (src->SrcRegisterExtMod.Scale2X) + CHR( ')' ); + if (src->SrcRegisterExtMod.Absolute) + CHR( '|' ); + if (src->SrcRegisterExtMod.Negate) + CHR( ')' ); + + first_reg = FALSE; + } + + if (inst->InstructionExtTexture.Texture != TGSI_TEXTURE_UNKNOWN) { + TXT( ", " ); + ENM( inst->InstructionExtTexture.Texture, texture_names ); + } + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_IF: + case TGSI_OPCODE_ELSE: + case TGSI_OPCODE_BGNLOOP2: + case TGSI_OPCODE_ENDLOOP2: + case TGSI_OPCODE_CAL: + TXT( " :" ); + UID( inst->InstructionExtLabel.Label ); + break; + } +} + +static boolean +iter_instruction( + struct tgsi_iterate_context *iter, + struct tgsi_full_instruction *inst ) +{ + struct dump_ctx *ctx = (struct dump_ctx *) iter; + + tgsi_dump_instruction( inst, ctx->instno++ ); + return TRUE; +} + +static boolean +prolog( + struct tgsi_iterate_context *ctx ) +{ + EOL(); + ENM( ctx->processor.Processor, processor_type_names ); + UID( ctx->version.MajorVersion ); + CHR( '.' ); + UID( ctx->version.MinorVersion ); + return TRUE; +} + +void +tgsi_dump( + const struct tgsi_token *tokens, + uint flags ) +{ + struct dump_ctx ctx; + + /* sanity checks */ + assert( strcmp( opcode_names[TGSI_OPCODE_CONT], "CONT" ) == 0 ); + assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 ); + + ctx.iter.prolog = prolog; + ctx.iter.iterate_instruction = iter_instruction; + ctx.iter.iterate_declaration = iter_declaration; + ctx.iter.iterate_immediate = iter_immediate; + ctx.iter.epilog = NULL; + + ctx.instno = 0; + + tgsi_iterate_shader( tokens, &ctx.iter ); +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h new file mode 100644 index 0000000000..51c230b5db --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h @@ -0,0 +1,63 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_DUMP_H +#define TGSI_DUMP_H + +#include "pipe/p_shader_tokens.h" + +#if defined __cplusplus +extern "C" { +#endif + +void +tgsi_dump( + const struct tgsi_token *tokens, + uint flags ); + +struct tgsi_full_immediate; +struct tgsi_full_instruction; +struct tgsi_full_declaration; + +void +tgsi_dump_immediate( + const struct tgsi_full_immediate *imm ); + +void +tgsi_dump_instruction( + const struct tgsi_full_instruction *inst, + uint instno ); + +void +tgsi_dump_declaration( + const struct tgsi_full_declaration *decl ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_DUMP_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c new file mode 100644 index 0000000000..eabd74bd6d --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -0,0 +1,845 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "pipe/p_util.h" +#include "util/u_string.h" +#include "tgsi_dump_c.h" +#include "tgsi_parse.h" +#include "tgsi_build.h" + +static void +dump_enum( + const unsigned e, + const char **enums, + const unsigned enums_count ) +{ + if (e >= enums_count) { + debug_printf( "%u", e ); + } + else { + debug_printf( "%s", enums[e] ); + } +} + +#define EOL() debug_printf( "\n" ) +#define TXT(S) debug_printf( "%s", S ) +#define CHR(C) debug_printf( "%c", C ) +#define UIX(I) debug_printf( "0x%x", I ) +#define UID(I) debug_printf( "%u", I ) +#define SID(I) debug_printf( "%d", I ) +#define FLT(F) debug_printf( "%10.4f", F ) +#define ENM(E,ENUMS) dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) + +static const char *TGSI_PROCESSOR_TYPES[] = +{ + "PROCESSOR_FRAGMENT", + "PROCESSOR_VERTEX", + "PROCESSOR_GEOMETRY" +}; + +static const char *TGSI_TOKEN_TYPES[] = +{ + "TOKEN_TYPE_DECLARATION", + "TOKEN_TYPE_IMMEDIATE", + "TOKEN_TYPE_INSTRUCTION" +}; + +static const char *TGSI_FILES[] = +{ + "FILE_NULL", + "FILE_CONSTANT", + "FILE_INPUT", + "FILE_OUTPUT", + "FILE_TEMPORARY", + "FILE_SAMPLER", + "FILE_ADDRESS", + "FILE_IMMEDIATE" +}; + +static const char *TGSI_INTERPOLATES[] = +{ + "INTERPOLATE_CONSTANT", + "INTERPOLATE_LINEAR", + "INTERPOLATE_PERSPECTIVE" +}; + +static const char *TGSI_SEMANTICS[] = +{ + "SEMANTIC_POSITION", + "SEMANTIC_COLOR", + "SEMANTIC_BCOLOR", + "SEMANTIC_FOG", + "SEMANTIC_PSIZE", + "SEMANTIC_GENERIC", + "SEMANTIC_NORMAL" +}; + +static const char *TGSI_IMMS[] = +{ + "IMM_FLOAT32" +}; + +static const char *TGSI_OPCODES[TGSI_OPCODE_LAST] = +{ + "OPCODE_ARL", + "OPCODE_MOV", + "OPCODE_LIT", + "OPCODE_RCP", + "OPCODE_RSQ", + "OPCODE_EXP", + "OPCODE_LOG", + "OPCODE_MUL", + "OPCODE_ADD", + "OPCODE_DP3", + "OPCODE_DP4", + "OPCODE_DST", + "OPCODE_MIN", + "OPCODE_MAX", + "OPCODE_SLT", + "OPCODE_SGE", + "OPCODE_MAD", + "OPCODE_SUB", + "OPCODE_LERP", + "OPCODE_CND", + "OPCODE_CND0", + "OPCODE_DOT2ADD", + "OPCODE_INDEX", + "OPCODE_NEGATE", + "OPCODE_FRAC", + "OPCODE_CLAMP", + "OPCODE_FLOOR", + "OPCODE_ROUND", + "OPCODE_EXPBASE2", + "OPCODE_LOGBASE2", + "OPCODE_POWER", + "OPCODE_CROSSPRODUCT", + "OPCODE_MULTIPLYMATRIX", + "OPCODE_ABS", + "OPCODE_RCC", + "OPCODE_DPH", + "OPCODE_COS", + "OPCODE_DDX", + "OPCODE_DDY", + "OPCODE_KILP", + "OPCODE_PK2H", + "OPCODE_PK2US", + "OPCODE_PK4B", + "OPCODE_PK4UB", + "OPCODE_RFL", + "OPCODE_SEQ", + "OPCODE_SFL", + "OPCODE_SGT", + "OPCODE_SIN", + "OPCODE_SLE", + "OPCODE_SNE", + "OPCODE_STR", + "OPCODE_TEX", + "OPCODE_TXD", + "OPCODE_TXP", + "OPCODE_UP2H", + "OPCODE_UP2US", + "OPCODE_UP4B", + "OPCODE_UP4UB", + "OPCODE_X2D", + "OPCODE_ARA", + "OPCODE_ARR", + "OPCODE_BRA", + "OPCODE_CAL", + "OPCODE_RET", + "OPCODE_SSG", + "OPCODE_CMP", + "OPCODE_SCS", + "OPCODE_TXB", + "OPCODE_NRM", + "OPCODE_DIV", + "OPCODE_DP2", + "OPCODE_TXL", + "OPCODE_BRK", + "OPCODE_IF", + "OPCODE_LOOP", + "OPCODE_REP", + "OPCODE_ELSE", + "OPCODE_ENDIF", + "OPCODE_ENDLOOP", + "OPCODE_ENDREP", + "OPCODE_PUSHA", + "OPCODE_POPA", + "OPCODE_CEIL", + "OPCODE_I2F", + "OPCODE_NOT", + "OPCODE_TRUNC", + "OPCODE_SHL", + "OPCODE_SHR", + "OPCODE_AND", + "OPCODE_OR", + "OPCODE_MOD", + "OPCODE_XOR", + "OPCODE_SAD", + "OPCODE_TXF", + "OPCODE_TXQ", + "OPCODE_CONT", + "OPCODE_EMIT", + "OPCODE_ENDPRIM", + "OPCODE_BGNLOOP2", + "OPCODE_BGNSUB", + "OPCODE_ENDLOOP2", + "OPCODE_ENDSUB", + "OPCODE_NOISE1", + "OPCODE_NOISE2", + "OPCODE_NOISE3", + "OPCODE_NOISE4", + "OPCODE_NOP", + "OPCODE_M4X3", + "OPCODE_M3X4", + "OPCODE_M3X3", + "OPCODE_M3X2", + "OPCODE_NRM4", + "OPCODE_CALLNZ", + "OPCODE_IFC", + "OPCODE_BREAKC", + "OPCODE_KIL", + "OPCODE_END" +}; + +static const char *TGSI_SATS[] = +{ + "SAT_NONE", + "SAT_ZERO_ONE", + "SAT_MINUS_PLUS_ONE" +}; + +static const char *TGSI_INSTRUCTION_EXTS[] = +{ + "INSTRUCTION_EXT_TYPE_NV", + "INSTRUCTION_EXT_TYPE_LABEL", + "INSTRUCTION_EXT_TYPE_TEXTURE" +}; + +static const char *TGSI_PRECISIONS[] = +{ + "PRECISION_DEFAULT", + "PRECISION_FLOAT32", + "PRECISION_FLOAT16", + "PRECISION_FIXED12" +}; + +static const char *TGSI_CCS[] = +{ + "CC_GT", + "CC_EQ", + "CC_LT", + "CC_UN", + "CC_GE", + "CC_LE", + "CC_NE", + "CC_TR", + "CC_FL" +}; + +static const char *TGSI_SWIZZLES[] = +{ + "SWIZZLE_X", + "SWIZZLE_Y", + "SWIZZLE_Z", + "SWIZZLE_W" +}; + +static const char *TGSI_TEXTURES[] = +{ + "TEXTURE_UNKNOWN", + "TEXTURE_1D", + "TEXTURE_2D", + "TEXTURE_3D", + "TEXTURE_CUBE", + "TEXTURE_RECT", + "TEXTURE_SHADOW1D", + "TEXTURE_SHADOW2D", + "TEXTURE_SHADOWRECT" +}; + +static const char *TGSI_SRC_REGISTER_EXTS[] = +{ + "SRC_REGISTER_EXT_TYPE_SWZ", + "SRC_REGISTER_EXT_TYPE_MOD" +}; + +static const char *TGSI_EXTSWIZZLES[] = +{ + "EXTSWIZZLE_X", + "EXTSWIZZLE_Y", + "EXTSWIZZLE_Z", + "EXTSWIZZLE_W", + "EXTSWIZZLE_ZERO", + "EXTSWIZZLE_ONE" +}; + +static const char *TGSI_WRITEMASKS[] = +{ + "0", + "WRITEMASK_X", + "WRITEMASK_Y", + "WRITEMASK_XY", + "WRITEMASK_Z", + "WRITEMASK_XZ", + "WRITEMASK_YZ", + "WRITEMASK_XYZ", + "WRITEMASK_W", + "WRITEMASK_XW", + "WRITEMASK_YW", + "WRITEMASK_XYW", + "WRITEMASK_ZW", + "WRITEMASK_XZW", + "WRITEMASK_YZW", + "WRITEMASK_XYZW" +}; + +static const char *TGSI_DST_REGISTER_EXTS[] = +{ + "DST_REGISTER_EXT_TYPE_CONDCODE", + "DST_REGISTER_EXT_TYPE_MODULATE" +}; + +static const char *TGSI_MODULATES[] = +{ + "MODULATE_1X", + "MODULATE_2X", + "MODULATE_4X", + "MODULATE_8X", + "MODULATE_HALF", + "MODULATE_QUARTER", + "MODULATE_EIGHTH" +}; + +static void +dump_declaration_verbose( + struct tgsi_full_declaration *decl, + unsigned ignored, + unsigned deflt, + struct tgsi_full_declaration *fd ) +{ + TXT( "\nFile : " ); + ENM( decl->Declaration.File, TGSI_FILES ); + if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) { + TXT( "\nUsageMask : " ); + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) { + CHR( 'X' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) { + CHR( 'Y' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) { + CHR( 'Z' ); + } + if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) { + CHR( 'W' ); + } + } + if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) { + TXT( "\nInterpolate: " ); + ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES ); + } + if( deflt || fd->Declaration.Semantic != decl->Declaration.Semantic ) { + TXT( "\nSemantic : " ); + UID( decl->Declaration.Semantic ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( decl->Declaration.Padding ); + } + + EOL(); + TXT( "\nFirst: " ); + UID( decl->DeclarationRange.First ); + TXT( "\nLast : " ); + UID( decl->DeclarationRange.Last ); + + if( decl->Declaration.Semantic ) { + EOL(); + TXT( "\nSemanticName : " ); + ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS ); + TXT( "\nSemanticIndex: " ); + UID( decl->Semantic.SemanticIndex ); + if( ignored ) { + TXT( "\nPadding : " ); + UIX( decl->Semantic.Padding ); + } + } +} + +static void +dump_immediate_verbose( + struct tgsi_full_immediate *imm, + unsigned ignored ) +{ + unsigned i; + + TXT( "\nDataType : " ); + ENM( imm->Immediate.DataType, TGSI_IMMS ); + if( ignored ) { + TXT( "\nPadding : " ); + UIX( imm->Immediate.Padding ); + } + + for( i = 0; i < imm->Immediate.Size - 1; i++ ) { + EOL(); + switch( imm->Immediate.DataType ) { + case TGSI_IMM_FLOAT32: + TXT( "\nFloat: " ); + FLT( imm->u.ImmediateFloat32[i].Float ); + break; + + default: + assert( 0 ); + } + } +} + +static void +dump_instruction_verbose( + struct tgsi_full_instruction *inst, + unsigned ignored, + unsigned deflt, + struct tgsi_full_instruction *fi ) +{ + unsigned i; + + TXT( "\nOpcode : " ); + ENM( inst->Instruction.Opcode, TGSI_OPCODES ); + if( deflt || fi->Instruction.Saturate != inst->Instruction.Saturate ) { + TXT( "\nSaturate : " ); + ENM( inst->Instruction.Saturate, TGSI_SATS ); + } + if( deflt || fi->Instruction.NumDstRegs != inst->Instruction.NumDstRegs ) { + TXT( "\nNumDstRegs : " ); + UID( inst->Instruction.NumDstRegs ); + } + if( deflt || fi->Instruction.NumSrcRegs != inst->Instruction.NumSrcRegs ) { + TXT( "\nNumSrcRegs : " ); + UID( inst->Instruction.NumSrcRegs ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( inst->Instruction.Padding ); + } + + if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) { + EOL(); + TXT( "\nType : " ); + ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS ); + if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) { + TXT( "\nPrecision : " ); + ENM( inst->InstructionExtNv.Precision, TGSI_PRECISIONS ); + } + if( deflt || fi->InstructionExtNv.CondDstIndex != inst->InstructionExtNv.CondDstIndex ) { + TXT( "\nCondDstIndex : " ); + UID( inst->InstructionExtNv.CondDstIndex ); + } + if( deflt || fi->InstructionExtNv.CondFlowIndex != inst->InstructionExtNv.CondFlowIndex ) { + TXT( "\nCondFlowIndex : " ); + UID( inst->InstructionExtNv.CondFlowIndex ); + } + if( deflt || fi->InstructionExtNv.CondMask != inst->InstructionExtNv.CondMask ) { + TXT( "\nCondMask : " ); + ENM( inst->InstructionExtNv.CondMask, TGSI_CCS ); + } + if( deflt || fi->InstructionExtNv.CondSwizzleX != inst->InstructionExtNv.CondSwizzleX ) { + TXT( "\nCondSwizzleX : " ); + ENM( inst->InstructionExtNv.CondSwizzleX, TGSI_SWIZZLES ); + } + if( deflt || fi->InstructionExtNv.CondSwizzleY != inst->InstructionExtNv.CondSwizzleY ) { + TXT( "\nCondSwizzleY : " ); + ENM( inst->InstructionExtNv.CondSwizzleY, TGSI_SWIZZLES ); + } + if( deflt || fi->InstructionExtNv.CondSwizzleZ != inst->InstructionExtNv.CondSwizzleZ ) { + TXT( "\nCondSwizzleZ : " ); + ENM( inst->InstructionExtNv.CondSwizzleZ, TGSI_SWIZZLES ); + } + if( deflt || fi->InstructionExtNv.CondSwizzleW != inst->InstructionExtNv.CondSwizzleW ) { + TXT( "\nCondSwizzleW : " ); + ENM( inst->InstructionExtNv.CondSwizzleW, TGSI_SWIZZLES ); + } + if( deflt || fi->InstructionExtNv.CondDstUpdate != inst->InstructionExtNv.CondDstUpdate ) { + TXT( "\nCondDstUpdate : " ); + UID( inst->InstructionExtNv.CondDstUpdate ); + } + if( deflt || fi->InstructionExtNv.CondFlowEnable != inst->InstructionExtNv.CondFlowEnable ) { + TXT( "\nCondFlowEnable: " ); + UID( inst->InstructionExtNv.CondFlowEnable ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( inst->InstructionExtNv.Padding ); + if( deflt || fi->InstructionExtNv.Extended != inst->InstructionExtNv.Extended ) { + TXT( "\nExtended : " ); + UID( inst->InstructionExtNv.Extended ); + } + } + } + + if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) { + EOL(); + TXT( "\nType : " ); + ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS ); + if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) { + TXT( "\nLabel : " ); + UID( inst->InstructionExtLabel.Label ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( inst->InstructionExtLabel.Padding ); + if( deflt || fi->InstructionExtLabel.Extended != inst->InstructionExtLabel.Extended ) { + TXT( "\nExtended: " ); + UID( inst->InstructionExtLabel.Extended ); + } + } + } + + if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) { + EOL(); + TXT( "\nType : " ); + ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS ); + if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) { + TXT( "\nTexture : " ); + ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( inst->InstructionExtTexture.Padding ); + if( deflt || fi->InstructionExtTexture.Extended != inst->InstructionExtTexture.Extended ) { + TXT( "\nExtended: " ); + UID( inst->InstructionExtTexture.Extended ); + } + } + } + + for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { + struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; + struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i]; + + EOL(); + TXT( "\nFile : " ); + ENM( dst->DstRegister.File, TGSI_FILES ); + if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) { + TXT( "\nWriteMask: " ); + ENM( dst->DstRegister.WriteMask, TGSI_WRITEMASKS ); + } + if( ignored ) { + if( deflt || fd->DstRegister.Indirect != dst->DstRegister.Indirect ) { + TXT( "\nIndirect : " ); + UID( dst->DstRegister.Indirect ); + } + if( deflt || fd->DstRegister.Dimension != dst->DstRegister.Dimension ) { + TXT( "\nDimension: " ); + UID( dst->DstRegister.Dimension ); + } + } + if( deflt || fd->DstRegister.Index != dst->DstRegister.Index ) { + TXT( "\nIndex : " ); + SID( dst->DstRegister.Index ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( dst->DstRegister.Padding ); + if( deflt || fd->DstRegister.Extended != dst->DstRegister.Extended ) { + TXT( "\nExtended : " ); + UID( dst->DstRegister.Extended ); + } + } + + if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) { + EOL(); + TXT( "\nType : " ); + ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS ); + if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) { + TXT( "\nCondMask : " ); + ENM( dst->DstRegisterExtConcode.CondMask, TGSI_CCS ); + } + if( deflt || fd->DstRegisterExtConcode.CondSwizzleX != dst->DstRegisterExtConcode.CondSwizzleX ) { + TXT( "\nCondSwizzleX: " ); + ENM( dst->DstRegisterExtConcode.CondSwizzleX, TGSI_SWIZZLES ); + } + if( deflt || fd->DstRegisterExtConcode.CondSwizzleY != dst->DstRegisterExtConcode.CondSwizzleY ) { + TXT( "\nCondSwizzleY: " ); + ENM( dst->DstRegisterExtConcode.CondSwizzleY, TGSI_SWIZZLES ); + } + if( deflt || fd->DstRegisterExtConcode.CondSwizzleZ != dst->DstRegisterExtConcode.CondSwizzleZ ) { + TXT( "\nCondSwizzleZ: " ); + ENM( dst->DstRegisterExtConcode.CondSwizzleZ, TGSI_SWIZZLES ); + } + if( deflt || fd->DstRegisterExtConcode.CondSwizzleW != dst->DstRegisterExtConcode.CondSwizzleW ) { + TXT( "\nCondSwizzleW: " ); + ENM( dst->DstRegisterExtConcode.CondSwizzleW, TGSI_SWIZZLES ); + } + if( deflt || fd->DstRegisterExtConcode.CondSrcIndex != dst->DstRegisterExtConcode.CondSrcIndex ) { + TXT( "\nCondSrcIndex: " ); + UID( dst->DstRegisterExtConcode.CondSrcIndex ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( dst->DstRegisterExtConcode.Padding ); + if( deflt || fd->DstRegisterExtConcode.Extended != dst->DstRegisterExtConcode.Extended ) { + TXT( "\nExtended : " ); + UID( dst->DstRegisterExtConcode.Extended ); + } + } + } + + if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) { + EOL(); + TXT( "\nType : " ); + ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS ); + if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) { + TXT( "\nModulate: " ); + ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( dst->DstRegisterExtModulate.Padding ); + if( deflt || fd->DstRegisterExtModulate.Extended != dst->DstRegisterExtModulate.Extended ) { + TXT( "\nExtended: " ); + UID( dst->DstRegisterExtModulate.Extended ); + } + } + } + } + + for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { + struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; + struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i]; + + EOL(); + TXT( "\nFile : "); + ENM( src->SrcRegister.File, TGSI_FILES ); + if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) { + TXT( "\nSwizzleX : " ); + ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES ); + } + if( deflt || fs->SrcRegister.SwizzleY != src->SrcRegister.SwizzleY ) { + TXT( "\nSwizzleY : " ); + ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES ); + } + if( deflt || fs->SrcRegister.SwizzleZ != src->SrcRegister.SwizzleZ ) { + TXT( "\nSwizzleZ : " ); + ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES ); + } + if( deflt || fs->SrcRegister.SwizzleW != src->SrcRegister.SwizzleW ) { + TXT( "\nSwizzleW : " ); + ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES ); + } + if( deflt || fs->SrcRegister.Negate != src->SrcRegister.Negate ) { + TXT( "\nNegate : " ); + UID( src->SrcRegister.Negate ); + } + if( ignored ) { + if( deflt || fs->SrcRegister.Indirect != src->SrcRegister.Indirect ) { + TXT( "\nIndirect : " ); + UID( src->SrcRegister.Indirect ); + } + if( deflt || fs->SrcRegister.Dimension != src->SrcRegister.Dimension ) { + TXT( "\nDimension: " ); + UID( src->SrcRegister.Dimension ); + } + } + if( deflt || fs->SrcRegister.Index != src->SrcRegister.Index ) { + TXT( "\nIndex : " ); + SID( src->SrcRegister.Index ); + } + if( ignored ) { + if( deflt || fs->SrcRegister.Extended != src->SrcRegister.Extended ) { + TXT( "\nExtended : " ); + UID( src->SrcRegister.Extended ); + } + } + + if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) { + EOL(); + TXT( "\nType : " ); + ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS ); + if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) { + TXT( "\nExtSwizzleX: " ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES ); + } + if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleY != src->SrcRegisterExtSwz.ExtSwizzleY ) { + TXT( "\nExtSwizzleY: " ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES ); + } + if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleZ != src->SrcRegisterExtSwz.ExtSwizzleZ ) { + TXT( "\nExtSwizzleZ: " ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES ); + } + if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleW != src->SrcRegisterExtSwz.ExtSwizzleW ) { + TXT( "\nExtSwizzleW: " ); + ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES ); + } + if( deflt || fs->SrcRegisterExtSwz.NegateX != src->SrcRegisterExtSwz.NegateX ) { + TXT( "\nNegateX : " ); + UID( src->SrcRegisterExtSwz.NegateX ); + } + if( deflt || fs->SrcRegisterExtSwz.NegateY != src->SrcRegisterExtSwz.NegateY ) { + TXT( "\nNegateY : " ); + UID( src->SrcRegisterExtSwz.NegateY ); + } + if( deflt || fs->SrcRegisterExtSwz.NegateZ != src->SrcRegisterExtSwz.NegateZ ) { + TXT( "\nNegateZ : " ); + UID( src->SrcRegisterExtSwz.NegateZ ); + } + if( deflt || fs->SrcRegisterExtSwz.NegateW != src->SrcRegisterExtSwz.NegateW ) { + TXT( "\nNegateW : " ); + UID( src->SrcRegisterExtSwz.NegateW ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( src->SrcRegisterExtSwz.Padding ); + if( deflt || fs->SrcRegisterExtSwz.Extended != src->SrcRegisterExtSwz.Extended ) { + TXT( "\nExtended : " ); + UID( src->SrcRegisterExtSwz.Extended ); + } + } + } + + if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) { + EOL(); + TXT( "\nType : " ); + ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS ); + if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) { + TXT( "\nComplement: " ); + UID( src->SrcRegisterExtMod.Complement ); + } + if( deflt || fs->SrcRegisterExtMod.Bias != src->SrcRegisterExtMod.Bias ) { + TXT( "\nBias : " ); + UID( src->SrcRegisterExtMod.Bias ); + } + if( deflt || fs->SrcRegisterExtMod.Scale2X != src->SrcRegisterExtMod.Scale2X ) { + TXT( "\nScale2X : " ); + UID( src->SrcRegisterExtMod.Scale2X ); + } + if( deflt || fs->SrcRegisterExtMod.Absolute != src->SrcRegisterExtMod.Absolute ) { + TXT( "\nAbsolute : " ); + UID( src->SrcRegisterExtMod.Absolute ); + } + if( deflt || fs->SrcRegisterExtMod.Negate != src->SrcRegisterExtMod.Negate ) { + TXT( "\nNegate : " ); + UID( src->SrcRegisterExtMod.Negate ); + } + if( ignored ) { + TXT( "\nPadding : " ); + UIX( src->SrcRegisterExtMod.Padding ); + if( deflt || fs->SrcRegisterExtMod.Extended != src->SrcRegisterExtMod.Extended ) { + TXT( "\nExtended : " ); + UID( src->SrcRegisterExtMod.Extended ); + } + } + } + } +} + +void +tgsi_dump_c( + const struct tgsi_token *tokens, + uint flags ) +{ + struct tgsi_parse_context parse; + struct tgsi_full_instruction fi; + struct tgsi_full_declaration fd; + uint ignored = flags & TGSI_DUMP_C_IGNORED; + uint deflt = flags & TGSI_DUMP_C_DEFAULT; + uint instno = 0; + + /* sanity checks */ + assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0); + assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0); + + tgsi_parse_init( &parse, tokens ); + + TXT( "tgsi-dump begin -----------------" ); + + TXT( "\nMajorVersion: " ); + UID( parse.FullVersion.Version.MajorVersion ); + TXT( "\nMinorVersion: " ); + UID( parse.FullVersion.Version.MinorVersion ); + EOL(); + + TXT( "\nHeaderSize: " ); + UID( parse.FullHeader.Header.HeaderSize ); + TXT( "\nBodySize : " ); + UID( parse.FullHeader.Header.BodySize ); + TXT( "\nProcessor : " ); + ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES ); + EOL(); + + fi = tgsi_default_full_instruction(); + fd = tgsi_default_full_declaration(); + + while( !tgsi_parse_end_of_tokens( &parse ) ) { + tgsi_parse_token( &parse ); + + TXT( "\nType : " ); + ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES ); + if( ignored ) { + TXT( "\nSize : " ); + UID( parse.FullToken.Token.Size ); + if( deflt || parse.FullToken.Token.Extended ) { + TXT( "\nExtended : " ); + UID( parse.FullToken.Token.Extended ); + } + } + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + dump_declaration_verbose( + &parse.FullToken.FullDeclaration, + ignored, + deflt, + &fd ); + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + dump_immediate_verbose( + &parse.FullToken.FullImmediate, + ignored ); + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + dump_instruction_verbose( + &parse.FullToken.FullInstruction, + ignored, + deflt, + &fi ); + break; + + default: + assert( 0 ); + } + + EOL(); + } + + TXT( "\ntgsi-dump end -------------------\n" ); + + tgsi_parse_free( &parse ); +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.h b/src/gallium/auxiliary/tgsi/tgsi_dump_c.h new file mode 100644 index 0000000000..d91cd35b3b --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.h @@ -0,0 +1,49 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_DUMP_C_H +#define TGSI_DUMP_C_H + +#include "pipe/p_shader_tokens.h" + +#if defined __cplusplus +extern "C" { +#endif + +#define TGSI_DUMP_C_IGNORED 1 +#define TGSI_DUMP_C_DEFAULT 2 + +void +tgsi_dump_c( + const struct tgsi_token *tokens, + uint flags ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_DUMP_C_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c new file mode 100644 index 0000000000..8b430548bc --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -0,0 +1,2522 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI interpretor/executor. + * + * Flow control information: + * + * Since we operate on 'quads' (4 pixels or 4 vertices in parallel) + * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special + * care since a condition may be true for some quad components but false + * for other components. + * + * We basically execute all statements (even if they're in the part of + * an IF/ELSE clause that's "not taken") and use a special mask to + * control writing to destination registers. This is the ExecMask. + * See store_dest(). + * + * The ExecMask is computed from three other masks (CondMask, LoopMask and + * ContMask) which are controlled by the flow control instructions (namely: + * (IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT). + * + * + * Authors: + * Michal Krol + * Brian Paul + */ + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi_exec.h" + +#define TILE_TOP_LEFT 0 +#define TILE_TOP_RIGHT 1 +#define TILE_BOTTOM_LEFT 2 +#define TILE_BOTTOM_RIGHT 3 + +/* + * Shorthand locations of various utility registers (_I = Index, _C = Channel) + */ +#define TEMP_0_I TGSI_EXEC_TEMP_00000000_I +#define TEMP_0_C TGSI_EXEC_TEMP_00000000_C +#define TEMP_7F_I TGSI_EXEC_TEMP_7FFFFFFF_I +#define TEMP_7F_C TGSI_EXEC_TEMP_7FFFFFFF_C +#define TEMP_80_I TGSI_EXEC_TEMP_80000000_I +#define TEMP_80_C TGSI_EXEC_TEMP_80000000_C +#define TEMP_FF_I TGSI_EXEC_TEMP_FFFFFFFF_I +#define TEMP_FF_C TGSI_EXEC_TEMP_FFFFFFFF_C +#define TEMP_1_I TGSI_EXEC_TEMP_ONE_I +#define TEMP_1_C TGSI_EXEC_TEMP_ONE_C +#define TEMP_2_I TGSI_EXEC_TEMP_TWO_I +#define TEMP_2_C TGSI_EXEC_TEMP_TWO_C +#define TEMP_128_I TGSI_EXEC_TEMP_128_I +#define TEMP_128_C TGSI_EXEC_TEMP_128_C +#define TEMP_M128_I TGSI_EXEC_TEMP_MINUS_128_I +#define TEMP_M128_C TGSI_EXEC_TEMP_MINUS_128_C +#define TEMP_KILMASK_I TGSI_EXEC_TEMP_KILMASK_I +#define TEMP_KILMASK_C TGSI_EXEC_TEMP_KILMASK_C +#define TEMP_OUTPUT_I TGSI_EXEC_TEMP_OUTPUT_I +#define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C +#define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I +#define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C +#define TEMP_3_I TGSI_EXEC_TEMP_THREE_I +#define TEMP_3_C TGSI_EXEC_TEMP_THREE_C +#define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I +#define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C +#define TEMP_R0 TGSI_EXEC_TEMP_R0 + +#define FOR_EACH_CHANNEL(CHAN)\ + for (CHAN = 0; CHAN < 4; CHAN++) + +#define IS_CHANNEL_ENABLED(INST, CHAN)\ + ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + +#define IS_CHANNEL_ENABLED2(INST, CHAN)\ + ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) + +#define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ + FOR_EACH_CHANNEL( CHAN )\ + if (IS_CHANNEL_ENABLED( INST, CHAN )) + +#define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ + FOR_EACH_CHANNEL( CHAN )\ + if (IS_CHANNEL_ENABLED2( INST, CHAN )) + + +/** The execution mask depends on the conditional mask and the loop mask */ +#define UPDATE_EXEC_MASK(MACH) \ + MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask + + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + + + +/** + * Initialize machine state by expanding tokens to full instructions, + * allocating temporary storage, setting up constants, etc. + * After this, we can call tgsi_exec_machine_run() many times. + */ +void +tgsi_exec_machine_bind_shader( + struct tgsi_exec_machine *mach, + const struct tgsi_token *tokens, + uint numSamplers, + struct tgsi_sampler *samplers) +{ + uint k; + struct tgsi_parse_context parse; + struct tgsi_exec_labels *labels = &mach->Labels; + struct tgsi_full_instruction *instructions; + struct tgsi_full_declaration *declarations; + uint maxInstructions = 10, numInstructions = 0; + uint maxDeclarations = 10, numDeclarations = 0; + uint instno = 0; + +#if 0 + tgsi_dump(tokens, 0); +#endif + + mach->Tokens = tokens; + mach->Samplers = samplers; + + k = tgsi_parse_init (&parse, mach->Tokens); + if (k != TGSI_PARSE_OK) { + debug_printf( "Problem parsing!\n" ); + return; + } + + mach->Processor = parse.FullHeader.Processor.Processor; + mach->ImmLimit = 0; + labels->count = 0; + + declarations = (struct tgsi_full_declaration *) + MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) ); + + if (!declarations) { + return; + } + + instructions = (struct tgsi_full_instruction *) + MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) ); + + if (!instructions) { + FREE( declarations ); + return; + } + + while( !tgsi_parse_end_of_tokens( &parse ) ) { + uint pointer = parse.Position; + uint i; + + tgsi_parse_token( &parse ); + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + /* save expanded declaration */ + if (numDeclarations == maxDeclarations) { + declarations = REALLOC(declarations, + maxDeclarations + * sizeof(struct tgsi_full_declaration), + (maxDeclarations + 10) + * sizeof(struct tgsi_full_declaration)); + maxDeclarations += 10; + } + memcpy(declarations + numDeclarations, + &parse.FullToken.FullDeclaration, + sizeof(declarations[0])); + numDeclarations++; + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + assert( size % 4 == 0 ); + assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES ); + + for( i = 0; i < size; i++ ) { + mach->Imms[mach->ImmLimit + i / 4][i % 4] = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + } + mach->ImmLimit += size / 4; + } + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + assert( labels->count < MAX_LABELS ); + + labels->labels[labels->count][0] = instno; + labels->labels[labels->count][1] = pointer; + labels->count++; + + /* save expanded instruction */ + if (numInstructions == maxInstructions) { + instructions = REALLOC(instructions, + maxInstructions + * sizeof(struct tgsi_full_instruction), + (maxInstructions + 10) + * sizeof(struct tgsi_full_instruction)); + maxInstructions += 10; + } + memcpy(instructions + numInstructions, + &parse.FullToken.FullInstruction, + sizeof(instructions[0])); + numInstructions++; + break; + + default: + assert( 0 ); + } + } + tgsi_parse_free (&parse); + + if (mach->Declarations) { + FREE( mach->Declarations ); + } + mach->Declarations = declarations; + mach->NumDeclarations = numDeclarations; + + if (mach->Instructions) { + FREE( mach->Instructions ); + } + mach->Instructions = instructions; + mach->NumInstructions = numInstructions; +} + + +void +tgsi_exec_machine_init( + struct tgsi_exec_machine *mach ) +{ + uint i; + + mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps); + mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR]; + + /* Setup constants. */ + for( i = 0; i < 4; i++ ) { + mach->Temps[TEMP_0_I].xyzw[TEMP_0_C].u[i] = 0x00000000; + mach->Temps[TEMP_7F_I].xyzw[TEMP_7F_C].u[i] = 0x7FFFFFFF; + mach->Temps[TEMP_80_I].xyzw[TEMP_80_C].u[i] = 0x80000000; + mach->Temps[TEMP_FF_I].xyzw[TEMP_FF_C].u[i] = 0xFFFFFFFF; + mach->Temps[TEMP_1_I].xyzw[TEMP_1_C].f[i] = 1.0f; + mach->Temps[TEMP_2_I].xyzw[TEMP_2_C].f[i] = 2.0f; + mach->Temps[TEMP_128_I].xyzw[TEMP_128_C].f[i] = 128.0f; + mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C].f[i] = -128.0f; + mach->Temps[TEMP_3_I].xyzw[TEMP_3_C].f[i] = 3.0f; + mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C].f[i] = 0.5f; + } +} + + +void +tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach) +{ + if (mach->Instructions) { + FREE(mach->Instructions); + mach->Instructions = NULL; + mach->NumInstructions = 0; + } + if (mach->Declarations) { + FREE(mach->Declarations); + mach->Declarations = NULL; + mach->NumDeclarations = 0; + } +} + + +static void +micro_abs( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = fabsf( src->f[0] ); + dst->f[1] = fabsf( src->f[1] ); + dst->f[2] = fabsf( src->f[2] ); + dst->f[3] = fabsf( src->f[3] ); +} + +static void +micro_add( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] + src1->f[0]; + dst->f[1] = src0->f[1] + src1->f[1]; + dst->f[2] = src0->f[2] + src1->f[2]; + dst->f[3] = src0->f[3] + src1->f[3]; +} + +static void +micro_iadd( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] + src1->i[0]; + dst->i[1] = src0->i[1] + src1->i[1]; + dst->i[2] = src0->i[2] + src1->i[2]; + dst->i[3] = src0->i[3] + src1->i[3]; +} + +static void +micro_and( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] & src1->u[0]; + dst->u[1] = src0->u[1] & src1->u[1]; + dst->u[2] = src0->u[2] & src1->u[2]; + dst->u[3] = src0->u[3] & src1->u[3]; +} + +static void +micro_ceil( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = ceilf( src->f[0] ); + dst->f[1] = ceilf( src->f[1] ); + dst->f[2] = ceilf( src->f[2] ); + dst->f[3] = ceilf( src->f[3] ); +} + +static void +micro_cos( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = cosf( src->f[0] ); + dst->f[1] = cosf( src->f[1] ); + dst->f[2] = cosf( src->f[2] ); + dst->f[3] = cosf( src->f[3] ); +} + +static void +micro_ddx( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT]; +} + +static void +micro_ddy( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = + dst->f[1] = + dst->f[2] = + dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT]; +} + +static void +micro_div( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] / src1->f[0]; + dst->f[1] = src0->f[1] / src1->f[1]; + dst->f[2] = src0->f[2] / src1->f[2]; + dst->f[3] = src0->f[3] / src1->f[3]; +} + +static void +micro_udiv( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] / src1->u[0]; + dst->u[1] = src0->u[1] / src1->u[1]; + dst->u[2] = src0->u[2] / src1->u[2]; + dst->u[3] = src0->u[3] / src1->u[3]; +} + +static void +micro_eq( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->f[0] = src0->f[0] == src1->f[0] ? src2->f[0] : src3->f[0]; + dst->f[1] = src0->f[1] == src1->f[1] ? src2->f[1] : src3->f[1]; + dst->f[2] = src0->f[2] == src1->f[2] ? src2->f[2] : src3->f[2]; + dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; +} + +static void +micro_ieq( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->i[0] = src0->i[0] == src1->i[0] ? src2->i[0] : src3->i[0]; + dst->i[1] = src0->i[1] == src1->i[1] ? src2->i[1] : src3->i[1]; + dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; + dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; +} + +static void +micro_exp2( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src) +{ + dst->f[0] = powf( 2.0f, src->f[0] ); + dst->f[1] = powf( 2.0f, src->f[1] ); + dst->f[2] = powf( 2.0f, src->f[2] ); + dst->f[3] = powf( 2.0f, src->f[3] ); +} + +static void +micro_f2it( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->i[0] = (int) src->f[0]; + dst->i[1] = (int) src->f[1]; + dst->i[2] = (int) src->f[2]; + dst->i[3] = (int) src->f[3]; +} + +static void +micro_f2ut( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->u[0] = (uint) src->f[0]; + dst->u[1] = (uint) src->f[1]; + dst->u[2] = (uint) src->f[2]; + dst->u[3] = (uint) src->f[3]; +} + +static void +micro_flr( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = floorf( src->f[0] ); + dst->f[1] = floorf( src->f[1] ); + dst->f[2] = floorf( src->f[2] ); + dst->f[3] = floorf( src->f[3] ); +} + +static void +micro_frc( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = src->f[0] - floorf( src->f[0] ); + dst->f[1] = src->f[1] - floorf( src->f[1] ); + dst->f[2] = src->f[2] - floorf( src->f[2] ); + dst->f[3] = src->f[3] - floorf( src->f[3] ); +} + +static void +micro_ge( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->f[0] = src0->f[0] >= src1->f[0] ? src2->f[0] : src3->f[0]; + dst->f[1] = src0->f[1] >= src1->f[1] ? src2->f[1] : src3->f[1]; + dst->f[2] = src0->f[2] >= src1->f[2] ? src2->f[2] : src3->f[2]; + dst->f[3] = src0->f[3] >= src1->f[3] ? src2->f[3] : src3->f[3]; +} + +static void +micro_i2f( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (float) src->i[0]; + dst->f[1] = (float) src->i[1]; + dst->f[2] = (float) src->i[2]; + dst->f[3] = (float) src->i[3]; +} + +static void +micro_lg2( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = logf( src->f[0] ) * 1.442695f; + dst->f[1] = logf( src->f[1] ) * 1.442695f; + dst->f[2] = logf( src->f[2] ) * 1.442695f; + dst->f[3] = logf( src->f[3] ) * 1.442695f; +} + +static void +micro_le( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->f[0] = src0->f[0] <= src1->f[0] ? src2->f[0] : src3->f[0]; + dst->f[1] = src0->f[1] <= src1->f[1] ? src2->f[1] : src3->f[1]; + dst->f[2] = src0->f[2] <= src1->f[2] ? src2->f[2] : src3->f[2]; + dst->f[3] = src0->f[3] <= src1->f[3] ? src2->f[3] : src3->f[3]; +} + +static void +micro_lt( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0]; + dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1]; + dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2]; + dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; +} + +static void +micro_ilt( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->i[0] = src0->i[0] < src1->i[0] ? src2->i[0] : src3->i[0]; + dst->i[1] = src0->i[1] < src1->i[1] ? src2->i[1] : src3->i[1]; + dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; + dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; +} + +static void +micro_ult( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2, + const union tgsi_exec_channel *src3 ) +{ + dst->u[0] = src0->u[0] < src1->u[0] ? src2->u[0] : src3->u[0]; + dst->u[1] = src0->u[1] < src1->u[1] ? src2->u[1] : src3->u[1]; + dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; + dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; +} + +static void +micro_max( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] > src1->f[0] ? src0->f[0] : src1->f[0]; + dst->f[1] = src0->f[1] > src1->f[1] ? src0->f[1] : src1->f[1]; + dst->f[2] = src0->f[2] > src1->f[2] ? src0->f[2] : src1->f[2]; + dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; +} + +static void +micro_imax( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0]; + dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1]; + dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; + dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; +} + +static void +micro_umax( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0]; + dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1]; + dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; + dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; +} + +static void +micro_min( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] < src1->f[0] ? src0->f[0] : src1->f[0]; + dst->f[1] = src0->f[1] < src1->f[1] ? src0->f[1] : src1->f[1]; + dst->f[2] = src0->f[2] < src1->f[2] ? src0->f[2] : src1->f[2]; + dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; +} + +static void +micro_imin( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0]; + dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1]; + dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; + dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; +} + +static void +micro_umin( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0]; + dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1]; + dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; + dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; +} + +static void +micro_umod( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] % src1->u[0]; + dst->u[1] = src0->u[1] % src1->u[1]; + dst->u[2] = src0->u[2] % src1->u[2]; + dst->u[3] = src0->u[3] % src1->u[3]; +} + +static void +micro_mul( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] * src1->f[0]; + dst->f[1] = src0->f[1] * src1->f[1]; + dst->f[2] = src0->f[2] * src1->f[2]; + dst->f[3] = src0->f[3] * src1->f[3]; +} + +static void +micro_imul( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] * src1->i[0]; + dst->i[1] = src0->i[1] * src1->i[1]; + dst->i[2] = src0->i[2] * src1->i[2]; + dst->i[3] = src0->i[3] * src1->i[3]; +} + +static void +micro_imul64( + union tgsi_exec_channel *dst0, + union tgsi_exec_channel *dst1, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst1->i[0] = src0->i[0] * src1->i[0]; + dst1->i[1] = src0->i[1] * src1->i[1]; + dst1->i[2] = src0->i[2] * src1->i[2]; + dst1->i[3] = src0->i[3] * src1->i[3]; + dst0->i[0] = 0; + dst0->i[1] = 0; + dst0->i[2] = 0; + dst0->i[3] = 0; +} + +static void +micro_umul64( + union tgsi_exec_channel *dst0, + union tgsi_exec_channel *dst1, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst1->u[0] = src0->u[0] * src1->u[0]; + dst1->u[1] = src0->u[1] * src1->u[1]; + dst1->u[2] = src0->u[2] * src1->u[2]; + dst1->u[3] = src0->u[3] * src1->u[3]; + dst0->u[0] = 0; + dst0->u[1] = 0; + dst0->u[2] = 0; + dst0->u[3] = 0; +} + +static void +micro_movc( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1, + const union tgsi_exec_channel *src2 ) +{ + dst->u[0] = src0->u[0] ? src1->u[0] : src2->u[0]; + dst->u[1] = src0->u[1] ? src1->u[1] : src2->u[1]; + dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; + dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; +} + +static void +micro_neg( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = -src->f[0]; + dst->f[1] = -src->f[1]; + dst->f[2] = -src->f[2]; + dst->f[3] = -src->f[3]; +} + +static void +micro_ineg( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->i[0] = -src->i[0]; + dst->i[1] = -src->i[1]; + dst->i[2] = -src->i[2]; + dst->i[3] = -src->i[3]; +} + +static void +micro_not( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->u[0] = ~src->u[0]; + dst->u[1] = ~src->u[1]; + dst->u[2] = ~src->u[2]; + dst->u[3] = ~src->u[3]; +} + +static void +micro_or( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] | src1->u[0]; + dst->u[1] = src0->u[1] | src1->u[1]; + dst->u[2] = src0->u[2] | src1->u[2]; + dst->u[3] = src0->u[3] | src1->u[3]; +} + +static void +micro_pow( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = powf( src0->f[0], src1->f[0] ); + dst->f[1] = powf( src0->f[1], src1->f[1] ); + dst->f[2] = powf( src0->f[2], src1->f[2] ); + dst->f[3] = powf( src0->f[3], src1->f[3] ); +} + +static void +micro_rnd( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = floorf( src->f[0] + 0.5f ); + dst->f[1] = floorf( src->f[1] + 0.5f ); + dst->f[2] = floorf( src->f[2] + 0.5f ); + dst->f[3] = floorf( src->f[3] + 0.5f ); +} + +static void +micro_shl( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] << src1->i[0]; + dst->i[1] = src0->i[1] << src1->i[1]; + dst->i[2] = src0->i[2] << src1->i[2]; + dst->i[3] = src0->i[3] << src1->i[3]; +} + +static void +micro_ishr( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->i[0] = src0->i[0] >> src1->i[0]; + dst->i[1] = src0->i[1] >> src1->i[1]; + dst->i[2] = src0->i[2] >> src1->i[2]; + dst->i[3] = src0->i[3] >> src1->i[3]; +} + +static void +micro_trunc( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0 ) +{ + dst->f[0] = (float) (int) src0->f[0]; + dst->f[1] = (float) (int) src0->f[1]; + dst->f[2] = (float) (int) src0->f[2]; + dst->f[3] = (float) (int) src0->f[3]; +} + +static void +micro_ushr( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] >> src1->u[0]; + dst->u[1] = src0->u[1] >> src1->u[1]; + dst->u[2] = src0->u[2] >> src1->u[2]; + dst->u[3] = src0->u[3] >> src1->u[3]; +} + +static void +micro_sin( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = sinf( src->f[0] ); + dst->f[1] = sinf( src->f[1] ); + dst->f[2] = sinf( src->f[2] ); + dst->f[3] = sinf( src->f[3] ); +} + +static void +micro_sqrt( union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = sqrtf( src->f[0] ); + dst->f[1] = sqrtf( src->f[1] ); + dst->f[2] = sqrtf( src->f[2] ); + dst->f[3] = sqrtf( src->f[3] ); +} + +static void +micro_sub( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->f[0] = src0->f[0] - src1->f[0]; + dst->f[1] = src0->f[1] - src1->f[1]; + dst->f[2] = src0->f[2] - src1->f[2]; + dst->f[3] = src0->f[3] - src1->f[3]; +} + +static void +micro_u2f( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = (float) src->u[0]; + dst->f[1] = (float) src->u[1]; + dst->f[2] = (float) src->u[2]; + dst->f[3] = (float) src->u[3]; +} + +static void +micro_xor( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src0, + const union tgsi_exec_channel *src1 ) +{ + dst->u[0] = src0->u[0] ^ src1->u[0]; + dst->u[1] = src0->u[1] ^ src1->u[1]; + dst->u[2] = src0->u[2] ^ src1->u[2]; + dst->u[3] = src0->u[3] ^ src1->u[3]; +} + +static void +fetch_src_file_channel( + const struct tgsi_exec_machine *mach, + const uint file, + const uint swizzle, + const union tgsi_exec_channel *index, + union tgsi_exec_channel *chan ) +{ + switch( swizzle ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch( file ) { + case TGSI_FILE_CONSTANT: + chan->f[0] = mach->Consts[index->i[0]][swizzle]; + chan->f[1] = mach->Consts[index->i[1]][swizzle]; + chan->f[2] = mach->Consts[index->i[2]][swizzle]; + chan->f[3] = mach->Consts[index->i[3]][swizzle]; + break; + + case TGSI_FILE_INPUT: + chan->u[0] = mach->Inputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Inputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Inputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Inputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_TEMPORARY: + assert(index->i[0] < TGSI_EXEC_NUM_TEMPS); + chan->u[0] = mach->Temps[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Temps[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Temps[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Temps[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_IMMEDIATE: + assert( index->i[0] < (int) mach->ImmLimit ); + chan->f[0] = mach->Imms[index->i[0]][swizzle]; + assert( index->i[1] < (int) mach->ImmLimit ); + chan->f[1] = mach->Imms[index->i[1]][swizzle]; + assert( index->i[2] < (int) mach->ImmLimit ); + chan->f[2] = mach->Imms[index->i[2]][swizzle]; + assert( index->i[3] < (int) mach->ImmLimit ); + chan->f[3] = mach->Imms[index->i[3]][swizzle]; + break; + + case TGSI_FILE_ADDRESS: + chan->u[0] = mach->Addrs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Addrs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Addrs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Addrs[index->i[3]].xyzw[swizzle].u[3]; + break; + + case TGSI_FILE_OUTPUT: + /* vertex/fragment output vars can be read too */ + chan->u[0] = mach->Outputs[index->i[0]].xyzw[swizzle].u[0]; + chan->u[1] = mach->Outputs[index->i[1]].xyzw[swizzle].u[1]; + chan->u[2] = mach->Outputs[index->i[2]].xyzw[swizzle].u[2]; + chan->u[3] = mach->Outputs[index->i[3]].xyzw[swizzle].u[3]; + break; + + default: + assert( 0 ); + } + break; + + case TGSI_EXTSWIZZLE_ZERO: + *chan = mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]; + break; + + case TGSI_EXTSWIZZLE_ONE: + *chan = mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]; + break; + + default: + assert( 0 ); + } +} + +static void +fetch_source( + const struct tgsi_exec_machine *mach, + union tgsi_exec_channel *chan, + const struct tgsi_full_src_register *reg, + const uint chan_index ) +{ + union tgsi_exec_channel index; + uint swizzle; + + index.i[0] = + index.i[1] = + index.i[2] = + index.i[3] = reg->SrcRegister.Index; + + if (reg->SrcRegister.Indirect) { + union tgsi_exec_channel index2; + union tgsi_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); + fetch_src_file_channel( + mach, + reg->SrcRegisterInd.File, + swizzle, + &index2, + &indir_index ); + + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; + } + + if( reg->SrcRegister.Dimension ) { + switch( reg->SrcRegister.File ) { + case TGSI_FILE_INPUT: + index.i[0] *= 17; + index.i[1] *= 17; + index.i[2] *= 17; + index.i[3] *= 17; + break; + case TGSI_FILE_CONSTANT: + index.i[0] *= 4096; + index.i[1] *= 4096; + index.i[2] *= 4096; + index.i[3] *= 4096; + break; + default: + assert( 0 ); + } + + index.i[0] += reg->SrcRegisterDim.Index; + index.i[1] += reg->SrcRegisterDim.Index; + index.i[2] += reg->SrcRegisterDim.Index; + index.i[3] += reg->SrcRegisterDim.Index; + + if (reg->SrcRegisterDim.Indirect) { + union tgsi_exec_channel index2; + union tgsi_exec_channel indir_index; + + index2.i[0] = + index2.i[1] = + index2.i[2] = + index2.i[3] = reg->SrcRegisterDimInd.Index; + + swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterDimInd, CHAN_X ); + fetch_src_file_channel( + mach, + reg->SrcRegisterDimInd.File, + swizzle, + &index2, + &indir_index ); + + index.i[0] += indir_index.i[0]; + index.i[1] += indir_index.i[1]; + index.i[2] += indir_index.i[2]; + index.i[3] += indir_index.i[3]; + } + } + + swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + fetch_src_file_channel( + mach, + reg->SrcRegister.File, + swizzle, + &index, + chan ); + + switch (tgsi_util_get_full_src_register_sign_mode( reg, chan_index )) { + case TGSI_UTIL_SIGN_CLEAR: + micro_abs( chan, chan ); + break; + + case TGSI_UTIL_SIGN_SET: + micro_abs( chan, chan ); + micro_neg( chan, chan ); + break; + + case TGSI_UTIL_SIGN_TOGGLE: + micro_neg( chan, chan ); + break; + + case TGSI_UTIL_SIGN_KEEP: + break; + } + + if (reg->SrcRegisterExtMod.Complement) { + micro_sub( chan, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], chan ); + } +} + +static void +store_dest( + struct tgsi_exec_machine *mach, + const union tgsi_exec_channel *chan, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + uint chan_index ) +{ + union tgsi_exec_channel *dst; + + switch( reg->DstRegister.File ) { + case TGSI_FILE_NULL: + return; + + case TGSI_FILE_OUTPUT: + dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] + + reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_TEMPORARY: + assert(reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS); + dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; + break; + + case TGSI_FILE_ADDRESS: + dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index]; + break; + + default: + assert( 0 ); + return; + } + + switch (inst->Instruction.Saturate) + { + case TGSI_SAT_NONE: + if (mach->ExecMask & 0x1) + dst->i[0] = chan->i[0]; + if (mach->ExecMask & 0x2) + dst->i[1] = chan->i[1]; + if (mach->ExecMask & 0x4) + dst->i[2] = chan->i[2]; + if (mach->ExecMask & 0x8) + dst->i[3] = chan->i[3]; + break; + + case TGSI_SAT_ZERO_ONE: + /* XXX need to obey ExecMask here */ + micro_max(dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); + micro_min(dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + + default: + assert( 0 ); + } +} + +#define FETCH(VAL,INDEX,CHAN)\ + fetch_source (mach, VAL, &inst->FullSrcRegisters[INDEX], CHAN) + +#define STORE(VAL,INDEX,CHAN)\ + store_dest (mach, VAL, &inst->FullDstRegisters[INDEX], inst, CHAN ) + + +/** + * Execute ARB-style KIL which is predicated by a src register. + * Kill fragment if any of the four values is less than zero. + */ +static void +exec_kilp(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint uniquemask; + uint chan_index; + uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + union tgsi_exec_channel r[1]; + + /* This mask stores component bits that were already tested. Note that + * we test if the value is less than zero, so 1.0 and 0.0 need not to be + * tested. */ + uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + + for (chan_index = 0; chan_index < 4; chan_index++) + { + uint swizzle; + uint i; + + /* unswizzle channel */ + swizzle = tgsi_util_get_full_src_register_extswizzle ( + &inst->FullSrcRegisters[0], + chan_index); + + /* check if the component has not been already tested */ + if (uniquemask & (1 << swizzle)) + continue; + uniquemask |= 1 << swizzle; + + FETCH(&r[0], 0, chan_index); + for (i = 0; i < 4; i++) + if (r[0].f[i] < 0.0f) + kilmask |= 1 << i; + } + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; +} + + +/* + * Fetch a texel using STR texture coordinates. + */ +static void +fetch_texel( struct tgsi_sampler *sampler, + const union tgsi_exec_channel *s, + const union tgsi_exec_channel *t, + const union tgsi_exec_channel *p, + float lodbias, /* XXX should be float[4] */ + union tgsi_exec_channel *r, + union tgsi_exec_channel *g, + union tgsi_exec_channel *b, + union tgsi_exec_channel *a ) +{ + uint j; + float rgba[NUM_CHANNELS][QUAD_SIZE]; + + sampler->get_samples(sampler, s->f, t->f, p->f, lodbias, rgba); + + for (j = 0; j < 4; j++) { + r->f[j] = rgba[0][j]; + g->f[j] = rgba[1][j]; + b->f[j] = rgba[2][j]; + a->f[j] = rgba[3][j]; + } +} + + +static void +exec_tex(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + boolean biasLod, + boolean projected) +{ + const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; + union tgsi_exec_channel r[8]; + uint chan_index; + float lodBias; + + /* debug_printf("Sampler %u unit %u\n", sampler, unit); */ + + switch (inst->InstructionExtTexture.Texture) { + case TGSI_TEXTURE_1D: + + FETCH(&r[0], 0, CHAN_X); + + if (projected) { + FETCH(&r[1], 0, CHAN_W); + micro_div( &r[0], &r[0], &r[1] ); + } + + if (biasLod) { + FETCH(&r[1], 0, CHAN_W); + lodBias = r[2].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ + &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ + break; + + case TGSI_TEXTURE_2D: + case TGSI_TEXTURE_RECT: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + if (projected) { + FETCH(&r[3], 0, CHAN_W); + micro_div( &r[0], &r[0], &r[3] ); + micro_div( &r[1], &r[1], &r[3] ); + micro_div( &r[2], &r[2], &r[3] ); + } + + if (biasLod) { + FETCH(&r[3], 0, CHAN_W); + lodBias = r[3].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], &r[1], &r[2], lodBias, /* inputs */ + &r[0], &r[1], &r[2], &r[3]); /* outputs */ + break; + + case TGSI_TEXTURE_3D: + case TGSI_TEXTURE_CUBE: + + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 0, CHAN_Z); + + if (projected) { + FETCH(&r[3], 0, CHAN_W); + micro_div( &r[0], &r[0], &r[3] ); + micro_div( &r[1], &r[1], &r[3] ); + micro_div( &r[2], &r[2], &r[3] ); + } + + if (biasLod) { + FETCH(&r[3], 0, CHAN_W); + lodBias = r[3].f[0]; + } + else + lodBias = 0.0; + + fetch_texel(&mach->Samplers[unit], + &r[0], &r[1], &r[2], lodBias, + &r[0], &r[1], &r[2], &r[3]); + break; + + default: + assert (0); + } + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[chan_index], 0, chan_index ); + } +} + + +/** + * Evaluate a constant-valued coefficient at the position of the + * current quad. + */ +static void +eval_constant_coef( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + unsigned i; + + for( i = 0; i < QUAD_SIZE; i++ ) { + mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan]; + } +} + +/** + * Evaluate a linear-valued coefficient at the position of the + * current quad. + */ +static void +eval_linear_coef( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + mach->Inputs[attrib].xyzw[chan].f[0] = a0; + mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx; + mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady; + mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady; +} + +/** + * Evaluate a perspective-valued coefficient at the position of the + * current quad. + */ +static void +eval_perspective_coef( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ) +{ + const float x = mach->QuadPos.xyzw[0].f[0]; + const float y = mach->QuadPos.xyzw[1].f[0]; + const float dadx = mach->InterpCoefs[attrib].dadx[chan]; + const float dady = mach->InterpCoefs[attrib].dady[chan]; + const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y; + const float *w = mach->QuadPos.xyzw[3].f; + /* divide by W here */ + mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0]; + mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1]; + mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2]; + mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3]; +} + + +typedef void (* eval_coef_func)( + struct tgsi_exec_machine *mach, + unsigned attrib, + unsigned chan ); + +static void +exec_declaration( + struct tgsi_exec_machine *mach, + const struct tgsi_full_declaration *decl ) +{ + if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) { + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + eval_coef_func eval; + + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + switch( decl->Declaration.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + eval = eval_constant_coef; + break; + + case TGSI_INTERPOLATE_LINEAR: + eval = eval_linear_coef; + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + eval = eval_perspective_coef; + break; + + default: + assert( 0 ); + } + + if( mask == TGSI_WRITEMASK_XYZW ) { + unsigned i, j; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + eval( mach, i, j ); + } + } + } + else { + unsigned i, j; + + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + for( i = first; i <= last; i++ ) { + eval( mach, i, j ); + } + } + } + } + } + } +} + +static void +exec_instruction( + struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst, + int *pc ) +{ + uint chan_index; + union tgsi_exec_channel r[8]; + + (*pc)++; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_f2it( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LIT: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_X ); + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[1], 0, CHAN_Y ); + micro_max( &r[1], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + + FETCH( &r[2], 0, CHAN_W ); + micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] ); + micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] ); + micro_pow( &r[1], &r[1], &r[2] ); + micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, CHAN_Z ); + } + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + FETCH( &r[0], 0, CHAN_X ); + micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + FETCH( &r[0], 0, CHAN_X ); + micro_sqrt( &r[0], &r[0] ); + micro_div( &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXP: + FETCH( &r[0], 0, CHAN_X ); + micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ + STORE( &r[2], 0, CHAN_X ); /* store r2 */ + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ + STORE( &r[2], 0, CHAN_Y ); /* store r2 */ + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ + STORE( &r[2], 0, CHAN_Z ); /* store r2 */ + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_LOG: + FETCH( &r[0], 0, CHAN_X ); + micro_abs( &r[2], &r[0] ); /* r2 = abs(r0) */ + micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ + micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &r[0], 0, CHAN_X ); + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ + micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ + STORE( &r[0], 0, CHAN_Y ); + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + STORE( &r[1], 0, CHAN_Z ); + } + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MUL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) + { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + micro_mul( &r[0], &r[0], &r[1] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_ADD: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_add( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH( &r[1], 0, CHAN_Z ); + FETCH( &r[2], 1, CHAN_Z ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_W); + FETCH(&r[2], 1, CHAN_W); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DST: + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + FETCH( &r[0], 0, CHAN_Y ); + FETCH( &r[1], 1, CHAN_Y); + micro_mul( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, CHAN_Y ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( &r[0], 0, CHAN_Z ); + STORE( &r[0], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + FETCH( &r[0], 1, CHAN_W ); + STORE( &r[0], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MIN: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + /* XXX use micro_min()?? */ + micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_MAX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + /* XXX use micro_max()?? */ + micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] ); + + STORE(&r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_ge( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_mul( &r[0], &r[0], &r[1] ); + FETCH( &r[1], 2, chan_index ); + micro_add( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SUB: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + + micro_sub( &r[0], &r[0], &r[1] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_LERP: + /* TGSI_OPCODE_LRP */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + micro_sub( &r[1], &r[1], &r[2] ); + micro_mul( &r[0], &r[0], &r[1] ); + micro_add( &r[0], &r[0], &r[2] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_CND: + assert (0); + break; + + case TGSI_OPCODE_CND0: + assert (0); + break; + + case TGSI_OPCODE_DOT2ADD: + /* TGSI_OPCODE_DP2A */ + assert (0); + break; + + case TGSI_OPCODE_INDEX: + assert (0); + break; + + case TGSI_OPCODE_NEGATE: + assert (0); + break; + + case TGSI_OPCODE_FRAC: + /* TGSI_OPCODE_FRC */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_frc( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CLAMP: + assert (0); + break; + + case TGSI_OPCODE_FLOOR: + /* TGSI_OPCODE_FLR */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_flr( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_ROUND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_rnd( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXPBASE2: + /* TGSI_OPCODE_EX2 */ + FETCH(&r[0], 0, CHAN_X); + + micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_LOGBASE2: + /* TGSI_OPCODE_LG2 */ + FETCH( &r[0], 0, CHAN_X ); + micro_lg2( &r[0], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_POWER: + /* TGSI_OPCODE_POW */ + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + micro_pow( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_CROSSPRODUCT: + /* TGSI_OPCODE_XPD */ + FETCH(&r[0], 0, CHAN_Y); + FETCH(&r[1], 1, CHAN_Z); + + micro_mul( &r[2], &r[0], &r[1] ); + + FETCH(&r[3], 0, CHAN_Z); + FETCH(&r[4], 1, CHAN_Y); + + micro_mul( &r[5], &r[3], &r[4] ); + micro_sub( &r[2], &r[2], &r[5] ); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( &r[2], 0, CHAN_X ); + } + + FETCH(&r[2], 1, CHAN_X); + + micro_mul( &r[3], &r[3], &r[2] ); + + FETCH(&r[5], 0, CHAN_X); + + micro_mul( &r[1], &r[1], &r[5] ); + micro_sub( &r[3], &r[3], &r[1] ); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { + STORE( &r[3], 0, CHAN_Y ); + } + + micro_mul( &r[5], &r[5], &r[4] ); + micro_mul( &r[0], &r[0], &r[2] ); + micro_sub( &r[5], &r[5], &r[0] ); + + if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { + STORE( &r[5], 0, CHAN_Z ); + } + + if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MULTIPLYMATRIX: + assert (0); + break; + + case TGSI_OPCODE_ABS: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + + micro_abs( &r[0], &r[0] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_RCC: + assert (0); + break; + + case TGSI_OPCODE_DPH: + FETCH(&r[0], 0, CHAN_X); + FETCH(&r[1], 1, CHAN_X); + + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_Y); + FETCH(&r[2], 1, CHAN_Y); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 0, CHAN_Z); + FETCH(&r[2], 1, CHAN_Z); + + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH(&r[1], 1, CHAN_W); + + micro_add( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_COS: + FETCH(&r[0], 0, CHAN_X); + + micro_cos( &r[0], &r[0] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDX: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_ddx( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDY: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_ddy( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_KILP: + exec_kilp (mach, inst); + break; + + case TGSI_OPCODE_KIL: + /* for enabled ExecMask bits, set the killed bit */ + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask; + break; + + case TGSI_OPCODE_PK2H: + assert (0); + break; + + case TGSI_OPCODE_PK2US: + assert (0); + break; + + case TGSI_OPCODE_PK4B: + assert (0); + break; + + case TGSI_OPCODE_PK4UB: + assert (0); + break; + + case TGSI_OPCODE_RFL: + assert (0); + break; + + case TGSI_OPCODE_SEQ: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_eq( &r[0], &r[0], &r[1], + &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], + &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SFL: + assert (0); + break; + + case TGSI_OPCODE_SGT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SIN: + FETCH( &r[0], 0, CHAN_X ); + micro_sin( &r[0], &r[0] ); + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLE: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SNE: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_STR: + assert (0); + break; + + case TGSI_OPCODE_TEX: + /* simple texture lookup */ + /* src[0] = texcoord */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, FALSE, FALSE); + break; + + case TGSI_OPCODE_TXB: + /* Texture lookup with lod bias */ + /* src[0] = texcoord (src[0].w = LOD bias) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE, FALSE); + break; + + case TGSI_OPCODE_TXD: + /* Texture lookup with explict partial derivatives */ + /* src[0] = texcoord */ + /* src[1] = d[strq]/dx */ + /* src[2] = d[strq]/dy */ + /* src[3] = sampler unit */ + assert (0); + break; + + case TGSI_OPCODE_TXL: + /* Texture lookup with explit LOD */ + /* src[0] = texcoord (src[0].w = LOD) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, TRUE, FALSE); + break; + + case TGSI_OPCODE_TXP: + /* Texture lookup with projection */ + /* src[0] = texcoord (src[0].w = projection) */ + /* src[1] = sampler unit */ + exec_tex(mach, inst, FALSE, TRUE); + break; + + case TGSI_OPCODE_UP2H: + assert (0); + break; + + case TGSI_OPCODE_UP2US: + assert (0); + break; + + case TGSI_OPCODE_UP4B: + assert (0); + break; + + case TGSI_OPCODE_UP4UB: + assert (0); + break; + + case TGSI_OPCODE_X2D: + assert (0); + break; + + case TGSI_OPCODE_ARA: + assert (0); + break; + + case TGSI_OPCODE_ARR: + assert (0); + break; + + case TGSI_OPCODE_BRA: + assert (0); + break; + + case TGSI_OPCODE_CAL: + /* skip the call if no execution channels are enabled */ + if (mach->ExecMask) { + /* do the call */ + + /* push the Cond, Loop, Cont stacks */ + assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + + assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING); + mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask; + + /* note that PC was already incremented above */ + mach->CallStack[mach->CallStackTop++] = *pc; + *pc = inst->InstructionExtLabel.Label; + } + break; + + case TGSI_OPCODE_RET: + mach->FuncMask &= ~mach->ExecMask; + UPDATE_EXEC_MASK(mach); + + if (mach->ExecMask == 0x0) { + /* really return now (otherwise, keep executing */ + + if (mach->CallStackTop == 0) { + /* returning from main() */ + *pc = -1; + return; + } + *pc = mach->CallStack[--mach->CallStackTop]; + + /* pop the Cond, Loop, Cont stacks */ + assert(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + assert(mach->FuncStackTop > 0); + mach->FuncMask = mach->FuncStack[--mach->FuncStackTop]; + + UPDATE_EXEC_MASK(mach); + } + break; + + case TGSI_OPCODE_SSG: + assert (0); + break; + + case TGSI_OPCODE_CMP: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH(&r[0], 0, chan_index); + FETCH(&r[1], 1, chan_index); + FETCH(&r[2], 2, chan_index); + + micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] ); + + STORE(&r[0], 0, chan_index); + } + break; + + case TGSI_OPCODE_SCS: + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( &r[0], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_X ) ) { + micro_cos( &r[1], &r[0] ); + STORE( &r[1], 0, CHAN_X ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + micro_sin( &r[1], &r[0] ); + STORE( &r[1], 0, CHAN_Y ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + STORE( &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], 0, CHAN_Z ); + } + if( IS_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_NRM: + assert (0); + break; + + case TGSI_OPCODE_DIV: + assert( 0 ); + break; + + case TGSI_OPCODE_DP2: + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_IF: + /* push CondMask */ + assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING); + mach->CondStack[mach->CondStackTop++] = mach->CondMask; + FETCH( &r[0], 0, CHAN_X ); + /* update CondMask */ + if( ! r[0].u[0] ) { + mach->CondMask &= ~0x1; + } + if( ! r[0].u[1] ) { + mach->CondMask &= ~0x2; + } + if( ! r[0].u[2] ) { + mach->CondMask &= ~0x4; + } + if( ! r[0].u[3] ) { + mach->CondMask &= ~0x8; + } + UPDATE_EXEC_MASK(mach); + /* Todo: If CondMask==0, jump to ELSE */ + break; + + case TGSI_OPCODE_ELSE: + /* invert CondMask wrt previous mask */ + { + uint prevMask; + assert(mach->CondStackTop > 0); + prevMask = mach->CondStack[mach->CondStackTop - 1]; + mach->CondMask = ~mach->CondMask & prevMask; + UPDATE_EXEC_MASK(mach); + /* Todo: If CondMask==0, jump to ENDIF */ + } + break; + + case TGSI_OPCODE_ENDIF: + /* pop CondMask */ + assert(mach->CondStackTop > 0); + mach->CondMask = mach->CondStack[--mach->CondStackTop]; + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_END: + /* halt execution */ + *pc = -1; + break; + + case TGSI_OPCODE_REP: + assert (0); + break; + + case TGSI_OPCODE_ENDREP: + assert (0); + break; + + case TGSI_OPCODE_PUSHA: + assert (0); + break; + + case TGSI_OPCODE_POPA: + assert (0); + break; + + case TGSI_OPCODE_CEIL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_ceil( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_I2F: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_i2f( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_NOT: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_not( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_TRUNC: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_trunc( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHL: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_shl( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SHR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_ishr( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_AND: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_and( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_OR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_or( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOD: + assert (0); + break; + + case TGSI_OPCODE_XOR: + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + FETCH( &r[1], 1, chan_index ); + micro_xor( &r[0], &r[0], &r[1] ); + STORE( &r[0], 0, chan_index ); + } + break; + + case TGSI_OPCODE_SAD: + assert (0); + break; + + case TGSI_OPCODE_TXF: + assert (0); + break; + + case TGSI_OPCODE_TXQ: + assert (0); + break; + + case TGSI_OPCODE_EMIT: + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] += 16; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]]++; + break; + + case TGSI_OPCODE_ENDPRIM: + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]++; + mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0; + break; + + case TGSI_OPCODE_LOOP: + /* fall-through (for now) */ + case TGSI_OPCODE_BGNLOOP2: + /* push LoopMask and ContMasks */ + assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask; + assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING); + mach->ContStack[mach->ContStackTop++] = mach->ContMask; + break; + + case TGSI_OPCODE_ENDLOOP: + /* fall-through (for now at least) */ + case TGSI_OPCODE_ENDLOOP2: + /* Restore ContMask, but don't pop */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[mach->ContStackTop - 1]; + UPDATE_EXEC_MASK(mach); + if (mach->ExecMask) { + /* repeat loop: jump to instruction just past BGNLOOP */ + *pc = inst->InstructionExtLabel.Label + 1; + } + else { + /* exit loop: pop LoopMask */ + assert(mach->LoopStackTop > 0); + mach->LoopMask = mach->LoopStack[--mach->LoopStackTop]; + /* pop ContMask */ + assert(mach->ContStackTop > 0); + mach->ContMask = mach->ContStack[--mach->ContStackTop]; + } + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_BRK: + /* turn off loop channels for each enabled exec channel */ + mach->LoopMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_CONT: + /* turn off cont channels for each enabled exec channel */ + mach->ContMask &= ~mach->ExecMask; + /* Todo: if mach->LoopMask == 0, jump to end of loop */ + UPDATE_EXEC_MASK(mach); + break; + + case TGSI_OPCODE_BGNSUB: + /* no-op */ + break; + + case TGSI_OPCODE_ENDSUB: + /* no-op */ + break; + + case TGSI_OPCODE_NOISE1: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE2: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE3: + assert( 0 ); + break; + + case TGSI_OPCODE_NOISE4: + assert( 0 ); + break; + + case TGSI_OPCODE_NOP: + break; + + default: + assert( 0 ); + } +} + + +/** + * Run TGSI interpreter. + * \return bitmask of "alive" quad components + */ +uint +tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) +{ + uint i; + int pc = 0; + + mach->CondMask = 0xf; + mach->LoopMask = 0xf; + mach->ContMask = 0xf; + mach->FuncMask = 0xf; + mach->ExecMask = 0xf; + + mach->CondStackTop = 0; /* temporarily subvert this assertion */ + assert(mach->CondStackTop == 0); + assert(mach->LoopStackTop == 0); + assert(mach->ContStackTop == 0); + assert(mach->CallStackTop == 0); + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] = 0; + mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] = 0; + + if( mach->Processor == TGSI_PROCESSOR_GEOMETRY ) { + mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0] = 0; + mach->Primitives[0] = 0; + } + + + /* execute declarations (interpolants) */ + for (i = 0; i < mach->NumDeclarations; i++) { + exec_declaration( mach, mach->Declarations+i ); + } + + /* execute instructions, until pc is set to -1 */ + while (pc != -1) { + assert(pc < (int) mach->NumInstructions); + exec_instruction( mach, mach->Instructions + pc, &pc ); + } + +#if 0 + /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */ + if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) { + /* + * Scale back depth component. + */ + for (i = 0; i < 4; i++) + mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF; + } +#endif + + return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0]; +} + + diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h new file mode 100644 index 0000000000..4f30650b07 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -0,0 +1,253 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#if !defined TGSI_EXEC_H +#define TGSI_EXEC_H + +#include "pipe/p_compiler.h" + +#if defined __cplusplus +extern "C" { +#endif + +#define MAX_LABELS 1024 + +#define NUM_CHANNELS 4 /* R,G,B,A */ +#define QUAD_SIZE 4 /* 4 pixel/quad */ + +/** + * Registers may be treated as float, signed int or unsigned int. + */ +union tgsi_exec_channel +{ + float f[QUAD_SIZE]; + int i[QUAD_SIZE]; + unsigned u[QUAD_SIZE]; +}; + +/** + * A vector[RGBA] of channels[4 pixels] + */ +struct tgsi_exec_vector +{ + union tgsi_exec_channel xyzw[NUM_CHANNELS]; +}; + +/** + * For fragment programs, information for computing fragment input + * values from plane equation of the triangle/line. + */ +struct tgsi_interp_coef +{ + float a0[NUM_CHANNELS]; /* in an xyzw layout */ + float dadx[NUM_CHANNELS]; + float dady[NUM_CHANNELS]; +}; + + +struct softpipe_tile_cache; /**< Opaque to TGSI */ + +/** + * Information for sampling textures, which must be implemented + * by code outside the TGSI executor. + */ +struct tgsi_sampler +{ + const struct pipe_sampler_state *state; + struct pipe_texture *texture; + /** Get samples for four fragments in a quad */ + void (*get_samples)(struct tgsi_sampler *sampler, + const float s[QUAD_SIZE], + const float t[QUAD_SIZE], + const float p[QUAD_SIZE], + float lodbias, + float rgba[NUM_CHANNELS][QUAD_SIZE]); + void *pipe; /*XXX temporary*/ + struct softpipe_tile_cache *cache; +}; + +/** + * For branching/calling subroutines. + */ +struct tgsi_exec_labels +{ + unsigned labels[MAX_LABELS][2]; + unsigned count; +}; + + +#define TGSI_EXEC_NUM_TEMPS 128 +#define TGSI_EXEC_NUM_TEMP_EXTRAS 6 +#define TGSI_EXEC_NUM_IMMEDIATES 256 + +/* + * Locations of various utility registers (_I = Index, _C = Channel) + */ +#define TGSI_EXEC_TEMP_00000000_I (TGSI_EXEC_NUM_TEMPS + 0) +#define TGSI_EXEC_TEMP_00000000_C 0 + +#define TGSI_EXEC_TEMP_7FFFFFFF_I (TGSI_EXEC_NUM_TEMPS + 0) +#define TGSI_EXEC_TEMP_7FFFFFFF_C 1 + +#define TGSI_EXEC_TEMP_80000000_I (TGSI_EXEC_NUM_TEMPS + 0) +#define TGSI_EXEC_TEMP_80000000_C 2 + +#define TGSI_EXEC_TEMP_FFFFFFFF_I (TGSI_EXEC_NUM_TEMPS + 0) +#define TGSI_EXEC_TEMP_FFFFFFFF_C 3 + +#define TGSI_EXEC_TEMP_ONE_I (TGSI_EXEC_NUM_TEMPS + 1) +#define TGSI_EXEC_TEMP_ONE_C 0 + +#define TGSI_EXEC_TEMP_TWO_I (TGSI_EXEC_NUM_TEMPS + 1) +#define TGSI_EXEC_TEMP_TWO_C 1 + +#define TGSI_EXEC_TEMP_128_I (TGSI_EXEC_NUM_TEMPS + 1) +#define TGSI_EXEC_TEMP_128_C 2 + +#define TGSI_EXEC_TEMP_MINUS_128_I (TGSI_EXEC_NUM_TEMPS + 1) +#define TGSI_EXEC_TEMP_MINUS_128_C 3 + +#define TGSI_EXEC_TEMP_KILMASK_I (TGSI_EXEC_NUM_TEMPS + 2) +#define TGSI_EXEC_TEMP_KILMASK_C 0 + +#define TGSI_EXEC_TEMP_OUTPUT_I (TGSI_EXEC_NUM_TEMPS + 2) +#define TGSI_EXEC_TEMP_OUTPUT_C 1 + +#define TGSI_EXEC_TEMP_PRIMITIVE_I (TGSI_EXEC_NUM_TEMPS + 2) +#define TGSI_EXEC_TEMP_PRIMITIVE_C 2 + +#define TGSI_EXEC_TEMP_THREE_I (TGSI_EXEC_NUM_TEMPS + 2) +#define TGSI_EXEC_TEMP_THREE_C 3 + +#define TGSI_EXEC_TEMP_HALF_I (TGSI_EXEC_NUM_TEMPS + 3) +#define TGSI_EXEC_TEMP_HALF_C 0 + +#define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4) + +#define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 5) + + +#define TGSI_EXEC_MAX_COND_NESTING 20 +#define TGSI_EXEC_MAX_LOOP_NESTING 20 +#define TGSI_EXEC_MAX_CALL_NESTING 20 + +/** + * Run-time virtual machine state for executing TGSI shader. + */ +struct tgsi_exec_machine +{ + /* Total = program temporaries + internal temporaries + * + 1 padding to align to 16 bytes + */ + struct tgsi_exec_vector _Temps[TGSI_EXEC_NUM_TEMPS + + TGSI_EXEC_NUM_TEMP_EXTRAS + 1]; + + /* + * This will point to _Temps after aligning to 16B boundary. + */ + struct tgsi_exec_vector *Temps; + struct tgsi_exec_vector *Addrs; + + struct tgsi_sampler *Samplers; + + float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; + unsigned ImmLimit; + const float (*Consts)[4]; + struct tgsi_exec_vector *Inputs; + struct tgsi_exec_vector *Outputs; + const struct tgsi_token *Tokens; + unsigned Processor; + + /* GEOMETRY processor only. */ + unsigned *Primitives; + + /* FRAGMENT processor only. */ + const struct tgsi_interp_coef *InterpCoefs; + struct tgsi_exec_vector QuadPos; + + /* Conditional execution masks */ + uint CondMask; /**< For IF/ELSE/ENDIF */ + uint LoopMask; /**< For BGNLOOP/ENDLOOP */ + uint ContMask; /**< For loop CONT statements */ + uint FuncMask; /**< For function calls */ + uint ExecMask; /**< = CondMask & LoopMask */ + + /** Condition mask stack (for nested conditionals) */ + uint CondStack[TGSI_EXEC_MAX_COND_NESTING]; + int CondStackTop; + + /** Loop mask stack (for nested loops) */ + uint LoopStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int LoopStackTop; + + /** Loop continue mask stack (see comments in tgsi_exec.c) */ + uint ContStack[TGSI_EXEC_MAX_LOOP_NESTING]; + int ContStackTop; + + /** Function execution mask stack (for executing subroutine code) */ + uint FuncStack[TGSI_EXEC_MAX_CALL_NESTING]; + int FuncStackTop; + + /** Function call stack for saving/restoring the program counter */ + uint CallStack[TGSI_EXEC_MAX_CALL_NESTING]; + int CallStackTop; + + struct tgsi_full_instruction *Instructions; + uint NumInstructions; + + struct tgsi_full_declaration *Declarations; + uint NumDeclarations; + + struct tgsi_exec_labels Labels; +}; + +void +tgsi_exec_machine_init( + struct tgsi_exec_machine *mach ); + + +void +tgsi_exec_machine_bind_shader( + struct tgsi_exec_machine *mach, + const struct tgsi_token *tokens, + uint numSamplers, + struct tgsi_sampler *samplers); + +uint +tgsi_exec_machine_run( + struct tgsi_exec_machine *mach ); + + +void +tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach); + + +#if defined __cplusplus +} /* extern "C" */ +#endif + +#endif /* TGSI_EXEC_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_iterate.c b/src/gallium/auxiliary/tgsi/tgsi_iterate.c new file mode 100644 index 0000000000..5371a88b96 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_iterate.c @@ -0,0 +1,85 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "tgsi_iterate.h" + +boolean +tgsi_iterate_shader( + const struct tgsi_token *tokens, + struct tgsi_iterate_context *ctx ) +{ + struct tgsi_parse_context parse; + + if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) + return FALSE; + + ctx->processor = parse.FullHeader.Processor; + ctx->version = parse.FullVersion.Version; + + if (ctx->prolog) + if (!ctx->prolog( ctx )) + goto fail; + + while (!tgsi_parse_end_of_tokens( &parse )) { + tgsi_parse_token( &parse ); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (ctx->iterate_instruction) + if (!ctx->iterate_instruction( ctx, &parse.FullToken.FullInstruction )) + goto fail; + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + if (ctx->iterate_declaration) + if (!ctx->iterate_declaration( ctx, &parse.FullToken.FullDeclaration )) + goto fail; + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + if (ctx->iterate_immediate) + if (!ctx->iterate_immediate( ctx, &parse.FullToken.FullImmediate )) + goto fail; + break; + + default: + assert( 0 ); + } + } + + if (ctx->epilog) + if (!ctx->epilog( ctx )) + goto fail; + + tgsi_parse_free( &parse ); + return TRUE; + +fail: + tgsi_parse_free( &parse ); + return FALSE; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_iterate.h b/src/gallium/auxiliary/tgsi/tgsi_iterate.h new file mode 100644 index 0000000000..ec7b85bf63 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_iterate.h @@ -0,0 +1,76 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_ITERATE_H +#define TGSI_ITERATE_H + +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" + +#if defined __cplusplus +extern "C" { +#endif + +struct tgsi_iterate_context +{ + boolean + (* prolog)( + struct tgsi_iterate_context *ctx ); + + boolean + (* iterate_instruction)( + struct tgsi_iterate_context *ctx, + struct tgsi_full_instruction *inst ); + + boolean + (* iterate_declaration)( + struct tgsi_iterate_context *ctx, + struct tgsi_full_declaration *decl ); + + boolean + (* iterate_immediate)( + struct tgsi_iterate_context *ctx, + struct tgsi_full_immediate *imm ); + + boolean + (* epilog)( + struct tgsi_iterate_context *ctx ); + + struct tgsi_processor processor; + struct tgsi_version version; +}; + +boolean +tgsi_iterate_shader( + const struct tgsi_token *tokens, + struct tgsi_iterate_context *ctx ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_ITERATE_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c new file mode 100644 index 0000000000..d16f0cdcad --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -0,0 +1,332 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi_parse.h" +#include "tgsi_build.h" + +void +tgsi_full_token_init( + union tgsi_full_token *full_token ) +{ + full_token->Token.Type = TGSI_TOKEN_TYPE_DECLARATION; +} + +void +tgsi_full_token_free( + union tgsi_full_token *full_token ) +{ + if( full_token->Token.Type == TGSI_TOKEN_TYPE_IMMEDIATE ) { + FREE( (void *) full_token->FullImmediate.u.Pointer ); + } +} + +unsigned +tgsi_parse_init( + struct tgsi_parse_context *ctx, + const struct tgsi_token *tokens ) +{ + ctx->FullVersion.Version = *(struct tgsi_version *) &tokens[0]; + if( ctx->FullVersion.Version.MajorVersion > 1 ) { + return TGSI_PARSE_ERROR; + } + + ctx->FullHeader.Header = *(struct tgsi_header *) &tokens[1]; + if( ctx->FullHeader.Header.HeaderSize >= 2 ) { + ctx->FullHeader.Processor = *(struct tgsi_processor *) &tokens[2]; + } + else { + ctx->FullHeader.Processor = tgsi_default_processor(); + } + + ctx->Tokens = tokens; + ctx->Position = 1 + ctx->FullHeader.Header.HeaderSize; + + tgsi_full_token_init( &ctx->FullToken ); + + return TGSI_PARSE_OK; +} + +void +tgsi_parse_free( + struct tgsi_parse_context *ctx ) +{ + tgsi_full_token_free( &ctx->FullToken ); +} + +boolean +tgsi_parse_end_of_tokens( + struct tgsi_parse_context *ctx ) +{ + return ctx->Position >= + 1 + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize; +} + +static void +next_token( + struct tgsi_parse_context *ctx, + void *token ) +{ + assert( !tgsi_parse_end_of_tokens( ctx ) ); + + *(struct tgsi_token *) token = ctx->Tokens[ctx->Position++]; +} + +void +tgsi_parse_token( + struct tgsi_parse_context *ctx ) +{ + struct tgsi_token token; + unsigned i; + + tgsi_full_token_free( &ctx->FullToken ); + tgsi_full_token_init( &ctx->FullToken ); + + next_token( ctx, &token ); + + switch( token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + { + struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration; + + *decl = tgsi_default_full_declaration(); + decl->Declaration = *(struct tgsi_declaration *) &token; + + next_token( ctx, &decl->DeclarationRange ); + + if( decl->Declaration.Semantic ) { + next_token( ctx, &decl->Semantic ); + } + + break; + } + + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate; + + *imm = tgsi_default_full_immediate(); + imm->Immediate = *(struct tgsi_immediate *) &token; + + assert( !imm->Immediate.Extended ); + + switch (imm->Immediate.DataType) { + case TGSI_IMM_FLOAT32: + imm->u.Pointer = MALLOC( + sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) ); + for( i = 0; i < imm->Immediate.Size - 1; i++ ) { + next_token( ctx, (struct tgsi_immediate_float32 *) &imm->u.ImmediateFloat32[i] ); + } + break; + + default: + assert( 0 ); + } + + break; + } + + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *inst = &ctx->FullToken.FullInstruction; + unsigned extended; + + *inst = tgsi_default_full_instruction(); + inst->Instruction = *(struct tgsi_instruction *) &token; + + extended = inst->Instruction.Extended; + + while( extended ) { + struct tgsi_src_register_ext token; + + next_token( ctx, &token ); + + switch( token.Type ) { + case TGSI_INSTRUCTION_EXT_TYPE_NV: + inst->InstructionExtNv = + *(struct tgsi_instruction_ext_nv *) &token; + break; + + case TGSI_INSTRUCTION_EXT_TYPE_LABEL: + inst->InstructionExtLabel = + *(struct tgsi_instruction_ext_label *) &token; + break; + + case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE: + inst->InstructionExtTexture = + *(struct tgsi_instruction_ext_texture *) &token; + break; + + default: + assert( 0 ); + } + + extended = token.Extended; + } + + assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS ); + + for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { + unsigned extended; + + next_token( ctx, &inst->FullDstRegisters[i].DstRegister ); + + /* + * No support for indirect or multi-dimensional addressing. + */ + assert( !inst->FullDstRegisters[i].DstRegister.Indirect ); + assert( !inst->FullDstRegisters[i].DstRegister.Dimension ); + + extended = inst->FullDstRegisters[i].DstRegister.Extended; + + while( extended ) { + struct tgsi_src_register_ext token; + + next_token( ctx, &token ); + + switch( token.Type ) { + case TGSI_DST_REGISTER_EXT_TYPE_CONDCODE: + inst->FullDstRegisters[i].DstRegisterExtConcode = + *(struct tgsi_dst_register_ext_concode *) &token; + break; + + case TGSI_DST_REGISTER_EXT_TYPE_MODULATE: + inst->FullDstRegisters[i].DstRegisterExtModulate = + *(struct tgsi_dst_register_ext_modulate *) &token; + break; + + default: + assert( 0 ); + } + + extended = token.Extended; + } + } + + assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS ); + + for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { + unsigned extended; + + next_token( ctx, &inst->FullSrcRegisters[i].SrcRegister ); + + extended = inst->FullSrcRegisters[i].SrcRegister.Extended; + + while( extended ) { + struct tgsi_src_register_ext token; + + next_token( ctx, &token ); + + switch( token.Type ) { + case TGSI_SRC_REGISTER_EXT_TYPE_SWZ: + inst->FullSrcRegisters[i].SrcRegisterExtSwz = + *(struct tgsi_src_register_ext_swz *) &token; + break; + + case TGSI_SRC_REGISTER_EXT_TYPE_MOD: + inst->FullSrcRegisters[i].SrcRegisterExtMod = + *(struct tgsi_src_register_ext_mod *) &token; + break; + + default: + assert( 0 ); + } + + extended = token.Extended; + } + + if( inst->FullSrcRegisters[i].SrcRegister.Indirect ) { + next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterInd ); + + /* + * No support for indirect or multi-dimensional addressing. + */ + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect ); + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension ); + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended ); + } + + if( inst->FullSrcRegisters[i].SrcRegister.Dimension ) { + next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDim ); + + /* + * No support for multi-dimensional addressing. + */ + assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Dimension ); + assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Extended ); + + if( inst->FullSrcRegisters[i].SrcRegisterDim.Indirect ) { + next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDimInd ); + + /* + * No support for indirect or multi-dimensional addressing. + */ + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect ); + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension ); + assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended ); + } + } + } + + break; + } + + default: + assert( 0 ); + } +} + + +unsigned +tgsi_num_tokens(const struct tgsi_token *tokens) +{ + struct tgsi_parse_context ctx; + if (tgsi_parse_init(&ctx, tokens) == TGSI_PARSE_OK) { + unsigned len = (ctx.FullHeader.Header.HeaderSize + + ctx.FullHeader.Header.BodySize + + 1); + return len; + } + return 0; +} + + +/** + * Make a new copy of a token array. + */ +struct tgsi_token * +tgsi_dup_tokens(const struct tgsi_token *tokens) +{ + unsigned n = tgsi_num_tokens(tokens); + unsigned bytes = n * sizeof(struct tgsi_token); + struct tgsi_token *new_tokens = (struct tgsi_token *) MALLOC(bytes); + if (new_tokens) + memcpy(new_tokens, tokens, bytes); + return new_tokens; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h new file mode 100644 index 0000000000..054350712d --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h @@ -0,0 +1,151 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_PARSE_H +#define TGSI_PARSE_H + +#include "pipe/p_shader_tokens.h" + +#if defined __cplusplus +extern "C" { +#endif + +struct tgsi_full_version +{ + struct tgsi_version Version; +}; + +struct tgsi_full_header +{ + struct tgsi_header Header; + struct tgsi_processor Processor; +}; + +struct tgsi_full_dst_register +{ + struct tgsi_dst_register DstRegister; + struct tgsi_dst_register_ext_concode DstRegisterExtConcode; + struct tgsi_dst_register_ext_modulate DstRegisterExtModulate; +}; + +struct tgsi_full_src_register +{ + struct tgsi_src_register SrcRegister; + struct tgsi_src_register_ext_swz SrcRegisterExtSwz; + struct tgsi_src_register_ext_mod SrcRegisterExtMod; + struct tgsi_src_register SrcRegisterInd; + struct tgsi_dimension SrcRegisterDim; + struct tgsi_src_register SrcRegisterDimInd; +}; + +struct tgsi_full_declaration +{ + struct tgsi_declaration Declaration; + struct tgsi_declaration_range DeclarationRange; + struct tgsi_declaration_semantic Semantic; +}; + +struct tgsi_full_immediate +{ + struct tgsi_immediate Immediate; + union + { + const void *Pointer; + const struct tgsi_immediate_float32 *ImmediateFloat32; + } u; +}; + +#define TGSI_FULL_MAX_DST_REGISTERS 2 +#define TGSI_FULL_MAX_SRC_REGISTERS 4 /* TXD has 4 */ + +struct tgsi_full_instruction +{ + struct tgsi_instruction Instruction; + struct tgsi_instruction_ext_nv InstructionExtNv; + struct tgsi_instruction_ext_label InstructionExtLabel; + struct tgsi_instruction_ext_texture InstructionExtTexture; + struct tgsi_full_dst_register FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS]; + struct tgsi_full_src_register FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS]; +}; + +union tgsi_full_token +{ + struct tgsi_token Token; + struct tgsi_full_declaration FullDeclaration; + struct tgsi_full_immediate FullImmediate; + struct tgsi_full_instruction FullInstruction; +}; + +void +tgsi_full_token_init( + union tgsi_full_token *full_token ); + +void +tgsi_full_token_free( + union tgsi_full_token *full_token ); + +struct tgsi_parse_context +{ + const struct tgsi_token *Tokens; + unsigned Position; + struct tgsi_full_version FullVersion; + struct tgsi_full_header FullHeader; + union tgsi_full_token FullToken; +}; + +#define TGSI_PARSE_OK 0 +#define TGSI_PARSE_ERROR 1 + +unsigned +tgsi_parse_init( + struct tgsi_parse_context *ctx, + const struct tgsi_token *tokens ); + +void +tgsi_parse_free( + struct tgsi_parse_context *ctx ); + +boolean +tgsi_parse_end_of_tokens( + struct tgsi_parse_context *ctx ); + +void +tgsi_parse_token( + struct tgsi_parse_context *ctx ); + +unsigned +tgsi_num_tokens(const struct tgsi_token *tokens); + +struct tgsi_token * +tgsi_dup_tokens(const struct tgsi_token *tokens); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_PARSE_H */ + diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c new file mode 100644 index 0000000000..2e3ec96b5b --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -0,0 +1,341 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "tgsi_sanity.h" +#include "tgsi_iterate.h" + +#define MAX_REGISTERS 256 + +typedef uint reg_flag; + +#define BITS_IN_REG_FLAG (sizeof( reg_flag ) * 8) + +struct sanity_check_ctx +{ + struct tgsi_iterate_context iter; + + reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG]; + reg_flag regs_used[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG]; + boolean regs_ind_used[TGSI_FILE_COUNT]; + uint num_imms; + uint num_instructions; + uint index_of_END; + + uint errors; + uint warnings; +}; + +static void +report_error( + struct sanity_check_ctx *ctx, + const char *format, + ... ) +{ + va_list args; + + debug_printf( "Error : " ); + va_start( args, format ); + _debug_vprintf( format, args ); + va_end( args ); + debug_printf( "\n" ); + ctx->errors++; +} + +static void +report_warning( + struct sanity_check_ctx *ctx, + const char *format, + ... ) +{ + va_list args; + + debug_printf( "Warning: " ); + va_start( args, format ); + _debug_vprintf( format, args ); + va_end( args ); + debug_printf( "\n" ); + ctx->warnings++; +} + +static boolean +check_file_name( + struct sanity_check_ctx *ctx, + uint file ) +{ + if (file <= TGSI_FILE_NULL || file >= TGSI_FILE_COUNT) { + report_error( ctx, "Invalid register file name" ); + return FALSE; + } + return TRUE; +} + +static boolean +is_register_declared( + struct sanity_check_ctx *ctx, + uint file, + int index ) +{ + assert( index >= 0 && index < MAX_REGISTERS ); + + return (ctx->regs_decl[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; +} + +static boolean +is_any_register_declared( + struct sanity_check_ctx *ctx, + uint file ) +{ + uint i; + + for (i = 0; i < MAX_REGISTERS / BITS_IN_REG_FLAG; i++) + if (ctx->regs_decl[file][i]) + return TRUE; + return FALSE; +} + +static boolean +is_register_used( + struct sanity_check_ctx *ctx, + uint file, + int index ) +{ + assert( index < MAX_REGISTERS ); + + return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; +} + +static const char *file_names[] = +{ + "NULL", + "CONST", + "IN", + "OUT", + "TEMP", + "SAMP", + "ADDR", + "IMM" +}; + +static boolean +check_register_usage( + struct sanity_check_ctx *ctx, + uint file, + int index, + const char *name, + boolean indirect_access ) +{ + if (!check_file_name( ctx, file )) + return FALSE; + if (indirect_access) { + if (!is_any_register_declared( ctx, file )) + report_error( ctx, "%s: Undeclared %s register", file_names[file], name ); + ctx->regs_ind_used[file] = TRUE; + } + else { + if (!is_register_declared( ctx, file, index )) + report_error( ctx, "%s[%d]: Undeclared %s register", file_names[file], index, name ); + ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG)); + } + return TRUE; +} + +static boolean +iter_instruction( + struct tgsi_iterate_context *iter, + struct tgsi_full_instruction *inst ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + uint i; + + /* There must be no other instructions after END. + */ + if (ctx->index_of_END != ~0) { + report_error( ctx, "Unexpected instruction after END" ); + } + else if (inst->Instruction.Opcode == TGSI_OPCODE_END) { + ctx->index_of_END = ctx->num_instructions; + } + + /* Check destination and source registers' validity. + * Mark the registers as used. + */ + for (i = 0; i < inst->Instruction.NumDstRegs; i++) { + check_register_usage( + ctx, + inst->FullDstRegisters[i].DstRegister.File, + inst->FullDstRegisters[i].DstRegister.Index, + "destination", + FALSE ); + } + for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { + check_register_usage( + ctx, + inst->FullSrcRegisters[i].SrcRegister.File, + inst->FullSrcRegisters[i].SrcRegister.Index, + "source", + (boolean)inst->FullSrcRegisters[i].SrcRegister.Indirect ); + if (inst->FullSrcRegisters[i].SrcRegister.Indirect) { + uint file; + int index; + + file = inst->FullSrcRegisters[i].SrcRegisterInd.File; + index = inst->FullSrcRegisters[i].SrcRegisterInd.Index; + check_register_usage( + ctx, + file, + index, + "indirect", + FALSE ); + if (file != TGSI_FILE_ADDRESS || index != 0) + report_warning( ctx, "Indirect register not ADDR[0]" ); + } + } + + ctx->num_instructions++; + + return TRUE; +} + +static boolean +iter_declaration( + struct tgsi_iterate_context *iter, + struct tgsi_full_declaration *decl ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + uint file; + uint i; + + /* No declarations allowed after the first instruction. + */ + if (ctx->num_instructions > 0) + report_error( ctx, "Instruction expected but declaration found" ); + + /* Check registers' validity. + * Mark the registers as declared. + */ + file = decl->Declaration.File; + if (!check_file_name( ctx, file )) + return TRUE; + for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) { + if (is_register_declared( ctx, file, i )) + report_error( ctx, "The same register declared twice" ); + ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG)); + } + + return TRUE; +} + +static boolean +iter_immediate( + struct tgsi_iterate_context *iter, + struct tgsi_full_immediate *imm ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + + assert( ctx->num_imms < MAX_REGISTERS ); + + /* No immediates allowed after the first instruction. + */ + if (ctx->num_instructions > 0) + report_error( ctx, "Instruction expected but immediate found" ); + + /* Mark the register as declared. + */ + ctx->regs_decl[TGSI_FILE_IMMEDIATE][ctx->num_imms / BITS_IN_REG_FLAG] |= (1 << (ctx->num_imms % BITS_IN_REG_FLAG)); + ctx->num_imms++; + + /* Check data type validity. + */ + if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) { + report_error( ctx, "Invalid immediate data type" ); + return TRUE; + } + + return TRUE; +} + +static boolean +epilog( + struct tgsi_iterate_context *iter ) +{ + struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + uint file; + + /* There must be an END instruction at the end. + */ + if (ctx->index_of_END == ~0 || ctx->index_of_END != ctx->num_instructions - 1) { + report_error( ctx, "Expected END at end of instruction sequence" ); + } + + /* Check if all declared registers were used. + */ + for (file = TGSI_FILE_NULL; file < TGSI_FILE_COUNT; file++) { + uint i; + + for (i = 0; i < MAX_REGISTERS; i++) { + if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i ) && !ctx->regs_ind_used[file]) { + report_warning( ctx, "Register never used" ); + } + } + } + + /* Print totals, if any. + */ + if (ctx->errors || ctx->warnings) + debug_printf( "\n%u errors, %u warnings", ctx->errors, ctx->warnings ); + + return TRUE; +} + +boolean +tgsi_sanity_check( + struct tgsi_token *tokens ) +{ + struct sanity_check_ctx ctx; + + ctx.iter.prolog = NULL; + ctx.iter.iterate_instruction = iter_instruction; + ctx.iter.iterate_declaration = iter_declaration; + ctx.iter.iterate_immediate = iter_immediate; + ctx.iter.epilog = epilog; + + memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) ); + memset( ctx.regs_used, 0, sizeof( ctx.regs_used ) ); + memset( ctx.regs_ind_used, 0, sizeof( ctx.regs_ind_used ) ); + ctx.num_imms = 0; + ctx.num_instructions = 0; + ctx.index_of_END = ~0; + + ctx.errors = 0; + ctx.warnings = 0; + + if (!tgsi_iterate_shader( tokens, &ctx.iter )) + return FALSE; + + return ctx.errors == 0; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/tgsi_sanity.h new file mode 100644 index 0000000000..ca45e94c7a --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.h @@ -0,0 +1,49 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_SANITY_H +#define TGSI_SANITY_H + +#include "pipe/p_shader_tokens.h" + +#if defined __cplusplus +extern "C" { +#endif + +/* Check the given token stream for errors and common mistakes. + * Diagnostic messages are printed out to the debug output. + * Returns TRUE if there are no errors, even though there could be some warnings. + */ +boolean +tgsi_sanity_check( + struct tgsi_token *tokens ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_SANITY_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c new file mode 100644 index 0000000000..59bcf10b53 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -0,0 +1,226 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI program scan utility. + * Used to determine which registers and instructions are used by a shader. + * + * Authors: Brian Paul + */ + + +#include "tgsi_scan.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_build.h" + +#include "pipe/p_util.h" + + + +/** + */ +void +tgsi_scan_shader(const struct tgsi_token *tokens, + struct tgsi_shader_info *info) +{ + uint procType, i; + struct tgsi_parse_context parse; + + memset(info, 0, sizeof(*info)); + for (i = 0; i < TGSI_FILE_COUNT; i++) + info->file_max[i] = -1; + + /** + ** Setup to begin parsing input shader + **/ + if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) { + debug_printf("tgsi_parse_init() failed in tgsi_scan_shader()!\n"); + return; + } + procType = parse.FullHeader.Processor.Processor; + assert(procType == TGSI_PROCESSOR_FRAGMENT || + procType == TGSI_PROCESSOR_VERTEX || + procType == TGSI_PROCESSOR_GEOMETRY); + + + /** + ** Loop over incoming program tokens/instructions + */ + while( !tgsi_parse_end_of_tokens( &parse ) ) { + + info->num_tokens++; + + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *fullinst + = &parse.FullToken.FullInstruction; + + assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST); + info->opcode_count[fullinst->Instruction.Opcode]++; + } + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + { + struct tgsi_full_declaration *fulldecl + = &parse.FullToken.FullDeclaration; + uint file = fulldecl->Declaration.File; + uint i; + for (i = fulldecl->DeclarationRange.First; + i <= fulldecl->DeclarationRange.Last; + i++) { + + /* only first 32 regs will appear in this bitfield */ + info->file_mask[file] |= (1 << i); + info->file_count[file]++; + info->file_max[file] = MAX2(info->file_max[file], (int)i); + + if (file == TGSI_FILE_INPUT) { + info->input_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName; + info->input_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex; + info->num_inputs++; + } + + if (file == TGSI_FILE_OUTPUT) { + info->output_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName; + info->output_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex; + info->num_outputs++; + } + + /* special case */ + if (procType == TGSI_PROCESSOR_FRAGMENT && + file == TGSI_FILE_OUTPUT && + fulldecl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) { + info->writes_z = TRUE; + } + } + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + info->immediate_count++; + break; + + default: + assert( 0 ); + } + } + + assert( info->file_max[TGSI_FILE_INPUT] + 1 == info->num_inputs ); + assert( info->file_max[TGSI_FILE_OUTPUT] + 1 == info->num_outputs ); + + info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] || + info->opcode_count[TGSI_OPCODE_KILP]); + + tgsi_parse_free (&parse); +} + + + +/** + * Check if the given shader is a "passthrough" shader consisting of only + * MOV instructions of the form: MOV OUT[n], IN[n] + * + */ +boolean +tgsi_is_passthrough_shader(const struct tgsi_token *tokens) +{ + struct tgsi_parse_context parse; + + /** + ** Setup to begin parsing input shader + **/ + if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) { + debug_printf("tgsi_parse_init() failed in tgsi_is_passthrough_shader()!\n"); + return FALSE; + } + + /** + ** Loop over incoming program tokens/instructions + */ + while (!tgsi_parse_end_of_tokens(&parse)) { + + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *fullinst = + &parse.FullToken.FullInstruction; + const struct tgsi_full_src_register *src = + &fullinst->FullSrcRegisters[0]; + const struct tgsi_full_dst_register *dst = + &fullinst->FullDstRegisters[0]; + + /* Do a whole bunch of checks for a simple move */ + if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV || + src->SrcRegister.File != TGSI_FILE_INPUT || + dst->DstRegister.File != TGSI_FILE_OUTPUT || + src->SrcRegister.Index != dst->DstRegister.Index || + + src->SrcRegister.Negate || + src->SrcRegisterExtMod.Negate || + src->SrcRegisterExtMod.Absolute || + src->SrcRegisterExtMod.Scale2X || + src->SrcRegisterExtMod.Bias || + src->SrcRegisterExtMod.Complement || + + src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || + src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || + src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || + src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W || + + src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || + src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || + src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || + src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W || + + dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW) + { + tgsi_parse_free(&parse); + return FALSE; + } + } + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + /* fall-through */ + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* fall-through */ + default: + ; /* no-op */ + } + } + + tgsi_parse_free(&parse); + + /* if we get here, it's a pass-through shader */ + return TRUE; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h new file mode 100644 index 0000000000..5cb6efb343 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -0,0 +1,74 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_SCAN_H +#define TGSI_SCAN_H + + +#include "pipe/p_compiler.h" +#include "pipe/p_state.h" +#include "pipe/p_shader_tokens.h" + + +/** + * Shader summary info + */ +struct tgsi_shader_info +{ + uint num_tokens; + + /* XXX eventually remove the corresponding fields from pipe_shader_state: */ + ubyte num_inputs; + ubyte num_outputs; + ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */ + ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; + ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ + ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; + + uint file_mask[TGSI_FILE_COUNT]; /**< bitmask of declared registers */ + uint file_count[TGSI_FILE_COUNT]; /**< number of declared registers */ + int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */ + + uint immediate_count; /**< number of immediates declared */ + + uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ + + boolean writes_z; /**< does fragment shader write Z value? */ + boolean uses_kill; /**< KIL or KILP instruction used? */ +}; + + +extern void +tgsi_scan_shader(const struct tgsi_token *tokens, + struct tgsi_shader_info *info); + + +extern boolean +tgsi_is_passthrough_shader(const struct tgsi_token *tokens); + + +#endif /* TGSI_SCAN_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c new file mode 100644 index 0000000000..0cb1f11ef2 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -0,0 +1,2275 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi_exec.h" +#include "tgsi_sse2.h" + +#include "rtasm/rtasm_x86sse.h" + +#ifdef PIPE_ARCH_X86 + +/* for 1/sqrt() + * + * This costs about 100fps (close to 10%) in gears: + */ +#define HIGH_PRECISION 1 + + +#define FOR_EACH_CHANNEL( CHAN )\ + for( CHAN = 0; CHAN < 4; CHAN++ ) + +#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ + ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + +#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ + if( IS_DST0_CHANNEL_ENABLED( INST, CHAN )) + +#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ + FOR_EACH_CHANNEL( CHAN )\ + IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + +#define TEMP_R0 TGSI_EXEC_TEMP_R0 + +/** + * X86 utility functions. + */ + +static struct x86_reg +make_xmm( + unsigned xmm ) +{ + return x86_make_reg( + file_XMM, + (enum x86_reg_name) xmm ); +} + +/** + * X86 register mapping helpers. + */ + +static struct x86_reg +get_const_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_CX ); +} + +static struct x86_reg +get_input_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_AX ); +} + +static struct x86_reg +get_output_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_DX ); +} + +static struct x86_reg +get_temp_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_BX ); +} + +static struct x86_reg +get_coef_base( void ) +{ + return get_output_base(); +} + +static struct x86_reg +get_immediate_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_DI ); +} + + +/** + * Data access helpers. + */ + + +static struct x86_reg +get_immediate( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_immediate_base(), + (vec * 4 + chan) * 4 ); +} + +static struct x86_reg +get_const( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_const_base(), + (vec * 4 + chan) * 4 ); +} + +static struct x86_reg +get_input( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_input_base(), + (vec * 4 + chan) * 16 ); +} + +static struct x86_reg +get_output( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_output_base(), + (vec * 4 + chan) * 16 ); +} + +static struct x86_reg +get_temp( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_temp_base(), + (vec * 4 + chan) * 16 ); +} + +static struct x86_reg +get_coef( + unsigned vec, + unsigned chan, + unsigned member ) +{ + return x86_make_disp( + get_coef_base(), + ((vec * 3 + member) * 4 + chan) * 4 ); +} + + +static void +emit_ret( + struct x86_function *func ) +{ + x86_ret( func ); +} + + +/** + * Data fetch helpers. + */ + +/** + * Copy a shader constant to xmm register + * \param xmm the destination xmm register + * \param vec the src const buffer index + * \param chan src channel to fetch (X, Y, Z or W) + */ +static void +emit_const( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movss( + func, + make_xmm( xmm ), + get_const( vec, chan ) ); + sse_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + +static void +emit_immediate( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movss( + func, + make_xmm( xmm ), + get_immediate( vec, chan ) ); + sse_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + + +/** + * Copy a shader input to xmm register + * \param xmm the destination xmm register + * \param vec the src input attrib + * \param chan src channel to fetch (X, Y, Z or W) + */ +static void +emit_inputf( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movups( + func, + make_xmm( xmm ), + get_input( vec, chan ) ); +} + +/** + * Store an xmm register to a shader output + * \param xmm the source xmm register + * \param vec the dest output attrib + * \param chan src dest channel to store (X, Y, Z or W) + */ +static void +emit_output( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movups( + func, + get_output( vec, chan ), + make_xmm( xmm ) ); +} + +/** + * Copy a shader temporary to xmm register + * \param xmm the destination xmm register + * \param vec the src temp register + * \param chan src channel to fetch (X, Y, Z or W) + */ +static void +emit_tempf( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movaps( + func, + make_xmm( xmm ), + get_temp( vec, chan ) ); +} + +/** + * Load an xmm register with an input attrib coefficient (a0, dadx or dady) + * \param xmm the destination xmm register + * \param vec the src input/attribute coefficient index + * \param chan src channel to fetch (X, Y, Z or W) + * \param member 0=a0, 1=dadx, 2=dady + */ +static void +emit_coef( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan, + unsigned member ) +{ + sse_movss( + func, + make_xmm( xmm ), + get_coef( vec, chan, member ) ); + sse_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + +/** + * Data store helpers. + */ + +static void +emit_inputs( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movups( + func, + get_input( vec, chan ), + make_xmm( xmm ) ); +} + +static void +emit_temps( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movaps( + func, + get_temp( vec, chan ), + make_xmm( xmm ) ); +} + +static void +emit_addrs( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_temps( + func, + xmm, + vec + TGSI_EXEC_NUM_TEMPS, + chan ); +} + +/** + * Coefficent fetch helpers. + */ + +static void +emit_coef_a0( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_coef( + func, + xmm, + vec, + chan, + 0 ); +} + +static void +emit_coef_dadx( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_coef( + func, + xmm, + vec, + chan, + 1 ); +} + +static void +emit_coef_dady( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_coef( + func, + xmm, + vec, + chan, + 2 ); +} + +/** + * Function call helpers. + */ + +static void +emit_push_gp( + struct x86_function *func ) +{ + x86_push( + func, + x86_make_reg( file_REG32, reg_AX) ); + x86_push( + func, + x86_make_reg( file_REG32, reg_CX) ); + x86_push( + func, + x86_make_reg( file_REG32, reg_DX) ); +} + +static void +x86_pop_gp( + struct x86_function *func ) +{ + /* Restore GP registers in a reverse order. + */ + x86_pop( + func, + x86_make_reg( file_REG32, reg_DX) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_CX) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_AX) ); +} + +static void +emit_func_call_dst( + struct x86_function *func, + unsigned xmm_dst, + void (PIPE_CDECL *code)() ) +{ + sse_movaps( + func, + get_temp( TEMP_R0, 0 ), + make_xmm( xmm_dst ) ); + + emit_push_gp( + func ); + + { + struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); + + x86_lea( + func, + ecx, + get_temp( TEMP_R0, 0 ) ); + + x86_push( func, ecx ); + x86_mov_reg_imm( func, ecx, (unsigned long) code ); + x86_call( func, ecx ); + x86_pop(func, ecx ); + } + + + x86_pop_gp( + func ); + + sse_movaps( + func, + make_xmm( xmm_dst ), + get_temp( TEMP_R0, 0 ) ); +} + +static void +emit_func_call_dst_src( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src, + void (PIPE_CDECL *code)() ) +{ + sse_movaps( + func, + get_temp( TEMP_R0, 1 ), + make_xmm( xmm_src ) ); + + emit_func_call_dst( + func, + xmm_dst, + code ); +} + +/** + * Low-level instruction translators. + */ + +static void +emit_abs( + struct x86_function *func, + unsigned xmm ) +{ + sse_andps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_7FFFFFFF_I, + TGSI_EXEC_TEMP_7FFFFFFF_C ) ); +} + +static void +emit_add( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + sse_addps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void PIPE_CDECL +cos4f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] = cosf( store[X + 0] ); + store[X + 1] = cosf( store[X + 1] ); + store[X + 2] = cosf( store[X + 2] ); + store[X + 3] = cosf( store[X + 3] ); +} + +static void +emit_cos( + struct x86_function *func, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_dst, + cos4f ); +} + +static void PIPE_CDECL +ex24f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] = powf( 2.0f, store[X + 0] ); + store[X + 1] = powf( 2.0f, store[X + 1] ); + store[X + 2] = powf( 2.0f, store[X + 2] ); + store[X + 3] = powf( 2.0f, store[X + 3] ); +} + +static void +emit_ex2( + struct x86_function *func, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_dst, + ex24f ); +} + +static void +emit_f2it( + struct x86_function *func, + unsigned xmm ) +{ + sse2_cvttps2dq( + func, + make_xmm( xmm ), + make_xmm( xmm ) ); +} + +static void PIPE_CDECL +flr4f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] = floorf( store[X + 0] ); + store[X + 1] = floorf( store[X + 1] ); + store[X + 2] = floorf( store[X + 2] ); + store[X + 3] = floorf( store[X + 3] ); +} + +static void +emit_flr( + struct x86_function *func, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_dst, + flr4f ); +} + +static void PIPE_CDECL +frc4f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] -= floorf( store[X + 0] ); + store[X + 1] -= floorf( store[X + 1] ); + store[X + 2] -= floorf( store[X + 2] ); + store[X + 3] -= floorf( store[X + 3] ); +} + +static void +emit_frc( + struct x86_function *func, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_dst, + frc4f ); +} + +static void PIPE_CDECL +lg24f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] = LOG2( store[X + 0] ); + store[X + 1] = LOG2( store[X + 1] ); + store[X + 2] = LOG2( store[X + 2] ); + store[X + 3] = LOG2( store[X + 3] ); +} + +static void +emit_lg2( + struct x86_function *func, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_dst, + lg24f ); +} + +static void +emit_MOV( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + sse_movups( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void +emit_mul (struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src) +{ + sse_mulps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void +emit_neg( + struct x86_function *func, + unsigned xmm ) +{ + sse_xorps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_80000000_I, + TGSI_EXEC_TEMP_80000000_C ) ); +} + +static void PIPE_CDECL +pow4f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] = powf( store[X + 0], store[X + 4] ); + store[X + 1] = powf( store[X + 1], store[X + 5] ); + store[X + 2] = powf( store[X + 2], store[X + 6] ); + store[X + 3] = powf( store[X + 3], store[X + 7] ); +} + +static void +emit_pow( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + emit_func_call_dst_src( + func, + xmm_dst, + xmm_src, + pow4f ); +} + +static void +emit_rcp ( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + /* On Intel CPUs at least, this is only accurate to 12 bits -- not + * good enough. Need to either emit a proper divide or use the + * iterative technique described below in emit_rsqrt(). + */ + sse2_rcpps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void +emit_rsqrt( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ +#if HIGH_PRECISION + /* Although rsqrtps() and rcpps() are low precision on some/all SSE + * implementations, it is possible to improve its precision at + * fairly low cost, using a newton/raphson step, as below: + * + * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a) + * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)] + * + * See: http://softwarecommunity.intel.com/articles/eng/1818.htm + */ + { + struct x86_reg dst = make_xmm( xmm_dst ); + struct x86_reg src = make_xmm( xmm_src ); + struct x86_reg tmp0 = make_xmm( 2 ); + struct x86_reg tmp1 = make_xmm( 3 ); + + assert( xmm_dst != xmm_src ); + assert( xmm_dst != 2 && xmm_dst != 3 ); + assert( xmm_src != 2 && xmm_src != 3 ); + + sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) ); + sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) ); + sse_rsqrtps( func, tmp1, src ); + sse_mulps( func, src, tmp1 ); + sse_mulps( func, dst, tmp1 ); + sse_mulps( func, src, tmp1 ); + sse_subps( func, tmp0, src ); + sse_mulps( func, dst, tmp0 ); + } +#else + /* On Intel CPUs at least, this is only accurate to 12 bits -- not + * good enough. + */ + sse_rsqrtps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +#endif +} + +static void +emit_setsign( + struct x86_function *func, + unsigned xmm ) +{ + sse_orps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_80000000_I, + TGSI_EXEC_TEMP_80000000_C ) ); +} + +static void PIPE_CDECL +sin4f( + float *store ) +{ + const unsigned X = 0; + + store[X + 0] = sinf( store[X + 0] ); + store[X + 1] = sinf( store[X + 1] ); + store[X + 2] = sinf( store[X + 2] ); + store[X + 3] = sinf( store[X + 3] ); +} + +static void +emit_sin (struct x86_function *func, + unsigned xmm_dst) +{ + emit_func_call_dst( + func, + xmm_dst, + sin4f ); +} + +static void +emit_sub( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + sse_subps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +/** + * Register fetch. + */ + +static void +emit_fetch( + struct x86_function *func, + unsigned xmm, + const struct tgsi_full_src_register *reg, + const unsigned chan_index ) +{ + unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); + + switch( swizzle ) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch( reg->SrcRegister.File ) { + case TGSI_FILE_CONSTANT: + emit_const( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + case TGSI_FILE_IMMEDIATE: + emit_immediate( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + case TGSI_FILE_INPUT: + emit_inputf( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + case TGSI_FILE_TEMPORARY: + emit_tempf( + func, + xmm, + reg->SrcRegister.Index, + swizzle ); + break; + + default: + assert( 0 ); + } + break; + + case TGSI_EXTSWIZZLE_ZERO: + emit_tempf( + func, + xmm, + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ); + break; + + case TGSI_EXTSWIZZLE_ONE: + emit_tempf( + func, + xmm, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); + break; + + default: + assert( 0 ); + } + + switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { + case TGSI_UTIL_SIGN_CLEAR: + emit_abs( func, xmm ); + break; + + case TGSI_UTIL_SIGN_SET: + emit_setsign( func, xmm ); + break; + + case TGSI_UTIL_SIGN_TOGGLE: + emit_neg( func, xmm ); + break; + + case TGSI_UTIL_SIGN_KEEP: + break; + } +} + +#define FETCH( FUNC, INST, XMM, INDEX, CHAN )\ + emit_fetch( FUNC, XMM, &(INST).FullSrcRegisters[INDEX], CHAN ) + +/** + * Register store. + */ + +static void +emit_store( + struct x86_function *func, + unsigned xmm, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + unsigned chan_index ) +{ + switch( reg->DstRegister.File ) { + case TGSI_FILE_OUTPUT: + emit_output( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; + + case TGSI_FILE_TEMPORARY: + emit_temps( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; + + case TGSI_FILE_ADDRESS: + emit_addrs( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; + + default: + assert( 0 ); + } + + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; + + case TGSI_SAT_ZERO_ONE: + /* assert( 0 ); */ + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + } +} + +#define STORE( FUNC, INST, XMM, INDEX, CHAN )\ + emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) + +/** + * High-level instruction translators. + */ + +static void +emit_kil( + struct x86_function *func, + const struct tgsi_full_src_register *reg ) +{ + unsigned uniquemask; + unsigned registers[4]; + unsigned nextregister = 0; + unsigned firstchan = ~0; + unsigned chan_index; + + /* This mask stores component bits that were already tested. Note that + * we test if the value is less than zero, so 1.0 and 0.0 need not to be + * tested. */ + uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + + FOR_EACH_CHANNEL( chan_index ) { + unsigned swizzle; + + /* unswizzle channel */ + swizzle = tgsi_util_get_full_src_register_extswizzle( + reg, + chan_index ); + + /* check if the component has not been already tested */ + if( !(uniquemask & (1 << swizzle)) ) { + uniquemask |= 1 << swizzle; + + /* allocate register */ + registers[chan_index] = nextregister; + emit_fetch( + func, + nextregister, + reg, + chan_index ); + nextregister++; + + /* mark the first channel used */ + if( firstchan == ~0 ) { + firstchan = chan_index; + } + } + } + + x86_push( + func, + x86_make_reg( file_REG32, reg_AX ) ); + x86_push( + func, + x86_make_reg( file_REG32, reg_DX ) ); + + FOR_EACH_CHANNEL( chan_index ) { + if( uniquemask & (1 << chan_index) ) { + sse_cmpps( + func, + make_xmm( registers[chan_index] ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ), + cc_LessThan ); + + if( chan_index == firstchan ) { + sse_pmovmskb( + func, + x86_make_reg( file_REG32, reg_AX ), + make_xmm( registers[chan_index] ) ); + } + else { + sse_pmovmskb( + func, + x86_make_reg( file_REG32, reg_DX ), + make_xmm( registers[chan_index] ) ); + x86_or( + func, + x86_make_reg( file_REG32, reg_AX ), + x86_make_reg( file_REG32, reg_DX ) ); + } + } + } + + x86_or( + func, + get_temp( + TGSI_EXEC_TEMP_KILMASK_I, + TGSI_EXEC_TEMP_KILMASK_C ), + x86_make_reg( file_REG32, reg_AX ) ); + + x86_pop( + func, + x86_make_reg( file_REG32, reg_DX ) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_AX ) ); +} + +static void +emit_setcc( + struct x86_function *func, + struct tgsi_full_instruction *inst, + enum sse_cc cc ) +{ + unsigned chan_index; + + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + sse_cmpps( + func, + make_xmm( 0 ), + make_xmm( 1 ), + cc ); + sse_andps( + func, + make_xmm( 0 ), + get_temp( + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ) ); + STORE( func, *inst, 0, 0, chan_index ); + } +} + +static void +emit_cmp( + struct x86_function *func, + struct tgsi_full_instruction *inst ) +{ + unsigned chan_index; + + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + sse_cmpps( + func, + make_xmm( 0 ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ), + cc_LessThan ); + sse_andps( + func, + make_xmm( 1 ), + make_xmm( 0 ) ); + sse_andnps( + func, + make_xmm( 0 ), + make_xmm( 2 ) ); + sse_orps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } +} + +static int +emit_instruction( + struct x86_function *func, + struct tgsi_full_instruction *inst ) +{ + unsigned chan_index; + + switch( inst->Instruction.Opcode ) { + case TGSI_OPCODE_ARL: +#if 0 + /* XXX this isn't working properly (see glean vertProg1 test) */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_f2it( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } +#else + return 0; +#endif + break; + + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LIT: + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C); + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) { + STORE( func, *inst, 0, 0, CHAN_X ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + STORE( func, *inst, 0, 0, CHAN_W ); + } + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( func, *inst, 0, 0, CHAN_X ); + sse_maxps( + func, + make_xmm( 0 ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ) ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + /* XMM[1] = SrcReg[0].yyyy */ + FETCH( func, *inst, 1, 0, CHAN_Y ); + /* XMM[1] = max(XMM[1], 0) */ + sse_maxps( + func, + make_xmm( 1 ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ) ); + /* XMM[2] = SrcReg[0].wwww */ + FETCH( func, *inst, 2, 0, CHAN_W ); + /* XMM[2] = min(XMM[2], 128.0) */ + sse_minps( + func, + make_xmm( 2 ), + get_temp( + TGSI_EXEC_TEMP_128_I, + TGSI_EXEC_TEMP_128_C ) ); + /* XMM[2] = max(XMM[2], -128.0) */ + sse_maxps( + func, + make_xmm( 2 ), + get_temp( + TGSI_EXEC_TEMP_MINUS_128_I, + TGSI_EXEC_TEMP_MINUS_128_C ) ); + emit_pow( func, 1, 2 ); + FETCH( func, *inst, 0, 0, CHAN_X ); + sse_xorps( + func, + make_xmm( 2 ), + make_xmm( 2 ) ); + sse_cmpps( + func, + make_xmm( 2 ), + make_xmm( 0 ), + cc_LessThanEqual ); + sse_andps( + func, + make_xmm( 2 ), + make_xmm( 1 ) ); + STORE( func, *inst, 2, 0, CHAN_Z ); + } + } + break; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_rcp( func, 0, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_rsqrt( func, 1, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 1, 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXP: + return 0; + break; + + case TGSI_OPCODE_LOG: + return 0; + break; + + case TGSI_OPCODE_MUL: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_mul( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_ADD: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_add( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul(func, 1, 2 ); + emit_add(func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_W ); + FETCH( func, *inst, 2, 1, CHAN_W ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DST: + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + FETCH( func, *inst, 0, 0, CHAN_Y ); + FETCH( func, *inst, 1, 1, CHAN_Y ); + emit_mul( func, 0, 1 ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + FETCH( func, *inst, 0, 0, CHAN_Z ); + STORE( func, *inst, 0, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + FETCH( func, *inst, 0, 1, CHAN_W ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MIN: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + sse_minps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_MAX: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + sse_maxps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + emit_setcc( func, inst, cc_LessThan ); + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + emit_setcc( func, inst, cc_NotLessThan ); + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + emit_mul( func, 0, 1 ); + emit_add( func, 0, 2 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SUB: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_sub( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LERP: + /* TGSI_OPCODE_LRP */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + emit_sub( func, 1, 2 ); + emit_mul( func, 0, 1 ); + emit_add( func, 0, 2 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CND: + return 0; + break; + + case TGSI_OPCODE_CND0: + return 0; + break; + + case TGSI_OPCODE_DOT2ADD: + /* TGSI_OPCODE_DP2A */ + return 0; + break; + + case TGSI_OPCODE_INDEX: + return 0; + break; + + case TGSI_OPCODE_NEGATE: + return 0; + break; + + case TGSI_OPCODE_FRAC: + /* TGSI_OPCODE_FRC */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_frc( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CLAMP: + return 0; + break; + + case TGSI_OPCODE_FLOOR: + /* TGSI_OPCODE_FLR */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_flr( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_ROUND: + return 0; + break; + + case TGSI_OPCODE_EXPBASE2: + /* TGSI_OPCODE_EX2 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_ex2( func, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LOGBASE2: + /* TGSI_OPCODE_LG2 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_lg2( func, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_POWER: + /* TGSI_OPCODE_POW */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_pow( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CROSSPRODUCT: + /* TGSI_OPCODE_XPD */ + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( func, *inst, 1, 1, CHAN_Z ); + FETCH( func, *inst, 3, 0, CHAN_Z ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + FETCH( func, *inst, 0, 0, CHAN_Y ); + FETCH( func, *inst, 4, 1, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + emit_MOV( func, 2, 0 ); + emit_mul( func, 2, 1 ); + emit_MOV( func, 5, 3 ); + emit_mul( func, 5, 4 ); + emit_sub( func, 2, 5 ); + STORE( func, *inst, 2, 0, CHAN_X ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + FETCH( func, *inst, 2, 1, CHAN_X ); + FETCH( func, *inst, 5, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + emit_mul( func, 3, 2 ); + emit_mul( func, 1, 5 ); + emit_sub( func, 3, 1 ); + STORE( func, *inst, 3, 0, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + emit_mul( func, 5, 4 ); + emit_mul( func, 0, 2 ); + emit_sub( func, 5, 0 ); + STORE( func, *inst, 5, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MULTIPLYMATRIX: + return 0; + break; + + case TGSI_OPCODE_ABS: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_abs( func, 0) ; + + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_RCC: + return 0; + break; + + case TGSI_OPCODE_DPH: + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 1, CHAN_W ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_COS: + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_cos( func, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDX: + return 0; + break; + + case TGSI_OPCODE_DDY: + return 0; + break; + + case TGSI_OPCODE_KIL: + emit_kil( func, &inst->FullSrcRegisters[0] ); + break; + + case TGSI_OPCODE_PK2H: + return 0; + break; + + case TGSI_OPCODE_PK2US: + return 0; + break; + + case TGSI_OPCODE_PK4B: + return 0; + break; + + case TGSI_OPCODE_PK4UB: + return 0; + break; + + case TGSI_OPCODE_RFL: + return 0; + break; + + case TGSI_OPCODE_SEQ: + return 0; + break; + + case TGSI_OPCODE_SFL: + return 0; + break; + + case TGSI_OPCODE_SGT: + return 0; + break; + + case TGSI_OPCODE_SIN: + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_sin( func, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLE: + return 0; + break; + + case TGSI_OPCODE_SNE: + return 0; + break; + + case TGSI_OPCODE_STR: + return 0; + break; + + case TGSI_OPCODE_TEX: + if (0) { + /* Disable dummy texture code: + */ + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + } + else { + return 0; + } + break; + + case TGSI_OPCODE_TXD: + return 0; + break; + + case TGSI_OPCODE_UP2H: + return 0; + break; + + case TGSI_OPCODE_UP2US: + return 0; + break; + + case TGSI_OPCODE_UP4B: + return 0; + break; + + case TGSI_OPCODE_UP4UB: + return 0; + break; + + case TGSI_OPCODE_X2D: + return 0; + break; + + case TGSI_OPCODE_ARA: + return 0; + break; + + case TGSI_OPCODE_ARR: + return 0; + break; + + case TGSI_OPCODE_BRA: + return 0; + break; + + case TGSI_OPCODE_CAL: + return 0; + break; + + case TGSI_OPCODE_RET: + emit_ret( func ); + break; + + case TGSI_OPCODE_END: + break; + + case TGSI_OPCODE_SSG: + return 0; + break; + + case TGSI_OPCODE_CMP: + emit_cmp (func, inst); + break; + + case TGSI_OPCODE_SCS: + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_cos( func, 0 ); + STORE( func, *inst, 0, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_sin( func, 0 ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ); + STORE( func, *inst, 0, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_ONE_I, + TGSI_EXEC_TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_TXB: + return 0; + break; + + case TGSI_OPCODE_NRM: + return 0; + break; + + case TGSI_OPCODE_DIV: + return 0; + break; + + case TGSI_OPCODE_DP2: + return 0; + break; + + case TGSI_OPCODE_TXL: + return 0; + break; + + case TGSI_OPCODE_BRK: + return 0; + break; + + case TGSI_OPCODE_IF: + return 0; + break; + + case TGSI_OPCODE_LOOP: + return 0; + break; + + case TGSI_OPCODE_REP: + return 0; + break; + + case TGSI_OPCODE_ELSE: + return 0; + break; + + case TGSI_OPCODE_ENDIF: + return 0; + break; + + case TGSI_OPCODE_ENDLOOP: + return 0; + break; + + case TGSI_OPCODE_ENDREP: + return 0; + break; + + case TGSI_OPCODE_PUSHA: + return 0; + break; + + case TGSI_OPCODE_POPA: + return 0; + break; + + case TGSI_OPCODE_CEIL: + return 0; + break; + + case TGSI_OPCODE_I2F: + return 0; + break; + + case TGSI_OPCODE_NOT: + return 0; + break; + + case TGSI_OPCODE_TRUNC: + return 0; + break; + + case TGSI_OPCODE_SHL: + return 0; + break; + + case TGSI_OPCODE_SHR: + return 0; + break; + + case TGSI_OPCODE_AND: + return 0; + break; + + case TGSI_OPCODE_OR: + return 0; + break; + + case TGSI_OPCODE_MOD: + return 0; + break; + + case TGSI_OPCODE_XOR: + return 0; + break; + + case TGSI_OPCODE_SAD: + return 0; + break; + + case TGSI_OPCODE_TXF: + return 0; + break; + + case TGSI_OPCODE_TXQ: + return 0; + break; + + case TGSI_OPCODE_CONT: + return 0; + break; + + case TGSI_OPCODE_EMIT: + return 0; + break; + + case TGSI_OPCODE_ENDPRIM: + return 0; + break; + + default: + return 0; + } + + return 1; +} + +static void +emit_declaration( + struct x86_function *func, + struct tgsi_full_declaration *decl ) +{ + if( decl->Declaration.File == TGSI_FILE_INPUT ) { + unsigned first, last, mask; + unsigned i, j; + + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + switch( decl->Declaration.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + emit_coef_a0( func, 0, i, j ); + emit_inputs( func, 0, i, j ); + break; + + case TGSI_INTERPOLATE_LINEAR: + emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); + emit_coef_dadx( func, 1, i, j ); + emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); + emit_coef_dady( func, 3, i, j ); + emit_mul( func, 0, 1 ); /* x * dadx */ + emit_coef_a0( func, 4, i, j ); + emit_mul( func, 2, 3 ); /* y * dady */ + emit_add( func, 0, 4 ); /* x * dadx + a0 */ + emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ + emit_inputs( func, 0, i, j ); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); + emit_coef_dadx( func, 1, i, j ); + emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); + emit_coef_dady( func, 3, i, j ); + emit_mul( func, 0, 1 ); /* x * dadx */ + emit_tempf( func, 4, 0, TGSI_SWIZZLE_W ); + emit_coef_a0( func, 5, i, j ); + emit_rcp( func, 4, 4 ); /* 1.0 / w */ + emit_mul( func, 2, 3 ); /* y * dady */ + emit_add( func, 0, 5 ); /* x * dadx + a0 */ + emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ + emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */ + emit_inputs( func, 0, i, j ); + break; + + default: + assert( 0 ); + break; + } + } + } + } + } +} + +static void aos_to_soa( struct x86_function *func, + uint arg_aos, + uint arg_soa, + uint arg_num, + uint arg_stride ) +{ + struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX ); + struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX ); + struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX ); + struct x86_reg stride = x86_make_reg( file_REG32, reg_DX ); + int inner_loop; + + + /* Save EBX */ + x86_push( func, x86_make_reg( file_REG32, reg_BX ) ); + + x86_mov( func, aos_input, x86_fn_arg( func, arg_aos ) ); + x86_mov( func, soa_input, x86_fn_arg( func, arg_soa ) ); + x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) ); + x86_mov( func, stride, x86_fn_arg( func, arg_stride ) ); + + /* do */ + inner_loop = x86_get_label( func ); + { + x86_push( func, aos_input ); + sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); + sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, stride ); + sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); + sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, stride ); + sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); + sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, stride ); + sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); + sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); + x86_pop( func, aos_input ); + + sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); + sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); + sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 ); + sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd ); + sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 ); + sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd ); + + sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) ); + sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) ); + sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) ); + sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) ); + + /* Advance to next input */ + x86_lea( func, aos_input, x86_make_disp(aos_input, 16) ); + x86_lea( func, soa_input, x86_make_disp(soa_input, 64) ); + } + /* while --num_inputs */ + x86_dec( func, num_inputs ); + x86_jcc( func, cc_NE, inner_loop ); + + /* Restore EBX */ + x86_pop( func, aos_input ); +} + +static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride ) +{ + struct x86_reg soa_output; + struct x86_reg aos_output; + struct x86_reg num_outputs; + struct x86_reg temp; + int inner_loop; + + soa_output = x86_make_reg( file_REG32, reg_AX ); + aos_output = x86_make_reg( file_REG32, reg_BX ); + num_outputs = x86_make_reg( file_REG32, reg_CX ); + temp = x86_make_reg( file_REG32, reg_DX ); + + /* Save EBX */ + x86_push( func, aos_output ); + + x86_mov( func, soa_output, x86_fn_arg( func, soa ) ); + x86_mov( func, aos_output, x86_fn_arg( func, aos ) ); + x86_mov( func, num_outputs, x86_fn_arg( func, num ) ); + + /* do */ + inner_loop = x86_get_label( func ); + { + sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) ); + sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) ); + sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) ); + sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) ); + + sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); + sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); + sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) ); + sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) ); + sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) ); + sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) ); + + x86_mov( func, temp, x86_fn_arg( func, stride ) ); + x86_push( func, aos_output ); + sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); + sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); + x86_add( func, aos_output, temp ); + sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); + sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); + x86_add( func, aos_output, temp ); + sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); + sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); + x86_add( func, aos_output, temp ); + sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); + sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); + x86_pop( func, aos_output ); + + /* Advance to next output */ + x86_lea( func, aos_output, x86_make_disp(aos_output, 16) ); + x86_lea( func, soa_output, x86_make_disp(soa_output, 64) ); + } + /* while --num_outputs */ + x86_dec( func, num_outputs ); + x86_jcc( func, cc_NE, inner_loop ); + + /* Restore EBX */ + x86_pop( func, aos_output ); +} + +/** + * Translate a TGSI vertex/fragment shader to SSE2 code. + * Slightly different things are done for vertex vs. fragment shaders. + * + * Note that fragment shaders are responsible for interpolating shader + * inputs. Because on x86 we have only 4 GP registers, and here we + * have 5 shader arguments (input, output, const, temp and coef), the + * code is split into two phases -- DECLARATION and INSTRUCTION phase. + * GP register holding the output argument is aliased with the coeff + * argument, as outputs are not needed in the DECLARATION phase. + * + * \param tokens the TGSI input shader + * \param func the output SSE code/function + * \param immediates buffer to place immediates, later passed to SSE func + * \param return 1 for success, 0 if translation failed + */ +unsigned +tgsi_emit_sse2( + const struct tgsi_token *tokens, + struct x86_function *func, + float (*immediates)[4], + boolean do_swizzles ) +{ + struct tgsi_parse_context parse; + boolean instruction_phase = FALSE; + unsigned ok = 1; + uint num_immediates = 0; + + func->csr = func->store; + + tgsi_parse_init( &parse, tokens ); + + /* Can't just use EDI, EBX without save/restoring them: + */ + x86_push( + func, + get_immediate_base() ); + + x86_push( + func, + get_temp_base() ); + + + /* + * Different function args for vertex/fragment shaders: + */ + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + /* DECLARATION phase, do not load output argument. */ + x86_mov( + func, + get_input_base(), + x86_fn_arg( func, 1 ) ); + /* skipping outputs argument here */ + x86_mov( + func, + get_const_base(), + x86_fn_arg( func, 3 ) ); + x86_mov( + func, + get_temp_base(), + x86_fn_arg( func, 4 ) ); + x86_mov( + func, + get_coef_base(), + x86_fn_arg( func, 5 ) ); + x86_mov( + func, + get_immediate_base(), + x86_fn_arg( func, 6 ) ); + } + else { + assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX); + + if (do_swizzles) + aos_to_soa( func, + 6, /* aos_input */ + 1, /* machine->input */ + 7, /* num_inputs */ + 8 ); /* input_stride */ + + x86_mov( + func, + get_input_base(), + x86_fn_arg( func, 1 ) ); + x86_mov( + func, + get_output_base(), + x86_fn_arg( func, 2 ) ); + x86_mov( + func, + get_const_base(), + x86_fn_arg( func, 3 ) ); + x86_mov( + func, + get_temp_base(), + x86_fn_arg( func, 4 ) ); + x86_mov( + func, + get_immediate_base(), + x86_fn_arg( func, 5 ) ); + } + + while( !tgsi_parse_end_of_tokens( &parse ) && ok ) { + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + emit_declaration( + func, + &parse.FullToken.FullDeclaration ); + } + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + if( !instruction_phase ) { + /* INSTRUCTION phase, overwrite coeff with output. */ + instruction_phase = TRUE; + x86_mov( + func, + get_output_base(), + x86_fn_arg( func, 2 ) ); + } + } + + ok = emit_instruction( + func, + &parse.FullToken.FullInstruction ); + + if (!ok) { + debug_printf("failed to translate tgsi opcode %d to SSE (%s)\n", + parse.FullToken.FullInstruction.Instruction.Opcode, + parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? + "vertex shader" : "fragment shader"); + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* simply copy the immediate values into the next immediates[] slot */ + { + const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + uint i; + assert(size <= 4); + assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); + for( i = 0; i < size; i++ ) { + immediates[num_immediates][i] = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + } +#if 0 + debug_printf("SSE FS immediate[%d] = %f %f %f %f\n", + num_immediates, + immediates[num_immediates][0], + immediates[num_immediates][1], + immediates[num_immediates][2], + immediates[num_immediates][3]); +#endif + num_immediates++; + } + break; + + default: + ok = 0; + assert( 0 ); + } + } + + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) { + if (do_swizzles) + soa_to_aos( func, 9, 2, 10, 11 ); + } + + /* Can't just use EBX, EDI without save/restoring them: + */ + x86_pop( + func, + get_temp_base() ); + + x86_pop( + func, + get_immediate_base() ); + + emit_ret( func ); + + tgsi_parse_free( &parse ); + + return ok; +} + +#endif /* PIPE_ARCH_X86 */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/tgsi_sse2.h new file mode 100644 index 0000000000..af838b2a25 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.h @@ -0,0 +1,49 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_SSE2_H +#define TGSI_SSE2_H + +#if defined __cplusplus +extern "C" { +#endif + +struct tgsi_token; +struct x86_function; + +unsigned +tgsi_emit_sse2( + const struct tgsi_token *tokens, + struct x86_function *function, + float (*immediates)[4], + boolean do_swizzles ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_SSE2_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c new file mode 100644 index 0000000000..35cb3055bb --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -0,0 +1,1221 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "tgsi_text.h" +#include "tgsi_build.h" +#include "tgsi_parse.h" +#include "tgsi_sanity.h" +#include "tgsi_util.h" + +static boolean is_alpha_underscore( const char *cur ) +{ + return + (*cur >= 'a' && *cur <= 'z') || + (*cur >= 'A' && *cur <= 'Z') || + *cur == '_'; +} + +static boolean is_digit( const char *cur ) +{ + return *cur >= '0' && *cur <= '9'; +} + +static boolean is_digit_alpha_underscore( const char *cur ) +{ + return is_digit( cur ) || is_alpha_underscore( cur ); +} + +static boolean str_match_no_case( const char **pcur, const char *str ) +{ + const char *cur = *pcur; + + while (*str != '\0' && *str == toupper( *cur )) { + str++; + cur++; + } + if (*str == '\0') { + *pcur = cur; + return TRUE; + } + return FALSE; +} + +/* Eat zero or more whitespaces. + */ +static void eat_opt_white( const char **pcur ) +{ + while (**pcur == ' ' || **pcur == '\t' || **pcur == '\n') + (*pcur)++; +} + +/* Eat one or more whitespaces. + * Return TRUE if at least one whitespace eaten. + */ +static boolean eat_white( const char **pcur ) +{ + const char *cur = *pcur; + + eat_opt_white( pcur ); + return *pcur > cur; +} + +/* Parse unsigned integer. + * No checks for overflow. + */ +static boolean parse_uint( const char **pcur, uint *val ) +{ + const char *cur = *pcur; + + if (is_digit( cur )) { + *val = *cur++ - '0'; + while (is_digit( cur )) + *val = *val * 10 + *cur++ - '0'; + *pcur = cur; + return TRUE; + } + return FALSE; +} + +/* Parse floating point. + */ +static boolean parse_float( const char **pcur, float *val ) +{ + const char *cur = *pcur; + boolean integral_part = FALSE; + boolean fractional_part = FALSE; + + *val = (float) atof( cur ); + + if (*cur == '-' || *cur == '+') + cur++; + if (is_digit( cur )) { + cur++; + integral_part = TRUE; + while (is_digit( cur )) + cur++; + } + if (*cur == '.') { + cur++; + if (is_digit( cur )) { + cur++; + fractional_part = TRUE; + while (is_digit( cur )) + cur++; + } + } + if (!integral_part && !fractional_part) + return FALSE; + if (toupper( *cur ) == 'E') { + cur++; + if (*cur == '-' || *cur == '+') + cur++; + if (is_digit( cur )) { + cur++; + while (is_digit( cur )) + cur++; + } + else + return FALSE; + } + *pcur = cur; + return TRUE; +} + +struct translate_ctx +{ + const char *text; + const char *cur; + struct tgsi_token *tokens; + struct tgsi_token *tokens_cur; + struct tgsi_token *tokens_end; + struct tgsi_header *header; +}; + +static void report_error( struct translate_ctx *ctx, const char *msg ) +{ + debug_printf( "\nError: %s", msg ); +} + +/* Parse shader header. + * Return TRUE for one of the following headers. + * FRAG1.1 + * GEOM1.1 + * VERT1.1 + */ +static boolean parse_header( struct translate_ctx *ctx ) +{ + uint processor; + + if (str_match_no_case( &ctx->cur, "FRAG1.1" )) + processor = TGSI_PROCESSOR_FRAGMENT; + else if (str_match_no_case( &ctx->cur, "VERT1.1" )) + processor = TGSI_PROCESSOR_VERTEX; + else if (str_match_no_case( &ctx->cur, "GEOM1.1" )) + processor = TGSI_PROCESSOR_GEOMETRY; + else { + report_error( ctx, "Unknown header" ); + return FALSE; + } + + if (ctx->tokens_cur >= ctx->tokens_end) + return FALSE; + *(struct tgsi_version *) ctx->tokens_cur++ = tgsi_build_version(); + + if (ctx->tokens_cur >= ctx->tokens_end) + return FALSE; + ctx->header = (struct tgsi_header *) ctx->tokens_cur++; + *ctx->header = tgsi_build_header(); + + if (ctx->tokens_cur >= ctx->tokens_end) + return FALSE; + *(struct tgsi_processor *) ctx->tokens_cur++ = tgsi_build_processor( processor, ctx->header ); + + return TRUE; +} + +static boolean parse_label( struct translate_ctx *ctx, uint *val ) +{ + const char *cur = ctx->cur; + + if (parse_uint( &cur, val )) { + eat_opt_white( &cur ); + if (*cur == ':') { + cur++; + ctx->cur = cur; + return TRUE; + } + } + return FALSE; +} + +static const char *file_names[TGSI_FILE_COUNT] = +{ + "NULL", + "CONST", + "IN", + "OUT", + "TEMP", + "SAMP", + "ADDR", + "IMM" +}; + +static boolean +parse_file( const char **pcur, uint *file ) +{ + uint i; + + for (i = 0; i < TGSI_FILE_COUNT; i++) { + const char *cur = *pcur; + + if (str_match_no_case( &cur, file_names[i] )) { + if (!is_digit_alpha_underscore( cur )) { + *pcur = cur; + *file = i; + return TRUE; + } + } + } + return FALSE; +} + +static boolean +parse_opt_writemask( + struct translate_ctx *ctx, + uint *writemask ) +{ + const char *cur; + + cur = ctx->cur; + eat_opt_white( &cur ); + if (*cur == '.') { + cur++; + *writemask = TGSI_WRITEMASK_NONE; + eat_opt_white( &cur ); + if (toupper( *cur ) == 'X') { + cur++; + *writemask |= TGSI_WRITEMASK_X; + } + if (toupper( *cur ) == 'Y') { + cur++; + *writemask |= TGSI_WRITEMASK_Y; + } + if (toupper( *cur ) == 'Z') { + cur++; + *writemask |= TGSI_WRITEMASK_Z; + } + if (toupper( *cur ) == 'W') { + cur++; + *writemask |= TGSI_WRITEMASK_W; + } + + if (*writemask == TGSI_WRITEMASK_NONE) { + report_error( ctx, "Writemask expected" ); + return FALSE; + } + + ctx->cur = cur; + } + else { + *writemask = TGSI_WRITEMASK_XYZW; + } + return TRUE; +} + +/* ::= `[' + */ +static boolean +parse_register_file_bracket( + struct translate_ctx *ctx, + uint *file ) +{ + if (!parse_file( &ctx->cur, file )) { + report_error( ctx, "Unknown register file" ); + return FALSE; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '[') { + report_error( ctx, "Expected `['" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +/* ::= + */ +static boolean +parse_register_file_bracket_index( + struct translate_ctx *ctx, + uint *file, + int *index ) +{ + uint uindex; + + if (!parse_register_file_bracket( ctx, file )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal unsigned integer" ); + return FALSE; + } + *index = (int) uindex; + return TRUE; +} + +/* Parse destination register operand. + * ::= `]' + */ +static boolean +parse_register_dst( + struct translate_ctx *ctx, + uint *file, + int *index ) +{ + if (!parse_register_file_bracket_index( ctx, file, index )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +/* Parse source register operand. + * ::= `]' | + * `]' | + * `+' `]' | + * `-' `]' + */ +static boolean +parse_register_src( + struct translate_ctx *ctx, + uint *file, + int *index, + uint *ind_file, + int *ind_index ) +{ + const char *cur; + uint uindex; + + if (!parse_register_file_bracket( ctx, file )) + return FALSE; + eat_opt_white( &ctx->cur ); + cur = ctx->cur; + if (parse_file( &cur, ind_file )) { + if (!parse_register_dst( ctx, ind_file, ind_index )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (*ctx->cur == '+' || *ctx->cur == '-') { + boolean negate; + + negate = *ctx->cur == '-'; + ctx->cur++; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal unsigned integer" ); + return FALSE; + } + if (negate) + *index = -(int) uindex; + else + *index = (int) uindex; + } + else { + *index = 0; + } + } + else { + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal unsigned integer" ); + return FALSE; + } + *index = (int) uindex; + *ind_file = TGSI_FILE_NULL; + *ind_index = 0; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +/* Parse register declaration. + * ::= `]' | + * `..' `]' + */ +static boolean +parse_register_dcl( + struct translate_ctx *ctx, + uint *file, + int *first, + int *last ) +{ + if (!parse_register_file_bracket_index( ctx, file, first )) + return FALSE; + eat_opt_white( &ctx->cur ); + if (ctx->cur[0] == '.' && ctx->cur[1] == '.') { + uint uindex; + + ctx->cur += 2; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &uindex )) { + report_error( ctx, "Expected literal integer" ); + return FALSE; + } + *last = (int) uindex; + eat_opt_white( &ctx->cur ); + } + else { + *last = *first; + } + if (*ctx->cur != ']') { + report_error( ctx, "Expected `]' or `..'" ); + return FALSE; + } + ctx->cur++; + return TRUE; +} + +static const char *modulate_names[TGSI_MODULATE_COUNT] = +{ + "_1X", + "_2X", + "_4X", + "_8X", + "_D2", + "_D4", + "_D8" +}; + +static boolean +parse_dst_operand( + struct translate_ctx *ctx, + struct tgsi_full_dst_register *dst ) +{ + uint file; + int index; + uint writemask; + const char *cur; + + if (!parse_register_dst( ctx, &file, &index )) + return FALSE; + + cur = ctx->cur; + eat_opt_white( &cur ); + if (*cur == '_') { + uint i; + + for (i = 0; i < TGSI_MODULATE_COUNT; i++) { + if (str_match_no_case( &cur, modulate_names[i] )) { + if (!is_digit_alpha_underscore( cur )) { + dst->DstRegisterExtModulate.Modulate = i; + ctx->cur = cur; + break; + } + } + } + } + + if (!parse_opt_writemask( ctx, &writemask )) + return FALSE; + + dst->DstRegister.File = file; + dst->DstRegister.Index = index; + dst->DstRegister.WriteMask = writemask; + return TRUE; +} + +static boolean +parse_optional_swizzle( + struct translate_ctx *ctx, + uint swizzle[4], + boolean *parsed_swizzle, + boolean *parsed_extswizzle ) +{ + const char *cur = ctx->cur; + + *parsed_swizzle = FALSE; + *parsed_extswizzle = FALSE; + + eat_opt_white( &cur ); + if (*cur == '.') { + uint i; + + cur++; + eat_opt_white( &cur ); + for (i = 0; i < 4; i++) { + if (toupper( *cur ) == 'X') + swizzle[i] = TGSI_SWIZZLE_X; + else if (toupper( *cur ) == 'Y') + swizzle[i] = TGSI_SWIZZLE_Y; + else if (toupper( *cur ) == 'Z') + swizzle[i] = TGSI_SWIZZLE_Z; + else if (toupper( *cur ) == 'W') + swizzle[i] = TGSI_SWIZZLE_W; + else { + if (*cur == '0') + swizzle[i] = TGSI_EXTSWIZZLE_ZERO; + else if (*cur == '1') + swizzle[i] = TGSI_EXTSWIZZLE_ONE; + else { + report_error( ctx, "Expected register swizzle component `x', `y', `z', `w', `0' or `1'" ); + return FALSE; + } + *parsed_extswizzle = TRUE; + } + cur++; + } + *parsed_swizzle = TRUE; + ctx->cur = cur; + } + return TRUE; +} + +static boolean +parse_src_operand( + struct translate_ctx *ctx, + struct tgsi_full_src_register *src ) +{ + const char *cur; + float value; + uint file; + int index; + uint ind_file; + int ind_index; + uint swizzle[4]; + boolean parsed_swizzle; + boolean parsed_extswizzle; + + if (*ctx->cur == '-') { + cur = ctx->cur; + cur++; + eat_opt_white( &cur ); + if (*cur == '(') { + cur++; + src->SrcRegisterExtMod.Negate = 1; + eat_opt_white( &cur ); + ctx->cur = cur; + } + } + + if (*ctx->cur == '|') { + ctx->cur++; + eat_opt_white( &ctx->cur ); + src->SrcRegisterExtMod.Absolute = 1; + } + + if (*ctx->cur == '-') { + ctx->cur++; + eat_opt_white( &ctx->cur ); + src->SrcRegister.Negate = 1; + } + + cur = ctx->cur; + if (parse_float( &cur, &value )) { + if (value == 2.0f) { + eat_opt_white( &cur ); + if (*cur != '*') { + report_error( ctx, "Expected `*'" ); + return FALSE; + } + cur++; + if (*cur != '(') { + report_error( ctx, "Expected `('" ); + return FALSE; + } + cur++; + src->SrcRegisterExtMod.Scale2X = 1; + eat_opt_white( &cur ); + ctx->cur = cur; + } + } + + if (*ctx->cur == '(') { + ctx->cur++; + eat_opt_white( &ctx->cur ); + src->SrcRegisterExtMod.Bias = 1; + } + + cur = ctx->cur; + if (parse_float( &cur, &value )) { + if (value == 1.0f) { + eat_opt_white( &cur ); + if (*cur != '-') { + report_error( ctx, "Expected `-'" ); + return FALSE; + } + cur++; + if (*cur != '(') { + report_error( ctx, "Expected `('" ); + return FALSE; + } + cur++; + src->SrcRegisterExtMod.Complement = 1; + eat_opt_white( &cur ); + ctx->cur = cur; + } + } + + if (!parse_register_src( ctx, &file, &index, &ind_file, &ind_index )) + return FALSE; + src->SrcRegister.File = file; + src->SrcRegister.Index = index; + if (ind_file != TGSI_FILE_NULL) { + src->SrcRegister.Indirect = 1; + src->SrcRegisterInd.File = ind_file; + src->SrcRegisterInd.Index = ind_index; + } + + /* Parse optional swizzle. + */ + if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle, &parsed_extswizzle )) { + if (parsed_extswizzle) { + assert( parsed_swizzle ); + + src->SrcRegisterExtSwz.ExtSwizzleX = swizzle[0]; + src->SrcRegisterExtSwz.ExtSwizzleY = swizzle[1]; + src->SrcRegisterExtSwz.ExtSwizzleZ = swizzle[2]; + src->SrcRegisterExtSwz.ExtSwizzleW = swizzle[3]; + } + else if (parsed_swizzle) { + src->SrcRegister.SwizzleX = swizzle[0]; + src->SrcRegister.SwizzleY = swizzle[1]; + src->SrcRegister.SwizzleZ = swizzle[2]; + src->SrcRegister.SwizzleW = swizzle[3]; + } + } + + if (src->SrcRegisterExtMod.Complement) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ')') { + report_error( ctx, "Expected `)'" ); + return FALSE; + } + ctx->cur++; + } + + if (src->SrcRegisterExtMod.Bias) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ')') { + report_error( ctx, "Expected `)'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '-') { + report_error( ctx, "Expected `-'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + if (!parse_float( &ctx->cur, &value )) { + report_error( ctx, "Expected literal floating point" ); + return FALSE; + } + if (value != 0.5f) { + report_error( ctx, "Expected 0.5" ); + return FALSE; + } + } + + if (src->SrcRegisterExtMod.Scale2X) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ')') { + report_error( ctx, "Expected `)'" ); + return FALSE; + } + ctx->cur++; + } + + if (src->SrcRegisterExtMod.Absolute) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '|') { + report_error( ctx, "Expected `|'" ); + return FALSE; + } + ctx->cur++; + } + + if (src->SrcRegisterExtMod.Negate) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ')') { + report_error( ctx, "Expected `)'" ); + return FALSE; + } + ctx->cur++; + } + + return TRUE; +} + +struct opcode_info +{ + uint num_dst; + uint num_src; + uint is_tex; + uint is_branch; + const char *mnemonic; +}; + +static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] = +{ + { 1, 1, 0, 0, "ARL" }, + { 1, 1, 0, 0, "MOV" }, + { 1, 1, 0, 0, "LIT" }, + { 1, 1, 0, 0, "RCP" }, + { 1, 1, 0, 0, "RSQ" }, + { 1, 1, 0, 0, "EXP" }, + { 1, 1, 0, 0, "LOG" }, + { 1, 2, 0, 0, "MUL" }, + { 1, 2, 0, 0, "ADD" }, + { 1, 2, 0, 0, "DP3" }, + { 1, 2, 0, 0, "DP4" }, + { 1, 2, 0, 0, "DST" }, + { 1, 2, 0, 0, "MIN" }, + { 1, 2, 0, 0, "MAX" }, + { 1, 2, 0, 0, "SLT" }, + { 1, 2, 0, 0, "SGE" }, + { 1, 3, 0, 0, "MAD" }, + { 1, 2, 0, 0, "SUB" }, + { 1, 3, 0, 0, "LERP" }, + { 1, 3, 0, 0, "CND" }, + { 1, 3, 0, 0, "CND0" }, + { 1, 3, 0, 0, "DOT2ADD" }, + { 1, 2, 0, 0, "INDEX" }, + { 1, 1, 0, 0, "NEGATE" }, + { 1, 1, 0, 0, "FRAC" }, + { 1, 3, 0, 0, "CLAMP" }, + { 1, 1, 0, 0, "FLOOR" }, + { 1, 1, 0, 0, "ROUND" }, + { 1, 1, 0, 0, "EXPBASE2" }, + { 1, 1, 0, 0, "LOGBASE2" }, + { 1, 2, 0, 0, "POWER" }, + { 1, 2, 0, 0, "CROSSPRODUCT" }, + { 1, 2, 0, 0, "MULTIPLYMATRIX" }, + { 1, 1, 0, 0, "ABS" }, + { 1, 1, 0, 0, "RCC" }, + { 1, 2, 0, 0, "DPH" }, + { 1, 1, 0, 0, "COS" }, + { 1, 1, 0, 0, "DDX" }, + { 1, 1, 0, 0, "DDY" }, + { 0, 1, 0, 0, "KILP" }, + { 1, 1, 0, 0, "PK2H" }, + { 1, 1, 0, 0, "PK2US" }, + { 1, 1, 0, 0, "PK4B" }, + { 1, 1, 0, 0, "PK4UB" }, + { 1, 2, 0, 0, "RFL" }, + { 1, 2, 0, 0, "SEQ" }, + { 1, 2, 0, 0, "SFL" }, + { 1, 2, 0, 0, "SGT" }, + { 1, 1, 0, 0, "SIN" }, + { 1, 2, 0, 0, "SLE" }, + { 1, 2, 0, 0, "SNE" }, + { 1, 2, 0, 0, "STR" }, + { 1, 2, 1, 0, "TEX" }, + { 1, 4, 1, 0, "TXD" }, + { 1, 2, 1, 0, "TXP" }, + { 1, 1, 0, 0, "UP2H" }, + { 1, 1, 0, 0, "UP2US" }, + { 1, 1, 0, 0, "UP4B" }, + { 1, 1, 0, 0, "UP4UB" }, + { 1, 3, 0, 0, "X2D" }, + { 1, 1, 0, 0, "ARA" }, + { 1, 1, 0, 0, "ARR" }, + { 0, 1, 0, 0, "BRA" }, + { 0, 0, 0, 1, "CAL" }, + { 0, 0, 0, 0, "RET" }, + { 1, 1, 0, 0, "SSG" }, + { 1, 3, 0, 0, "CMP" }, + { 1, 1, 0, 0, "SCS" }, + { 1, 2, 1, 0, "TXB" }, + { 1, 1, 0, 0, "NRM" }, + { 1, 2, 0, 0, "DIV" }, + { 1, 2, 0, 0, "DP2" }, + { 1, 2, 1, 0, "TXL" }, + { 0, 0, 0, 0, "BRK" }, + { 0, 1, 0, 1, "IF" }, + { 0, 0, 0, 0, "LOOP" }, + { 0, 1, 0, 0, "REP" }, + { 0, 0, 0, 1, "ELSE" }, + { 0, 0, 0, 0, "ENDIF" }, + { 0, 0, 0, 0, "ENDLOOP" }, + { 0, 0, 0, 0, "ENDREP" }, + { 0, 1, 0, 0, "PUSHA" }, + { 1, 0, 0, 0, "POPA" }, + { 1, 1, 0, 0, "CEIL" }, + { 1, 1, 0, 0, "I2F" }, + { 1, 1, 0, 0, "NOT" }, + { 1, 1, 0, 0, "TRUNC" }, + { 1, 2, 0, 0, "SHL" }, + { 1, 2, 0, 0, "SHR" }, + { 1, 2, 0, 0, "AND" }, + { 1, 2, 0, 0, "OR" }, + { 1, 2, 0, 0, "MOD" }, + { 1, 2, 0, 0, "XOR" }, + { 1, 3, 0, 0, "SAD" }, + { 1, 2, 1, 0, "TXF" }, + { 1, 2, 1, 0, "TXQ" }, + { 0, 0, 0, 0, "CONT" }, + { 0, 0, 0, 0, "EMIT" }, + { 0, 0, 0, 0, "ENDPRIM" }, + { 0, 0, 0, 1, "BGNLOOP2" }, + { 0, 0, 0, 0, "BGNSUB" }, + { 0, 0, 0, 1, "ENDLOOP2" }, + { 0, 0, 0, 0, "ENDSUB" }, + { 1, 1, 0, 0, "NOISE1" }, + { 1, 1, 0, 0, "NOISE2" }, + { 1, 1, 0, 0, "NOISE3" }, + { 1, 1, 0, 0, "NOISE4" }, + { 0, 0, 0, 0, "NOP" }, + { 1, 2, 0, 0, "M4X3" }, + { 1, 2, 0, 0, "M3X4" }, + { 1, 2, 0, 0, "M3X3" }, + { 1, 2, 0, 0, "M3X2" }, + { 1, 1, 0, 0, "NRM4" }, + { 0, 1, 0, 0, "CALLNZ" }, + { 0, 1, 0, 0, "IFC" }, + { 0, 1, 0, 0, "BREAKC" }, + { 0, 0, 0, 0, "KIL" }, + { 0, 0, 0, 0, "END" }, + { 1, 1, 0, 0, "SWZ" } +}; + +static const char *texture_names[TGSI_TEXTURE_COUNT] = +{ + "UNKNOWN", + "1D", + "2D", + "3D", + "CUBE", + "RECT", + "SHADOW1D", + "SHADOW2D", + "SHADOWRECT" +}; + +static boolean +parse_instruction( + struct translate_ctx *ctx, + boolean has_label ) +{ + uint i; + uint saturate = TGSI_SAT_NONE; + const struct opcode_info *info; + struct tgsi_full_instruction inst; + uint advance; + + /* Parse instruction name. + */ + eat_opt_white( &ctx->cur ); + for (i = 0; i < TGSI_OPCODE_LAST; i++) { + const char *cur = ctx->cur; + + info = &opcode_info[i]; + if (str_match_no_case( &cur, info->mnemonic )) { + if (str_match_no_case( &cur, "_SATNV" )) + saturate = TGSI_SAT_MINUS_PLUS_ONE; + else if (str_match_no_case( &cur, "_SAT" )) + saturate = TGSI_SAT_ZERO_ONE; + + if (info->num_dst + info->num_src + info->is_tex == 0) { + if (!is_digit_alpha_underscore( cur )) { + ctx->cur = cur; + break; + } + } + else if (*cur == '\0' || eat_white( &cur )) { + ctx->cur = cur; + break; + } + } + } + if (i == TGSI_OPCODE_LAST) { + if (has_label) + report_error( ctx, "Unknown opcode" ); + else + report_error( ctx, "Expected `DCL', `IMM' or a label" ); + return FALSE; + } + + inst = tgsi_default_full_instruction(); + inst.Instruction.Opcode = i; + inst.Instruction.Saturate = saturate; + inst.Instruction.NumDstRegs = info->num_dst; + inst.Instruction.NumSrcRegs = info->num_src; + + /* Parse instruction operands. + */ + for (i = 0; i < info->num_dst + info->num_src + info->is_tex; i++) { + if (i > 0) { + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ',') { + report_error( ctx, "Expected `,'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + } + + if (i < info->num_dst) { + if (!parse_dst_operand( ctx, &inst.FullDstRegisters[i] )) + return FALSE; + } + else if (i < info->num_dst + info->num_src) { + if (!parse_src_operand( ctx, &inst.FullSrcRegisters[i - info->num_dst] )) + return FALSE; + } + else { + uint j; + + for (j = 0; j < TGSI_TEXTURE_COUNT; j++) { + if (str_match_no_case( &ctx->cur, texture_names[j] )) { + if (!is_digit_alpha_underscore( ctx->cur )) { + inst.InstructionExtTexture.Texture = j; + break; + } + } + } + if (j == TGSI_TEXTURE_COUNT) { + report_error( ctx, "Expected texture target" ); + return FALSE; + } + } + } + + if (info->is_branch) { + uint target; + + eat_opt_white( &ctx->cur ); + if (*ctx->cur != ':') { + report_error( ctx, "Expected `:'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + if (!parse_uint( &ctx->cur, &target )) { + report_error( ctx, "Expected a label" ); + return FALSE; + } + inst.InstructionExtLabel.Label = target; + } + + advance = tgsi_build_full_instruction( + &inst, + ctx->tokens_cur, + ctx->header, + (uint) (ctx->tokens_end - ctx->tokens_cur) ); + if (advance == 0) + return FALSE; + ctx->tokens_cur += advance; + + return TRUE; +} + +static const char *semantic_names[TGSI_SEMANTIC_COUNT] = +{ + "POSITION", + "COLOR", + "BCOLOR", + "FOG", + "PSIZE", + "GENERIC", + "NORMAL" +}; + +static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] = +{ + "CONSTANT", + "LINEAR", + "PERSPECTIVE" +}; + +static boolean parse_declaration( struct translate_ctx *ctx ) +{ + struct tgsi_full_declaration decl; + uint file; + int first; + int last; + uint writemask; + const char *cur; + uint advance; + + if (!eat_white( &ctx->cur )) { + report_error( ctx, "Syntax error" ); + return FALSE; + } + if (!parse_register_dcl( ctx, &file, &first, &last )) + return FALSE; + if (!parse_opt_writemask( ctx, &writemask )) + return FALSE; + + decl = tgsi_default_full_declaration(); + decl.Declaration.File = file; + decl.Declaration.UsageMask = writemask; + decl.DeclarationRange.First = first; + decl.DeclarationRange.Last = last; + + cur = ctx->cur; + eat_opt_white( &cur ); + if (*cur == ',') { + uint i; + + cur++; + eat_opt_white( &cur ); + for (i = 0; i < TGSI_SEMANTIC_COUNT; i++) { + if (str_match_no_case( &cur, semantic_names[i] )) { + const char *cur2 = cur; + uint index; + + if (is_digit_alpha_underscore( cur )) + continue; + eat_opt_white( &cur2 ); + if (*cur2 == '[') { + cur2++; + eat_opt_white( &cur2 ); + if (!parse_uint( &cur2, &index )) { + report_error( ctx, "Expected literal integer" ); + return FALSE; + } + eat_opt_white( &cur2 ); + if (*cur2 != ']') { + report_error( ctx, "Expected `]'" ); + return FALSE; + } + cur2++; + + decl.Semantic.SemanticIndex = index; + + cur = cur2; + } + + decl.Declaration.Semantic = 1; + decl.Semantic.SemanticName = i; + + ctx->cur = cur; + break; + } + } + } + + cur = ctx->cur; + eat_opt_white( &cur ); + if (*cur == ',') { + uint i; + + cur++; + eat_opt_white( &cur ); + for (i = 0; i < TGSI_INTERPOLATE_COUNT; i++) { + if (str_match_no_case( &cur, interpolate_names[i] )) { + if (is_digit_alpha_underscore( cur )) + continue; + decl.Declaration.Interpolate = i; + + ctx->cur = cur; + break; + } + } + if (i == TGSI_INTERPOLATE_COUNT) { + report_error( ctx, "Expected semantic or interpolate attribute" ); + return FALSE; + } + } + + advance = tgsi_build_full_declaration( + &decl, + ctx->tokens_cur, + ctx->header, + (uint) (ctx->tokens_end - ctx->tokens_cur) ); + if (advance == 0) + return FALSE; + ctx->tokens_cur += advance; + + return TRUE; +} + +static boolean parse_immediate( struct translate_ctx *ctx ) +{ + struct tgsi_full_immediate imm; + uint i; + float values[4]; + uint advance; + + if (!eat_white( &ctx->cur )) { + report_error( ctx, "Syntax error" ); + return FALSE; + } + if (!str_match_no_case( &ctx->cur, "FLT32" ) || is_digit_alpha_underscore( ctx->cur )) { + report_error( ctx, "Expected `FLT32'" ); + return FALSE; + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '{') { + report_error( ctx, "Expected `{'" ); + return FALSE; + } + ctx->cur++; + for (i = 0; i < 4; i++) { + eat_opt_white( &ctx->cur ); + if (i > 0) { + if (*ctx->cur != ',') { + report_error( ctx, "Expected `,'" ); + return FALSE; + } + ctx->cur++; + eat_opt_white( &ctx->cur ); + } + if (!parse_float( &ctx->cur, &values[i] )) { + report_error( ctx, "Expected literal floating point" ); + return FALSE; + } + } + eat_opt_white( &ctx->cur ); + if (*ctx->cur != '}') { + report_error( ctx, "Expected `}'" ); + return FALSE; + } + ctx->cur++; + + imm = tgsi_default_full_immediate(); + imm.Immediate.Size += 4; + imm.Immediate.DataType = TGSI_IMM_FLOAT32; + imm.u.Pointer = values; + + advance = tgsi_build_full_immediate( + &imm, + ctx->tokens_cur, + ctx->header, + (uint) (ctx->tokens_end - ctx->tokens_cur) ); + if (advance == 0) + return FALSE; + ctx->tokens_cur += advance; + + return TRUE; +} + +static boolean translate( struct translate_ctx *ctx ) +{ + eat_opt_white( &ctx->cur ); + if (!parse_header( ctx )) + return FALSE; + + while (*ctx->cur != '\0') { + uint label_val = 0; + + if (!eat_white( &ctx->cur )) { + report_error( ctx, "Syntax error" ); + return FALSE; + } + + if (*ctx->cur == '\0') + break; + + if (parse_label( ctx, &label_val )) { + if (!parse_instruction( ctx, TRUE )) + return FALSE; + } + else if (str_match_no_case( &ctx->cur, "DCL" )) { + if (!parse_declaration( ctx )) + return FALSE; + } + else if (str_match_no_case( &ctx->cur, "IMM" )) { + if (!parse_immediate( ctx )) + return FALSE; + } + else if (!parse_instruction( ctx, FALSE )) { + return FALSE; + } + } + + return TRUE; +} + +boolean +tgsi_text_translate( + const char *text, + struct tgsi_token *tokens, + uint num_tokens ) +{ + struct translate_ctx ctx; + + ctx.text = text; + ctx.cur = text; + ctx.tokens = tokens; + ctx.tokens_cur = tokens; + ctx.tokens_end = tokens + num_tokens; + + if (!translate( &ctx )) + return FALSE; + + return tgsi_sanity_check( tokens ); +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.h b/src/gallium/auxiliary/tgsi/tgsi_text.h new file mode 100644 index 0000000000..8eeeeef140 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_text.h @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_TEXT_H +#define TGSI_TEXT_H + +#include "pipe/p_shader_tokens.h" + +#if defined __cplusplus +extern "C" { +#endif + +boolean +tgsi_text_translate( + const char *text, + struct tgsi_token *tokens, + uint num_tokens ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_TEXT_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c new file mode 100644 index 0000000000..357f77b05a --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c @@ -0,0 +1,199 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI program transformation utility. + * + * Authors: Brian Paul + */ + + +#include "tgsi_transform.h" + + + +static void +emit_instruction(struct tgsi_transform_context *ctx, + const struct tgsi_full_instruction *inst) +{ + uint ti = ctx->ti; + + ti += tgsi_build_full_instruction(inst, + ctx->tokens_out + ti, + ctx->header, + ctx->max_tokens_out - ti); + ctx->ti = ti; +} + + +static void +emit_declaration(struct tgsi_transform_context *ctx, + const struct tgsi_full_declaration *decl) +{ + uint ti = ctx->ti; + + ti += tgsi_build_full_declaration(decl, + ctx->tokens_out + ti, + ctx->header, + ctx->max_tokens_out - ti); + ctx->ti = ti; +} + + +static void +emit_immediate(struct tgsi_transform_context *ctx, + const struct tgsi_full_immediate *imm) +{ + uint ti = ctx->ti; + + ti += tgsi_build_full_immediate(imm, + ctx->tokens_out + ti, + ctx->header, + ctx->max_tokens_out - ti); + ctx->ti = ti; +} + + + +/** + * Apply user-defined transformations to the input shader to produce + * the output shader. + * For example, a register search-and-replace operation could be applied + * by defining a transform_instruction() callback that examined and changed + * the instruction src/dest regs. + * + * \return number of tokens emitted + */ +int +tgsi_transform_shader(const struct tgsi_token *tokens_in, + struct tgsi_token *tokens_out, + uint max_tokens_out, + struct tgsi_transform_context *ctx) +{ + uint procType; + + /* input shader */ + struct tgsi_parse_context parse; + + /* output shader */ + struct tgsi_processor *processor; + + + /** + ** callback context init + **/ + ctx->emit_instruction = emit_instruction; + ctx->emit_declaration = emit_declaration; + ctx->emit_immediate = emit_immediate; + ctx->tokens_out = tokens_out; + ctx->max_tokens_out = max_tokens_out; + + + /** + ** Setup to begin parsing input shader + **/ + if (tgsi_parse_init( &parse, tokens_in ) != TGSI_PARSE_OK) { + debug_printf("tgsi_parse_init() failed in tgsi_transform_shader()!\n"); + return -1; + } + procType = parse.FullHeader.Processor.Processor; + assert(procType == TGSI_PROCESSOR_FRAGMENT || + procType == TGSI_PROCESSOR_VERTEX || + procType == TGSI_PROCESSOR_GEOMETRY); + + + /** + ** Setup output shader + **/ + *(struct tgsi_version *) &tokens_out[0] = tgsi_build_version(); + + ctx->header = (struct tgsi_header *) (tokens_out + 1); + *ctx->header = tgsi_build_header(); + + processor = (struct tgsi_processor *) (tokens_out + 2); + *processor = tgsi_build_processor( procType, ctx->header ); + + ctx->ti = 3; + + + /** + ** Loop over incoming program tokens/instructions + */ + while( !tgsi_parse_end_of_tokens( &parse ) ) { + + tgsi_parse_token( &parse ); + + switch( parse.FullToken.Token.Type ) { + case TGSI_TOKEN_TYPE_INSTRUCTION: + { + struct tgsi_full_instruction *fullinst + = &parse.FullToken.FullInstruction; + + if (ctx->transform_instruction) + ctx->transform_instruction(ctx, fullinst); + else + ctx->emit_instruction(ctx, fullinst); + } + break; + + case TGSI_TOKEN_TYPE_DECLARATION: + { + struct tgsi_full_declaration *fulldecl + = &parse.FullToken.FullDeclaration; + + if (ctx->transform_declaration) + ctx->transform_declaration(ctx, fulldecl); + else + ctx->emit_declaration(ctx, fulldecl); + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + { + struct tgsi_full_immediate *fullimm + = &parse.FullToken.FullImmediate; + + if (ctx->transform_immediate) + ctx->transform_immediate(ctx, fullimm); + else + ctx->emit_immediate(ctx, fullimm); + } + break; + + default: + assert( 0 ); + } + } + + if (ctx->epilog) { + ctx->epilog(ctx); + } + + tgsi_parse_free (&parse); + + return ctx->ti; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h new file mode 100644 index 0000000000..3da0b38271 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h @@ -0,0 +1,93 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_TRANSFORM_H +#define TGSI_TRANSFORM_H + + +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_build.h" + + + +/** + * Subclass this to add caller-specific data + */ +struct tgsi_transform_context +{ +/**** PUBLIC ***/ + + /** + * User-defined callbacks invoked per instruction. + */ + void (*transform_instruction)(struct tgsi_transform_context *ctx, + struct tgsi_full_instruction *inst); + + void (*transform_declaration)(struct tgsi_transform_context *ctx, + struct tgsi_full_declaration *decl); + + void (*transform_immediate)(struct tgsi_transform_context *ctx, + struct tgsi_full_immediate *imm); + + /** + * Called at end of input program to allow caller to append extra + * instructions. Return number of tokens emitted. + */ + void (*epilog)(struct tgsi_transform_context *ctx); + + +/*** PRIVATE ***/ + + /** + * These are setup by tgsi_transform_shader() and cannot be overridden. + * Meant to be called from in the above user callback functions. + */ + void (*emit_instruction)(struct tgsi_transform_context *ctx, + const struct tgsi_full_instruction *inst); + void (*emit_declaration)(struct tgsi_transform_context *ctx, + const struct tgsi_full_declaration *decl); + void (*emit_immediate)(struct tgsi_transform_context *ctx, + const struct tgsi_full_immediate *imm); + + struct tgsi_header *header; + uint max_tokens_out; + struct tgsi_token *tokens_out; + uint ti; +}; + + + +extern int +tgsi_transform_shader(const struct tgsi_token *tokens_in, + struct tgsi_token *tokens_out, + uint max_tokens_out, + struct tgsi_transform_context *ctx); + + +#endif /* TGSI_TRANSFORM_H */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c new file mode 100644 index 0000000000..09486e649e --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -0,0 +1,300 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "pipe/p_util.h" +#include "pipe/p_shader_tokens.h" +#include "tgsi_parse.h" +#include "tgsi_build.h" +#include "tgsi_util.h" + +union pointer_hack +{ + void *pointer; + uint64_t uint64; +}; + +void * +tgsi_align_128bit( + void *unaligned ) +{ + union pointer_hack ph; + + ph.uint64 = 0; + ph.pointer = unaligned; + ph.uint64 = (ph.uint64 + 15) & ~15; + return ph.pointer; +} + +unsigned +tgsi_util_get_src_register_swizzle( + const struct tgsi_src_register *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->SwizzleX; + case 1: + return reg->SwizzleY; + case 2: + return reg->SwizzleZ; + case 3: + return reg->SwizzleW; + default: + assert( 0 ); + } + return 0; +} + +unsigned +tgsi_util_get_src_register_extswizzle( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->ExtSwizzleX; + case 1: + return reg->ExtSwizzleY; + case 2: + return reg->ExtSwizzleZ; + case 3: + return reg->ExtSwizzleW; + default: + assert( 0 ); + } + return 0; +} + +unsigned +tgsi_util_get_full_src_register_extswizzle( + const struct tgsi_full_src_register *reg, + unsigned component ) +{ + unsigned swizzle; + + /* + * First, calculate the extended swizzle for a given channel. This will give + * us either a channel index into the simple swizzle or a constant 1 or 0. + */ + swizzle = tgsi_util_get_src_register_extswizzle( + ®->SrcRegisterExtSwz, + component ); + + assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); + assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); + assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); + assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); + assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); + assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); + + /* + * Second, calculate the simple swizzle for the unswizzled channel index. + * Leave the constants intact, they are not affected by the simple swizzle. + */ + if( swizzle <= TGSI_SWIZZLE_W ) { + swizzle = tgsi_util_get_src_register_swizzle( + ®->SrcRegister, + swizzle ); + } + + return swizzle; +} + +void +tgsi_util_set_src_register_swizzle( + struct tgsi_src_register *reg, + unsigned swizzle, + unsigned component ) +{ + switch( component ) { + case 0: + reg->SwizzleX = swizzle; + break; + case 1: + reg->SwizzleY = swizzle; + break; + case 2: + reg->SwizzleZ = swizzle; + break; + case 3: + reg->SwizzleW = swizzle; + break; + default: + assert( 0 ); + } +} + +void +tgsi_util_set_src_register_extswizzle( + struct tgsi_src_register_ext_swz *reg, + unsigned swizzle, + unsigned component ) +{ + switch( component ) { + case 0: + reg->ExtSwizzleX = swizzle; + break; + case 1: + reg->ExtSwizzleY = swizzle; + break; + case 2: + reg->ExtSwizzleZ = swizzle; + break; + case 3: + reg->ExtSwizzleW = swizzle; + break; + default: + assert( 0 ); + } +} + +unsigned +tgsi_util_get_src_register_extnegate( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ) +{ + switch( component ) { + case 0: + return reg->NegateX; + case 1: + return reg->NegateY; + case 2: + return reg->NegateZ; + case 3: + return reg->NegateW; + default: + assert( 0 ); + } + return 0; +} + +void +tgsi_util_set_src_register_extnegate( + struct tgsi_src_register_ext_swz *reg, + unsigned negate, + unsigned component ) +{ + switch( component ) { + case 0: + reg->NegateX = negate; + break; + case 1: + reg->NegateY = negate; + break; + case 2: + reg->NegateZ = negate; + break; + case 3: + reg->NegateW = negate; + break; + default: + assert( 0 ); + } +} + +unsigned +tgsi_util_get_full_src_register_sign_mode( + const struct tgsi_full_src_register *reg, + unsigned component ) +{ + unsigned sign_mode; + + if( reg->SrcRegisterExtMod.Absolute ) { + /* Consider only the post-abs negation. */ + + if( reg->SrcRegisterExtMod.Negate ) { + sign_mode = TGSI_UTIL_SIGN_SET; + } + else { + sign_mode = TGSI_UTIL_SIGN_CLEAR; + } + } + else { + /* Accumulate the three negations. */ + + unsigned negate; + + negate = reg->SrcRegister.Negate; + if( tgsi_util_get_src_register_extnegate( ®->SrcRegisterExtSwz, component ) ) { + negate = !negate; + } + if( reg->SrcRegisterExtMod.Negate ) { + negate = !negate; + } + + if( negate ) { + sign_mode = TGSI_UTIL_SIGN_TOGGLE; + } + else { + sign_mode = TGSI_UTIL_SIGN_KEEP; + } + } + + return sign_mode; +} + +void +tgsi_util_set_full_src_register_sign_mode( + struct tgsi_full_src_register *reg, + unsigned sign_mode ) +{ + reg->SrcRegisterExtSwz.NegateX = 0; + reg->SrcRegisterExtSwz.NegateY = 0; + reg->SrcRegisterExtSwz.NegateZ = 0; + reg->SrcRegisterExtSwz.NegateW = 0; + + switch (sign_mode) + { + case TGSI_UTIL_SIGN_CLEAR: + reg->SrcRegister.Negate = 0; + reg->SrcRegisterExtMod.Absolute = 1; + reg->SrcRegisterExtMod.Negate = 0; + break; + + case TGSI_UTIL_SIGN_SET: + reg->SrcRegister.Negate = 0; + reg->SrcRegisterExtMod.Absolute = 1; + reg->SrcRegisterExtMod.Negate = 1; + break; + + case TGSI_UTIL_SIGN_TOGGLE: + reg->SrcRegister.Negate = 1; + reg->SrcRegisterExtMod.Absolute = 0; + reg->SrcRegisterExtMod.Negate = 0; + break; + + case TGSI_UTIL_SIGN_KEEP: + reg->SrcRegister.Negate = 0; + reg->SrcRegisterExtMod.Absolute = 0; + reg->SrcRegisterExtMod.Negate = 0; + break; + + default: + assert( 0 ); + } +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.h b/src/gallium/auxiliary/tgsi/tgsi_util.h new file mode 100644 index 0000000000..7877f34558 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_util.h @@ -0,0 +1,96 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_UTIL_H +#define TGSI_UTIL_H + +#if defined __cplusplus +extern "C" { +#endif + +void * +tgsi_align_128bit( + void *unaligned ); + +unsigned +tgsi_util_get_src_register_swizzle( + const struct tgsi_src_register *reg, + unsigned component ); + +unsigned +tgsi_util_get_src_register_extswizzle( + const struct tgsi_src_register_ext_swz *reg, + unsigned component); + +unsigned +tgsi_util_get_full_src_register_extswizzle( + const struct tgsi_full_src_register *reg, + unsigned component ); + +void +tgsi_util_set_src_register_swizzle( + struct tgsi_src_register *reg, + unsigned swizzle, + unsigned component ); + +void +tgsi_util_set_src_register_extswizzle( + struct tgsi_src_register_ext_swz *reg, + unsigned swizzle, + unsigned component ); + +unsigned +tgsi_util_get_src_register_extnegate( + const struct tgsi_src_register_ext_swz *reg, + unsigned component ); + +void +tgsi_util_set_src_register_extnegate( + struct tgsi_src_register_ext_swz *reg, + unsigned negate, + unsigned component ); + +#define TGSI_UTIL_SIGN_CLEAR 0 /* Force positive */ +#define TGSI_UTIL_SIGN_SET 1 /* Force negative */ +#define TGSI_UTIL_SIGN_TOGGLE 2 /* Negate */ +#define TGSI_UTIL_SIGN_KEEP 3 /* No change */ + +unsigned +tgsi_util_get_full_src_register_sign_mode( + const struct tgsi_full_src_register *reg, + unsigned component ); + +void +tgsi_util_set_full_src_register_sign_mode( + struct tgsi_full_src_register *reg, + unsigned sign_mode ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_UTIL_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.c b/src/gallium/auxiliary/tgsi/util/tgsi_build.c deleted file mode 100644 index 742ef14c35..0000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_build.c +++ /dev/null @@ -1,1324 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_debug.h" -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi_build.h" -#include "tgsi_parse.h" - -/* - * version - */ - -struct tgsi_version -tgsi_build_version( void ) -{ - struct tgsi_version version; - - version.MajorVersion = 1; - version.MinorVersion = 1; - version.Padding = 0; - - return version; -} - -/* - * header - */ - -struct tgsi_header -tgsi_build_header( void ) -{ - struct tgsi_header header; - - header.HeaderSize = 1; - header.BodySize = 0; - - return header; -} - -static void -header_headersize_grow( struct tgsi_header *header ) -{ - assert( header->HeaderSize < 0xFF ); - assert( header->BodySize == 0 ); - - header->HeaderSize++; -} - -static void -header_bodysize_grow( struct tgsi_header *header ) -{ - assert( header->BodySize < 0xFFFFFF ); - - header->BodySize++; -} - -struct tgsi_processor -tgsi_default_processor( void ) -{ - struct tgsi_processor processor; - - processor.Processor = TGSI_PROCESSOR_FRAGMENT; - processor.Padding = 0; - - return processor; -} - -struct tgsi_processor -tgsi_build_processor( - unsigned type, - struct tgsi_header *header ) -{ - struct tgsi_processor processor; - - processor = tgsi_default_processor(); - processor.Processor = type; - - header_headersize_grow( header ); - - return processor; -} - -/* - * declaration - */ - -struct tgsi_declaration -tgsi_default_declaration( void ) -{ - struct tgsi_declaration declaration; - - declaration.Type = TGSI_TOKEN_TYPE_DECLARATION; - declaration.Size = 1; - declaration.File = TGSI_FILE_NULL; - declaration.UsageMask = TGSI_WRITEMASK_XYZW; - declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT; - declaration.Semantic = 0; - declaration.Padding = 0; - declaration.Extended = 0; - - return declaration; -} - -struct tgsi_declaration -tgsi_build_declaration( - unsigned file, - unsigned usage_mask, - unsigned interpolate, - unsigned semantic, - struct tgsi_header *header ) -{ - struct tgsi_declaration declaration; - - assert( file <= TGSI_FILE_IMMEDIATE ); - assert( interpolate <= TGSI_INTERPOLATE_PERSPECTIVE ); - - declaration = tgsi_default_declaration(); - declaration.File = file; - declaration.UsageMask = usage_mask; - declaration.Interpolate = interpolate; - declaration.Semantic = semantic; - - header_bodysize_grow( header ); - - return declaration; -} - -static void -declaration_grow( - struct tgsi_declaration *declaration, - struct tgsi_header *header ) -{ - assert( declaration->Size < 0xFF ); - - declaration->Size++; - - header_bodysize_grow( header ); -} - -struct tgsi_full_declaration -tgsi_default_full_declaration( void ) -{ - struct tgsi_full_declaration full_declaration; - - full_declaration.Declaration = tgsi_default_declaration(); - full_declaration.DeclarationRange = tgsi_default_declaration_range(); - full_declaration.Semantic = tgsi_default_declaration_semantic(); - - return full_declaration; -} - -unsigned -tgsi_build_full_declaration( - const struct tgsi_full_declaration *full_decl, - struct tgsi_token *tokens, - struct tgsi_header *header, - unsigned maxsize ) -{ - unsigned size = 0; - struct tgsi_declaration *declaration; - struct tgsi_declaration_range *dr; - - if( maxsize <= size ) - return 0; - declaration = (struct tgsi_declaration *) &tokens[size]; - size++; - - *declaration = tgsi_build_declaration( - full_decl->Declaration.File, - full_decl->Declaration.UsageMask, - full_decl->Declaration.Interpolate, - full_decl->Declaration.Semantic, - header ); - - if (maxsize <= size) - return 0; - dr = (struct tgsi_declaration_range *) &tokens[size]; - size++; - - *dr = tgsi_build_declaration_range( - full_decl->DeclarationRange.First, - full_decl->DeclarationRange.Last, - declaration, - header ); - - if( full_decl->Declaration.Semantic ) { - struct tgsi_declaration_semantic *ds; - - if( maxsize <= size ) - return 0; - ds = (struct tgsi_declaration_semantic *) &tokens[size]; - size++; - - *ds = tgsi_build_declaration_semantic( - full_decl->Semantic.SemanticName, - full_decl->Semantic.SemanticIndex, - declaration, - header ); - } - - return size; -} - -struct tgsi_declaration_range -tgsi_default_declaration_range( void ) -{ - struct tgsi_declaration_range dr; - - dr.First = 0; - dr.Last = 0; - - return dr; -} - -struct tgsi_declaration_range -tgsi_build_declaration_range( - unsigned first, - unsigned last, - struct tgsi_declaration *declaration, - struct tgsi_header *header ) -{ - struct tgsi_declaration_range declaration_range; - - assert( last >= first ); - assert( last <= 0xFFFF ); - - declaration_range = tgsi_default_declaration_range(); - declaration_range.First = first; - declaration_range.Last = last; - - declaration_grow( declaration, header ); - - return declaration_range; -} - -struct tgsi_declaration_semantic -tgsi_default_declaration_semantic( void ) -{ - struct tgsi_declaration_semantic ds; - - ds.SemanticName = TGSI_SEMANTIC_POSITION; - ds.SemanticIndex = 0; - ds.Padding = 0; - - return ds; -} - -struct tgsi_declaration_semantic -tgsi_build_declaration_semantic( - unsigned semantic_name, - unsigned semantic_index, - struct tgsi_declaration *declaration, - struct tgsi_header *header ) -{ - struct tgsi_declaration_semantic ds; - - assert( semantic_name <= TGSI_SEMANTIC_COUNT ); - assert( semantic_index <= 0xFFFF ); - - ds = tgsi_default_declaration_semantic(); - ds.SemanticName = semantic_name; - ds.SemanticIndex = semantic_index; - - declaration_grow( declaration, header ); - - return ds; -} - -/* - * immediate - */ - -struct tgsi_immediate -tgsi_default_immediate( void ) -{ - struct tgsi_immediate immediate; - - immediate.Type = TGSI_TOKEN_TYPE_IMMEDIATE; - immediate.Size = 1; - immediate.DataType = TGSI_IMM_FLOAT32; - immediate.Padding = 0; - immediate.Extended = 0; - - return immediate; -} - -struct tgsi_immediate -tgsi_build_immediate( - struct tgsi_header *header ) -{ - struct tgsi_immediate immediate; - - immediate = tgsi_default_immediate(); - - header_bodysize_grow( header ); - - return immediate; -} - -struct tgsi_full_immediate -tgsi_default_full_immediate( void ) -{ - struct tgsi_full_immediate fullimm; - - fullimm.Immediate = tgsi_default_immediate(); - fullimm.u.Pointer = (void *) 0; - - return fullimm; -} - -static void -immediate_grow( - struct tgsi_immediate *immediate, - struct tgsi_header *header ) -{ - assert( immediate->Size < 0xFF ); - - immediate->Size++; - - header_bodysize_grow( header ); -} - -struct tgsi_immediate_float32 -tgsi_build_immediate_float32( - float value, - struct tgsi_immediate *immediate, - struct tgsi_header *header ) -{ - struct tgsi_immediate_float32 immediate_float32; - - immediate_float32.Float = value; - - immediate_grow( immediate, header ); - - return immediate_float32; -} - -unsigned -tgsi_build_full_immediate( - const struct tgsi_full_immediate *full_imm, - struct tgsi_token *tokens, - struct tgsi_header *header, - unsigned maxsize ) -{ - unsigned size = 0, i; - struct tgsi_immediate *immediate; - - if( maxsize <= size ) - return 0; - immediate = (struct tgsi_immediate *) &tokens[size]; - size++; - - *immediate = tgsi_build_immediate( header ); - - for( i = 0; i < full_imm->Immediate.Size - 1; i++ ) { - struct tgsi_immediate_float32 *if32; - - if( maxsize <= size ) - return 0; - if32 = (struct tgsi_immediate_float32 *) &tokens[size]; - size++; - - *if32 = tgsi_build_immediate_float32( - full_imm->u.ImmediateFloat32[i].Float, - immediate, - header ); - } - - return size; -} - -/* - * instruction - */ - -struct tgsi_instruction -tgsi_default_instruction( void ) -{ - struct tgsi_instruction instruction; - - instruction.Type = TGSI_TOKEN_TYPE_INSTRUCTION; - instruction.Size = 1; - instruction.Opcode = TGSI_OPCODE_MOV; - instruction.Saturate = TGSI_SAT_NONE; - instruction.NumDstRegs = 1; - instruction.NumSrcRegs = 1; - instruction.Padding = 0; - instruction.Extended = 0; - - return instruction; -} - -struct tgsi_instruction -tgsi_build_instruction( - unsigned opcode, - unsigned saturate, - unsigned num_dst_regs, - unsigned num_src_regs, - struct tgsi_header *header ) -{ - struct tgsi_instruction instruction; - - assert (opcode <= TGSI_OPCODE_LAST); - assert (saturate <= TGSI_SAT_MINUS_PLUS_ONE); - assert (num_dst_regs <= 3); - assert (num_src_regs <= 15); - - instruction = tgsi_default_instruction(); - instruction.Opcode = opcode; - instruction.Saturate = saturate; - instruction.NumDstRegs = num_dst_regs; - instruction.NumSrcRegs = num_src_regs; - - header_bodysize_grow( header ); - - return instruction; -} - -static void -instruction_grow( - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - assert (instruction->Size < 0xFF); - - instruction->Size++; - - header_bodysize_grow( header ); -} - -struct tgsi_full_instruction -tgsi_default_full_instruction( void ) -{ - struct tgsi_full_instruction full_instruction; - unsigned i; - - full_instruction.Instruction = tgsi_default_instruction(); - full_instruction.InstructionExtNv = tgsi_default_instruction_ext_nv(); - full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label(); - full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture(); - for( i = 0; i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) { - full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register(); - } - for( i = 0; i < TGSI_FULL_MAX_SRC_REGISTERS; i++ ) { - full_instruction.FullSrcRegisters[i] = tgsi_default_full_src_register(); - } - - return full_instruction; -} - -unsigned -tgsi_build_full_instruction( - const struct tgsi_full_instruction *full_inst, - struct tgsi_token *tokens, - struct tgsi_header *header, - unsigned maxsize ) -{ - unsigned size = 0; - unsigned i; - struct tgsi_instruction *instruction; - struct tgsi_token *prev_token; - - if( maxsize <= size ) - return 0; - instruction = (struct tgsi_instruction *) &tokens[size]; - size++; - - *instruction = tgsi_build_instruction( - full_inst->Instruction.Opcode, - full_inst->Instruction.Saturate, - full_inst->Instruction.NumDstRegs, - full_inst->Instruction.NumSrcRegs, - header ); - prev_token = (struct tgsi_token *) instruction; - - if( tgsi_compare_instruction_ext_nv( - full_inst->InstructionExtNv, - tgsi_default_instruction_ext_nv() ) ) { - struct tgsi_instruction_ext_nv *instruction_ext_nv; - - if( maxsize <= size ) - return 0; - instruction_ext_nv = - (struct tgsi_instruction_ext_nv *) &tokens[size]; - size++; - - *instruction_ext_nv = tgsi_build_instruction_ext_nv( - full_inst->InstructionExtNv.Precision, - full_inst->InstructionExtNv.CondDstIndex, - full_inst->InstructionExtNv.CondFlowIndex, - full_inst->InstructionExtNv.CondMask, - full_inst->InstructionExtNv.CondSwizzleX, - full_inst->InstructionExtNv.CondSwizzleY, - full_inst->InstructionExtNv.CondSwizzleZ, - full_inst->InstructionExtNv.CondSwizzleW, - full_inst->InstructionExtNv.CondDstUpdate, - full_inst->InstructionExtNv.CondFlowEnable, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) instruction_ext_nv; - } - - if( tgsi_compare_instruction_ext_label( - full_inst->InstructionExtLabel, - tgsi_default_instruction_ext_label() ) ) { - struct tgsi_instruction_ext_label *instruction_ext_label; - - if( maxsize <= size ) - return 0; - instruction_ext_label = - (struct tgsi_instruction_ext_label *) &tokens[size]; - size++; - - *instruction_ext_label = tgsi_build_instruction_ext_label( - full_inst->InstructionExtLabel.Label, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) instruction_ext_label; - } - - if( tgsi_compare_instruction_ext_texture( - full_inst->InstructionExtTexture, - tgsi_default_instruction_ext_texture() ) ) { - struct tgsi_instruction_ext_texture *instruction_ext_texture; - - if( maxsize <= size ) - return 0; - instruction_ext_texture = - (struct tgsi_instruction_ext_texture *) &tokens[size]; - size++; - - *instruction_ext_texture = tgsi_build_instruction_ext_texture( - full_inst->InstructionExtTexture.Texture, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) instruction_ext_texture; - } - - for( i = 0; i < full_inst->Instruction.NumDstRegs; i++ ) { - const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i]; - struct tgsi_dst_register *dst_register; - struct tgsi_token *prev_token; - - if( maxsize <= size ) - return 0; - dst_register = (struct tgsi_dst_register *) &tokens[size]; - size++; - - *dst_register = tgsi_build_dst_register( - reg->DstRegister.File, - reg->DstRegister.WriteMask, - reg->DstRegister.Index, - instruction, - header ); - prev_token = (struct tgsi_token *) dst_register; - - if( tgsi_compare_dst_register_ext_concode( - reg->DstRegisterExtConcode, - tgsi_default_dst_register_ext_concode() ) ) { - struct tgsi_dst_register_ext_concode *dst_register_ext_concode; - - if( maxsize <= size ) - return 0; - dst_register_ext_concode = - (struct tgsi_dst_register_ext_concode *) &tokens[size]; - size++; - - *dst_register_ext_concode = tgsi_build_dst_register_ext_concode( - reg->DstRegisterExtConcode.CondMask, - reg->DstRegisterExtConcode.CondSwizzleX, - reg->DstRegisterExtConcode.CondSwizzleY, - reg->DstRegisterExtConcode.CondSwizzleZ, - reg->DstRegisterExtConcode.CondSwizzleW, - reg->DstRegisterExtConcode.CondSrcIndex, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) dst_register_ext_concode; - } - - if( tgsi_compare_dst_register_ext_modulate( - reg->DstRegisterExtModulate, - tgsi_default_dst_register_ext_modulate() ) ) { - struct tgsi_dst_register_ext_modulate *dst_register_ext_modulate; - - if( maxsize <= size ) - return 0; - dst_register_ext_modulate = - (struct tgsi_dst_register_ext_modulate *) &tokens[size]; - size++; - - *dst_register_ext_modulate = tgsi_build_dst_register_ext_modulate( - reg->DstRegisterExtModulate.Modulate, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) dst_register_ext_modulate; - } - } - - for( i = 0; i < full_inst->Instruction.NumSrcRegs; i++ ) { - const struct tgsi_full_src_register *reg = &full_inst->FullSrcRegisters[i]; - struct tgsi_src_register *src_register; - struct tgsi_token *prev_token; - - if( maxsize <= size ) - return 0; - src_register = (struct tgsi_src_register *) &tokens[size]; - size++; - - *src_register = tgsi_build_src_register( - reg->SrcRegister.File, - reg->SrcRegister.SwizzleX, - reg->SrcRegister.SwizzleY, - reg->SrcRegister.SwizzleZ, - reg->SrcRegister.SwizzleW, - reg->SrcRegister.Negate, - reg->SrcRegister.Indirect, - reg->SrcRegister.Dimension, - reg->SrcRegister.Index, - instruction, - header ); - prev_token = (struct tgsi_token *) src_register; - - if( tgsi_compare_src_register_ext_swz( - reg->SrcRegisterExtSwz, - tgsi_default_src_register_ext_swz() ) ) { - struct tgsi_src_register_ext_swz *src_register_ext_swz; - - /* Use of the extended swizzle requires the simple swizzle to be identity. - */ - assert( reg->SrcRegister.SwizzleX == TGSI_SWIZZLE_X ); - assert( reg->SrcRegister.SwizzleY == TGSI_SWIZZLE_Y ); - assert( reg->SrcRegister.SwizzleZ == TGSI_SWIZZLE_Z ); - assert( reg->SrcRegister.SwizzleW == TGSI_SWIZZLE_W ); - assert( reg->SrcRegister.Negate == FALSE ); - - if( maxsize <= size ) - return 0; - src_register_ext_swz = - (struct tgsi_src_register_ext_swz *) &tokens[size]; - size++; - - *src_register_ext_swz = tgsi_build_src_register_ext_swz( - reg->SrcRegisterExtSwz.ExtSwizzleX, - reg->SrcRegisterExtSwz.ExtSwizzleY, - reg->SrcRegisterExtSwz.ExtSwizzleZ, - reg->SrcRegisterExtSwz.ExtSwizzleW, - reg->SrcRegisterExtSwz.NegateX, - reg->SrcRegisterExtSwz.NegateY, - reg->SrcRegisterExtSwz.NegateZ, - reg->SrcRegisterExtSwz.NegateW, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) src_register_ext_swz; - } - - if( tgsi_compare_src_register_ext_mod( - reg->SrcRegisterExtMod, - tgsi_default_src_register_ext_mod() ) ) { - struct tgsi_src_register_ext_mod *src_register_ext_mod; - - if( maxsize <= size ) - return 0; - src_register_ext_mod = - (struct tgsi_src_register_ext_mod *) &tokens[size]; - size++; - - *src_register_ext_mod = tgsi_build_src_register_ext_mod( - reg->SrcRegisterExtMod.Complement, - reg->SrcRegisterExtMod.Bias, - reg->SrcRegisterExtMod.Scale2X, - reg->SrcRegisterExtMod.Absolute, - reg->SrcRegisterExtMod.Negate, - prev_token, - instruction, - header ); - prev_token = (struct tgsi_token *) src_register_ext_mod; - } - - if( reg->SrcRegister.Indirect ) { - struct tgsi_src_register *ind; - - if( maxsize <= size ) - return 0; - ind = (struct tgsi_src_register *) &tokens[size]; - size++; - - *ind = tgsi_build_src_register( - reg->SrcRegisterInd.File, - reg->SrcRegisterInd.SwizzleX, - reg->SrcRegisterInd.SwizzleY, - reg->SrcRegisterInd.SwizzleZ, - reg->SrcRegisterInd.SwizzleW, - reg->SrcRegisterInd.Negate, - reg->SrcRegisterInd.Indirect, - reg->SrcRegisterInd.Dimension, - reg->SrcRegisterInd.Index, - instruction, - header ); - } - - if( reg->SrcRegister.Dimension ) { - struct tgsi_dimension *dim; - - assert( !reg->SrcRegisterDim.Dimension ); - - if( maxsize <= size ) - return 0; - dim = (struct tgsi_dimension *) &tokens[size]; - size++; - - *dim = tgsi_build_dimension( - reg->SrcRegisterDim.Indirect, - reg->SrcRegisterDim.Index, - instruction, - header ); - - if( reg->SrcRegisterDim.Indirect ) { - struct tgsi_src_register *ind; - - if( maxsize <= size ) - return 0; - ind = (struct tgsi_src_register *) &tokens[size]; - size++; - - *ind = tgsi_build_src_register( - reg->SrcRegisterDimInd.File, - reg->SrcRegisterDimInd.SwizzleX, - reg->SrcRegisterDimInd.SwizzleY, - reg->SrcRegisterDimInd.SwizzleZ, - reg->SrcRegisterDimInd.SwizzleW, - reg->SrcRegisterDimInd.Negate, - reg->SrcRegisterDimInd.Indirect, - reg->SrcRegisterDimInd.Dimension, - reg->SrcRegisterDimInd.Index, - instruction, - header ); - } - } - } - - return size; -} - -struct tgsi_instruction_ext_nv -tgsi_default_instruction_ext_nv( void ) -{ - struct tgsi_instruction_ext_nv instruction_ext_nv; - - instruction_ext_nv.Type = TGSI_INSTRUCTION_EXT_TYPE_NV; - instruction_ext_nv.Precision = TGSI_PRECISION_DEFAULT; - instruction_ext_nv.CondDstIndex = 0; - instruction_ext_nv.CondFlowIndex = 0; - instruction_ext_nv.CondMask = TGSI_CC_TR; - instruction_ext_nv.CondSwizzleX = TGSI_SWIZZLE_X; - instruction_ext_nv.CondSwizzleY = TGSI_SWIZZLE_Y; - instruction_ext_nv.CondSwizzleZ = TGSI_SWIZZLE_Z; - instruction_ext_nv.CondSwizzleW = TGSI_SWIZZLE_W; - instruction_ext_nv.CondDstUpdate = 0; - instruction_ext_nv.CondFlowEnable = 0; - instruction_ext_nv.Padding = 0; - instruction_ext_nv.Extended = 0; - - return instruction_ext_nv; -} - -union token_u32 -{ - unsigned u32; -}; - -unsigned -tgsi_compare_instruction_ext_nv( - struct tgsi_instruction_ext_nv a, - struct tgsi_instruction_ext_nv b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; -} - -struct tgsi_instruction_ext_nv -tgsi_build_instruction_ext_nv( - unsigned precision, - unsigned cond_dst_index, - unsigned cond_flow_index, - unsigned cond_mask, - unsigned cond_swizzle_x, - unsigned cond_swizzle_y, - unsigned cond_swizzle_z, - unsigned cond_swizzle_w, - unsigned cond_dst_update, - unsigned cond_flow_update, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_instruction_ext_nv instruction_ext_nv; - - instruction_ext_nv = tgsi_default_instruction_ext_nv(); - instruction_ext_nv.Precision = precision; - instruction_ext_nv.CondDstIndex = cond_dst_index; - instruction_ext_nv.CondFlowIndex = cond_flow_index; - instruction_ext_nv.CondMask = cond_mask; - instruction_ext_nv.CondSwizzleX = cond_swizzle_x; - instruction_ext_nv.CondSwizzleY = cond_swizzle_y; - instruction_ext_nv.CondSwizzleZ = cond_swizzle_z; - instruction_ext_nv.CondSwizzleW = cond_swizzle_w; - instruction_ext_nv.CondDstUpdate = cond_dst_update; - instruction_ext_nv.CondFlowEnable = cond_flow_update; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return instruction_ext_nv; -} - -struct tgsi_instruction_ext_label -tgsi_default_instruction_ext_label( void ) -{ - struct tgsi_instruction_ext_label instruction_ext_label; - - instruction_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL; - instruction_ext_label.Label = 0; - instruction_ext_label.Padding = 0; - instruction_ext_label.Extended = 0; - - return instruction_ext_label; -} - -unsigned -tgsi_compare_instruction_ext_label( - struct tgsi_instruction_ext_label a, - struct tgsi_instruction_ext_label b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; -} - -struct tgsi_instruction_ext_label -tgsi_build_instruction_ext_label( - unsigned label, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_instruction_ext_label instruction_ext_label; - - instruction_ext_label = tgsi_default_instruction_ext_label(); - instruction_ext_label.Label = label; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return instruction_ext_label; -} - -struct tgsi_instruction_ext_texture -tgsi_default_instruction_ext_texture( void ) -{ - struct tgsi_instruction_ext_texture instruction_ext_texture; - - instruction_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE; - instruction_ext_texture.Texture = TGSI_TEXTURE_UNKNOWN; - instruction_ext_texture.Padding = 0; - instruction_ext_texture.Extended = 0; - - return instruction_ext_texture; -} - -unsigned -tgsi_compare_instruction_ext_texture( - struct tgsi_instruction_ext_texture a, - struct tgsi_instruction_ext_texture b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; -} - -struct tgsi_instruction_ext_texture -tgsi_build_instruction_ext_texture( - unsigned texture, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_instruction_ext_texture instruction_ext_texture; - - instruction_ext_texture = tgsi_default_instruction_ext_texture(); - instruction_ext_texture.Texture = texture; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return instruction_ext_texture; -} - -struct tgsi_src_register -tgsi_default_src_register( void ) -{ - struct tgsi_src_register src_register; - - src_register.File = TGSI_FILE_NULL; - src_register.SwizzleX = TGSI_SWIZZLE_X; - src_register.SwizzleY = TGSI_SWIZZLE_Y; - src_register.SwizzleZ = TGSI_SWIZZLE_Z; - src_register.SwizzleW = TGSI_SWIZZLE_W; - src_register.Negate = 0; - src_register.Indirect = 0; - src_register.Dimension = 0; - src_register.Index = 0; - src_register.Extended = 0; - - return src_register; -} - -struct tgsi_src_register -tgsi_build_src_register( - unsigned file, - unsigned swizzle_x, - unsigned swizzle_y, - unsigned swizzle_z, - unsigned swizzle_w, - unsigned negate, - unsigned indirect, - unsigned dimension, - int index, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_src_register src_register; - - assert( file <= TGSI_FILE_IMMEDIATE ); - assert( swizzle_x <= TGSI_SWIZZLE_W ); - assert( swizzle_y <= TGSI_SWIZZLE_W ); - assert( swizzle_z <= TGSI_SWIZZLE_W ); - assert( swizzle_w <= TGSI_SWIZZLE_W ); - assert( negate <= 1 ); - assert( index >= -0x8000 && index <= 0x7FFF ); - - src_register = tgsi_default_src_register(); - src_register.File = file; - src_register.SwizzleX = swizzle_x; - src_register.SwizzleY = swizzle_y; - src_register.SwizzleZ = swizzle_z; - src_register.SwizzleW = swizzle_w; - src_register.Negate = negate; - src_register.Indirect = indirect; - src_register.Dimension = dimension; - src_register.Index = index; - - instruction_grow( instruction, header ); - - return src_register; -} - -struct tgsi_full_src_register -tgsi_default_full_src_register( void ) -{ - struct tgsi_full_src_register full_src_register; - - full_src_register.SrcRegister = tgsi_default_src_register(); - full_src_register.SrcRegisterExtSwz = tgsi_default_src_register_ext_swz(); - full_src_register.SrcRegisterExtMod = tgsi_default_src_register_ext_mod(); - full_src_register.SrcRegisterInd = tgsi_default_src_register(); - full_src_register.SrcRegisterDim = tgsi_default_dimension(); - full_src_register.SrcRegisterDimInd = tgsi_default_src_register(); - - return full_src_register; -} - -struct tgsi_src_register_ext_swz -tgsi_default_src_register_ext_swz( void ) -{ - struct tgsi_src_register_ext_swz src_register_ext_swz; - - src_register_ext_swz.Type = TGSI_SRC_REGISTER_EXT_TYPE_SWZ; - src_register_ext_swz.ExtSwizzleX = TGSI_EXTSWIZZLE_X; - src_register_ext_swz.ExtSwizzleY = TGSI_EXTSWIZZLE_Y; - src_register_ext_swz.ExtSwizzleZ = TGSI_EXTSWIZZLE_Z; - src_register_ext_swz.ExtSwizzleW = TGSI_EXTSWIZZLE_W; - src_register_ext_swz.NegateX = 0; - src_register_ext_swz.NegateY = 0; - src_register_ext_swz.NegateZ = 0; - src_register_ext_swz.NegateW = 0; - src_register_ext_swz.Padding = 0; - src_register_ext_swz.Extended = 0; - - return src_register_ext_swz; -} - -unsigned -tgsi_compare_src_register_ext_swz( - struct tgsi_src_register_ext_swz a, - struct tgsi_src_register_ext_swz b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; -} - -struct tgsi_src_register_ext_swz -tgsi_build_src_register_ext_swz( - unsigned ext_swizzle_x, - unsigned ext_swizzle_y, - unsigned ext_swizzle_z, - unsigned ext_swizzle_w, - unsigned negate_x, - unsigned negate_y, - unsigned negate_z, - unsigned negate_w, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_src_register_ext_swz src_register_ext_swz; - - assert( ext_swizzle_x <= TGSI_EXTSWIZZLE_ONE ); - assert( ext_swizzle_y <= TGSI_EXTSWIZZLE_ONE ); - assert( ext_swizzle_z <= TGSI_EXTSWIZZLE_ONE ); - assert( ext_swizzle_w <= TGSI_EXTSWIZZLE_ONE ); - assert( negate_x <= 1 ); - assert( negate_y <= 1 ); - assert( negate_z <= 1 ); - assert( negate_w <= 1 ); - - src_register_ext_swz = tgsi_default_src_register_ext_swz(); - src_register_ext_swz.ExtSwizzleX = ext_swizzle_x; - src_register_ext_swz.ExtSwizzleY = ext_swizzle_y; - src_register_ext_swz.ExtSwizzleZ = ext_swizzle_z; - src_register_ext_swz.ExtSwizzleW = ext_swizzle_w; - src_register_ext_swz.NegateX = negate_x; - src_register_ext_swz.NegateY = negate_y; - src_register_ext_swz.NegateZ = negate_z; - src_register_ext_swz.NegateW = negate_w; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return src_register_ext_swz; -} - -struct tgsi_src_register_ext_mod -tgsi_default_src_register_ext_mod( void ) -{ - struct tgsi_src_register_ext_mod src_register_ext_mod; - - src_register_ext_mod.Type = TGSI_SRC_REGISTER_EXT_TYPE_MOD; - src_register_ext_mod.Complement = 0; - src_register_ext_mod.Bias = 0; - src_register_ext_mod.Scale2X = 0; - src_register_ext_mod.Absolute = 0; - src_register_ext_mod.Negate = 0; - src_register_ext_mod.Padding = 0; - src_register_ext_mod.Extended = 0; - - return src_register_ext_mod; -} - -unsigned -tgsi_compare_src_register_ext_mod( - struct tgsi_src_register_ext_mod a, - struct tgsi_src_register_ext_mod b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; -} - -struct tgsi_src_register_ext_mod -tgsi_build_src_register_ext_mod( - unsigned complement, - unsigned bias, - unsigned scale_2x, - unsigned absolute, - unsigned negate, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_src_register_ext_mod src_register_ext_mod; - - assert( complement <= 1 ); - assert( bias <= 1 ); - assert( scale_2x <= 1 ); - assert( absolute <= 1 ); - assert( negate <= 1 ); - - src_register_ext_mod = tgsi_default_src_register_ext_mod(); - src_register_ext_mod.Complement = complement; - src_register_ext_mod.Bias = bias; - src_register_ext_mod.Scale2X = scale_2x; - src_register_ext_mod.Absolute = absolute; - src_register_ext_mod.Negate = negate; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return src_register_ext_mod; -} - -struct tgsi_dimension -tgsi_default_dimension( void ) -{ - struct tgsi_dimension dimension; - - dimension.Indirect = 0; - dimension.Dimension = 0; - dimension.Padding = 0; - dimension.Index = 0; - dimension.Extended = 0; - - return dimension; -} - -struct tgsi_dimension -tgsi_build_dimension( - unsigned indirect, - unsigned index, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_dimension dimension; - - dimension = tgsi_default_dimension(); - dimension.Indirect = indirect; - dimension.Index = index; - - instruction_grow( instruction, header ); - - return dimension; -} - -struct tgsi_dst_register -tgsi_default_dst_register( void ) -{ - struct tgsi_dst_register dst_register; - - dst_register.File = TGSI_FILE_NULL; - dst_register.WriteMask = TGSI_WRITEMASK_XYZW; - dst_register.Indirect = 0; - dst_register.Dimension = 0; - dst_register.Index = 0; - dst_register.Padding = 0; - dst_register.Extended = 0; - - return dst_register; -} - -struct tgsi_dst_register -tgsi_build_dst_register( - unsigned file, - unsigned mask, - int index, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_dst_register dst_register; - - assert( file <= TGSI_FILE_IMMEDIATE ); - assert( mask <= TGSI_WRITEMASK_XYZW ); - assert( index >= -32768 && index <= 32767 ); - - dst_register = tgsi_default_dst_register(); - dst_register.File = file; - dst_register.WriteMask = mask; - dst_register.Index = index; - - instruction_grow( instruction, header ); - - return dst_register; -} - -struct tgsi_full_dst_register -tgsi_default_full_dst_register( void ) -{ - struct tgsi_full_dst_register full_dst_register; - - full_dst_register.DstRegister = tgsi_default_dst_register(); - full_dst_register.DstRegisterExtConcode = - tgsi_default_dst_register_ext_concode(); - full_dst_register.DstRegisterExtModulate = - tgsi_default_dst_register_ext_modulate(); - - return full_dst_register; -} - -struct tgsi_dst_register_ext_concode -tgsi_default_dst_register_ext_concode( void ) -{ - struct tgsi_dst_register_ext_concode dst_register_ext_concode; - - dst_register_ext_concode.Type = TGSI_DST_REGISTER_EXT_TYPE_CONDCODE; - dst_register_ext_concode.CondMask = TGSI_CC_TR; - dst_register_ext_concode.CondSwizzleX = TGSI_SWIZZLE_X; - dst_register_ext_concode.CondSwizzleY = TGSI_SWIZZLE_Y; - dst_register_ext_concode.CondSwizzleZ = TGSI_SWIZZLE_Z; - dst_register_ext_concode.CondSwizzleW = TGSI_SWIZZLE_W; - dst_register_ext_concode.CondSrcIndex = 0; - dst_register_ext_concode.Padding = 0; - dst_register_ext_concode.Extended = 0; - - return dst_register_ext_concode; -} - -unsigned -tgsi_compare_dst_register_ext_concode( - struct tgsi_dst_register_ext_concode a, - struct tgsi_dst_register_ext_concode b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; -} - -struct tgsi_dst_register_ext_concode -tgsi_build_dst_register_ext_concode( - unsigned cc, - unsigned swizzle_x, - unsigned swizzle_y, - unsigned swizzle_z, - unsigned swizzle_w, - int index, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_dst_register_ext_concode dst_register_ext_concode; - - assert( cc <= TGSI_CC_FL ); - assert( swizzle_x <= TGSI_SWIZZLE_W ); - assert( swizzle_y <= TGSI_SWIZZLE_W ); - assert( swizzle_z <= TGSI_SWIZZLE_W ); - assert( swizzle_w <= TGSI_SWIZZLE_W ); - assert( index >= -32768 && index <= 32767 ); - - dst_register_ext_concode = tgsi_default_dst_register_ext_concode(); - dst_register_ext_concode.CondMask = cc; - dst_register_ext_concode.CondSwizzleX = swizzle_x; - dst_register_ext_concode.CondSwizzleY = swizzle_y; - dst_register_ext_concode.CondSwizzleZ = swizzle_z; - dst_register_ext_concode.CondSwizzleW = swizzle_w; - dst_register_ext_concode.CondSrcIndex = index; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return dst_register_ext_concode; -} - -struct tgsi_dst_register_ext_modulate -tgsi_default_dst_register_ext_modulate( void ) -{ - struct tgsi_dst_register_ext_modulate dst_register_ext_modulate; - - dst_register_ext_modulate.Type = TGSI_DST_REGISTER_EXT_TYPE_MODULATE; - dst_register_ext_modulate.Modulate = TGSI_MODULATE_1X; - dst_register_ext_modulate.Padding = 0; - dst_register_ext_modulate.Extended = 0; - - return dst_register_ext_modulate; -} - -unsigned -tgsi_compare_dst_register_ext_modulate( - struct tgsi_dst_register_ext_modulate a, - struct tgsi_dst_register_ext_modulate b ) -{ - a.Padding = b.Padding = 0; - a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; -} - -struct tgsi_dst_register_ext_modulate -tgsi_build_dst_register_ext_modulate( - unsigned modulate, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ) -{ - struct tgsi_dst_register_ext_modulate dst_register_ext_modulate; - - assert( modulate <= TGSI_MODULATE_EIGHTH ); - - dst_register_ext_modulate = tgsi_default_dst_register_ext_modulate(); - dst_register_ext_modulate.Modulate = modulate; - - prev_token->Extended = 1; - instruction_grow( instruction, header ); - - return dst_register_ext_modulate; -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_build.h b/src/gallium/auxiliary/tgsi/util/tgsi_build.h deleted file mode 100644 index ed25830248..0000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_build.h +++ /dev/null @@ -1,332 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_BUILD_H -#define TGSI_BUILD_H - -#if defined __cplusplus -extern "C" { -#endif - -/* - * version - */ - -struct tgsi_version -tgsi_build_version( void ); - -/* - * header - */ - -struct tgsi_header -tgsi_build_header( void ); - -struct tgsi_processor -tgsi_default_processor( void ); - -struct tgsi_processor -tgsi_build_processor( - unsigned processor, - struct tgsi_header *header ); - -/* - * declaration - */ - -struct tgsi_declaration -tgsi_default_declaration( void ); - -struct tgsi_declaration -tgsi_build_declaration( - unsigned file, - unsigned usage_mask, - unsigned interpolate, - unsigned semantic, - struct tgsi_header *header ); - -struct tgsi_full_declaration -tgsi_default_full_declaration( void ); - -unsigned -tgsi_build_full_declaration( - const struct tgsi_full_declaration *full_decl, - struct tgsi_token *tokens, - struct tgsi_header *header, - unsigned maxsize ); - -struct tgsi_declaration_range -tgsi_default_declaration_range( void ); - -struct tgsi_declaration_range -tgsi_build_declaration_range( - unsigned first, - unsigned last, - struct tgsi_declaration *declaration, - struct tgsi_header *header ); - -struct tgsi_declaration_semantic -tgsi_default_declaration_semantic( void ); - -struct tgsi_declaration_semantic -tgsi_build_declaration_semantic( - unsigned semantic_name, - unsigned semantic_index, - struct tgsi_declaration *declaration, - struct tgsi_header *header ); - -/* - * immediate - */ - -struct tgsi_immediate -tgsi_default_immediate( void ); - -struct tgsi_immediate -tgsi_build_immediate( - struct tgsi_header *header ); - -struct tgsi_full_immediate -tgsi_default_full_immediate( void ); - -struct tgsi_immediate_float32 -tgsi_build_immediate_float32( - float value, - struct tgsi_immediate *immediate, - struct tgsi_header *header ); - -unsigned -tgsi_build_full_immediate( - const struct tgsi_full_immediate *full_imm, - struct tgsi_token *tokens, - struct tgsi_header *header, - unsigned maxsize ); - -/* - * instruction - */ - -struct tgsi_instruction -tgsi_default_instruction( void ); - -struct tgsi_instruction -tgsi_build_instruction( - unsigned opcode, - unsigned saturate, - unsigned num_dst_regs, - unsigned num_src_regs, - struct tgsi_header *header ); - -struct tgsi_full_instruction -tgsi_default_full_instruction( void ); - -unsigned -tgsi_build_full_instruction( - const struct tgsi_full_instruction *full_inst, - struct tgsi_token *tokens, - struct tgsi_header *header, - unsigned maxsize ); - -struct tgsi_instruction_ext_nv -tgsi_default_instruction_ext_nv( void ); - -unsigned -tgsi_compare_instruction_ext_nv( - struct tgsi_instruction_ext_nv a, - struct tgsi_instruction_ext_nv b ); - -struct tgsi_instruction_ext_nv -tgsi_build_instruction_ext_nv( - unsigned precision, - unsigned cond_dst_index, - unsigned cond_flow_index, - unsigned cond_mask, - unsigned cond_swizzle_x, - unsigned cond_swizzle_y, - unsigned cond_swizzle_z, - unsigned cond_swizzle_w, - unsigned cond_dst_update, - unsigned cond_flow_update, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_instruction_ext_label -tgsi_default_instruction_ext_label( void ); - -unsigned -tgsi_compare_instruction_ext_label( - struct tgsi_instruction_ext_label a, - struct tgsi_instruction_ext_label b ); - -struct tgsi_instruction_ext_label -tgsi_build_instruction_ext_label( - unsigned label, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_instruction_ext_texture -tgsi_default_instruction_ext_texture( void ); - -unsigned -tgsi_compare_instruction_ext_texture( - struct tgsi_instruction_ext_texture a, - struct tgsi_instruction_ext_texture b ); - -struct tgsi_instruction_ext_texture -tgsi_build_instruction_ext_texture( - unsigned texture, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_src_register -tgsi_default_src_register( void ); - -struct tgsi_src_register -tgsi_build_src_register( - unsigned file, - unsigned swizzle_x, - unsigned swizzle_y, - unsigned swizzle_z, - unsigned swizzle_w, - unsigned negate, - unsigned indirect, - unsigned dimension, - int index, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_full_src_register -tgsi_default_full_src_register( void ); - -struct tgsi_src_register_ext_swz -tgsi_default_src_register_ext_swz( void ); - -unsigned -tgsi_compare_src_register_ext_swz( - struct tgsi_src_register_ext_swz a, - struct tgsi_src_register_ext_swz b ); - -struct tgsi_src_register_ext_swz -tgsi_build_src_register_ext_swz( - unsigned ext_swizzle_x, - unsigned ext_swizzle_y, - unsigned ext_swizzle_z, - unsigned ext_swizzle_w, - unsigned negate_x, - unsigned negate_y, - unsigned negate_z, - unsigned negate_w, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_src_register_ext_mod -tgsi_default_src_register_ext_mod( void ); - -unsigned -tgsi_compare_src_register_ext_mod( - struct tgsi_src_register_ext_mod a, - struct tgsi_src_register_ext_mod b ); - -struct tgsi_src_register_ext_mod -tgsi_build_src_register_ext_mod( - unsigned complement, - unsigned bias, - unsigned scale_2x, - unsigned absolute, - unsigned negate, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_dimension -tgsi_default_dimension( void ); - -struct tgsi_dimension -tgsi_build_dimension( - unsigned indirect, - unsigned index, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_dst_register -tgsi_default_dst_register( void ); - -struct tgsi_dst_register -tgsi_build_dst_register( - unsigned file, - unsigned mask, - int index, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_full_dst_register -tgsi_default_full_dst_register( void ); - -struct tgsi_dst_register_ext_concode -tgsi_default_dst_register_ext_concode( void ); - -unsigned -tgsi_compare_dst_register_ext_concode( - struct tgsi_dst_register_ext_concode a, - struct tgsi_dst_register_ext_concode b ); - -struct tgsi_dst_register_ext_concode -tgsi_build_dst_register_ext_concode( - unsigned cc, - unsigned swizzle_x, - unsigned swizzle_y, - unsigned swizzle_z, - unsigned swizzle_w, - int index, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -struct tgsi_dst_register_ext_modulate -tgsi_default_dst_register_ext_modulate( void ); - -unsigned -tgsi_compare_dst_register_ext_modulate( - struct tgsi_dst_register_ext_modulate a, - struct tgsi_dst_register_ext_modulate b ); - -struct tgsi_dst_register_ext_modulate -tgsi_build_dst_register_ext_modulate( - unsigned modulate, - struct tgsi_token *prev_token, - struct tgsi_instruction *instruction, - struct tgsi_header *header ); - -#if defined __cplusplus -} -#endif - -#endif /* TGSI_BUILD_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump.c deleted file mode 100644 index d2e6375212..0000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.c +++ /dev/null @@ -1,582 +0,0 @@ -/************************************************************************** - * - * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_debug.h" -#include "tgsi_dump.h" -#include "tgsi_iterate.h" - -struct dump_ctx -{ - struct tgsi_iterate_context iter; - - uint instno; -}; - -static void -dump_enum( - uint e, - const char **enums, - uint enum_count ) -{ - if (e >= enum_count) - debug_printf( "%u", e ); - else - debug_printf( "%s", enums[e] ); -} - -#define EOL() debug_printf( "\n" ) -#define TXT(S) debug_printf( "%s", S ) -#define CHR(C) debug_printf( "%c", C ) -#define UIX(I) debug_printf( "0x%x", I ) -#define UID(I) debug_printf( "%u", I ) -#define SID(I) debug_printf( "%d", I ) -#define FLT(F) debug_printf( "%10.4f", F ) -#define ENM(E,ENUMS) dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) - -static const char *processor_type_names[] = -{ - "FRAG", - "VERT", - "GEOM" -}; - -static const char *file_names[] = -{ - "NULL", - "CONST", - "IN", - "OUT", - "TEMP", - "SAMP", - "ADDR", - "IMM" -}; - -static const char *interpolate_names[] = -{ - "CONSTANT", - "LINEAR", - "PERSPECTIVE" -}; - -static const char *semantic_names[] = -{ - "POSITION", - "COLOR", - "BCOLOR", - "FOG", - "PSIZE", - "GENERIC", - "NORMAL" -}; - -static const char *immediate_type_names[] = -{ - "FLT32" -}; - -static const char *opcode_names[TGSI_OPCODE_LAST] = -{ - "ARL", - "MOV", - "LIT", - "RCP", - "RSQ", - "EXP", - "LOG", - "MUL", - "ADD", - "DP3", - "DP4", - "DST", - "MIN", - "MAX", - "SLT", - "SGE", - "MAD", - "SUB", - "LERP", - "CND", - "CND0", - "DOT2ADD", - "INDEX", - "NEGATE", - "FRAC", - "CLAMP", - "FLOOR", - "ROUND", - "EXPBASE2", - "LOGBASE2", - "POWER", - "CROSSPRODUCT", - "MULTIPLYMATRIX", - "ABS", - "RCC", - "DPH", - "COS", - "DDX", - "DDY", - "KILP", - "PK2H", - "PK2US", - "PK4B", - "PK4UB", - "RFL", - "SEQ", - "SFL", - "SGT", - "SIN", - "SLE", - "SNE", - "STR", - "TEX", - "TXD", - "TXP", - "UP2H", - "UP2US", - "UP4B", - "UP4UB", - "X2D", - "ARA", - "ARR", - "BRA", - "CAL", - "RET", - "SSG", - "CMP", - "SCS", - "TXB", - "NRM", - "DIV", - "DP2", - "TXL", - "BRK", - "IF", - "LOOP", - "REP", - "ELSE", - "ENDIF", - "ENDLOOP", - "ENDREP", - "PUSHA", - "POPA", - "CEIL", - "I2F", - "NOT", - "TRUNC", - "SHL", - "SHR", - "AND", - "OR", - "MOD", - "XOR", - "SAD", - "TXF", - "TXQ", - "CONT", - "EMIT", - "ENDPRIM", - "BGNLOOP2", - "BGNSUB", - "ENDLOOP2", - "ENDSUB", - "NOISE1", - "NOISE2", - "NOISE3", - "NOISE4", - "NOP", - "M4X3", - "M3X4", - "M3X3", - "M3X2", - "NRM4", - "CALLNZ", - "IFC", - "BREAKC", - "KIL", - "END", - "SWZ" -}; - -static const char *swizzle_names[] = -{ - "x", - "y", - "z", - "w" -}; - -static const char *texture_names[] = -{ - "UNKNOWN", - "1D", - "2D", - "3D", - "CUBE", - "RECT", - "SHADOW1D", - "SHADOW2D", - "SHADOWRECT" -}; - -static const char *extswizzle_names[] = -{ - "x", - "y", - "z", - "w", - "0", - "1" -}; - -static const char *modulate_names[TGSI_MODULATE_COUNT] = -{ - "", - "_2X", - "_4X", - "_8X", - "_D2", - "_D4", - "_D8" -}; - -static void -_dump_register_prefix( - uint file, - uint first, - uint last ) -{ - - -} - -static void -_dump_register( - uint file, - int first, - int last ) -{ - ENM( file, file_names ); - CHR( '[' ); - SID( first ); - if (first != last) { - TXT( ".." ); - SID( last ); - } - CHR( ']' ); -} - -static void -_dump_register_ind( - uint file, - int index, - uint ind_file, - int ind_index ) -{ - ENM( file, file_names ); - CHR( '[' ); - ENM( ind_file, file_names ); - CHR( '[' ); - SID( ind_index ); - CHR( ']' ); - if (index != 0) { - if (index > 0) - CHR( '+' ); - SID( index ); - } - CHR( ']' ); -} - -static void -_dump_writemask( - uint writemask ) -{ - if (writemask != TGSI_WRITEMASK_XYZW) { - CHR( '.' ); - if (writemask & TGSI_WRITEMASK_X) - CHR( 'x' ); - if (writemask & TGSI_WRITEMASK_Y) - CHR( 'y' ); - if (writemask & TGSI_WRITEMASK_Z) - CHR( 'z' ); - if (writemask & TGSI_WRITEMASK_W) - CHR( 'w' ); - } -} - -void -tgsi_dump_declaration( - const struct tgsi_full_declaration *decl ) -{ - TXT( "\nDCL " ); - - _dump_register( - decl->Declaration.File, - decl->DeclarationRange.First, - decl->DeclarationRange.Last ); - _dump_writemask( - decl->Declaration.UsageMask ); - - if (decl->Declaration.Semantic) { - TXT( ", " ); - ENM( decl->Semantic.SemanticName, semantic_names ); - if (decl->Semantic.SemanticIndex != 0 || - decl->Semantic.SemanticName == TGSI_SEMANTIC_GENERIC) { - CHR( '[' ); - UID( decl->Semantic.SemanticIndex ); - CHR( ']' ); - } - } - - TXT( ", " ); - ENM( decl->Declaration.Interpolate, interpolate_names ); -} - -static boolean -iter_declaration( - struct tgsi_iterate_context *iter, - struct tgsi_full_declaration *decl ) -{ - tgsi_dump_declaration( decl ); - return TRUE; -} - -void -tgsi_dump_immediate( - const struct tgsi_full_immediate *imm ) -{ - uint i; - - TXT( "\nIMM " ); - ENM( imm->Immediate.DataType, immediate_type_names ); - - TXT( " { " ); - for (i = 0; i < imm->Immediate.Size - 1; i++) { - switch (imm->Immediate.DataType) { - case TGSI_IMM_FLOAT32: - FLT( imm->u.ImmediateFloat32[i].Float ); - break; - default: - assert( 0 ); - } - - if (i < imm->Immediate.Size - 2) - TXT( ", " ); - } - TXT( " }" ); -} - -static boolean -iter_immediate( - struct tgsi_iterate_context *iter, - struct tgsi_full_immediate *imm ) -{ - tgsi_dump_immediate( imm ); - return TRUE; -} - -void -tgsi_dump_instruction( - const struct tgsi_full_instruction *inst, - uint instno ) -{ - uint i; - boolean first_reg = TRUE; - - EOL(); - UID( instno ); - CHR( ':' ); - ENM( inst->Instruction.Opcode, opcode_names ); - - switch (inst->Instruction.Saturate) { - case TGSI_SAT_NONE: - break; - case TGSI_SAT_ZERO_ONE: - TXT( "_SAT" ); - break; - case TGSI_SAT_MINUS_PLUS_ONE: - TXT( "_SATNV" ); - break; - default: - assert( 0 ); - } - - for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - const struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - - if (!first_reg) - CHR( ',' ); - CHR( ' ' ); - - _dump_register( - dst->DstRegister.File, - dst->DstRegister.Index, - dst->DstRegister.Index ); - ENM( dst->DstRegisterExtModulate.Modulate, modulate_names ); - _dump_writemask( dst->DstRegister.WriteMask ); - - first_reg = FALSE; - } - - for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - const struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; - - if (!first_reg) - CHR( ',' ); - CHR( ' ' ); - - if (src->SrcRegisterExtMod.Negate) - TXT( "-(" ); - if (src->SrcRegisterExtMod.Absolute) - CHR( '|' ); - if (src->SrcRegisterExtMod.Scale2X) - TXT( "2*(" ); - if (src->SrcRegisterExtMod.Bias) - CHR( '(' ); - if (src->SrcRegisterExtMod.Complement) - TXT( "1-(" ); - if (src->SrcRegister.Negate) - CHR( '-' ); - - if (src->SrcRegister.Indirect) { - _dump_register_ind( - src->SrcRegister.File, - src->SrcRegister.Index, - src->SrcRegisterInd.File, - src->SrcRegisterInd.Index ); - } - else { - _dump_register( - src->SrcRegister.File, - src->SrcRegister.Index, - src->SrcRegister.Index ); - } - - if (src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || - src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || - src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || - src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W) { - CHR( '.' ); - ENM( src->SrcRegister.SwizzleX, swizzle_names ); - ENM( src->SrcRegister.SwizzleY, swizzle_names ); - ENM( src->SrcRegister.SwizzleZ, swizzle_names ); - ENM( src->SrcRegister.SwizzleW, swizzle_names ); - } - if (src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || - src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || - src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || - src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W) { - CHR( '.' ); - if (src->SrcRegisterExtSwz.NegateX) - TXT("-"); - ENM( src->SrcRegisterExtSwz.ExtSwizzleX, extswizzle_names ); - if (src->SrcRegisterExtSwz.NegateY) - TXT("-"); - ENM( src->SrcRegisterExtSwz.ExtSwizzleY, extswizzle_names ); - if (src->SrcRegisterExtSwz.NegateZ) - TXT("-"); - ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, extswizzle_names ); - if (src->SrcRegisterExtSwz.NegateW) - TXT("-"); - ENM( src->SrcRegisterExtSwz.ExtSwizzleW, extswizzle_names ); - } - - if (src->SrcRegisterExtMod.Complement) - CHR( ')' ); - if (src->SrcRegisterExtMod.Bias) - TXT( ")-.5" ); - if (src->SrcRegisterExtMod.Scale2X) - CHR( ')' ); - if (src->SrcRegisterExtMod.Absolute) - CHR( '|' ); - if (src->SrcRegisterExtMod.Negate) - CHR( ')' ); - - first_reg = FALSE; - } - - if (inst->InstructionExtTexture.Texture != TGSI_TEXTURE_UNKNOWN) { - TXT( ", " ); - ENM( inst->InstructionExtTexture.Texture, texture_names ); - } - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_IF: - case TGSI_OPCODE_ELSE: - case TGSI_OPCODE_BGNLOOP2: - case TGSI_OPCODE_ENDLOOP2: - case TGSI_OPCODE_CAL: - TXT( " :" ); - UID( inst->InstructionExtLabel.Label ); - break; - } -} - -static boolean -iter_instruction( - struct tgsi_iterate_context *iter, - struct tgsi_full_instruction *inst ) -{ - struct dump_ctx *ctx = (struct dump_ctx *) iter; - - tgsi_dump_instruction( inst, ctx->instno++ ); - return TRUE; -} - -static boolean -prolog( - struct tgsi_iterate_context *ctx ) -{ - EOL(); - ENM( ctx->processor.Processor, processor_type_names ); - UID( ctx->version.MajorVersion ); - CHR( '.' ); - UID( ctx->version.MinorVersion ); - return TRUE; -} - -void -tgsi_dump( - const struct tgsi_token *tokens, - uint flags ) -{ - struct dump_ctx ctx; - - /* sanity checks */ - assert( strcmp( opcode_names[TGSI_OPCODE_CONT], "CONT" ) == 0 ); - assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 ); - - ctx.iter.prolog = prolog; - ctx.iter.iterate_instruction = iter_instruction; - ctx.iter.iterate_declaration = iter_declaration; - ctx.iter.iterate_immediate = iter_immediate; - ctx.iter.epilog = NULL; - - ctx.instno = 0; - - tgsi_iterate_shader( tokens, &ctx.iter ); -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump.h deleted file mode 100644 index 51c230b5db..0000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump.h +++ /dev/null @@ -1,63 +0,0 @@ -/************************************************************************** - * - * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_DUMP_H -#define TGSI_DUMP_H - -#include "pipe/p_shader_tokens.h" - -#if defined __cplusplus -extern "C" { -#endif - -void -tgsi_dump( - const struct tgsi_token *tokens, - uint flags ); - -struct tgsi_full_immediate; -struct tgsi_full_instruction; -struct tgsi_full_declaration; - -void -tgsi_dump_immediate( - const struct tgsi_full_immediate *imm ); - -void -tgsi_dump_instruction( - const struct tgsi_full_instruction *inst, - uint instno ); - -void -tgsi_dump_declaration( - const struct tgsi_full_declaration *decl ); - -#if defined __cplusplus -} -#endif - -#endif /* TGSI_DUMP_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c deleted file mode 100644 index eabd74bd6d..0000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.c +++ /dev/null @@ -1,845 +0,0 @@ -/************************************************************************** - * - * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_debug.h" -#include "pipe/p_util.h" -#include "util/u_string.h" -#include "tgsi_dump_c.h" -#include "tgsi_parse.h" -#include "tgsi_build.h" - -static void -dump_enum( - const unsigned e, - const char **enums, - const unsigned enums_count ) -{ - if (e >= enums_count) { - debug_printf( "%u", e ); - } - else { - debug_printf( "%s", enums[e] ); - } -} - -#define EOL() debug_printf( "\n" ) -#define TXT(S) debug_printf( "%s", S ) -#define CHR(C) debug_printf( "%c", C ) -#define UIX(I) debug_printf( "0x%x", I ) -#define UID(I) debug_printf( "%u", I ) -#define SID(I) debug_printf( "%d", I ) -#define FLT(F) debug_printf( "%10.4f", F ) -#define ENM(E,ENUMS) dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) - -static const char *TGSI_PROCESSOR_TYPES[] = -{ - "PROCESSOR_FRAGMENT", - "PROCESSOR_VERTEX", - "PROCESSOR_GEOMETRY" -}; - -static const char *TGSI_TOKEN_TYPES[] = -{ - "TOKEN_TYPE_DECLARATION", - "TOKEN_TYPE_IMMEDIATE", - "TOKEN_TYPE_INSTRUCTION" -}; - -static const char *TGSI_FILES[] = -{ - "FILE_NULL", - "FILE_CONSTANT", - "FILE_INPUT", - "FILE_OUTPUT", - "FILE_TEMPORARY", - "FILE_SAMPLER", - "FILE_ADDRESS", - "FILE_IMMEDIATE" -}; - -static const char *TGSI_INTERPOLATES[] = -{ - "INTERPOLATE_CONSTANT", - "INTERPOLATE_LINEAR", - "INTERPOLATE_PERSPECTIVE" -}; - -static const char *TGSI_SEMANTICS[] = -{ - "SEMANTIC_POSITION", - "SEMANTIC_COLOR", - "SEMANTIC_BCOLOR", - "SEMANTIC_FOG", - "SEMANTIC_PSIZE", - "SEMANTIC_GENERIC", - "SEMANTIC_NORMAL" -}; - -static const char *TGSI_IMMS[] = -{ - "IMM_FLOAT32" -}; - -static const char *TGSI_OPCODES[TGSI_OPCODE_LAST] = -{ - "OPCODE_ARL", - "OPCODE_MOV", - "OPCODE_LIT", - "OPCODE_RCP", - "OPCODE_RSQ", - "OPCODE_EXP", - "OPCODE_LOG", - "OPCODE_MUL", - "OPCODE_ADD", - "OPCODE_DP3", - "OPCODE_DP4", - "OPCODE_DST", - "OPCODE_MIN", - "OPCODE_MAX", - "OPCODE_SLT", - "OPCODE_SGE", - "OPCODE_MAD", - "OPCODE_SUB", - "OPCODE_LERP", - "OPCODE_CND", - "OPCODE_CND0", - "OPCODE_DOT2ADD", - "OPCODE_INDEX", - "OPCODE_NEGATE", - "OPCODE_FRAC", - "OPCODE_CLAMP", - "OPCODE_FLOOR", - "OPCODE_ROUND", - "OPCODE_EXPBASE2", - "OPCODE_LOGBASE2", - "OPCODE_POWER", - "OPCODE_CROSSPRODUCT", - "OPCODE_MULTIPLYMATRIX", - "OPCODE_ABS", - "OPCODE_RCC", - "OPCODE_DPH", - "OPCODE_COS", - "OPCODE_DDX", - "OPCODE_DDY", - "OPCODE_KILP", - "OPCODE_PK2H", - "OPCODE_PK2US", - "OPCODE_PK4B", - "OPCODE_PK4UB", - "OPCODE_RFL", - "OPCODE_SEQ", - "OPCODE_SFL", - "OPCODE_SGT", - "OPCODE_SIN", - "OPCODE_SLE", - "OPCODE_SNE", - "OPCODE_STR", - "OPCODE_TEX", - "OPCODE_TXD", - "OPCODE_TXP", - "OPCODE_UP2H", - "OPCODE_UP2US", - "OPCODE_UP4B", - "OPCODE_UP4UB", - "OPCODE_X2D", - "OPCODE_ARA", - "OPCODE_ARR", - "OPCODE_BRA", - "OPCODE_CAL", - "OPCODE_RET", - "OPCODE_SSG", - "OPCODE_CMP", - "OPCODE_SCS", - "OPCODE_TXB", - "OPCODE_NRM", - "OPCODE_DIV", - "OPCODE_DP2", - "OPCODE_TXL", - "OPCODE_BRK", - "OPCODE_IF", - "OPCODE_LOOP", - "OPCODE_REP", - "OPCODE_ELSE", - "OPCODE_ENDIF", - "OPCODE_ENDLOOP", - "OPCODE_ENDREP", - "OPCODE_PUSHA", - "OPCODE_POPA", - "OPCODE_CEIL", - "OPCODE_I2F", - "OPCODE_NOT", - "OPCODE_TRUNC", - "OPCODE_SHL", - "OPCODE_SHR", - "OPCODE_AND", - "OPCODE_OR", - "OPCODE_MOD", - "OPCODE_XOR", - "OPCODE_SAD", - "OPCODE_TXF", - "OPCODE_TXQ", - "OPCODE_CONT", - "OPCODE_EMIT", - "OPCODE_ENDPRIM", - "OPCODE_BGNLOOP2", - "OPCODE_BGNSUB", - "OPCODE_ENDLOOP2", - "OPCODE_ENDSUB", - "OPCODE_NOISE1", - "OPCODE_NOISE2", - "OPCODE_NOISE3", - "OPCODE_NOISE4", - "OPCODE_NOP", - "OPCODE_M4X3", - "OPCODE_M3X4", - "OPCODE_M3X3", - "OPCODE_M3X2", - "OPCODE_NRM4", - "OPCODE_CALLNZ", - "OPCODE_IFC", - "OPCODE_BREAKC", - "OPCODE_KIL", - "OPCODE_END" -}; - -static const char *TGSI_SATS[] = -{ - "SAT_NONE", - "SAT_ZERO_ONE", - "SAT_MINUS_PLUS_ONE" -}; - -static const char *TGSI_INSTRUCTION_EXTS[] = -{ - "INSTRUCTION_EXT_TYPE_NV", - "INSTRUCTION_EXT_TYPE_LABEL", - "INSTRUCTION_EXT_TYPE_TEXTURE" -}; - -static const char *TGSI_PRECISIONS[] = -{ - "PRECISION_DEFAULT", - "PRECISION_FLOAT32", - "PRECISION_FLOAT16", - "PRECISION_FIXED12" -}; - -static const char *TGSI_CCS[] = -{ - "CC_GT", - "CC_EQ", - "CC_LT", - "CC_UN", - "CC_GE", - "CC_LE", - "CC_NE", - "CC_TR", - "CC_FL" -}; - -static const char *TGSI_SWIZZLES[] = -{ - "SWIZZLE_X", - "SWIZZLE_Y", - "SWIZZLE_Z", - "SWIZZLE_W" -}; - -static const char *TGSI_TEXTURES[] = -{ - "TEXTURE_UNKNOWN", - "TEXTURE_1D", - "TEXTURE_2D", - "TEXTURE_3D", - "TEXTURE_CUBE", - "TEXTURE_RECT", - "TEXTURE_SHADOW1D", - "TEXTURE_SHADOW2D", - "TEXTURE_SHADOWRECT" -}; - -static const char *TGSI_SRC_REGISTER_EXTS[] = -{ - "SRC_REGISTER_EXT_TYPE_SWZ", - "SRC_REGISTER_EXT_TYPE_MOD" -}; - -static const char *TGSI_EXTSWIZZLES[] = -{ - "EXTSWIZZLE_X", - "EXTSWIZZLE_Y", - "EXTSWIZZLE_Z", - "EXTSWIZZLE_W", - "EXTSWIZZLE_ZERO", - "EXTSWIZZLE_ONE" -}; - -static const char *TGSI_WRITEMASKS[] = -{ - "0", - "WRITEMASK_X", - "WRITEMASK_Y", - "WRITEMASK_XY", - "WRITEMASK_Z", - "WRITEMASK_XZ", - "WRITEMASK_YZ", - "WRITEMASK_XYZ", - "WRITEMASK_W", - "WRITEMASK_XW", - "WRITEMASK_YW", - "WRITEMASK_XYW", - "WRITEMASK_ZW", - "WRITEMASK_XZW", - "WRITEMASK_YZW", - "WRITEMASK_XYZW" -}; - -static const char *TGSI_DST_REGISTER_EXTS[] = -{ - "DST_REGISTER_EXT_TYPE_CONDCODE", - "DST_REGISTER_EXT_TYPE_MODULATE" -}; - -static const char *TGSI_MODULATES[] = -{ - "MODULATE_1X", - "MODULATE_2X", - "MODULATE_4X", - "MODULATE_8X", - "MODULATE_HALF", - "MODULATE_QUARTER", - "MODULATE_EIGHTH" -}; - -static void -dump_declaration_verbose( - struct tgsi_full_declaration *decl, - unsigned ignored, - unsigned deflt, - struct tgsi_full_declaration *fd ) -{ - TXT( "\nFile : " ); - ENM( decl->Declaration.File, TGSI_FILES ); - if( deflt || fd->Declaration.UsageMask != decl->Declaration.UsageMask ) { - TXT( "\nUsageMask : " ); - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_X ) { - CHR( 'X' ); - } - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Y ) { - CHR( 'Y' ); - } - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_Z ) { - CHR( 'Z' ); - } - if( decl->Declaration.UsageMask & TGSI_WRITEMASK_W ) { - CHR( 'W' ); - } - } - if( deflt || fd->Declaration.Interpolate != decl->Declaration.Interpolate ) { - TXT( "\nInterpolate: " ); - ENM( decl->Declaration.Interpolate, TGSI_INTERPOLATES ); - } - if( deflt || fd->Declaration.Semantic != decl->Declaration.Semantic ) { - TXT( "\nSemantic : " ); - UID( decl->Declaration.Semantic ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( decl->Declaration.Padding ); - } - - EOL(); - TXT( "\nFirst: " ); - UID( decl->DeclarationRange.First ); - TXT( "\nLast : " ); - UID( decl->DeclarationRange.Last ); - - if( decl->Declaration.Semantic ) { - EOL(); - TXT( "\nSemanticName : " ); - ENM( decl->Semantic.SemanticName, TGSI_SEMANTICS ); - TXT( "\nSemanticIndex: " ); - UID( decl->Semantic.SemanticIndex ); - if( ignored ) { - TXT( "\nPadding : " ); - UIX( decl->Semantic.Padding ); - } - } -} - -static void -dump_immediate_verbose( - struct tgsi_full_immediate *imm, - unsigned ignored ) -{ - unsigned i; - - TXT( "\nDataType : " ); - ENM( imm->Immediate.DataType, TGSI_IMMS ); - if( ignored ) { - TXT( "\nPadding : " ); - UIX( imm->Immediate.Padding ); - } - - for( i = 0; i < imm->Immediate.Size - 1; i++ ) { - EOL(); - switch( imm->Immediate.DataType ) { - case TGSI_IMM_FLOAT32: - TXT( "\nFloat: " ); - FLT( imm->u.ImmediateFloat32[i].Float ); - break; - - default: - assert( 0 ); - } - } -} - -static void -dump_instruction_verbose( - struct tgsi_full_instruction *inst, - unsigned ignored, - unsigned deflt, - struct tgsi_full_instruction *fi ) -{ - unsigned i; - - TXT( "\nOpcode : " ); - ENM( inst->Instruction.Opcode, TGSI_OPCODES ); - if( deflt || fi->Instruction.Saturate != inst->Instruction.Saturate ) { - TXT( "\nSaturate : " ); - ENM( inst->Instruction.Saturate, TGSI_SATS ); - } - if( deflt || fi->Instruction.NumDstRegs != inst->Instruction.NumDstRegs ) { - TXT( "\nNumDstRegs : " ); - UID( inst->Instruction.NumDstRegs ); - } - if( deflt || fi->Instruction.NumSrcRegs != inst->Instruction.NumSrcRegs ) { - TXT( "\nNumSrcRegs : " ); - UID( inst->Instruction.NumSrcRegs ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( inst->Instruction.Padding ); - } - - if( deflt || tgsi_compare_instruction_ext_nv( inst->InstructionExtNv, fi->InstructionExtNv ) ) { - EOL(); - TXT( "\nType : " ); - ENM( inst->InstructionExtNv.Type, TGSI_INSTRUCTION_EXTS ); - if( deflt || fi->InstructionExtNv.Precision != inst->InstructionExtNv.Precision ) { - TXT( "\nPrecision : " ); - ENM( inst->InstructionExtNv.Precision, TGSI_PRECISIONS ); - } - if( deflt || fi->InstructionExtNv.CondDstIndex != inst->InstructionExtNv.CondDstIndex ) { - TXT( "\nCondDstIndex : " ); - UID( inst->InstructionExtNv.CondDstIndex ); - } - if( deflt || fi->InstructionExtNv.CondFlowIndex != inst->InstructionExtNv.CondFlowIndex ) { - TXT( "\nCondFlowIndex : " ); - UID( inst->InstructionExtNv.CondFlowIndex ); - } - if( deflt || fi->InstructionExtNv.CondMask != inst->InstructionExtNv.CondMask ) { - TXT( "\nCondMask : " ); - ENM( inst->InstructionExtNv.CondMask, TGSI_CCS ); - } - if( deflt || fi->InstructionExtNv.CondSwizzleX != inst->InstructionExtNv.CondSwizzleX ) { - TXT( "\nCondSwizzleX : " ); - ENM( inst->InstructionExtNv.CondSwizzleX, TGSI_SWIZZLES ); - } - if( deflt || fi->InstructionExtNv.CondSwizzleY != inst->InstructionExtNv.CondSwizzleY ) { - TXT( "\nCondSwizzleY : " ); - ENM( inst->InstructionExtNv.CondSwizzleY, TGSI_SWIZZLES ); - } - if( deflt || fi->InstructionExtNv.CondSwizzleZ != inst->InstructionExtNv.CondSwizzleZ ) { - TXT( "\nCondSwizzleZ : " ); - ENM( inst->InstructionExtNv.CondSwizzleZ, TGSI_SWIZZLES ); - } - if( deflt || fi->InstructionExtNv.CondSwizzleW != inst->InstructionExtNv.CondSwizzleW ) { - TXT( "\nCondSwizzleW : " ); - ENM( inst->InstructionExtNv.CondSwizzleW, TGSI_SWIZZLES ); - } - if( deflt || fi->InstructionExtNv.CondDstUpdate != inst->InstructionExtNv.CondDstUpdate ) { - TXT( "\nCondDstUpdate : " ); - UID( inst->InstructionExtNv.CondDstUpdate ); - } - if( deflt || fi->InstructionExtNv.CondFlowEnable != inst->InstructionExtNv.CondFlowEnable ) { - TXT( "\nCondFlowEnable: " ); - UID( inst->InstructionExtNv.CondFlowEnable ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( inst->InstructionExtNv.Padding ); - if( deflt || fi->InstructionExtNv.Extended != inst->InstructionExtNv.Extended ) { - TXT( "\nExtended : " ); - UID( inst->InstructionExtNv.Extended ); - } - } - } - - if( deflt || tgsi_compare_instruction_ext_label( inst->InstructionExtLabel, fi->InstructionExtLabel ) ) { - EOL(); - TXT( "\nType : " ); - ENM( inst->InstructionExtLabel.Type, TGSI_INSTRUCTION_EXTS ); - if( deflt || fi->InstructionExtLabel.Label != inst->InstructionExtLabel.Label ) { - TXT( "\nLabel : " ); - UID( inst->InstructionExtLabel.Label ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( inst->InstructionExtLabel.Padding ); - if( deflt || fi->InstructionExtLabel.Extended != inst->InstructionExtLabel.Extended ) { - TXT( "\nExtended: " ); - UID( inst->InstructionExtLabel.Extended ); - } - } - } - - if( deflt || tgsi_compare_instruction_ext_texture( inst->InstructionExtTexture, fi->InstructionExtTexture ) ) { - EOL(); - TXT( "\nType : " ); - ENM( inst->InstructionExtTexture.Type, TGSI_INSTRUCTION_EXTS ); - if( deflt || fi->InstructionExtTexture.Texture != inst->InstructionExtTexture.Texture ) { - TXT( "\nTexture : " ); - ENM( inst->InstructionExtTexture.Texture, TGSI_TEXTURES ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( inst->InstructionExtTexture.Padding ); - if( deflt || fi->InstructionExtTexture.Extended != inst->InstructionExtTexture.Extended ) { - TXT( "\nExtended: " ); - UID( inst->InstructionExtTexture.Extended ); - } - } - } - - for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { - struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - struct tgsi_full_dst_register *fd = &fi->FullDstRegisters[i]; - - EOL(); - TXT( "\nFile : " ); - ENM( dst->DstRegister.File, TGSI_FILES ); - if( deflt || fd->DstRegister.WriteMask != dst->DstRegister.WriteMask ) { - TXT( "\nWriteMask: " ); - ENM( dst->DstRegister.WriteMask, TGSI_WRITEMASKS ); - } - if( ignored ) { - if( deflt || fd->DstRegister.Indirect != dst->DstRegister.Indirect ) { - TXT( "\nIndirect : " ); - UID( dst->DstRegister.Indirect ); - } - if( deflt || fd->DstRegister.Dimension != dst->DstRegister.Dimension ) { - TXT( "\nDimension: " ); - UID( dst->DstRegister.Dimension ); - } - } - if( deflt || fd->DstRegister.Index != dst->DstRegister.Index ) { - TXT( "\nIndex : " ); - SID( dst->DstRegister.Index ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( dst->DstRegister.Padding ); - if( deflt || fd->DstRegister.Extended != dst->DstRegister.Extended ) { - TXT( "\nExtended : " ); - UID( dst->DstRegister.Extended ); - } - } - - if( deflt || tgsi_compare_dst_register_ext_concode( dst->DstRegisterExtConcode, fd->DstRegisterExtConcode ) ) { - EOL(); - TXT( "\nType : " ); - ENM( dst->DstRegisterExtConcode.Type, TGSI_DST_REGISTER_EXTS ); - if( deflt || fd->DstRegisterExtConcode.CondMask != dst->DstRegisterExtConcode.CondMask ) { - TXT( "\nCondMask : " ); - ENM( dst->DstRegisterExtConcode.CondMask, TGSI_CCS ); - } - if( deflt || fd->DstRegisterExtConcode.CondSwizzleX != dst->DstRegisterExtConcode.CondSwizzleX ) { - TXT( "\nCondSwizzleX: " ); - ENM( dst->DstRegisterExtConcode.CondSwizzleX, TGSI_SWIZZLES ); - } - if( deflt || fd->DstRegisterExtConcode.CondSwizzleY != dst->DstRegisterExtConcode.CondSwizzleY ) { - TXT( "\nCondSwizzleY: " ); - ENM( dst->DstRegisterExtConcode.CondSwizzleY, TGSI_SWIZZLES ); - } - if( deflt || fd->DstRegisterExtConcode.CondSwizzleZ != dst->DstRegisterExtConcode.CondSwizzleZ ) { - TXT( "\nCondSwizzleZ: " ); - ENM( dst->DstRegisterExtConcode.CondSwizzleZ, TGSI_SWIZZLES ); - } - if( deflt || fd->DstRegisterExtConcode.CondSwizzleW != dst->DstRegisterExtConcode.CondSwizzleW ) { - TXT( "\nCondSwizzleW: " ); - ENM( dst->DstRegisterExtConcode.CondSwizzleW, TGSI_SWIZZLES ); - } - if( deflt || fd->DstRegisterExtConcode.CondSrcIndex != dst->DstRegisterExtConcode.CondSrcIndex ) { - TXT( "\nCondSrcIndex: " ); - UID( dst->DstRegisterExtConcode.CondSrcIndex ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( dst->DstRegisterExtConcode.Padding ); - if( deflt || fd->DstRegisterExtConcode.Extended != dst->DstRegisterExtConcode.Extended ) { - TXT( "\nExtended : " ); - UID( dst->DstRegisterExtConcode.Extended ); - } - } - } - - if( deflt || tgsi_compare_dst_register_ext_modulate( dst->DstRegisterExtModulate, fd->DstRegisterExtModulate ) ) { - EOL(); - TXT( "\nType : " ); - ENM( dst->DstRegisterExtModulate.Type, TGSI_DST_REGISTER_EXTS ); - if( deflt || fd->DstRegisterExtModulate.Modulate != dst->DstRegisterExtModulate.Modulate ) { - TXT( "\nModulate: " ); - ENM( dst->DstRegisterExtModulate.Modulate, TGSI_MODULATES ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( dst->DstRegisterExtModulate.Padding ); - if( deflt || fd->DstRegisterExtModulate.Extended != dst->DstRegisterExtModulate.Extended ) { - TXT( "\nExtended: " ); - UID( dst->DstRegisterExtModulate.Extended ); - } - } - } - } - - for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { - struct tgsi_full_src_register *src = &inst->FullSrcRegisters[i]; - struct tgsi_full_src_register *fs = &fi->FullSrcRegisters[i]; - - EOL(); - TXT( "\nFile : "); - ENM( src->SrcRegister.File, TGSI_FILES ); - if( deflt || fs->SrcRegister.SwizzleX != src->SrcRegister.SwizzleX ) { - TXT( "\nSwizzleX : " ); - ENM( src->SrcRegister.SwizzleX, TGSI_SWIZZLES ); - } - if( deflt || fs->SrcRegister.SwizzleY != src->SrcRegister.SwizzleY ) { - TXT( "\nSwizzleY : " ); - ENM( src->SrcRegister.SwizzleY, TGSI_SWIZZLES ); - } - if( deflt || fs->SrcRegister.SwizzleZ != src->SrcRegister.SwizzleZ ) { - TXT( "\nSwizzleZ : " ); - ENM( src->SrcRegister.SwizzleZ, TGSI_SWIZZLES ); - } - if( deflt || fs->SrcRegister.SwizzleW != src->SrcRegister.SwizzleW ) { - TXT( "\nSwizzleW : " ); - ENM( src->SrcRegister.SwizzleW, TGSI_SWIZZLES ); - } - if( deflt || fs->SrcRegister.Negate != src->SrcRegister.Negate ) { - TXT( "\nNegate : " ); - UID( src->SrcRegister.Negate ); - } - if( ignored ) { - if( deflt || fs->SrcRegister.Indirect != src->SrcRegister.Indirect ) { - TXT( "\nIndirect : " ); - UID( src->SrcRegister.Indirect ); - } - if( deflt || fs->SrcRegister.Dimension != src->SrcRegister.Dimension ) { - TXT( "\nDimension: " ); - UID( src->SrcRegister.Dimension ); - } - } - if( deflt || fs->SrcRegister.Index != src->SrcRegister.Index ) { - TXT( "\nIndex : " ); - SID( src->SrcRegister.Index ); - } - if( ignored ) { - if( deflt || fs->SrcRegister.Extended != src->SrcRegister.Extended ) { - TXT( "\nExtended : " ); - UID( src->SrcRegister.Extended ); - } - } - - if( deflt || tgsi_compare_src_register_ext_swz( src->SrcRegisterExtSwz, fs->SrcRegisterExtSwz ) ) { - EOL(); - TXT( "\nType : " ); - ENM( src->SrcRegisterExtSwz.Type, TGSI_SRC_REGISTER_EXTS ); - if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleX != src->SrcRegisterExtSwz.ExtSwizzleX ) { - TXT( "\nExtSwizzleX: " ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleX, TGSI_EXTSWIZZLES ); - } - if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleY != src->SrcRegisterExtSwz.ExtSwizzleY ) { - TXT( "\nExtSwizzleY: " ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleY, TGSI_EXTSWIZZLES ); - } - if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleZ != src->SrcRegisterExtSwz.ExtSwizzleZ ) { - TXT( "\nExtSwizzleZ: " ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleZ, TGSI_EXTSWIZZLES ); - } - if( deflt || fs->SrcRegisterExtSwz.ExtSwizzleW != src->SrcRegisterExtSwz.ExtSwizzleW ) { - TXT( "\nExtSwizzleW: " ); - ENM( src->SrcRegisterExtSwz.ExtSwizzleW, TGSI_EXTSWIZZLES ); - } - if( deflt || fs->SrcRegisterExtSwz.NegateX != src->SrcRegisterExtSwz.NegateX ) { - TXT( "\nNegateX : " ); - UID( src->SrcRegisterExtSwz.NegateX ); - } - if( deflt || fs->SrcRegisterExtSwz.NegateY != src->SrcRegisterExtSwz.NegateY ) { - TXT( "\nNegateY : " ); - UID( src->SrcRegisterExtSwz.NegateY ); - } - if( deflt || fs->SrcRegisterExtSwz.NegateZ != src->SrcRegisterExtSwz.NegateZ ) { - TXT( "\nNegateZ : " ); - UID( src->SrcRegisterExtSwz.NegateZ ); - } - if( deflt || fs->SrcRegisterExtSwz.NegateW != src->SrcRegisterExtSwz.NegateW ) { - TXT( "\nNegateW : " ); - UID( src->SrcRegisterExtSwz.NegateW ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( src->SrcRegisterExtSwz.Padding ); - if( deflt || fs->SrcRegisterExtSwz.Extended != src->SrcRegisterExtSwz.Extended ) { - TXT( "\nExtended : " ); - UID( src->SrcRegisterExtSwz.Extended ); - } - } - } - - if( deflt || tgsi_compare_src_register_ext_mod( src->SrcRegisterExtMod, fs->SrcRegisterExtMod ) ) { - EOL(); - TXT( "\nType : " ); - ENM( src->SrcRegisterExtMod.Type, TGSI_SRC_REGISTER_EXTS ); - if( deflt || fs->SrcRegisterExtMod.Complement != src->SrcRegisterExtMod.Complement ) { - TXT( "\nComplement: " ); - UID( src->SrcRegisterExtMod.Complement ); - } - if( deflt || fs->SrcRegisterExtMod.Bias != src->SrcRegisterExtMod.Bias ) { - TXT( "\nBias : " ); - UID( src->SrcRegisterExtMod.Bias ); - } - if( deflt || fs->SrcRegisterExtMod.Scale2X != src->SrcRegisterExtMod.Scale2X ) { - TXT( "\nScale2X : " ); - UID( src->SrcRegisterExtMod.Scale2X ); - } - if( deflt || fs->SrcRegisterExtMod.Absolute != src->SrcRegisterExtMod.Absolute ) { - TXT( "\nAbsolute : " ); - UID( src->SrcRegisterExtMod.Absolute ); - } - if( deflt || fs->SrcRegisterExtMod.Negate != src->SrcRegisterExtMod.Negate ) { - TXT( "\nNegate : " ); - UID( src->SrcRegisterExtMod.Negate ); - } - if( ignored ) { - TXT( "\nPadding : " ); - UIX( src->SrcRegisterExtMod.Padding ); - if( deflt || fs->SrcRegisterExtMod.Extended != src->SrcRegisterExtMod.Extended ) { - TXT( "\nExtended : " ); - UID( src->SrcRegisterExtMod.Extended ); - } - } - } - } -} - -void -tgsi_dump_c( - const struct tgsi_token *tokens, - uint flags ) -{ - struct tgsi_parse_context parse; - struct tgsi_full_instruction fi; - struct tgsi_full_declaration fd; - uint ignored = flags & TGSI_DUMP_C_IGNORED; - uint deflt = flags & TGSI_DUMP_C_DEFAULT; - uint instno = 0; - - /* sanity checks */ - assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0); - assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0); - - tgsi_parse_init( &parse, tokens ); - - TXT( "tgsi-dump begin -----------------" ); - - TXT( "\nMajorVersion: " ); - UID( parse.FullVersion.Version.MajorVersion ); - TXT( "\nMinorVersion: " ); - UID( parse.FullVersion.Version.MinorVersion ); - EOL(); - - TXT( "\nHeaderSize: " ); - UID( parse.FullHeader.Header.HeaderSize ); - TXT( "\nBodySize : " ); - UID( parse.FullHeader.Header.BodySize ); - TXT( "\nProcessor : " ); - ENM( parse.FullHeader.Processor.Processor, TGSI_PROCESSOR_TYPES ); - EOL(); - - fi = tgsi_default_full_instruction(); - fd = tgsi_default_full_declaration(); - - while( !tgsi_parse_end_of_tokens( &parse ) ) { - tgsi_parse_token( &parse ); - - TXT( "\nType : " ); - ENM( parse.FullToken.Token.Type, TGSI_TOKEN_TYPES ); - if( ignored ) { - TXT( "\nSize : " ); - UID( parse.FullToken.Token.Size ); - if( deflt || parse.FullToken.Token.Extended ) { - TXT( "\nExtended : " ); - UID( parse.FullToken.Token.Extended ); - } - } - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - dump_declaration_verbose( - &parse.FullToken.FullDeclaration, - ignored, - deflt, - &fd ); - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - dump_immediate_verbose( - &parse.FullToken.FullImmediate, - ignored ); - break; - - case TGSI_TOKEN_TYPE_INSTRUCTION: - dump_instruction_verbose( - &parse.FullToken.FullInstruction, - ignored, - deflt, - &fi ); - break; - - default: - assert( 0 ); - } - - EOL(); - } - - TXT( "\ntgsi-dump end -------------------\n" ); - - tgsi_parse_free( &parse ); -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h b/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h deleted file mode 100644 index d91cd35b3b..0000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_dump_c.h +++ /dev/null @@ -1,49 +0,0 @@ -/************************************************************************** - * - * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_DUMP_C_H -#define TGSI_DUMP_C_H - -#include "pipe/p_shader_tokens.h" - -#if defined __cplusplus -extern "C" { -#endif - -#define TGSI_DUMP_C_IGNORED 1 -#define TGSI_DUMP_C_DEFAULT 2 - -void -tgsi_dump_c( - const struct tgsi_token *tokens, - uint flags ); - -#if defined __cplusplus -} -#endif - -#endif /* TGSI_DUMP_C_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c deleted file mode 100644 index 5371a88b96..0000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.c +++ /dev/null @@ -1,85 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_debug.h" -#include "tgsi_iterate.h" - -boolean -tgsi_iterate_shader( - const struct tgsi_token *tokens, - struct tgsi_iterate_context *ctx ) -{ - struct tgsi_parse_context parse; - - if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) - return FALSE; - - ctx->processor = parse.FullHeader.Processor; - ctx->version = parse.FullVersion.Version; - - if (ctx->prolog) - if (!ctx->prolog( ctx )) - goto fail; - - while (!tgsi_parse_end_of_tokens( &parse )) { - tgsi_parse_token( &parse ); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_INSTRUCTION: - if (ctx->iterate_instruction) - if (!ctx->iterate_instruction( ctx, &parse.FullToken.FullInstruction )) - goto fail; - break; - - case TGSI_TOKEN_TYPE_DECLARATION: - if (ctx->iterate_declaration) - if (!ctx->iterate_declaration( ctx, &parse.FullToken.FullDeclaration )) - goto fail; - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - if (ctx->iterate_immediate) - if (!ctx->iterate_immediate( ctx, &parse.FullToken.FullImmediate )) - goto fail; - break; - - default: - assert( 0 ); - } - } - - if (ctx->epilog) - if (!ctx->epilog( ctx )) - goto fail; - - tgsi_parse_free( &parse ); - return TRUE; - -fail: - tgsi_parse_free( &parse ); - return FALSE; -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h b/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h deleted file mode 100644 index f5bebf89b8..0000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_iterate.h +++ /dev/null @@ -1,76 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_ITERATE_H -#define TGSI_ITERATE_H - -#include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" - -#if defined __cplusplus -extern "C" { -#endif - -struct tgsi_iterate_context -{ - boolean - (* prolog)( - struct tgsi_iterate_context *ctx ); - - boolean - (* iterate_instruction)( - struct tgsi_iterate_context *ctx, - struct tgsi_full_instruction *inst ); - - boolean - (* iterate_declaration)( - struct tgsi_iterate_context *ctx, - struct tgsi_full_declaration *decl ); - - boolean - (* iterate_immediate)( - struct tgsi_iterate_context *ctx, - struct tgsi_full_immediate *imm ); - - boolean - (* epilog)( - struct tgsi_iterate_context *ctx ); - - struct tgsi_processor processor; - struct tgsi_version version; -}; - -boolean -tgsi_iterate_shader( - const struct tgsi_token *tokens, - struct tgsi_iterate_context *ctx ); - -#if defined __cplusplus -} -#endif - -#endif /* TGSI_ITERATE_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c b/src/gallium/auxiliary/tgsi/util/tgsi_parse.c deleted file mode 100644 index d16f0cdcad..0000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.c +++ /dev/null @@ -1,332 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_debug.h" -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi_parse.h" -#include "tgsi_build.h" - -void -tgsi_full_token_init( - union tgsi_full_token *full_token ) -{ - full_token->Token.Type = TGSI_TOKEN_TYPE_DECLARATION; -} - -void -tgsi_full_token_free( - union tgsi_full_token *full_token ) -{ - if( full_token->Token.Type == TGSI_TOKEN_TYPE_IMMEDIATE ) { - FREE( (void *) full_token->FullImmediate.u.Pointer ); - } -} - -unsigned -tgsi_parse_init( - struct tgsi_parse_context *ctx, - const struct tgsi_token *tokens ) -{ - ctx->FullVersion.Version = *(struct tgsi_version *) &tokens[0]; - if( ctx->FullVersion.Version.MajorVersion > 1 ) { - return TGSI_PARSE_ERROR; - } - - ctx->FullHeader.Header = *(struct tgsi_header *) &tokens[1]; - if( ctx->FullHeader.Header.HeaderSize >= 2 ) { - ctx->FullHeader.Processor = *(struct tgsi_processor *) &tokens[2]; - } - else { - ctx->FullHeader.Processor = tgsi_default_processor(); - } - - ctx->Tokens = tokens; - ctx->Position = 1 + ctx->FullHeader.Header.HeaderSize; - - tgsi_full_token_init( &ctx->FullToken ); - - return TGSI_PARSE_OK; -} - -void -tgsi_parse_free( - struct tgsi_parse_context *ctx ) -{ - tgsi_full_token_free( &ctx->FullToken ); -} - -boolean -tgsi_parse_end_of_tokens( - struct tgsi_parse_context *ctx ) -{ - return ctx->Position >= - 1 + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize; -} - -static void -next_token( - struct tgsi_parse_context *ctx, - void *token ) -{ - assert( !tgsi_parse_end_of_tokens( ctx ) ); - - *(struct tgsi_token *) token = ctx->Tokens[ctx->Position++]; -} - -void -tgsi_parse_token( - struct tgsi_parse_context *ctx ) -{ - struct tgsi_token token; - unsigned i; - - tgsi_full_token_free( &ctx->FullToken ); - tgsi_full_token_init( &ctx->FullToken ); - - next_token( ctx, &token ); - - switch( token.Type ) { - case TGSI_TOKEN_TYPE_DECLARATION: - { - struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration; - - *decl = tgsi_default_full_declaration(); - decl->Declaration = *(struct tgsi_declaration *) &token; - - next_token( ctx, &decl->DeclarationRange ); - - if( decl->Declaration.Semantic ) { - next_token( ctx, &decl->Semantic ); - } - - break; - } - - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate; - - *imm = tgsi_default_full_immediate(); - imm->Immediate = *(struct tgsi_immediate *) &token; - - assert( !imm->Immediate.Extended ); - - switch (imm->Immediate.DataType) { - case TGSI_IMM_FLOAT32: - imm->u.Pointer = MALLOC( - sizeof( struct tgsi_immediate_float32 ) * (imm->Immediate.Size - 1) ); - for( i = 0; i < imm->Immediate.Size - 1; i++ ) { - next_token( ctx, (struct tgsi_immediate_float32 *) &imm->u.ImmediateFloat32[i] ); - } - break; - - default: - assert( 0 ); - } - - break; - } - - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - struct tgsi_full_instruction *inst = &ctx->FullToken.FullInstruction; - unsigned extended; - - *inst = tgsi_default_full_instruction(); - inst->Instruction = *(struct tgsi_instruction *) &token; - - extended = inst->Instruction.Extended; - - while( extended ) { - struct tgsi_src_register_ext token; - - next_token( ctx, &token ); - - switch( token.Type ) { - case TGSI_INSTRUCTION_EXT_TYPE_NV: - inst->InstructionExtNv = - *(struct tgsi_instruction_ext_nv *) &token; - break; - - case TGSI_INSTRUCTION_EXT_TYPE_LABEL: - inst->InstructionExtLabel = - *(struct tgsi_instruction_ext_label *) &token; - break; - - case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE: - inst->InstructionExtTexture = - *(struct tgsi_instruction_ext_texture *) &token; - break; - - default: - assert( 0 ); - } - - extended = token.Extended; - } - - assert( inst->Instruction.NumDstRegs <= TGSI_FULL_MAX_DST_REGISTERS ); - - for( i = 0; i < inst->Instruction.NumDstRegs; i++ ) { - unsigned extended; - - next_token( ctx, &inst->FullDstRegisters[i].DstRegister ); - - /* - * No support for indirect or multi-dimensional addressing. - */ - assert( !inst->FullDstRegisters[i].DstRegister.Indirect ); - assert( !inst->FullDstRegisters[i].DstRegister.Dimension ); - - extended = inst->FullDstRegisters[i].DstRegister.Extended; - - while( extended ) { - struct tgsi_src_register_ext token; - - next_token( ctx, &token ); - - switch( token.Type ) { - case TGSI_DST_REGISTER_EXT_TYPE_CONDCODE: - inst->FullDstRegisters[i].DstRegisterExtConcode = - *(struct tgsi_dst_register_ext_concode *) &token; - break; - - case TGSI_DST_REGISTER_EXT_TYPE_MODULATE: - inst->FullDstRegisters[i].DstRegisterExtModulate = - *(struct tgsi_dst_register_ext_modulate *) &token; - break; - - default: - assert( 0 ); - } - - extended = token.Extended; - } - } - - assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS ); - - for( i = 0; i < inst->Instruction.NumSrcRegs; i++ ) { - unsigned extended; - - next_token( ctx, &inst->FullSrcRegisters[i].SrcRegister ); - - extended = inst->FullSrcRegisters[i].SrcRegister.Extended; - - while( extended ) { - struct tgsi_src_register_ext token; - - next_token( ctx, &token ); - - switch( token.Type ) { - case TGSI_SRC_REGISTER_EXT_TYPE_SWZ: - inst->FullSrcRegisters[i].SrcRegisterExtSwz = - *(struct tgsi_src_register_ext_swz *) &token; - break; - - case TGSI_SRC_REGISTER_EXT_TYPE_MOD: - inst->FullSrcRegisters[i].SrcRegisterExtMod = - *(struct tgsi_src_register_ext_mod *) &token; - break; - - default: - assert( 0 ); - } - - extended = token.Extended; - } - - if( inst->FullSrcRegisters[i].SrcRegister.Indirect ) { - next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterInd ); - - /* - * No support for indirect or multi-dimensional addressing. - */ - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect ); - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension ); - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended ); - } - - if( inst->FullSrcRegisters[i].SrcRegister.Dimension ) { - next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDim ); - - /* - * No support for multi-dimensional addressing. - */ - assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Dimension ); - assert( !inst->FullSrcRegisters[i].SrcRegisterDim.Extended ); - - if( inst->FullSrcRegisters[i].SrcRegisterDim.Indirect ) { - next_token( ctx, &inst->FullSrcRegisters[i].SrcRegisterDimInd ); - - /* - * No support for indirect or multi-dimensional addressing. - */ - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Indirect ); - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Dimension ); - assert( !inst->FullSrcRegisters[i].SrcRegisterInd.Extended ); - } - } - } - - break; - } - - default: - assert( 0 ); - } -} - - -unsigned -tgsi_num_tokens(const struct tgsi_token *tokens) -{ - struct tgsi_parse_context ctx; - if (tgsi_parse_init(&ctx, tokens) == TGSI_PARSE_OK) { - unsigned len = (ctx.FullHeader.Header.HeaderSize + - ctx.FullHeader.Header.BodySize + - 1); - return len; - } - return 0; -} - - -/** - * Make a new copy of a token array. - */ -struct tgsi_token * -tgsi_dup_tokens(const struct tgsi_token *tokens) -{ - unsigned n = tgsi_num_tokens(tokens); - unsigned bytes = n * sizeof(struct tgsi_token); - struct tgsi_token *new_tokens = (struct tgsi_token *) MALLOC(bytes); - if (new_tokens) - memcpy(new_tokens, tokens, bytes); - return new_tokens; -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h b/src/gallium/auxiliary/tgsi/util/tgsi_parse.h deleted file mode 100644 index 054350712d..0000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_parse.h +++ /dev/null @@ -1,151 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_PARSE_H -#define TGSI_PARSE_H - -#include "pipe/p_shader_tokens.h" - -#if defined __cplusplus -extern "C" { -#endif - -struct tgsi_full_version -{ - struct tgsi_version Version; -}; - -struct tgsi_full_header -{ - struct tgsi_header Header; - struct tgsi_processor Processor; -}; - -struct tgsi_full_dst_register -{ - struct tgsi_dst_register DstRegister; - struct tgsi_dst_register_ext_concode DstRegisterExtConcode; - struct tgsi_dst_register_ext_modulate DstRegisterExtModulate; -}; - -struct tgsi_full_src_register -{ - struct tgsi_src_register SrcRegister; - struct tgsi_src_register_ext_swz SrcRegisterExtSwz; - struct tgsi_src_register_ext_mod SrcRegisterExtMod; - struct tgsi_src_register SrcRegisterInd; - struct tgsi_dimension SrcRegisterDim; - struct tgsi_src_register SrcRegisterDimInd; -}; - -struct tgsi_full_declaration -{ - struct tgsi_declaration Declaration; - struct tgsi_declaration_range DeclarationRange; - struct tgsi_declaration_semantic Semantic; -}; - -struct tgsi_full_immediate -{ - struct tgsi_immediate Immediate; - union - { - const void *Pointer; - const struct tgsi_immediate_float32 *ImmediateFloat32; - } u; -}; - -#define TGSI_FULL_MAX_DST_REGISTERS 2 -#define TGSI_FULL_MAX_SRC_REGISTERS 4 /* TXD has 4 */ - -struct tgsi_full_instruction -{ - struct tgsi_instruction Instruction; - struct tgsi_instruction_ext_nv InstructionExtNv; - struct tgsi_instruction_ext_label InstructionExtLabel; - struct tgsi_instruction_ext_texture InstructionExtTexture; - struct tgsi_full_dst_register FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS]; - struct tgsi_full_src_register FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS]; -}; - -union tgsi_full_token -{ - struct tgsi_token Token; - struct tgsi_full_declaration FullDeclaration; - struct tgsi_full_immediate FullImmediate; - struct tgsi_full_instruction FullInstruction; -}; - -void -tgsi_full_token_init( - union tgsi_full_token *full_token ); - -void -tgsi_full_token_free( - union tgsi_full_token *full_token ); - -struct tgsi_parse_context -{ - const struct tgsi_token *Tokens; - unsigned Position; - struct tgsi_full_version FullVersion; - struct tgsi_full_header FullHeader; - union tgsi_full_token FullToken; -}; - -#define TGSI_PARSE_OK 0 -#define TGSI_PARSE_ERROR 1 - -unsigned -tgsi_parse_init( - struct tgsi_parse_context *ctx, - const struct tgsi_token *tokens ); - -void -tgsi_parse_free( - struct tgsi_parse_context *ctx ); - -boolean -tgsi_parse_end_of_tokens( - struct tgsi_parse_context *ctx ); - -void -tgsi_parse_token( - struct tgsi_parse_context *ctx ); - -unsigned -tgsi_num_tokens(const struct tgsi_token *tokens); - -struct tgsi_token * -tgsi_dup_tokens(const struct tgsi_token *tokens); - -#if defined __cplusplus -} -#endif - -#endif /* TGSI_PARSE_H */ - diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c deleted file mode 100644 index 2e3ec96b5b..0000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.c +++ /dev/null @@ -1,341 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_debug.h" -#include "tgsi_sanity.h" -#include "tgsi_iterate.h" - -#define MAX_REGISTERS 256 - -typedef uint reg_flag; - -#define BITS_IN_REG_FLAG (sizeof( reg_flag ) * 8) - -struct sanity_check_ctx -{ - struct tgsi_iterate_context iter; - - reg_flag regs_decl[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG]; - reg_flag regs_used[TGSI_FILE_COUNT][MAX_REGISTERS / BITS_IN_REG_FLAG]; - boolean regs_ind_used[TGSI_FILE_COUNT]; - uint num_imms; - uint num_instructions; - uint index_of_END; - - uint errors; - uint warnings; -}; - -static void -report_error( - struct sanity_check_ctx *ctx, - const char *format, - ... ) -{ - va_list args; - - debug_printf( "Error : " ); - va_start( args, format ); - _debug_vprintf( format, args ); - va_end( args ); - debug_printf( "\n" ); - ctx->errors++; -} - -static void -report_warning( - struct sanity_check_ctx *ctx, - const char *format, - ... ) -{ - va_list args; - - debug_printf( "Warning: " ); - va_start( args, format ); - _debug_vprintf( format, args ); - va_end( args ); - debug_printf( "\n" ); - ctx->warnings++; -} - -static boolean -check_file_name( - struct sanity_check_ctx *ctx, - uint file ) -{ - if (file <= TGSI_FILE_NULL || file >= TGSI_FILE_COUNT) { - report_error( ctx, "Invalid register file name" ); - return FALSE; - } - return TRUE; -} - -static boolean -is_register_declared( - struct sanity_check_ctx *ctx, - uint file, - int index ) -{ - assert( index >= 0 && index < MAX_REGISTERS ); - - return (ctx->regs_decl[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; -} - -static boolean -is_any_register_declared( - struct sanity_check_ctx *ctx, - uint file ) -{ - uint i; - - for (i = 0; i < MAX_REGISTERS / BITS_IN_REG_FLAG; i++) - if (ctx->regs_decl[file][i]) - return TRUE; - return FALSE; -} - -static boolean -is_register_used( - struct sanity_check_ctx *ctx, - uint file, - int index ) -{ - assert( index < MAX_REGISTERS ); - - return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE; -} - -static const char *file_names[] = -{ - "NULL", - "CONST", - "IN", - "OUT", - "TEMP", - "SAMP", - "ADDR", - "IMM" -}; - -static boolean -check_register_usage( - struct sanity_check_ctx *ctx, - uint file, - int index, - const char *name, - boolean indirect_access ) -{ - if (!check_file_name( ctx, file )) - return FALSE; - if (indirect_access) { - if (!is_any_register_declared( ctx, file )) - report_error( ctx, "%s: Undeclared %s register", file_names[file], name ); - ctx->regs_ind_used[file] = TRUE; - } - else { - if (!is_register_declared( ctx, file, index )) - report_error( ctx, "%s[%d]: Undeclared %s register", file_names[file], index, name ); - ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG)); - } - return TRUE; -} - -static boolean -iter_instruction( - struct tgsi_iterate_context *iter, - struct tgsi_full_instruction *inst ) -{ - struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - uint i; - - /* There must be no other instructions after END. - */ - if (ctx->index_of_END != ~0) { - report_error( ctx, "Unexpected instruction after END" ); - } - else if (inst->Instruction.Opcode == TGSI_OPCODE_END) { - ctx->index_of_END = ctx->num_instructions; - } - - /* Check destination and source registers' validity. - * Mark the registers as used. - */ - for (i = 0; i < inst->Instruction.NumDstRegs; i++) { - check_register_usage( - ctx, - inst->FullDstRegisters[i].DstRegister.File, - inst->FullDstRegisters[i].DstRegister.Index, - "destination", - FALSE ); - } - for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - check_register_usage( - ctx, - inst->FullSrcRegisters[i].SrcRegister.File, - inst->FullSrcRegisters[i].SrcRegister.Index, - "source", - (boolean)inst->FullSrcRegisters[i].SrcRegister.Indirect ); - if (inst->FullSrcRegisters[i].SrcRegister.Indirect) { - uint file; - int index; - - file = inst->FullSrcRegisters[i].SrcRegisterInd.File; - index = inst->FullSrcRegisters[i].SrcRegisterInd.Index; - check_register_usage( - ctx, - file, - index, - "indirect", - FALSE ); - if (file != TGSI_FILE_ADDRESS || index != 0) - report_warning( ctx, "Indirect register not ADDR[0]" ); - } - } - - ctx->num_instructions++; - - return TRUE; -} - -static boolean -iter_declaration( - struct tgsi_iterate_context *iter, - struct tgsi_full_declaration *decl ) -{ - struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - uint file; - uint i; - - /* No declarations allowed after the first instruction. - */ - if (ctx->num_instructions > 0) - report_error( ctx, "Instruction expected but declaration found" ); - - /* Check registers' validity. - * Mark the registers as declared. - */ - file = decl->Declaration.File; - if (!check_file_name( ctx, file )) - return TRUE; - for (i = decl->DeclarationRange.First; i <= decl->DeclarationRange.Last; i++) { - if (is_register_declared( ctx, file, i )) - report_error( ctx, "The same register declared twice" ); - ctx->regs_decl[file][i / BITS_IN_REG_FLAG] |= (1 << (i % BITS_IN_REG_FLAG)); - } - - return TRUE; -} - -static boolean -iter_immediate( - struct tgsi_iterate_context *iter, - struct tgsi_full_immediate *imm ) -{ - struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - - assert( ctx->num_imms < MAX_REGISTERS ); - - /* No immediates allowed after the first instruction. - */ - if (ctx->num_instructions > 0) - report_error( ctx, "Instruction expected but immediate found" ); - - /* Mark the register as declared. - */ - ctx->regs_decl[TGSI_FILE_IMMEDIATE][ctx->num_imms / BITS_IN_REG_FLAG] |= (1 << (ctx->num_imms % BITS_IN_REG_FLAG)); - ctx->num_imms++; - - /* Check data type validity. - */ - if (imm->Immediate.DataType != TGSI_IMM_FLOAT32) { - report_error( ctx, "Invalid immediate data type" ); - return TRUE; - } - - return TRUE; -} - -static boolean -epilog( - struct tgsi_iterate_context *iter ) -{ - struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; - uint file; - - /* There must be an END instruction at the end. - */ - if (ctx->index_of_END == ~0 || ctx->index_of_END != ctx->num_instructions - 1) { - report_error( ctx, "Expected END at end of instruction sequence" ); - } - - /* Check if all declared registers were used. - */ - for (file = TGSI_FILE_NULL; file < TGSI_FILE_COUNT; file++) { - uint i; - - for (i = 0; i < MAX_REGISTERS; i++) { - if (is_register_declared( ctx, file, i ) && !is_register_used( ctx, file, i ) && !ctx->regs_ind_used[file]) { - report_warning( ctx, "Register never used" ); - } - } - } - - /* Print totals, if any. - */ - if (ctx->errors || ctx->warnings) - debug_printf( "\n%u errors, %u warnings", ctx->errors, ctx->warnings ); - - return TRUE; -} - -boolean -tgsi_sanity_check( - struct tgsi_token *tokens ) -{ - struct sanity_check_ctx ctx; - - ctx.iter.prolog = NULL; - ctx.iter.iterate_instruction = iter_instruction; - ctx.iter.iterate_declaration = iter_declaration; - ctx.iter.iterate_immediate = iter_immediate; - ctx.iter.epilog = epilog; - - memset( ctx.regs_decl, 0, sizeof( ctx.regs_decl ) ); - memset( ctx.regs_used, 0, sizeof( ctx.regs_used ) ); - memset( ctx.regs_ind_used, 0, sizeof( ctx.regs_ind_used ) ); - ctx.num_imms = 0; - ctx.num_instructions = 0; - ctx.index_of_END = ~0; - - ctx.errors = 0; - ctx.warnings = 0; - - if (!tgsi_iterate_shader( tokens, &ctx.iter )) - return FALSE; - - return ctx.errors == 0; -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.h b/src/gallium/auxiliary/tgsi/util/tgsi_sanity.h deleted file mode 100644 index ca45e94c7a..0000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_sanity.h +++ /dev/null @@ -1,49 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_SANITY_H -#define TGSI_SANITY_H - -#include "pipe/p_shader_tokens.h" - -#if defined __cplusplus -extern "C" { -#endif - -/* Check the given token stream for errors and common mistakes. - * Diagnostic messages are printed out to the debug output. - * Returns TRUE if there are no errors, even though there could be some warnings. - */ -boolean -tgsi_sanity_check( - struct tgsi_token *tokens ); - -#if defined __cplusplus -} -#endif - -#endif /* TGSI_SANITY_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c b/src/gallium/auxiliary/tgsi/util/tgsi_scan.c deleted file mode 100644 index 240aaaf362..0000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.c +++ /dev/null @@ -1,226 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * TGSI program scan utility. - * Used to determine which registers and instructions are used by a shader. - * - * Authors: Brian Paul - */ - - -#include "tgsi_scan.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_build.h" - -#include "pipe/p_util.h" - - - -/** - */ -void -tgsi_scan_shader(const struct tgsi_token *tokens, - struct tgsi_shader_info *info) -{ - uint procType, i; - struct tgsi_parse_context parse; - - memset(info, 0, sizeof(*info)); - for (i = 0; i < TGSI_FILE_COUNT; i++) - info->file_max[i] = -1; - - /** - ** Setup to begin parsing input shader - **/ - if (tgsi_parse_init( &parse, tokens ) != TGSI_PARSE_OK) { - debug_printf("tgsi_parse_init() failed in tgsi_scan_shader()!\n"); - return; - } - procType = parse.FullHeader.Processor.Processor; - assert(procType == TGSI_PROCESSOR_FRAGMENT || - procType == TGSI_PROCESSOR_VERTEX || - procType == TGSI_PROCESSOR_GEOMETRY); - - - /** - ** Loop over incoming program tokens/instructions - */ - while( !tgsi_parse_end_of_tokens( &parse ) ) { - - info->num_tokens++; - - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - struct tgsi_full_instruction *fullinst - = &parse.FullToken.FullInstruction; - - assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST); - info->opcode_count[fullinst->Instruction.Opcode]++; - } - break; - - case TGSI_TOKEN_TYPE_DECLARATION: - { - struct tgsi_full_declaration *fulldecl - = &parse.FullToken.FullDeclaration; - uint file = fulldecl->Declaration.File; - uint i; - for (i = fulldecl->DeclarationRange.First; - i <= fulldecl->DeclarationRange.Last; - i++) { - - /* only first 32 regs will appear in this bitfield */ - info->file_mask[file] |= (1 << i); - info->file_count[file]++; - info->file_max[file] = MAX2(info->file_max[file], (int)i); - - if (file == TGSI_FILE_INPUT) { - info->input_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName; - info->input_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex; - info->num_inputs++; - } - - if (file == TGSI_FILE_OUTPUT) { - info->output_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName; - info->output_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex; - info->num_outputs++; - } - - /* special case */ - if (procType == TGSI_PROCESSOR_FRAGMENT && - file == TGSI_FILE_OUTPUT && - fulldecl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) { - info->writes_z = TRUE; - } - } - } - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - info->immediate_count++; - break; - - default: - assert( 0 ); - } - } - - assert( info->file_max[TGSI_FILE_INPUT] + 1 == info->num_inputs ); - assert( info->file_max[TGSI_FILE_OUTPUT] + 1 == info->num_outputs ); - - info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] || - info->opcode_count[TGSI_OPCODE_KILP]); - - tgsi_parse_free (&parse); -} - - - -/** - * Check if the given shader is a "passthrough" shader consisting of only - * MOV instructions of the form: MOV OUT[n], IN[n] - * - */ -boolean -tgsi_is_passthrough_shader(const struct tgsi_token *tokens) -{ - struct tgsi_parse_context parse; - - /** - ** Setup to begin parsing input shader - **/ - if (tgsi_parse_init(&parse, tokens) != TGSI_PARSE_OK) { - debug_printf("tgsi_parse_init() failed in tgsi_is_passthrough_shader()!\n"); - return FALSE; - } - - /** - ** Loop over incoming program tokens/instructions - */ - while (!tgsi_parse_end_of_tokens(&parse)) { - - tgsi_parse_token(&parse); - - switch (parse.FullToken.Token.Type) { - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - struct tgsi_full_instruction *fullinst = - &parse.FullToken.FullInstruction; - const struct tgsi_full_src_register *src = - &fullinst->FullSrcRegisters[0]; - const struct tgsi_full_dst_register *dst = - &fullinst->FullDstRegisters[0]; - - /* Do a whole bunch of checks for a simple move */ - if (fullinst->Instruction.Opcode != TGSI_OPCODE_MOV || - src->SrcRegister.File != TGSI_FILE_INPUT || - dst->DstRegister.File != TGSI_FILE_OUTPUT || - src->SrcRegister.Index != dst->DstRegister.Index || - - src->SrcRegister.Negate || - src->SrcRegisterExtMod.Negate || - src->SrcRegisterExtMod.Absolute || - src->SrcRegisterExtMod.Scale2X || - src->SrcRegisterExtMod.Bias || - src->SrcRegisterExtMod.Complement || - - src->SrcRegister.SwizzleX != TGSI_SWIZZLE_X || - src->SrcRegister.SwizzleY != TGSI_SWIZZLE_Y || - src->SrcRegister.SwizzleZ != TGSI_SWIZZLE_Z || - src->SrcRegister.SwizzleW != TGSI_SWIZZLE_W || - - src->SrcRegisterExtSwz.ExtSwizzleX != TGSI_EXTSWIZZLE_X || - src->SrcRegisterExtSwz.ExtSwizzleY != TGSI_EXTSWIZZLE_Y || - src->SrcRegisterExtSwz.ExtSwizzleZ != TGSI_EXTSWIZZLE_Z || - src->SrcRegisterExtSwz.ExtSwizzleW != TGSI_EXTSWIZZLE_W || - - dst->DstRegister.WriteMask != TGSI_WRITEMASK_XYZW) - { - tgsi_parse_free(&parse); - return FALSE; - } - } - break; - - case TGSI_TOKEN_TYPE_DECLARATION: - /* fall-through */ - case TGSI_TOKEN_TYPE_IMMEDIATE: - /* fall-through */ - default: - ; /* no-op */ - } - } - - tgsi_parse_free(&parse); - - /* if we get here, it's a pass-through shader */ - return TRUE; -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h b/src/gallium/auxiliary/tgsi/util/tgsi_scan.h deleted file mode 100644 index 5cb6efb343..0000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_scan.h +++ /dev/null @@ -1,74 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_SCAN_H -#define TGSI_SCAN_H - - -#include "pipe/p_compiler.h" -#include "pipe/p_state.h" -#include "pipe/p_shader_tokens.h" - - -/** - * Shader summary info - */ -struct tgsi_shader_info -{ - uint num_tokens; - - /* XXX eventually remove the corresponding fields from pipe_shader_state: */ - ubyte num_inputs; - ubyte num_outputs; - ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */ - ubyte input_semantic_index[PIPE_MAX_SHADER_INPUTS]; - ubyte output_semantic_name[PIPE_MAX_SHADER_OUTPUTS]; /**< TGSI_SEMANTIC_x */ - ubyte output_semantic_index[PIPE_MAX_SHADER_OUTPUTS]; - - uint file_mask[TGSI_FILE_COUNT]; /**< bitmask of declared registers */ - uint file_count[TGSI_FILE_COUNT]; /**< number of declared registers */ - int file_max[TGSI_FILE_COUNT]; /**< highest index of declared registers */ - - uint immediate_count; /**< number of immediates declared */ - - uint opcode_count[TGSI_OPCODE_LAST]; /**< opcode histogram */ - - boolean writes_z; /**< does fragment shader write Z value? */ - boolean uses_kill; /**< KIL or KILP instruction used? */ -}; - - -extern void -tgsi_scan_shader(const struct tgsi_token *tokens, - struct tgsi_shader_info *info); - - -extern boolean -tgsi_is_passthrough_shader(const struct tgsi_token *tokens); - - -#endif /* TGSI_SCAN_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.c b/src/gallium/auxiliary/tgsi/util/tgsi_text.c deleted file mode 100644 index 35cb3055bb..0000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_text.c +++ /dev/null @@ -1,1221 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_debug.h" -#include "tgsi_text.h" -#include "tgsi_build.h" -#include "tgsi_parse.h" -#include "tgsi_sanity.h" -#include "tgsi_util.h" - -static boolean is_alpha_underscore( const char *cur ) -{ - return - (*cur >= 'a' && *cur <= 'z') || - (*cur >= 'A' && *cur <= 'Z') || - *cur == '_'; -} - -static boolean is_digit( const char *cur ) -{ - return *cur >= '0' && *cur <= '9'; -} - -static boolean is_digit_alpha_underscore( const char *cur ) -{ - return is_digit( cur ) || is_alpha_underscore( cur ); -} - -static boolean str_match_no_case( const char **pcur, const char *str ) -{ - const char *cur = *pcur; - - while (*str != '\0' && *str == toupper( *cur )) { - str++; - cur++; - } - if (*str == '\0') { - *pcur = cur; - return TRUE; - } - return FALSE; -} - -/* Eat zero or more whitespaces. - */ -static void eat_opt_white( const char **pcur ) -{ - while (**pcur == ' ' || **pcur == '\t' || **pcur == '\n') - (*pcur)++; -} - -/* Eat one or more whitespaces. - * Return TRUE if at least one whitespace eaten. - */ -static boolean eat_white( const char **pcur ) -{ - const char *cur = *pcur; - - eat_opt_white( pcur ); - return *pcur > cur; -} - -/* Parse unsigned integer. - * No checks for overflow. - */ -static boolean parse_uint( const char **pcur, uint *val ) -{ - const char *cur = *pcur; - - if (is_digit( cur )) { - *val = *cur++ - '0'; - while (is_digit( cur )) - *val = *val * 10 + *cur++ - '0'; - *pcur = cur; - return TRUE; - } - return FALSE; -} - -/* Parse floating point. - */ -static boolean parse_float( const char **pcur, float *val ) -{ - const char *cur = *pcur; - boolean integral_part = FALSE; - boolean fractional_part = FALSE; - - *val = (float) atof( cur ); - - if (*cur == '-' || *cur == '+') - cur++; - if (is_digit( cur )) { - cur++; - integral_part = TRUE; - while (is_digit( cur )) - cur++; - } - if (*cur == '.') { - cur++; - if (is_digit( cur )) { - cur++; - fractional_part = TRUE; - while (is_digit( cur )) - cur++; - } - } - if (!integral_part && !fractional_part) - return FALSE; - if (toupper( *cur ) == 'E') { - cur++; - if (*cur == '-' || *cur == '+') - cur++; - if (is_digit( cur )) { - cur++; - while (is_digit( cur )) - cur++; - } - else - return FALSE; - } - *pcur = cur; - return TRUE; -} - -struct translate_ctx -{ - const char *text; - const char *cur; - struct tgsi_token *tokens; - struct tgsi_token *tokens_cur; - struct tgsi_token *tokens_end; - struct tgsi_header *header; -}; - -static void report_error( struct translate_ctx *ctx, const char *msg ) -{ - debug_printf( "\nError: %s", msg ); -} - -/* Parse shader header. - * Return TRUE for one of the following headers. - * FRAG1.1 - * GEOM1.1 - * VERT1.1 - */ -static boolean parse_header( struct translate_ctx *ctx ) -{ - uint processor; - - if (str_match_no_case( &ctx->cur, "FRAG1.1" )) - processor = TGSI_PROCESSOR_FRAGMENT; - else if (str_match_no_case( &ctx->cur, "VERT1.1" )) - processor = TGSI_PROCESSOR_VERTEX; - else if (str_match_no_case( &ctx->cur, "GEOM1.1" )) - processor = TGSI_PROCESSOR_GEOMETRY; - else { - report_error( ctx, "Unknown header" ); - return FALSE; - } - - if (ctx->tokens_cur >= ctx->tokens_end) - return FALSE; - *(struct tgsi_version *) ctx->tokens_cur++ = tgsi_build_version(); - - if (ctx->tokens_cur >= ctx->tokens_end) - return FALSE; - ctx->header = (struct tgsi_header *) ctx->tokens_cur++; - *ctx->header = tgsi_build_header(); - - if (ctx->tokens_cur >= ctx->tokens_end) - return FALSE; - *(struct tgsi_processor *) ctx->tokens_cur++ = tgsi_build_processor( processor, ctx->header ); - - return TRUE; -} - -static boolean parse_label( struct translate_ctx *ctx, uint *val ) -{ - const char *cur = ctx->cur; - - if (parse_uint( &cur, val )) { - eat_opt_white( &cur ); - if (*cur == ':') { - cur++; - ctx->cur = cur; - return TRUE; - } - } - return FALSE; -} - -static const char *file_names[TGSI_FILE_COUNT] = -{ - "NULL", - "CONST", - "IN", - "OUT", - "TEMP", - "SAMP", - "ADDR", - "IMM" -}; - -static boolean -parse_file( const char **pcur, uint *file ) -{ - uint i; - - for (i = 0; i < TGSI_FILE_COUNT; i++) { - const char *cur = *pcur; - - if (str_match_no_case( &cur, file_names[i] )) { - if (!is_digit_alpha_underscore( cur )) { - *pcur = cur; - *file = i; - return TRUE; - } - } - } - return FALSE; -} - -static boolean -parse_opt_writemask( - struct translate_ctx *ctx, - uint *writemask ) -{ - const char *cur; - - cur = ctx->cur; - eat_opt_white( &cur ); - if (*cur == '.') { - cur++; - *writemask = TGSI_WRITEMASK_NONE; - eat_opt_white( &cur ); - if (toupper( *cur ) == 'X') { - cur++; - *writemask |= TGSI_WRITEMASK_X; - } - if (toupper( *cur ) == 'Y') { - cur++; - *writemask |= TGSI_WRITEMASK_Y; - } - if (toupper( *cur ) == 'Z') { - cur++; - *writemask |= TGSI_WRITEMASK_Z; - } - if (toupper( *cur ) == 'W') { - cur++; - *writemask |= TGSI_WRITEMASK_W; - } - - if (*writemask == TGSI_WRITEMASK_NONE) { - report_error( ctx, "Writemask expected" ); - return FALSE; - } - - ctx->cur = cur; - } - else { - *writemask = TGSI_WRITEMASK_XYZW; - } - return TRUE; -} - -/* ::= `[' - */ -static boolean -parse_register_file_bracket( - struct translate_ctx *ctx, - uint *file ) -{ - if (!parse_file( &ctx->cur, file )) { - report_error( ctx, "Unknown register file" ); - return FALSE; - } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '[') { - report_error( ctx, "Expected `['" ); - return FALSE; - } - ctx->cur++; - return TRUE; -} - -/* ::= - */ -static boolean -parse_register_file_bracket_index( - struct translate_ctx *ctx, - uint *file, - int *index ) -{ - uint uindex; - - if (!parse_register_file_bracket( ctx, file )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (!parse_uint( &ctx->cur, &uindex )) { - report_error( ctx, "Expected literal unsigned integer" ); - return FALSE; - } - *index = (int) uindex; - return TRUE; -} - -/* Parse destination register operand. - * ::= `]' - */ -static boolean -parse_register_dst( - struct translate_ctx *ctx, - uint *file, - int *index ) -{ - if (!parse_register_file_bracket_index( ctx, file, index )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ']') { - report_error( ctx, "Expected `]'" ); - return FALSE; - } - ctx->cur++; - return TRUE; -} - -/* Parse source register operand. - * ::= `]' | - * `]' | - * `+' `]' | - * `-' `]' - */ -static boolean -parse_register_src( - struct translate_ctx *ctx, - uint *file, - int *index, - uint *ind_file, - int *ind_index ) -{ - const char *cur; - uint uindex; - - if (!parse_register_file_bracket( ctx, file )) - return FALSE; - eat_opt_white( &ctx->cur ); - cur = ctx->cur; - if (parse_file( &cur, ind_file )) { - if (!parse_register_dst( ctx, ind_file, ind_index )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (*ctx->cur == '+' || *ctx->cur == '-') { - boolean negate; - - negate = *ctx->cur == '-'; - ctx->cur++; - eat_opt_white( &ctx->cur ); - if (!parse_uint( &ctx->cur, &uindex )) { - report_error( ctx, "Expected literal unsigned integer" ); - return FALSE; - } - if (negate) - *index = -(int) uindex; - else - *index = (int) uindex; - } - else { - *index = 0; - } - } - else { - if (!parse_uint( &ctx->cur, &uindex )) { - report_error( ctx, "Expected literal unsigned integer" ); - return FALSE; - } - *index = (int) uindex; - *ind_file = TGSI_FILE_NULL; - *ind_index = 0; - } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ']') { - report_error( ctx, "Expected `]'" ); - return FALSE; - } - ctx->cur++; - return TRUE; -} - -/* Parse register declaration. - * ::= `]' | - * `..' `]' - */ -static boolean -parse_register_dcl( - struct translate_ctx *ctx, - uint *file, - int *first, - int *last ) -{ - if (!parse_register_file_bracket_index( ctx, file, first )) - return FALSE; - eat_opt_white( &ctx->cur ); - if (ctx->cur[0] == '.' && ctx->cur[1] == '.') { - uint uindex; - - ctx->cur += 2; - eat_opt_white( &ctx->cur ); - if (!parse_uint( &ctx->cur, &uindex )) { - report_error( ctx, "Expected literal integer" ); - return FALSE; - } - *last = (int) uindex; - eat_opt_white( &ctx->cur ); - } - else { - *last = *first; - } - if (*ctx->cur != ']') { - report_error( ctx, "Expected `]' or `..'" ); - return FALSE; - } - ctx->cur++; - return TRUE; -} - -static const char *modulate_names[TGSI_MODULATE_COUNT] = -{ - "_1X", - "_2X", - "_4X", - "_8X", - "_D2", - "_D4", - "_D8" -}; - -static boolean -parse_dst_operand( - struct translate_ctx *ctx, - struct tgsi_full_dst_register *dst ) -{ - uint file; - int index; - uint writemask; - const char *cur; - - if (!parse_register_dst( ctx, &file, &index )) - return FALSE; - - cur = ctx->cur; - eat_opt_white( &cur ); - if (*cur == '_') { - uint i; - - for (i = 0; i < TGSI_MODULATE_COUNT; i++) { - if (str_match_no_case( &cur, modulate_names[i] )) { - if (!is_digit_alpha_underscore( cur )) { - dst->DstRegisterExtModulate.Modulate = i; - ctx->cur = cur; - break; - } - } - } - } - - if (!parse_opt_writemask( ctx, &writemask )) - return FALSE; - - dst->DstRegister.File = file; - dst->DstRegister.Index = index; - dst->DstRegister.WriteMask = writemask; - return TRUE; -} - -static boolean -parse_optional_swizzle( - struct translate_ctx *ctx, - uint swizzle[4], - boolean *parsed_swizzle, - boolean *parsed_extswizzle ) -{ - const char *cur = ctx->cur; - - *parsed_swizzle = FALSE; - *parsed_extswizzle = FALSE; - - eat_opt_white( &cur ); - if (*cur == '.') { - uint i; - - cur++; - eat_opt_white( &cur ); - for (i = 0; i < 4; i++) { - if (toupper( *cur ) == 'X') - swizzle[i] = TGSI_SWIZZLE_X; - else if (toupper( *cur ) == 'Y') - swizzle[i] = TGSI_SWIZZLE_Y; - else if (toupper( *cur ) == 'Z') - swizzle[i] = TGSI_SWIZZLE_Z; - else if (toupper( *cur ) == 'W') - swizzle[i] = TGSI_SWIZZLE_W; - else { - if (*cur == '0') - swizzle[i] = TGSI_EXTSWIZZLE_ZERO; - else if (*cur == '1') - swizzle[i] = TGSI_EXTSWIZZLE_ONE; - else { - report_error( ctx, "Expected register swizzle component `x', `y', `z', `w', `0' or `1'" ); - return FALSE; - } - *parsed_extswizzle = TRUE; - } - cur++; - } - *parsed_swizzle = TRUE; - ctx->cur = cur; - } - return TRUE; -} - -static boolean -parse_src_operand( - struct translate_ctx *ctx, - struct tgsi_full_src_register *src ) -{ - const char *cur; - float value; - uint file; - int index; - uint ind_file; - int ind_index; - uint swizzle[4]; - boolean parsed_swizzle; - boolean parsed_extswizzle; - - if (*ctx->cur == '-') { - cur = ctx->cur; - cur++; - eat_opt_white( &cur ); - if (*cur == '(') { - cur++; - src->SrcRegisterExtMod.Negate = 1; - eat_opt_white( &cur ); - ctx->cur = cur; - } - } - - if (*ctx->cur == '|') { - ctx->cur++; - eat_opt_white( &ctx->cur ); - src->SrcRegisterExtMod.Absolute = 1; - } - - if (*ctx->cur == '-') { - ctx->cur++; - eat_opt_white( &ctx->cur ); - src->SrcRegister.Negate = 1; - } - - cur = ctx->cur; - if (parse_float( &cur, &value )) { - if (value == 2.0f) { - eat_opt_white( &cur ); - if (*cur != '*') { - report_error( ctx, "Expected `*'" ); - return FALSE; - } - cur++; - if (*cur != '(') { - report_error( ctx, "Expected `('" ); - return FALSE; - } - cur++; - src->SrcRegisterExtMod.Scale2X = 1; - eat_opt_white( &cur ); - ctx->cur = cur; - } - } - - if (*ctx->cur == '(') { - ctx->cur++; - eat_opt_white( &ctx->cur ); - src->SrcRegisterExtMod.Bias = 1; - } - - cur = ctx->cur; - if (parse_float( &cur, &value )) { - if (value == 1.0f) { - eat_opt_white( &cur ); - if (*cur != '-') { - report_error( ctx, "Expected `-'" ); - return FALSE; - } - cur++; - if (*cur != '(') { - report_error( ctx, "Expected `('" ); - return FALSE; - } - cur++; - src->SrcRegisterExtMod.Complement = 1; - eat_opt_white( &cur ); - ctx->cur = cur; - } - } - - if (!parse_register_src( ctx, &file, &index, &ind_file, &ind_index )) - return FALSE; - src->SrcRegister.File = file; - src->SrcRegister.Index = index; - if (ind_file != TGSI_FILE_NULL) { - src->SrcRegister.Indirect = 1; - src->SrcRegisterInd.File = ind_file; - src->SrcRegisterInd.Index = ind_index; - } - - /* Parse optional swizzle. - */ - if (parse_optional_swizzle( ctx, swizzle, &parsed_swizzle, &parsed_extswizzle )) { - if (parsed_extswizzle) { - assert( parsed_swizzle ); - - src->SrcRegisterExtSwz.ExtSwizzleX = swizzle[0]; - src->SrcRegisterExtSwz.ExtSwizzleY = swizzle[1]; - src->SrcRegisterExtSwz.ExtSwizzleZ = swizzle[2]; - src->SrcRegisterExtSwz.ExtSwizzleW = swizzle[3]; - } - else if (parsed_swizzle) { - src->SrcRegister.SwizzleX = swizzle[0]; - src->SrcRegister.SwizzleY = swizzle[1]; - src->SrcRegister.SwizzleZ = swizzle[2]; - src->SrcRegister.SwizzleW = swizzle[3]; - } - } - - if (src->SrcRegisterExtMod.Complement) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ')') { - report_error( ctx, "Expected `)'" ); - return FALSE; - } - ctx->cur++; - } - - if (src->SrcRegisterExtMod.Bias) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ')') { - report_error( ctx, "Expected `)'" ); - return FALSE; - } - ctx->cur++; - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '-') { - report_error( ctx, "Expected `-'" ); - return FALSE; - } - ctx->cur++; - eat_opt_white( &ctx->cur ); - if (!parse_float( &ctx->cur, &value )) { - report_error( ctx, "Expected literal floating point" ); - return FALSE; - } - if (value != 0.5f) { - report_error( ctx, "Expected 0.5" ); - return FALSE; - } - } - - if (src->SrcRegisterExtMod.Scale2X) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ')') { - report_error( ctx, "Expected `)'" ); - return FALSE; - } - ctx->cur++; - } - - if (src->SrcRegisterExtMod.Absolute) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '|') { - report_error( ctx, "Expected `|'" ); - return FALSE; - } - ctx->cur++; - } - - if (src->SrcRegisterExtMod.Negate) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ')') { - report_error( ctx, "Expected `)'" ); - return FALSE; - } - ctx->cur++; - } - - return TRUE; -} - -struct opcode_info -{ - uint num_dst; - uint num_src; - uint is_tex; - uint is_branch; - const char *mnemonic; -}; - -static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] = -{ - { 1, 1, 0, 0, "ARL" }, - { 1, 1, 0, 0, "MOV" }, - { 1, 1, 0, 0, "LIT" }, - { 1, 1, 0, 0, "RCP" }, - { 1, 1, 0, 0, "RSQ" }, - { 1, 1, 0, 0, "EXP" }, - { 1, 1, 0, 0, "LOG" }, - { 1, 2, 0, 0, "MUL" }, - { 1, 2, 0, 0, "ADD" }, - { 1, 2, 0, 0, "DP3" }, - { 1, 2, 0, 0, "DP4" }, - { 1, 2, 0, 0, "DST" }, - { 1, 2, 0, 0, "MIN" }, - { 1, 2, 0, 0, "MAX" }, - { 1, 2, 0, 0, "SLT" }, - { 1, 2, 0, 0, "SGE" }, - { 1, 3, 0, 0, "MAD" }, - { 1, 2, 0, 0, "SUB" }, - { 1, 3, 0, 0, "LERP" }, - { 1, 3, 0, 0, "CND" }, - { 1, 3, 0, 0, "CND0" }, - { 1, 3, 0, 0, "DOT2ADD" }, - { 1, 2, 0, 0, "INDEX" }, - { 1, 1, 0, 0, "NEGATE" }, - { 1, 1, 0, 0, "FRAC" }, - { 1, 3, 0, 0, "CLAMP" }, - { 1, 1, 0, 0, "FLOOR" }, - { 1, 1, 0, 0, "ROUND" }, - { 1, 1, 0, 0, "EXPBASE2" }, - { 1, 1, 0, 0, "LOGBASE2" }, - { 1, 2, 0, 0, "POWER" }, - { 1, 2, 0, 0, "CROSSPRODUCT" }, - { 1, 2, 0, 0, "MULTIPLYMATRIX" }, - { 1, 1, 0, 0, "ABS" }, - { 1, 1, 0, 0, "RCC" }, - { 1, 2, 0, 0, "DPH" }, - { 1, 1, 0, 0, "COS" }, - { 1, 1, 0, 0, "DDX" }, - { 1, 1, 0, 0, "DDY" }, - { 0, 1, 0, 0, "KILP" }, - { 1, 1, 0, 0, "PK2H" }, - { 1, 1, 0, 0, "PK2US" }, - { 1, 1, 0, 0, "PK4B" }, - { 1, 1, 0, 0, "PK4UB" }, - { 1, 2, 0, 0, "RFL" }, - { 1, 2, 0, 0, "SEQ" }, - { 1, 2, 0, 0, "SFL" }, - { 1, 2, 0, 0, "SGT" }, - { 1, 1, 0, 0, "SIN" }, - { 1, 2, 0, 0, "SLE" }, - { 1, 2, 0, 0, "SNE" }, - { 1, 2, 0, 0, "STR" }, - { 1, 2, 1, 0, "TEX" }, - { 1, 4, 1, 0, "TXD" }, - { 1, 2, 1, 0, "TXP" }, - { 1, 1, 0, 0, "UP2H" }, - { 1, 1, 0, 0, "UP2US" }, - { 1, 1, 0, 0, "UP4B" }, - { 1, 1, 0, 0, "UP4UB" }, - { 1, 3, 0, 0, "X2D" }, - { 1, 1, 0, 0, "ARA" }, - { 1, 1, 0, 0, "ARR" }, - { 0, 1, 0, 0, "BRA" }, - { 0, 0, 0, 1, "CAL" }, - { 0, 0, 0, 0, "RET" }, - { 1, 1, 0, 0, "SSG" }, - { 1, 3, 0, 0, "CMP" }, - { 1, 1, 0, 0, "SCS" }, - { 1, 2, 1, 0, "TXB" }, - { 1, 1, 0, 0, "NRM" }, - { 1, 2, 0, 0, "DIV" }, - { 1, 2, 0, 0, "DP2" }, - { 1, 2, 1, 0, "TXL" }, - { 0, 0, 0, 0, "BRK" }, - { 0, 1, 0, 1, "IF" }, - { 0, 0, 0, 0, "LOOP" }, - { 0, 1, 0, 0, "REP" }, - { 0, 0, 0, 1, "ELSE" }, - { 0, 0, 0, 0, "ENDIF" }, - { 0, 0, 0, 0, "ENDLOOP" }, - { 0, 0, 0, 0, "ENDREP" }, - { 0, 1, 0, 0, "PUSHA" }, - { 1, 0, 0, 0, "POPA" }, - { 1, 1, 0, 0, "CEIL" }, - { 1, 1, 0, 0, "I2F" }, - { 1, 1, 0, 0, "NOT" }, - { 1, 1, 0, 0, "TRUNC" }, - { 1, 2, 0, 0, "SHL" }, - { 1, 2, 0, 0, "SHR" }, - { 1, 2, 0, 0, "AND" }, - { 1, 2, 0, 0, "OR" }, - { 1, 2, 0, 0, "MOD" }, - { 1, 2, 0, 0, "XOR" }, - { 1, 3, 0, 0, "SAD" }, - { 1, 2, 1, 0, "TXF" }, - { 1, 2, 1, 0, "TXQ" }, - { 0, 0, 0, 0, "CONT" }, - { 0, 0, 0, 0, "EMIT" }, - { 0, 0, 0, 0, "ENDPRIM" }, - { 0, 0, 0, 1, "BGNLOOP2" }, - { 0, 0, 0, 0, "BGNSUB" }, - { 0, 0, 0, 1, "ENDLOOP2" }, - { 0, 0, 0, 0, "ENDSUB" }, - { 1, 1, 0, 0, "NOISE1" }, - { 1, 1, 0, 0, "NOISE2" }, - { 1, 1, 0, 0, "NOISE3" }, - { 1, 1, 0, 0, "NOISE4" }, - { 0, 0, 0, 0, "NOP" }, - { 1, 2, 0, 0, "M4X3" }, - { 1, 2, 0, 0, "M3X4" }, - { 1, 2, 0, 0, "M3X3" }, - { 1, 2, 0, 0, "M3X2" }, - { 1, 1, 0, 0, "NRM4" }, - { 0, 1, 0, 0, "CALLNZ" }, - { 0, 1, 0, 0, "IFC" }, - { 0, 1, 0, 0, "BREAKC" }, - { 0, 0, 0, 0, "KIL" }, - { 0, 0, 0, 0, "END" }, - { 1, 1, 0, 0, "SWZ" } -}; - -static const char *texture_names[TGSI_TEXTURE_COUNT] = -{ - "UNKNOWN", - "1D", - "2D", - "3D", - "CUBE", - "RECT", - "SHADOW1D", - "SHADOW2D", - "SHADOWRECT" -}; - -static boolean -parse_instruction( - struct translate_ctx *ctx, - boolean has_label ) -{ - uint i; - uint saturate = TGSI_SAT_NONE; - const struct opcode_info *info; - struct tgsi_full_instruction inst; - uint advance; - - /* Parse instruction name. - */ - eat_opt_white( &ctx->cur ); - for (i = 0; i < TGSI_OPCODE_LAST; i++) { - const char *cur = ctx->cur; - - info = &opcode_info[i]; - if (str_match_no_case( &cur, info->mnemonic )) { - if (str_match_no_case( &cur, "_SATNV" )) - saturate = TGSI_SAT_MINUS_PLUS_ONE; - else if (str_match_no_case( &cur, "_SAT" )) - saturate = TGSI_SAT_ZERO_ONE; - - if (info->num_dst + info->num_src + info->is_tex == 0) { - if (!is_digit_alpha_underscore( cur )) { - ctx->cur = cur; - break; - } - } - else if (*cur == '\0' || eat_white( &cur )) { - ctx->cur = cur; - break; - } - } - } - if (i == TGSI_OPCODE_LAST) { - if (has_label) - report_error( ctx, "Unknown opcode" ); - else - report_error( ctx, "Expected `DCL', `IMM' or a label" ); - return FALSE; - } - - inst = tgsi_default_full_instruction(); - inst.Instruction.Opcode = i; - inst.Instruction.Saturate = saturate; - inst.Instruction.NumDstRegs = info->num_dst; - inst.Instruction.NumSrcRegs = info->num_src; - - /* Parse instruction operands. - */ - for (i = 0; i < info->num_dst + info->num_src + info->is_tex; i++) { - if (i > 0) { - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ',') { - report_error( ctx, "Expected `,'" ); - return FALSE; - } - ctx->cur++; - eat_opt_white( &ctx->cur ); - } - - if (i < info->num_dst) { - if (!parse_dst_operand( ctx, &inst.FullDstRegisters[i] )) - return FALSE; - } - else if (i < info->num_dst + info->num_src) { - if (!parse_src_operand( ctx, &inst.FullSrcRegisters[i - info->num_dst] )) - return FALSE; - } - else { - uint j; - - for (j = 0; j < TGSI_TEXTURE_COUNT; j++) { - if (str_match_no_case( &ctx->cur, texture_names[j] )) { - if (!is_digit_alpha_underscore( ctx->cur )) { - inst.InstructionExtTexture.Texture = j; - break; - } - } - } - if (j == TGSI_TEXTURE_COUNT) { - report_error( ctx, "Expected texture target" ); - return FALSE; - } - } - } - - if (info->is_branch) { - uint target; - - eat_opt_white( &ctx->cur ); - if (*ctx->cur != ':') { - report_error( ctx, "Expected `:'" ); - return FALSE; - } - ctx->cur++; - eat_opt_white( &ctx->cur ); - if (!parse_uint( &ctx->cur, &target )) { - report_error( ctx, "Expected a label" ); - return FALSE; - } - inst.InstructionExtLabel.Label = target; - } - - advance = tgsi_build_full_instruction( - &inst, - ctx->tokens_cur, - ctx->header, - (uint) (ctx->tokens_end - ctx->tokens_cur) ); - if (advance == 0) - return FALSE; - ctx->tokens_cur += advance; - - return TRUE; -} - -static const char *semantic_names[TGSI_SEMANTIC_COUNT] = -{ - "POSITION", - "COLOR", - "BCOLOR", - "FOG", - "PSIZE", - "GENERIC", - "NORMAL" -}; - -static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] = -{ - "CONSTANT", - "LINEAR", - "PERSPECTIVE" -}; - -static boolean parse_declaration( struct translate_ctx *ctx ) -{ - struct tgsi_full_declaration decl; - uint file; - int first; - int last; - uint writemask; - const char *cur; - uint advance; - - if (!eat_white( &ctx->cur )) { - report_error( ctx, "Syntax error" ); - return FALSE; - } - if (!parse_register_dcl( ctx, &file, &first, &last )) - return FALSE; - if (!parse_opt_writemask( ctx, &writemask )) - return FALSE; - - decl = tgsi_default_full_declaration(); - decl.Declaration.File = file; - decl.Declaration.UsageMask = writemask; - decl.DeclarationRange.First = first; - decl.DeclarationRange.Last = last; - - cur = ctx->cur; - eat_opt_white( &cur ); - if (*cur == ',') { - uint i; - - cur++; - eat_opt_white( &cur ); - for (i = 0; i < TGSI_SEMANTIC_COUNT; i++) { - if (str_match_no_case( &cur, semantic_names[i] )) { - const char *cur2 = cur; - uint index; - - if (is_digit_alpha_underscore( cur )) - continue; - eat_opt_white( &cur2 ); - if (*cur2 == '[') { - cur2++; - eat_opt_white( &cur2 ); - if (!parse_uint( &cur2, &index )) { - report_error( ctx, "Expected literal integer" ); - return FALSE; - } - eat_opt_white( &cur2 ); - if (*cur2 != ']') { - report_error( ctx, "Expected `]'" ); - return FALSE; - } - cur2++; - - decl.Semantic.SemanticIndex = index; - - cur = cur2; - } - - decl.Declaration.Semantic = 1; - decl.Semantic.SemanticName = i; - - ctx->cur = cur; - break; - } - } - } - - cur = ctx->cur; - eat_opt_white( &cur ); - if (*cur == ',') { - uint i; - - cur++; - eat_opt_white( &cur ); - for (i = 0; i < TGSI_INTERPOLATE_COUNT; i++) { - if (str_match_no_case( &cur, interpolate_names[i] )) { - if (is_digit_alpha_underscore( cur )) - continue; - decl.Declaration.Interpolate = i; - - ctx->cur = cur; - break; - } - } - if (i == TGSI_INTERPOLATE_COUNT) { - report_error( ctx, "Expected semantic or interpolate attribute" ); - return FALSE; - } - } - - advance = tgsi_build_full_declaration( - &decl, - ctx->tokens_cur, - ctx->header, - (uint) (ctx->tokens_end - ctx->tokens_cur) ); - if (advance == 0) - return FALSE; - ctx->tokens_cur += advance; - - return TRUE; -} - -static boolean parse_immediate( struct translate_ctx *ctx ) -{ - struct tgsi_full_immediate imm; - uint i; - float values[4]; - uint advance; - - if (!eat_white( &ctx->cur )) { - report_error( ctx, "Syntax error" ); - return FALSE; - } - if (!str_match_no_case( &ctx->cur, "FLT32" ) || is_digit_alpha_underscore( ctx->cur )) { - report_error( ctx, "Expected `FLT32'" ); - return FALSE; - } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '{') { - report_error( ctx, "Expected `{'" ); - return FALSE; - } - ctx->cur++; - for (i = 0; i < 4; i++) { - eat_opt_white( &ctx->cur ); - if (i > 0) { - if (*ctx->cur != ',') { - report_error( ctx, "Expected `,'" ); - return FALSE; - } - ctx->cur++; - eat_opt_white( &ctx->cur ); - } - if (!parse_float( &ctx->cur, &values[i] )) { - report_error( ctx, "Expected literal floating point" ); - return FALSE; - } - } - eat_opt_white( &ctx->cur ); - if (*ctx->cur != '}') { - report_error( ctx, "Expected `}'" ); - return FALSE; - } - ctx->cur++; - - imm = tgsi_default_full_immediate(); - imm.Immediate.Size += 4; - imm.Immediate.DataType = TGSI_IMM_FLOAT32; - imm.u.Pointer = values; - - advance = tgsi_build_full_immediate( - &imm, - ctx->tokens_cur, - ctx->header, - (uint) (ctx->tokens_end - ctx->tokens_cur) ); - if (advance == 0) - return FALSE; - ctx->tokens_cur += advance; - - return TRUE; -} - -static boolean translate( struct translate_ctx *ctx ) -{ - eat_opt_white( &ctx->cur ); - if (!parse_header( ctx )) - return FALSE; - - while (*ctx->cur != '\0') { - uint label_val = 0; - - if (!eat_white( &ctx->cur )) { - report_error( ctx, "Syntax error" ); - return FALSE; - } - - if (*ctx->cur == '\0') - break; - - if (parse_label( ctx, &label_val )) { - if (!parse_instruction( ctx, TRUE )) - return FALSE; - } - else if (str_match_no_case( &ctx->cur, "DCL" )) { - if (!parse_declaration( ctx )) - return FALSE; - } - else if (str_match_no_case( &ctx->cur, "IMM" )) { - if (!parse_immediate( ctx )) - return FALSE; - } - else if (!parse_instruction( ctx, FALSE )) { - return FALSE; - } - } - - return TRUE; -} - -boolean -tgsi_text_translate( - const char *text, - struct tgsi_token *tokens, - uint num_tokens ) -{ - struct translate_ctx ctx; - - ctx.text = text; - ctx.cur = text; - ctx.tokens = tokens; - ctx.tokens_cur = tokens; - ctx.tokens_end = tokens + num_tokens; - - if (!translate( &ctx )) - return FALSE; - - return tgsi_sanity_check( tokens ); -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_text.h b/src/gallium/auxiliary/tgsi/util/tgsi_text.h deleted file mode 100644 index 8eeeeef140..0000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_text.h +++ /dev/null @@ -1,47 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_TEXT_H -#define TGSI_TEXT_H - -#include "pipe/p_shader_tokens.h" - -#if defined __cplusplus -extern "C" { -#endif - -boolean -tgsi_text_translate( - const char *text, - struct tgsi_token *tokens, - uint num_tokens ); - -#if defined __cplusplus -} -#endif - -#endif /* TGSI_TEXT_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_transform.c b/src/gallium/auxiliary/tgsi/util/tgsi_transform.c deleted file mode 100644 index 357f77b05a..0000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_transform.c +++ /dev/null @@ -1,199 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * TGSI program transformation utility. - * - * Authors: Brian Paul - */ - - -#include "tgsi_transform.h" - - - -static void -emit_instruction(struct tgsi_transform_context *ctx, - const struct tgsi_full_instruction *inst) -{ - uint ti = ctx->ti; - - ti += tgsi_build_full_instruction(inst, - ctx->tokens_out + ti, - ctx->header, - ctx->max_tokens_out - ti); - ctx->ti = ti; -} - - -static void -emit_declaration(struct tgsi_transform_context *ctx, - const struct tgsi_full_declaration *decl) -{ - uint ti = ctx->ti; - - ti += tgsi_build_full_declaration(decl, - ctx->tokens_out + ti, - ctx->header, - ctx->max_tokens_out - ti); - ctx->ti = ti; -} - - -static void -emit_immediate(struct tgsi_transform_context *ctx, - const struct tgsi_full_immediate *imm) -{ - uint ti = ctx->ti; - - ti += tgsi_build_full_immediate(imm, - ctx->tokens_out + ti, - ctx->header, - ctx->max_tokens_out - ti); - ctx->ti = ti; -} - - - -/** - * Apply user-defined transformations to the input shader to produce - * the output shader. - * For example, a register search-and-replace operation could be applied - * by defining a transform_instruction() callback that examined and changed - * the instruction src/dest regs. - * - * \return number of tokens emitted - */ -int -tgsi_transform_shader(const struct tgsi_token *tokens_in, - struct tgsi_token *tokens_out, - uint max_tokens_out, - struct tgsi_transform_context *ctx) -{ - uint procType; - - /* input shader */ - struct tgsi_parse_context parse; - - /* output shader */ - struct tgsi_processor *processor; - - - /** - ** callback context init - **/ - ctx->emit_instruction = emit_instruction; - ctx->emit_declaration = emit_declaration; - ctx->emit_immediate = emit_immediate; - ctx->tokens_out = tokens_out; - ctx->max_tokens_out = max_tokens_out; - - - /** - ** Setup to begin parsing input shader - **/ - if (tgsi_parse_init( &parse, tokens_in ) != TGSI_PARSE_OK) { - debug_printf("tgsi_parse_init() failed in tgsi_transform_shader()!\n"); - return -1; - } - procType = parse.FullHeader.Processor.Processor; - assert(procType == TGSI_PROCESSOR_FRAGMENT || - procType == TGSI_PROCESSOR_VERTEX || - procType == TGSI_PROCESSOR_GEOMETRY); - - - /** - ** Setup output shader - **/ - *(struct tgsi_version *) &tokens_out[0] = tgsi_build_version(); - - ctx->header = (struct tgsi_header *) (tokens_out + 1); - *ctx->header = tgsi_build_header(); - - processor = (struct tgsi_processor *) (tokens_out + 2); - *processor = tgsi_build_processor( procType, ctx->header ); - - ctx->ti = 3; - - - /** - ** Loop over incoming program tokens/instructions - */ - while( !tgsi_parse_end_of_tokens( &parse ) ) { - - tgsi_parse_token( &parse ); - - switch( parse.FullToken.Token.Type ) { - case TGSI_TOKEN_TYPE_INSTRUCTION: - { - struct tgsi_full_instruction *fullinst - = &parse.FullToken.FullInstruction; - - if (ctx->transform_instruction) - ctx->transform_instruction(ctx, fullinst); - else - ctx->emit_instruction(ctx, fullinst); - } - break; - - case TGSI_TOKEN_TYPE_DECLARATION: - { - struct tgsi_full_declaration *fulldecl - = &parse.FullToken.FullDeclaration; - - if (ctx->transform_declaration) - ctx->transform_declaration(ctx, fulldecl); - else - ctx->emit_declaration(ctx, fulldecl); - } - break; - - case TGSI_TOKEN_TYPE_IMMEDIATE: - { - struct tgsi_full_immediate *fullimm - = &parse.FullToken.FullImmediate; - - if (ctx->transform_immediate) - ctx->transform_immediate(ctx, fullimm); - else - ctx->emit_immediate(ctx, fullimm); - } - break; - - default: - assert( 0 ); - } - } - - if (ctx->epilog) { - ctx->epilog(ctx); - } - - tgsi_parse_free (&parse); - - return ctx->ti; -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_transform.h b/src/gallium/auxiliary/tgsi/util/tgsi_transform.h deleted file mode 100644 index fcf85d603b..0000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_transform.h +++ /dev/null @@ -1,93 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_TRANSFORM_H -#define TGSI_TRANSFORM_H - - -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_build.h" - - - -/** - * Subclass this to add caller-specific data - */ -struct tgsi_transform_context -{ -/**** PUBLIC ***/ - - /** - * User-defined callbacks invoked per instruction. - */ - void (*transform_instruction)(struct tgsi_transform_context *ctx, - struct tgsi_full_instruction *inst); - - void (*transform_declaration)(struct tgsi_transform_context *ctx, - struct tgsi_full_declaration *decl); - - void (*transform_immediate)(struct tgsi_transform_context *ctx, - struct tgsi_full_immediate *imm); - - /** - * Called at end of input program to allow caller to append extra - * instructions. Return number of tokens emitted. - */ - void (*epilog)(struct tgsi_transform_context *ctx); - - -/*** PRIVATE ***/ - - /** - * These are setup by tgsi_transform_shader() and cannot be overridden. - * Meant to be called from in the above user callback functions. - */ - void (*emit_instruction)(struct tgsi_transform_context *ctx, - const struct tgsi_full_instruction *inst); - void (*emit_declaration)(struct tgsi_transform_context *ctx, - const struct tgsi_full_declaration *decl); - void (*emit_immediate)(struct tgsi_transform_context *ctx, - const struct tgsi_full_immediate *imm); - - struct tgsi_header *header; - uint max_tokens_out; - struct tgsi_token *tokens_out; - uint ti; -}; - - - -extern int -tgsi_transform_shader(const struct tgsi_token *tokens_in, - struct tgsi_token *tokens_out, - uint max_tokens_out, - struct tgsi_transform_context *ctx); - - -#endif /* TGSI_TRANSFORM_H */ diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.c b/src/gallium/auxiliary/tgsi/util/tgsi_util.c deleted file mode 100644 index 09486e649e..0000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_util.c +++ /dev/null @@ -1,300 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#include "pipe/p_debug.h" -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" -#include "tgsi_parse.h" -#include "tgsi_build.h" -#include "tgsi_util.h" - -union pointer_hack -{ - void *pointer; - uint64_t uint64; -}; - -void * -tgsi_align_128bit( - void *unaligned ) -{ - union pointer_hack ph; - - ph.uint64 = 0; - ph.pointer = unaligned; - ph.uint64 = (ph.uint64 + 15) & ~15; - return ph.pointer; -} - -unsigned -tgsi_util_get_src_register_swizzle( - const struct tgsi_src_register *reg, - unsigned component ) -{ - switch( component ) { - case 0: - return reg->SwizzleX; - case 1: - return reg->SwizzleY; - case 2: - return reg->SwizzleZ; - case 3: - return reg->SwizzleW; - default: - assert( 0 ); - } - return 0; -} - -unsigned -tgsi_util_get_src_register_extswizzle( - const struct tgsi_src_register_ext_swz *reg, - unsigned component ) -{ - switch( component ) { - case 0: - return reg->ExtSwizzleX; - case 1: - return reg->ExtSwizzleY; - case 2: - return reg->ExtSwizzleZ; - case 3: - return reg->ExtSwizzleW; - default: - assert( 0 ); - } - return 0; -} - -unsigned -tgsi_util_get_full_src_register_extswizzle( - const struct tgsi_full_src_register *reg, - unsigned component ) -{ - unsigned swizzle; - - /* - * First, calculate the extended swizzle for a given channel. This will give - * us either a channel index into the simple swizzle or a constant 1 or 0. - */ - swizzle = tgsi_util_get_src_register_extswizzle( - ®->SrcRegisterExtSwz, - component ); - - assert (TGSI_SWIZZLE_X == TGSI_EXTSWIZZLE_X); - assert (TGSI_SWIZZLE_Y == TGSI_EXTSWIZZLE_Y); - assert (TGSI_SWIZZLE_Z == TGSI_EXTSWIZZLE_Z); - assert (TGSI_SWIZZLE_W == TGSI_EXTSWIZZLE_W); - assert (TGSI_EXTSWIZZLE_ZERO > TGSI_SWIZZLE_W); - assert (TGSI_EXTSWIZZLE_ONE > TGSI_SWIZZLE_W); - - /* - * Second, calculate the simple swizzle for the unswizzled channel index. - * Leave the constants intact, they are not affected by the simple swizzle. - */ - if( swizzle <= TGSI_SWIZZLE_W ) { - swizzle = tgsi_util_get_src_register_swizzle( - ®->SrcRegister, - swizzle ); - } - - return swizzle; -} - -void -tgsi_util_set_src_register_swizzle( - struct tgsi_src_register *reg, - unsigned swizzle, - unsigned component ) -{ - switch( component ) { - case 0: - reg->SwizzleX = swizzle; - break; - case 1: - reg->SwizzleY = swizzle; - break; - case 2: - reg->SwizzleZ = swizzle; - break; - case 3: - reg->SwizzleW = swizzle; - break; - default: - assert( 0 ); - } -} - -void -tgsi_util_set_src_register_extswizzle( - struct tgsi_src_register_ext_swz *reg, - unsigned swizzle, - unsigned component ) -{ - switch( component ) { - case 0: - reg->ExtSwizzleX = swizzle; - break; - case 1: - reg->ExtSwizzleY = swizzle; - break; - case 2: - reg->ExtSwizzleZ = swizzle; - break; - case 3: - reg->ExtSwizzleW = swizzle; - break; - default: - assert( 0 ); - } -} - -unsigned -tgsi_util_get_src_register_extnegate( - const struct tgsi_src_register_ext_swz *reg, - unsigned component ) -{ - switch( component ) { - case 0: - return reg->NegateX; - case 1: - return reg->NegateY; - case 2: - return reg->NegateZ; - case 3: - return reg->NegateW; - default: - assert( 0 ); - } - return 0; -} - -void -tgsi_util_set_src_register_extnegate( - struct tgsi_src_register_ext_swz *reg, - unsigned negate, - unsigned component ) -{ - switch( component ) { - case 0: - reg->NegateX = negate; - break; - case 1: - reg->NegateY = negate; - break; - case 2: - reg->NegateZ = negate; - break; - case 3: - reg->NegateW = negate; - break; - default: - assert( 0 ); - } -} - -unsigned -tgsi_util_get_full_src_register_sign_mode( - const struct tgsi_full_src_register *reg, - unsigned component ) -{ - unsigned sign_mode; - - if( reg->SrcRegisterExtMod.Absolute ) { - /* Consider only the post-abs negation. */ - - if( reg->SrcRegisterExtMod.Negate ) { - sign_mode = TGSI_UTIL_SIGN_SET; - } - else { - sign_mode = TGSI_UTIL_SIGN_CLEAR; - } - } - else { - /* Accumulate the three negations. */ - - unsigned negate; - - negate = reg->SrcRegister.Negate; - if( tgsi_util_get_src_register_extnegate( ®->SrcRegisterExtSwz, component ) ) { - negate = !negate; - } - if( reg->SrcRegisterExtMod.Negate ) { - negate = !negate; - } - - if( negate ) { - sign_mode = TGSI_UTIL_SIGN_TOGGLE; - } - else { - sign_mode = TGSI_UTIL_SIGN_KEEP; - } - } - - return sign_mode; -} - -void -tgsi_util_set_full_src_register_sign_mode( - struct tgsi_full_src_register *reg, - unsigned sign_mode ) -{ - reg->SrcRegisterExtSwz.NegateX = 0; - reg->SrcRegisterExtSwz.NegateY = 0; - reg->SrcRegisterExtSwz.NegateZ = 0; - reg->SrcRegisterExtSwz.NegateW = 0; - - switch (sign_mode) - { - case TGSI_UTIL_SIGN_CLEAR: - reg->SrcRegister.Negate = 0; - reg->SrcRegisterExtMod.Absolute = 1; - reg->SrcRegisterExtMod.Negate = 0; - break; - - case TGSI_UTIL_SIGN_SET: - reg->SrcRegister.Negate = 0; - reg->SrcRegisterExtMod.Absolute = 1; - reg->SrcRegisterExtMod.Negate = 1; - break; - - case TGSI_UTIL_SIGN_TOGGLE: - reg->SrcRegister.Negate = 1; - reg->SrcRegisterExtMod.Absolute = 0; - reg->SrcRegisterExtMod.Negate = 0; - break; - - case TGSI_UTIL_SIGN_KEEP: - reg->SrcRegister.Negate = 0; - reg->SrcRegisterExtMod.Absolute = 0; - reg->SrcRegisterExtMod.Negate = 0; - break; - - default: - assert( 0 ); - } -} diff --git a/src/gallium/auxiliary/tgsi/util/tgsi_util.h b/src/gallium/auxiliary/tgsi/util/tgsi_util.h deleted file mode 100644 index 7877f34558..0000000000 --- a/src/gallium/auxiliary/tgsi/util/tgsi_util.h +++ /dev/null @@ -1,96 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef TGSI_UTIL_H -#define TGSI_UTIL_H - -#if defined __cplusplus -extern "C" { -#endif - -void * -tgsi_align_128bit( - void *unaligned ); - -unsigned -tgsi_util_get_src_register_swizzle( - const struct tgsi_src_register *reg, - unsigned component ); - -unsigned -tgsi_util_get_src_register_extswizzle( - const struct tgsi_src_register_ext_swz *reg, - unsigned component); - -unsigned -tgsi_util_get_full_src_register_extswizzle( - const struct tgsi_full_src_register *reg, - unsigned component ); - -void -tgsi_util_set_src_register_swizzle( - struct tgsi_src_register *reg, - unsigned swizzle, - unsigned component ); - -void -tgsi_util_set_src_register_extswizzle( - struct tgsi_src_register_ext_swz *reg, - unsigned swizzle, - unsigned component ); - -unsigned -tgsi_util_get_src_register_extnegate( - const struct tgsi_src_register_ext_swz *reg, - unsigned component ); - -void -tgsi_util_set_src_register_extnegate( - struct tgsi_src_register_ext_swz *reg, - unsigned negate, - unsigned component ); - -#define TGSI_UTIL_SIGN_CLEAR 0 /* Force positive */ -#define TGSI_UTIL_SIGN_SET 1 /* Force negative */ -#define TGSI_UTIL_SIGN_TOGGLE 2 /* Negate */ -#define TGSI_UTIL_SIGN_KEEP 3 /* No change */ - -unsigned -tgsi_util_get_full_src_register_sign_mode( - const struct tgsi_full_src_register *reg, - unsigned component ); - -void -tgsi_util_set_full_src_register_sign_mode( - struct tgsi_full_src_register *reg, - unsigned sign_mode ); - -#if defined __cplusplus -} -#endif - -#endif /* TGSI_UTIL_H */ diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 4999822068..8713ff5d58 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -45,9 +45,9 @@ #include "util/u_gen_mipmap.h" #include "util/u_simple_shaders.h" -#include "tgsi/util/tgsi_build.h" -#include "tgsi/util/tgsi_dump.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_build.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" #include "cso_cache/cso_context.h" diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index 505d93d727..c34fb6ee33 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -43,9 +43,9 @@ #include "util/u_simple_shaders.h" -#include "tgsi/util/tgsi_build.h" -#include "tgsi/util/tgsi_dump.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_build.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/cell/ppu/cell_context.h b/src/gallium/drivers/cell/ppu/cell_context.h index 8df41c1d4c..f1d1ca89a9 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.h +++ b/src/gallium/drivers/cell/ppu/cell_context.h @@ -37,7 +37,7 @@ #include "cell_winsys.h" #include "cell/common.h" #include "rtasm/rtasm_ppc_spe.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_scan.h" struct cell_vbuf_render; diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index c3a3fbd066..f5707f2bb8 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -30,7 +30,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "draw/draw_context.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" #include "cell_context.h" #include "cell_state.h" diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 3a80df427d..96393732ed 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -65,8 +65,8 @@ #include "pipe/p_state.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_util.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" #include "spu_exec.h" #include "spu_main.h" #include "spu_vertex_shader.h" diff --git a/src/gallium/drivers/cell/spu/spu_exec.h b/src/gallium/drivers/cell/spu/spu_exec.h index 3e17c490d2..c68f78f59b 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.h +++ b/src/gallium/drivers/cell/spu/spu_exec.h @@ -29,7 +29,7 @@ #define SPU_EXEC_H #include "pipe/p_compiler.h" -#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/tgsi_exec.h" #if defined __cplusplus extern "C" { diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index ea4274a0a7..74ab2bbd1f 100644 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -1,8 +1,8 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" //#include "tgsi_build.h" -#include "tgsi/util/tgsi_util.h" +#include "tgsi/tgsi_util.h" unsigned tgsi_util_get_src_register_swizzle( diff --git a/src/gallium/drivers/i915simple/i915_context.h b/src/gallium/drivers/i915simple/i915_context.h index c8db4f608c..3cdabe45f9 100644 --- a/src/gallium/drivers/i915simple/i915_context.h +++ b/src/gallium/drivers/i915simple/i915_context.h @@ -35,7 +35,7 @@ #include "draw/draw_vertex.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_scan.h" #define I915_TEX_UNITS 8 diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 23cd909337..04507ab8ad 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -34,8 +34,8 @@ #include "pipe/p_shader_tokens.h" #include "util/u_string.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" #include "draw/draw_vertex.h" diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index dbb33f2695..e8521b385e 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -33,7 +33,7 @@ #include "pipe/p_winsys.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" #include "i915_context.h" #include "i915_reg.h" diff --git a/src/gallium/drivers/i965simple/brw_context.h b/src/gallium/drivers/i965simple/brw_context.h index 2cae7665f7..f00eb34f92 100644 --- a/src/gallium/drivers/i965simple/brw_context.h +++ b/src/gallium/drivers/i965simple/brw_context.h @@ -38,7 +38,7 @@ #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_scan.h" #include "brw_structs.h" #include "brw_winsys.h" diff --git a/src/gallium/drivers/i965simple/brw_sf.c b/src/gallium/drivers/i965simple/brw_sf.c index 96f8fb87a3..b82a2e143b 100644 --- a/src/gallium/drivers/i965simple/brw_sf.c +++ b/src/gallium/drivers/i965simple/brw_sf.c @@ -36,7 +36,7 @@ #include "brw_util.h" #include "brw_sf.h" #include "brw_state.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" static void compile_sf_prog( struct brw_context *brw, diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c index fb3da92421..30f37a99d4 100644 --- a/src/gallium/drivers/i965simple/brw_shader_info.c +++ b/src/gallium/drivers/i965simple/brw_shader_info.c @@ -3,7 +3,7 @@ #include "brw_state.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" /** diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index caeeba4630..27ca32843d 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -34,8 +34,8 @@ #include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_dump.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_parse.h" #include "brw_context.h" #include "brw_defines.h" diff --git a/src/gallium/drivers/i965simple/brw_vs_emit.c b/src/gallium/drivers/i965simple/brw_vs_emit.c index 81423e2d7d..34dbc0624d 100644 --- a/src/gallium/drivers/i965simple/brw_vs_emit.c +++ b/src/gallium/drivers/i965simple/brw_vs_emit.c @@ -33,7 +33,7 @@ #include "brw_vs.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" struct brw_prog_info { unsigned num_temps; diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c index bf1b4d961a..e6f1a44817 100644 --- a/src/gallium/drivers/i965simple/brw_wm_decl.c +++ b/src/gallium/drivers/i965simple/brw_wm_decl.c @@ -4,7 +4,7 @@ #include "brw_wm.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" static struct brw_reg alloc_tmp(struct brw_wm_compile *c) { diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c index 5c90583824..6a4a5aef09 100644 --- a/src/gallium/drivers/i965simple/brw_wm_glsl.c +++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c @@ -4,7 +4,7 @@ #include "brw_wm.h" #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index 0b199a2193..cc171bbc39 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -36,8 +36,8 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "tgsi/exec/tgsi_exec.h" -#include "tgsi/util/tgsi_parse.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_parse.h" struct sp_exec_fragment_shader { struct sp_fragment_shader base; diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c index 6e1d9280bb..20226da78c 100644 --- a/src/gallium/drivers/softpipe/sp_fs_llvm.c +++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c @@ -38,7 +38,7 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "tgsi/exec/tgsi_sse2.h" +#include "tgsi/tgsi_sse2.h" #if 0 diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 69f7f960aa..8b7da7c747 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -36,8 +36,8 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "tgsi/exec/tgsi_exec.h" -#include "tgsi/exec/tgsi_sse2.h" +#include "tgsi/tgsi_exec.h" +#include "tgsi/tgsi_sse2.h" #ifdef PIPE_ARCH_X86 diff --git a/src/gallium/drivers/softpipe/sp_headers.h b/src/gallium/drivers/softpipe/sp_headers.h index 3d9ede69bb..ae2ee210fc 100644 --- a/src/gallium/drivers/softpipe/sp_headers.h +++ b/src/gallium/drivers/softpipe/sp_headers.h @@ -32,7 +32,7 @@ #define SP_HEADERS_H #include "pipe/p_state.h" -#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/tgsi_exec.h" #define PRIM_POINT 1 #define PRIM_LINE 2 diff --git a/src/gallium/drivers/softpipe/sp_state.h b/src/gallium/drivers/softpipe/sp_state.h index 701e02b295..476ef3dc8f 100644 --- a/src/gallium/drivers/softpipe/sp_state.h +++ b/src/gallium/drivers/softpipe/sp_state.h @@ -32,7 +32,7 @@ #define SP_STATE_H #include "pipe/p_state.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_scan.h" #define SP_NEW_VIEWPORT 0x1 diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 901c8f83e7..76fe6bfef9 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -35,8 +35,8 @@ #include "pipe/p_winsys.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" -#include "tgsi/util/tgsi_dump.h" -#include "tgsi/util/tgsi_scan.h" +#include "tgsi/tgsi_dump.h" +#include "tgsi/tgsi_scan.h" void * diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index be0b57d9fa..63b3b91110 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -40,7 +40,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_util.h" -#include "tgsi/exec/tgsi_exec.h" +#include "tgsi/tgsi_exec.h" /* diff --git a/src/gallium/state_trackers/python/gallium.i b/src/gallium/state_trackers/python/gallium.i index 8d8b762ea5..284ecb827d 100644 --- a/src/gallium/state_trackers/python/gallium.i +++ b/src/gallium/state_trackers/python/gallium.i @@ -47,8 +47,8 @@ #include "cso_cache/cso_context.h" #include "util/u_draw_quad.h" #include "util/p_tile.h" -#include "tgsi/util/tgsi_text.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_text.h" +#include "tgsi/tgsi_dump.h" #include "st_device.h" #include "st_sample.h" diff --git a/src/mesa/state_tracker/st_debug.c b/src/mesa/state_tracker/st_debug.c index 23ecfff0aa..c7d26ce33c 100644 --- a/src/mesa/state_tracker/st_debug.c +++ b/src/mesa/state_tracker/st_debug.c @@ -31,7 +31,7 @@ #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_dump.h" #include "cso_cache/cso_cache.h" diff --git a/src/mesa/state_tracker/st_mesa_to_tgsi.c b/src/mesa/state_tracker/st_mesa_to_tgsi.c index 9029f12056..6565107b10 100644 --- a/src/mesa/state_tracker/st_mesa_to_tgsi.c +++ b/src/mesa/state_tracker/st_mesa_to_tgsi.c @@ -32,9 +32,9 @@ #include "pipe/p_compiler.h" #include "pipe/p_shader_tokens.h" -#include "tgsi/util/tgsi_parse.h" -#include "tgsi/util/tgsi_build.h" -#include "tgsi/util/tgsi_util.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_build.h" +#include "tgsi/tgsi_util.h" #include "st_mesa_to_tgsi.h" #include "shader/prog_instruction.h" #include "shader/prog_parameter.h" diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 5966bbadae..c25c668329 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -40,7 +40,7 @@ #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" -#include "tgsi/util/tgsi_dump.h" +#include "tgsi/tgsi_dump.h" #include "st_context.h" #include "st_atom.h" -- cgit v1.2.3 From 76164bf7a20ef6dabc3204a766f604becfba9997 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 30 Jul 2008 00:44:56 +0900 Subject: tgsi: Insert newlines after the statements, instead of before. Prevents shader dumps from getting concatenated with the next debug message. --- src/gallium/auxiliary/tgsi/tgsi_dump.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index d2e6375212..290d78bb14 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -332,7 +332,7 @@ void tgsi_dump_declaration( const struct tgsi_full_declaration *decl ) { - TXT( "\nDCL " ); + TXT( "DCL " ); _dump_register( decl->Declaration.File, @@ -354,6 +354,8 @@ tgsi_dump_declaration( TXT( ", " ); ENM( decl->Declaration.Interpolate, interpolate_names ); + + EOL(); } static boolean @@ -407,7 +409,6 @@ tgsi_dump_instruction( uint i; boolean first_reg = TRUE; - EOL(); UID( instno ); CHR( ':' ); ENM( inst->Instruction.Opcode, opcode_names ); @@ -534,6 +535,8 @@ tgsi_dump_instruction( UID( inst->InstructionExtLabel.Label ); break; } + + EOL(); } static boolean @@ -551,11 +554,11 @@ static boolean prolog( struct tgsi_iterate_context *ctx ) { - EOL(); ENM( ctx->processor.Processor, processor_type_names ); UID( ctx->version.MajorVersion ); CHR( '.' ); UID( ctx->version.MinorVersion ); + EOL(); return TRUE; } -- cgit v1.2.3 From 4a8b908a46375ea2543a81bdd8d5b313d807f140 Mon Sep 17 00:00:00 2001 From: Younes Manton Date: Sat, 2 Aug 2008 19:37:16 +0200 Subject: softpipe: support PIPE_FORMAT_R16_SNORM. --- src/gallium/auxiliary/util/p_tile.c | 46 +++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index 1bf0d72733..2b6db43bee 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -441,6 +441,46 @@ a8_get_tile_rgba(ubyte *src, } } +/*** PIPE_FORMAT_R16_SNORM ***/ + +static void +r16_get_tile_rgba(short *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src++, pRow += 4) { + pRow[0] = SHORT_TO_FLOAT(src[0]); + pRow[1] = + pRow[2] = 0.0; + pRow[3] = 1.0; + } + p += dst_stride; + } +} + + +static void +r16_put_tile_rgba(short *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, dst++, pRow += 4) { + UNCLAMPED_FLOAT_TO_SHORT(dst[0], pRow[0]); + } + p += src_stride; + } +} + /*** PIPE_FORMAT_R16G16B16A16_SNORM ***/ @@ -742,6 +782,9 @@ pipe_tile_raw_to_rgba(enum pipe_format format, case PIPE_FORMAT_A8L8_UNORM: a8_l8_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_R16_SNORM: + r16_get_tile_rgba((short *) src, w, h, dst, dst_stride); + break; case PIPE_FORMAT_R16G16B16A16_SNORM: r16g16b16a16_get_tile_rgba((short *) src, w, h, dst, dst_stride); break; @@ -847,6 +890,9 @@ pipe_put_tile_rgba(struct pipe_surface *ps, case PIPE_FORMAT_A8L8_UNORM: /*a8_l8_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/ break; + case PIPE_FORMAT_R16_SNORM: + /*r16_put_tile_rgba((short *) packed, w, h, p, src_stride);*/ + break; case PIPE_FORMAT_R16G16B16A16_SNORM: r16g16b16a16_put_tile_rgba((short *) packed, w, h, p, src_stride); break; -- cgit v1.2.3 From 2783f3bfabd6e316f7e221e950499c3631c041ce Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 4 Aug 2008 12:18:09 +0200 Subject: tgsi: Put a newline after IMM. --- src/gallium/auxiliary/tgsi/tgsi_dump.c | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 290d78bb14..0eedb1c91e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -264,16 +264,6 @@ static const char *modulate_names[TGSI_MODULATE_COUNT] = "_D8" }; -static void -_dump_register_prefix( - uint file, - uint first, - uint last ) -{ - - -} - static void _dump_register( uint file, @@ -354,7 +344,7 @@ tgsi_dump_declaration( TXT( ", " ); ENM( decl->Declaration.Interpolate, interpolate_names ); - + EOL(); } @@ -373,7 +363,7 @@ tgsi_dump_immediate( { uint i; - TXT( "\nIMM " ); + TXT( "IMM " ); ENM( imm->Immediate.DataType, immediate_type_names ); TXT( " { " ); @@ -390,6 +380,8 @@ tgsi_dump_immediate( TXT( ", " ); } TXT( " }" ); + + EOL(); } static boolean @@ -535,7 +527,7 @@ tgsi_dump_instruction( UID( inst->InstructionExtLabel.Label ); break; } - + EOL(); } -- cgit v1.2.3 From a56ccb90c6a374c86158ac2323f844a4003560fa Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 6 Aug 2008 19:15:10 -0600 Subject: gallium: fix clipping/stride bugs in pipe_get_tile_raw(), pipe_put_tile_raw() We need to compute the default dst_stride and src_stride _before_ clipping. After clipping, the width value may have changed. This fixes visible tile glitches in some demos like progs/glsl/texdemo.c --- src/gallium/auxiliary/util/p_tile.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index 2b6db43bee..d65e603785 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -51,12 +51,12 @@ pipe_get_tile_raw(struct pipe_surface *ps, { const void *src; - if (pipe_clip_tile(x, y, &w, &h, ps)) - return; - if (dst_stride == 0) dst_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + src = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ); assert(src); if(!src) @@ -79,12 +79,12 @@ pipe_put_tile_raw(struct pipe_surface *ps, { void *dst; - if (pipe_clip_tile(x, y, &w, &h, ps)) - return; - if (src_stride == 0) src_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + dst = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE); assert(dst); if(!dst) -- cgit v1.2.3 From 72a5e479789febb552ec783a1cba0ed628dfa427 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 6 Aug 2008 14:48:11 +0100 Subject: gallium: New function to dump surfaces. --- src/gallium/auxiliary/util/p_debug.c | 43 +++++++++++++++++++++++++++++++----- src/gallium/include/pipe/p_debug.h | 8 +++++-- 2 files changed, 44 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index cdc7e66361..a3db13f96d 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -50,12 +50,12 @@ #endif - - #include "pipe/p_compiler.h" #include "pipe/p_util.h" #include "pipe/p_debug.h" #include "pipe/p_format.h" +#include "pipe/p_state.h" +#include "pipe/p_inlines.h" #include "util/u_string.h" @@ -509,7 +509,7 @@ char *pf_sprint_name( char *str, enum pipe_format format ) void debug_dump_image(const char *prefix, unsigned format, unsigned cpp, unsigned width, unsigned height, - unsigned pitch, + unsigned stride, const void *data) { #ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY @@ -530,7 +530,7 @@ void debug_dump_image(const char *prefix, for(i = 0; i < sizeof(filename); ++i) wfilename[i] = (WCHAR)filename[i]; - pMap = (unsigned char *)EngMapFile(wfilename, sizeof(header) + cpp*width*height, &iFile); + pMap = (unsigned char *)EngMapFile(wfilename, sizeof(header) + height*width*cpp, &iFile); if(!pMap) return; @@ -542,11 +542,44 @@ void debug_dump_image(const char *prefix, pMap += sizeof(header); for(i = 0; i < height; ++i) { - memcpy(pMap, (unsigned char *)data + cpp*pitch*i, cpp*width); + memcpy(pMap, (unsigned char *)data + stride*i, cpp*width); pMap += cpp*width; } EngUnmapFile(iFile); #endif } + +void debug_dump_surface(const char *prefix, + struct pipe_surface *surface) +{ + unsigned surface_usage; + void *data; + + if (!surface) + goto error1; + + /* XXX: force mappable surface */ + surface_usage = surface->usage; + surface->usage |= PIPE_BUFFER_USAGE_CPU_READ; + + data = pipe_surface_map(surface, + PIPE_BUFFER_USAGE_CPU_READ); + if(!data) + goto error2; + + debug_dump_image(prefix, + surface->format, + surface->block.size, + surface->nblocksx, + surface->nblocksy, + surface->stride, + data); + + pipe_surface_unmap(surface); +error2: + surface->usage = surface_usage; +error1: + ; +} #endif diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index a5816c3cbb..6478ae2f08 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -332,13 +332,17 @@ debug_profile_stop(void); #ifdef DEBUG +struct pipe_surface; void debug_dump_image(const char *prefix, unsigned format, unsigned cpp, unsigned width, unsigned height, - unsigned pitch, + unsigned stride, const void *data); +void debug_dump_surface(const char *prefix, + struct pipe_surface *surface); #else -#define debug_dump_image(prefix, format, cpp, width, height, pitch, data) ((void)0) +#define debug_dump_image(prefix, format, cpp, width, height, stride, data) ((void)0) +#define debug_dump_surface(prefix, surface) ((void)0) #endif -- cgit v1.2.3 From 1ee500ac73fa657f02321c46cf5d9a4bfdea54de Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 6 Aug 2008 14:51:15 +0100 Subject: pipebuffer: Add an extra assertion to ensure buffers do not jump between lists. --- src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index f599bee07e..ce41418a0f 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -142,12 +142,13 @@ _fenced_buffer_destroy(struct fenced_buffer *fenced_buf) static INLINE void -_fenced_buffer_remove(struct fenced_buffer *fenced_buf) +_fenced_buffer_remove(struct fenced_buffer_list *fenced_list, + struct fenced_buffer *fenced_buf) { - struct fenced_buffer_list *fenced_list = fenced_buf->list; struct pipe_winsys *winsys = fenced_list->winsys; assert(fenced_buf->fence); + assert(fenced_buf->list == fenced_list); winsys->fence_reference(winsys, &fenced_buf->fence, NULL); fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; @@ -184,7 +185,8 @@ _fenced_buffer_finish(struct fenced_buffer *fenced_buf) return PIPE_ERROR; } /* Remove from the fenced list */ - _fenced_buffer_remove(fenced_buf); /* TODO: remove consequents */ + /* TODO: remove consequents */ + _fenced_buffer_remove(fenced_list, fenced_buf); } fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE; @@ -223,7 +225,7 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0); } - _fenced_buffer_remove(fenced_buf); + _fenced_buffer_remove(fenced_list, fenced_buf); curr = next; next = curr->next; @@ -248,7 +250,7 @@ fenced_buffer_destroy(struct pb_buffer *buf) do { fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head); assert(winsys->fence_signalled(winsys, fenced_buf->fence, 0) == 0); - _fenced_buffer_remove(fenced_buf); + _fenced_buffer_remove(fenced_list, fenced_buf); curr = prev; prev = curr->prev; } while (curr != &fenced_list->delayed); @@ -395,7 +397,7 @@ buffer_fence(struct pb_buffer *buf, _glthread_LOCK_MUTEX(fenced_list->mutex); if (fenced_buf->fence) - _fenced_buffer_remove(fenced_buf); + _fenced_buffer_remove(fenced_list, fenced_buf); if (fence) { winsys->fence_reference(winsys, &fenced_buf->fence, fence); fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE; -- cgit v1.2.3 From fdafa1f8632bf4e92996291a094e98f4e58dc37c Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 7 Aug 2008 19:25:28 +0100 Subject: gallium: Simplify format->name conversion. --- src/gallium/auxiliary/util/p_debug.c | 7 +++---- src/gallium/include/pipe/p_format.h | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index a3db13f96d..082b0e9fb5 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -335,7 +335,7 @@ const char * debug_dump_enum(const struct debug_named_value *names, unsigned long value) { - static char rest[256]; + static char rest[64]; while(names->name) { if(names->value == value) @@ -498,10 +498,9 @@ void debug_print_format(const char *msg, unsigned fmt ) } #endif -char *pf_sprint_name( char *str, enum pipe_format format ) +const char *pf_name( enum pipe_format format ) { - strcpy( str, debug_dump_enum(pipe_format_names, format) ); - return str; + return debug_dump_enum(pipe_format_names, format); } diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index 947c445583..0a9cad8993 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -379,7 +379,7 @@ enum pipe_format { /** * Builds pipe format name from format token. */ -extern char *pf_sprint_name( char *str, enum pipe_format format ); +extern const char *pf_name( enum pipe_format format ); /** * Return bits for a particular component. -- cgit v1.2.3 From c8f319138355a5365303e6649a953422ecc88c5f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 7 Aug 2008 11:04:05 -0600 Subject: gallium: enable the call to r16_put_tile_rgba(), silences warning --- src/gallium/auxiliary/util/p_tile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index d65e603785..5521a6d08f 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -891,7 +891,7 @@ pipe_put_tile_rgba(struct pipe_surface *ps, /*a8_l8_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/ break; case PIPE_FORMAT_R16_SNORM: - /*r16_put_tile_rgba((short *) packed, w, h, p, src_stride);*/ + r16_put_tile_rgba((short *) packed, w, h, p, src_stride); break; case PIPE_FORMAT_R16G16B16A16_SNORM: r16g16b16a16_put_tile_rgba((short *) packed, w, h, p, src_stride); -- cgit v1.2.3 From 600aa7501f95c520ad97ac894cfd1a639562f94c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 7 Aug 2008 13:30:24 -0600 Subject: gallium: implement a bunch of missing put_tile functions --- src/gallium/auxiliary/util/p_tile.c | 163 ++++++++++++++++++++++++++++++------ 1 file changed, 137 insertions(+), 26 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index 5521a6d08f..e366039a27 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -109,7 +109,7 @@ pipe_put_tile_raw(struct pipe_surface *ps, /*** PIPE_FORMAT_A8R8G8B8_UNORM ***/ static void -a8r8g8b8_get_tile_rgba(unsigned *src, +a8r8g8b8_get_tile_rgba(const unsigned *src, unsigned w, unsigned h, float *p, unsigned dst_stride) @@ -156,7 +156,7 @@ a8r8g8b8_put_tile_rgba(unsigned *dst, /*** PIPE_FORMAT_A8R8G8B8_UNORM ***/ static void -x8r8g8b8_get_tile_rgba(unsigned *src, +x8r8g8b8_get_tile_rgba(const unsigned *src, unsigned w, unsigned h, float *p, unsigned dst_stride) @@ -202,7 +202,7 @@ x8r8g8b8_put_tile_rgba(unsigned *dst, /*** PIPE_FORMAT_B8G8R8A8_UNORM ***/ static void -b8g8r8a8_get_tile_rgba(unsigned *src, +b8g8r8a8_get_tile_rgba(const unsigned *src, unsigned w, unsigned h, float *p, unsigned dst_stride) @@ -249,7 +249,7 @@ b8g8r8a8_put_tile_rgba(unsigned *dst, /*** PIPE_FORMAT_A1R5G5B5_UNORM ***/ static void -a1r5g5b5_get_tile_rgba(ushort *src, +a1r5g5b5_get_tile_rgba(const ushort *src, unsigned w, unsigned h, float *p, unsigned dst_stride) @@ -270,10 +270,37 @@ a1r5g5b5_get_tile_rgba(ushort *src, } +static void +a1r5g5b5_put_tile_rgba(ushort *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]); + UNCLAMPED_FLOAT_TO_UBYTE(g, pRow[1]); + UNCLAMPED_FLOAT_TO_UBYTE(b, pRow[2]); + UNCLAMPED_FLOAT_TO_UBYTE(a, pRow[3]); + r = r >> 3; /* 5 bits */ + g = g >> 3; /* 5 bits */ + b = b >> 3; /* 5 bits */ + a = a >> 7; /* 1 bit */ + *dst++ = (a << 15) | (r << 10) | (g << 5) | b; + } + p += src_stride; + } +} + + /*** PIPE_FORMAT_A4R4G4B4_UNORM ***/ static void -a4r4g4b4_get_tile_rgba(ushort *src, +a4r4g4b4_get_tile_rgba(const ushort *src, unsigned w, unsigned h, float *p, unsigned dst_stride) @@ -324,7 +351,7 @@ a4r4g4b4_put_tile_rgba(ushort *dst, /*** PIPE_FORMAT_R5G6B5_UNORM ***/ static void -r5g6b5_get_tile_rgba(ushort *src, +r5g6b5_get_tile_rgba(const ushort *src, unsigned w, unsigned h, float *p, unsigned dst_stride) @@ -373,7 +400,7 @@ r5g6b5_put_tile_rgba(ushort *dst, * Return each Z value as four floats in [0,1]. */ static void -z16_get_tile_rgba(ushort *src, +z16_get_tile_rgba(const ushort *src, unsigned w, unsigned h, float *p, unsigned dst_stride) @@ -399,7 +426,7 @@ z16_get_tile_rgba(ushort *src, /*** PIPE_FORMAT_L8_UNORM ***/ static void -l8_get_tile_rgba(ubyte *src, +l8_get_tile_rgba(const ubyte *src, unsigned w, unsigned h, float *p, unsigned dst_stride) @@ -419,10 +446,31 @@ l8_get_tile_rgba(ubyte *src, } +static void +l8_put_tile_rgba(ubyte *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r; + UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]); + *dst++ = r; + } + p += src_stride; + } +} + + + /*** PIPE_FORMAT_A8_UNORM ***/ static void -a8_get_tile_rgba(ubyte *src, +a8_get_tile_rgba(const ubyte *src, unsigned w, unsigned h, float *p, unsigned dst_stride) @@ -441,10 +489,32 @@ a8_get_tile_rgba(ubyte *src, } } + +static void +a8_put_tile_rgba(ubyte *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned a; + UNCLAMPED_FLOAT_TO_UBYTE(a, pRow[3]); + *dst++ = a; + } + p += src_stride; + } +} + + + /*** PIPE_FORMAT_R16_SNORM ***/ static void -r16_get_tile_rgba(short *src, +r16_get_tile_rgba(const short *src, unsigned w, unsigned h, float *p, unsigned dst_stride) @@ -485,7 +555,7 @@ r16_put_tile_rgba(short *dst, /*** PIPE_FORMAT_R16G16B16A16_SNORM ***/ static void -r16g16b16a16_get_tile_rgba(short *src, +r16g16b16a16_get_tile_rgba(const short *src, unsigned w, unsigned h, float *p, unsigned dst_stride) @@ -530,7 +600,7 @@ r16g16b16a16_put_tile_rgba(short *dst, /*** PIPE_FORMAT_I8_UNORM ***/ static void -i8_get_tile_rgba(ubyte *src, +i8_get_tile_rgba(const ubyte *src, unsigned w, unsigned h, float *p, unsigned dst_stride) @@ -550,13 +620,33 @@ i8_get_tile_rgba(ubyte *src, } +static void +i8_put_tile_rgba(ubyte *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r; + UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]); + *dst++ = r; + } + p += src_stride; + } +} + + /*** PIPE_FORMAT_A8L8_UNORM ***/ static void -a8_l8_get_tile_rgba(ushort *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) +a8l8_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) { unsigned i, j; @@ -574,6 +664,27 @@ a8_l8_get_tile_rgba(ushort *src, } +static void +a8l8_put_tile_rgba(ushort *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, a; + UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]); + UNCLAMPED_FLOAT_TO_UBYTE(a, pRow[3]); + *dst++ = (a << 8) | r; + } + p += src_stride; + } +} + + /*** PIPE_FORMAT_Z32_UNORM ***/ @@ -582,7 +693,7 @@ a8_l8_get_tile_rgba(ushort *src, * Return each Z value as four floats in [0,1]. */ static void -z32_get_tile_rgba(unsigned *src, +z32_get_tile_rgba(const unsigned *src, unsigned w, unsigned h, float *p, unsigned dst_stride) @@ -609,7 +720,7 @@ z32_get_tile_rgba(unsigned *src, * Return Z component as four float in [0,1]. Stencil part ignored. */ static void -s8z24_get_tile_rgba(unsigned *src, +s8z24_get_tile_rgba(const unsigned *src, unsigned w, unsigned h, float *p, unsigned dst_stride) @@ -636,7 +747,7 @@ s8z24_get_tile_rgba(unsigned *src, * Return Z component as four float in [0,1]. Stencil part ignored. */ static void -z24s8_get_tile_rgba(unsigned *src, +z24s8_get_tile_rgba(const unsigned *src, unsigned w, unsigned h, float *p, unsigned dst_stride) @@ -663,7 +774,7 @@ z24s8_get_tile_rgba(unsigned *src, * Convert YCbCr (or YCrCb) to RGBA. */ static void -ycbcr_get_tile_rgba(ushort *src, +ycbcr_get_tile_rgba(const ushort *src, unsigned w, unsigned h, float *p, unsigned dst_stride, @@ -780,7 +891,7 @@ pipe_tile_raw_to_rgba(enum pipe_format format, i8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); break; case PIPE_FORMAT_A8L8_UNORM: - a8_l8_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + a8l8_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); break; case PIPE_FORMAT_R16_SNORM: r16_get_tile_rgba((short *) src, w, h, dst, dst_stride); @@ -867,7 +978,7 @@ pipe_put_tile_rgba(struct pipe_surface *ps, b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); break; case PIPE_FORMAT_A1R5G5B5_UNORM: - /*a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/ + a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); break; case PIPE_FORMAT_R5G6B5_UNORM: r5g6b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); @@ -879,16 +990,16 @@ pipe_put_tile_rgba(struct pipe_surface *ps, a4r4g4b4_put_tile_rgba((ushort *) packed, w, h, p, src_stride); break; case PIPE_FORMAT_L8_UNORM: - /*l8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/ + l8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); break; case PIPE_FORMAT_A8_UNORM: - /*a8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/ + a8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); break; case PIPE_FORMAT_I8_UNORM: - /*i8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride);*/ + i8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); break; case PIPE_FORMAT_A8L8_UNORM: - /*a8_l8_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/ + a8l8_put_tile_rgba((ushort *) packed, w, h, p, src_stride); break; case PIPE_FORMAT_R16_SNORM: r16_put_tile_rgba((short *) packed, w, h, p, src_stride); -- cgit v1.2.3 From 3e1974f94ef8796a5ac9e750d47ccb63c677a85b Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 9 Aug 2008 11:51:29 +0100 Subject: util: Utility functions to print to a string buffer without overflowing. --- src/gallium/auxiliary/util/u_string.h | 37 +++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_string.h b/src/gallium/auxiliary/util/u_string.h index 73c88d87b4..abc3232b49 100644 --- a/src/gallium/auxiliary/util/u_string.h +++ b/src/gallium/auxiliary/util/u_string.h @@ -176,6 +176,43 @@ util_memmove(void *dest, const void *src, size_t n) #endif +/** + * Printable string buffer + */ +struct util_strbuf +{ + char *str; + char *ptr; + size_t left; +}; + + +static INLINE void +util_strbuf_init(struct util_strbuf *sbuf, char *str, size_t size) +{ + sbuf->str = str; + sbuf->str[0] = 0; + sbuf->ptr = sbuf->str; + sbuf->left = size; +} + + +static INLINE void +util_strbuf_printf(struct util_strbuf *sbuf, const char *format, ...) +{ + if(sbuf->left > 1) { + size_t written; + va_list ap; + va_start(ap, format); + written = util_vsnprintf(sbuf->ptr, sbuf->left, format, ap); + va_end(ap); + sbuf->ptr += written; + sbuf->left -= written; + } +} + + + #ifdef __cplusplus } #endif -- cgit v1.2.3 From 673489fa5cde4ce8d49918f20f007201a17bc45e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 9 Aug 2008 11:52:27 +0100 Subject: tgsi: Dump shaders to a string too. Again. --- src/gallium/auxiliary/tgsi/tgsi_dump.c | 166 ++++++++++++++++++++++++--------- src/gallium/auxiliary/tgsi/tgsi_dump.h | 7 ++ 2 files changed, 127 insertions(+), 46 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 0eedb1c91e..29bb530b4d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -26,6 +26,7 @@ **************************************************************************/ #include "pipe/p_debug.h" +#include "util/u_string.h" #include "tgsi_dump.h" #include "tgsi_iterate.h" @@ -34,28 +35,31 @@ struct dump_ctx struct tgsi_iterate_context iter; uint instno; + + struct util_strbuf *sbuf; }; static void dump_enum( + struct util_strbuf *sbuf, uint e, const char **enums, uint enum_count ) { if (e >= enum_count) - debug_printf( "%u", e ); + util_strbuf_printf( sbuf, "%u", e ); else - debug_printf( "%s", enums[e] ); + util_strbuf_printf( sbuf, "%s", enums[e] ); } -#define EOL() debug_printf( "\n" ) -#define TXT(S) debug_printf( "%s", S ) -#define CHR(C) debug_printf( "%c", C ) -#define UIX(I) debug_printf( "0x%x", I ) -#define UID(I) debug_printf( "%u", I ) -#define SID(I) debug_printf( "%d", I ) -#define FLT(F) debug_printf( "%10.4f", F ) -#define ENM(E,ENUMS) dump_enum( E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) +#define EOL() util_strbuf_printf( sbuf, "\n" ) +#define TXT(S) util_strbuf_printf( sbuf, "%s", S ) +#define CHR(C) util_strbuf_printf( sbuf, "%c", C ) +#define UIX(I) util_strbuf_printf( sbuf, "0x%x", I ) +#define UID(I) util_strbuf_printf( sbuf, "%u", I ) +#define SID(I) util_strbuf_printf( sbuf, "%d", I ) +#define FLT(F) util_strbuf_printf( sbuf, "%10.4f", F ) +#define ENM(E,ENUMS) dump_enum( sbuf, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) static const char *processor_type_names[] = { @@ -266,6 +270,7 @@ static const char *modulate_names[TGSI_MODULATE_COUNT] = static void _dump_register( + struct util_strbuf *sbuf, uint file, int first, int last ) @@ -282,6 +287,7 @@ _dump_register( static void _dump_register_ind( + struct util_strbuf *sbuf, uint file, int index, uint ind_file, @@ -303,6 +309,7 @@ _dump_register_ind( static void _dump_writemask( + struct util_strbuf *sbuf, uint writemask ) { if (writemask != TGSI_WRITEMASK_XYZW) { @@ -318,17 +325,23 @@ _dump_writemask( } } -void -tgsi_dump_declaration( - const struct tgsi_full_declaration *decl ) +static boolean +iter_declaration( + struct tgsi_iterate_context *iter, + struct tgsi_full_declaration *decl ) { + struct dump_ctx *ctx = (struct dump_ctx *) iter; + struct util_strbuf *sbuf = ctx->sbuf; + TXT( "DCL " ); _dump_register( + sbuf, decl->Declaration.File, decl->DeclarationRange.First, decl->DeclarationRange.Last ); _dump_writemask( + sbuf, decl->Declaration.UsageMask ); if (decl->Declaration.Semantic) { @@ -346,21 +359,35 @@ tgsi_dump_declaration( ENM( decl->Declaration.Interpolate, interpolate_names ); EOL(); -} -static boolean -iter_declaration( - struct tgsi_iterate_context *iter, - struct tgsi_full_declaration *decl ) -{ - tgsi_dump_declaration( decl ); return TRUE; } void -tgsi_dump_immediate( - const struct tgsi_full_immediate *imm ) +tgsi_dump_declaration( + const struct tgsi_full_declaration *decl ) { + static char str[1024]; + struct util_strbuf sbuf; + struct dump_ctx ctx; + + util_strbuf_init(&sbuf, str, sizeof(str)); + + ctx.sbuf = &sbuf; + + iter_declaration( &ctx.iter, (struct tgsi_full_declaration *)decl ); + + debug_printf("%s", str); +} + +static boolean +iter_immediate( + struct tgsi_iterate_context *iter, + struct tgsi_full_immediate *imm ) +{ + struct dump_ctx *ctx = (struct dump_ctx *) iter; + struct util_strbuf *sbuf = ctx->sbuf; + uint i; TXT( "IMM " ); @@ -382,22 +409,36 @@ tgsi_dump_immediate( TXT( " }" ); EOL(); -} -static boolean -iter_immediate( - struct tgsi_iterate_context *iter, - struct tgsi_full_immediate *imm ) -{ - tgsi_dump_immediate( imm ); return TRUE; } void -tgsi_dump_instruction( - const struct tgsi_full_instruction *inst, - uint instno ) +tgsi_dump_immediate( + const struct tgsi_full_immediate *imm ) { + static char str[1024]; + struct util_strbuf sbuf; + struct dump_ctx ctx; + + util_strbuf_init(&sbuf, str, sizeof(str)); + + ctx.sbuf = &sbuf; + + iter_immediate( &ctx.iter, (struct tgsi_full_immediate *)imm ); + + debug_printf("%s", str); +} + +static boolean +iter_instruction( + struct tgsi_iterate_context *iter, + struct tgsi_full_instruction *inst ) +{ + struct dump_ctx *ctx = (struct dump_ctx *) iter; + struct util_strbuf *sbuf = ctx->sbuf; + uint instno = ctx->instno++; + uint i; boolean first_reg = TRUE; @@ -426,11 +467,12 @@ tgsi_dump_instruction( CHR( ' ' ); _dump_register( + sbuf, dst->DstRegister.File, dst->DstRegister.Index, dst->DstRegister.Index ); ENM( dst->DstRegisterExtModulate.Modulate, modulate_names ); - _dump_writemask( dst->DstRegister.WriteMask ); + _dump_writemask( sbuf, dst->DstRegister.WriteMask ); first_reg = FALSE; } @@ -457,6 +499,7 @@ tgsi_dump_instruction( if (src->SrcRegister.Indirect) { _dump_register_ind( + sbuf, src->SrcRegister.File, src->SrcRegister.Index, src->SrcRegisterInd.File, @@ -464,6 +507,7 @@ tgsi_dump_instruction( } else { _dump_register( + sbuf, src->SrcRegister.File, src->SrcRegister.Index, src->SrcRegister.Index ); @@ -529,38 +573,55 @@ tgsi_dump_instruction( } EOL(); + + return TRUE; } -static boolean -iter_instruction( - struct tgsi_iterate_context *iter, - struct tgsi_full_instruction *inst ) +void +tgsi_dump_instruction( + const struct tgsi_full_instruction *inst, + uint instno ) { - struct dump_ctx *ctx = (struct dump_ctx *) iter; + static char str[1024]; + struct util_strbuf sbuf; + struct dump_ctx ctx; - tgsi_dump_instruction( inst, ctx->instno++ ); - return TRUE; + util_strbuf_init(&sbuf, str, sizeof(str)); + + ctx.instno = instno; + ctx.sbuf = &sbuf; + + iter_instruction( &ctx.iter, (struct tgsi_full_instruction *)inst ); + + debug_printf("%s", str); } static boolean prolog( - struct tgsi_iterate_context *ctx ) + struct tgsi_iterate_context *iter ) { - ENM( ctx->processor.Processor, processor_type_names ); - UID( ctx->version.MajorVersion ); + struct dump_ctx *ctx = (struct dump_ctx *) iter; + struct util_strbuf *sbuf = ctx->sbuf; + ENM( iter->processor.Processor, processor_type_names ); + UID( iter->version.MajorVersion ); CHR( '.' ); - UID( ctx->version.MinorVersion ); + UID( iter->version.MinorVersion ); EOL(); return TRUE; } void -tgsi_dump( +tgsi_dump_str( const struct tgsi_token *tokens, - uint flags ) + uint flags, + char *str, + size_t size) { + struct util_strbuf sbuf; struct dump_ctx ctx; + util_strbuf_init(&sbuf, str, size); + /* sanity checks */ assert( strcmp( opcode_names[TGSI_OPCODE_CONT], "CONT" ) == 0 ); assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 ); @@ -572,6 +633,19 @@ tgsi_dump( ctx.iter.epilog = NULL; ctx.instno = 0; + ctx.sbuf = &sbuf; tgsi_iterate_shader( tokens, &ctx.iter ); } + +void +tgsi_dump( + const struct tgsi_token *tokens, + uint flags ) +{ + static char str[4096]; + + tgsi_dump_str(tokens, flags, str, sizeof(str)); + + debug_printf("%s", str); +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.h b/src/gallium/auxiliary/tgsi/tgsi_dump.h index 51c230b5db..ad1e647ec9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.h +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.h @@ -34,6 +34,13 @@ extern "C" { #endif +void +tgsi_dump_str( + const struct tgsi_token *tokens, + uint flags, + char *str, + size_t size); + void tgsi_dump( const struct tgsi_token *tokens, -- cgit v1.2.3 From 22604727e469f4a9881f722be319670d5cde4519 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 11 Aug 2008 14:59:26 -0600 Subject: gallium: emit sprite coords (gl_PointCoord) --- src/gallium/auxiliary/draw/draw_pipe_wide_point.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index d40a07f4ae..54590984c6 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -50,6 +50,8 @@ struct widepoint_stage { uint num_texcoords; int psize_slot; + + int point_coord_fs_input; /**< input for pointcoord (and fog) */ }; @@ -84,6 +86,13 @@ static void set_texcoords(const struct widepoint_stage *wide, v->data[j][3] = tc[3]; } } + + if (wide->point_coord_fs_input >= 0) { + /* put gl_PointCoord into extra vertex output's zw components */ + uint k = wide->stage.draw->extra_vp_outputs.slot; + v->data[k][2] = tc[0]; + v->data[k][3] = tc[1]; + } } @@ -208,6 +217,18 @@ static void widepoint_first_point( struct draw_stage *stage, } } wide->num_texcoords = j; + + /* find fragment shader PointCoord/Fog input */ + wide->point_coord_fs_input = 0; /* XXX fix this! */ + + /* setup extra vp output */ + draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_FOG; + draw->extra_vp_outputs.semantic_index = 0; + draw->extra_vp_outputs.slot = draw->vs.num_vs_outputs; + } + else { + wide->point_coord_fs_input = -1; + draw->extra_vp_outputs.slot = 0; } wide->psize_slot = -1; -- cgit v1.2.3 From 3b5ee3d6de2c08faf69c701bf05d8f33ccd01502 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 7 Aug 2008 09:12:55 +0100 Subject: gallium: Allow compilation inside X. --- src/gallium/auxiliary/util/u_string.h | 12 ++++++------ src/gallium/include/pipe/p_compiler.h | 5 +++++ 2 files changed, 11 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_string.h b/src/gallium/auxiliary/util/u_string.h index abc3232b49..08c89bbf77 100644 --- a/src/gallium/auxiliary/util/u_string.h +++ b/src/gallium/auxiliary/util/u_string.h @@ -28,14 +28,14 @@ /** * @file * Platform independent functions for string manipulation. - * + * * @author Jose Fonseca */ #ifndef U_STRING_H_ #define U_STRING_H_ -#ifndef WIN32 +#if !defined(WIN32) && !defined(XF86_LIBC_H) #include #endif #include @@ -48,19 +48,19 @@ extern "C" { #endif - + #ifdef WIN32 - + int util_vsnprintf(char *, size_t, const char *, va_list); int util_snprintf(char *str, size_t size, const char *format, ...); -static INLINE void +static INLINE void util_vsprintf(char *str, const char *format, va_list ap) { util_vsnprintf(str, (size_t)-1, format, ap); } -static INLINE void +static INLINE void util_sprintf(char *str, const char *format, ...) { va_list ap; diff --git a/src/gallium/include/pipe/p_compiler.h b/src/gallium/include/pipe/p_compiler.h index 2afb8464c7..4d64c74a4a 100644 --- a/src/gallium/include/pipe/p_compiler.h +++ b/src/gallium/include/pipe/p_compiler.h @@ -31,8 +31,13 @@ #include "p_config.h" +#ifndef XFree86Server #include #include +#else +#include "xf86_ansic.h" +#include "xf86_libc.h" +#endif #if defined(_WIN32) && !defined(__WIN32__) -- cgit v1.2.3 From 80d3a653f0172f01be694a29456c70f1f4da1812 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 7 Aug 2008 09:13:11 +0100 Subject: tgsi: Prevent division by zero. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 8b430548bc..b93f3d5222 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -393,10 +393,18 @@ micro_div( const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { - dst->f[0] = src0->f[0] / src1->f[0]; - dst->f[1] = src0->f[1] / src1->f[1]; - dst->f[2] = src0->f[2] / src1->f[2]; - dst->f[3] = src0->f[3] / src1->f[3]; + if (src1->f[0] != 0) { + dst->f[0] = src0->f[0] / src1->f[0]; + } + if (src1->f[1] != 0) { + dst->f[1] = src0->f[1] / src1->f[1]; + } + if (src1->f[2] != 0) { + dst->f[2] = src0->f[2] / src1->f[2]; + } + if (src1->f[3] != 0) { + dst->f[3] = src0->f[3] / src1->f[3]; + } } static void -- cgit v1.2.3 From 0f5a2ebec4c16e71bde7132edb8c2209b4000c61 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 7 Aug 2008 09:15:32 +0100 Subject: gallium: Disable debug break by default on windows. --- src/gallium/auxiliary/util/p_debug.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 082b0e9fb5..2c2f2f8931 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -324,7 +324,11 @@ void _debug_assert_fail(const char *expr, const char *function) { _debug_printf("%s:%u:%s: Assertion `%s' failed.\n", file, line, function, expr); +#if defined(PIPE_OS_WINDOWS) + if (debug_get_bool_option("GALLIUM_ABORT_ON_ASSERT", FALSE)) +#else if (debug_get_bool_option("GALLIUM_ABORT_ON_ASSERT", TRUE)) +#endif debug_break(); else _debug_printf("continuing...\n"); -- cgit v1.2.3 From faad6655946968dd16ab30cc8d5fbd5a09321976 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 12 Aug 2008 12:00:57 -0600 Subject: gallium: distinguish between KIL and KILP Note: KIL (unconditional) not done yet. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 0cb1f11ef2..f3a202ae89 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1002,7 +1002,7 @@ emit_store( */ static void -emit_kil( +emit_kilp( struct x86_function *func, const struct tgsi_full_src_register *reg ) { @@ -1096,6 +1096,15 @@ emit_kil( x86_make_reg( file_REG32, reg_AX ) ); } + +static void +emit_kil( + struct x86_function *func ) +{ + /* XXX todo / fix me */ +} + + static void emit_setcc( struct x86_function *func, @@ -1609,8 +1618,15 @@ emit_instruction( return 0; break; + case TGSI_OPCODE_KILP: + /* predicated kill */ + emit_kilp( func, &inst->FullSrcRegisters[0] ); + break; + case TGSI_OPCODE_KIL: - emit_kil( func, &inst->FullSrcRegisters[0] ); + /* unconditional kill */ + emit_kil( func ); + return 0; /* XXX fix me */ break; case TGSI_OPCODE_PK2H: -- cgit v1.2.3 From 0c8f4c25ff1cdf8d4cad21789e0c73b41aa29c98 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 13 Aug 2008 10:52:55 +0200 Subject: draw: Use KIL instead of KILP. --- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 4 ++-- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index 13b4401521..c7f4349cb3 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -301,9 +301,9 @@ aa_transform_inst(struct tgsi_transform_context *ctx, newInst.FullSrcRegisters[1].SrcRegister.SwizzleY = TGSI_SWIZZLE_W; ctx->emit_instruction(ctx, &newInst); - /* KILP -t0.yyyy; # if b, KILL */ + /* KIL -tmp0.yyyy; # if -tmp0.y < 0, KILL */ newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_KILP; + newInst.Instruction.Opcode = TGSI_OPCODE_KIL; newInst.Instruction.NumDstRegs = 0; newInst.Instruction.NumSrcRegs = 1; newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index d3bd9baddd..e97136fa1f 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -299,9 +299,9 @@ pstip_transform_inst(struct tgsi_transform_context *ctx, newInst.FullSrcRegisters[1].SrcRegister.Index = pctx->freeSampler; ctx->emit_instruction(ctx, &newInst); - /* KILP texTemp; # if texTemp < 0, KILL fragment */ + /* KIL -texTemp; # if -texTemp < 0, KILL fragment */ newInst = tgsi_default_full_instruction(); - newInst.Instruction.Opcode = TGSI_OPCODE_KILP; + newInst.Instruction.Opcode = TGSI_OPCODE_KIL; newInst.Instruction.NumDstRegs = 0; newInst.Instruction.NumSrcRegs = 1; newInst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_TEMPORARY; -- cgit v1.2.3 From f633b14b92352d8abc2a178a1f6ff80a58662bb1 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 13 Aug 2008 10:58:54 +0200 Subject: gallivm: Translate KIL instead of KILP. --- src/gallium/auxiliary/gallivm/instructions.cpp | 4 ++-- src/gallium/auxiliary/gallivm/instructions.h | 2 +- src/gallium/auxiliary/gallivm/llvm_builtins.c | 2 +- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 18 +++++++++--------- 4 files changed, 13 insertions(+), 13 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index 1a98491b82..035224e8f3 100644 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -878,9 +878,9 @@ llvm::Value * Instructions::scs(llvm::Value *in) return call; } -llvm::Value * Instructions::kilp(llvm::Value *in) +llvm::Value * Instructions::kil(llvm::Value *in) { - llvm::Function *func = m_mod->getFunction("kilp"); + llvm::Function *func = m_mod->getFunction("kil"); assert(func); CallInst *call = m_builder.CreateCall(func, in, name("kilpres")); diff --git a/src/gallium/auxiliary/gallivm/instructions.h b/src/gallium/auxiliary/gallivm/instructions.h index 3a476928b6..d286ce80c7 100644 --- a/src/gallium/auxiliary/gallivm/instructions.h +++ b/src/gallium/auxiliary/gallivm/instructions.h @@ -79,7 +79,7 @@ public: llvm::Value *floor(llvm::Value *in); llvm::Value *frc(llvm::Value *in); void ifop(llvm::Value *in); - llvm::Value *kilp(llvm::Value *in); + llvm::Value *kil(llvm::Value *in); llvm::Value *lerp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); llvm::Value *lit(llvm::Value *in); diff --git a/src/gallium/auxiliary/gallivm/llvm_builtins.c b/src/gallium/auxiliary/gallivm/llvm_builtins.c index 6b9d626ed4..d5a003a48b 100644 --- a/src/gallium/auxiliary/gallivm/llvm_builtins.c +++ b/src/gallium/auxiliary/gallivm/llvm_builtins.c @@ -105,7 +105,7 @@ inline float4 vsin(float4 val) return result; } -inline int kilp(float4 val) +inline int kil(float4 val) { if (val.x < 0 || val.y < 0 || val.z < 0 || val.w < 0) return 1; diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index b14e2affd6..cc1516a45e 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -396,11 +396,7 @@ translate_instruction(llvm::Module *module, break; case TGSI_OPCODE_DDY: break; - case TGSI_OPCODE_KILP: { - out = instr->kilp(inputs[0]); - storage->setKilElement(out); - return; - } + case TGSI_OPCODE_KILP: break; case TGSI_OPCODE_PK2H: break; @@ -602,7 +598,11 @@ translate_instruction(llvm::Module *module, break; case TGSI_OPCODE_BREAKC: break; - case TGSI_OPCODE_KIL: + case TGSI_OPCODE_KIL: { + out = instr->kil(inputs[0]); + storage->setKilElement(out); + return; + } break; case TGSI_OPCODE_END: instr->end(); @@ -799,8 +799,7 @@ translate_instructionir(llvm::Module *module, break; case TGSI_OPCODE_DDY: break; - case TGSI_OPCODE_KILP: { - } + case TGSI_OPCODE_KILP: break; case TGSI_OPCODE_PK2H: break; @@ -967,7 +966,8 @@ translate_instructionir(llvm::Module *module, break; case TGSI_OPCODE_BREAKC: break; - case TGSI_OPCODE_KIL: + case TGSI_OPCODE_KIL: { + } break; case TGSI_OPCODE_END: instr->end(); -- cgit v1.2.3 From f0874d1a6b832e1bb29256661cb8beab3ddeb528 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 13 Aug 2008 11:09:20 +0200 Subject: tgsi: Swap meanings of KIL and KILP opcodes. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 22 ++++++++++++++++++---- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 12 ++++++------ 2 files changed, 24 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index b93f3d5222..c4ba667d32 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1189,8 +1189,8 @@ store_dest( * Kill fragment if any of the four values is less than zero. */ static void -exec_kilp(struct tgsi_exec_machine *mach, - const struct tgsi_full_instruction *inst) +exec_kil(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) { uint uniquemask; uint chan_index; @@ -1226,6 +1226,21 @@ exec_kilp(struct tgsi_exec_machine *mach, mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; } +/** + * Execute NVIDIA-style KIL which is predicated by a condition code. + * Kill fragment if the condition code is TRUE. + */ +static void +exec_kilp(struct tgsi_exec_machine *mach, + const struct tgsi_full_instruction *inst) +{ + uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + + /* TODO: build kilmask from CC mask */ + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; +} + /* * Fetch a texel using STR texture coordinates. @@ -1971,8 +1986,7 @@ exec_instruction( break; case TGSI_OPCODE_KIL: - /* for enabled ExecMask bits, set the killed bit */ - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= mach->ExecMask; + exec_kil (mach, inst); break; case TGSI_OPCODE_PK2H: diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index f3a202ae89..47dc06faf6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1002,7 +1002,7 @@ emit_store( */ static void -emit_kilp( +emit_kil( struct x86_function *func, const struct tgsi_full_src_register *reg ) { @@ -1098,7 +1098,7 @@ emit_kilp( static void -emit_kil( +emit_kilp( struct x86_function *func ) { /* XXX todo / fix me */ @@ -1620,13 +1620,13 @@ emit_instruction( case TGSI_OPCODE_KILP: /* predicated kill */ - emit_kilp( func, &inst->FullSrcRegisters[0] ); + emit_kilp( func ); + return 0; /* XXX fix me */ break; case TGSI_OPCODE_KIL: - /* unconditional kill */ - emit_kil( func ); - return 0; /* XXX fix me */ + /* conditional kill */ + emit_kil( func, &inst->FullSrcRegisters[0] ); break; case TGSI_OPCODE_PK2H: -- cgit v1.2.3 From 94f9faab31f7fbf5f14d23d993f9830fa51ce076 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 13 Aug 2008 17:16:59 +0200 Subject: tgsi: Opcode information. --- src/gallium/auxiliary/tgsi/tgsi_info.c | 161 +++++++++++++++++++++++++++++++++ src/gallium/auxiliary/tgsi/tgsi_info.h | 53 +++++++++++ 2 files changed, 214 insertions(+) create mode 100644 src/gallium/auxiliary/tgsi/tgsi_info.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_info.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c new file mode 100644 index 0000000000..a4899cd4c2 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -0,0 +1,161 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "pipe/p_debug.h" +#include "tgsi_info.h" + +static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = +{ + { 1, 1, 0, 0, "ARL" }, + { 1, 1, 0, 0, "MOV" }, + { 1, 1, 0, 0, "LIT" }, + { 1, 1, 0, 0, "RCP" }, + { 1, 1, 0, 0, "RSQ" }, + { 1, 1, 0, 0, "EXP" }, + { 1, 1, 0, 0, "LOG" }, + { 1, 2, 0, 0, "MUL" }, + { 1, 2, 0, 0, "ADD" }, + { 1, 2, 0, 0, "DP3" }, + { 1, 2, 0, 0, "DP4" }, + { 1, 2, 0, 0, "DST" }, + { 1, 2, 0, 0, "MIN" }, + { 1, 2, 0, 0, "MAX" }, + { 1, 2, 0, 0, "SLT" }, + { 1, 2, 0, 0, "SGE" }, + { 1, 3, 0, 0, "MAD" }, + { 1, 2, 0, 0, "SUB" }, + { 1, 3, 0, 0, "LERP" }, + { 1, 3, 0, 0, "CND" }, + { 1, 3, 0, 0, "CND0" }, + { 1, 3, 0, 0, "DOT2ADD" }, + { 1, 2, 0, 0, "INDEX" }, + { 1, 1, 0, 0, "NEGATE" }, + { 1, 1, 0, 0, "FRAC" }, + { 1, 3, 0, 0, "CLAMP" }, + { 1, 1, 0, 0, "FLOOR" }, + { 1, 1, 0, 0, "ROUND" }, + { 1, 1, 0, 0, "EXPBASE2" }, + { 1, 1, 0, 0, "LOGBASE2" }, + { 1, 2, 0, 0, "POWER" }, + { 1, 2, 0, 0, "CROSSPRODUCT" }, + { 1, 2, 0, 0, "MULTIPLYMATRIX" }, + { 1, 1, 0, 0, "ABS" }, + { 1, 1, 0, 0, "RCC" }, + { 1, 2, 0, 0, "DPH" }, + { 1, 1, 0, 0, "COS" }, + { 1, 1, 0, 0, "DDX" }, + { 1, 1, 0, 0, "DDY" }, + { 0, 1, 0, 0, "KILP" }, + { 1, 1, 0, 0, "PK2H" }, + { 1, 1, 0, 0, "PK2US" }, + { 1, 1, 0, 0, "PK4B" }, + { 1, 1, 0, 0, "PK4UB" }, + { 1, 2, 0, 0, "RFL" }, + { 1, 2, 0, 0, "SEQ" }, + { 1, 2, 0, 0, "SFL" }, + { 1, 2, 0, 0, "SGT" }, + { 1, 1, 0, 0, "SIN" }, + { 1, 2, 0, 0, "SLE" }, + { 1, 2, 0, 0, "SNE" }, + { 1, 2, 0, 0, "STR" }, + { 1, 2, 1, 0, "TEX" }, + { 1, 4, 1, 0, "TXD" }, + { 1, 2, 1, 0, "TXP" }, + { 1, 1, 0, 0, "UP2H" }, + { 1, 1, 0, 0, "UP2US" }, + { 1, 1, 0, 0, "UP4B" }, + { 1, 1, 0, 0, "UP4UB" }, + { 1, 3, 0, 0, "X2D" }, + { 1, 1, 0, 0, "ARA" }, + { 1, 1, 0, 0, "ARR" }, + { 0, 1, 0, 0, "BRA" }, + { 0, 0, 0, 1, "CAL" }, + { 0, 0, 0, 0, "RET" }, + { 1, 1, 0, 0, "SSG" }, + { 1, 3, 0, 0, "CMP" }, + { 1, 1, 0, 0, "SCS" }, + { 1, 2, 1, 0, "TXB" }, + { 1, 1, 0, 0, "NRM" }, + { 1, 2, 0, 0, "DIV" }, + { 1, 2, 0, 0, "DP2" }, + { 1, 2, 1, 0, "TXL" }, + { 0, 0, 0, 0, "BRK" }, + { 0, 1, 0, 1, "IF" }, + { 0, 0, 0, 0, "LOOP" }, + { 0, 1, 0, 0, "REP" }, + { 0, 0, 0, 1, "ELSE" }, + { 0, 0, 0, 0, "ENDIF" }, + { 0, 0, 0, 0, "ENDLOOP" }, + { 0, 0, 0, 0, "ENDREP" }, + { 0, 1, 0, 0, "PUSHA" }, + { 1, 0, 0, 0, "POPA" }, + { 1, 1, 0, 0, "CEIL" }, + { 1, 1, 0, 0, "I2F" }, + { 1, 1, 0, 0, "NOT" }, + { 1, 1, 0, 0, "TRUNC" }, + { 1, 2, 0, 0, "SHL" }, + { 1, 2, 0, 0, "SHR" }, + { 1, 2, 0, 0, "AND" }, + { 1, 2, 0, 0, "OR" }, + { 1, 2, 0, 0, "MOD" }, + { 1, 2, 0, 0, "XOR" }, + { 1, 3, 0, 0, "SAD" }, + { 1, 2, 1, 0, "TXF" }, + { 1, 2, 1, 0, "TXQ" }, + { 0, 0, 0, 0, "CONT" }, + { 0, 0, 0, 0, "EMIT" }, + { 0, 0, 0, 0, "ENDPRIM" }, + { 0, 0, 0, 1, "BGNLOOP2" }, + { 0, 0, 0, 0, "BGNSUB" }, + { 0, 0, 0, 1, "ENDLOOP2" }, + { 0, 0, 0, 0, "ENDSUB" }, + { 1, 1, 0, 0, "NOISE1" }, + { 1, 1, 0, 0, "NOISE2" }, + { 1, 1, 0, 0, "NOISE3" }, + { 1, 1, 0, 0, "NOISE4" }, + { 0, 0, 0, 0, "NOP" }, + { 1, 2, 0, 0, "M4X3" }, + { 1, 2, 0, 0, "M3X4" }, + { 1, 2, 0, 0, "M3X3" }, + { 1, 2, 0, 0, "M3X2" }, + { 1, 1, 0, 0, "NRM4" }, + { 0, 1, 0, 0, "CALLNZ" }, + { 0, 1, 0, 0, "IFC" }, + { 0, 1, 0, 0, "BREAKC" }, + { 0, 0, 0, 0, "KIL" }, + { 0, 0, 0, 0, "END" }, + { 1, 1, 0, 0, "SWZ" } +}; + +const struct tgsi_opcode_info * +tgsi_get_opcode_info( uint opcode ) +{ + if (opcode < TGSI_OPCODE_LAST) + return &opcode_info[opcode]; + assert( 0 ); + return NULL; +} diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h new file mode 100644 index 0000000000..7230bdaae3 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_info.h @@ -0,0 +1,53 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_INFO_H +#define TGSI_INFO_H + +#include "pipe/p_shader_tokens.h" + +#if defined __cplusplus +extern "C" { +#endif + +struct tgsi_opcode_info +{ + uint num_dst; + uint num_src; + boolean is_tex; + boolean is_branch; + const char *mnemonic; +}; + +const struct tgsi_opcode_info * +tgsi_get_opcode_info( uint opcode ); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_INFO_H */ -- cgit v1.2.3 From 668ac2572548b7818b89a424f44e9c9c59786df0 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 13 Aug 2008 17:19:20 +0200 Subject: scons: List tgsi_info.c. --- src/gallium/auxiliary/tgsi/SConscript | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript index 03982e2194..45bf3f6d57 100644 --- a/src/gallium/auxiliary/tgsi/SConscript +++ b/src/gallium/auxiliary/tgsi/SConscript @@ -7,6 +7,7 @@ tgsi = env.ConvenienceLibrary( 'tgsi_dump.c', 'tgsi_dump_c.c', 'tgsi_exec.c', + 'tgsi_info.c', 'tgsi_iterate.c', 'tgsi_parse.c', 'tgsi_sanity.c', -- cgit v1.2.3 From 6d83a0cc6846adb546794c0483694fdb1d1b4664 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 13 Aug 2008 17:20:14 +0200 Subject: make: List tgsi_info.c. --- src/gallium/auxiliary/tgsi/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index bbeff1304d..806a2bd4c5 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -7,6 +7,7 @@ C_SOURCES = \ tgsi_build.c \ tgsi_dump.c \ tgsi_exec.c \ + tgsi_info.c \ tgsi_iterate.c \ tgsi_parse.c \ tgsi_scan.c \ -- cgit v1.2.3 From 79e52779bdcd1fa0ff8e1cbdc7555746205ee519 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 13 Aug 2008 17:22:34 +0200 Subject: tgsi: Use tgsi_opcode_info. --- src/gallium/auxiliary/tgsi/tgsi_text.c | 137 +-------------------------------- 1 file changed, 3 insertions(+), 134 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 35cb3055bb..7cdf1b67c5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -28,6 +28,7 @@ #include "pipe/p_debug.h" #include "tgsi_text.h" #include "tgsi_build.h" +#include "tgsi_info.h" #include "tgsi_parse.h" #include "tgsi_sanity.h" #include "tgsi_util.h" @@ -719,138 +720,6 @@ parse_src_operand( return TRUE; } -struct opcode_info -{ - uint num_dst; - uint num_src; - uint is_tex; - uint is_branch; - const char *mnemonic; -}; - -static const struct opcode_info opcode_info[TGSI_OPCODE_LAST] = -{ - { 1, 1, 0, 0, "ARL" }, - { 1, 1, 0, 0, "MOV" }, - { 1, 1, 0, 0, "LIT" }, - { 1, 1, 0, 0, "RCP" }, - { 1, 1, 0, 0, "RSQ" }, - { 1, 1, 0, 0, "EXP" }, - { 1, 1, 0, 0, "LOG" }, - { 1, 2, 0, 0, "MUL" }, - { 1, 2, 0, 0, "ADD" }, - { 1, 2, 0, 0, "DP3" }, - { 1, 2, 0, 0, "DP4" }, - { 1, 2, 0, 0, "DST" }, - { 1, 2, 0, 0, "MIN" }, - { 1, 2, 0, 0, "MAX" }, - { 1, 2, 0, 0, "SLT" }, - { 1, 2, 0, 0, "SGE" }, - { 1, 3, 0, 0, "MAD" }, - { 1, 2, 0, 0, "SUB" }, - { 1, 3, 0, 0, "LERP" }, - { 1, 3, 0, 0, "CND" }, - { 1, 3, 0, 0, "CND0" }, - { 1, 3, 0, 0, "DOT2ADD" }, - { 1, 2, 0, 0, "INDEX" }, - { 1, 1, 0, 0, "NEGATE" }, - { 1, 1, 0, 0, "FRAC" }, - { 1, 3, 0, 0, "CLAMP" }, - { 1, 1, 0, 0, "FLOOR" }, - { 1, 1, 0, 0, "ROUND" }, - { 1, 1, 0, 0, "EXPBASE2" }, - { 1, 1, 0, 0, "LOGBASE2" }, - { 1, 2, 0, 0, "POWER" }, - { 1, 2, 0, 0, "CROSSPRODUCT" }, - { 1, 2, 0, 0, "MULTIPLYMATRIX" }, - { 1, 1, 0, 0, "ABS" }, - { 1, 1, 0, 0, "RCC" }, - { 1, 2, 0, 0, "DPH" }, - { 1, 1, 0, 0, "COS" }, - { 1, 1, 0, 0, "DDX" }, - { 1, 1, 0, 0, "DDY" }, - { 0, 1, 0, 0, "KILP" }, - { 1, 1, 0, 0, "PK2H" }, - { 1, 1, 0, 0, "PK2US" }, - { 1, 1, 0, 0, "PK4B" }, - { 1, 1, 0, 0, "PK4UB" }, - { 1, 2, 0, 0, "RFL" }, - { 1, 2, 0, 0, "SEQ" }, - { 1, 2, 0, 0, "SFL" }, - { 1, 2, 0, 0, "SGT" }, - { 1, 1, 0, 0, "SIN" }, - { 1, 2, 0, 0, "SLE" }, - { 1, 2, 0, 0, "SNE" }, - { 1, 2, 0, 0, "STR" }, - { 1, 2, 1, 0, "TEX" }, - { 1, 4, 1, 0, "TXD" }, - { 1, 2, 1, 0, "TXP" }, - { 1, 1, 0, 0, "UP2H" }, - { 1, 1, 0, 0, "UP2US" }, - { 1, 1, 0, 0, "UP4B" }, - { 1, 1, 0, 0, "UP4UB" }, - { 1, 3, 0, 0, "X2D" }, - { 1, 1, 0, 0, "ARA" }, - { 1, 1, 0, 0, "ARR" }, - { 0, 1, 0, 0, "BRA" }, - { 0, 0, 0, 1, "CAL" }, - { 0, 0, 0, 0, "RET" }, - { 1, 1, 0, 0, "SSG" }, - { 1, 3, 0, 0, "CMP" }, - { 1, 1, 0, 0, "SCS" }, - { 1, 2, 1, 0, "TXB" }, - { 1, 1, 0, 0, "NRM" }, - { 1, 2, 0, 0, "DIV" }, - { 1, 2, 0, 0, "DP2" }, - { 1, 2, 1, 0, "TXL" }, - { 0, 0, 0, 0, "BRK" }, - { 0, 1, 0, 1, "IF" }, - { 0, 0, 0, 0, "LOOP" }, - { 0, 1, 0, 0, "REP" }, - { 0, 0, 0, 1, "ELSE" }, - { 0, 0, 0, 0, "ENDIF" }, - { 0, 0, 0, 0, "ENDLOOP" }, - { 0, 0, 0, 0, "ENDREP" }, - { 0, 1, 0, 0, "PUSHA" }, - { 1, 0, 0, 0, "POPA" }, - { 1, 1, 0, 0, "CEIL" }, - { 1, 1, 0, 0, "I2F" }, - { 1, 1, 0, 0, "NOT" }, - { 1, 1, 0, 0, "TRUNC" }, - { 1, 2, 0, 0, "SHL" }, - { 1, 2, 0, 0, "SHR" }, - { 1, 2, 0, 0, "AND" }, - { 1, 2, 0, 0, "OR" }, - { 1, 2, 0, 0, "MOD" }, - { 1, 2, 0, 0, "XOR" }, - { 1, 3, 0, 0, "SAD" }, - { 1, 2, 1, 0, "TXF" }, - { 1, 2, 1, 0, "TXQ" }, - { 0, 0, 0, 0, "CONT" }, - { 0, 0, 0, 0, "EMIT" }, - { 0, 0, 0, 0, "ENDPRIM" }, - { 0, 0, 0, 1, "BGNLOOP2" }, - { 0, 0, 0, 0, "BGNSUB" }, - { 0, 0, 0, 1, "ENDLOOP2" }, - { 0, 0, 0, 0, "ENDSUB" }, - { 1, 1, 0, 0, "NOISE1" }, - { 1, 1, 0, 0, "NOISE2" }, - { 1, 1, 0, 0, "NOISE3" }, - { 1, 1, 0, 0, "NOISE4" }, - { 0, 0, 0, 0, "NOP" }, - { 1, 2, 0, 0, "M4X3" }, - { 1, 2, 0, 0, "M3X4" }, - { 1, 2, 0, 0, "M3X3" }, - { 1, 2, 0, 0, "M3X2" }, - { 1, 1, 0, 0, "NRM4" }, - { 0, 1, 0, 0, "CALLNZ" }, - { 0, 1, 0, 0, "IFC" }, - { 0, 1, 0, 0, "BREAKC" }, - { 0, 0, 0, 0, "KIL" }, - { 0, 0, 0, 0, "END" }, - { 1, 1, 0, 0, "SWZ" } -}; - static const char *texture_names[TGSI_TEXTURE_COUNT] = { "UNKNOWN", @@ -871,7 +740,7 @@ parse_instruction( { uint i; uint saturate = TGSI_SAT_NONE; - const struct opcode_info *info; + const struct tgsi_opcode_info *info; struct tgsi_full_instruction inst; uint advance; @@ -881,7 +750,7 @@ parse_instruction( for (i = 0; i < TGSI_OPCODE_LAST; i++) { const char *cur = ctx->cur; - info = &opcode_info[i]; + info = tgsi_get_opcode_info( i ); if (str_match_no_case( &cur, info->mnemonic )) { if (str_match_no_case( &cur, "_SATNV" )) saturate = TGSI_SAT_MINUS_PLUS_ONE; -- cgit v1.2.3 From 2caaba8195d1019702246bd7f0c02aa95364a8bd Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 13 Aug 2008 17:27:15 +0200 Subject: tgsi: Use tgsi_info to dump opcode names. --- src/gallium/auxiliary/tgsi/tgsi_dump.c | 130 +----------------------------- src/gallium/auxiliary/tgsi/tgsi_dump_c.c | 133 +------------------------------ 2 files changed, 6 insertions(+), 257 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 29bb530b4d..4309d1bc76 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -28,6 +28,7 @@ #include "pipe/p_debug.h" #include "util/u_string.h" #include "tgsi_dump.h" +#include "tgsi_info.h" #include "tgsi_iterate.h" struct dump_ctx @@ -103,129 +104,6 @@ static const char *immediate_type_names[] = "FLT32" }; -static const char *opcode_names[TGSI_OPCODE_LAST] = -{ - "ARL", - "MOV", - "LIT", - "RCP", - "RSQ", - "EXP", - "LOG", - "MUL", - "ADD", - "DP3", - "DP4", - "DST", - "MIN", - "MAX", - "SLT", - "SGE", - "MAD", - "SUB", - "LERP", - "CND", - "CND0", - "DOT2ADD", - "INDEX", - "NEGATE", - "FRAC", - "CLAMP", - "FLOOR", - "ROUND", - "EXPBASE2", - "LOGBASE2", - "POWER", - "CROSSPRODUCT", - "MULTIPLYMATRIX", - "ABS", - "RCC", - "DPH", - "COS", - "DDX", - "DDY", - "KILP", - "PK2H", - "PK2US", - "PK4B", - "PK4UB", - "RFL", - "SEQ", - "SFL", - "SGT", - "SIN", - "SLE", - "SNE", - "STR", - "TEX", - "TXD", - "TXP", - "UP2H", - "UP2US", - "UP4B", - "UP4UB", - "X2D", - "ARA", - "ARR", - "BRA", - "CAL", - "RET", - "SSG", - "CMP", - "SCS", - "TXB", - "NRM", - "DIV", - "DP2", - "TXL", - "BRK", - "IF", - "LOOP", - "REP", - "ELSE", - "ENDIF", - "ENDLOOP", - "ENDREP", - "PUSHA", - "POPA", - "CEIL", - "I2F", - "NOT", - "TRUNC", - "SHL", - "SHR", - "AND", - "OR", - "MOD", - "XOR", - "SAD", - "TXF", - "TXQ", - "CONT", - "EMIT", - "ENDPRIM", - "BGNLOOP2", - "BGNSUB", - "ENDLOOP2", - "ENDSUB", - "NOISE1", - "NOISE2", - "NOISE3", - "NOISE4", - "NOP", - "M4X3", - "M3X4", - "M3X3", - "M3X2", - "NRM4", - "CALLNZ", - "IFC", - "BREAKC", - "KIL", - "END", - "SWZ" -}; - static const char *swizzle_names[] = { "x", @@ -444,7 +322,7 @@ iter_instruction( UID( instno ); CHR( ':' ); - ENM( inst->Instruction.Opcode, opcode_names ); + TXT( tgsi_get_opcode_info( inst->Instruction.Opcode )->mnemonic ); switch (inst->Instruction.Saturate) { case TGSI_SAT_NONE: @@ -622,10 +500,6 @@ tgsi_dump_str( util_strbuf_init(&sbuf, str, size); - /* sanity checks */ - assert( strcmp( opcode_names[TGSI_OPCODE_CONT], "CONT" ) == 0 ); - assert( strcmp( opcode_names[TGSI_OPCODE_END], "END" ) == 0 ); - ctx.iter.prolog = prolog; ctx.iter.iterate_instruction = iter_instruction; ctx.iter.iterate_declaration = iter_declaration; diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c index eabd74bd6d..1025866a25 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -29,8 +29,9 @@ #include "pipe/p_util.h" #include "util/u_string.h" #include "tgsi_dump_c.h" -#include "tgsi_parse.h" #include "tgsi_build.h" +#include "tgsi_info.h" +#include "tgsi_parse.h" static void dump_enum( @@ -104,128 +105,6 @@ static const char *TGSI_IMMS[] = "IMM_FLOAT32" }; -static const char *TGSI_OPCODES[TGSI_OPCODE_LAST] = -{ - "OPCODE_ARL", - "OPCODE_MOV", - "OPCODE_LIT", - "OPCODE_RCP", - "OPCODE_RSQ", - "OPCODE_EXP", - "OPCODE_LOG", - "OPCODE_MUL", - "OPCODE_ADD", - "OPCODE_DP3", - "OPCODE_DP4", - "OPCODE_DST", - "OPCODE_MIN", - "OPCODE_MAX", - "OPCODE_SLT", - "OPCODE_SGE", - "OPCODE_MAD", - "OPCODE_SUB", - "OPCODE_LERP", - "OPCODE_CND", - "OPCODE_CND0", - "OPCODE_DOT2ADD", - "OPCODE_INDEX", - "OPCODE_NEGATE", - "OPCODE_FRAC", - "OPCODE_CLAMP", - "OPCODE_FLOOR", - "OPCODE_ROUND", - "OPCODE_EXPBASE2", - "OPCODE_LOGBASE2", - "OPCODE_POWER", - "OPCODE_CROSSPRODUCT", - "OPCODE_MULTIPLYMATRIX", - "OPCODE_ABS", - "OPCODE_RCC", - "OPCODE_DPH", - "OPCODE_COS", - "OPCODE_DDX", - "OPCODE_DDY", - "OPCODE_KILP", - "OPCODE_PK2H", - "OPCODE_PK2US", - "OPCODE_PK4B", - "OPCODE_PK4UB", - "OPCODE_RFL", - "OPCODE_SEQ", - "OPCODE_SFL", - "OPCODE_SGT", - "OPCODE_SIN", - "OPCODE_SLE", - "OPCODE_SNE", - "OPCODE_STR", - "OPCODE_TEX", - "OPCODE_TXD", - "OPCODE_TXP", - "OPCODE_UP2H", - "OPCODE_UP2US", - "OPCODE_UP4B", - "OPCODE_UP4UB", - "OPCODE_X2D", - "OPCODE_ARA", - "OPCODE_ARR", - "OPCODE_BRA", - "OPCODE_CAL", - "OPCODE_RET", - "OPCODE_SSG", - "OPCODE_CMP", - "OPCODE_SCS", - "OPCODE_TXB", - "OPCODE_NRM", - "OPCODE_DIV", - "OPCODE_DP2", - "OPCODE_TXL", - "OPCODE_BRK", - "OPCODE_IF", - "OPCODE_LOOP", - "OPCODE_REP", - "OPCODE_ELSE", - "OPCODE_ENDIF", - "OPCODE_ENDLOOP", - "OPCODE_ENDREP", - "OPCODE_PUSHA", - "OPCODE_POPA", - "OPCODE_CEIL", - "OPCODE_I2F", - "OPCODE_NOT", - "OPCODE_TRUNC", - "OPCODE_SHL", - "OPCODE_SHR", - "OPCODE_AND", - "OPCODE_OR", - "OPCODE_MOD", - "OPCODE_XOR", - "OPCODE_SAD", - "OPCODE_TXF", - "OPCODE_TXQ", - "OPCODE_CONT", - "OPCODE_EMIT", - "OPCODE_ENDPRIM", - "OPCODE_BGNLOOP2", - "OPCODE_BGNSUB", - "OPCODE_ENDLOOP2", - "OPCODE_ENDSUB", - "OPCODE_NOISE1", - "OPCODE_NOISE2", - "OPCODE_NOISE3", - "OPCODE_NOISE4", - "OPCODE_NOP", - "OPCODE_M4X3", - "OPCODE_M3X4", - "OPCODE_M3X3", - "OPCODE_M3X2", - "OPCODE_NRM4", - "OPCODE_CALLNZ", - "OPCODE_IFC", - "OPCODE_BREAKC", - "OPCODE_KIL", - "OPCODE_END" -}; - static const char *TGSI_SATS[] = { "SAT_NONE", @@ -428,8 +307,8 @@ dump_instruction_verbose( { unsigned i; - TXT( "\nOpcode : " ); - ENM( inst->Instruction.Opcode, TGSI_OPCODES ); + TXT( "\nOpcode : OPCODE_" ); + TXT( tgsi_get_opcode_info( inst->Instruction.Opcode )->mnemonic ); if( deflt || fi->Instruction.Saturate != inst->Instruction.Saturate ) { TXT( "\nSaturate : " ); ENM( inst->Instruction.Saturate, TGSI_SATS ); @@ -770,10 +649,6 @@ tgsi_dump_c( uint deflt = flags & TGSI_DUMP_C_DEFAULT; uint instno = 0; - /* sanity checks */ - assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_CONT], "OPCODE_CONT") == 0); - assert(strcmp(TGSI_OPCODES[TGSI_OPCODE_END], "OPCODE_END") == 0); - tgsi_parse_init( &parse, tokens ); TXT( "tgsi-dump begin -----------------" ); -- cgit v1.2.3 From 4b929b32d03a58d80cacbd63c172dbd7221c8a8f Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 13 Aug 2008 17:31:26 +0200 Subject: tgsi: Validate instruction opcode and operand counts. --- src/gallium/auxiliary/tgsi/tgsi_sanity.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 2e3ec96b5b..0b5bdd6ba1 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -27,6 +27,7 @@ #include "pipe/p_debug.h" #include "tgsi_sanity.h" +#include "tgsi_info.h" #include "tgsi_iterate.h" #define MAX_REGISTERS 256 @@ -170,6 +171,7 @@ iter_instruction( struct tgsi_full_instruction *inst ) { struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; + const struct tgsi_opcode_info *info; uint i; /* There must be no other instructions after END. @@ -181,6 +183,19 @@ iter_instruction( ctx->index_of_END = ctx->num_instructions; } + info = tgsi_get_opcode_info( inst->Instruction.Opcode ); + if (info == NULL) { + report_error( ctx, "Invalid instruction opcode" ); + return TRUE; + } + + if (info->num_dst != inst->Instruction.NumDstRegs) { + report_error( ctx, "Invalid number of destination operands" ); + } + if (info->num_src != inst->Instruction.NumSrcRegs) { + report_error( ctx, "Invalid number of source operands" ); + } + /* Check destination and source registers' validity. * Mark the registers as used. */ -- cgit v1.2.3 From bfdb1d55d58d70044af9fcd6f8465179145581dd Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 13 Aug 2008 19:28:10 +0200 Subject: tgsi: Fix typo. --- src/gallium/auxiliary/tgsi/tgsi_build.c | 4 ++-- src/gallium/auxiliary/tgsi/tgsi_build.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 742ef14c35..050b448fe7 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -820,7 +820,7 @@ tgsi_build_instruction_ext_nv( unsigned cond_swizzle_z, unsigned cond_swizzle_w, unsigned cond_dst_update, - unsigned cond_flow_update, + unsigned cond_flow_enable, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct tgsi_header *header ) @@ -837,7 +837,7 @@ tgsi_build_instruction_ext_nv( instruction_ext_nv.CondSwizzleZ = cond_swizzle_z; instruction_ext_nv.CondSwizzleW = cond_swizzle_w; instruction_ext_nv.CondDstUpdate = cond_dst_update; - instruction_ext_nv.CondFlowEnable = cond_flow_update; + instruction_ext_nv.CondFlowEnable = cond_flow_enable; prev_token->Extended = 1; instruction_grow( instruction, header ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index ed25830248..6ae7f324f8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -170,7 +170,7 @@ tgsi_build_instruction_ext_nv( unsigned cond_swizzle_z, unsigned cond_swizzle_w, unsigned cond_dst_update, - unsigned cond_flow_update, + unsigned cond_flow_enable, struct tgsi_token *prev_token, struct tgsi_instruction *instruction, struct tgsi_header *header ); -- cgit v1.2.3 From 83a5a225d773c119857f58672fd51c1d425f21d3 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 13 Aug 2008 19:31:13 +0200 Subject: tgsi: Use a homebrew version of toupper(). --- src/gallium/auxiliary/tgsi/tgsi_text.c | 27 +++++++++++++++++---------- 1 file changed, 17 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c index 7cdf1b67c5..9454563361 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_text.c +++ b/src/gallium/auxiliary/tgsi/tgsi_text.c @@ -51,11 +51,18 @@ static boolean is_digit_alpha_underscore( const char *cur ) return is_digit( cur ) || is_alpha_underscore( cur ); } +static boolean uprcase( char c ) +{ + if (c >= 'a' && c <= 'z') + return c += 'A' - 'a'; + return c; +} + static boolean str_match_no_case( const char **pcur, const char *str ) { const char *cur = *pcur; - while (*str != '\0' && *str == toupper( *cur )) { + while (*str != '\0' && *str == uprcase( *cur )) { str++; cur++; } @@ -131,7 +138,7 @@ static boolean parse_float( const char **pcur, float *val ) } if (!integral_part && !fractional_part) return FALSE; - if (toupper( *cur ) == 'E') { + if (uprcase( *cur ) == 'E') { cur++; if (*cur == '-' || *cur == '+') cur++; @@ -258,19 +265,19 @@ parse_opt_writemask( cur++; *writemask = TGSI_WRITEMASK_NONE; eat_opt_white( &cur ); - if (toupper( *cur ) == 'X') { + if (uprcase( *cur ) == 'X') { cur++; *writemask |= TGSI_WRITEMASK_X; } - if (toupper( *cur ) == 'Y') { + if (uprcase( *cur ) == 'Y') { cur++; *writemask |= TGSI_WRITEMASK_Y; } - if (toupper( *cur ) == 'Z') { + if (uprcase( *cur ) == 'Z') { cur++; *writemask |= TGSI_WRITEMASK_Z; } - if (toupper( *cur ) == 'W') { + if (uprcase( *cur ) == 'W') { cur++; *writemask |= TGSI_WRITEMASK_W; } @@ -516,13 +523,13 @@ parse_optional_swizzle( cur++; eat_opt_white( &cur ); for (i = 0; i < 4; i++) { - if (toupper( *cur ) == 'X') + if (uprcase( *cur ) == 'X') swizzle[i] = TGSI_SWIZZLE_X; - else if (toupper( *cur ) == 'Y') + else if (uprcase( *cur ) == 'Y') swizzle[i] = TGSI_SWIZZLE_Y; - else if (toupper( *cur ) == 'Z') + else if (uprcase( *cur ) == 'Z') swizzle[i] = TGSI_SWIZZLE_Z; - else if (toupper( *cur ) == 'W') + else if (uprcase( *cur ) == 'W') swizzle[i] = TGSI_SWIZZLE_W; else { if (*cur == '0') -- cgit v1.2.3 From 3a1af846fe13c2a9b8c24eb7e37e11a9b42668d5 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 13 Aug 2008 20:09:56 +0200 Subject: tgsi: Initial code for KILP, needs CCs working to be complete. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index c4ba667d32..df2a7cc4b3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1234,11 +1234,29 @@ static void exec_kilp(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { - uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + if (inst->InstructionExtNv.CondFlowEnable) { + uint swizzle[4]; + uint chan_index; + uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ - /* TODO: build kilmask from CC mask */ + swizzle[0] = inst->InstructionExtNv.CondSwizzleX; + swizzle[1] = inst->InstructionExtNv.CondSwizzleY; + swizzle[2] = inst->InstructionExtNv.CondSwizzleZ; + swizzle[3] = inst->InstructionExtNv.CondSwizzleW; - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; + for (chan_index = 0; chan_index < 4; chan_index++) + { + uint i; + + for (i = 0; i < 4; i++) { + /* TODO: evaluate the condition code */ + if (0) + kilmask |= 1 << i; + } + } + + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; + } } -- cgit v1.2.3 From 19ff2326e94a7a773bcb1dd9bfd22197b999daf9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 13 Aug 2008 17:20:40 -0600 Subject: gallium: fix exec_kilp(), fix Exec/FuncMask test for TGSI_OPCODE_RET Fixes a few glean glsl regressions. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index df2a7cc4b3..a96209db1a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1234,10 +1234,13 @@ static void exec_kilp(struct tgsi_exec_machine *mach, const struct tgsi_full_instruction *inst) { + uint kilmask; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + if (inst->InstructionExtNv.CondFlowEnable) { uint swizzle[4]; uint chan_index; - uint kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */ + + kilmask = 0x0; swizzle[0] = inst->InstructionExtNv.CondSwizzleX; swizzle[1] = inst->InstructionExtNv.CondSwizzleY; @@ -1254,9 +1257,12 @@ exec_kilp(struct tgsi_exec_machine *mach, kilmask |= 1 << i; } } - - mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; } + else { + /* "unconditional" kil */ + kilmask = mach->ExecMask; + } + mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0] |= kilmask; } @@ -2176,7 +2182,7 @@ exec_instruction( mach->FuncMask &= ~mach->ExecMask; UPDATE_EXEC_MASK(mach); - if (mach->ExecMask == 0x0) { + if (mach->FuncMask == 0x0) { /* really return now (otherwise, keep executing */ if (mach->CallStackTop == 0) { -- cgit v1.2.3 From d756f9512d295531ff6a600c736a68ac2dcff58b Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 13 Aug 2008 12:13:54 +0100 Subject: translate: Draw can request up to PIPE_MAX_ATTRIBS + 1 vertex elements. --- src/gallium/auxiliary/translate/translate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index c3b754a902..650cd81fa6 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -57,7 +57,7 @@ struct translate_element struct translate_key { unsigned output_stride; unsigned nr_elements; - struct translate_element element[PIPE_MAX_ATTRIBS]; + struct translate_element element[PIPE_MAX_ATTRIBS + 1]; }; -- cgit v1.2.3 From e2da7edd642198d7c515dbc0b9ba77d4286c3262 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 18 Aug 2008 15:42:26 +0200 Subject: tgsi: Add condition code (CC) register. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 13 +++++++++++-- src/gallium/auxiliary/tgsi/tgsi_exec.h | 29 ++++++++++++++++++++++++----- 2 files changed, 35 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index a96209db1a..7974915211 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -26,7 +26,7 @@ **************************************************************************/ /** - * TGSI interpretor/executor. + * TGSI interpreter/executor. * * Flow control information: * @@ -88,6 +88,8 @@ #define TEMP_OUTPUT_C TGSI_EXEC_TEMP_OUTPUT_C #define TEMP_PRIMITIVE_I TGSI_EXEC_TEMP_PRIMITIVE_I #define TEMP_PRIMITIVE_C TGSI_EXEC_TEMP_PRIMITIVE_C +#define TEMP_CC_I TGSI_EXEC_TEMP_CC_I +#define TEMP_CC_C TGSI_EXEC_TEMP_CC_C #define TEMP_3_I TGSI_EXEC_TEMP_THREE_I #define TEMP_3_C TGSI_EXEC_TEMP_THREE_C #define TEMP_HALF_I TGSI_EXEC_TEMP_HALF_I @@ -2539,6 +2541,13 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach ) mach->Primitives[0] = 0; } + for (i = 0; i < QUAD_SIZE; i++) { + mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C].u[i] = + (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_X_SHIFT) | + (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Y_SHIFT) | + (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_Z_SHIFT) | + (TGSI_EXEC_CC_EQ << TGSI_EXEC_CC_W_SHIFT); + } /* execute declarations (interpolants) */ for (i = 0; i < mach->NumDeclarations; i++) { diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index 4f30650b07..c4e649e69c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -25,7 +25,7 @@ * **************************************************************************/ -#if !defined TGSI_EXEC_H +#ifndef TGSI_EXEC_H #define TGSI_EXEC_H #include "pipe/p_compiler.h" @@ -140,11 +140,30 @@ struct tgsi_exec_labels #define TGSI_EXEC_TEMP_PRIMITIVE_I (TGSI_EXEC_NUM_TEMPS + 2) #define TGSI_EXEC_TEMP_PRIMITIVE_C 2 -#define TGSI_EXEC_TEMP_THREE_I (TGSI_EXEC_NUM_TEMPS + 2) -#define TGSI_EXEC_TEMP_THREE_C 3 +/* NVIDIA condition code (CC) vector + */ +#define TGSI_EXEC_CC_GT 0x01 +#define TGSI_EXEC_CC_EQ 0x02 +#define TGSI_EXEC_CC_LT 0x04 +#define TGSI_EXEC_CC_UN 0x08 + +#define TGSI_EXEC_CC_X_MASK 0x000000ff +#define TGSI_EXEC_CC_X_SHIFT 0 +#define TGSI_EXEC_CC_Y_MASK 0x0000ff00 +#define TGSI_EXEC_CC_Y_SHIFT 8 +#define TGSI_EXEC_CC_Z_MASK 0x00ff0000 +#define TGSI_EXEC_CC_Z_SHIFT 16 +#define TGSI_EXEC_CC_W_MASK 0xff000000 +#define TGSI_EXEC_CC_W_SHIFT 24 + +#define TGSI_EXEC_TEMP_CC_I (TGSI_EXEC_NUM_TEMPS + 2) +#define TGSI_EXEC_TEMP_CC_C 3 + +#define TGSI_EXEC_TEMP_THREE_I (TGSI_EXEC_NUM_TEMPS + 3) +#define TGSI_EXEC_TEMP_THREE_C 0 #define TGSI_EXEC_TEMP_HALF_I (TGSI_EXEC_NUM_TEMPS + 3) -#define TGSI_EXEC_TEMP_HALF_C 0 +#define TGSI_EXEC_TEMP_HALF_C 1 #define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4) -- cgit v1.2.3 From 880b751e8e21cab21a0d522346300f463fd9c634 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 18 Aug 2008 15:57:39 +0200 Subject: tgsi: Cosmetic changes. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 7974915211..6b19ab5c8e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -63,6 +63,11 @@ #define TILE_BOTTOM_LEFT 2 #define TILE_BOTTOM_RIGHT 3 +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + /* * Shorthand locations of various utility registers (_I = Index, _C = Channel) */ @@ -96,9 +101,6 @@ #define TEMP_HALF_C TGSI_EXEC_TEMP_HALF_C #define TEMP_R0 TGSI_EXEC_TEMP_R0 -#define FOR_EACH_CHANNEL(CHAN)\ - for (CHAN = 0; CHAN < 4; CHAN++) - #define IS_CHANNEL_ENABLED(INST, CHAN)\ ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) @@ -106,11 +108,11 @@ ((INST).FullDstRegisters[1].DstRegister.WriteMask & (1 << (CHAN))) #define FOR_EACH_ENABLED_CHANNEL(INST, CHAN)\ - FOR_EACH_CHANNEL( CHAN )\ + for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ if (IS_CHANNEL_ENABLED( INST, CHAN )) #define FOR_EACH_ENABLED_CHANNEL2(INST, CHAN)\ - FOR_EACH_CHANNEL( CHAN )\ + for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++)\ if (IS_CHANNEL_ENABLED2( INST, CHAN )) @@ -118,14 +120,6 @@ #define UPDATE_EXEC_MASK(MACH) \ MACH->ExecMask = MACH->CondMask & MACH->LoopMask & MACH->ContMask & MACH->FuncMask - -#define CHAN_X 0 -#define CHAN_Y 1 -#define CHAN_Z 2 -#define CHAN_W 3 - - - /** * Initialize machine state by expanding tokens to full instructions, * allocating temporary storage, setting up constants, etc. @@ -1128,7 +1122,7 @@ store_dest( { union tgsi_exec_channel *dst; - switch( reg->DstRegister.File ) { + switch (reg->DstRegister.File) { case TGSI_FILE_NULL: return; @@ -1138,7 +1132,7 @@ store_dest( break; case TGSI_FILE_TEMPORARY: - assert(reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS); + assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS ); dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index]; break; @@ -1151,8 +1145,7 @@ store_dest( return; } - switch (inst->Instruction.Saturate) - { + switch (inst->Instruction.Saturate) { case TGSI_SAT_NONE: if (mach->ExecMask & 0x1) dst->i[0] = chan->i[0]; @@ -1166,8 +1159,8 @@ store_dest( case TGSI_SAT_ZERO_ONE: /* XXX need to obey ExecMask here */ - micro_max(dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]); - micro_min(dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]); + micro_max( dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); + micro_min( dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); break; case TGSI_SAT_MINUS_PLUS_ONE: -- cgit v1.2.3 From 6aacca106b0619d87015aece1a0b1d6332910926 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 18 Aug 2008 17:03:56 +0200 Subject: tgsi: Respect condition codes when storing destination register. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 121 ++++++++++++++++++++++++++++++--- 1 file changed, 113 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 6b19ab5c8e..1217ee71a5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1120,7 +1120,9 @@ store_dest( const struct tgsi_full_instruction *inst, uint chan_index ) { + uint i; union tgsi_exec_channel *dst; + uint execmask = mach->ExecMask; switch (reg->DstRegister.File) { case TGSI_FILE_NULL: @@ -1145,16 +1147,119 @@ store_dest( return; } + if (inst->InstructionExtNv.CondFlowEnable) { + union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; + uint swizzle; + uint shift; + uint mask; + uint test; + + /* Only CC0 supported. + */ + assert( inst->InstructionExtNv.CondFlowIndex < 1 ); + + switch (chan_index) { + case CHAN_X: + swizzle = inst->InstructionExtNv.CondSwizzleX; + break; + case CHAN_Y: + swizzle = inst->InstructionExtNv.CondSwizzleY; + break; + case CHAN_Z: + swizzle = inst->InstructionExtNv.CondSwizzleZ; + break; + case CHAN_W: + swizzle = inst->InstructionExtNv.CondSwizzleW; + break; + default: + assert( 0 ); + return; + } + + switch (swizzle) { + case TGSI_SWIZZLE_X: + shift = TGSI_EXEC_CC_X_SHIFT; + mask = TGSI_EXEC_CC_X_MASK; + break; + case TGSI_SWIZZLE_Y: + shift = TGSI_EXEC_CC_Y_SHIFT; + mask = TGSI_EXEC_CC_Y_MASK; + break; + case TGSI_SWIZZLE_Z: + shift = TGSI_EXEC_CC_Z_SHIFT; + mask = TGSI_EXEC_CC_Z_MASK; + break; + case TGSI_SWIZZLE_W: + shift = TGSI_EXEC_CC_W_SHIFT; + mask = TGSI_EXEC_CC_W_MASK; + break; + default: + assert( 0 ); + return; + } + + switch (inst->InstructionExtNv.CondMask) { + case TGSI_CC_GT: + test = ~(TGSI_EXEC_CC_GT << shift) & mask; + for (i = 0; i < QUAD_SIZE; i++) + if (cc->u[i] & test) + execmask &= ~(1 << i); + break; + + case TGSI_CC_EQ: + test = ~(TGSI_EXEC_CC_EQ << shift) & mask; + for (i = 0; i < QUAD_SIZE; i++) + if (cc->u[i] & test) + execmask &= ~(1 << i); + break; + + case TGSI_CC_LT: + test = ~(TGSI_EXEC_CC_LT << shift) & mask; + for (i = 0; i < QUAD_SIZE; i++) + if (cc->u[i] & test) + execmask &= ~(1 << i); + break; + + case TGSI_CC_GE: + test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_EQ) << shift) & mask; + for (i = 0; i < QUAD_SIZE; i++) + if (cc->u[i] & test) + execmask &= ~(1 << i); + break; + + case TGSI_CC_LE: + test = ~((TGSI_EXEC_CC_LT | TGSI_EXEC_CC_EQ) << shift) & mask; + for (i = 0; i < QUAD_SIZE; i++) + if (cc->u[i] & test) + execmask &= ~(1 << i); + break; + + case TGSI_CC_NE: + test = ~((TGSI_EXEC_CC_GT | TGSI_EXEC_CC_LT | TGSI_EXEC_CC_UN) << shift) & mask; + for (i = 0; i < QUAD_SIZE; i++) + if (cc->u[i] & test) + execmask &= ~(1 << i); + break; + + case TGSI_CC_TR: + break; + + case TGSI_CC_FL: + for (i = 0; i < QUAD_SIZE; i++) + execmask &= ~(1 << i); + break; + + default: + assert( 0 ); + return; + } + } + switch (inst->Instruction.Saturate) { case TGSI_SAT_NONE: - if (mach->ExecMask & 0x1) - dst->i[0] = chan->i[0]; - if (mach->ExecMask & 0x2) - dst->i[1] = chan->i[1]; - if (mach->ExecMask & 0x4) - dst->i[2] = chan->i[2]; - if (mach->ExecMask & 0x8) - dst->i[3] = chan->i[3]; + for (i = 0; i < QUAD_SIZE; i++) + if (execmask & (1 << i)) + dst->i[i] = chan->i[i]; break; case TGSI_SAT_ZERO_ONE: -- cgit v1.2.3 From 56c30bf17b9f57efdb93ae5d1b801677535a9651 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 18 Aug 2008 17:09:20 +0200 Subject: tgsi: Saturate modifier obeys ExecMask. Implement NVIDIA [-1;+1] saturate mode. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 1217ee71a5..dfd22270c8 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1263,13 +1263,27 @@ store_dest( break; case TGSI_SAT_ZERO_ONE: - /* XXX need to obey ExecMask here */ - micro_max( dst, chan, &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] ); - micro_min( dst, dst, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] ); + for (i = 0; i < QUAD_SIZE; i++) + if (execmask & (1 << i)) { + if (chan->f[i] < 0.0f) + dst->f[i] = 0.0f; + else if (chan->f[i] > 1.0f) + dst->f[i] = 1.0f; + else + dst->i[i] = chan->i[i]; + } break; case TGSI_SAT_MINUS_PLUS_ONE: - assert( 0 ); + for (i = 0; i < QUAD_SIZE; i++) + if (execmask & (1 << i)) { + if (chan->f[i] < -1.0f) + dst->f[i] = -1.0f; + else if (chan->f[i] > 1.0f) + dst->f[i] = 1.0f; + else + dst->i[i] = chan->i[i]; + } break; default: -- cgit v1.2.3 From e9ec60097cfed372f202aa64aa04674448881f0e Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 18 Aug 2008 17:18:58 +0200 Subject: tgsi: Update condition code vector when storing dest register. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 49 +++++++++++++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index dfd22270c8..a2235de059 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1121,12 +1121,14 @@ store_dest( uint chan_index ) { uint i; + union tgsi_exec_channel null; union tgsi_exec_channel *dst; uint execmask = mach->ExecMask; switch (reg->DstRegister.File) { case TGSI_FILE_NULL: - return; + dst = &null; + break; case TGSI_FILE_OUTPUT: dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0] @@ -1289,6 +1291,51 @@ store_dest( default: assert( 0 ); } + + if (inst->InstructionExtNv.CondDstUpdate) { + union tgsi_exec_channel *cc = &mach->Temps[TEMP_CC_I].xyzw[TEMP_CC_C]; + uint shift; + uint mask; + + /* Only CC0 supported. + */ + assert( inst->InstructionExtNv.CondDstIndex < 1 ); + + switch (chan_index) { + case CHAN_X: + shift = TGSI_EXEC_CC_X_SHIFT; + mask = ~TGSI_EXEC_CC_X_MASK; + break; + case CHAN_Y: + shift = TGSI_EXEC_CC_Y_SHIFT; + mask = ~TGSI_EXEC_CC_Y_MASK; + break; + case CHAN_Z: + shift = TGSI_EXEC_CC_Z_SHIFT; + mask = ~TGSI_EXEC_CC_Z_MASK; + break; + case CHAN_W: + shift = TGSI_EXEC_CC_W_SHIFT; + mask = ~TGSI_EXEC_CC_W_MASK; + break; + default: + assert( 0 ); + return; + } + + for (i = 0; i < QUAD_SIZE; i++) + if (execmask & (1 << i)) { + cc->u[i] &= mask; + if (dst->f[i] < 0.0f) + cc->u[i] |= TGSI_EXEC_CC_LT << shift; + else if (dst->f[i] > 0.0f) + cc->u[i] |= TGSI_EXEC_CC_GT << shift; + else if (dst->f[i] == 0.0f) + cc->u[i] |= TGSI_EXEC_CC_EQ << shift; + else + cc->u[i] |= TGSI_EXEC_CC_UN << shift; + } + } } #define FETCH(VAL,INDEX,CHAN)\ -- cgit v1.2.3 From 93305bd6808787964c0088a88c428ecd1208aa44 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 18 Aug 2008 18:09:23 +0200 Subject: tgsi: Use NUM_CHANNELS. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 47dc06faf6..0bf260c4dd 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1,6 +1,6 @@ /************************************************************************** * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -44,13 +44,13 @@ #define FOR_EACH_CHANNEL( CHAN )\ - for( CHAN = 0; CHAN < 4; CHAN++ ) + for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) #define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) #define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ - if( IS_DST0_CHANNEL_ENABLED( INST, CHAN )) + if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) #define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ FOR_EACH_CHANNEL( CHAN )\ @@ -1172,7 +1172,7 @@ emit_instruction( { unsigned chan_index; - switch( inst->Instruction.Opcode ) { + switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: #if 0 /* XXX this isn't working properly (see glean vertProg1 test) */ -- cgit v1.2.3 From 2b512c0135bc8512cc80009ea7430f7cc0c869d6 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 18 Aug 2008 22:21:32 +0200 Subject: tgsi: Workaround debug output buffer size limitations. --- src/gallium/auxiliary/tgsi/tgsi_dump.c | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 4309d1bc76..a168c94928 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -518,8 +518,23 @@ tgsi_dump( uint flags ) { static char str[4096]; - + uint len; + char *p = str; + tgsi_dump_str(tokens, flags, str, sizeof(str)); - - debug_printf("%s", str); + + /* Workaround output buffer size limitations. + */ + len = strlen( str ); + while (len > 256) { + char piggy_bank; + + piggy_bank = p[256]; + p[256] = '\0'; + debug_printf( "%s", p ); + p[256] = piggy_bank; + p += 256; + len -= 256; + } + debug_printf( "%s", p ); } -- cgit v1.2.3 From de3083be7197cccb8dbf223d644ebdb78a8c809d Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 19 Aug 2008 00:42:30 +0200 Subject: tgsi: Fix ARL opcode in SSE2 codegen. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 6 +-- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 87 +++++++++++++++++++++++++--------- 2 files changed, 68 insertions(+), 25 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index a2235de059..b793e1ebae 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1688,9 +1688,9 @@ exec_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( &r[0], 0, chan_index ); - micro_f2it( &r[0], &r[0] ); - STORE( &r[0], 0, chan_index ); + FETCH( &r[0], 0, chan_index ); + micro_f2it( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 0bf260c4dd..ca48d422e0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -62,6 +62,7 @@ #define CHAN_W 3 #define TEMP_R0 TGSI_EXEC_TEMP_R0 +#define TEMP_ADDR TGSI_EXEC_TEMP_ADDR /** * X86 utility functions. @@ -215,19 +216,61 @@ emit_ret( static void emit_const( struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movss( - func, - make_xmm( xmm ), - get_const( vec, chan ) ); - sse_shufps( - func, - make_xmm( xmm ), - make_xmm( xmm ), - SHUF( 0, 0, 0, 0 ) ); + uint xmm, + int vec, + uint chan, + uint indirect, + uint indirectFile, + int indirectIndex ) +{ + if (indirect) { + struct x86_reg r0 = get_input_base(); + struct x86_reg r1 = get_output_base(); + uint i; + + assert( indirectFile == TGSI_FILE_ADDRESS ); + assert( indirectIndex == 0 ); + + x86_push( func, r0 ); + x86_push( func, r1 ); + + for (i = 0; i < QUAD_SIZE; i++) { + x86_lea( func, r0, get_const( vec, chan ) ); + x86_mov( func, r1, x86_make_disp( get_temp( TEMP_ADDR, CHAN_X ), i * 4 ) ); + + /* Quick hack to multiply by 16 -- need to add SHL to rtasm. + */ + x86_add( func, r1, r1 ); + x86_add( func, r1, r1 ); + x86_add( func, r1, r1 ); + x86_add( func, r1, r1 ); + + x86_add( func, r0, r1 ); + x86_mov( func, r1, x86_deref( r0 ) ); + x86_mov( func, x86_make_disp( get_temp( TEMP_R0, CHAN_X ), i * 4 ), r1 ); + } + + x86_pop( func, r1 ); + x86_pop( func, r0 ); + + sse_movaps( + func, + make_xmm( xmm ), + get_temp( TEMP_R0, CHAN_X ) ); + } + else { + assert( vec >= 0 ); + + sse_movss( + func, + make_xmm( xmm ), + get_const( vec, chan ) ); + sse_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); + } } static void @@ -369,10 +412,12 @@ emit_addrs( unsigned vec, unsigned chan ) { + assert( vec == 0 ); + emit_temps( func, xmm, - vec + TGSI_EXEC_NUM_TEMPS, + vec + TGSI_EXEC_TEMP_ADDR, chan ); } @@ -855,18 +900,21 @@ emit_fetch( { unsigned swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); - switch( swizzle ) { + switch (swizzle) { case TGSI_EXTSWIZZLE_X: case TGSI_EXTSWIZZLE_Y: case TGSI_EXTSWIZZLE_Z: case TGSI_EXTSWIZZLE_W: - switch( reg->SrcRegister.File ) { + switch (reg->SrcRegister.File) { case TGSI_FILE_CONSTANT: emit_const( func, xmm, reg->SrcRegister.Index, - swizzle ); + swizzle, + reg->SrcRegister.Indirect, + reg->SrcRegisterInd.File, + reg->SrcRegisterInd.Index ); break; case TGSI_FILE_IMMEDIATE: @@ -1174,16 +1222,11 @@ emit_instruction( switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ARL: -#if 0 - /* XXX this isn't working properly (see glean vertProg1 test) */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); emit_f2it( func, 0 ); STORE( func, *inst, 0, 0, chan_index ); } -#else - return 0; -#endif break; case TGSI_OPCODE_MOV: -- cgit v1.2.3 From 0d9d2045e86a249a36d8eeebf59979a162c164af Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 19 Aug 2008 11:47:30 +0200 Subject: tgsi: Implement EXP opcode for SSE2. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 8 ++--- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 64 ++++++++++++++++++++++++++-------- 2 files changed, 53 insertions(+), 19 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index b793e1ebae..d584a4e4fa 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1756,18 +1756,18 @@ exec_instruction( micro_flr( &r[1], &r[0] ); /* r1 = floor(r0) */ if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { micro_exp2( &r[2], &r[1] ); /* r2 = 2 ^ r1 */ - STORE( &r[2], 0, CHAN_X ); /* store r2 */ + STORE( &r[2], 0, CHAN_X ); /* store r2 */ } if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { micro_sub( &r[2], &r[0], &r[1] ); /* r2 = r0 - r1 */ - STORE( &r[2], 0, CHAN_Y ); /* store r2 */ + STORE( &r[2], 0, CHAN_Y ); /* store r2 */ } if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { micro_exp2( &r[2], &r[0] ); /* r2 = 2 ^ r0 */ - STORE( &r[2], 0, CHAN_Z ); /* store r2 */ + STORE( &r[2], 0, CHAN_Z ); /* store r2 */ } if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index ca48d422e0..684bc081e5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -61,6 +61,9 @@ #define CHAN_Z 2 #define CHAN_W 3 +#define TEMP_ONE_I TGSI_EXEC_TEMP_ONE_I +#define TEMP_ONE_C TGSI_EXEC_TEMP_ONE_C + #define TEMP_R0 TGSI_EXEC_TEMP_R0 #define TEMP_ADDR TGSI_EXEC_TEMP_ADDR @@ -958,8 +961,8 @@ emit_fetch( emit_tempf( func, xmm, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ); + TEMP_ONE_I, + TEMP_ONE_C ); break; default: @@ -1173,8 +1176,8 @@ emit_setcc( func, make_xmm( 0 ), get_temp( - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ) ); + TEMP_ONE_I, + TEMP_ONE_C ) ); STORE( func, *inst, 0, 0, chan_index ); } } @@ -1243,8 +1246,8 @@ emit_instruction( emit_tempf( func, 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C); + TEMP_ONE_I, + TEMP_ONE_C); if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) { STORE( func, *inst, 0, 0, CHAN_X ); } @@ -1329,7 +1332,38 @@ emit_instruction( break; case TGSI_OPCODE_EXP: - return 0; + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( func, *inst, 0, 0, CHAN_X ); + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { + emit_MOV( func, 1, 0 ); + emit_flr( func, 1 ); + /* dst.x = ex2(floor(src.x)) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) { + emit_MOV( func, 2, 1 ); + emit_ex2( func, 2 ); + STORE( func, *inst, 2, 0, CHAN_X ); + } + /* dst.y = src.x - floor(src.x) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { + emit_MOV( func, 2, 0 ); + emit_sub( func, 2, 1 ); + STORE( func, *inst, 2, 0, CHAN_Y ); + } + } + /* dst.z = ex2(src.x) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { + emit_ex2( func, 0 ); + STORE( func, *inst, 0, 0, CHAN_Z ); + } + } + /* dst.w = 1.0 */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) { + emit_tempf( func, 0, TEMP_ONE_I, TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } break; case TGSI_OPCODE_LOG: @@ -1399,8 +1433,8 @@ emit_instruction( emit_tempf( func, 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ); + TEMP_ONE_I, + TEMP_ONE_C ); STORE( func, *inst, 0, 0, CHAN_X ); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { @@ -1603,8 +1637,8 @@ emit_instruction( emit_tempf( func, 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ); + TEMP_ONE_I, + TEMP_ONE_C ); STORE( func, *inst, 0, 0, CHAN_W ); } break; @@ -1731,8 +1765,8 @@ emit_instruction( emit_tempf( func, 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ); + TEMP_ONE_I, + TEMP_ONE_C ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 0, 0, chan_index ); } @@ -1820,8 +1854,8 @@ emit_instruction( emit_tempf( func, 0, - TGSI_EXEC_TEMP_ONE_I, - TGSI_EXEC_TEMP_ONE_C ); + TEMP_ONE_I, + TEMP_ONE_C ); STORE( func, *inst, 0, 0, CHAN_W ); } break; -- cgit v1.2.3 From 4405e428e4c2a80172c803cc3c4933d546bf2b4d Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 19 Aug 2008 12:07:25 +0200 Subject: tgsi: Implement LOG opcode for SSE2 codegen. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 8 ++++---- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 33 ++++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index d584a4e4fa..88a34a6961 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1777,18 +1777,18 @@ exec_instruction( micro_lg2( &r[1], &r[2] ); /* r1 = lg2(r2) */ micro_flr( &r[0], &r[1] ); /* r0 = floor(r1) */ if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( &r[0], 0, CHAN_X ); + STORE( &r[0], 0, CHAN_X ); } if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) { micro_exp2( &r[0], &r[0] ); /* r0 = 2 ^ r0 */ micro_div( &r[0], &r[2], &r[0] ); /* r0 = r2 / r0 */ - STORE( &r[0], 0, CHAN_Y ); + STORE( &r[0], 0, CHAN_Y ); } if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) { - STORE( &r[1], 0, CHAN_Z ); + STORE( &r[1], 0, CHAN_Z ); } if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) { - STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); + STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W ); } break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 684bc081e5..485e5a0e6f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1367,7 +1367,38 @@ emit_instruction( break; case TGSI_OPCODE_LOG: - return 0; + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_abs( func, 0 ); + emit_MOV( func, 1, 0 ); + emit_lg2( func, 1 ); + /* dst.z = lg2(abs(src.x)) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { + STORE( func, *inst, 1, 0, CHAN_Z ); + } + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { + emit_flr( func, 1 ); + /* dst.x = floor(lg2(abs(src.x))) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( func, *inst, 1, 0, CHAN_X ); + } + /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { + emit_ex2( func, 1 ); + emit_rcp( func, 1, 1 ); + emit_mul( func, 0, 1 ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + } + } + /* dst.w = 1.0 */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) { + emit_tempf( func, 0, TEMP_ONE_I, TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } break; case TGSI_OPCODE_MUL: -- cgit v1.2.3 From 9e3aaa50483b61d644306d483feed564c69018bf Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 19 Aug 2008 21:33:59 +0100 Subject: gallium: WinCE build fixes. --- src/gallium/auxiliary/util/u_snprintf.c | 9 +++++++++ src/gallium/include/pipe/p_util.h | 6 ++++++ 2 files changed, 15 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_snprintf.c b/src/gallium/auxiliary/util/u_snprintf.c index 7fa84d8bec..0d54299b28 100644 --- a/src/gallium/auxiliary/util/u_snprintf.c +++ b/src/gallium/auxiliary/util/u_snprintf.c @@ -334,6 +334,15 @@ static void *mymemcpy(void *, void *, size_t); #endif /* HAVE_UINTPTR_T || defined(uintptr_t) */ #endif /* !defined(UINTPTR_T) */ +/* WinCE5.0 does not have uintptr_t defined */ +#if (_WIN32_WCE < 600) +#ifdef UINTPTR_T +#undef UINTPTR_T +#endif +#define UINTPTR_T unsigned long int +#endif + + /* Support for ptrdiff_t. */ #ifndef PTRDIFF_T #if HAVE_PTRDIFF_T || defined(ptrdiff_t) diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index b4ab70a3fc..473a8d94ab 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -99,6 +99,12 @@ __inline double __cdecl atan2(double val) #include #endif + /* Define ENOMEM for WINCE */ +#if (_WIN32_WCE < 600) +#ifndef ENOMEM +#define ENOMEM 12 +#endif +#endif #ifdef __cplusplus extern "C" { -- cgit v1.2.3 From 63c0970dca9c13ab83ea24f108b41f75f2a290a7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 19 Aug 2008 17:04:48 -0600 Subject: gallium: fix an assertion --- src/gallium/auxiliary/draw/draw_vs_varient.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index ad0b829afa..994ce3e889 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -298,7 +298,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, emit.element[i].input_offset = key->element[i].out.vs_output * 4 * sizeof(float); emit.element[i].output_format = draw_translate_vinfo_format(key->element[i].out.format); emit.element[i].output_offset = key->element[i].out.offset; - assert(emit.element[i].input_offset < fetch.output_stride); + assert(emit.element[i].input_offset <= fetch.output_stride); } else { emit.element[i].input_format = PIPE_FORMAT_R32_FLOAT; -- cgit v1.2.3 From f6abdb20437b1b8d27d8c45c0787017dfcad3497 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 20 Aug 2008 10:27:30 -0600 Subject: gallium: fix typo in LINE() macro (replace i+1 with i1 var) We were sometimes referencing an invalid vertex. Fixes progs/trivial/line-clip.c test among others. --- src/gallium/auxiliary/draw/draw_pipe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 3355c871ee..1db43876ef 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -238,7 +238,7 @@ void draw_pipeline_run( struct draw_context *draw, do_line( draw, \ flags, \ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \ - verts + stride * (i+1)) + verts + stride * (i1)) #define POINT(i0) \ do_point( draw, \ -- cgit v1.2.3 From 9e2b867b3f2e9afc9e9f9178788ae07f6be1f3c0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 22 Aug 2008 15:13:47 -0600 Subject: gallium: new u_math.[ch] files for math functions So far, optimized/low-precision versions of exp(), exp2(), log2(), pow(). --- src/gallium/auxiliary/util/Makefile | 1 + src/gallium/auxiliary/util/SConscript | 1 + src/gallium/auxiliary/util/u_math.c | 60 ++++++++++++++ src/gallium/auxiliary/util/u_math.h | 144 ++++++++++++++++++++++++++++++++++ 4 files changed, 206 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_math.c create mode 100644 src/gallium/auxiliary/util/u_math.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index db3d810a2e..be1b97b535 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -12,6 +12,7 @@ C_SOURCES = \ u_gen_mipmap.c \ u_handle_table.c \ u_hash_table.c \ + u_math.c \ u_mm.c \ u_simple_shaders.c \ u_snprintf.c \ diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 0309de1ac2..7865307a7a 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -13,6 +13,7 @@ util = env.ConvenienceLibrary( 'u_gen_mipmap.c', 'u_handle_table.c', 'u_hash_table.c', + 'u_math.c', 'u_mm.c', 'u_simple_shaders.c', 'u_snprintf.c', diff --git a/src/gallium/auxiliary/util/u_math.c b/src/gallium/auxiliary/util/u_math.c new file mode 100644 index 0000000000..8bf6551b6e --- /dev/null +++ b/src/gallium/auxiliary/util/u_math.c @@ -0,0 +1,60 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + + +#include "util/u_math.h" + + + +float pow2_table[POW2_TABLE_SIZE]; + + +static void +init_pow2_table(void) +{ + int i; + for (i = 0; i < POW2_TABLE_SIZE; i++) { + pow2_table[i] = pow(2.0, i / POW2_TABLE_SCALE); + } +} + + +/** + * One time init for math utilities. + */ +void +util_init_math(void) +{ + static boolean initialized = FALSE; + if (!initialized) { + init_pow2_table(); + initialized = TRUE; + } +} + + diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h new file mode 100644 index 0000000000..e179ce700f --- /dev/null +++ b/src/gallium/auxiliary/util/u_math.h @@ -0,0 +1,144 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Math utilities and approximations for common math functions. + * Reduced precision is usually acceptable in shaders... + */ + + +#ifndef U_MATH_H +#define U_MATH_H + + +#include "pipe/p_util.h" +#include "util/u_math.h" + + + +#define POW2_TABLE_SIZE 256 +#define POW2_TABLE_SCALE ((float) (POW2_TABLE_SIZE-1)) +extern float pow2_table[POW2_TABLE_SIZE]; + + +extern void +util_init_math(void); + + + + +/** + * Fast approximation to exp(x). + * Compute with base 2 exponents: exp(x) = exp2(log2(e) * x) + * Note: log2(e) is a constant, k = 1.44269 + * So, exp(x) = exp2(k * x); + * Identity: exp2(a + b) = exp2(a) * exp2(b) + * Let ipart = int(k*x) + * Let fpart = k*x - ipart; + * So, exp2(k*x) = exp2(ipart) * exp2(fpart) + * Compute exp2(ipart) with i << ipart + * Compute exp2(fpart) with lookup table. + */ +static INLINE float +util_fast_exp(float x) +{ + if (x >= 0.0f) { + float k = 1.44269f; /* = log2(e) */ + float kx = k * x; + int ipart = (int) kx; + float fpart = kx - (float) ipart; + float y = (float) (1 << ipart) + * pow2_table[(int) (fpart * POW2_TABLE_SCALE)]; + return y; + } + else { + /* exp(-x) = 1.0 / exp(x) */ + float k = -1.44269f; + float kx = k * x; + int ipart = (int) kx; + float fpart = kx - (float) ipart; + float y = (float) (1 << ipart) + * pow2_table[(int) (fpart * POW2_TABLE_SCALE)]; + return 1.0f / y; + } +} + + +/** + * Fast version of 2^x + * XXX the above function could be implemented in terms of this one. + */ +static INLINE float +util_fast_exp2(float x) +{ + if (x >= 0.0f) { + int ipart = (int) x; + float fpart = x - (float) ipart; + float y = (float) (1 << ipart) + * pow2_table[(int) (fpart * POW2_TABLE_SCALE)]; + return y; + } + else { + /* exp(-x) = 1.0 / exp(x) */ + int ipart = (int) -x; + float fpart = -x - (float) ipart; + float y = (float) (1 << ipart) + * pow2_table[(int) (fpart * POW2_TABLE_SCALE)]; + return 1.0f / y; + } +} + + +/** + * Based on code from http://www.flipcode.com/totd/ + */ +static INLINE float +util_fast_log2(float val) +{ + union fi num; + int log_2; + num.f = val; + log_2 = ((num.i >> 23) & 255) - 128; + num.i &= ~(255 << 23); + num.i += 127 << 23; + num.f = ((-1.0f/3) * num.f + 2) * num.f - 2.0f/3; + return num.f + log_2; +} + + +static INLINE float +util_fast_pow(float x, float y) +{ + /* XXX this test may need adjustment */ + if (y >= 3.0 && -0.02f <= x && x <= 0.02f) + return 0.0f; + return util_fast_exp2(util_fast_log2(x) * y); +} + + +#endif /* U_MATH_H */ -- cgit v1.2.3 From 1c2ff4d9e65563c071747a9c3bd907bd24706da0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 22 Aug 2008 15:16:43 -0600 Subject: gallium: use new util_fast_exp2(), _log2(), pow() functions New code surrounded with #if FAST_MATH to allow comparing against original code if we need to debug. --- src/gallium/auxiliary/draw/draw_vs_aos.c | 66 +++++++++++++++++++++++++++++++- src/gallium/auxiliary/tgsi/tgsi_exec.c | 30 +++++++++++++++ src/gallium/auxiliary/tgsi/tgsi_sse2.c | 19 +++++++++ 3 files changed, 113 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 441877d46f..41bdd012d5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -31,6 +31,7 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" +#include "util/u_math.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_exec.h" @@ -43,6 +44,7 @@ #ifdef PIPE_ARCH_X86 #define DISASSEM 0 +#define FAST_MATH 1 static const char *files[] = { @@ -1380,14 +1382,28 @@ static boolean emit_MAD( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } + + /* A wrapper for powf(). * Makes sure it is cdecl and operates on floats. */ static float PIPE_CDECL _powerf( float x, float y ) { +#if FAST_MATH + return util_fast_pow(x, y); +#else return powf( x, y ); +#endif } +#if FAST_MATH +static float PIPE_CDECL _exp2(float x) +{ + return util_fast_exp2(x); +} +#endif + + /* Really not sufficient -- need to check for conditions that could * generate inf/nan values, which will slow things down hugely. */ @@ -1442,6 +1458,48 @@ static boolean emit_POW( struct aos_compilation *cp, const struct tgsi_full_inst } +#if FAST_MATH +static boolean emit_EXPBASE2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + uint i; + + /* For absolute correctness, need to spill/invalidate all XMM regs + * too. + */ + for (i = 0; i < 8; i++) { + if (cp->xmm[i].dirty) + spill(cp, i); + aos_release_xmm_reg(cp, i); + } + + /* Push caller-save (ie scratch) regs. + */ + x86_cdecl_caller_push_regs( cp->func ); + + x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, -4) ); + + x87_fld_src( cp, &op->FullSrcRegisters[0], 0 ); + x87_fstp( cp->func, x86_make_disp( cp->stack_ESP, 0 ) ); + + /* tmp_EAX has been pushed & will be restored below */ + x86_mov_reg_imm( cp->func, cp->tmp_EAX, (unsigned long) _exp2 ); + x86_call( cp->func, cp->tmp_EAX ); + + x86_lea( cp->func, cp->stack_ESP, x86_make_disp(cp->stack_ESP, 4) ); + + x86_cdecl_caller_pop_regs( cp->func ); + + /* Note retval on x87 stack: + */ + cp->func->x87_stack++; + + x87_fstp_dest4( cp, &op->FullDstRegisters[0] ); + + return TRUE; +} +#endif + + static boolean emit_RCP( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); @@ -1662,7 +1720,9 @@ emit_instruction( struct aos_compilation *cp, return emit_RND(cp, inst); case TGSI_OPCODE_EXPBASE2: -#if 0 +#if FAST_MATH + return emit_EXPBASE2(cp, inst); +#elif 0 /* this seems to fail for "larger" exponents. * See glean tvertProg1's EX2 test. */ @@ -1827,6 +1887,8 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, struct aos_compilation cp; unsigned fixup, label; + util_init_math(); + tgsi_parse_init( &parse, varient->base.vs->state.tokens ); memset(&cp, 0, sizeof(cp)); @@ -2135,4 +2197,4 @@ struct draw_vs_varient *draw_vs_varient_aos_sse( struct draw_vertex_shader *vs, -#endif +#endif /* PIPE_ARCH_X86 */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 88a34a6961..e28b56c842 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -57,6 +57,9 @@ #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_exec.h" +#include "util/u_math.h" + +#define FAST_MATH 1 #define TILE_TOP_LEFT 0 #define TILE_TOP_RIGHT 1 @@ -145,6 +148,8 @@ tgsi_exec_machine_bind_shader( tgsi_dump(tokens, 0); #endif + util_init_math(); + mach->Tokens = tokens; mach->Samplers = samplers; @@ -448,10 +453,17 @@ micro_exp2( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src) { +#if FAST_MATH + dst->f[0] = util_fast_exp2( src->f[0] ); + dst->f[1] = util_fast_exp2( src->f[1] ); + dst->f[2] = util_fast_exp2( src->f[2] ); + dst->f[3] = util_fast_exp2( src->f[3] ); +#else dst->f[0] = powf( 2.0f, src->f[0] ); dst->f[1] = powf( 2.0f, src->f[1] ); dst->f[2] = powf( 2.0f, src->f[2] ); dst->f[3] = powf( 2.0f, src->f[3] ); +#endif } static void @@ -528,10 +540,17 @@ micro_lg2( union tgsi_exec_channel *dst, const union tgsi_exec_channel *src ) { +#if FAST_MATH + dst->f[0] = util_fast_log2( src->f[0] ); + dst->f[1] = util_fast_log2( src->f[1] ); + dst->f[2] = util_fast_log2( src->f[2] ); + dst->f[3] = util_fast_log2( src->f[3] ); +#else dst->f[0] = logf( src->f[0] ) * 1.442695f; dst->f[1] = logf( src->f[1] ) * 1.442695f; dst->f[2] = logf( src->f[2] ) * 1.442695f; dst->f[3] = logf( src->f[3] ) * 1.442695f; +#endif } static void @@ -796,10 +815,17 @@ micro_pow( const union tgsi_exec_channel *src0, const union tgsi_exec_channel *src1 ) { +#if FAST_MATH + dst->f[0] = util_fast_pow( src0->f[0], src1->f[0] ); + dst->f[1] = util_fast_pow( src0->f[1], src1->f[1] ); + dst->f[2] = util_fast_pow( src0->f[2], src1->f[2] ); + dst->f[3] = util_fast_pow( src0->f[3], src1->f[3] ); +#else dst->f[0] = powf( src0->f[0], src1->f[0] ); dst->f[1] = powf( src0->f[1], src1->f[1] ); dst->f[2] = powf( src0->f[2], src1->f[2] ); dst->f[3] = powf( src0->f[3], src1->f[3] ); +#endif } static void @@ -2024,7 +2050,11 @@ exec_instruction( /* TGSI_OPCODE_EX2 */ FETCH(&r[0], 0, CHAN_X); +#if FAST_MATH + micro_exp2( &r[0], &r[0] ); +#else micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] ); +#endif FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { STORE( &r[0], 0, chan_index ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 485e5a0e6f..e390607023 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -27,6 +27,7 @@ #include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" +#include "util/u_math.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_exec.h" @@ -42,6 +43,8 @@ */ #define HIGH_PRECISION 1 +#define FAST_MATH 1 + #define FOR_EACH_CHANNEL( CHAN )\ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) @@ -623,10 +626,17 @@ ex24f( { const unsigned X = 0; +#if FAST_MATH + store[X + 0] = util_fast_exp2( store[X + 0] ); + store[X + 1] = util_fast_exp2( store[X + 1] ); + store[X + 2] = util_fast_exp2( store[X + 2] ); + store[X + 3] = util_fast_exp2( store[X + 3] ); +#else store[X + 0] = powf( 2.0f, store[X + 0] ); store[X + 1] = powf( 2.0f, store[X + 1] ); store[X + 2] = powf( 2.0f, store[X + 2] ); store[X + 3] = powf( 2.0f, store[X + 3] ); +#endif } static void @@ -762,10 +772,17 @@ pow4f( { const unsigned X = 0; +#if FAST_MATH + store[X + 0] = util_fast_pow( store[X + 0], store[X + 4] ); + store[X + 1] = util_fast_pow( store[X + 1], store[X + 5] ); + store[X + 2] = util_fast_pow( store[X + 2], store[X + 6] ); + store[X + 3] = util_fast_pow( store[X + 3], store[X + 7] ); +#else store[X + 0] = powf( store[X + 0], store[X + 4] ); store[X + 1] = powf( store[X + 1], store[X + 5] ); store[X + 2] = powf( store[X + 2], store[X + 6] ); store[X + 3] = powf( store[X + 3], store[X + 7] ); +#endif } static void @@ -2235,6 +2252,8 @@ tgsi_emit_sse2( unsigned ok = 1; uint num_immediates = 0; + util_init_math(); + func->csr = func->store; tgsi_parse_init( &parse, tokens ); -- cgit v1.2.3 From 1a46dcc8a927dfb38ca1381e7b3dafb789f8257c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 22 Aug 2008 15:25:21 -0600 Subject: gallium: replace LOG2() macro with util_fast_log2() inline func --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 8 ++++---- src/gallium/drivers/softpipe/sp_context.c | 3 +++ src/gallium/drivers/softpipe/sp_tex_sample.c | 3 ++- src/gallium/include/pipe/p_util.h | 15 --------------- 4 files changed, 9 insertions(+), 20 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index e390607023..00ed4da450 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -713,10 +713,10 @@ lg24f( { const unsigned X = 0; - store[X + 0] = LOG2( store[X + 0] ); - store[X + 1] = LOG2( store[X + 1] ); - store[X + 2] = LOG2( store[X + 2] ); - store[X + 3] = LOG2( store[X + 3] ); + store[X + 0] = util_fast_log2( store[X + 0] ); + store[X + 1] = util_fast_log2( store[X + 1] ); + store[X + 2] = util_fast_log2( store[X + 2] ); + store[X + 3] = util_fast_log2( store[X + 3] ); } static void diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 626c3a9d4e..9b1313bc83 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -33,6 +33,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_util.h" +#include "util/u_math.h" #include "sp_clear.h" #include "sp_context.h" #include "sp_flush.h" @@ -128,6 +129,8 @@ softpipe_create( struct pipe_screen *screen, struct softpipe_context *softpipe = CALLOC_STRUCT(softpipe_context); uint i; + util_init_math(); + #ifdef PIPE_ARCH_X86 softpipe->use_sse = !debug_get_bool_option( "GALLIUM_NOSSE", FALSE ); #else diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 01f4d0ca81..d7680ffa81 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -41,6 +41,7 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" #include "tgsi/tgsi_exec.h" +#include "util/u_math.h" /* @@ -499,7 +500,7 @@ compute_lambda(struct tgsi_sampler *sampler, rho = MAX2(rho, max); } - lambda = LOG2(rho); + lambda = util_fast_log2(rho); lambda += lodbias + sampler->state->lod_bias; lambda = CLAMP(lambda, sampler->state->min_lod, sampler->state->max_lod); diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index 473a8d94ab..660192b28e 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -443,21 +443,6 @@ static INLINE int iround(float f) #define FABSF(x) ((float) fabs(x)) #endif -/* Pretty fast, and accurate. - * Based on code from http://www.flipcode.com/totd/ - */ -static INLINE float LOG2(float val) -{ - union fi num; - int log_2; - - num.f = val; - log_2 = ((num.i >> 23) & 255) - 128; - num.i &= ~(255 << 23); - num.i += 127 << 23; - num.f = ((-1.0f/3) * num.f + 2) * num.f - 2.0f/3; - return num.f + log_2; -} #if defined(__GNUC__) #define CEILF(x) ceilf(x) -- cgit v1.2.3 From 120270def74149b240926f827b80ca073536d462 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 22 Aug 2008 15:49:36 -0600 Subject: gallium: stop using FABSF() macro --- src/gallium/auxiliary/draw/draw_pipe_offset.c | 4 ++-- src/gallium/auxiliary/draw/draw_pipe_wide_line.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index 8f1650e55c..2f5865741c 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -84,8 +84,8 @@ static void do_offset_tri( struct draw_stage *stage, float a = ey*fz - ez*fy; float b = ez*fx - ex*fz; - float dzdx = FABSF(a * inv_det); - float dzdy = FABSF(b * inv_det); + float dzdx = fabsf(a * inv_det); + float dzdy = fabsf(b * inv_det); float zoffset = offset->units + MAX2(dzdx, dzdy) * offset->scale; diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c index 29649f5787..48ec2f1239 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -73,8 +73,8 @@ static void wideline_line( struct draw_stage *stage, float *pos2 = v2->data[pos]; float *pos3 = v3->data[pos]; - const float dx = FABSF(pos0[0] - pos2[0]); - const float dy = FABSF(pos0[1] - pos2[1]); + const float dx = fabsf(pos0[0] - pos2[0]); + const float dy = fabsf(pos0[1] - pos2[1]); /* small tweak to meet GL specification */ const float bias = 0.125f; -- cgit v1.2.3 From a22bdd42d77bc858814f37d657fa940a520dbe56 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 22 Aug 2008 15:51:38 -0600 Subject: gallium: move math macros from p_util.h to u_math.h More can be done... --- src/gallium/auxiliary/util/u_math.h | 49 +++++++++++++++++++++++++++++++++++ src/gallium/include/pipe/p_util.h | 51 ------------------------------------- 2 files changed, 49 insertions(+), 51 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index e179ce700f..09c55e437f 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -29,6 +29,9 @@ /** * Math utilities and approximations for common math functions. * Reduced precision is usually acceptable in shaders... + * + * "fast" is used in the names of functions which are low-precision, + * or at least lower-precision than the normal C lib functions. */ @@ -36,6 +39,7 @@ #define U_MATH_H +#include "pipe/p_compiler.h" #include "pipe/p_util.h" #include "util/u_math.h" @@ -141,4 +145,49 @@ util_fast_pow(float x, float y) } + +/** + * Floor(x), returned as int. + */ +static INLINE int +util_ifloor(float f) +{ + int ai, bi; + double af, bf; + union fi u; + af = (3 << 22) + 0.5 + (double)f; + bf = (3 << 22) + 0.5 - (double)f; + u.f = (float) af; ai = u.i; + u.f = (float) bf; bi = u.i; + return (ai - bi) >> 1; +} + + +/** + * Round float to nearest int. + */ +static INLINE int +util_iround(float f) +{ +#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86) + int r; + __asm__ ("fistpl %0" : "=m" (r) : "t" (f) : "st"); + return r; +#elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) + int r; + _asm { + fld f + fistp r + } + return r; +#else + if (f >= 0.0f) + return (int) (f + 0.5f); + else + return (int) (f - 0.5f); +#endif +} + + + #endif /* U_MATH_H */ diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index 660192b28e..8f5cb4ddc5 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -399,57 +399,6 @@ do { \ } while (0) -static INLINE int ifloor(float f) -{ - int ai, bi; - double af, bf; - union fi u; - - af = (3 << 22) + 0.5 + (double)f; - bf = (3 << 22) + 0.5 - (double)f; - u.f = (float) af; ai = u.i; - u.f = (float) bf; bi = u.i; - return (ai - bi) >> 1; -} - - -#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_X86) -static INLINE int iround(float f) -{ - int r; - __asm__ ("fistpl %0" : "=m" (r) : "t" (f) : "st"); - return r; -} -#elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) -static INLINE int iround(float f) -{ - int r; - _asm { - fld f - fistp r - } - return r; -} -#else -#define IROUND(f) ((int) (((f) >= 0.0F) ? ((f) + 0.5F) : ((f) - 0.5F))) -#endif - - -/* Could maybe have an inline version of this? - */ -#if defined(__GNUC__) -#define FABSF(x) fabsf(x) -#else -#define FABSF(x) ((float) fabs(x)) -#endif - - -#if defined(__GNUC__) -#define CEILF(x) ceilf(x) -#else -#define CEILF(x) ((float) ceil(x)) -#endif - static INLINE int align(int value, int alignment) { return (value + alignment - 1) & ~(alignment - 1); -- cgit v1.2.3 From f9c04d55d045b3cf3ffef24f805166e3995096e2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 22 Aug 2008 15:53:28 -0600 Subject: gallium: insert __cplusplus/extern wrappings --- src/gallium/auxiliary/util/u_math.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 09c55e437f..a541d30a5d 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -44,12 +44,17 @@ #include "util/u_math.h" +#ifdef __cplusplus +extern "C" { +#endif + #define POW2_TABLE_SIZE 256 #define POW2_TABLE_SCALE ((float) (POW2_TABLE_SIZE-1)) extern float pow2_table[POW2_TABLE_SIZE]; + extern void util_init_math(void); @@ -190,4 +195,8 @@ util_iround(float f) +#ifdef __cplusplus +} +#endif + #endif /* U_MATH_H */ -- cgit v1.2.3 From a13475ff0057f1de8e3bc08d6ca42b9e135a3f5a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 22 Aug 2008 16:09:37 -0600 Subject: gallium: replace align_int() with align() The two functions are identical. Removed align_int() from p_util.h --- src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 6 +++--- src/gallium/drivers/i915simple/i915_texture.c | 8 ++++---- src/gallium/drivers/i965simple/brw_tex_layout.c | 8 ++++---- src/gallium/include/pipe/p_util.h | 11 +++-------- 4 files changed, 14 insertions(+), 19 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index 0aec4b71ba..be3535ed9e 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -119,7 +119,7 @@ static void fetch_pipeline_run( struct draw_pt_middle_end *middle, struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vs.vertex_shader; unsigned opt = fpme->opt; - unsigned alloc_count = align_int( fetch_count, 4 ); + unsigned alloc_count = align( fetch_count, 4 ); struct vertex_header *pipeline_verts = (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); @@ -195,7 +195,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vs.vertex_shader; unsigned opt = fpme->opt; - unsigned alloc_count = align_int( count, 4 ); + unsigned alloc_count = align( count, 4 ); struct vertex_header *pipeline_verts = (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); @@ -271,7 +271,7 @@ static void fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle, struct draw_context *draw = fpme->draw; struct draw_vertex_shader *shader = draw->vs.vertex_shader; unsigned opt = fpme->opt; - unsigned alloc_count = align_int( count, 4 ); + unsigned alloc_count = align( count, 4 ); struct vertex_header *pipeline_verts = (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index cf4964b26b..ca0fb8761b 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -220,7 +220,7 @@ i945_miptree_layout_2d( struct i915_texture *tex ) */ if (pt->last_level > 0) { unsigned mip1_nblocksx - = align_int(pf_get_nblocksx(&pt->block, minify(width)), align_x) + = align(pf_get_nblocksx(&pt->block, minify(width)), align_x) + pf_get_nblocksx(&pt->block, minify(minify(width))); if (mip1_nblocksx > nblocksx) @@ -229,14 +229,14 @@ i945_miptree_layout_2d( struct i915_texture *tex ) /* Pitch must be a whole number of dwords */ - tex->stride = align_int(tex->stride, 64); + tex->stride = align(tex->stride, 64); tex->total_nblocksy = 0; for (level = 0; level <= pt->last_level; level++) { i915_miptree_set_level_info(tex, level, 1, width, height, 1); i915_miptree_set_image_offset(tex, level, 0, x, y); - nblocksy = align_int(nblocksy, align_y); + nblocksy = align(nblocksy, align_y); /* Because the images are packed better, the final offset * might not be the maximal one: @@ -246,7 +246,7 @@ i945_miptree_layout_2d( struct i915_texture *tex ) /* Layout_below: step right after second mipmap level. */ if (level == 1) { - x += align_int(nblocksx, align_x); + x += align(nblocksx, align_x); } else { y += nblocksy; diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 8c7725605b..9b6cf81723 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -144,7 +144,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex) */ if (pt->last_level > 0) { unsigned mip1_nblocksx - = align_int(pf_get_nblocksx(&pt->block, minify(width)), align_x) + = align(pf_get_nblocksx(&pt->block, minify(width)), align_x) + pf_get_nblocksx(&pt->block, minify(minify(width))); if (mip1_nblocksx > nblocksx) @@ -153,14 +153,14 @@ static void i945_miptree_layout_2d(struct brw_texture *tex) /* Pitch must be a whole number of dwords */ - tex->stride = align_int(tex->stride, 64); + tex->stride = align(tex->stride, 64); tex->total_nblocksy = 0; for (level = 0; level <= pt->last_level; level++) { intel_miptree_set_level_info(tex, level, 1, x, y, width, height, 1); - nblocksy = align_int(nblocksy, align_y); + nblocksy = align(nblocksy, align_y); /* Because the images are packed better, the final offset * might not be the maximal one: @@ -170,7 +170,7 @@ static void i945_miptree_layout_2d(struct brw_texture *tex) /* Layout_below: step right after second mipmap level. */ if (level == 1) { - x += align_int(nblocksx, align_x); + x += align(nblocksx, align_x); } else { y += nblocksy; diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index 8f5cb4ddc5..cac0039e12 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -289,13 +289,14 @@ align16( void *unaligned ) } -static INLINE int align_int(int x, int align) +static INLINE int align(int value, int alignment) { - return (x + align - 1) & ~(align - 1); + return (value + alignment - 1) & ~(alignment - 1); } + #if defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) static INLINE unsigned ffs( unsigned u ) { @@ -399,12 +400,6 @@ do { \ } while (0) -static INLINE int align(int value, int alignment) -{ - return (value + alignment - 1) & ~(alignment - 1); -} - - /* util/p_util.c */ extern void pipe_copy_rect(ubyte * dst, const struct pipe_format_block *block, -- cgit v1.2.3 From fe1e39afbb147deab60ecc932c24f921b46f1364 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 22 Aug 2008 16:19:22 -0600 Subject: gallium: move pipe_copy_rect(), pipe_fill_rect() protos into new u_rect.h header --- src/gallium/auxiliary/util/p_util.c | 1 + src/gallium/auxiliary/util/u_rect.h | 54 +++++++++++++++++++++++++++ src/gallium/drivers/cell/ppu/cell_surface.c | 1 + src/gallium/drivers/i915simple/i915_surface.c | 1 + src/gallium/drivers/i965simple/brw_surface.c | 1 + src/gallium/drivers/softpipe/sp_surface.c | 1 + src/gallium/include/pipe/p_util.h | 13 ------- src/mesa/state_tracker/st_texture.c | 1 + 8 files changed, 60 insertions(+), 13 deletions(-) create mode 100644 src/gallium/auxiliary/util/u_rect.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_util.c b/src/gallium/auxiliary/util/p_util.c index 271be4edf1..787881b192 100644 --- a/src/gallium/auxiliary/util/p_util.c +++ b/src/gallium/auxiliary/util/p_util.c @@ -33,6 +33,7 @@ #include "pipe/p_defines.h" #include "pipe/p_util.h" #include "pipe/p_format.h" +#include "util/u_rect.h" /** diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h new file mode 100644 index 0000000000..fba4808864 --- /dev/null +++ b/src/gallium/auxiliary/util/u_rect.h @@ -0,0 +1,54 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Pipe copy/fill rect helpers. + */ + + +#ifndef U_RECT_H +#define U_RECT_H + + +#include "pipe/p_format.h" + + +extern void +pipe_copy_rect(ubyte * dst, const struct pipe_format_block *block, + unsigned dst_stride, unsigned dst_x, unsigned dst_y, + unsigned width, unsigned height, const ubyte * src, + int src_stride, unsigned src_x, int src_y); + +extern void +pipe_fill_rect(ubyte * dst, const struct pipe_format_block *block, + unsigned dst_stride, unsigned dst_x, unsigned dst_y, + unsigned width, unsigned height, uint32_t value); + + + +#endif /* U_RECT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c index 5549eb496d..01ffa31c2c 100644 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -30,6 +30,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "util/p_tile.h" +#include "util/u_rect.h" #include "cell_context.h" #include "cell_surface.h" diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index 4430e81626..17b5125e56 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -34,6 +34,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "util/p_tile.h" +#include "util/u_rect.h" /* Assumes all values are within bounds -- no checking at this level - diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index 0be3dfc743..69da252285 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -33,6 +33,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "util/p_tile.h" +#include "util/u_rect.h" diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index 4f1bb881cb..bfbae234f1 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -30,6 +30,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "util/p_tile.h" +#include "util/u_rect.h" #include "sp_context.h" #include "sp_surface.h" diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h index cac0039e12..4a3fca5962 100644 --- a/src/gallium/include/pipe/p_util.h +++ b/src/gallium/include/pipe/p_util.h @@ -31,7 +31,6 @@ #include "p_config.h" #include "p_compiler.h" #include "p_debug.h" -#include "p_format.h" #include "p_pointer.h" #if defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) @@ -400,18 +399,6 @@ do { \ } while (0) -/* util/p_util.c - */ -extern void pipe_copy_rect(ubyte * dst, const struct pipe_format_block *block, - unsigned dst_stride, unsigned dst_x, unsigned dst_y, - unsigned width, unsigned height, const ubyte * src, - int src_stride, unsigned src_x, int src_y); - -extern void -pipe_fill_rect(ubyte * dst, const struct pipe_format_block *block, - unsigned dst_stride, unsigned dst_x, unsigned dst_y, - unsigned width, unsigned height, uint32_t value); - #if defined(_MSC_VER) #if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index 289b78b38b..63046a0ecc 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -38,6 +38,7 @@ #include "pipe/p_inlines.h" #include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "util/u_rect.h" #define DBG if(0) printf -- cgit v1.2.3 From 2b87174ec306673c59386102890f35907be497ef Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 22 Aug 2008 16:22:35 -0600 Subject: gallium: rename p_util.c to u_rect.c (it only contains rect copy/fill helpers) --- src/gallium/auxiliary/util/Makefile | 2 +- src/gallium/auxiliary/util/SConscript | 2 +- src/gallium/auxiliary/util/p_util.c | 151 ---------------------------------- src/gallium/auxiliary/util/u_rect.c | 151 ++++++++++++++++++++++++++++++++++ 4 files changed, 153 insertions(+), 153 deletions(-) delete mode 100644 src/gallium/auxiliary/util/p_util.c create mode 100644 src/gallium/auxiliary/util/u_rect.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index be1b97b535..6eebf6d29b 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -6,7 +6,6 @@ LIBNAME = util C_SOURCES = \ p_debug.c \ p_tile.c \ - p_util.c \ u_blit.c \ u_draw_quad.c \ u_gen_mipmap.c \ @@ -14,6 +13,7 @@ C_SOURCES = \ u_hash_table.c \ u_math.c \ u_mm.c \ + u_rect.c \ u_simple_shaders.c \ u_snprintf.c \ u_time.c diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 7865307a7a..94382fe1f9 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -7,7 +7,6 @@ util = env.ConvenienceLibrary( 'p_debug_mem.c', 'p_debug_prof.c', 'p_tile.c', - 'p_util.c', 'u_blit.c', 'u_draw_quad.c', 'u_gen_mipmap.c', @@ -15,6 +14,7 @@ util = env.ConvenienceLibrary( 'u_hash_table.c', 'u_math.c', 'u_mm.c', + 'u_rect.c', 'u_simple_shaders.c', 'u_snprintf.c', 'u_time.c', diff --git a/src/gallium/auxiliary/util/p_util.c b/src/gallium/auxiliary/util/p_util.c deleted file mode 100644 index 787881b192..0000000000 --- a/src/gallium/auxiliary/util/p_util.c +++ /dev/null @@ -1,151 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * Miscellaneous utility functions. - */ - - -#include "pipe/p_defines.h" -#include "pipe/p_util.h" -#include "pipe/p_format.h" -#include "util/u_rect.h" - - -/** - * Copy 2D rect from one place to another. - * Position and sizes are in pixels. - * src_pitch may be negative to do vertical flip of pixels from source. - */ -void -pipe_copy_rect(ubyte * dst, - const struct pipe_format_block *block, - unsigned dst_stride, - unsigned dst_x, - unsigned dst_y, - unsigned width, - unsigned height, - const ubyte * src, - int src_stride, - unsigned src_x, - int src_y) -{ - unsigned i; - int src_stride_pos = src_stride < 0 ? -src_stride : src_stride; - - assert(block->size > 0); - assert(block->width > 0); - assert(block->height > 0); - assert(src_x >= 0); - assert(src_y >= 0); - assert(dst_x >= 0); - assert(dst_y >= 0); - - dst_x /= block->width; - dst_y /= block->height; - width = (width + block->width - 1)/block->width; - height = (height + block->height - 1)/block->height; - src_x /= block->width; - src_y /= block->height; - - dst += dst_x * block->size; - src += src_x * block->size; - dst += dst_y * dst_stride; - src += src_y * src_stride_pos; - width *= block->size; - - if (width == dst_stride && width == src_stride) - memcpy(dst, src, height * width); - else { - for (i = 0; i < height; i++) { - memcpy(dst, src, width); - dst += dst_stride; - src += src_stride; - } - } -} - -void -pipe_fill_rect(ubyte * dst, - const struct pipe_format_block *block, - unsigned dst_stride, - unsigned dst_x, - unsigned dst_y, - unsigned width, - unsigned height, - uint32_t value) -{ - unsigned i, j; - unsigned width_size; - - assert(block->size > 0); - assert(block->width > 0); - assert(block->height > 0); - assert(dst_x >= 0); - assert(dst_y >= 0); - - dst_x /= block->width; - dst_y /= block->height; - width = (width + block->width - 1)/block->width; - height = (height + block->height - 1)/block->height; - - dst += dst_x * block->size; - dst += dst_y * dst_stride; - width_size = width * block->size; - - switch (block->size) { - case 1: - if(dst_stride == width_size) - memset(dst, (ubyte) value, height * width_size); - else { - for (i = 0; i < height; i++) { - memset(dst, (ubyte) value, width_size); - dst += dst_stride; - } - } - break; - case 2: - for (i = 0; i < height; i++) { - uint16_t *row = (uint16_t *)dst; - for (j = 0; j < width; j++) - *row++ = (uint16_t) value; - dst += dst_stride; - } - break; - case 4: - for (i = 0; i < height; i++) { - uint32_t *row = (uint32_t *)dst; - for (j = 0; j < width; j++) - *row++ = value; - dst += dst_stride; - } - break; - default: - assert(0); - break; - } -} diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c new file mode 100644 index 0000000000..94e447b9d5 --- /dev/null +++ b/src/gallium/auxiliary/util/u_rect.c @@ -0,0 +1,151 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Rectangle-related helper functions. + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_util.h" +#include "pipe/p_format.h" +#include "util/u_rect.h" + + +/** + * Copy 2D rect from one place to another. + * Position and sizes are in pixels. + * src_pitch may be negative to do vertical flip of pixels from source. + */ +void +pipe_copy_rect(ubyte * dst, + const struct pipe_format_block *block, + unsigned dst_stride, + unsigned dst_x, + unsigned dst_y, + unsigned width, + unsigned height, + const ubyte * src, + int src_stride, + unsigned src_x, + int src_y) +{ + unsigned i; + int src_stride_pos = src_stride < 0 ? -src_stride : src_stride; + + assert(block->size > 0); + assert(block->width > 0); + assert(block->height > 0); + assert(src_x >= 0); + assert(src_y >= 0); + assert(dst_x >= 0); + assert(dst_y >= 0); + + dst_x /= block->width; + dst_y /= block->height; + width = (width + block->width - 1)/block->width; + height = (height + block->height - 1)/block->height; + src_x /= block->width; + src_y /= block->height; + + dst += dst_x * block->size; + src += src_x * block->size; + dst += dst_y * dst_stride; + src += src_y * src_stride_pos; + width *= block->size; + + if (width == dst_stride && width == src_stride) + memcpy(dst, src, height * width); + else { + for (i = 0; i < height; i++) { + memcpy(dst, src, width); + dst += dst_stride; + src += src_stride; + } + } +} + +void +pipe_fill_rect(ubyte * dst, + const struct pipe_format_block *block, + unsigned dst_stride, + unsigned dst_x, + unsigned dst_y, + unsigned width, + unsigned height, + uint32_t value) +{ + unsigned i, j; + unsigned width_size; + + assert(block->size > 0); + assert(block->width > 0); + assert(block->height > 0); + assert(dst_x >= 0); + assert(dst_y >= 0); + + dst_x /= block->width; + dst_y /= block->height; + width = (width + block->width - 1)/block->width; + height = (height + block->height - 1)/block->height; + + dst += dst_x * block->size; + dst += dst_y * dst_stride; + width_size = width * block->size; + + switch (block->size) { + case 1: + if(dst_stride == width_size) + memset(dst, (ubyte) value, height * width_size); + else { + for (i = 0; i < height; i++) { + memset(dst, (ubyte) value, width_size); + dst += dst_stride; + } + } + break; + case 2: + for (i = 0; i < height; i++) { + uint16_t *row = (uint16_t *)dst; + for (j = 0; j < width; j++) + *row++ = (uint16_t) value; + dst += dst_stride; + } + break; + case 4: + for (i = 0; i < height; i++) { + uint32_t *row = (uint32_t *)dst; + for (j = 0; j < width; j++) + *row++ = value; + dst += dst_stride; + } + break; + default: + assert(0); + break; + } +} -- cgit v1.2.3 From f3dfd5969d447515635c077b700a2d4957208167 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sat, 23 Aug 2008 12:30:39 +0200 Subject: util: Include missing u_rect.h. --- src/gallium/auxiliary/util/p_tile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c index e366039a27..8219041c80 100644 --- a/src/gallium/auxiliary/util/p_tile.c +++ b/src/gallium/auxiliary/util/p_tile.c @@ -37,7 +37,7 @@ #include "pipe/p_inlines.h" #include "p_tile.h" - +#include "u_rect.h" /** -- cgit v1.2.3 From e7ff7f78be6c14b6c48e451d6d1f597af379f8f8 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sat, 23 Aug 2008 12:31:16 +0200 Subject: util: Silence compiler warnings on Windows. --- src/gallium/auxiliary/util/u_math.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_math.c b/src/gallium/auxiliary/util/u_math.c index 8bf6551b6e..0729114d6a 100644 --- a/src/gallium/auxiliary/util/u_math.c +++ b/src/gallium/auxiliary/util/u_math.c @@ -39,7 +39,7 @@ init_pow2_table(void) { int i; for (i = 0; i < POW2_TABLE_SIZE; i++) { - pow2_table[i] = pow(2.0, i / POW2_TABLE_SCALE); + pow2_table[i] = (float) pow(2.0, i / POW2_TABLE_SCALE); } } -- cgit v1.2.3 From 4f25420bdd834e81a3e22733304efc5261c2998a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Sun, 24 Aug 2008 17:48:55 -0600 Subject: gallium: refactor/replace p_util.h with util/u_memory.h and util/u_math.h Also, rename p_tile.[ch] to u_tile.[ch] --- src/gallium/README.portability | 4 +- src/gallium/auxiliary/cso_cache/cso_cache.c | 3 +- src/gallium/auxiliary/cso_cache/cso_context.c | 2 +- src/gallium/auxiliary/cso_cache/cso_hash.c | 2 +- src/gallium/auxiliary/draw/draw_context.c | 3 +- src/gallium/auxiliary/draw/draw_pipe.c | 1 - src/gallium/auxiliary/draw/draw_pipe_aaline.c | 3 +- src/gallium/auxiliary/draw/draw_pipe_aapoint.c | 4 +- src/gallium/auxiliary/draw/draw_pipe_clip.c | 4 +- src/gallium/auxiliary/draw/draw_pipe_cull.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_flatshade.c | 4 +- src/gallium/auxiliary/draw/draw_pipe_offset.c | 3 +- src/gallium/auxiliary/draw/draw_pipe_pstipple.c | 4 +- src/gallium/auxiliary/draw/draw_pipe_stipple.c | 6 +- src/gallium/auxiliary/draw/draw_pipe_twoside.c | 3 +- src/gallium/auxiliary/draw/draw_pipe_unfilled.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_util.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_validate.c | 2 +- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 3 +- src/gallium/auxiliary/draw/draw_pipe_wide_line.c | 3 +- src/gallium/auxiliary/draw/draw_pipe_wide_point.c | 3 +- src/gallium/auxiliary/draw/draw_pt.c | 1 - src/gallium/auxiliary/draw/draw_pt_emit.c | 2 +- src/gallium/auxiliary/draw/draw_pt_fetch.c | 2 +- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 2 +- .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 3 +- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 3 +- src/gallium/auxiliary/draw/draw_pt_post_vs.c | 2 +- src/gallium/auxiliary/draw/draw_pt_util.c | 1 - src/gallium/auxiliary/draw/draw_pt_varray.c | 4 +- src/gallium/auxiliary/draw/draw_pt_vcache.c | 2 +- src/gallium/auxiliary/draw/draw_vbuf.h | 2 - src/gallium/auxiliary/draw/draw_vs.c | 6 +- src/gallium/auxiliary/draw/draw_vs_aos.c | 4 +- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 2 +- src/gallium/auxiliary/draw/draw_vs_aos_machine.c | 3 +- src/gallium/auxiliary/draw/draw_vs_exec.c | 3 +- src/gallium/auxiliary/draw/draw_vs_llvm.c | 1 - src/gallium/auxiliary/draw/draw_vs_sse.c | 3 +- src/gallium/auxiliary/draw/draw_vs_varient.c | 3 +- src/gallium/auxiliary/gallivm/gallivm_cpu.cpp | 3 +- src/gallium/auxiliary/gallivm/instructions.cpp | 2 +- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 2 +- .../auxiliary/pipebuffer/pb_buffer_fenced.c | 2 +- .../auxiliary/pipebuffer/pb_buffer_malloc.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c | 2 +- .../auxiliary/pipebuffer/pb_bufmgr_fenced.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_validate.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_winsys.c | 2 +- src/gallium/auxiliary/rtasm/rtasm_execmem.c | 2 +- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 2 +- src/gallium/auxiliary/sct/sct.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_build.c | 1 - src/gallium/auxiliary/tgsi/tgsi_build.h | 4 + src/gallium/auxiliary/tgsi/tgsi_dump_c.c | 1 - src/gallium/auxiliary/tgsi/tgsi_exec.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_parse.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_scan.c | 6 +- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_transform.c | 1 + src/gallium/auxiliary/tgsi/tgsi_transform.h | 1 - src/gallium/auxiliary/tgsi/tgsi_util.c | 1 - src/gallium/auxiliary/translate/translate.c | 1 - src/gallium/auxiliary/translate/translate_cache.c | 2 +- .../auxiliary/translate/translate_generic.c | 2 +- src/gallium/auxiliary/translate/translate_sse.c | 2 +- src/gallium/auxiliary/util/Makefile | 2 +- src/gallium/auxiliary/util/SConscript | 2 +- src/gallium/auxiliary/util/p_debug.c | 1 - src/gallium/auxiliary/util/u_blit.c | 5 +- src/gallium/auxiliary/util/u_gen_mipmap.c | 2 +- src/gallium/auxiliary/util/u_handle_table.c | 4 +- src/gallium/auxiliary/util/u_hash_table.c | 5 +- src/gallium/auxiliary/util/u_math.h | 240 +++- src/gallium/auxiliary/util/u_memory.h | 222 ++++ src/gallium/auxiliary/util/u_mm.c | 2 +- src/gallium/auxiliary/util/u_pack_color.h | 36 +- src/gallium/auxiliary/util/u_pointer.h | 107 ++ src/gallium/auxiliary/util/u_rect.c | 1 - src/gallium/auxiliary/util/u_simple_shaders.c | 2 +- src/gallium/auxiliary/util/u_tile.c | 1169 ++++++++++++++++++++ src/gallium/auxiliary/util/u_tile.h | 101 ++ src/gallium/drivers/cell/common.h | 1 - src/gallium/drivers/cell/ppu/cell_clear.c | 2 +- src/gallium/drivers/cell/ppu/cell_context.c | 2 +- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 2 +- src/gallium/drivers/cell/ppu/cell_render.c | 2 +- src/gallium/drivers/cell/ppu/cell_screen.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_derived.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_emit.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_shader.c | 2 +- src/gallium/drivers/cell/ppu/cell_surface.c | 2 +- src/gallium/drivers/cell/ppu/cell_texture.c | 2 +- src/gallium/drivers/cell/ppu/cell_winsys.c | 2 +- src/gallium/drivers/cell/spu/spu_exec.c | 1 - src/gallium/drivers/cell/spu/spu_tri.c | 1 - src/gallium/drivers/cell/spu/spu_util.c | 1 - src/gallium/drivers/cell/spu/spu_vertex_fetch.c | 1 - src/gallium/drivers/cell/spu/spu_vertex_shader.c | 1 - src/gallium/drivers/failover/fo_context.c | 2 +- src/gallium/drivers/i915simple/i915_context.c | 2 +- src/gallium/drivers/i915simple/i915_debug_fp.c | 2 +- src/gallium/drivers/i915simple/i915_fpc.h | 1 - .../drivers/i915simple/i915_fpc_translate.c | 2 + src/gallium/drivers/i915simple/i915_prim_emit.c | 4 +- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 3 +- src/gallium/drivers/i915simple/i915_screen.c | 2 +- src/gallium/drivers/i915simple/i915_state.c | 3 +- .../drivers/i915simple/i915_state_derived.c | 2 +- .../drivers/i915simple/i915_state_dynamic.c | 4 +- .../drivers/i915simple/i915_state_immediate.c | 2 +- .../drivers/i915simple/i915_state_sampler.c | 2 +- src/gallium/drivers/i915simple/i915_surface.c | 3 +- src/gallium/drivers/i915simple/i915_texture.c | 3 +- src/gallium/drivers/i965simple/brw_cc.c | 6 +- src/gallium/drivers/i965simple/brw_clip_state.c | 3 +- src/gallium/drivers/i965simple/brw_context.c | 2 +- src/gallium/drivers/i965simple/brw_curbe.c | 3 +- src/gallium/drivers/i965simple/brw_draw_upload.c | 1 + src/gallium/drivers/i965simple/brw_gs_state.c | 3 +- src/gallium/drivers/i965simple/brw_screen.c | 2 +- src/gallium/drivers/i965simple/brw_sf_state.c | 5 +- src/gallium/drivers/i965simple/brw_shader_info.c | 2 +- src/gallium/drivers/i965simple/brw_state.c | 2 +- src/gallium/drivers/i965simple/brw_state_batch.c | 2 +- src/gallium/drivers/i965simple/brw_state_cache.c | 2 +- src/gallium/drivers/i965simple/brw_state_pool.c | 3 +- src/gallium/drivers/i965simple/brw_state_upload.c | 2 +- src/gallium/drivers/i965simple/brw_surface.c | 3 +- src/gallium/drivers/i965simple/brw_tex_layout.c | 8 +- src/gallium/drivers/i965simple/brw_vs_state.c | 3 +- src/gallium/drivers/i965simple/brw_wm.c | 2 +- src/gallium/drivers/i965simple/brw_wm_decl.c | 3 +- src/gallium/drivers/i965simple/brw_wm_glsl.c | 3 +- .../drivers/i965simple/brw_wm_sampler_state.c | 3 +- src/gallium/drivers/i965simple/brw_wm_state.c | 3 +- src/gallium/drivers/softpipe/sp_context.c | 2 +- src/gallium/drivers/softpipe/sp_fs_exec.c | 2 +- src/gallium/drivers/softpipe/sp_fs_llvm.c | 2 +- src/gallium/drivers/softpipe/sp_fs_sse.c | 2 +- src/gallium/drivers/softpipe/sp_prim_setup.c | 2 +- src/gallium/drivers/softpipe/sp_prim_vbuf.c | 1 + src/gallium/drivers/softpipe/sp_quad_alpha_test.c | 2 +- src/gallium/drivers/softpipe/sp_quad_blend.c | 29 +- src/gallium/drivers/softpipe/sp_quad_bufloop.c | 2 +- src/gallium/drivers/softpipe/sp_quad_colormask.c | 3 +- src/gallium/drivers/softpipe/sp_quad_coverage.c | 2 +- src/gallium/drivers/softpipe/sp_quad_depth_test.c | 2 +- src/gallium/drivers/softpipe/sp_quad_earlyz.c | 2 +- src/gallium/drivers/softpipe/sp_quad_fs.c | 3 +- src/gallium/drivers/softpipe/sp_quad_occlusion.c | 2 +- src/gallium/drivers/softpipe/sp_quad_output.c | 2 +- src/gallium/drivers/softpipe/sp_quad_stencil.c | 2 +- src/gallium/drivers/softpipe/sp_quad_stipple.c | 2 +- src/gallium/drivers/softpipe/sp_query.c | 2 +- src/gallium/drivers/softpipe/sp_screen.c | 2 +- src/gallium/drivers/softpipe/sp_setup.c | 2 +- src/gallium/drivers/softpipe/sp_state_blend.c | 2 +- src/gallium/drivers/softpipe/sp_state_derived.c | 3 +- src/gallium/drivers/softpipe/sp_state_fs.c | 2 +- src/gallium/drivers/softpipe/sp_state_rasterizer.c | 2 +- src/gallium/drivers/softpipe/sp_state_sampler.c | 2 +- src/gallium/drivers/softpipe/sp_surface.c | 3 +- src/gallium/drivers/softpipe/sp_tex_sample.c | 2 +- src/gallium/drivers/softpipe/sp_texture.c | 3 +- src/gallium/drivers/softpipe/sp_tile_cache.c | 4 +- src/gallium/drivers/trace/tr_context.c | 2 +- src/gallium/drivers/trace/tr_dump.c | 2 + src/gallium/drivers/trace/tr_dump.h | 1 - src/gallium/drivers/trace/tr_screen.c | 2 +- src/gallium/drivers/trace/tr_state.c | 1 + src/gallium/drivers/trace/tr_stream_stdc.c | 2 +- src/gallium/drivers/trace/tr_stream_wd.c | 2 +- src/gallium/drivers/trace/tr_texture.c | 2 +- src/gallium/drivers/trace/tr_winsys.c | 3 +- src/gallium/include/pipe/p_util.h | 460 -------- src/gallium/state_trackers/python/gallium.i | 2 +- src/gallium/state_trackers/python/st_device.c | 3 +- src/gallium/state_trackers/python/st_sample.c | 5 +- .../state_trackers/python/st_softpipe_winsys.c | 3 +- .../winsys/drm/intel/common/intel_be_device.c | 2 +- .../winsys/drm/intel/dri/intel_winsys_softpipe.c | 2 +- src/gallium/winsys/egl_xlib/egl_xlib.c | 2 +- src/gallium/winsys/egl_xlib/sw_winsys.c | 3 +- src/gallium/winsys/gdi/wmesa.c | 2 +- src/gallium/winsys/xlib/brw_aub.c | 1 - src/gallium/winsys/xlib/xm_winsys.c | 3 +- src/gallium/winsys/xlib/xm_winsys_aub.c | 2 +- src/mesa/state_tracker/acc2.c | 319 ++++++ src/mesa/state_tracker/st_cb_accum.c | 2 +- src/mesa/state_tracker/st_cb_bitmap.c | 2 +- src/mesa/state_tracker/st_cb_drawpixels.c | 2 +- src/mesa/state_tracker/st_cb_readpixels.c | 2 +- src/mesa/state_tracker/st_cb_texture.c | 2 +- src/mesa/state_tracker/st_program.c | 2 +- src/mesa/state_tracker/st_texture.c | 1 - 201 files changed, 2453 insertions(+), 686 deletions(-) create mode 100644 src/gallium/auxiliary/util/u_memory.h create mode 100644 src/gallium/auxiliary/util/u_pointer.h create mode 100644 src/gallium/auxiliary/util/u_tile.c create mode 100644 src/gallium/auxiliary/util/u_tile.h delete mode 100644 src/gallium/include/pipe/p_util.h create mode 100644 src/mesa/state_tracker/acc2.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/README.portability b/src/gallium/README.portability index d5d5987a7f..adecf4bb79 100644 --- a/src/gallium/README.portability +++ b/src/gallium/README.portability @@ -35,8 +35,8 @@ not available in Windows Kernel Mode. Use the appropriate p_*.h include. * Use MALLOC, CALLOC, FREE instead of the malloc, calloc, free functions. -* Use align_pointer() function defined in p_util.h for aligning pointers in a -portable way. +* Use align_pointer() function defined in u_memory.h for aligning pointers + in a portable way. == Debugging == diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c index 36dc46ff80..6b1754ea00 100644 --- a/src/gallium/auxiliary/cso_cache/cso_cache.c +++ b/src/gallium/auxiliary/cso_cache/cso_cache.c @@ -28,9 +28,10 @@ /* Authors: Zack Rusin */ -#include "pipe/p_util.h" #include "pipe/p_debug.h" +#include "util/u_memory.h" + #include "cso_cache.h" #include "cso_hash.h" diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 86e4d46a20..f22ba40824 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -36,7 +36,7 @@ */ #include "pipe/p_state.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c index 0646efd952..7f0044c5a7 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -31,7 +31,7 @@ */ #include "pipe/p_debug.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "cso_hash.h" diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 2f263cf06a..1c26cb31a3 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -31,7 +31,8 @@ */ -#include "pipe/p_util.h" +#include "util/u_memory.h" +#include "util/u_math.h" #include "draw_context.h" #include "draw_vbuf.h" #include "draw_vs.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c index 1db43876ef..3cde9d36d3 100644 --- a/src/gallium/auxiliary/draw/draw_pipe.c +++ b/src/gallium/auxiliary/draw/draw_pipe.c @@ -30,7 +30,6 @@ * Keith Whitwell */ -#include "pipe/p_util.h" #include "draw/draw_private.h" #include "draw/draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c index 991304b2c8..20841bb5d6 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c @@ -32,11 +32,12 @@ */ -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "tgsi/tgsi_transform.h" #include "tgsi/tgsi_dump.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c index c7f4349cb3..2c1cacbdb4 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_aapoint.c +++ b/src/gallium/auxiliary/draw/draw_pipe_aapoint.c @@ -38,7 +38,6 @@ */ -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" @@ -47,6 +46,9 @@ #include "tgsi/tgsi_transform.h" #include "tgsi/tgsi_dump.h" +#include "util/u_math.h" +#include "util/u_memory.h" + #include "draw_context.h" #include "draw_vs.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_clip.c b/src/gallium/auxiliary/draw/draw_pipe_clip.c index fa10f8efca..3265dcd154 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_clip.c +++ b/src/gallium/auxiliary/draw/draw_pipe_clip.c @@ -32,7 +32,9 @@ */ -#include "pipe/p_util.h" +#include "util/u_memory.h" +#include "util/u_math.h" + #include "pipe/p_shader_tokens.h" #include "draw_vs.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_cull.c b/src/gallium/auxiliary/draw/draw_pipe_cull.c index d0d22a38e0..053be5f050 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_cull.c +++ b/src/gallium/auxiliary/draw/draw_pipe_cull.c @@ -33,7 +33,7 @@ */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_defines.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c index 4741b22d02..43d1fecc4d 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_flatshade.c +++ b/src/gallium/auxiliary/draw/draw_pipe_flatshade.c @@ -28,7 +28,9 @@ /* Authors: Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" + #include "pipe/p_shader_tokens.h" #include "draw_vs.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index 2f5865741c..1fea5e6dcb 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -32,7 +32,8 @@ * \author Brian Paul */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c index e97136fa1f..b764d9c518 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c @@ -34,12 +34,14 @@ */ -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" + #include "tgsi/tgsi_transform.h" #include "tgsi/tgsi_dump.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_stipple.c b/src/gallium/auxiliary/draw/draw_pipe_stipple.c index bf0db18a68..b65e2aa102 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_stipple.c +++ b/src/gallium/auxiliary/draw/draw_pipe_stipple.c @@ -36,10 +36,12 @@ */ -#include "pipe/p_util.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" -#include "draw_pipe.h" +#include "util/u_math.h" +#include "util/u_memory.h" + +#include "draw/draw_pipe.h" /** Subclass of draw_stage */ diff --git a/src/gallium/auxiliary/draw/draw_pipe_twoside.c b/src/gallium/auxiliary/draw/draw_pipe_twoside.c index 3ac825f565..c329d92339 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_twoside.c +++ b/src/gallium/auxiliary/draw/draw_pipe_twoside.c @@ -28,7 +28,8 @@ /* Authors: Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "draw_vs.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c index 8f97fdedaa..68835fd1a5 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_unfilled.c +++ b/src/gallium/auxiliary/draw/draw_pipe_unfilled.c @@ -33,7 +33,7 @@ /* Authors: Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_defines.h" #include "draw_private.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_util.c b/src/gallium/auxiliary/draw/draw_pipe_util.c index 04438f4dd0..e22e5fed0c 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_util.c +++ b/src/gallium/auxiliary/draw/draw_pipe_util.c @@ -30,7 +30,7 @@ * Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "draw/draw_private.h" #include "draw/draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c index 6be1d369c3..f34c68728e 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_validate.c +++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c @@ -28,7 +28,7 @@ /* Authors: Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_defines.h" #include "draw_private.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index a6fde77a0e..c0cf4269db 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -35,7 +35,8 @@ #include "pipe/p_debug.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "draw_vbuf.h" #include "draw_private.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c index 48ec2f1239..184e363594 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_line.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_line.c @@ -28,9 +28,10 @@ /* Authors: Keith Whitwell */ -#include "pipe/p_util.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "draw_private.h" #include "draw_pipe.h" diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index 54590984c6..4f1326053d 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -28,7 +28,8 @@ /* Authors: Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" #include "draw_vs.h" diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 85a75525c8..669c11c993 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -30,7 +30,6 @@ * Keith Whitwell */ -#include "pipe/p_util.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 40f05cb9e0..d4eca80588 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 07f4c99164..6377f896fb 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 4a1f3b0953..0684c93d10 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -30,7 +30,7 @@ * Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index fdf9b6fe6a..87094f3092 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -31,7 +31,8 @@ */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index be3535ed9e..f617aac9f7 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -25,7 +25,8 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "draw/draw_context.h" #include "draw/draw_vbuf.h" #include "draw/draw_vertex.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_post_vs.c b/src/gallium/auxiliary/draw/draw_pt_post_vs.c index af6306b1c6..96dc706b99 100644 --- a/src/gallium/auxiliary/draw/draw_pt_post_vs.c +++ b/src/gallium/auxiliary/draw/draw_pt_post_vs.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_context.h" #include "draw/draw_context.h" #include "draw/draw_private.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c index 32c8a9632c..3bc7939c55 100644 --- a/src/gallium/auxiliary/draw/draw_pt_util.c +++ b/src/gallium/auxiliary/draw/draw_pt_util.c @@ -30,7 +30,6 @@ * Keith Whitwell */ -#include "pipe/p_util.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index 46e722a154..c15afe65f1 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -25,7 +25,9 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" + #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index cda2987c9e..b8b5de729d 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -30,7 +30,7 @@ * Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index e90f37872a..62247ccd9f 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -37,8 +37,6 @@ #define DRAW_VBUF_H_ -#include "pipe/p_util.h" - struct draw_context; struct vertex_info; diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index f798b20492..34adbd49b0 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -31,11 +31,15 @@ * Brian Paul */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" + #include "pipe/p_shader_tokens.h" + #include "draw_private.h" #include "draw_context.h" #include "draw_vs.h" + #include "translate/translate.h" #include "translate/translate_cache.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 41bdd012d5..760fcb389f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -29,9 +29,9 @@ */ -#include "pipe/p_util.h" -#include "pipe/p_shader_tokens.h" +#include "util/u_memory.h" #include "util/u_math.h" +#include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_exec.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index eda677cc62..ab3c5b94a5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c index e029b7b4bb..b358bd2df4 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_machine.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_machine.c @@ -29,8 +29,9 @@ #include "pipe/p_config.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_exec.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index e26903d8cc..44563803f9 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -31,7 +31,8 @@ * Brian Paul */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "draw_private.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index fc03473b91..2ce30b9a02 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -32,7 +32,6 @@ * Brian Paul */ -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "draw_private.h" #include "draw_context.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 61f0c084c3..0efabd9de8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -31,13 +31,14 @@ * Brian Paul */ +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_config.h" #include "draw_vs.h" #if defined(PIPE_ARCH_X86) -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "draw_private.h" diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 994ce3e889..4daf05dae7 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -30,7 +30,8 @@ * Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_memory.h" +#include "util/u_math.h" #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vbuf.h" diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp index cf5b978837..e64bfb1c6c 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp @@ -41,11 +41,12 @@ #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" -#include "pipe/p_util.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_dump.h" +#include "util/u_memory.h" + #include #include #include diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index 035224e8f3..a82dc30306 100644 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -35,7 +35,7 @@ #include "storage.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include #include diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index 76049ade7c..efddc04e81 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -29,7 +29,7 @@ #include "storagesoa.h" #include "pipe/p_shader_tokens.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include #include diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index ce41418a0f..8ae052e875 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -45,7 +45,7 @@ #include "pipe/p_debug.h" #include "pipe/p_winsys.h" #include "pipe/p_thread.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_double_list.h" #include "pb_buffer.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c index e90d2e5623..20fc87b39d 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c @@ -35,7 +35,7 @@ #include "pipe/p_debug.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pb_buffer.h" #include "pb_bufmgr.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c index 0d2d6c0c1b..2afaeafa1a 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c @@ -35,7 +35,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pb_buffer.h" #include "pb_bufmgr.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index bed4bec4fe..b914c2d0fe 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -38,7 +38,7 @@ #include "pipe/p_debug.h" #include "pipe/p_winsys.h" #include "pipe/p_thread.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_double_list.h" #include "util/u_time.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index d02e3500ff..5e518370d0 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -37,7 +37,7 @@ #include "pipe/p_debug.h" #include "pipe/p_winsys.h" #include "pipe/p_thread.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_double_list.h" #include "util/u_time.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c index 05efd8ce41..8fc63ce648 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c @@ -35,7 +35,7 @@ #include "pipe/p_debug.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pb_buffer.h" #include "pb_buffer_fenced.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index c51e582611..b40eb6cc90 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -36,7 +36,7 @@ #include "pipe/p_defines.h" #include "pipe/p_debug.h" #include "pipe/p_thread.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_double_list.h" #include "util/u_mm.h" #include "pb_buffer.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c index 95af08929a..93d2cc9635 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -39,7 +39,7 @@ #include "pipe/p_debug.h" #include "pipe/p_thread.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_double_list.h" #include "pb_buffer.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index 598d9ce310..af307e265a 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -39,7 +39,7 @@ #include "pipe/p_debug.h" #include "pipe/p_thread.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_double_list.h" #include "util/u_time.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c index 362fd896f3..1e54fc39d4 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.c +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c @@ -35,7 +35,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_error.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_debug.h" #include "pb_buffer.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_winsys.c b/src/gallium/auxiliary/pipebuffer/pb_winsys.c index 978944091f..28d137dbc4 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_winsys.c +++ b/src/gallium/auxiliary/pipebuffer/pb_winsys.c @@ -35,7 +35,7 @@ #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pb_buffer.h" diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c index 300c1c2d9d..dfa5c35ab6 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -33,7 +33,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" #include "pipe/p_thread.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "rtasm_execmem.h" diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 7f6bf577b2..285ddc0e3f 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -30,7 +30,7 @@ */ #include "pipe/p_compiler.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "rtasm_ppc_spe.h" #ifdef GALLIUM_CELL diff --git a/src/gallium/auxiliary/sct/sct.c b/src/gallium/auxiliary/sct/sct.c index 5e4126e014..49bb7ea92e 100644 --- a/src/gallium/auxiliary/sct/sct.c +++ b/src/gallium/auxiliary/sct/sct.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_state.h" #include "pipe/p_inlines.h" #include "sct.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 050b448fe7..74614d3688 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -26,7 +26,6 @@ **************************************************************************/ #include "pipe/p_debug.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi_build.h" #include "tgsi_parse.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index 6ae7f324f8..7d6234746a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -28,6 +28,10 @@ #ifndef TGSI_BUILD_H #define TGSI_BUILD_H + +struct tgsi_token; + + #if defined __cplusplus extern "C" { #endif diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c index 1025866a25..be25cb45a0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -26,7 +26,6 @@ **************************************************************************/ #include "pipe/p_debug.h" -#include "pipe/p_util.h" #include "util/u_string.h" #include "tgsi_dump_c.h" #include "tgsi_build.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index e28b56c842..fb573fe1f0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -52,11 +52,11 @@ #include "pipe/p_compiler.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_exec.h" +#include "util/u_memory.h" #include "util/u_math.h" #define FAST_MATH 1 diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index d16f0cdcad..3757486ba9 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -26,10 +26,10 @@ **************************************************************************/ #include "pipe/p_debug.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" #include "tgsi_build.h" +#include "util/u_memory.h" void tgsi_full_token_init( diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 59bcf10b53..be4870a498 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -33,11 +33,11 @@ */ -#include "tgsi_scan.h" -#include "tgsi/tgsi_parse.h" +#include "util/u_math.h" #include "tgsi/tgsi_build.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_scan.h" -#include "pipe/p_util.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 00ed4da450..626724ad4e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "pipe/p_debug.h" #include "pipe/p_shader_tokens.h" #include "util/u_math.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.c b/src/gallium/auxiliary/tgsi/tgsi_transform.c index 357f77b05a..ea87da31e5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_transform.c +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.c @@ -31,6 +31,7 @@ * Authors: Brian Paul */ +#include "pipe/p_debug.h" #include "tgsi_transform.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_transform.h b/src/gallium/auxiliary/tgsi/tgsi_transform.h index 3da0b38271..a121adbaef 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_transform.h +++ b/src/gallium/auxiliary/tgsi/tgsi_transform.h @@ -29,7 +29,6 @@ #define TGSI_TRANSFORM_H -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_build.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_util.c b/src/gallium/auxiliary/tgsi/tgsi_util.c index 09486e649e..50101a9bb0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_util.c +++ b/src/gallium/auxiliary/tgsi/tgsi_util.c @@ -26,7 +26,6 @@ **************************************************************************/ #include "pipe/p_debug.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi_parse.h" #include "tgsi_build.h" diff --git a/src/gallium/auxiliary/translate/translate.c b/src/gallium/auxiliary/translate/translate.c index b93fbf9033..7678903f75 100644 --- a/src/gallium/auxiliary/translate/translate.c +++ b/src/gallium/auxiliary/translate/translate.c @@ -31,7 +31,6 @@ */ #include "pipe/p_config.h" -#include "pipe/p_util.h" #include "pipe/p_state.h" #include "translate.h" diff --git a/src/gallium/auxiliary/translate/translate_cache.c b/src/gallium/auxiliary/translate/translate_cache.c index 115dc9287e..d8069a149c 100644 --- a/src/gallium/auxiliary/translate/translate_cache.c +++ b/src/gallium/auxiliary/translate/translate_cache.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_state.h" #include "translate.h" #include "translate_cache.h" diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 4c8179ffa8..4d336f47ea 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -30,7 +30,7 @@ * Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_state.h" #include "translate.h" diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index 18a212ac1c..7955186e16 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -28,7 +28,7 @@ #include "pipe/p_config.h" #include "pipe/p_compiler.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_simple_list.h" #include "translate.h" diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 6eebf6d29b..6e5fd26c05 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -5,7 +5,6 @@ LIBNAME = util C_SOURCES = \ p_debug.c \ - p_tile.c \ u_blit.c \ u_draw_quad.c \ u_gen_mipmap.c \ @@ -16,6 +15,7 @@ C_SOURCES = \ u_rect.c \ u_simple_shaders.c \ u_snprintf.c \ + u_tile.c \ u_time.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 94382fe1f9..ce3fad7068 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -6,7 +6,6 @@ util = env.ConvenienceLibrary( 'p_debug.c', 'p_debug_mem.c', 'p_debug_prof.c', - 'p_tile.c', 'u_blit.c', 'u_draw_quad.c', 'u_gen_mipmap.c', @@ -17,6 +16,7 @@ util = env.ConvenienceLibrary( 'u_rect.c', 'u_simple_shaders.c', 'u_snprintf.c', + 'u_tile.c', 'u_time.c', ]) diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 2c2f2f8931..7d1dba5a24 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -51,7 +51,6 @@ #endif #include "pipe/p_compiler.h" -#include "pipe/p_util.h" #include "pipe/p_debug.h" #include "pipe/p_format.h" #include "pipe/p_state.h" diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index ae087df4cf..05399f9885 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -37,12 +37,13 @@ #include "pipe/p_debug.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" #include "pipe/p_shader_tokens.h" -#include "util/u_draw_quad.h" #include "util/u_blit.h" +#include "util/u_draw_quad.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "util/u_simple_shaders.h" #include "cso_cache/cso_context.h" diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 8713ff5d58..c1e2c19f87 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -37,10 +37,10 @@ #include "pipe/p_debug.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" #include "pipe/p_shader_tokens.h" +#include "util/u_memory.h" #include "util/u_draw_quad.h" #include "util/u_gen_mipmap.h" #include "util/u_simple_shaders.h" diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c index 2176a00959..2c40011923 100644 --- a/src/gallium/auxiliary/util/u_handle_table.c +++ b/src/gallium/auxiliary/util/u_handle_table.c @@ -35,9 +35,9 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" -#include "pipe/p_util.h" -#include "u_handle_table.h" +#include "util/u_memory.h" +#include "util/u_handle_table.h" #define HANDLE_TABLE_INITIAL_SIZE 16 diff --git a/src/gallium/auxiliary/util/u_hash_table.c b/src/gallium/auxiliary/util/u_hash_table.c index dd5eca7fca..0bc8de9632 100644 --- a/src/gallium/auxiliary/util/u_hash_table.c +++ b/src/gallium/auxiliary/util/u_hash_table.c @@ -40,10 +40,11 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" -#include "pipe/p_util.h" #include "cso_cache/cso_hash.h" -#include "u_hash_table.h" + +#include "util/u_memory.h" +#include "util/u_hash_table.h" struct hash_table diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index a541d30a5d..9b4ca39371 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -40,8 +40,6 @@ #include "pipe/p_compiler.h" -#include "pipe/p_util.h" -#include "util/u_math.h" #ifdef __cplusplus @@ -49,6 +47,132 @@ extern "C" { #endif +#if defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) +__inline double ceil(double val) +{ + double ceil_val; + + if((val - (long) val) == 0) { + ceil_val = val; + } + else { + if(val > 0) { + ceil_val = (long) val + 1; + } + else { + ceil_val = (long) val; + } + } + + return ceil_val; +} + +#ifndef PIPE_SUBSYSTEM_WINDOWS_CE +__inline double floor(double val) +{ + double floor_val; + + if((val - (long) val) == 0) { + floor_val = val; + } + else { + if(val > 0) { + floor_val = (long) val; + } + else { + floor_val = (long) val - 1; + } + } + + return floor_val; +} +#endif + +#pragma function(pow) +__inline double __cdecl pow(double val, double exponent) +{ + /* XXX */ + assert(0); + return 0; +} + +#pragma function(log) +__inline double __cdecl log(double val) +{ + /* XXX */ + assert(0); + return 0; +} + +#pragma function(atan2) +__inline double __cdecl atan2(double val) +{ + /* XXX */ + assert(0); + return 0; +} +#else +#include +#include +#endif + + +#if defined(_MSC_VER) +#if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) + +static INLINE float cosf( float f ) +{ + return (float) cos( (double) f ); +} + +static INLINE float sinf( float f ) +{ + return (float) sin( (double) f ); +} + +static INLINE float ceilf( float f ) +{ + return (float) ceil( (double) f ); +} + +static INLINE float floorf( float f ) +{ + return (float) floor( (double) f ); +} + +static INLINE float powf( float f, float g ) +{ + return (float) pow( (double) f, (double) g ); +} + +static INLINE float sqrtf( float f ) +{ + return (float) sqrt( (double) f ); +} + +static INLINE float fabsf( float f ) +{ + return (float) fabs( (double) f ); +} + +static INLINE float logf( float f ) +{ + return (float) log( (double) f ); +} + +#else +/* Work-around an extra semi-colon in VS 2005 logf definition */ +#ifdef logf +#undef logf +#define logf(x) ((float)log((double)(x))) +#endif /* logf */ +#endif +#endif /* _MSC_VER */ + + + + + #define POW2_TABLE_SIZE 256 #define POW2_TABLE_SCALE ((float) (POW2_TABLE_SIZE-1)) extern float pow2_table[POW2_TABLE_SIZE]; @@ -59,6 +183,11 @@ extern void util_init_math(void); +union fi { + float f; + int i; + unsigned ui; +}; /** @@ -195,6 +324,113 @@ util_iround(float f) +#if defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) +/** + * Find first bit set in word. Least significant bit is 1. + * Return 0 if no bits set. + */ +static INLINE +unsigned ffs( unsigned u ) +{ + unsigned i; + + if( u == 0 ) { + return 0; + } + + __asm bsf eax, [u] + __asm inc eax + __asm mov [i], eax + + return i; +} +#endif + + +/** + * Return float bits. + */ +static INLINE unsigned +fui( float f ) +{ + union fi fi; + fi.f = f; + return fi.ui; +} + + + +static INLINE float +ubyte_to_float(ubyte ub) +{ + return (float) ub * (1.0f / 255.0f); +} + + +/** + * Convert float in [0,1] to ubyte in [0,255] with clamping. + */ +static INLINE ubyte +float_to_ubyte(float f) +{ + const int ieee_0996 = 0x3f7f0000; /* 0.996 or so */ + union fi tmp; + + tmp.f = f; + if (tmp.i < 0) { + return (ubyte) 0; + } + else if (tmp.i >= ieee_0996) { + return (ubyte) 255; + } + else { + tmp.f = tmp.f * (255.0f/256.0f) + 32768.0f; + return (ubyte) tmp.i; + } +} + + + +#define CLAMP( X, MIN, MAX ) ( (X)<(MIN) ? (MIN) : ((X)>(MAX) ? (MAX) : (X)) ) + +#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) +#define MAX2( A, B ) ( (A)>(B) ? (A) : (B) ) + + +static INLINE int +align(int value, int alignment) +{ + return (value + alignment - 1) & ~(alignment - 1); +} + + +#ifndef COPY_4V +#define COPY_4V( DST, SRC ) \ +do { \ + (DST)[0] = (SRC)[0]; \ + (DST)[1] = (SRC)[1]; \ + (DST)[2] = (SRC)[2]; \ + (DST)[3] = (SRC)[3]; \ +} while (0) +#endif + + +#ifndef COPY_4FV +#define COPY_4FV( DST, SRC ) COPY_4V(DST, SRC) +#endif + + +#ifndef ASSIGN_4V +#define ASSIGN_4V( DST, V0, V1, V2, V3 ) \ +do { \ + (DST)[0] = (V0); \ + (DST)[1] = (V1); \ + (DST)[2] = (V2); \ + (DST)[3] = (V3); \ +} while (0) +#endif + + #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h new file mode 100644 index 0000000000..148a5cb997 --- /dev/null +++ b/src/gallium/auxiliary/util/u_memory.h @@ -0,0 +1,222 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +/** + * Memory functions + */ + + +#ifndef U_MEMORY_H +#define U_MEMORY_H + + +#include "util/u_pointer.h" + + + /* Define ENOMEM for WINCE */ +#if (_WIN32_WCE < 600) +#ifndef ENOMEM +#define ENOMEM 12 +#endif +#endif + + + +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && defined(DEBUG) + +/* memory debugging */ + +#include "p_debug.h" + +#define MALLOC( _size ) \ + debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size ) +#define CALLOC( _count, _size ) \ + debug_calloc(__FILE__, __LINE__, __FUNCTION__, _count, _size ) +#define FREE( _ptr ) \ + debug_free( __FILE__, __LINE__, __FUNCTION__, _ptr ) +#define REALLOC( _ptr, _old_size, _size ) \ + debug_realloc( __FILE__, __LINE__, __FUNCTION__, _ptr, _old_size, _size ) + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + +void * __stdcall +EngAllocMem( + unsigned long Flags, + unsigned long MemSize, + unsigned long Tag ); + +void __stdcall +EngFreeMem( + void *Mem ); + +#define MALLOC( _size ) EngAllocMem( 0, _size, 'D3AG' ) +#define _FREE( _ptr ) EngFreeMem( _ptr ) + +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + +void * +ExAllocatePool( + unsigned long PoolType, + size_t NumberOfBytes); + +void +ExFreePool(void *P); + +#define MALLOC(_size) ExAllocatePool(0, _size) +#define _FREE(_ptr) ExFreePool(_ptr) + +#else + +#define MALLOC( SIZE ) malloc( SIZE ) +#define CALLOC( COUNT, SIZE ) calloc( COUNT, SIZE ) +#define FREE( PTR ) free( PTR ) +#define REALLOC( OLDPTR, OLDSIZE, NEWSIZE ) realloc( OLDPTR, NEWSIZE ) + +#endif + + +#ifndef CALLOC +static INLINE void * +CALLOC( unsigned count, unsigned size ) +{ + void *ptr = MALLOC( count * size ); + if( ptr ) { + memset( ptr, 0, count * size ); + } + return ptr; +} +#endif /* !CALLOC */ + +#ifndef FREE +static INLINE void +FREE( void *ptr ) +{ + if( ptr ) { + _FREE( ptr ); + } +} +#endif /* !FREE */ + +#ifndef REALLOC +static INLINE void * +REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) +{ + void *new_ptr = NULL; + + if (new_size != 0) { + unsigned copy_size = old_size < new_size ? old_size : new_size; + new_ptr = MALLOC( new_size ); + if (new_ptr && old_ptr && copy_size) { + memcpy( new_ptr, old_ptr, copy_size ); + } + } + + FREE( old_ptr ); + return new_ptr; +} +#endif /* !REALLOC */ + + +#define MALLOC_STRUCT(T) (struct T *) MALLOC(sizeof(struct T)) + +#define CALLOC_STRUCT(T) (struct T *) CALLOC(1, sizeof(struct T)) + + +/** + * Return memory on given byte alignment + */ +static INLINE void * +align_malloc(size_t bytes, uint alignment) +{ +#if defined(HAVE_POSIX_MEMALIGN) + void *mem; + alignment = (alignment + (uint)sizeof(void*) - 1) & ~((uint)sizeof(void*) - 1); + if(posix_memalign(& mem, alignment, bytes) != 0) + return NULL; + return mem; +#else + char *ptr, *buf; + + assert( alignment > 0 ); + + ptr = (char *) MALLOC(bytes + alignment + sizeof(void *)); + if (!ptr) + return NULL; + + buf = (char *) align_pointer( ptr + sizeof(void *), alignment ); + *(char **)(buf - sizeof(void *)) = ptr; + + return buf; +#endif /* defined(HAVE_POSIX_MEMALIGN) */ +} + +/** + * Free memory returned by align_malloc(). + */ +static INLINE void +align_free(void *ptr) +{ +#if defined(HAVE_POSIX_MEMALIGN) + FREE(ptr); +#else + void **cubbyHole = (void **) ((char *) ptr - sizeof(void *)); + void *realAddr = *cubbyHole; + FREE(realAddr); +#endif /* defined(HAVE_POSIX_MEMALIGN) */ +} + + +/** + * Duplicate a block of memory. + */ +static INLINE void * +mem_dup(const void *src, uint size) +{ + void *dup = MALLOC(size); + if (dup) + memcpy(dup, src, size); + return dup; +} + + +/** + * Number of elements in an array. + */ +#ifndef Elements +#define Elements(x) (sizeof(x)/sizeof((x)[0])) +#endif + + +/** + * Offset of a field in a struct, in bytes. + */ +#define Offset(TYPE, MEMBER) ((unsigned)&(((TYPE *)NULL)->MEMBER)) + + + +#endif /* U_MEMORY_H */ diff --git a/src/gallium/auxiliary/util/u_mm.c b/src/gallium/auxiliary/util/u_mm.c index b49ae074e0..0f51dd5977 100644 --- a/src/gallium/auxiliary/util/u_mm.c +++ b/src/gallium/auxiliary/util/u_mm.c @@ -24,9 +24,9 @@ #include "pipe/p_compiler.h" -#include "pipe/p_util.h" #include "pipe/p_debug.h" +#include "util/u_memory.h" #include "util/u_mm.h" diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 06abb34d5a..39e4ae9d07 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -37,6 +37,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_format.h" +#include "util/u_math.h" /** @@ -150,10 +151,10 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest) if (pf_size_x(format) <= 8) { /* format uses 8-bit components or less */ - UNCLAMPED_FLOAT_TO_UBYTE(r, rgba[0]); - UNCLAMPED_FLOAT_TO_UBYTE(g, rgba[1]); - UNCLAMPED_FLOAT_TO_UBYTE(b, rgba[2]); - UNCLAMPED_FLOAT_TO_UBYTE(a, rgba[3]); + r = float_to_ubyte(rgba[0]); + g = float_to_ubyte(rgba[1]); + b = float_to_ubyte(rgba[2]); + a = float_to_ubyte(rgba[3]); } switch (format) { @@ -286,4 +287,31 @@ util_pack_z(enum pipe_format format, double z) } +/** + * Pack 4 ubytes into a 4-byte word + */ +static INLINE unsigned +pack_ub4(ubyte b0, ubyte b1, ubyte b2, ubyte b3) +{ + return ((((unsigned int)b0) << 0) | + (((unsigned int)b1) << 8) | + (((unsigned int)b2) << 16) | + (((unsigned int)b3) << 24)); +} + + +/** + * Pack/convert 4 floats into one 4-byte word. + */ +static INLINE unsigned +pack_ui32_float4(float a, float b, float c, float d) +{ + return pack_ub4( float_to_ubyte(a), + float_to_ubyte(b), + float_to_ubyte(c), + float_to_ubyte(d) ); +} + + + #endif /* U_PACK_COLOR_H */ diff --git a/src/gallium/auxiliary/util/u_pointer.h b/src/gallium/auxiliary/util/u_pointer.h new file mode 100644 index 0000000000..e1af9f11cb --- /dev/null +++ b/src/gallium/auxiliary/util/u_pointer.h @@ -0,0 +1,107 @@ +/************************************************************************** + * + * Copyright 2007-2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_POINTER_H +#define U_POINTER_H + +#include "pipe/p_compiler.h" + +#ifdef __cplusplus +extern "C" { +#endif + +static INLINE intptr_t +pointer_to_intptr( const void *p ) +{ + union { + const void *p; + intptr_t i; + } pi; + pi.p = p; + return pi.i; +} + +static INLINE void * +intptr_to_pointer( intptr_t i ) +{ + union { + void *p; + intptr_t i; + } pi; + pi.i = i; + return pi.p; +} + +static INLINE uintptr_t +pointer_to_uintptr( const void *ptr ) +{ + union { + const void *p; + uintptr_t u; + } pu; + pu.p = ptr; + return pu.u; +} + +static INLINE void * +uintptr_to_pointer( uintptr_t u ) +{ + union { + void *p; + uintptr_t u; + } pu; + pu.u = u; + return pu.p; +} + +/** + * Return a pointer aligned to next multiple of N bytes. + */ +static INLINE void * +align_pointer( const void *unaligned, uintptr_t alignment ) +{ + uintptr_t aligned = (pointer_to_uintptr( unaligned ) + alignment - 1) & ~(alignment - 1); + return uintptr_to_pointer( aligned ); +} + + +/** + * Return a pointer aligned to next multiple of 16 bytes. + */ +static INLINE void * +align16( void *unaligned ) +{ + return align_pointer( unaligned, 16 ); +} + + + +#ifdef __cplusplus +} +#endif + +#endif /* U_POINTER_H */ diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c index 94e447b9d5..b31ab5415f 100644 --- a/src/gallium/auxiliary/util/u_rect.c +++ b/src/gallium/auxiliary/util/u_rect.c @@ -31,7 +31,6 @@ #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_format.h" #include "util/u_rect.h" diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index c34fb6ee33..f06d13c2c4 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -37,10 +37,10 @@ #include "pipe/p_debug.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" #include "pipe/p_shader_tokens.h" +#include "util/u_memory.h" #include "util/u_simple_shaders.h" #include "tgsi/tgsi_build.h" diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c new file mode 100644 index 0000000000..853c503f4f --- /dev/null +++ b/src/gallium/auxiliary/util/u_tile.c @@ -0,0 +1,1169 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * RGBA/float tile get/put functions. + * Usable both by drivers and state trackers. + * Surfaces should already be in a mapped state. + */ + + +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_rect.h" +#include "util/u_tile.h" + + +/** + * Move raw block of pixels from surface to user memory. + * This should be usable by any hw driver that has mappable surfaces. + */ +void +pipe_get_tile_raw(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + void *dst, int dst_stride) +{ + const void *src; + + if (dst_stride == 0) + dst_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + src = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ); + assert(src); + if(!src) + return; + + pipe_copy_rect(dst, &ps->block, dst_stride, 0, 0, w, h, src, ps->stride, x, y); + + pipe_surface_unmap(ps); +} + + +/** + * Move raw block of pixels from user memory to surface. + * This should be usable by any hw driver that has mappable surfaces. + */ +void +pipe_put_tile_raw(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const void *src, int src_stride) +{ + void *dst; + + if (src_stride == 0) + src_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + dst = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE); + assert(dst); + if(!dst) + return; + + pipe_copy_rect(dst, &ps->block, ps->stride, x, y, w, h, src, src_stride, 0, 0); + + pipe_surface_unmap(ps); +} + + + + +/** Convert short in [-32768,32767] to GLfloat in [-1.0,1.0] */ +#define SHORT_TO_FLOAT(S) ((2.0F * (S) + 1.0F) * (1.0F/65535.0F)) + +#define UNCLAMPED_FLOAT_TO_SHORT(us, f) \ + us = ( (short) ( CLAMP((f), -1.0, 1.0) * 32767.0F) ) + + + +/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/ + +static void +a8r8g8b8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const unsigned pixel = *src++; + pRow[0] = ubyte_to_float((pixel >> 16) & 0xff); + pRow[1] = ubyte_to_float((pixel >> 8) & 0xff); + pRow[2] = ubyte_to_float((pixel >> 0) & 0xff); + pRow[3] = ubyte_to_float((pixel >> 24) & 0xff); + } + p += dst_stride; + } +} + + +static void +a8r8g8b8_put_tile_rgba(unsigned *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + a = float_to_ubyte(pRow[3]); + *dst++ = (a << 24) | (r << 16) | (g << 8) | b; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/ + +static void +x8r8g8b8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const unsigned pixel = *src++; + pRow[0] = ubyte_to_float((pixel >> 16) & 0xff); + pRow[1] = ubyte_to_float((pixel >> 8) & 0xff); + pRow[2] = ubyte_to_float((pixel >> 0) & 0xff); + pRow[3] = ubyte_to_float(0xff); + } + p += dst_stride; + } +} + + +static void +x8r8g8b8_put_tile_rgba(unsigned *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + *dst++ = (0xff << 24) | (r << 16) | (g << 8) | b; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_B8G8R8A8_UNORM ***/ + +static void +b8g8r8a8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const unsigned pixel = *src++; + pRow[0] = ubyte_to_float((pixel >> 8) & 0xff); + pRow[1] = ubyte_to_float((pixel >> 16) & 0xff); + pRow[2] = ubyte_to_float((pixel >> 24) & 0xff); + pRow[3] = ubyte_to_float((pixel >> 0) & 0xff); + } + p += dst_stride; + } +} + + +static void +b8g8r8a8_put_tile_rgba(unsigned *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + a = float_to_ubyte(pRow[3]); + *dst++ = (b << 24) | (g << 16) | (r << 8) | a; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_A1R5G5B5_UNORM ***/ + +static void +a1r5g5b5_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const ushort pixel = *src++; + pRow[0] = ((pixel >> 10) & 0x1f) * (1.0f / 31.0f); + pRow[1] = ((pixel >> 5) & 0x1f) * (1.0f / 31.0f); + pRow[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f); + pRow[3] = ((pixel >> 15) ) * 1.0f; + } + p += dst_stride; + } +} + + +static void +a1r5g5b5_put_tile_rgba(ushort *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + a = float_to_ubyte(pRow[3]); + r = r >> 3; /* 5 bits */ + g = g >> 3; /* 5 bits */ + b = b >> 3; /* 5 bits */ + a = a >> 7; /* 1 bit */ + *dst++ = (a << 15) | (r << 10) | (g << 5) | b; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_A4R4G4B4_UNORM ***/ + +static void +a4r4g4b4_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const ushort pixel = *src++; + pRow[0] = ((pixel >> 8) & 0xf) * (1.0f / 15.0f); + pRow[1] = ((pixel >> 4) & 0xf) * (1.0f / 15.0f); + pRow[2] = ((pixel ) & 0xf) * (1.0f / 15.0f); + pRow[3] = ((pixel >> 12) ) * (1.0f / 15.0f); + } + p += dst_stride; + } +} + + +static void +a4r4g4b4_put_tile_rgba(ushort *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, g, b, a; + r = float_to_ubyte(pRow[0]); + g = float_to_ubyte(pRow[1]); + b = float_to_ubyte(pRow[2]); + a = float_to_ubyte(pRow[3]); + r >>= 4; + g >>= 4; + b >>= 4; + a >>= 4; + *dst++ = (a << 12) | (r << 16) | (g << 4) | b; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_R5G6B5_UNORM ***/ + +static void +r5g6b5_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + const ushort pixel = *src++; + pRow[0] = ((pixel >> 11) & 0x1f) * (1.0f / 31.0f); + pRow[1] = ((pixel >> 5) & 0x3f) * (1.0f / 63.0f); + pRow[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f); + pRow[3] = 1.0f; + } + p += dst_stride; + } +} + + +static void +r5g6b5_put_tile_rgba(ushort *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + uint r = (uint) (CLAMP(pRow[0], 0.0, 1.0) * 31.0); + uint g = (uint) (CLAMP(pRow[1], 0.0, 1.0) * 63.0); + uint b = (uint) (CLAMP(pRow[2], 0.0, 1.0) * 31.0); + *dst++ = (r << 11) | (g << 5) | (b); + } + p += src_stride; + } +} + + + +/*** PIPE_FORMAT_Z16_UNORM ***/ + +/** + * Return each Z value as four floats in [0,1]. + */ +static void +z16_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + const float scale = 1.0f / 65535.0f; + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = *src++ * scale; + } + p += dst_stride; + } +} + + + + +/*** PIPE_FORMAT_L8_UNORM ***/ + +static void +l8_get_tile_rgba(const ubyte *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = ubyte_to_float(*src); + pRow[3] = 1.0; + } + p += dst_stride; + } +} + + +static void +l8_put_tile_rgba(ubyte *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r; + r = float_to_ubyte(pRow[0]); + *dst++ = r; + } + p += src_stride; + } +} + + + +/*** PIPE_FORMAT_A8_UNORM ***/ + +static void +a8_get_tile_rgba(const ubyte *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = 0.0; + pRow[3] = ubyte_to_float(*src); + } + p += dst_stride; + } +} + + +static void +a8_put_tile_rgba(ubyte *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned a; + a = float_to_ubyte(pRow[3]); + *dst++ = a; + } + p += src_stride; + } +} + + + +/*** PIPE_FORMAT_R16_SNORM ***/ + +static void +r16_get_tile_rgba(const short *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src++, pRow += 4) { + pRow[0] = SHORT_TO_FLOAT(src[0]); + pRow[1] = + pRow[2] = 0.0; + pRow[3] = 1.0; + } + p += dst_stride; + } +} + + +static void +r16_put_tile_rgba(short *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, dst++, pRow += 4) { + UNCLAMPED_FLOAT_TO_SHORT(dst[0], pRow[0]); + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_R16G16B16A16_SNORM ***/ + +static void +r16g16b16a16_get_tile_rgba(const short *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src += 4, pRow += 4) { + pRow[0] = SHORT_TO_FLOAT(src[0]); + pRow[1] = SHORT_TO_FLOAT(src[1]); + pRow[2] = SHORT_TO_FLOAT(src[2]); + pRow[3] = SHORT_TO_FLOAT(src[3]); + } + p += dst_stride; + } +} + + +static void +r16g16b16a16_put_tile_rgba(short *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, dst += 4, pRow += 4) { + UNCLAMPED_FLOAT_TO_SHORT(dst[0], pRow[0]); + UNCLAMPED_FLOAT_TO_SHORT(dst[1], pRow[1]); + UNCLAMPED_FLOAT_TO_SHORT(dst[2], pRow[2]); + UNCLAMPED_FLOAT_TO_SHORT(dst[3], pRow[3]); + } + p += src_stride; + } +} + + + +/*** PIPE_FORMAT_I8_UNORM ***/ + +static void +i8_get_tile_rgba(const ubyte *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, src++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = ubyte_to_float(*src); + } + p += dst_stride; + } +} + + +static void +i8_put_tile_rgba(ubyte *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r; + r = float_to_ubyte(pRow[0]); + *dst++ = r; + } + p += src_stride; + } +} + + +/*** PIPE_FORMAT_A8L8_UNORM ***/ + +static void +a8l8_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + ushort p = *src++; + pRow[0] = + pRow[1] = + pRow[2] = ubyte_to_float(p & 0xff); + pRow[3] = ubyte_to_float(p >> 8); + } + p += dst_stride; + } +} + + +static void +a8l8_put_tile_rgba(ushort *dst, + unsigned w, unsigned h, + const float *p, + unsigned src_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + const float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + unsigned r, a; + r = float_to_ubyte(pRow[0]); + a = float_to_ubyte(pRow[3]); + *dst++ = (a << 8) | r; + } + p += src_stride; + } +} + + + + +/*** PIPE_FORMAT_Z32_UNORM ***/ + +/** + * Return each Z value as four floats in [0,1]. + */ +static void +z32_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + const double scale = 1.0 / (double) 0xffffffff; + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float) (*src++ * scale); + } + p += dst_stride; + } +} + + +/*** PIPE_FORMAT_S8Z24_UNORM ***/ + +/** + * Return Z component as four float in [0,1]. Stencil part ignored. + */ +static void +s8z24_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + const double scale = 1.0 / ((1 << 24) - 1); + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float) (scale * (*src++ & 0xffffff)); + } + p += dst_stride; + } +} + + +/*** PIPE_FORMAT_Z24S8_UNORM ***/ + +/** + * Return Z component as four float in [0,1]. Stencil part ignored. + */ +static void +z24s8_get_tile_rgba(const unsigned *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + const double scale = 1.0 / ((1 << 24) - 1); + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = (float) (scale * (*src++ >> 8)); + } + p += dst_stride; + } +} + + +/*** PIPE_FORMAT_YCBCR / PIPE_FORMAT_YCBCR_REV ***/ + +/** + * Convert YCbCr (or YCrCb) to RGBA. + */ +static void +ycbcr_get_tile_rgba(const ushort *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride, + boolean rev) +{ + const float scale = 1.0f / 255.0f; + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + /* do two texels at a time */ + for (j = 0; j < (w & ~1); j += 2, src += 2) { + const ushort t0 = src[0]; + const ushort t1 = src[1]; + const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */ + const ubyte y1 = (t1 >> 8) & 0xff; /* luminance */ + ubyte cb, cr; + float r, g, b; + + if (rev) { + cb = t1 & 0xff; /* chroma U */ + cr = t0 & 0xff; /* chroma V */ + } + else { + cb = t0 & 0xff; /* chroma U */ + cr = t1 & 0xff; /* chroma V */ + } + + /* even pixel: y0,cr,cb */ + r = 1.164f * (y0-16) + 1.596f * (cr-128); + g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128); + b = 1.164f * (y0-16) + 2.018f * (cb-128); + pRow[0] = r * scale; + pRow[1] = g * scale; + pRow[2] = b * scale; + pRow[3] = 1.0f; + pRow += 4; + + /* odd pixel: use y1,cr,cb */ + r = 1.164f * (y1-16) + 1.596f * (cr-128); + g = 1.164f * (y1-16) - 0.813f * (cr-128) - 0.391f * (cb-128); + b = 1.164f * (y1-16) + 2.018f * (cb-128); + pRow[0] = r * scale; + pRow[1] = g * scale; + pRow[2] = b * scale; + pRow[3] = 1.0f; + pRow += 4; + + } + /* do the last texel */ + if (w & 1) { + const ushort t0 = src[0]; + const ushort t1 = src[1]; + const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */ + ubyte cb, cr; + float r, g, b; + + if (rev) { + cb = t1 & 0xff; /* chroma U */ + cr = t0 & 0xff; /* chroma V */ + } + else { + cb = t0 & 0xff; /* chroma U */ + cr = t1 & 0xff; /* chroma V */ + } + + /* even pixel: y0,cr,cb */ + r = 1.164f * (y0-16) + 1.596f * (cr-128); + g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128); + b = 1.164f * (y0-16) + 2.018f * (cb-128); + pRow[0] = r * scale; + pRow[1] = g * scale; + pRow[2] = b * scale; + pRow[3] = 1.0f; + pRow += 4; + } + p += dst_stride; + } +} + + +void +pipe_tile_raw_to_rgba(enum pipe_format format, + void *src, + uint w, uint h, + float *dst, unsigned dst_stride) +{ + switch (format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + a8r8g8b8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_X8R8G8B8_UNORM: + x8r8g8b8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + b8g8r8a8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_A1R5G5B5_UNORM: + a1r5g5b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_A4R4G4B4_UNORM: + a4r4g4b4_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_R5G6B5_UNORM: + r5g6b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_L8_UNORM: + l8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_A8_UNORM: + a8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_I8_UNORM: + i8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_A8L8_UNORM: + a8l8_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_R16_SNORM: + r16_get_tile_rgba((short *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_R16G16B16A16_SNORM: + r16g16b16a16_get_tile_rgba((short *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_Z16_UNORM: + z16_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_Z32_UNORM: + z32_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + s8z24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_Z24S8_UNORM: + z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); + break; + case PIPE_FORMAT_YCBCR: + ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, FALSE); + break; + case PIPE_FORMAT_YCBCR_REV: + ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, TRUE); + break; + default: + assert(0); + } +} + + +void +pipe_get_tile_rgba(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + float *p) +{ + unsigned dst_stride = w * 4; + void *packed; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size); + + if (!packed) + return; + + if(ps->format == PIPE_FORMAT_YCBCR || ps->format == PIPE_FORMAT_YCBCR_REV) + assert((x & 1) == 0); + + pipe_get_tile_raw(ps, x, y, w, h, packed, 0); + + pipe_tile_raw_to_rgba(ps->format, packed, w, h, p, dst_stride); + + FREE(packed); +} + + +void +pipe_put_tile_rgba(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const float *p) +{ + unsigned src_stride = w * 4; + void *packed; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size); + + if (!packed) + return; + + switch (ps->format) { + case PIPE_FORMAT_A8R8G8B8_UNORM: + a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_X8R8G8B8_UNORM: + x8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_B8G8R8A8_UNORM: + b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_A1R5G5B5_UNORM: + a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_R5G6B5_UNORM: + r5g6b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_R8G8B8A8_UNORM: + assert(0); + break; + case PIPE_FORMAT_A4R4G4B4_UNORM: + a4r4g4b4_put_tile_rgba((ushort *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_L8_UNORM: + l8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_A8_UNORM: + a8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_I8_UNORM: + i8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_A8L8_UNORM: + a8l8_put_tile_rgba((ushort *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_R16_SNORM: + r16_put_tile_rgba((short *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_R16G16B16A16_SNORM: + r16g16b16a16_put_tile_rgba((short *) packed, w, h, p, src_stride); + break; + case PIPE_FORMAT_Z16_UNORM: + /*z16_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_Z32_UNORM: + /*z32_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + /*s8z24_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; + case PIPE_FORMAT_Z24S8_UNORM: + /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ + break; + default: + assert(0); + } + + pipe_put_tile_raw(ps, x, y, w, h, packed, 0); + + FREE(packed); +} + + +/** + * Get a block of Z values, converted to 32-bit range. + */ +void +pipe_get_tile_z(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + uint *z) +{ + const uint dstStride = w; + ubyte *map; + uint *pDest = z; + uint i, j; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + map = (ubyte *)pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ); + if (!map) { + assert(0); + return; + } + + switch (ps->format) { + case PIPE_FORMAT_Z32_UNORM: + { + const uint *pSrc + = (const uint *)(map + y * ps->stride + x*4); + for (i = 0; i < h; i++) { + memcpy(pDest, pSrc, 4 * w); + pDest += dstStride; + pSrc += ps->stride/4; + } + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + { + const uint *pSrc + = (const uint *)(map + y * ps->stride + x*4); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 24-bit Z to 32-bit Z */ + pDest[j] = (pSrc[j] << 8) | (pSrc[j] & 0xff); + } + pDest += dstStride; + pSrc += ps->stride/4; + } + } + break; + case PIPE_FORMAT_Z16_UNORM: + { + const ushort *pSrc + = (const ushort *)(map + y * ps->stride + x*2); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 16-bit Z to 32-bit Z */ + pDest[j] = (pSrc[j] << 16) | pSrc[j]; + } + pDest += dstStride; + pSrc += ps->stride/2; + } + } + break; + default: + assert(0); + } + + pipe_surface_unmap(ps); +} + + +void +pipe_put_tile_z(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const uint *zSrc) +{ + const uint srcStride = w; + const uint *pSrc = zSrc; + ubyte *map; + uint i, j; + + if (pipe_clip_tile(x, y, &w, &h, ps)) + return; + + map = (ubyte *)pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE); + if (!map) { + assert(0); + return; + } + + switch (ps->format) { + case PIPE_FORMAT_Z32_UNORM: + { + uint *pDest = (uint *) (map + y * ps->stride + x*4); + for (i = 0; i < h; i++) { + memcpy(pDest, pSrc, 4 * w); + pDest += ps->stride/4; + pSrc += srcStride; + } + } + break; + case PIPE_FORMAT_S8Z24_UNORM: + case PIPE_FORMAT_X8Z24_UNORM: + { + uint *pDest = (uint *) (map + y * ps->stride + x*4); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 32-bit Z to 24-bit Z (0 stencil) */ + pDest[j] = pSrc[j] >> 8; + } + pDest += ps->stride/4; + pSrc += srcStride; + } + } + break; + case PIPE_FORMAT_Z16_UNORM: + { + ushort *pDest = (ushort *) (map + y * ps->stride + x*2); + for (i = 0; i < h; i++) { + for (j = 0; j < w; j++) { + /* convert 32-bit Z to 16-bit Z */ + pDest[j] = pSrc[j] >> 16; + } + pDest += ps->stride/2; + pSrc += srcStride; + } + } + break; + default: + assert(0); + } + + pipe_surface_unmap(ps); +} + + diff --git a/src/gallium/auxiliary/util/u_tile.h b/src/gallium/auxiliary/util/u_tile.h new file mode 100644 index 0000000000..a8ac805308 --- /dev/null +++ b/src/gallium/auxiliary/util/u_tile.h @@ -0,0 +1,101 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef P_TILE_H +#define P_TILE_H + +#include "pipe/p_compiler.h" + +struct pipe_surface; + + +/** + * Clip tile against surface dims. + * \return TRUE if tile is totally clipped, FALSE otherwise + */ +static INLINE boolean +pipe_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_surface *ps) +{ + if (x >= ps->width) + return TRUE; + if (y >= ps->height) + return TRUE; + if (x + *w > ps->width) + *w = ps->width - x; + if (y + *h > ps->height) + *h = ps->height - y; + return FALSE; +} + +#ifdef __cplusplus +extern "C" { +#endif + +void +pipe_get_tile_raw(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + void *p, int dst_stride); + +void +pipe_put_tile_raw(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const void *p, int src_stride); + + +void +pipe_get_tile_rgba(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + float *p); + +void +pipe_put_tile_rgba(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const float *p); + + +void +pipe_get_tile_z(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + uint *z); + +void +pipe_put_tile_z(struct pipe_surface *ps, + uint x, uint y, uint w, uint h, + const uint *z); + +void +pipe_tile_raw_to_rgba(enum pipe_format format, + void *src, + uint w, uint h, + float *dst, unsigned dst_stride); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index f430e88b9c..6bace0bb11 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -34,7 +34,6 @@ #define CELL_COMMON_H #include "pipe/p_compiler.h" -#include "pipe/p_util.h" #include "pipe/p_format.h" #include "pipe/p_state.h" diff --git a/src/gallium/drivers/cell/ppu/cell_clear.c b/src/gallium/drivers/cell/ppu/cell_clear.c index 3ffe09add6..cee0917b63 100644 --- a/src/gallium/drivers/cell/ppu/cell_clear.c +++ b/src/gallium/drivers/cell/ppu/cell_clear.c @@ -34,7 +34,7 @@ #include #include #include "pipe/p_inlines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "cell/common.h" #include "cell_clear.h" #include "cell_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 12eb5aa254..5af95a3c10 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -35,7 +35,7 @@ #include "pipe/p_defines.h" #include "pipe/p_format.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_winsys.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 67b87f16d7..971d65d09e 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -30,7 +30,7 @@ * Brian Paul */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "draw/draw_context.h" #include "cell_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c index b663b37622..dd25ae880e 100644 --- a/src/gallium/drivers/cell/ppu/cell_render.c +++ b/src/gallium/drivers/cell/ppu/cell_render.c @@ -33,7 +33,7 @@ #include "cell_context.h" #include "cell_render.h" #include "cell_spu.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "draw/draw_private.h" diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index 2bf441a0c5..139b3719b6 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_winsys.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_derived.c b/src/gallium/drivers/cell/ppu/cell_state_derived.c index 5480534ad9..8ab938a02a 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_derived.c +++ b/src/gallium/drivers/cell/ppu/cell_state_derived.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "draw/draw_vertex.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 9cae67f091..3646a0ee4f 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "cell_context.h" #include "cell_state.h" #include "cell_state_emit.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index f5707f2bb8..cd96b317fa 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -26,7 +26,7 @@ **************************************************************************/ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "draw/draw_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_surface.c b/src/gallium/drivers/cell/ppu/cell_surface.c index 01ffa31c2c..2d31ad89a6 100644 --- a/src/gallium/drivers/cell/ppu/cell_surface.c +++ b/src/gallium/drivers/cell/ppu/cell_surface.c @@ -26,7 +26,7 @@ **************************************************************************/ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "util/p_tile.h" diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 533b64227d..1add81373d 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -33,7 +33,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_winsys.h" #include "cell_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_winsys.c b/src/gallium/drivers/cell/ppu/cell_winsys.c index ebabce3c8f..d570bbd2f9 100644 --- a/src/gallium/drivers/cell/ppu/cell_winsys.c +++ b/src/gallium/drivers/cell/ppu/cell_winsys.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "cell_winsys.h" diff --git a/src/gallium/drivers/cell/spu/spu_exec.c b/src/gallium/drivers/cell/spu/spu_exec.c index 42e5022f30..89c61136a4 100644 --- a/src/gallium/drivers/cell/spu/spu_exec.c +++ b/src/gallium/drivers/cell/spu/spu_exec.c @@ -63,7 +63,6 @@ #include "pipe/p_compiler.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index ab4ff8160a..8944ef171e 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -32,7 +32,6 @@ #include #include "pipe/p_compiler.h" #include "pipe/p_format.h" -#include "pipe/p_util.h" #include "spu_colorpack.h" #include "spu_main.h" #include "spu_texture.h" diff --git a/src/gallium/drivers/cell/spu/spu_util.c b/src/gallium/drivers/cell/spu/spu_util.c index 74ab2bbd1f..dbcf4b0eb9 100644 --- a/src/gallium/drivers/cell/spu/spu_util.c +++ b/src/gallium/drivers/cell/spu/spu_util.c @@ -1,4 +1,3 @@ -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" //#include "tgsi_build.h" diff --git a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c index 219fd90cc0..26f2363749 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_fetch.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_fetch.c @@ -32,7 +32,6 @@ * Ian Romanick */ -#include "pipe/p_util.h" #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" #include "spu_exec.h" diff --git a/src/gallium/drivers/cell/spu/spu_vertex_shader.c b/src/gallium/drivers/cell/spu/spu_vertex_shader.c index 3119a78c06..a1e81975e6 100644 --- a/src/gallium/drivers/cell/spu/spu_vertex_shader.c +++ b/src/gallium/drivers/cell/spu/spu_vertex_shader.c @@ -34,7 +34,6 @@ #include -#include "pipe/p_util.h" #include "pipe/p_state.h" #include "pipe/p_shader_tokens.h" #include "spu_vertex_shader.h" diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 014a3e31d5..10c4ffc209 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -28,7 +28,7 @@ #include "pipe/p_defines.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_context.h" #include "fo_context.h" diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index e2bf5ab678..c6776716a2 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -35,7 +35,7 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/i915simple/i915_debug_fp.c b/src/gallium/drivers/i915simple/i915_debug_fp.c index c024a051a5..48be3e1472 100644 --- a/src/gallium/drivers/i915simple/i915_debug_fp.c +++ b/src/gallium/drivers/i915simple/i915_debug_fp.c @@ -29,7 +29,7 @@ #include "i915_reg.h" #include "i915_debug.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" static void diff --git a/src/gallium/drivers/i915simple/i915_fpc.h b/src/gallium/drivers/i915simple/i915_fpc.h index 80a9576304..2f0f99d046 100644 --- a/src/gallium/drivers/i915simple/i915_fpc.h +++ b/src/gallium/drivers/i915simple/i915_fpc.h @@ -29,7 +29,6 @@ #ifndef I915_FPC_H #define I915_FPC_H -#include "pipe/p_util.h" #include "i915_context.h" #include "i915_reg.h" diff --git a/src/gallium/drivers/i915simple/i915_fpc_translate.c b/src/gallium/drivers/i915simple/i915_fpc_translate.c index 64432982c4..34b4a846c1 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_translate.c +++ b/src/gallium/drivers/i915simple/i915_fpc_translate.c @@ -33,6 +33,8 @@ #include "i915_fpc.h" #include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "util/u_string.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_dump.h" diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c index 9ffa460138..d194c2fb15 100644 --- a/src/gallium/drivers/i915simple/i915_prim_emit.c +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -27,7 +27,9 @@ #include "draw/draw_pipe.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" #include "i915_context.h" #include "i915_winsys.h" diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index aef3682bbf..e4ece55098 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -41,9 +41,10 @@ #include "draw/draw_context.h" #include "draw/draw_vbuf.h" #include "pipe/p_debug.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "i915_context.h" #include "i915_reg.h" diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 0afa17bed8..e9e40c3f0b 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_winsys.h" #include "util/u_string.h" diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index e8521b385e..d2487d8277 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -31,8 +31,9 @@ #include "draw/draw_context.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "tgsi/tgsi_parse.h" #include "i915_context.h" diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c index 4daccec6e0..488615067c 100644 --- a/src/gallium/drivers/i915simple/i915_state_derived.c +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "draw/draw_vertex.h" diff --git a/src/gallium/drivers/i915simple/i915_state_dynamic.c b/src/gallium/drivers/i915simple/i915_state_dynamic.c index 8cfbdddd19..86126a5a15 100644 --- a/src/gallium/drivers/i915simple/i915_state_dynamic.c +++ b/src/gallium/drivers/i915simple/i915_state_dynamic.c @@ -30,7 +30,9 @@ #include "i915_context.h" #include "i915_reg.h" #include "i915_state.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_pack_color.h" #define FILE_DEBUG_FLAG DEBUG_STATE diff --git a/src/gallium/drivers/i915simple/i915_state_immediate.c b/src/gallium/drivers/i915simple/i915_state_immediate.c index 2501f2d7cb..8c16bb4e27 100644 --- a/src/gallium/drivers/i915simple/i915_state_immediate.c +++ b/src/gallium/drivers/i915simple/i915_state_immediate.c @@ -33,7 +33,7 @@ #include "i915_context.h" #include "i915_state.h" #include "i915_reg.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" /* All state expressable with the LOAD_STATE_IMMEDIATE_1 packet. diff --git a/src/gallium/drivers/i915simple/i915_state_sampler.c b/src/gallium/drivers/i915simple/i915_state_sampler.c index 7868f21ca6..c09c10601b 100644 --- a/src/gallium/drivers/i915simple/i915_state_sampler.c +++ b/src/gallium/drivers/i915simple/i915_state_sampler.c @@ -27,7 +27,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "i915_state_inlines.h" #include "i915_context.h" diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index 17b5125e56..62f1926644 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -30,10 +30,9 @@ #include "i915_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "util/u_rect.h" diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index ca0fb8761b..32344da4d5 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -34,8 +34,9 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "i915_context.h" #include "i915_texture.h" diff --git a/src/gallium/drivers/i965simple/brw_cc.c b/src/gallium/drivers/i965simple/brw_cc.c index 337e4f95f6..79d4150383 100644 --- a/src/gallium/drivers/i965simple/brw_cc.c +++ b/src/gallium/drivers/i965simple/brw_cc.c @@ -29,7 +29,8 @@ * Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "brw_context.h" #include "brw_state.h" @@ -232,8 +233,7 @@ static void upload_cc_unit( struct brw_context *brw ) cc.cc3.alpha_test_func = brw_translate_compare_func(brw->attribs.DepthStencil->alpha.func); - UNCLAMPED_FLOAT_TO_UBYTE(cc.cc7.alpha_ref.ub[0], - brw->attribs.DepthStencil->alpha.ref); + cc.cc7.alpha_ref.ub[0] = float_to_ubyte(brw->attribs.DepthStencil->alpha.ref); cc.cc3.alpha_test_format = BRW_ALPHATEST_FORMAT_UNORM8; } diff --git a/src/gallium/drivers/i965simple/brw_clip_state.c b/src/gallium/drivers/i965simple/brw_clip_state.c index ea5c05a279..8e78dd51be 100644 --- a/src/gallium/drivers/i965simple/brw_clip_state.c +++ b/src/gallium/drivers/i965simple/brw_clip_state.c @@ -32,7 +32,8 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" static void upload_clip_unit( struct brw_context *brw ) diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c index 8326f7b9c4..96920df008 100644 --- a/src/gallium/drivers/i965simple/brw_context.c +++ b/src/gallium/drivers/i965simple/brw_context.c @@ -39,7 +39,7 @@ #include "pipe/p_winsys.h" #include "pipe/p_context.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/i965simple/brw_curbe.c b/src/gallium/drivers/i965simple/brw_curbe.c index 52bbd525c1..824ee7fd6d 100644 --- a/src/gallium/drivers/i965simple/brw_curbe.c +++ b/src/gallium/drivers/i965simple/brw_curbe.c @@ -39,7 +39,8 @@ #include "brw_wm.h" #include "pipe/p_state.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #define FILE_DEBUG_FLAG DEBUG_FALLBACKS diff --git a/src/gallium/drivers/i965simple/brw_draw_upload.c b/src/gallium/drivers/i965simple/brw_draw_upload.c index 9c0c78c236..7c20ea52af 100644 --- a/src/gallium/drivers/i965simple/brw_draw_upload.c +++ b/src/gallium/drivers/i965simple/brw_draw_upload.c @@ -33,6 +33,7 @@ #include "brw_context.h" #include "brw_state.h" + struct brw_array_state { union header_union header; diff --git a/src/gallium/drivers/i965simple/brw_gs_state.c b/src/gallium/drivers/i965simple/brw_gs_state.c index 3932e9e939..5b8016b2e9 100644 --- a/src/gallium/drivers/i965simple/brw_gs_state.c +++ b/src/gallium/drivers/i965simple/brw_gs_state.c @@ -34,7 +34,8 @@ #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c index fadfbf94ab..ab7cd624b2 100644 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_winsys.h" #include "util/u_string.h" diff --git a/src/gallium/drivers/i965simple/brw_sf_state.c b/src/gallium/drivers/i965simple/brw_sf_state.c index 9acd3ea61b..2a5de61c21 100644 --- a/src/gallium/drivers/i965simple/brw_sf_state.c +++ b/src/gallium/drivers/i965simple/brw_sf_state.c @@ -30,11 +30,12 @@ */ - #include "brw_context.h" #include "brw_state.h" #include "brw_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" + static void upload_sf_vp(struct brw_context *brw) { diff --git a/src/gallium/drivers/i965simple/brw_shader_info.c b/src/gallium/drivers/i965simple/brw_shader_info.c index 30f37a99d4..86d877d7ef 100644 --- a/src/gallium/drivers/i965simple/brw_shader_info.c +++ b/src/gallium/drivers/i965simple/brw_shader_info.c @@ -1,7 +1,7 @@ #include "brw_context.h" #include "brw_state.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index 27ca32843d..af46cb546f 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -31,7 +31,7 @@ #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_dump.h" diff --git a/src/gallium/drivers/i965simple/brw_state_batch.c b/src/gallium/drivers/i965simple/brw_state_batch.c index 35db76b594..43a1c89fc4 100644 --- a/src/gallium/drivers/i965simple/brw_state_batch.c +++ b/src/gallium/drivers/i965simple/brw_state_batch.c @@ -32,7 +32,7 @@ #include "brw_state.h" #include "brw_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" /* A facility similar to the data caching code above, which aims to * prevent identical commands being issued repeatedly. diff --git a/src/gallium/drivers/i965simple/brw_state_cache.c b/src/gallium/drivers/i965simple/brw_state_cache.c index b3a5124461..094248fa69 100644 --- a/src/gallium/drivers/i965simple/brw_state_cache.c +++ b/src/gallium/drivers/i965simple/brw_state_cache.c @@ -38,7 +38,7 @@ #include "brw_sf.h" #include "brw_gs.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" diff --git a/src/gallium/drivers/i965simple/brw_state_pool.c b/src/gallium/drivers/i965simple/brw_state_pool.c index f3174bfe0a..78d4c0e411 100644 --- a/src/gallium/drivers/i965simple/brw_state_pool.c +++ b/src/gallium/drivers/i965simple/brw_state_pool.c @@ -43,7 +43,8 @@ */ #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "brw_context.h" #include "brw_state.h" diff --git a/src/gallium/drivers/i965simple/brw_state_upload.c b/src/gallium/drivers/i965simple/brw_state_upload.c index e727601e1e..bac9161b5f 100644 --- a/src/gallium/drivers/i965simple/brw_state_upload.c +++ b/src/gallium/drivers/i965simple/brw_state_upload.c @@ -33,7 +33,7 @@ #include "brw_context.h" #include "brw_state.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" /* This is used to initialize brw->state.atoms[]. We could use this * list directly except for a single atom, brw_constant_buffer, which diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index 69da252285..b89756c47b 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -29,10 +29,9 @@ #include "brw_context.h" #include "brw_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "util/u_rect.h" diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 9b6cf81723..05eda9d1f2 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -33,16 +33,16 @@ /* Code to layout images in a mipmap tree for i965. */ -#include "brw_tex_layout.h" - #include "pipe/p_state.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" - +#include "util/u_math.h" +#include "util/u_memory.h" #include "brw_context.h" +#include "brw_tex_layout.h" + #define FILE_DEBUG_FLAG DEBUG_TEXTURE diff --git a/src/gallium/drivers/i965simple/brw_vs_state.c b/src/gallium/drivers/i965simple/brw_vs_state.c index c73469929c..1eaff87892 100644 --- a/src/gallium/drivers/i965simple/brw_vs_state.c +++ b/src/gallium/drivers/i965simple/brw_vs_state.c @@ -34,7 +34,8 @@ #include "brw_state.h" #include "brw_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" static void upload_vs_unit( struct brw_context *brw ) { diff --git a/src/gallium/drivers/i965simple/brw_wm.c b/src/gallium/drivers/i965simple/brw_wm.c index 7fc5f59a98..8de565b96c 100644 --- a/src/gallium/drivers/i965simple/brw_wm.c +++ b/src/gallium/drivers/i965simple/brw_wm.c @@ -35,7 +35,7 @@ #include "brw_wm.h" #include "brw_eu.h" #include "brw_state.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" diff --git a/src/gallium/drivers/i965simple/brw_wm_decl.c b/src/gallium/drivers/i965simple/brw_wm_decl.c index e6f1a44817..d50e66f613 100644 --- a/src/gallium/drivers/i965simple/brw_wm_decl.c +++ b/src/gallium/drivers/i965simple/brw_wm_decl.c @@ -2,7 +2,8 @@ #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/i965simple/brw_wm_glsl.c b/src/gallium/drivers/i965simple/brw_wm_glsl.c index 6a4a5aef09..ab6410aa60 100644 --- a/src/gallium/drivers/i965simple/brw_wm_glsl.c +++ b/src/gallium/drivers/i965simple/brw_wm_glsl.c @@ -2,7 +2,8 @@ #include "brw_context.h" #include "brw_eu.h" #include "brw_wm.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c index b9eaee56ee..52b2909a65 100644 --- a/src/gallium/drivers/i965simple/brw_wm_sampler_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_sampler_state.c @@ -34,7 +34,8 @@ #include "brw_state.h" #include "brw_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #define COMPAREFUNC_ALWAYS 0 diff --git a/src/gallium/drivers/i965simple/brw_wm_state.c b/src/gallium/drivers/i965simple/brw_wm_state.c index f3aa36b07f..37a9bf919c 100644 --- a/src/gallium/drivers/i965simple/brw_wm_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_state.c @@ -34,7 +34,8 @@ #include "brw_state.h" #include "brw_defines.h" #include "brw_wm.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" /*********************************************************************** * WM unit - fragment programs and rasterization diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 9b1313bc83..dda90f760a 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -32,8 +32,8 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "util/u_math.h" +#include "util/u_memory.h" #include "sp_clear.h" #include "sp_context.h" #include "sp_flush.h" diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index cc171bbc39..d0456731be 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -34,7 +34,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/softpipe/sp_fs_llvm.c b/src/gallium/drivers/softpipe/sp_fs_llvm.c index 20226da78c..34adac5226 100644 --- a/src/gallium/drivers/softpipe/sp_fs_llvm.c +++ b/src/gallium/drivers/softpipe/sp_fs_llvm.c @@ -36,7 +36,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "tgsi/tgsi_sse2.h" diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 8b7da7c747..35653a8e48 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -34,7 +34,7 @@ #include "pipe/p_state.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "tgsi/tgsi_exec.h" #include "tgsi/tgsi_sse2.h" diff --git a/src/gallium/drivers/softpipe/sp_prim_setup.c b/src/gallium/drivers/softpipe/sp_prim_setup.c index 941ab62e00..038ff04d4f 100644 --- a/src/gallium/drivers/softpipe/sp_prim_setup.c +++ b/src/gallium/drivers/softpipe/sp_prim_setup.c @@ -41,7 +41,7 @@ #include "sp_prim_setup.h" #include "draw/draw_pipe.h" #include "draw/draw_vertex.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" /** * Triangle setup info (derived from draw_stage). diff --git a/src/gallium/drivers/softpipe/sp_prim_vbuf.c b/src/gallium/drivers/softpipe/sp_prim_vbuf.c index e9fae951e0..425e13cd28 100644 --- a/src/gallium/drivers/softpipe/sp_prim_vbuf.c +++ b/src/gallium/drivers/softpipe/sp_prim_vbuf.c @@ -43,6 +43,7 @@ #include "sp_setup.h" #include "draw/draw_context.h" #include "draw/draw_vbuf.h" +#include "util/u_memory.h" #define SP_MAX_VBUF_INDEXES 1024 diff --git a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c index 7a42b08ef5..7d3580fb4f 100644 --- a/src/gallium/drivers/softpipe/sp_quad_alpha_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_alpha_test.c @@ -7,7 +7,7 @@ #include "sp_headers.h" #include "sp_quad.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" static void diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 74c6bff84a..a834accb86 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -31,7 +31,8 @@ */ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_surface.h" @@ -128,15 +129,15 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad) /* convert to ubyte */ for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */ - UNCLAMPED_FLOAT_TO_UBYTE(dst[j][0], dest[j][0]); /* P0 */ - UNCLAMPED_FLOAT_TO_UBYTE(dst[j][1], dest[j][1]); /* P1 */ - UNCLAMPED_FLOAT_TO_UBYTE(dst[j][2], dest[j][2]); /* P2 */ - UNCLAMPED_FLOAT_TO_UBYTE(dst[j][3], dest[j][3]); /* P3 */ - - UNCLAMPED_FLOAT_TO_UBYTE(src[j][0], quadColor[j][0]); /* P0 */ - UNCLAMPED_FLOAT_TO_UBYTE(src[j][1], quadColor[j][1]); /* P1 */ - UNCLAMPED_FLOAT_TO_UBYTE(src[j][2], quadColor[j][2]); /* P2 */ - UNCLAMPED_FLOAT_TO_UBYTE(src[j][3], quadColor[j][3]); /* P3 */ + dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */ + dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */ + dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */ + dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */ + + src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */ + src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */ + src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */ + src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */ } switch (softpipe->blend->logicop_func) { @@ -209,10 +210,10 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad) } for (j = 0; j < 4; j++) { - quadColor[j][0] = UBYTE_TO_FLOAT(res[j][0]); - quadColor[j][1] = UBYTE_TO_FLOAT(res[j][1]); - quadColor[j][2] = UBYTE_TO_FLOAT(res[j][2]); - quadColor[j][3] = UBYTE_TO_FLOAT(res[j][3]); + quadColor[j][0] = ubyte_to_float(res[j][0]); + quadColor[j][1] = ubyte_to_float(res[j][1]); + quadColor[j][2] = ubyte_to_float(res[j][2]); + quadColor[j][3] = ubyte_to_float(res[j][3]); } } diff --git a/src/gallium/drivers/softpipe/sp_quad_bufloop.c b/src/gallium/drivers/softpipe/sp_quad_bufloop.c index b3db428ef1..92e9af09c1 100644 --- a/src/gallium/drivers/softpipe/sp_quad_bufloop.c +++ b/src/gallium/drivers/softpipe/sp_quad_bufloop.c @@ -1,5 +1,5 @@ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c index 7fe080990b..f72f31db97 100644 --- a/src/gallium/drivers/softpipe/sp_quad_colormask.c +++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c @@ -31,7 +31,8 @@ */ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c index dd5ebb2296..ad907ec25f 100644 --- a/src/gallium/drivers/softpipe/sp_quad_coverage.c +++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c @@ -33,7 +33,7 @@ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_quad.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_depth_test.c b/src/gallium/drivers/softpipe/sp_quad_depth_test.c index 0c82692c6e..227cb2014e 100644 --- a/src/gallium/drivers/softpipe/sp_quad_depth_test.c +++ b/src/gallium/drivers/softpipe/sp_quad_depth_test.c @@ -30,7 +30,7 @@ */ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_earlyz.c b/src/gallium/drivers/softpipe/sp_quad_earlyz.c index 22ea99049f..5a66a86699 100644 --- a/src/gallium/drivers/softpipe/sp_quad_earlyz.c +++ b/src/gallium/drivers/softpipe/sp_quad_earlyz.c @@ -30,7 +30,7 @@ */ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_headers.h" #include "sp_quad.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 8c88c192f8..5499ba5361 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -35,7 +35,8 @@ * all the enabled attributes run contiguously. */ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_defines.h" #include "pipe/p_shader_tokens.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_occlusion.c b/src/gallium/drivers/softpipe/sp_quad_occlusion.c index 54254df1f1..db13e73ae3 100644 --- a/src/gallium/drivers/softpipe/sp_quad_occlusion.c +++ b/src/gallium/drivers/softpipe/sp_quad_occlusion.c @@ -33,7 +33,7 @@ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c index 40083138a4..b64646a449 100644 --- a/src/gallium/drivers/softpipe/sp_quad_output.c +++ b/src/gallium/drivers/softpipe/sp_quad_output.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_headers.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_quad_stencil.c b/src/gallium/drivers/softpipe/sp_quad_stencil.c index b4c7e942fa..ce9562e07c 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stencil.c +++ b/src/gallium/drivers/softpipe/sp_quad_stencil.c @@ -10,7 +10,7 @@ #include "sp_tile_cache.h" #include "sp_quad.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" /** Only 8-bit stencil supported */ diff --git a/src/gallium/drivers/softpipe/sp_quad_stipple.c b/src/gallium/drivers/softpipe/sp_quad_stipple.c index f1e9b80e09..a39ecc2e9d 100644 --- a/src/gallium/drivers/softpipe/sp_quad_stipple.c +++ b/src/gallium/drivers/softpipe/sp_quad_stipple.c @@ -7,7 +7,7 @@ #include "sp_headers.h" #include "sp_quad.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" /** diff --git a/src/gallium/drivers/softpipe/sp_query.c b/src/gallium/drivers/softpipe/sp_query.c index adf9ccf64c..2106ee1d23 100644 --- a/src/gallium/drivers/softpipe/sp_query.c +++ b/src/gallium/drivers/softpipe/sp_query.c @@ -32,7 +32,7 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_query.h" diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index f6b3d7ac24..9644dbd168 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_winsys.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index c8c55fa6e8..87336ab6e3 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -42,9 +42,9 @@ #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_vertex.h" -#include "pipe/p_util.h" #include "pipe/p_shader_tokens.h" #include "util/u_math.h" +#include "util/u_memory.h" #define DEBUG_VERTS 0 diff --git a/src/gallium/drivers/softpipe/sp_state_blend.c b/src/gallium/drivers/softpipe/sp_state_blend.c index 2d40d6bd8f..384fe559af 100644 --- a/src/gallium/drivers/softpipe/sp_state_blend.c +++ b/src/gallium/drivers/softpipe/sp_state_blend.c @@ -28,7 +28,7 @@ /* Authors: Keith Whitwell */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_state.h" diff --git a/src/gallium/drivers/softpipe/sp_state_derived.c b/src/gallium/drivers/softpipe/sp_state_derived.c index f10a1fa471..6b6a4c3ff3 100644 --- a/src/gallium/drivers/softpipe/sp_state_derived.c +++ b/src/gallium/drivers/softpipe/sp_state_derived.c @@ -25,7 +25,8 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "draw/draw_vertex.h" diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 76fe6bfef9..1be461b3a4 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -30,7 +30,7 @@ #include "sp_fs.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" #include "pipe/p_shader_tokens.h" diff --git a/src/gallium/drivers/softpipe/sp_state_rasterizer.c b/src/gallium/drivers/softpipe/sp_state_rasterizer.c index 98e04352db..87b7219683 100644 --- a/src/gallium/drivers/softpipe/sp_state_rasterizer.c +++ b/src/gallium/drivers/softpipe/sp_state_rasterizer.c @@ -26,7 +26,7 @@ **************************************************************************/ #include "pipe/p_defines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_state.h" #include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_state_sampler.c b/src/gallium/drivers/softpipe/sp_state_sampler.c index 033288a0aa..99a28c0d7e 100644 --- a/src/gallium/drivers/softpipe/sp_state_sampler.c +++ b/src/gallium/drivers/softpipe/sp_state_sampler.c @@ -29,7 +29,7 @@ * Brian Paul */ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "draw/draw_context.h" diff --git a/src/gallium/drivers/softpipe/sp_surface.c b/src/gallium/drivers/softpipe/sp_surface.c index bfbae234f1..389aceb27c 100644 --- a/src/gallium/drivers/softpipe/sp_surface.c +++ b/src/gallium/drivers/softpipe/sp_surface.c @@ -26,10 +26,9 @@ **************************************************************************/ #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "pipe/p_winsys.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "util/u_rect.h" #include "sp_context.h" #include "sp_surface.h" diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 58a95d13e1..49250ec084 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -39,9 +39,9 @@ #include "sp_tile_cache.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "tgsi/tgsi_exec.h" #include "util/u_math.h" +#include "util/u_memory.h" /* diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index f775591352..3a737d6f72 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -33,8 +33,9 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "sp_context.h" #include "sp_state.h" diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 57c12ffe33..b50c984513 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -32,9 +32,9 @@ * Brian Paul */ -#include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_memory.h" +#include "util/u_tile.h" #include "sp_context.h" #include "sp_surface.h" #include "sp_texture.h" diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index f16359e8ad..1dd7719379 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_screen.h" #include "tr_dump.h" diff --git a/src/gallium/drivers/trace/tr_dump.c b/src/gallium/drivers/trace/tr_dump.c index 1613a626df..48032c1617 100644 --- a/src/gallium/drivers/trace/tr_dump.c +++ b/src/gallium/drivers/trace/tr_dump.c @@ -45,6 +45,8 @@ #endif #include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "util/u_memory.h" #include "util/u_string.h" #include "tr_stream.h" diff --git a/src/gallium/drivers/trace/tr_dump.h b/src/gallium/drivers/trace/tr_dump.h index 6ddc8fc15c..76a53731b3 100644 --- a/src/gallium/drivers/trace/tr_dump.h +++ b/src/gallium/drivers/trace/tr_dump.h @@ -35,7 +35,6 @@ #include "pipe/p_compiler.h" -#include "pipe/p_util.h" boolean trace_dump_trace_begin(void); diff --git a/src/gallium/drivers/trace/tr_screen.c b/src/gallium/drivers/trace/tr_screen.c index a6467ec35f..8789f86b1a 100644 --- a/src/gallium/drivers/trace/tr_screen.c +++ b/src/gallium/drivers/trace/tr_screen.c @@ -25,7 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "tr_dump.h" #include "tr_state.h" diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index 30ab5a8fdc..986d939e0c 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -27,6 +27,7 @@ #include "pipe/p_compiler.h" +#include "util/u_memory.h" #include "tgsi/tgsi_dump.h" #include "tr_dump.h" diff --git a/src/gallium/drivers/trace/tr_stream_stdc.c b/src/gallium/drivers/trace/tr_stream_stdc.c index 4c77e1c995..4c19ec0b24 100644 --- a/src/gallium/drivers/trace/tr_stream_stdc.c +++ b/src/gallium/drivers/trace/tr_stream_stdc.c @@ -36,7 +36,7 @@ #include -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "tr_stream.h" diff --git a/src/gallium/drivers/trace/tr_stream_wd.c b/src/gallium/drivers/trace/tr_stream_wd.c index b3b65f0971..704eb15bd7 100644 --- a/src/gallium/drivers/trace/tr_stream_wd.c +++ b/src/gallium/drivers/trace/tr_stream_wd.c @@ -37,7 +37,7 @@ #include #include -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "util/u_string.h" #include "tr_stream.h" diff --git a/src/gallium/drivers/trace/tr_texture.c b/src/gallium/drivers/trace/tr_texture.c index 99ba74d366..440a78704a 100644 --- a/src/gallium/drivers/trace/tr_texture.c +++ b/src/gallium/drivers/trace/tr_texture.c @@ -25,9 +25,9 @@ * **************************************************************************/ -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "util/u_hash_table.h" +#include "util/u_memory.h" #include "tr_screen.h" #include "tr_texture.h" diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c index 2c7a6f893b..177835854e 100644 --- a/src/gallium/drivers/trace/tr_winsys.c +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -25,8 +25,7 @@ * **************************************************************************/ -#include "pipe/p_util.h" -#include "pipe/p_state.h" +#include "util/u_memory.h" #include "util/u_hash_table.h" #include "tr_dump.h" diff --git a/src/gallium/include/pipe/p_util.h b/src/gallium/include/pipe/p_util.h deleted file mode 100644 index 4a3fca5962..0000000000 --- a/src/gallium/include/pipe/p_util.h +++ /dev/null @@ -1,460 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef P_UTIL_H -#define P_UTIL_H - -#include "p_config.h" -#include "p_compiler.h" -#include "p_debug.h" -#include "p_pointer.h" - -#if defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) -__inline double ceil(double val) -{ - double ceil_val; - - if((val - (long) val) == 0) { - ceil_val = val; - } else { - if(val > 0) { - ceil_val = (long) val + 1; - } else { - ceil_val = (long) val; - } - } - - return ceil_val; -} - -#ifndef PIPE_SUBSYSTEM_WINDOWS_CE -__inline double floor(double val) -{ - double floor_val; - - if((val - (long) val) == 0) { - floor_val = val; - } else { - if(val > 0) { - floor_val = (long) val; - } else { - floor_val = (long) val - 1; - } - } - - return floor_val; -} -#endif - -#pragma function(pow) -__inline double __cdecl pow(double val, double exponent) -{ - /* XXX */ - assert(0); - return 0; -} - -#pragma function(log) -__inline double __cdecl log(double val) -{ - /* XXX */ - assert(0); - return 0; -} - -#pragma function(atan2) -__inline double __cdecl atan2(double val) -{ - /* XXX */ - assert(0); - return 0; -} -#else -#include -#include -#endif - - /* Define ENOMEM for WINCE */ -#if (_WIN32_WCE < 600) -#ifndef ENOMEM -#define ENOMEM 12 -#endif -#endif - -#ifdef __cplusplus -extern "C" { -#endif - - -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && defined(DEBUG) - -/* memory debugging */ - -#include "p_debug.h" - -#define MALLOC( _size ) \ - debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size ) -#define CALLOC( _count, _size ) \ - debug_calloc(__FILE__, __LINE__, __FUNCTION__, _count, _size ) -#define FREE( _ptr ) \ - debug_free( __FILE__, __LINE__, __FUNCTION__, _ptr ) -#define REALLOC( _ptr, _old_size, _size ) \ - debug_realloc( __FILE__, __LINE__, __FUNCTION__, _ptr, _old_size, _size ) - -#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) - -void * __stdcall -EngAllocMem( - unsigned long Flags, - unsigned long MemSize, - unsigned long Tag ); - -void __stdcall -EngFreeMem( - void *Mem ); - -#define MALLOC( _size ) EngAllocMem( 0, _size, 'D3AG' ) -#define _FREE( _ptr ) EngFreeMem( _ptr ) - -#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) - -void * -ExAllocatePool( - unsigned long PoolType, - size_t NumberOfBytes); - -void -ExFreePool(void *P); - -#define MALLOC(_size) ExAllocatePool(0, _size) -#define _FREE(_ptr) ExFreePool(_ptr) - -#else - -#define MALLOC( SIZE ) malloc( SIZE ) -#define CALLOC( COUNT, SIZE ) calloc( COUNT, SIZE ) -#define FREE( PTR ) free( PTR ) -#define REALLOC( OLDPTR, OLDSIZE, NEWSIZE ) realloc( OLDPTR, NEWSIZE ) - -#endif - - -#ifndef CALLOC -static INLINE void * -CALLOC( unsigned count, unsigned size ) -{ - void *ptr = MALLOC( count * size ); - if( ptr ) { - memset( ptr, 0, count * size ); - } - return ptr; -} -#endif /* !CALLOC */ - -#ifndef FREE -static INLINE void -FREE( void *ptr ) -{ - if( ptr ) { - _FREE( ptr ); - } -} -#endif /* !FREE */ - -#ifndef REALLOC -static INLINE void * -REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) -{ - void *new_ptr = NULL; - - if (new_size != 0) { - unsigned copy_size = old_size < new_size ? old_size : new_size; - new_ptr = MALLOC( new_size ); - if (new_ptr && old_ptr && copy_size) { - memcpy( new_ptr, old_ptr, copy_size ); - } - } - - FREE( old_ptr ); - return new_ptr; -} -#endif /* !REALLOC */ - - -#define MALLOC_STRUCT(T) (struct T *) MALLOC(sizeof(struct T)) - -#define CALLOC_STRUCT(T) (struct T *) CALLOC(1, sizeof(struct T)) - - -/** - * Return memory on given byte alignment - */ -static INLINE void * -align_malloc(size_t bytes, uint alignment) -{ -#if defined(HAVE_POSIX_MEMALIGN) - void *mem; - alignment = (alignment + (uint)sizeof(void*) - 1) & ~((uint)sizeof(void*) - 1); - if(posix_memalign(& mem, alignment, bytes) != 0) - return NULL; - return mem; -#else - char *ptr, *buf; - - assert( alignment > 0 ); - - ptr = (char *) MALLOC(bytes + alignment + sizeof(void *)); - if (!ptr) - return NULL; - - buf = (char *) align_pointer( ptr + sizeof(void *), alignment ); - *(char **)(buf - sizeof(void *)) = ptr; - - return buf; -#endif /* defined(HAVE_POSIX_MEMALIGN) */ -} - -/** - * Free memory returned by align_malloc(). - */ -static INLINE void -align_free(void *ptr) -{ -#if defined(HAVE_POSIX_MEMALIGN) - FREE(ptr); -#else - void **cubbyHole = (void **) ((char *) ptr - sizeof(void *)); - void *realAddr = *cubbyHole; - FREE(realAddr); -#endif /* defined(HAVE_POSIX_MEMALIGN) */ -} - - - -/** - * Duplicate a block of memory. - */ -static INLINE void * -mem_dup(const void *src, uint size) -{ - void *dup = MALLOC(size); - if (dup) - memcpy(dup, src, size); - return dup; -} - - - -#define CLAMP( X, MIN, MAX ) ( (X)<(MIN) ? (MIN) : ((X)>(MAX) ? (MAX) : (X)) ) -#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) ) -#define MAX2( A, B ) ( (A)>(B) ? (A) : (B) ) - -#ifndef Elements -#define Elements(x) (sizeof(x)/sizeof((x)[0])) -#endif -#define Offset(TYPE, MEMBER) ((unsigned)&(((TYPE *)NULL)->MEMBER)) - -/** - * Return a pointer aligned to next multiple of 16 bytes. - */ -static INLINE void * -align16( void *unaligned ) -{ - return align_pointer( unaligned, 16 ); -} - - -static INLINE int align(int value, int alignment) -{ - return (value + alignment - 1) & ~(alignment - 1); -} - - - - -#if defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86) -static INLINE unsigned ffs( unsigned u ) -{ - unsigned i; - - if( u == 0 ) { - return 0; - } - - __asm bsf eax, [u] - __asm inc eax - __asm mov [i], eax - - return i; -} -#endif - -union fi { - float f; - int i; - unsigned ui; -}; - -#define UBYTE_TO_FLOAT( ub ) ((float)(ub) / 255.0F) - -#define IEEE_0996 0x3f7f0000 /* 0.996 or so */ - -/* This function/macro is sensitive to precision. Test very carefully - * if you change it! - */ -#define UNCLAMPED_FLOAT_TO_UBYTE(UB, F) \ - do { \ - union fi __tmp; \ - __tmp.f = (F); \ - if (__tmp.i < 0) \ - UB = (ubyte) 0; \ - else if (__tmp.i >= IEEE_0996) \ - UB = (ubyte) 255; \ - else { \ - __tmp.f = __tmp.f * (255.0f/256.0f) + 32768.0f; \ - UB = (ubyte) __tmp.i; \ - } \ - } while (0) - - - -static INLINE unsigned pack_ub4( unsigned char b0, - unsigned char b1, - unsigned char b2, - unsigned char b3 ) -{ - return ((((unsigned int)b0) << 0) | - (((unsigned int)b1) << 8) | - (((unsigned int)b2) << 16) | - (((unsigned int)b3) << 24)); -} - -static INLINE unsigned fui( float f ) -{ - union fi fi; - fi.f = f; - return fi.ui; -} - -static INLINE unsigned char float_to_ubyte( float f ) -{ - unsigned char ub; - UNCLAMPED_FLOAT_TO_UBYTE(ub, f); - return ub; -} - -static INLINE unsigned pack_ui32_float4( float a, - float b, - float c, - float d ) -{ - return pack_ub4( float_to_ubyte(a), - float_to_ubyte(b), - float_to_ubyte(c), - float_to_ubyte(d) ); -} - -#define COPY_4V( DST, SRC ) \ -do { \ - (DST)[0] = (SRC)[0]; \ - (DST)[1] = (SRC)[1]; \ - (DST)[2] = (SRC)[2]; \ - (DST)[3] = (SRC)[3]; \ -} while (0) - - -#define COPY_4FV( DST, SRC ) COPY_4V(DST, SRC) - - -#define ASSIGN_4V( DST, V0, V1, V2, V3 ) \ -do { \ - (DST)[0] = (V0); \ - (DST)[1] = (V1); \ - (DST)[2] = (V2); \ - (DST)[3] = (V3); \ -} while (0) - - - -#if defined(_MSC_VER) -#if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) - -static INLINE float cosf( float f ) -{ - return (float) cos( (double) f ); -} - -static INLINE float sinf( float f ) -{ - return (float) sin( (double) f ); -} - -static INLINE float ceilf( float f ) -{ - return (float) ceil( (double) f ); -} - -static INLINE float floorf( float f ) -{ - return (float) floor( (double) f ); -} - -static INLINE float powf( float f, float g ) -{ - return (float) pow( (double) f, (double) g ); -} - -static INLINE float sqrtf( float f ) -{ - return (float) sqrt( (double) f ); -} - -static INLINE float fabsf( float f ) -{ - return (float) fabs( (double) f ); -} - -static INLINE float logf( float f ) -{ - return (float) log( (double) f ); -} - -#else -/* Work-around an extra semi-colon in VS 2005 logf definition */ -#ifdef logf -#undef logf -#define logf(x) ((float)log((double)(x))) -#endif /* logf */ -#endif -#endif /* _MSC_VER */ - - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/src/gallium/state_trackers/python/gallium.i b/src/gallium/state_trackers/python/gallium.i index 641b19e940..a67372c623 100644 --- a/src/gallium/state_trackers/python/gallium.i +++ b/src/gallium/state_trackers/python/gallium.i @@ -42,7 +42,7 @@ #include "pipe/p_screen.h" #include "pipe/p_context.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "cso_cache/cso_context.h" #include "util/u_draw_quad.h" diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c index a1889539dc..f71d85dd9b 100644 --- a/src/gallium/state_trackers/python/st_device.c +++ b/src/gallium/state_trackers/python/st_device.c @@ -26,12 +26,13 @@ **************************************************************************/ -#include "pipe/p_util.h" #include "pipe/p_winsys.h" #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" #include "pipe/p_inlines.h" #include "cso_cache/cso_context.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "util/u_simple_shaders.h" #include "trace/tr_screen.h" #include "trace/tr_context.h" diff --git a/src/gallium/state_trackers/python/st_sample.c b/src/gallium/state_trackers/python/st_sample.c index b47c7be293..7765df3c4a 100644 --- a/src/gallium/state_trackers/python/st_sample.c +++ b/src/gallium/state_trackers/python/st_sample.c @@ -29,9 +29,10 @@ #include "pipe/p_compiler.h" #include "pipe/p_format.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_tile.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "st_sample.h" diff --git a/src/gallium/state_trackers/python/st_softpipe_winsys.c b/src/gallium/state_trackers/python/st_softpipe_winsys.c index 6ea3c9a5cf..2d4f5434b3 100644 --- a/src/gallium/state_trackers/python/st_softpipe_winsys.c +++ b/src/gallium/state_trackers/python/st_softpipe_winsys.c @@ -39,8 +39,9 @@ #include "pipe/p_winsys.h" #include "pipe/p_format.h" #include "pipe/p_context.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "softpipe/sp_winsys.h" #include "st_winsys.h" diff --git a/src/gallium/winsys/drm/intel/common/intel_be_device.c b/src/gallium/winsys/drm/intel/common/intel_be_device.c index 8db0329615..019ee5cbd2 100644 --- a/src/gallium/winsys/drm/intel/common/intel_be_device.c +++ b/src/gallium/winsys/drm/intel/common/intel_be_device.c @@ -13,8 +13,8 @@ #include "pipe/p_winsys.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "util/u_memory.h" #include "i915simple/i915_screen.h" diff --git a/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c b/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c index 0d98d16cf1..20920a2052 100644 --- a/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c +++ b/src/gallium/winsys/drm/intel/dri/intel_winsys_softpipe.c @@ -32,8 +32,8 @@ #include "intel_context.h" #include "intel_winsys_softpipe.h" #include "pipe/p_defines.h" -#include "pipe/p_util.h" #include "pipe/p_format.h" +#include "util/u_memory.h" #include "softpipe/sp_winsys.h" diff --git a/src/gallium/winsys/egl_xlib/egl_xlib.c b/src/gallium/winsys/egl_xlib/egl_xlib.c index 829732eea8..e9f821d276 100644 --- a/src/gallium/winsys/egl_xlib/egl_xlib.c +++ b/src/gallium/winsys/egl_xlib/egl_xlib.c @@ -38,8 +38,8 @@ #include "pipe/p_compiler.h" #include "pipe/p_format.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_winsys.h" +#include "util/u_memory.h" #include "softpipe/sp_winsys.h" #include "eglconfig.h" diff --git a/src/gallium/winsys/egl_xlib/sw_winsys.c b/src/gallium/winsys/egl_xlib/sw_winsys.c index f4199e6f89..ae81d7f801 100644 --- a/src/gallium/winsys/egl_xlib/sw_winsys.c +++ b/src/gallium/winsys/egl_xlib/sw_winsys.c @@ -37,8 +37,9 @@ #include "pipe/p_winsys.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "sw_winsys.h" diff --git a/src/gallium/winsys/gdi/wmesa.c b/src/gallium/winsys/gdi/wmesa.c index ff52ceb8c4..730fb1b541 100644 --- a/src/gallium/winsys/gdi/wmesa.c +++ b/src/gallium/winsys/gdi/wmesa.c @@ -12,8 +12,8 @@ #include "pipe/p_winsys.h" #include "pipe/p_format.h" #include "pipe/p_context.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "util/u_memory.h" #include "softpipe/sp_winsys.h" #include "glapi/glapi.h" #include "colors.h" diff --git a/src/gallium/winsys/xlib/brw_aub.c b/src/gallium/winsys/xlib/brw_aub.c index 6e814ce5d1..f319802962 100644 --- a/src/gallium/winsys/xlib/brw_aub.c +++ b/src/gallium/winsys/xlib/brw_aub.c @@ -34,7 +34,6 @@ #include "brw_aub.h" #include "pipe/p_context.h" #include "pipe/p_state.h" -#include "pipe/p_util.h" #include "pipe/p_debug.h" diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index 4b4dc56e84..68ead7f528 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -42,8 +42,9 @@ #include "pipe/p_winsys.h" #include "pipe/p_format.h" #include "pipe/p_context.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" +#include "util/u_math.h" +#include "util/u_memory.h" #include "softpipe/sp_winsys.h" #ifdef GALLIUM_CELL diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.c b/src/gallium/winsys/xlib/xm_winsys_aub.c index 7fc9debdd5..3439367636 100644 --- a/src/gallium/winsys/xlib/xm_winsys_aub.c +++ b/src/gallium/winsys/xlib/xm_winsys_aub.c @@ -37,7 +37,7 @@ #include "xmesaP.h" #include "pipe/p_winsys.h" -#include "pipe/p_util.h" +#include "util/u_memory.h" #include "pipe/p_inlines.h" #include "i965simple/brw_winsys.h" #include "i965simple/brw_screen.h" diff --git a/src/mesa/state_tracker/acc2.c b/src/mesa/state_tracker/acc2.c new file mode 100644 index 0000000000..fa5de2b764 --- /dev/null +++ b/src/mesa/state_tracker/acc2.c @@ -0,0 +1,319 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Brian Paul + */ + +#include "main/imports.h" +#include "main/image.h" +#include "main/macros.h" + +#include "st_context.h" +#include "st_cb_accum.h" +#include "st_cb_fbo.h" +#include "st_draw.h" +#include "st_format.h" +#include "pipe/p_context.h" +#include "pipe/p_defines.h" +#include "pipe/p_inlines.h" +#include "util/p_tile.h" + + +#define UNCLAMPED_FLOAT_TO_SHORT(us, f) \ + us = ( (short) ( CLAMP((f), -1.0, 1.0) * 32767.0F) ) + + +/** + * For hardware that supports deep color buffers, we could accelerate + * most/all the accum operations with blending/texturing. + * For now, just use the get/put_tile() functions and do things in software. + */ + + +static void +acc_get_tile_rgba(struct pipe_context *pipe, struct pipe_surface *acc_ps, + uint x, uint y, uint w, uint h, float *p) +{ + const enum pipe_format f = acc_ps->format; + const int cpp = acc_ps->cpp; + + acc_ps->format = PIPE_FORMAT_R16G16B16A16_SNORM; + acc_ps->cpp = 8; + + pipe_get_tile_rgba(pipe, acc_ps, x, y, w, h, p); + + acc_ps->format = f; + acc_ps->cpp = cpp; +} + + +static void +acc_put_tile_rgba(struct pipe_context *pipe, struct pipe_surface *acc_ps, + uint x, uint y, uint w, uint h, const float *p) +{ + enum pipe_format f = acc_ps->format; + const int cpp = acc_ps->cpp; + + acc_ps->format = PIPE_FORMAT_R16G16B16A16_SNORM; + acc_ps->cpp = 8; + + pipe_put_tile_rgba(pipe, acc_ps, x, y, w, h, p); + + acc_ps->format = f; + acc_ps->cpp = cpp; +} + + + +void +st_clear_accum_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) +{ + struct pipe_context *pipe = ctx->st->pipe; + struct st_renderbuffer *acc_strb = st_renderbuffer(rb); + struct pipe_surface *acc_ps = acc_strb->surface; + const GLint xpos = ctx->DrawBuffer->_Xmin; + const GLint ypos = ctx->DrawBuffer->_Ymin; + const GLint width = ctx->DrawBuffer->_Xmax - xpos; + const GLint height = ctx->DrawBuffer->_Ymax - ypos; + const GLfloat r = ctx->Accum.ClearColor[0]; + const GLfloat g = ctx->Accum.ClearColor[1]; + const GLfloat b = ctx->Accum.ClearColor[2]; + const GLfloat a = ctx->Accum.ClearColor[3]; + GLfloat *accBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + int i; + +#if 1 + GLvoid *map; + + map = pipe_surface_map(acc_ps); + switch (acc_strb->format) { + case PIPE_FORMAT_R16G16B16A16_SNORM: + { + GLshort r = FLOAT_TO_SHORT(ctx->Accum.ClearColor[0]); + GLshort g = FLOAT_TO_SHORT(ctx->Accum.ClearColor[1]); + GLshort b = FLOAT_TO_SHORT(ctx->Accum.ClearColor[2]); + GLshort a = FLOAT_TO_SHORT(ctx->Accum.ClearColor[3]); + int i, j; + for (i = 0; i < height; i++) { + GLshort *dst = ((GLshort *) map + + ((ypos + i) * acc_ps->pitch + xpos) * 4); + for (j = 0; j < width; j++) { + dst[0] = r; + dst[1] = g; + dst[2] = b; + dst[3] = a; + dst += 4; + } + } + } + break; + default: + _mesa_problem(ctx, "unexpected format in st_clear_accum_buffer()"); + } + + pipe_surface_unmap(acc_ps); + +#else + for (i = 0; i < width * height; i++) { + accBuf[i*4+0] = r; + accBuf[i*4+1] = g; + accBuf[i*4+2] = b; + accBuf[i*4+3] = a; + } + + acc_put_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, accBuf); +#endif +} + + +/** For ADD/MULT */ +static void +accum_mad(struct pipe_context *pipe, GLfloat scale, GLfloat bias, + GLint xpos, GLint ypos, GLint width, GLint height, + struct pipe_surface *acc_ps) +{ + GLfloat *accBuf; + GLint i; + + accBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + + pipe_get_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, accBuf); + + for (i = 0; i < 4 * width * height; i++) { + accBuf[i] = accBuf[i] * scale + bias; + } + + pipe_put_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, accBuf); + + free(accBuf); +} + + +static void +accum_accum(struct pipe_context *pipe, GLfloat value, + GLint xpos, GLint ypos, GLint width, GLint height, + struct pipe_surface *acc_ps, + struct pipe_surface *color_ps) +{ + GLfloat *colorBuf, *accBuf; + GLint i; + + colorBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + accBuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + + pipe_get_tile_rgba(pipe, color_ps, xpos, ypos, width, height, colorBuf); + acc_get_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, accBuf); + + for (i = 0; i < 4 * width * height; i++) { + accBuf[i] = accBuf[i] + colorBuf[i] * value; + } + + acc_put_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, accBuf); + + free(colorBuf); + free(accBuf); +} + + +static void +accum_load(struct pipe_context *pipe, GLfloat value, + GLint xpos, GLint ypos, GLint width, GLint height, + struct pipe_surface *acc_ps, + struct pipe_surface *color_ps) +{ + GLfloat *buf; + GLint i; + + buf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + + pipe_get_tile_rgba(pipe, color_ps, xpos, ypos, width, height, buf); + + for (i = 0; i < 4 * width * height; i++) { + buf[i] = buf[i] * value; + } + + acc_put_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, buf); + + free(buf); +} + + +static void +accum_return(GLcontext *ctx, GLfloat value, + GLint xpos, GLint ypos, GLint width, GLint height, + struct pipe_surface *acc_ps, + struct pipe_surface *color_ps) +{ + struct pipe_context *pipe = ctx->st->pipe; + const GLubyte *colormask = ctx->Color.ColorMask; + GLfloat *abuf, *cbuf = NULL; + GLint i, ch; + + abuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + + acc_get_tile_rgba(pipe, acc_ps, xpos, ypos, width, height, abuf); + + if (!colormask[0] || !colormask[1] || !colormask[2] || !colormask[3]) { + cbuf = (GLfloat *) malloc(width * height * 4 * sizeof(GLfloat)); + pipe_get_tile_rgba(pipe, color_ps, xpos, ypos, width, height, cbuf); + } + + for (i = 0; i < width * height; i++) { + for (ch = 0; ch < 4; ch++) { + if (colormask[ch]) { + GLfloat val = abuf[i * 4 + ch] * value; + abuf[i * 4 + ch] = CLAMP(val, 0.0, 1.0); + } + else { + abuf[i * 4 + ch] = cbuf[i * 4 + ch]; + } + } + } + + pipe_put_tile_rgba(pipe, color_ps, xpos, ypos, width, height, abuf); + + free(abuf); + if (cbuf) + free(cbuf); +} + + +static void +st_Accum(GLcontext *ctx, GLenum op, GLfloat value) +{ + struct st_context *st = ctx->st; + struct pipe_context *pipe = st->pipe; + struct st_renderbuffer *acc_strb + = st_renderbuffer(ctx->DrawBuffer->Attachment[BUFFER_ACCUM].Renderbuffer); + struct st_renderbuffer *color_strb + = st_renderbuffer(ctx->ReadBuffer->_ColorReadBuffer); + struct pipe_surface *acc_ps = acc_strb->surface; + struct pipe_surface *color_ps = color_strb->surface; + + const GLint xpos = ctx->DrawBuffer->_Xmin; + const GLint ypos = ctx->DrawBuffer->_Ymin; + const GLint width = ctx->DrawBuffer->_Xmax - xpos; + const GLint height = ctx->DrawBuffer->_Ymax - ypos; + + /* make sure color bufs aren't cached */ + pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + + switch (op) { + case GL_ADD: + if (value != 0.0F) { + accum_mad(pipe, 1.0, value, xpos, ypos, width, height, acc_ps); + } + break; + case GL_MULT: + if (value != 1.0F) { + accum_mad(pipe, value, 0.0, xpos, ypos, width, height, acc_ps); + } + break; + case GL_ACCUM: + if (value != 0.0F) { + accum_accum(pipe, value, xpos, ypos, width, height, acc_ps, color_ps); + } + break; + case GL_LOAD: + accum_load(pipe, value, xpos, ypos, width, height, acc_ps, color_ps); + break; + case GL_RETURN: + accum_return(ctx, value, xpos, ypos, width, height, acc_ps, color_ps); + break; + default: + assert(0); + } +} + + + +void st_init_accum_functions(struct dd_function_table *functions) +{ + functions->Accum = st_Accum; +} diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index a992e08ff6..cf3a99e7e9 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -42,7 +42,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #define UNCLAMPED_FLOAT_TO_SHORT(us, f) \ diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index d5696a909f..a0c305d66f 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -50,7 +50,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "util/u_draw_quad.h" #include "util/u_simple_shaders.h" #include "shader/prog_instruction.h" diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 0c5e21d4ff..4ec7c752df 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -55,7 +55,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "util/u_draw_quad.h" #include "shader/prog_instruction.h" #include "cso_cache/cso_context.h" diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index 39f5856f94..c801532788 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -41,7 +41,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "st_context.h" #include "st_cb_bitmap.h" #include "st_cb_readpixels.h" diff --git a/src/mesa/state_tracker/st_cb_texture.c b/src/mesa/state_tracker/st_cb_texture.c index 6177ac63f0..16bbf3d80f 100644 --- a/src/mesa/state_tracker/st_cb_texture.c +++ b/src/mesa/state_tracker/st_cb_texture.c @@ -51,7 +51,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "util/p_tile.h" +#include "util/u_tile.h" #include "util/u_blit.h" diff --git a/src/mesa/state_tracker/st_program.c b/src/mesa/state_tracker/st_program.c index 325d95e865..936a6e32ea 100644 --- a/src/mesa/state_tracker/st_program.c +++ b/src/mesa/state_tracker/st_program.c @@ -55,7 +55,7 @@ #define TGSI_DEBUG 0 -/** XXX we should use the version of this from p_util.h but including +/** XXX we should use the version of this from u_memory.h but including * that header causes symbol collisions. */ static INLINE void * diff --git a/src/mesa/state_tracker/st_texture.c b/src/mesa/state_tracker/st_texture.c index 63046a0ecc..73cebff33f 100644 --- a/src/mesa/state_tracker/st_texture.c +++ b/src/mesa/state_tracker/st_texture.c @@ -36,7 +36,6 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_util.h" #include "pipe/p_inlines.h" #include "util/u_rect.h" -- cgit v1.2.3 From 4c84e940d009fe56c1f249507028daadea749ef2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Sun, 24 Aug 2008 17:49:11 -0600 Subject: gallium: remove old tile util files --- src/gallium/auxiliary/util/p_tile.c | 1168 ----------------------------------- src/gallium/auxiliary/util/p_tile.h | 101 --- 2 files changed, 1269 deletions(-) delete mode 100644 src/gallium/auxiliary/util/p_tile.c delete mode 100644 src/gallium/auxiliary/util/p_tile.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_tile.c b/src/gallium/auxiliary/util/p_tile.c deleted file mode 100644 index 8219041c80..0000000000 --- a/src/gallium/auxiliary/util/p_tile.c +++ /dev/null @@ -1,1168 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * RGBA/float tile get/put functions. - * Usable both by drivers and state trackers. - * Surfaces should already be in a mapped state. - */ - - -#include "pipe/p_defines.h" -#include "pipe/p_util.h" -#include "pipe/p_inlines.h" - -#include "p_tile.h" -#include "u_rect.h" - - -/** - * Move raw block of pixels from surface to user memory. - * This should be usable by any hw driver that has mappable surfaces. - */ -void -pipe_get_tile_raw(struct pipe_surface *ps, - uint x, uint y, uint w, uint h, - void *dst, int dst_stride) -{ - const void *src; - - if (dst_stride == 0) - dst_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; - - if (pipe_clip_tile(x, y, &w, &h, ps)) - return; - - src = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ); - assert(src); - if(!src) - return; - - pipe_copy_rect(dst, &ps->block, dst_stride, 0, 0, w, h, src, ps->stride, x, y); - - pipe_surface_unmap(ps); -} - - -/** - * Move raw block of pixels from user memory to surface. - * This should be usable by any hw driver that has mappable surfaces. - */ -void -pipe_put_tile_raw(struct pipe_surface *ps, - uint x, uint y, uint w, uint h, - const void *src, int src_stride) -{ - void *dst; - - if (src_stride == 0) - src_stride = pf_get_nblocksx(&ps->block, w) * ps->block.size; - - if (pipe_clip_tile(x, y, &w, &h, ps)) - return; - - dst = pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE); - assert(dst); - if(!dst) - return; - - pipe_copy_rect(dst, &ps->block, ps->stride, x, y, w, h, src, src_stride, 0, 0); - - pipe_surface_unmap(ps); -} - - - - -/** Convert short in [-32768,32767] to GLfloat in [-1.0,1.0] */ -#define SHORT_TO_FLOAT(S) ((2.0F * (S) + 1.0F) * (1.0F/65535.0F)) - -#define UNCLAMPED_FLOAT_TO_SHORT(us, f) \ - us = ( (short) ( CLAMP((f), -1.0, 1.0) * 32767.0F) ) - - - -/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/ - -static void -a8r8g8b8_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - const unsigned pixel = *src++; - pRow[0] = UBYTE_TO_FLOAT((pixel >> 16) & 0xff); - pRow[1] = UBYTE_TO_FLOAT((pixel >> 8) & 0xff); - pRow[2] = UBYTE_TO_FLOAT((pixel >> 0) & 0xff); - pRow[3] = UBYTE_TO_FLOAT((pixel >> 24) & 0xff); - } - p += dst_stride; - } -} - - -static void -a8r8g8b8_put_tile_rgba(unsigned *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned r, g, b, a; - UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]); - UNCLAMPED_FLOAT_TO_UBYTE(g, pRow[1]); - UNCLAMPED_FLOAT_TO_UBYTE(b, pRow[2]); - UNCLAMPED_FLOAT_TO_UBYTE(a, pRow[3]); - *dst++ = (a << 24) | (r << 16) | (g << 8) | b; - } - p += src_stride; - } -} - - -/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/ - -static void -x8r8g8b8_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - const unsigned pixel = *src++; - pRow[0] = UBYTE_TO_FLOAT((pixel >> 16) & 0xff); - pRow[1] = UBYTE_TO_FLOAT((pixel >> 8) & 0xff); - pRow[2] = UBYTE_TO_FLOAT((pixel >> 0) & 0xff); - pRow[3] = UBYTE_TO_FLOAT(0xff); - } - p += dst_stride; - } -} - - -static void -x8r8g8b8_put_tile_rgba(unsigned *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned r, g, b; - UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]); - UNCLAMPED_FLOAT_TO_UBYTE(g, pRow[1]); - UNCLAMPED_FLOAT_TO_UBYTE(b, pRow[2]); - *dst++ = (0xff << 24) | (r << 16) | (g << 8) | b; - } - p += src_stride; - } -} - - -/*** PIPE_FORMAT_B8G8R8A8_UNORM ***/ - -static void -b8g8r8a8_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - const unsigned pixel = *src++; - pRow[0] = UBYTE_TO_FLOAT((pixel >> 8) & 0xff); - pRow[1] = UBYTE_TO_FLOAT((pixel >> 16) & 0xff); - pRow[2] = UBYTE_TO_FLOAT((pixel >> 24) & 0xff); - pRow[3] = UBYTE_TO_FLOAT((pixel >> 0) & 0xff); - } - p += dst_stride; - } -} - - -static void -b8g8r8a8_put_tile_rgba(unsigned *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned r, g, b, a; - UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]); - UNCLAMPED_FLOAT_TO_UBYTE(g, pRow[1]); - UNCLAMPED_FLOAT_TO_UBYTE(b, pRow[2]); - UNCLAMPED_FLOAT_TO_UBYTE(a, pRow[3]); - *dst++ = (b << 24) | (g << 16) | (r << 8) | a; - } - p += src_stride; - } -} - - -/*** PIPE_FORMAT_A1R5G5B5_UNORM ***/ - -static void -a1r5g5b5_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - const ushort pixel = *src++; - pRow[0] = ((pixel >> 10) & 0x1f) * (1.0f / 31.0f); - pRow[1] = ((pixel >> 5) & 0x1f) * (1.0f / 31.0f); - pRow[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f); - pRow[3] = ((pixel >> 15) ) * 1.0f; - } - p += dst_stride; - } -} - - -static void -a1r5g5b5_put_tile_rgba(ushort *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned r, g, b, a; - UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]); - UNCLAMPED_FLOAT_TO_UBYTE(g, pRow[1]); - UNCLAMPED_FLOAT_TO_UBYTE(b, pRow[2]); - UNCLAMPED_FLOAT_TO_UBYTE(a, pRow[3]); - r = r >> 3; /* 5 bits */ - g = g >> 3; /* 5 bits */ - b = b >> 3; /* 5 bits */ - a = a >> 7; /* 1 bit */ - *dst++ = (a << 15) | (r << 10) | (g << 5) | b; - } - p += src_stride; - } -} - - -/*** PIPE_FORMAT_A4R4G4B4_UNORM ***/ - -static void -a4r4g4b4_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - const ushort pixel = *src++; - pRow[0] = ((pixel >> 8) & 0xf) * (1.0f / 15.0f); - pRow[1] = ((pixel >> 4) & 0xf) * (1.0f / 15.0f); - pRow[2] = ((pixel ) & 0xf) * (1.0f / 15.0f); - pRow[3] = ((pixel >> 12) ) * (1.0f / 15.0f); - } - p += dst_stride; - } -} - - -static void -a4r4g4b4_put_tile_rgba(ushort *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned r, g, b, a; - UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]); - UNCLAMPED_FLOAT_TO_UBYTE(g, pRow[1]); - UNCLAMPED_FLOAT_TO_UBYTE(b, pRow[2]); - UNCLAMPED_FLOAT_TO_UBYTE(a, pRow[3]); - r >>= 4; - g >>= 4; - b >>= 4; - a >>= 4; - *dst++ = (a << 12) | (r << 16) | (g << 4) | b; - } - p += src_stride; - } -} - - -/*** PIPE_FORMAT_R5G6B5_UNORM ***/ - -static void -r5g6b5_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - const ushort pixel = *src++; - pRow[0] = ((pixel >> 11) & 0x1f) * (1.0f / 31.0f); - pRow[1] = ((pixel >> 5) & 0x3f) * (1.0f / 63.0f); - pRow[2] = ((pixel ) & 0x1f) * (1.0f / 31.0f); - pRow[3] = 1.0f; - } - p += dst_stride; - } -} - - -static void -r5g6b5_put_tile_rgba(ushort *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - uint r = (uint) (CLAMP(pRow[0], 0.0, 1.0) * 31.0); - uint g = (uint) (CLAMP(pRow[1], 0.0, 1.0) * 63.0); - uint b = (uint) (CLAMP(pRow[2], 0.0, 1.0) * 31.0); - *dst++ = (r << 11) | (g << 5) | (b); - } - p += src_stride; - } -} - - - -/*** PIPE_FORMAT_Z16_UNORM ***/ - -/** - * Return each Z value as four floats in [0,1]. - */ -static void -z16_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - const float scale = 1.0f / 65535.0f; - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - pRow[0] = - pRow[1] = - pRow[2] = - pRow[3] = *src++ * scale; - } - p += dst_stride; - } -} - - - - -/*** PIPE_FORMAT_L8_UNORM ***/ - -static void -l8_get_tile_rgba(const ubyte *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, src++, pRow += 4) { - pRow[0] = - pRow[1] = - pRow[2] = UBYTE_TO_FLOAT(*src); - pRow[3] = 1.0; - } - p += dst_stride; - } -} - - -static void -l8_put_tile_rgba(ubyte *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned r; - UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]); - *dst++ = r; - } - p += src_stride; - } -} - - - -/*** PIPE_FORMAT_A8_UNORM ***/ - -static void -a8_get_tile_rgba(const ubyte *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, src++, pRow += 4) { - pRow[0] = - pRow[1] = - pRow[2] = 0.0; - pRow[3] = UBYTE_TO_FLOAT(*src); - } - p += dst_stride; - } -} - - -static void -a8_put_tile_rgba(ubyte *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned a; - UNCLAMPED_FLOAT_TO_UBYTE(a, pRow[3]); - *dst++ = a; - } - p += src_stride; - } -} - - - -/*** PIPE_FORMAT_R16_SNORM ***/ - -static void -r16_get_tile_rgba(const short *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, src++, pRow += 4) { - pRow[0] = SHORT_TO_FLOAT(src[0]); - pRow[1] = - pRow[2] = 0.0; - pRow[3] = 1.0; - } - p += dst_stride; - } -} - - -static void -r16_put_tile_rgba(short *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, dst++, pRow += 4) { - UNCLAMPED_FLOAT_TO_SHORT(dst[0], pRow[0]); - } - p += src_stride; - } -} - - -/*** PIPE_FORMAT_R16G16B16A16_SNORM ***/ - -static void -r16g16b16a16_get_tile_rgba(const short *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, src += 4, pRow += 4) { - pRow[0] = SHORT_TO_FLOAT(src[0]); - pRow[1] = SHORT_TO_FLOAT(src[1]); - pRow[2] = SHORT_TO_FLOAT(src[2]); - pRow[3] = SHORT_TO_FLOAT(src[3]); - } - p += dst_stride; - } -} - - -static void -r16g16b16a16_put_tile_rgba(short *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, dst += 4, pRow += 4) { - UNCLAMPED_FLOAT_TO_SHORT(dst[0], pRow[0]); - UNCLAMPED_FLOAT_TO_SHORT(dst[1], pRow[1]); - UNCLAMPED_FLOAT_TO_SHORT(dst[2], pRow[2]); - UNCLAMPED_FLOAT_TO_SHORT(dst[3], pRow[3]); - } - p += src_stride; - } -} - - - -/*** PIPE_FORMAT_I8_UNORM ***/ - -static void -i8_get_tile_rgba(const ubyte *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, src++, pRow += 4) { - pRow[0] = - pRow[1] = - pRow[2] = - pRow[3] = UBYTE_TO_FLOAT(*src); - } - p += dst_stride; - } -} - - -static void -i8_put_tile_rgba(ubyte *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned r; - UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]); - *dst++ = r; - } - p += src_stride; - } -} - - -/*** PIPE_FORMAT_A8L8_UNORM ***/ - -static void -a8l8_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - ushort p = *src++; - pRow[0] = - pRow[1] = - pRow[2] = UBYTE_TO_FLOAT(p & 0xff); - pRow[3] = UBYTE_TO_FLOAT(p >> 8); - } - p += dst_stride; - } -} - - -static void -a8l8_put_tile_rgba(ushort *dst, - unsigned w, unsigned h, - const float *p, - unsigned src_stride) -{ - unsigned i, j; - - for (i = 0; i < h; i++) { - const float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - unsigned r, a; - UNCLAMPED_FLOAT_TO_UBYTE(r, pRow[0]); - UNCLAMPED_FLOAT_TO_UBYTE(a, pRow[3]); - *dst++ = (a << 8) | r; - } - p += src_stride; - } -} - - - - -/*** PIPE_FORMAT_Z32_UNORM ***/ - -/** - * Return each Z value as four floats in [0,1]. - */ -static void -z32_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - const double scale = 1.0 / (double) 0xffffffff; - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - pRow[0] = - pRow[1] = - pRow[2] = - pRow[3] = (float) (*src++ * scale); - } - p += dst_stride; - } -} - - -/*** PIPE_FORMAT_S8Z24_UNORM ***/ - -/** - * Return Z component as four float in [0,1]. Stencil part ignored. - */ -static void -s8z24_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - const double scale = 1.0 / ((1 << 24) - 1); - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - pRow[0] = - pRow[1] = - pRow[2] = - pRow[3] = (float) (scale * (*src++ & 0xffffff)); - } - p += dst_stride; - } -} - - -/*** PIPE_FORMAT_Z24S8_UNORM ***/ - -/** - * Return Z component as four float in [0,1]. Stencil part ignored. - */ -static void -z24s8_get_tile_rgba(const unsigned *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride) -{ - const double scale = 1.0 / ((1 << 24) - 1); - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - for (j = 0; j < w; j++, pRow += 4) { - pRow[0] = - pRow[1] = - pRow[2] = - pRow[3] = (float) (scale * (*src++ >> 8)); - } - p += dst_stride; - } -} - - -/*** PIPE_FORMAT_YCBCR / PIPE_FORMAT_YCBCR_REV ***/ - -/** - * Convert YCbCr (or YCrCb) to RGBA. - */ -static void -ycbcr_get_tile_rgba(const ushort *src, - unsigned w, unsigned h, - float *p, - unsigned dst_stride, - boolean rev) -{ - const float scale = 1.0f / 255.0f; - unsigned i, j; - - for (i = 0; i < h; i++) { - float *pRow = p; - /* do two texels at a time */ - for (j = 0; j < (w & ~1); j += 2, src += 2) { - const ushort t0 = src[0]; - const ushort t1 = src[1]; - const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */ - const ubyte y1 = (t1 >> 8) & 0xff; /* luminance */ - ubyte cb, cr; - float r, g, b; - - if (rev) { - cb = t1 & 0xff; /* chroma U */ - cr = t0 & 0xff; /* chroma V */ - } - else { - cb = t0 & 0xff; /* chroma U */ - cr = t1 & 0xff; /* chroma V */ - } - - /* even pixel: y0,cr,cb */ - r = 1.164f * (y0-16) + 1.596f * (cr-128); - g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128); - b = 1.164f * (y0-16) + 2.018f * (cb-128); - pRow[0] = r * scale; - pRow[1] = g * scale; - pRow[2] = b * scale; - pRow[3] = 1.0f; - pRow += 4; - - /* odd pixel: use y1,cr,cb */ - r = 1.164f * (y1-16) + 1.596f * (cr-128); - g = 1.164f * (y1-16) - 0.813f * (cr-128) - 0.391f * (cb-128); - b = 1.164f * (y1-16) + 2.018f * (cb-128); - pRow[0] = r * scale; - pRow[1] = g * scale; - pRow[2] = b * scale; - pRow[3] = 1.0f; - pRow += 4; - - } - /* do the last texel */ - if (w & 1) { - const ushort t0 = src[0]; - const ushort t1 = src[1]; - const ubyte y0 = (t0 >> 8) & 0xff; /* luminance */ - ubyte cb, cr; - float r, g, b; - - if (rev) { - cb = t1 & 0xff; /* chroma U */ - cr = t0 & 0xff; /* chroma V */ - } - else { - cb = t0 & 0xff; /* chroma U */ - cr = t1 & 0xff; /* chroma V */ - } - - /* even pixel: y0,cr,cb */ - r = 1.164f * (y0-16) + 1.596f * (cr-128); - g = 1.164f * (y0-16) - 0.813f * (cr-128) - 0.391f * (cb-128); - b = 1.164f * (y0-16) + 2.018f * (cb-128); - pRow[0] = r * scale; - pRow[1] = g * scale; - pRow[2] = b * scale; - pRow[3] = 1.0f; - pRow += 4; - } - p += dst_stride; - } -} - - -void -pipe_tile_raw_to_rgba(enum pipe_format format, - void *src, - uint w, uint h, - float *dst, unsigned dst_stride) -{ - switch (format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - a8r8g8b8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_X8R8G8B8_UNORM: - x8r8g8b8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_B8G8R8A8_UNORM: - b8g8r8a8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_A1R5G5B5_UNORM: - a1r5g5b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_A4R4G4B4_UNORM: - a4r4g4b4_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_R5G6B5_UNORM: - r5g6b5_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_L8_UNORM: - l8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_A8_UNORM: - a8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_I8_UNORM: - i8_get_tile_rgba((ubyte *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_A8L8_UNORM: - a8l8_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_R16_SNORM: - r16_get_tile_rgba((short *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_R16G16B16A16_SNORM: - r16g16b16a16_get_tile_rgba((short *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_Z16_UNORM: - z16_get_tile_rgba((ushort *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_Z32_UNORM: - z32_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - s8z24_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_Z24S8_UNORM: - z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); - break; - case PIPE_FORMAT_YCBCR: - ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, FALSE); - break; - case PIPE_FORMAT_YCBCR_REV: - ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, TRUE); - break; - default: - assert(0); - } -} - - -void -pipe_get_tile_rgba(struct pipe_surface *ps, - uint x, uint y, uint w, uint h, - float *p) -{ - unsigned dst_stride = w * 4; - void *packed; - - if (pipe_clip_tile(x, y, &w, &h, ps)) - return; - - packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size); - - if (!packed) - return; - - if(ps->format == PIPE_FORMAT_YCBCR || ps->format == PIPE_FORMAT_YCBCR_REV) - assert((x & 1) == 0); - - pipe_get_tile_raw(ps, x, y, w, h, packed, 0); - - pipe_tile_raw_to_rgba(ps->format, packed, w, h, p, dst_stride); - - FREE(packed); -} - - -void -pipe_put_tile_rgba(struct pipe_surface *ps, - uint x, uint y, uint w, uint h, - const float *p) -{ - unsigned src_stride = w * 4; - void *packed; - - if (pipe_clip_tile(x, y, &w, &h, ps)) - return; - - packed = MALLOC(pf_get_nblocks(&ps->block, w, h) * ps->block.size); - - if (!packed) - return; - - switch (ps->format) { - case PIPE_FORMAT_A8R8G8B8_UNORM: - a8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_X8R8G8B8_UNORM: - x8r8g8b8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_B8G8R8A8_UNORM: - b8g8r8a8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_A1R5G5B5_UNORM: - a1r5g5b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_R5G6B5_UNORM: - r5g6b5_put_tile_rgba((ushort *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_R8G8B8A8_UNORM: - assert(0); - break; - case PIPE_FORMAT_A4R4G4B4_UNORM: - a4r4g4b4_put_tile_rgba((ushort *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_L8_UNORM: - l8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_A8_UNORM: - a8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_I8_UNORM: - i8_put_tile_rgba((ubyte *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_A8L8_UNORM: - a8l8_put_tile_rgba((ushort *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_R16_SNORM: - r16_put_tile_rgba((short *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_R16G16B16A16_SNORM: - r16g16b16a16_put_tile_rgba((short *) packed, w, h, p, src_stride); - break; - case PIPE_FORMAT_Z16_UNORM: - /*z16_put_tile_rgba((ushort *) packed, w, h, p, src_stride);*/ - break; - case PIPE_FORMAT_Z32_UNORM: - /*z32_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ - break; - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - /*s8z24_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ - break; - case PIPE_FORMAT_Z24S8_UNORM: - /*z24s8_put_tile_rgba((unsigned *) packed, w, h, p, src_stride);*/ - break; - default: - assert(0); - } - - pipe_put_tile_raw(ps, x, y, w, h, packed, 0); - - FREE(packed); -} - - -/** - * Get a block of Z values, converted to 32-bit range. - */ -void -pipe_get_tile_z(struct pipe_surface *ps, - uint x, uint y, uint w, uint h, - uint *z) -{ - const uint dstStride = w; - ubyte *map; - uint *pDest = z; - uint i, j; - - if (pipe_clip_tile(x, y, &w, &h, ps)) - return; - - map = (ubyte *)pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_READ); - if (!map) { - assert(0); - return; - } - - switch (ps->format) { - case PIPE_FORMAT_Z32_UNORM: - { - const uint *pSrc - = (const uint *)(map + y * ps->stride + x*4); - for (i = 0; i < h; i++) { - memcpy(pDest, pSrc, 4 * w); - pDest += dstStride; - pSrc += ps->stride/4; - } - } - break; - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - { - const uint *pSrc - = (const uint *)(map + y * ps->stride + x*4); - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - /* convert 24-bit Z to 32-bit Z */ - pDest[j] = (pSrc[j] << 8) | (pSrc[j] & 0xff); - } - pDest += dstStride; - pSrc += ps->stride/4; - } - } - break; - case PIPE_FORMAT_Z16_UNORM: - { - const ushort *pSrc - = (const ushort *)(map + y * ps->stride + x*2); - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - /* convert 16-bit Z to 32-bit Z */ - pDest[j] = (pSrc[j] << 16) | pSrc[j]; - } - pDest += dstStride; - pSrc += ps->stride/2; - } - } - break; - default: - assert(0); - } - - pipe_surface_unmap(ps); -} - - -void -pipe_put_tile_z(struct pipe_surface *ps, - uint x, uint y, uint w, uint h, - const uint *zSrc) -{ - const uint srcStride = w; - const uint *pSrc = zSrc; - ubyte *map; - uint i, j; - - if (pipe_clip_tile(x, y, &w, &h, ps)) - return; - - map = (ubyte *)pipe_surface_map(ps, PIPE_BUFFER_USAGE_CPU_WRITE); - if (!map) { - assert(0); - return; - } - - switch (ps->format) { - case PIPE_FORMAT_Z32_UNORM: - { - uint *pDest = (uint *) (map + y * ps->stride + x*4); - for (i = 0; i < h; i++) { - memcpy(pDest, pSrc, 4 * w); - pDest += ps->stride/4; - pSrc += srcStride; - } - } - break; - case PIPE_FORMAT_S8Z24_UNORM: - case PIPE_FORMAT_X8Z24_UNORM: - { - uint *pDest = (uint *) (map + y * ps->stride + x*4); - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - /* convert 32-bit Z to 24-bit Z (0 stencil) */ - pDest[j] = pSrc[j] >> 8; - } - pDest += ps->stride/4; - pSrc += srcStride; - } - } - break; - case PIPE_FORMAT_Z16_UNORM: - { - ushort *pDest = (ushort *) (map + y * ps->stride + x*2); - for (i = 0; i < h; i++) { - for (j = 0; j < w; j++) { - /* convert 32-bit Z to 16-bit Z */ - pDest[j] = pSrc[j] >> 16; - } - pDest += ps->stride/2; - pSrc += srcStride; - } - } - break; - default: - assert(0); - } - - pipe_surface_unmap(ps); -} - - diff --git a/src/gallium/auxiliary/util/p_tile.h b/src/gallium/auxiliary/util/p_tile.h deleted file mode 100644 index a8ac805308..0000000000 --- a/src/gallium/auxiliary/util/p_tile.h +++ /dev/null @@ -1,101 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -#ifndef P_TILE_H -#define P_TILE_H - -#include "pipe/p_compiler.h" - -struct pipe_surface; - - -/** - * Clip tile against surface dims. - * \return TRUE if tile is totally clipped, FALSE otherwise - */ -static INLINE boolean -pipe_clip_tile(uint x, uint y, uint *w, uint *h, const struct pipe_surface *ps) -{ - if (x >= ps->width) - return TRUE; - if (y >= ps->height) - return TRUE; - if (x + *w > ps->width) - *w = ps->width - x; - if (y + *h > ps->height) - *h = ps->height - y; - return FALSE; -} - -#ifdef __cplusplus -extern "C" { -#endif - -void -pipe_get_tile_raw(struct pipe_surface *ps, - uint x, uint y, uint w, uint h, - void *p, int dst_stride); - -void -pipe_put_tile_raw(struct pipe_surface *ps, - uint x, uint y, uint w, uint h, - const void *p, int src_stride); - - -void -pipe_get_tile_rgba(struct pipe_surface *ps, - uint x, uint y, uint w, uint h, - float *p); - -void -pipe_put_tile_rgba(struct pipe_surface *ps, - uint x, uint y, uint w, uint h, - const float *p); - - -void -pipe_get_tile_z(struct pipe_surface *ps, - uint x, uint y, uint w, uint h, - uint *z); - -void -pipe_put_tile_z(struct pipe_surface *ps, - uint x, uint y, uint w, uint h, - const uint *z); - -void -pipe_tile_raw_to_rgba(enum pipe_format format, - void *src, - uint w, uint h, - float *dst, unsigned dst_stride); - - -#ifdef __cplusplus -} -#endif - -#endif -- cgit v1.2.3 From 6fd2feaad7029a2f6d02bcf7039cbe72f53c615c Mon Sep 17 00:00:00 2001 From: Brian Date: Sun, 24 Aug 2008 18:10:50 -0600 Subject: gallium: include u_memory.h, u_math.h --- src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c | 1 + src/gallium/winsys/xlib/brw_aub.c | 1 + 2 files changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index 5e518370d0..4cb8c3bb55 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -37,6 +37,7 @@ #include "pipe/p_debug.h" #include "pipe/p_winsys.h" #include "pipe/p_thread.h" +#include "util/u_math.h" #include "util/u_memory.h" #include "util/u_double_list.h" #include "util/u_time.h" diff --git a/src/gallium/winsys/xlib/brw_aub.c b/src/gallium/winsys/xlib/brw_aub.c index f319802962..9e96efaa53 100644 --- a/src/gallium/winsys/xlib/brw_aub.c +++ b/src/gallium/winsys/xlib/brw_aub.c @@ -35,6 +35,7 @@ #include "pipe/p_context.h" #include "pipe/p_state.h" #include "pipe/p_debug.h" +#include "util/u_memory.h" struct brw_aubfile { -- cgit v1.2.3 From 2c4661f8fce8a27f2082c6ac498f9fb188878476 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 25 Aug 2008 10:42:00 +0200 Subject: gallium: Add missing includes. --- src/gallium/auxiliary/util/u_memory.h | 2 +- src/gallium/drivers/i915simple/i915_fpc_emit.c | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h index 148a5cb997..ec32d60fbe 100644 --- a/src/gallium/auxiliary/util/u_memory.h +++ b/src/gallium/auxiliary/util/u_memory.h @@ -51,7 +51,7 @@ /* memory debugging */ -#include "p_debug.h" +#include "pipe/p_debug.h" #define MALLOC( _size ) \ debug_malloc( __FILE__, __LINE__, __FUNCTION__, _size ) diff --git a/src/gallium/drivers/i915simple/i915_fpc_emit.c b/src/gallium/drivers/i915simple/i915_fpc_emit.c index 4bdeefb449..b054ce41d3 100644 --- a/src/gallium/drivers/i915simple/i915_fpc_emit.c +++ b/src/gallium/drivers/i915simple/i915_fpc_emit.c @@ -28,6 +28,7 @@ #include "i915_reg.h" #include "i915_context.h" #include "i915_fpc.h" +#include "util/u_math.h" #define A0_DEST( reg ) (((reg)&UREG_TYPE_NR_MASK)>>UREG_A0_DEST_SHIFT_LEFT) -- cgit v1.2.3 From fd06d01b80fd9b2924682686e943d819bfcf027b Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 25 Aug 2008 11:29:39 -0600 Subject: gallium: include on linux to get sched_yield() proto --- src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 8ae052e875..410d336fef 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -38,6 +38,7 @@ #if defined(PIPE_OS_LINUX) #include +#include #endif #include "pipe/p_compiler.h" -- cgit v1.2.3 From a644c5a850242376e6ab03f7f4bdbfb8a232490f Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 25 Aug 2008 11:30:20 -0600 Subject: gallium: include p_debug.h for non-HAVE_POSIX_MEMALIGN --- src/gallium/auxiliary/util/u_memory.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h index ec32d60fbe..8fbbb4e55d 100644 --- a/src/gallium/auxiliary/util/u_memory.h +++ b/src/gallium/auxiliary/util/u_memory.h @@ -36,6 +36,7 @@ #include "util/u_pointer.h" +#include "pipe/p_debug.h" /* Define ENOMEM for WINCE */ -- cgit v1.2.3 From 60ac76175b6457ecc1cd8bd7a25cb79b2d529434 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 25 Aug 2008 11:30:57 -0600 Subject: gallium: added const qualifiers on some draw funcs --- src/gallium/auxiliary/draw/draw_context.c | 4 ++-- src/gallium/auxiliary/draw/draw_context.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 1c26cb31a3..36751c2621 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -288,7 +288,7 @@ draw_enable_point_sprites(struct draw_context *draw, boolean enable) * work for the drivers. */ int -draw_find_vs_output(struct draw_context *draw, +draw_find_vs_output(const struct draw_context *draw, uint semantic_name, uint semantic_index) { const struct draw_vertex_shader *vs = draw->vs.vertex_shader; @@ -314,7 +314,7 @@ draw_find_vs_output(struct draw_context *draw, * Return number of vertex shader outputs. */ uint -draw_num_vs_outputs(struct draw_context *draw) +draw_num_vs_outputs(const struct draw_context *draw) { uint count = draw->vs.vertex_shader->info.num_outputs; if (draw->extra_vp_outputs.slot > 0) diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index b8f2bfa332..0ab3681b64 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -84,11 +84,11 @@ draw_install_pstipple_stage(struct draw_context *draw, struct pipe_context *pipe int -draw_find_vs_output(struct draw_context *draw, +draw_find_vs_output(const struct draw_context *draw, uint semantic_name, uint semantic_index); uint -draw_num_vs_outputs(struct draw_context *draw); +draw_num_vs_outputs(const struct draw_context *draw); -- cgit v1.2.3 From e6887a5752774c18cf527477fdd3e57e4893ff3b Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 25 Aug 2008 11:19:24 +0100 Subject: draw: attempt atomic submit of large drawelements calls --- src/gallium/auxiliary/draw/draw_pt.h | 4 ++-- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 9 ++++----- .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 7 ++++--- .../auxiliary/draw/draw_pt_fetch_shade_pipeline.c | 11 ++++------- src/gallium/auxiliary/draw/draw_pt_vcache.c | 21 +++++++++++++-------- src/gallium/auxiliary/draw/draw_vbuf.h | 3 ++- 6 files changed, 29 insertions(+), 26 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h index 3d2a9c78b7..c02f229110 100644 --- a/src/gallium/auxiliary/draw/draw_pt.h +++ b/src/gallium/auxiliary/draw/draw_pt.h @@ -98,9 +98,9 @@ struct draw_pt_middle_end { unsigned count); /* Transform all vertices in a linear range and then draw them with - * the supplied element list. + * the supplied element list. May fail and return FALSE. */ - void (*run_linear_elts)( struct draw_pt_middle_end *, + boolean (*run_linear_elts)( struct draw_pt_middle_end *, unsigned fetch_start, unsigned fetch_count, const ushort *draw_elts, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 0684c93d10..5a4db6cfe5 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -324,7 +324,7 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, } -static void fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, +static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, unsigned start, unsigned count, const ushort *draw_elts, @@ -341,10 +341,8 @@ static void fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, hw_verts = draw->render->allocate_vertices( draw->render, (ushort)feme->translate->key.output_stride, (ushort)count ); - if (!hw_verts) { - assert(0); - return; - } + if (!hw_verts) + return FALSE; /* Single routine to fetch vertices and emit HW verts. */ @@ -367,6 +365,7 @@ static void fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, feme->translate->key.output_stride, count ); + return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 87094f3092..73fc70c1bc 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -332,7 +332,7 @@ fse_run(struct draw_pt_middle_end *middle, -static void fse_run_linear_elts( struct draw_pt_middle_end *middle, +static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle, unsigned start, unsigned count, const ushort *draw_elts, @@ -351,8 +351,7 @@ static void fse_run_linear_elts( struct draw_pt_middle_end *middle, (ushort)count ); if (!hw_verts) { - assert(0); - return; + return FALSE; } /* Single routine to fetch vertices, run shader and emit HW verts. @@ -374,6 +373,8 @@ static void fse_run_linear_elts( struct draw_pt_middle_end *middle, hw_verts, fse->key.output_stride, count ); + + return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c index f617aac9f7..ec3b41c320 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_pipeline.c @@ -262,7 +262,7 @@ static void fetch_pipeline_linear_run( struct draw_pt_middle_end *middle, -static void fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle, +static boolean fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle, unsigned start, unsigned count, const ushort *draw_elts, @@ -277,12 +277,8 @@ static void fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle, struct vertex_header *pipeline_verts = (struct vertex_header *)MALLOC(fpme->vertex_size * alloc_count); - if (!pipeline_verts) { - /* Not much we can do here - just skip the rendering. - */ - assert(0); - return; - } + if (!pipeline_verts) + return FALSE; /* Fetch into our vertex buffer */ @@ -336,6 +332,7 @@ static void fetch_pipeline_linear_run_elts( struct draw_pt_middle_end *middle, } FREE(pipeline_verts); + return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index b8b5de729d..e8467b2ae3 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -306,6 +306,7 @@ static void vcache_check_run( struct draw_pt_front_end *frontend, unsigned fetch_count = max_index + 1 - min_index; const ushort *transformed_elts; ushort *storage = NULL; + boolean ok; if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count, @@ -313,7 +314,6 @@ static void vcache_check_run( struct draw_pt_front_end *frontend, draw_count); if (max_index == 0xffffffff || - fetch_count >= vcache->fetch_max || fetch_count > draw_count) { if (0) debug_printf("fail\n"); goto fail; @@ -395,14 +395,19 @@ static void vcache_check_run( struct draw_pt_front_end *frontend, transformed_elts = storage; } - vcache->middle->run_linear_elts( vcache->middle, - min_index, /* start */ - fetch_count, - transformed_elts, - draw_count ); - + ok = vcache->middle->run_linear_elts( vcache->middle, + min_index, /* start */ + fetch_count, + transformed_elts, + draw_count ); + FREE(storage); - return; + + if (ok) + return; + + debug_printf("failed to execute atomic draw elts for %d/%d, splitting up\n", + fetch_count, draw_count); fail: vcache_run( frontend, get_elt, elts, draw_count ); diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index 62247ccd9f..b0aa2df309 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -79,7 +79,8 @@ struct vbuf_render { boolean (*set_primitive)( struct vbuf_render *, unsigned prim ); /** - * DrawElements, note indices are ushort: + * DrawElements, note indices are ushort. The driver must complete + * this call, if necessary splitting the index list itself. */ void (*draw)( struct vbuf_render *, const ushort *indices, -- cgit v1.2.3 From 0bb852fa49e7f9a31036089ea4f5dfbd312a4a3a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 26 Aug 2008 16:35:12 -0600 Subject: gallium: thread wrapper clean-up In p_thread.h replace _glthread_* functions with new pipe_* functions. Remove other old cruft. --- .../auxiliary/pipebuffer/pb_buffer_fenced.c | 24 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 18 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 20 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c | 24 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c | 20 +- src/gallium/auxiliary/rtasm/rtasm_execmem.c | 11 +- src/gallium/include/pipe/p_thread.h | 366 ++++++++------------- .../winsys/drm/intel/common/ws_dri_bufmgr.c | 102 +++--- .../winsys/drm/intel/common/ws_dri_bufpool.h | 8 +- .../winsys/drm/intel/common/ws_dri_drmpool.c | 4 +- .../winsys/drm/intel/common/ws_dri_fencemgr.c | 66 ++-- .../winsys/drm/intel/common/ws_dri_mallocpool.c | 6 +- .../winsys/drm/intel/common/ws_dri_slabpool.c | 66 ++-- src/gallium/winsys/drm/intel/dri/intel_lock.c | 8 +- src/gallium/winsys/xlib/glxapi.c | 26 +- src/gallium/winsys/xlib/xm_api.c | 9 +- src/gallium/winsys/xlib/xmesaP.h | 3 +- 17 files changed, 328 insertions(+), 453 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 410d336fef..17b2781052 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -69,7 +69,7 @@ struct fenced_buffer_list { - _glthread_Mutex mutex; + pipe_mutex mutex; struct pipe_winsys *winsys; @@ -240,7 +240,7 @@ fenced_buffer_destroy(struct pb_buffer *buf) struct fenced_buffer *fenced_buf = fenced_buffer(buf); struct fenced_buffer_list *fenced_list = fenced_buf->list; - _glthread_LOCK_MUTEX(fenced_list->mutex); + pipe_mutex_lock(fenced_list->mutex); assert(fenced_buf->base.base.refcount == 0); if (fenced_buf->fence) { struct pipe_winsys *winsys = fenced_list->winsys; @@ -263,7 +263,7 @@ fenced_buffer_destroy(struct pb_buffer *buf) else { _fenced_buffer_destroy(fenced_buf); } - _glthread_UNLOCK_MUTEX(fenced_list->mutex); + pipe_mutex_unlock(fenced_list->mutex); } @@ -396,7 +396,7 @@ buffer_fence(struct pb_buffer *buf, return; } - _glthread_LOCK_MUTEX(fenced_list->mutex); + pipe_mutex_lock(fenced_list->mutex); if (fenced_buf->fence) _fenced_buffer_remove(fenced_list, fenced_buf); if (fence) { @@ -404,7 +404,7 @@ buffer_fence(struct pb_buffer *buf, fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE; _fenced_buffer_add(fenced_buf); } - _glthread_UNLOCK_MUTEX(fenced_list->mutex); + pipe_mutex_unlock(fenced_list->mutex); } @@ -423,7 +423,7 @@ fenced_buffer_list_create(struct pipe_winsys *winsys) fenced_list->numDelayed = 0; - _glthread_INIT_MUTEX(fenced_list->mutex); + pipe_mutex_init(fenced_list->mutex); return fenced_list; } @@ -433,28 +433,28 @@ void fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, int wait) { - _glthread_LOCK_MUTEX(fenced_list->mutex); + pipe_mutex_lock(fenced_list->mutex); _fenced_buffer_list_check_free(fenced_list, wait); - _glthread_UNLOCK_MUTEX(fenced_list->mutex); + pipe_mutex_unlock(fenced_list->mutex); } void fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list) { - _glthread_LOCK_MUTEX(fenced_list->mutex); + pipe_mutex_lock(fenced_list->mutex); /* Wait on outstanding fences */ while (fenced_list->numDelayed) { - _glthread_UNLOCK_MUTEX(fenced_list->mutex); + pipe_mutex_unlock(fenced_list->mutex); #if defined(PIPE_OS_LINUX) sched_yield(); #endif _fenced_buffer_list_check_free(fenced_list, 1); - _glthread_LOCK_MUTEX(fenced_list->mutex); + pipe_mutex_lock(fenced_list->mutex); } - _glthread_UNLOCK_MUTEX(fenced_list->mutex); + pipe_mutex_unlock(fenced_list->mutex); FREE(fenced_list); } diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index b914c2d0fe..e2b8fe0f98 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -79,7 +79,7 @@ struct pb_cache_manager struct pb_manager *provider; unsigned usecs; - _glthread_Mutex mutex; + pipe_mutex mutex; struct list_head delayed; size_t numDelayed; @@ -153,7 +153,7 @@ pb_cache_buffer_destroy(struct pb_buffer *_buf) struct pb_cache_buffer *buf = pb_cache_buffer(_buf); struct pb_cache_manager *mgr = buf->mgr; - _glthread_LOCK_MUTEX(mgr->mutex); + pipe_mutex_lock(mgr->mutex); assert(buf->base.base.refcount == 0); _pb_cache_buffer_list_check_free(mgr); @@ -162,7 +162,7 @@ pb_cache_buffer_destroy(struct pb_buffer *_buf) util_time_add(&buf->start, mgr->usecs, &buf->end); LIST_ADDTAIL(&buf->head, &mgr->delayed); ++mgr->numDelayed; - _glthread_UNLOCK_MUTEX(mgr->mutex); + pipe_mutex_unlock(mgr->mutex); } @@ -235,7 +235,7 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, struct list_head *curr, *next; struct util_time now; - _glthread_LOCK_MUTEX(mgr->mutex); + pipe_mutex_lock(mgr->mutex); buf = NULL; curr = mgr->delayed.next; @@ -264,12 +264,12 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, if(buf) { LIST_DEL(&buf->head); - _glthread_UNLOCK_MUTEX(mgr->mutex); + pipe_mutex_unlock(mgr->mutex); ++buf->base.base.refcount; return &buf->base; } - _glthread_UNLOCK_MUTEX(mgr->mutex); + pipe_mutex_unlock(mgr->mutex); buf = CALLOC_STRUCT(pb_cache_buffer); if(!buf) @@ -305,7 +305,7 @@ pb_cache_flush(struct pb_manager *_mgr) struct list_head *curr, *next; struct pb_cache_buffer *buf; - _glthread_LOCK_MUTEX(mgr->mutex); + pipe_mutex_lock(mgr->mutex); curr = mgr->delayed.next; next = curr->next; while(curr != &mgr->delayed) { @@ -314,7 +314,7 @@ pb_cache_flush(struct pb_manager *_mgr) curr = next; next = curr->next; } - _glthread_UNLOCK_MUTEX(mgr->mutex); + pipe_mutex_unlock(mgr->mutex); } @@ -345,7 +345,7 @@ pb_cache_manager_create(struct pb_manager *provider, mgr->usecs = usecs; LIST_INITHEAD(&mgr->delayed); mgr->numDelayed = 0; - _glthread_INIT_MUTEX(mgr->mutex); + pipe_mutex_init(mgr->mutex); return &mgr->base; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index b40eb6cc90..e8c7f8e1f8 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -53,7 +53,7 @@ struct mm_pb_manager { struct pb_manager base; - _glthread_Mutex mutex; + pipe_mutex mutex; size_t size; struct mem_block *heap; @@ -99,10 +99,10 @@ mm_buffer_destroy(struct pb_buffer *buf) assert(buf->base.refcount == 0); - _glthread_LOCK_MUTEX(mm->mutex); + pipe_mutex_lock(mm->mutex); mmFreeMem(mm_buf->block); FREE(buf); - _glthread_UNLOCK_MUTEX(mm->mutex); + pipe_mutex_unlock(mm->mutex); } @@ -158,11 +158,11 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr, if(desc->alignment % (1 << mm->align2)) return NULL; - _glthread_LOCK_MUTEX(mm->mutex); + pipe_mutex_lock(mm->mutex); mm_buf = CALLOC_STRUCT(mm_buffer); if (!mm_buf) { - _glthread_UNLOCK_MUTEX(mm->mutex); + pipe_mutex_unlock(mm->mutex); return NULL; } @@ -185,7 +185,7 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr, mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0); if(!mm_buf->block) { FREE(mm_buf); - _glthread_UNLOCK_MUTEX(mm->mutex); + pipe_mutex_unlock(mm->mutex); return NULL; } } @@ -194,7 +194,7 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr, assert(0 <= (unsigned)mm_buf->block->ofs && (unsigned)mm_buf->block->ofs < mm->size); assert(size <= (unsigned)mm_buf->block->size && (unsigned)mm_buf->block->ofs + (unsigned)mm_buf->block->size <= mm->size); - _glthread_UNLOCK_MUTEX(mm->mutex); + pipe_mutex_unlock(mm->mutex); return SUPER(mm_buf); } @@ -204,14 +204,14 @@ mm_bufmgr_destroy(struct pb_manager *mgr) { struct mm_pb_manager *mm = mm_pb_manager(mgr); - _glthread_LOCK_MUTEX(mm->mutex); + pipe_mutex_lock(mm->mutex); mmDestroy(mm->heap); pb_unmap(mm->buffer); pb_reference(&mm->buffer, NULL); - _glthread_UNLOCK_MUTEX(mm->mutex); + pipe_mutex_unlock(mm->mutex); FREE(mgr); } @@ -236,7 +236,7 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, mm->size = size; mm->align2 = align2; /* 64-byte alignment */ - _glthread_INIT_MUTEX(mm->mutex); + pipe_mutex_init(mm->mutex); mm->buffer = buffer; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c index 93d2cc9635..3ef72c5bbb 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -56,7 +56,7 @@ struct pool_pb_manager { struct pb_manager base; - _glthread_Mutex mutex; + pipe_mutex mutex; size_t bufSize; size_t bufAlign; @@ -110,10 +110,10 @@ pool_buffer_destroy(struct pb_buffer *buf) assert(pool_buf->base.base.refcount == 0); - _glthread_LOCK_MUTEX(pool->mutex); + pipe_mutex_lock(pool->mutex); LIST_ADD(&pool_buf->head, &pool->free); pool->numFree++; - _glthread_UNLOCK_MUTEX(pool->mutex); + pipe_mutex_unlock(pool->mutex); } @@ -124,9 +124,9 @@ pool_buffer_map(struct pb_buffer *buf, unsigned flags) struct pool_pb_manager *pool = pool_buf->mgr; void *map; - _glthread_LOCK_MUTEX(pool->mutex); + pipe_mutex_lock(pool->mutex); map = (unsigned char *) pool->map + pool_buf->start; - _glthread_UNLOCK_MUTEX(pool->mutex); + pipe_mutex_unlock(pool->mutex); return map; } @@ -171,10 +171,10 @@ pool_bufmgr_create_buffer(struct pb_manager *mgr, assert(size == pool->bufSize); assert(pool->bufAlign % desc->alignment == 0); - _glthread_LOCK_MUTEX(pool->mutex); + pipe_mutex_lock(pool->mutex); if (pool->numFree == 0) { - _glthread_UNLOCK_MUTEX(pool->mutex); + pipe_mutex_unlock(pool->mutex); debug_printf("warning: out of fixed size buffer objects\n"); return NULL; } @@ -182,7 +182,7 @@ pool_bufmgr_create_buffer(struct pb_manager *mgr, item = pool->free.next; if (item == &pool->free) { - _glthread_UNLOCK_MUTEX(pool->mutex); + pipe_mutex_unlock(pool->mutex); debug_printf("error: fixed size buffer pool corruption\n"); return NULL; } @@ -190,7 +190,7 @@ pool_bufmgr_create_buffer(struct pb_manager *mgr, LIST_DEL(item); --pool->numFree; - _glthread_UNLOCK_MUTEX(pool->mutex); + pipe_mutex_unlock(pool->mutex); pool_buf = LIST_ENTRY(struct pool_buffer, item, head); assert(pool_buf->base.base.refcount == 0); @@ -206,14 +206,14 @@ static void pool_bufmgr_destroy(struct pb_manager *mgr) { struct pool_pb_manager *pool = pool_pb_manager(mgr); - _glthread_LOCK_MUTEX(pool->mutex); + pipe_mutex_lock(pool->mutex); FREE(pool->bufs); pb_unmap(pool->buffer); pb_reference(&pool->buffer, NULL); - _glthread_UNLOCK_MUTEX(pool->mutex); + pipe_mutex_unlock(pool->mutex); FREE(mgr); } @@ -246,7 +246,7 @@ pool_bufmgr_create(struct pb_manager *provider, pool->bufSize = bufSize; pool->bufAlign = desc->alignment; - _glthread_INIT_MUTEX(pool->mutex); + pipe_mutex_init(pool->mutex); pool->buffer = provider->create_buffer(provider, numBufs*bufSize, desc); if (!pool->buffer) diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index af307e265a..ac0296b26a 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -57,7 +57,7 @@ struct pb_slab_buffer struct list_head head; unsigned mapCount; size_t start; - _glthread_Cond event; + pipe_condvar event; }; struct pb_slab @@ -85,7 +85,7 @@ struct pb_slab_manager struct list_head slabs; struct list_head freeSlabs; - _glthread_Mutex mutex; + pipe_mutex mutex; }; /** @@ -143,7 +143,7 @@ pb_slab_buffer_destroy(struct pb_buffer *_buf) struct pb_slab_manager *mgr = slab->mgr; struct list_head *list = &buf->head; - _glthread_LOCK_MUTEX(mgr->mutex); + pipe_mutex_lock(mgr->mutex); assert(buf->base.base.refcount == 0); @@ -179,7 +179,7 @@ pb_slab_buffer_destroy(struct pb_buffer *_buf) } } - _glthread_UNLOCK_MUTEX(mgr->mutex); + pipe_mutex_unlock(mgr->mutex); } @@ -201,7 +201,7 @@ pb_slab_buffer_unmap(struct pb_buffer *_buf) --buf->mapCount; if (buf->mapCount == 0) - _glthread_COND_BROADCAST(buf->event); + pipe_condvar_broadcast(buf->event); } @@ -283,7 +283,7 @@ pb_slab_create(struct pb_slab_manager *mgr) buf->slab = slab; buf->start = i* mgr->bufSize; buf->mapCount = 0; - _glthread_INIT_COND(buf->event); + pipe_condvar_init(buf->event); LIST_ADDTAIL(&buf->head, &slab->freeBuffers); slab->numFree++; buf++; @@ -328,11 +328,11 @@ pb_slab_manager_create_buffer(struct pb_manager *_mgr, if(!pb_check_usage(desc->usage, mgr->desc.usage)) return NULL; - _glthread_LOCK_MUTEX(mgr->mutex); + pipe_mutex_lock(mgr->mutex); if (mgr->slabs.next == &mgr->slabs) { (void) pb_slab_create(mgr); if (mgr->slabs.next == &mgr->slabs) { - _glthread_UNLOCK_MUTEX(mgr->mutex); + pipe_mutex_unlock(mgr->mutex); return NULL; } } @@ -344,7 +344,7 @@ pb_slab_manager_create_buffer(struct pb_manager *_mgr, list = slab->freeBuffers.next; LIST_DELINIT(list); - _glthread_UNLOCK_MUTEX(mgr->mutex); + pipe_mutex_unlock(mgr->mutex); buf = LIST_ENTRY(struct pb_slab_buffer, list, head); ++buf->base.base.refcount; @@ -388,7 +388,7 @@ pb_slab_manager_create(struct pb_manager *provider, LIST_INITHEAD(&mgr->slabs); LIST_INITHEAD(&mgr->freeSlabs); - _glthread_INIT_MUTEX(mgr->mutex); + pipe_mutex_init(mgr->mutex); return &mgr->base; } diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c index dfa5c35ab6..19087589a8 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -47,11 +47,12 @@ #include #include +#include "pipe/p_thread.h" #include "util/u_mm.h" #define EXEC_HEAP_SIZE (10*1024*1024) -_glthread_DECLARE_STATIC_MUTEX(exec_mutex); +pipe_static_mutex(exec_mutex); static struct mem_block *exec_heap = NULL; static unsigned char *exec_mem = NULL; @@ -76,7 +77,7 @@ rtasm_exec_malloc(size_t size) struct mem_block *block = NULL; void *addr = NULL; - _glthread_LOCK_MUTEX(exec_mutex); + pipe_mutex_lock(exec_mutex); init_heap(); @@ -90,7 +91,7 @@ rtasm_exec_malloc(size_t size) else debug_printf("rtasm_exec_malloc failed\n"); - _glthread_UNLOCK_MUTEX(exec_mutex); + pipe_mutex_unlock(exec_mutex); return addr; } @@ -99,7 +100,7 @@ rtasm_exec_malloc(size_t size) void rtasm_exec_free(void *addr) { - _glthread_LOCK_MUTEX(exec_mutex); + pipe_mutex_lock(exec_mutex); if (exec_heap) { struct mem_block *block = mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem); @@ -108,7 +109,7 @@ rtasm_exec_free(void *addr) mmFreeMem(block); } - _glthread_UNLOCK_MUTEX(exec_mutex); + pipe_mutex_unlock(exec_mutex); } diff --git a/src/gallium/include/pipe/p_thread.h b/src/gallium/include/pipe/p_thread.h index 6e526b7aa8..4e6f7cbb44 100644 --- a/src/gallium/include/pipe/p_thread.h +++ b/src/gallium/include/pipe/p_thread.h @@ -23,307 +23,199 @@ * **************************************************************************/ + /** - * @file - * Thread - * - * Initial version by John Stone (j.stone@acm.org) (johns@cs.umr.edu) - * and Christoph Poliwoda (poliwoda@volumegraphics.com) - * Revised by Keith Whitwell - * Adapted for new gl dispatcher by Brian Paul - * - * - * - * DOCUMENTATION - * - * This thread module exports the following types: - * _glthread_TSD Thread-specific data area - * _glthread_Thread Thread datatype - * _glthread_Mutex Mutual exclusion lock - * - * Macros: - * _glthread_DECLARE_STATIC_MUTEX(name) Declare a non-local mutex - * _glthread_INIT_MUTEX(name) Initialize a mutex - * _glthread_LOCK_MUTEX(name) Lock a mutex - * _glthread_UNLOCK_MUTEX(name) Unlock a mutex - * - * Functions: - * _glthread_GetID(v) Get integer thread ID - * _glthread_InitTSD() Initialize thread-specific data - * _glthread_GetTSD() Get thread-specific data - * _glthread_SetTSD() Set thread-specific data - * - * If this file is accidentally included by a non-threaded build, - * it should not cause the build to fail, or otherwise cause problems. - * In general, it should only be included when needed however. + * Thread, mutex, condition var and thread-specific data functions. */ -#ifndef _P_THREAD_H_ -#define _P_THREAD_H_ +#ifndef _P_THREAD2_H_ +#define _P_THREAD2_H_ -#if (defined(PTHREADS) || defined(SOLARIS_THREADS) ||\ - defined(WIN32_THREADS) || defined(USE_XTHREADS) || defined(BEOS_THREADS)) \ - && !defined(THREADS) -# define THREADS -#endif -#ifdef VMS -#include -#endif +#include "pipe/p_compiler.h" -/* - * POSIX threads. This should be your choice in the Unix world - * whenever possible. When building with POSIX threads, be sure - * to enable any compiler flags which will cause the MT-safe - * libc (if one exists) to be used when linking, as well as any - * header macros for MT-safe errno, etc. For Solaris, this is the -mt - * compiler flag. On Solaris with gcc, use -D_REENTRANT to enable - * proper compiling for MT-safe libc etc. - */ -#if defined(PTHREADS) -#include /* POSIX threads headers */ -typedef struct { - pthread_key_t key; - int initMagic; -} _glthread_TSD; +#if defined(PIPE_OS_LINUX) -typedef pthread_t _glthread_Thread; +#include /* POSIX threads headers */ +#include /* for perror() */ -typedef pthread_mutex_t _glthread_Mutex; -#define _glthread_DECLARE_STATIC_MUTEX(name) \ - static _glthread_Mutex name = PTHREAD_MUTEX_INITIALIZER +typedef pthread_t pipe_thread; +typedef pthread_mutex_t pipe_mutex; +typedef pthread_cond_t pipe_condvar; -#define _glthread_INIT_MUTEX(name) \ - pthread_mutex_init(&(name), NULL) +#define pipe_static_mutex(mutex) \ + static pipe_mutex mutex = PTHREAD_MUTEX_INITIALIZER -#define _glthread_DESTROY_MUTEX(name) \ - pthread_mutex_destroy(&(name)) +#define pipe_mutex_init(mutex) \ + pthread_mutex_init(&(mutex), NULL) -#define _glthread_LOCK_MUTEX(name) \ - (void) pthread_mutex_lock(&(name)) +#define pipe_mutex_destroy(mutex) \ + pthread_mutex_destroy(&(mutex)) -#define _glthread_UNLOCK_MUTEX(name) \ - (void) pthread_mutex_unlock(&(name)) +#define pipe_mutex_lock(mutex) \ + (void) pthread_mutex_lock(&(mutex)) -typedef pthread_cond_t _glthread_Cond; +#define pipe_mutex_unlock(mutex) \ + (void) pthread_mutex_unlock(&(mutex)) -#define _glthread_DECLARE_STATIC_COND(name) \ - static _glthread_Cond name = PTHREAD_COND_INITIALIZER +#define pipe_static_condvar(mutex) \ + static pipe_condvar mutex = PTHREAD_COND_INITIALIZER -#define _glthread_INIT_COND(cond) \ +#define pipe_condvar_init(cond) \ pthread_cond_init(&(cond), NULL) -#define _glthread_DESTROY_COND(name) \ - pthread_cond_destroy(&(name)) +#define pipe_condvar_destroy(cond) \ + pthread_cond_destroy(&(cond)) -#define _glthread_COND_WAIT(cond, mutex) \ +#define pipe_condvar_wait(cond, mutex) \ pthread_cond_wait(&(cond), &(mutex)) -#define _glthread_COND_SIGNAL(cond) \ +#define pipe_condvar_signal(cond) \ pthread_cond_signal(&(cond)) -#define _glthread_COND_BROADCAST(cond) \ +#define pipe_condvar_broadcast(cond) \ pthread_cond_broadcast(&(cond)) -#endif /* PTHREADS */ - - +#elif defined(PIPE_OS_WINDOWS) -/* - * Solaris threads. Use only up to Solaris 2.4. - * Solaris 2.5 and higher provide POSIX threads. - * Be sure to compile with -mt on the Solaris compilers, or - * use -D_REENTRANT if using gcc. - */ -#ifdef SOLARIS_THREADS -#include - -typedef struct { - thread_key_t key; - mutex_t keylock; - int initMagic; -} _glthread_TSD; - -typedef thread_t _glthread_Thread; - -typedef mutex_t _glthread_Mutex; - -/* XXX need to really implement mutex-related macros */ -#define _glthread_DECLARE_STATIC_MUTEX(name) static _glthread_Mutex name = 0 -#define _glthread_INIT_MUTEX(name) (void) name -#define _glthread_DESTROY_MUTEX(name) (void) name -#define _glthread_LOCK_MUTEX(name) (void) name -#define _glthread_UNLOCK_MUTEX(name) (void) name - -#endif /* SOLARIS_THREADS */ - - - - -/* - * Windows threads. Should work with Windows NT and 95. - * IMPORTANT: Link with multithreaded runtime library when THREADS are - * used! - */ -#ifdef WIN32_THREADS #include -typedef struct { - DWORD key; - int initMagic; -} _glthread_TSD; - -typedef HANDLE _glthread_Thread; +typedef HANDLE pipe_thread; +typedef CRITICAL_SECTION pipe_mutex; -typedef CRITICAL_SECTION _glthread_Mutex; +#define pipe_static_mutex(name) \ + /*static*/ pipe_mutex name = {0,0,0,0,0,0} -#define _glthread_DECLARE_STATIC_MUTEX(name) /*static*/ _glthread_Mutex name = {0,0,0,0,0,0} -#define _glthread_INIT_MUTEX(name) InitializeCriticalSection(&name) -#define _glthread_DESTROY_MUTEX(name) DeleteCriticalSection(&name) -#define _glthread_LOCK_MUTEX(name) EnterCriticalSection(&name) -#define _glthread_UNLOCK_MUTEX(name) LeaveCriticalSection(&name) +#define pipe_mutex_init(name) \ + InitializeCriticalSection(&name) -#endif /* WIN32_THREADS */ +#define pipe_mutex_destroy(name) \ + DeleteCriticalSection(&name) +#define pipe_mutex_lock(name) \ + EnterCriticalSection(&name) +#define pipe_mutex_unlock(name) \ + LeaveCriticalSection(&name) -/* - * XFree86 has its own thread wrapper, Xthreads.h - * We wrap it again for GL. - */ -#ifdef USE_XTHREADS -#include - -typedef struct { - xthread_key_t key; - int initMagic; -} _glthread_TSD; - -typedef xthread_t _glthread_Thread; - -typedef xmutex_rec _glthread_Mutex; - -#ifdef XMUTEX_INITIALIZER -#define _glthread_DECLARE_STATIC_MUTEX(name) \ - static _glthread_Mutex name = XMUTEX_INITIALIZER #else -#define _glthread_DECLARE_STATIC_MUTEX(name) \ - static _glthread_Mutex name -#endif - -#define _glthread_INIT_MUTEX(name) \ - xmutex_init(&(name)) - -#define _glthread_DESTROY_MUTEX(name) \ - xmutex_clear(&(name)) - -#define _glthread_LOCK_MUTEX(name) \ - (void) xmutex_lock(&(name)) - -#define _glthread_UNLOCK_MUTEX(name) \ - (void) xmutex_unlock(&(name)) - -#endif /* USE_XTHREADS */ - - - -/* - * BeOS threads. R5.x required. - */ -#ifdef BEOS_THREADS - -#include -#include - -typedef struct { - int32 key; - int initMagic; -} _glthread_TSD; - -typedef thread_id _glthread_Thread; - -/* Use Benaphore, aka speeder semaphore */ -typedef struct { - int32 lock; - sem_id sem; -} benaphore; -typedef benaphore _glthread_Mutex; -#define _glthread_DECLARE_STATIC_MUTEX(name) static _glthread_Mutex name = { 0, 0 } -#define _glthread_INIT_MUTEX(name) name.sem = create_sem(0, #name"_benaphore"), name.lock = 0 -#define _glthread_DESTROY_MUTEX(name) delete_sem(name.sem), name.lock = 0 -#define _glthread_LOCK_MUTEX(name) if (name.sem == 0) _glthread_INIT_MUTEX(name); \ - if (atomic_add(&(name.lock), 1) >= 1) acquire_sem(name.sem) -#define _glthread_UNLOCK_MUTEX(name) if (atomic_add(&(name.lock), -1) > 1) release_sem(name.sem) +/** Dummy definitions */ -#endif /* BEOS_THREADS */ +typedef unsigned pipe_thread; +typedef unsigned pipe_mutex; +typedef unsigned pipe_condvar; +typedef unsigned pipe_tsd; +#define pipe_static_mutex(mutex) \ + static pipe_mutex mutex = 0 +#define pipe_mutex_init(mutex) \ + (void) mutex -#ifndef THREADS +#define pipe_mutex_destroy(mutex) \ + (void) mutex -/* - * THREADS not defined - */ - -typedef unsigned _glthread_TSD; - -typedef unsigned _glthread_Thread; - -typedef unsigned _glthread_Mutex; +#define pipe_mutex_lock(mutex) \ + (void) mutex -#define _glthread_DECLARE_STATIC_MUTEX(name) static _glthread_Mutex name = 0 +#define pipe_mutex_unlock(mutex) \ + (void) mutex -#define _glthread_INIT_MUTEX(name) (void) name +#define pipe_static_condvar(condvar) \ + static _glthread_Cond condvar = 0 -#define _glthread_DESTROY_MUTEX(name) (void) name +#define pipe_condvar_init(condvar) \ + (void) condvar -#define _glthread_LOCK_MUTEX(name) (void) name +#define pipe_condvar_destroy(condvar) \ + (void) condvar -#define _glthread_UNLOCK_MUTEX(name) (void) name +#define pipe_condvar_wait(condvar, mutex) \ + (void) condvar -typedef unsigned _glthread_Cond; +#define pipe_condvar_signal(condvar) \ + (void) condvar -#define _glthread_DECLARE_STATIC_COND(name) static _glthread_Cond name = 0 +#define pipe_condvar_broadcast(condvar) \ + (void) condvar -#define _glthread_INIT_COND(name) (void) name -#define _glthread_DESTROY_COND(name) (void) name - -#define _glthread_COND_WAIT(name, mutex) (void) name - -#define _glthread_COND_SIGNAL(name) (void) name - -#define _glthread_COND_BROADCAST(name) (void) name - -#endif /* THREADS */ +#endif /* PIPE_OS_? */ /* - * Platform independent thread specific data API. + * Thread-specific data. */ -extern unsigned long -_glthread_GetID(void); - - -extern void -_glthread_InitTSD(_glthread_TSD *); +typedef struct { +#if defined(PIPE_OS_LINUX) + pthread_key_t key; +#elif defined(PIPE_OS_WINDOWS) + DWORD key; +#endif + int initMagic; +} pipe_tsd; -extern void * -_glthread_GetTSD(_glthread_TSD *); +#define PIPE_TSD_INIT_MAGIC 0xff8adc98 -extern void -_glthread_SetTSD(_glthread_TSD *, void *); +static INLINE void +pipe_tsd_init(pipe_tsd *tsd) +{ +#if defined(PIPE_OS_LINUX) + if (pthread_key_create(&tsd->key, NULL/*free*/) != 0) { + perror("pthread_key_create(): failed to allocate key for thread specific data"); + exit(-1); + } +#elif defined(PIPE_OS_WINDOWS) + assert(0); +#endif + tsd->initMagic = PIPE_TSD_INIT_MAGIC; +} + +static INLINE void * +pipe_tsd_get(pipe_tsd *tsd) +{ + if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) { + pipe_tsd_init(tsd); + } +#if defined(PIPE_OS_LINUX) + return pthread_getspecific(tsd->key); +#elif defined(PIPE_OS_WINDOWS) + assert(0); + return NULL; +#else + assert(0); + return NULL; +#endif +} + +static INLINE void +pipe_tsd_set(pipe_tsd *tsd, void *value) +{ + if (tsd->initMagic != (int) PIPE_TSD_INIT_MAGIC) { + pipe_tsd_init(tsd); + } +#if defined(PIPE_OS_LINUX) + if (pthread_setspecific(tsd->key, value) != 0) { + perror("pthread_set_specific() failed"); + exit(-1); + } +#elif defined(PIPE_OS_WINDOWS) + assert(0); +#else + assert(0); +#endif +} -#endif /* _P_THREAD_H_ */ +#endif /* _P_THREAD2_H_ */ diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.c b/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.c index b6d901f85e..499f7bef8f 100644 --- a/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.c +++ b/src/gallium/winsys/drm/intel/common/ws_dri_bufmgr.c @@ -33,7 +33,7 @@ #include #include #include -#include "glthread.h" +#include "pipe/p_thread.h" #include "errno.h" #include "ws_dri_bufmgr.h" #include "string.h" @@ -51,8 +51,8 @@ * driBufferObject mutex - > this rw lock. */ -_glthread_DECLARE_STATIC_MUTEX(bmMutex); -_glthread_DECLARE_STATIC_COND(bmCond); +pipe_static_mutex(bmMutex); +pipe_static_condvar(bmCond); static int kernelReaders = 0; static int num_buffers = 0; @@ -241,29 +241,29 @@ static int drmBOResetList(drmBOList *list) void driWriteLockKernelBO(void) { - _glthread_LOCK_MUTEX(bmMutex); + pipe_mutex_lock(bmMutex); while(kernelReaders != 0) - _glthread_COND_WAIT(bmCond, bmMutex); + pipe_condvar_wait(bmCond, bmMutex); } void driWriteUnlockKernelBO(void) { - _glthread_UNLOCK_MUTEX(bmMutex); + pipe_mutex_unlock(bmMutex); } void driReadLockKernelBO(void) { - _glthread_LOCK_MUTEX(bmMutex); + pipe_mutex_lock(bmMutex); kernelReaders++; - _glthread_UNLOCK_MUTEX(bmMutex); + pipe_mutex_unlock(bmMutex); } void driReadUnlockKernelBO(void) { - _glthread_LOCK_MUTEX(bmMutex); + pipe_mutex_lock(bmMutex); if (--kernelReaders == 0) - _glthread_COND_BROADCAST(bmCond); - _glthread_UNLOCK_MUTEX(bmMutex); + pipe_condvar_broadcast(bmCond); + pipe_mutex_unlock(bmMutex); } @@ -277,7 +277,7 @@ void driReadUnlockKernelBO(void) typedef struct _DriBufferObject { DriBufferPool *pool; - _glthread_Mutex mutex; + pipe_mutx mutex; int refCount; const char *name; uint64_t flags; @@ -318,12 +318,12 @@ driBOKernel(struct _DriBufferObject *buf) drmBO *ret; driReadLockKernelBO(); - _glthread_LOCK_MUTEX(buf->mutex); + pipe_mutex_lock(buf->mutex); assert(buf->private != NULL); ret = buf->pool->kernel(buf->pool, buf->private); if (!ret) BM_CKFATAL(-EINVAL); - _glthread_UNLOCK_MUTEX(buf->mutex); + pipe_mutex_unlock(buf->mutex); driReadUnlockKernelBO(); return ret; @@ -338,9 +338,9 @@ driBOWaitIdle(struct _DriBufferObject *buf, int lazy) * that time?? */ - _glthread_LOCK_MUTEX(buf->mutex); + pipe_mutex_lock(buf->mutex); BM_CKFATAL(buf->pool->waitIdle(buf->pool, buf->private, &buf->mutex, lazy)); - _glthread_UNLOCK_MUTEX(buf->mutex); + pipe_mutex_unlock(buf->mutex); } void * @@ -353,11 +353,11 @@ driBOMap(struct _DriBufferObject *buf, unsigned flags, unsigned hint) return buf->userData; } - _glthread_LOCK_MUTEX(buf->mutex); + pipe_mutex_lock(buf->mutex); assert(buf->private != NULL); retval = buf->pool->map(buf->pool, buf->private, flags, hint, &buf->mutex, &virtual); - _glthread_UNLOCK_MUTEX(buf->mutex); + pipe_mutex_unlock(buf->mutex); return retval == 0 ? virtual : NULL; } @@ -369,9 +369,9 @@ driBOUnmap(struct _DriBufferObject *buf) return; assert(buf->private != NULL); - _glthread_LOCK_MUTEX(buf->mutex); + pipe_mutex_lock(buf->mutex); BM_CKFATAL(buf->pool->unmap(buf->pool, buf->private)); - _glthread_UNLOCK_MUTEX(buf->mutex); + pipe_mutex_unlock(buf->mutex); } unsigned long @@ -381,9 +381,9 @@ driBOOffset(struct _DriBufferObject *buf) assert(buf->private != NULL); - _glthread_LOCK_MUTEX(buf->mutex); + pipe_mutex_lock(buf->mutex); ret = buf->pool->offset(buf->pool, buf->private); - _glthread_UNLOCK_MUTEX(buf->mutex); + pipe_mutex_unlock(buf->mutex); return ret; } @@ -394,9 +394,9 @@ driBOPoolOffset(struct _DriBufferObject *buf) assert(buf->private != NULL); - _glthread_LOCK_MUTEX(buf->mutex); + pipe_mutex_lock(buf->mutex); ret = buf->pool->poolOffset(buf->pool, buf->private); - _glthread_UNLOCK_MUTEX(buf->mutex); + pipe_mutex_unlock(buf->mutex); return ret; } @@ -408,9 +408,9 @@ driBOFlags(struct _DriBufferObject *buf) assert(buf->private != NULL); driReadLockKernelBO(); - _glthread_LOCK_MUTEX(buf->mutex); + pipe_mutex_lock(buf->mutex); ret = buf->pool->flags(buf->pool, buf->private); - _glthread_UNLOCK_MUTEX(buf->mutex); + pipe_mutex_unlock(buf->mutex); driReadUnlockKernelBO(); return ret; } @@ -418,12 +418,12 @@ driBOFlags(struct _DriBufferObject *buf) struct _DriBufferObject * driBOReference(struct _DriBufferObject *buf) { - _glthread_LOCK_MUTEX(buf->mutex); + pipe_mutex_lock(buf->mutex); if (++buf->refCount == 1) { - _glthread_UNLOCK_MUTEX(buf->mutex); + pipe_mutex_unlock(buf->mutex); BM_CKFATAL(-EINVAL); } - _glthread_UNLOCK_MUTEX(buf->mutex); + pipe_mutex_unlock(buf->mutex); return buf; } @@ -435,10 +435,10 @@ driBOUnReference(struct _DriBufferObject *buf) if (!buf) return; - _glthread_LOCK_MUTEX(buf->mutex); + pipe_mutex_lock(buf->mutex); tmp = --buf->refCount; if (!tmp) { - _glthread_UNLOCK_MUTEX(buf->mutex); + pipe_mutex_unlock(buf->mutex); if (buf->private) { if (buf->createdByReference) buf->pool->unreference(buf->pool, buf->private); @@ -451,7 +451,7 @@ driBOUnReference(struct _DriBufferObject *buf) num_buffers--; free(buf); } else - _glthread_UNLOCK_MUTEX(buf->mutex); + pipe_mutex_unlock(buf->mutex); } @@ -469,7 +469,7 @@ driBOData(struct _DriBufferObject *buf, assert(!buf->userBuffer); /* XXX just do a memcpy? */ - _glthread_LOCK_MUTEX(buf->mutex); + pipe_mutex_lock(buf->mutex); pool = buf->pool; if (pool == NULL && newPool != NULL) { @@ -556,7 +556,7 @@ driBOData(struct _DriBufferObject *buf, } out: - _glthread_UNLOCK_MUTEX(buf->mutex); + pipe_mutex_unlock(buf->mutex); return retval; } @@ -569,7 +569,7 @@ driBOSubData(struct _DriBufferObject *buf, assert(!buf->userBuffer); /* XXX just do a memcpy? */ - _glthread_LOCK_MUTEX(buf->mutex); + pipe_mutex_lock(buf->mutex); if (size && data) { BM_CKFATAL(buf->pool->map(buf->pool, buf->private, DRM_BO_FLAG_WRITE, 0, &buf->mutex, @@ -577,7 +577,7 @@ driBOSubData(struct _DriBufferObject *buf, memcpy((unsigned char *) virtual + offset, data, size); BM_CKFATAL(buf->pool->unmap(buf->pool, buf->private)); } - _glthread_UNLOCK_MUTEX(buf->mutex); + pipe_mutex_unlock(buf->mutex); } void @@ -588,21 +588,21 @@ driBOGetSubData(struct _DriBufferObject *buf, assert(!buf->userBuffer); /* XXX just do a memcpy? */ - _glthread_LOCK_MUTEX(buf->mutex); + pipe_mutex_lock(buf->mutex); if (size && data) { BM_CKFATAL(buf->pool->map(buf->pool, buf->private, DRM_BO_FLAG_READ, 0, &buf->mutex, &virtual)); memcpy(data, (unsigned char *) virtual + offset, size); BM_CKFATAL(buf->pool->unmap(buf->pool, buf->private)); } - _glthread_UNLOCK_MUTEX(buf->mutex); + pipe_mutex_unlock(buf->mutex); } void driBOSetReferenced(struct _DriBufferObject *buf, unsigned long handle) { - _glthread_LOCK_MUTEX(buf->mutex); + pipe_mutex_lock(buf->mutex); if (buf->private != NULL) { assert((size_t)"Invalid buffer for setReferenced\n" & 0); BM_CKFATAL(-EINVAL); @@ -619,7 +619,7 @@ driBOSetReferenced(struct _DriBufferObject *buf, } buf->createdByReference = TRUE; buf->flags = buf->pool->kernel(buf->pool, buf->private)->flags; - _glthread_UNLOCK_MUTEX(buf->mutex); + pipe_mutex_unlock(buf->mutex); } int @@ -644,8 +644,8 @@ driGenBuffers(struct _DriBufferPool *pool, if (!buf) return -ENOMEM; - _glthread_INIT_MUTEX(buf->mutex); - _glthread_LOCK_MUTEX(buf->mutex); + pipe_mutex_init(buf->mutex); + pipe_mutex_lock(buf->mutex); buf->refCount = 1; buf->flags = flags; buf->hint = hint; @@ -653,7 +653,7 @@ driGenBuffers(struct _DriBufferPool *pool, buf->alignment = alignment; buf->pool = pool; buf->createdByReference = 0; - _glthread_UNLOCK_MUTEX(buf->mutex); + pipe_mutex_unlock(buf->mutex); buffers[i] = buf; } return 0; @@ -818,7 +818,7 @@ driBOAddListItem(struct _DriBufferList * list, struct _DriBufferObject *buf, { int newItem; - _glthread_LOCK_MUTEX(buf->mutex); + pipe_mutex_lock(buf->mutex); BM_CKFATAL(driAddValidateItem(&list->drmBuffers, buf->pool->kernel(buf->pool, buf->private), flags, mask, itemLoc, node)); @@ -827,7 +827,7 @@ driBOAddListItem(struct _DriBufferList * list, struct _DriBufferObject *buf, if (newItem) buf->refCount++; - _glthread_UNLOCK_MUTEX(buf->mutex); + pipe_mutex_unlock(buf->mutex); } drmBOList *driGetdrmBOList(struct _DriBufferList *list) @@ -845,10 +845,10 @@ void driPutdrmBOList(struct _DriBufferList *list) void driBOFence(struct _DriBufferObject *buf, struct _DriFenceObject *fence) { - _glthread_LOCK_MUTEX(buf->mutex); + pipe_mutex_lock(buf->mutex); if (buf->pool->fence) BM_CKFATAL(buf->pool->fence(buf->pool, buf->private, fence)); - _glthread_UNLOCK_MUTEX(buf->mutex); + pipe_mutex_unlock(buf->mutex); } @@ -908,10 +908,10 @@ driBOValidateUserList(struct _DriBufferList * list) while (curBuf) { buf = (struct _DriBufferObject *) drmBOListBuf(curBuf); - _glthread_LOCK_MUTEX(buf->mutex); + pipe_mutex_lock(buf->mutex); if (buf->pool->validate) BM_CKFATAL(buf->pool->validate(buf->pool, buf->private, &buf->mutex)); - _glthread_UNLOCK_MUTEX(buf->mutex); + pipe_mutex_unlock(buf->mutex); curBuf = drmBOListNext(&list->driBuffers, curBuf); } } @@ -929,9 +929,9 @@ driBOSize(struct _DriBufferObject *buf) { unsigned long size; - _glthread_LOCK_MUTEX(buf->mutex); + pipe_mutex_lock(buf->mutex); size = buf->pool->size(buf->pool, buf->private); - _glthread_UNLOCK_MUTEX(buf->mutex); + pipe_mutex_unlock(buf->mutex); return size; diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_bufpool.h b/src/gallium/winsys/drm/intel/common/ws_dri_bufpool.h index bf60798924..ad3b6f3931 100644 --- a/src/gallium/winsys/drm/intel/common/ws_dri_bufpool.h +++ b/src/gallium/winsys/drm/intel/common/ws_dri_bufpool.h @@ -33,14 +33,14 @@ #define _PSB_BUFPOOL_H_ #include -#include +#include "pipe/p_thread.h" struct _DriFenceObject; typedef struct _DriBufferPool { int fd; int (*map) (struct _DriBufferPool * pool, void *private, - unsigned flags, int hint, _glthread_Mutex *mutex, + unsigned flags, int hint, pipe_mutex *mutex, void **virtual); int (*unmap) (struct _DriBufferPool * pool, void *private); int (*destroy) (struct _DriBufferPool * pool, void *private); @@ -55,8 +55,8 @@ typedef struct _DriBufferPool int (*fence) (struct _DriBufferPool * pool, void *private, struct _DriFenceObject * fence); drmBO *(*kernel) (struct _DriBufferPool * pool, void *private); - int (*validate) (struct _DriBufferPool * pool, void *private, _glthread_Mutex *mutex); - int (*waitIdle) (struct _DriBufferPool *pool, void *private, _glthread_Mutex *mutex, + int (*validate) (struct _DriBufferPool * pool, void *private, pipe_mutex *mutex); + int (*waitIdle) (struct _DriBufferPool *pool, void *private, pipe_mutex *mutex, int lazy); int (*setStatus) (struct _DriBufferPool *pool, void *private, uint64_t flag_diff, uint64_t old_flags); diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_drmpool.c b/src/gallium/winsys/drm/intel/common/ws_dri_drmpool.c index 40929efa2f..54618b1c82 100644 --- a/src/gallium/winsys/drm/intel/common/ws_dri_drmpool.c +++ b/src/gallium/winsys/drm/intel/common/ws_dri_drmpool.c @@ -113,7 +113,7 @@ pool_unreference(struct _DriBufferPool *pool, void *private) static int pool_map(struct _DriBufferPool *pool, void *private, unsigned flags, - int hint, _glthread_Mutex *mutex, void **virtual) + int hint, pipe_mutex *mutex, void **virtual) { drmBO *buf = (drmBO *) private; int ret; @@ -202,7 +202,7 @@ pool_kernel(struct _DriBufferPool *pool, void *private) } static int -pool_waitIdle(struct _DriBufferPool *pool, void *private, _glthread_Mutex *mutex, +pool_waitIdle(struct _DriBufferPool *pool, void *private, pipe_mutex *mutex, int lazy) { drmBO *buf = (drmBO *) private; diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.c b/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.c index b56bc269da..831c75d30c 100644 --- a/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.c +++ b/src/gallium/winsys/drm/intel/common/ws_dri_fencemgr.c @@ -1,5 +1,5 @@ #include "ws_dri_fencemgr.h" -#include "glthread.h" +#include "pipe/p_thread.h" #include #include #include @@ -20,7 +20,7 @@ struct _DriFenceMgr { /* * These members are protected by this->mutex */ - _glthread_Mutex mutex; + pipe_mutex mutex; int refCount; drmMMListHead *heads; int num_fences; @@ -44,7 +44,7 @@ struct _DriFenceObject { /* * These members are protected by this->mutex. */ - _glthread_Mutex mutex; + pipe_mutex mutex; uint32_t signaled_type; void *private; }; @@ -65,8 +65,8 @@ driFenceMgrCreate(const struct _DriFenceMgrCreateInfo *info) if (!tmp) return NULL; - _glthread_INIT_MUTEX(tmp->mutex); - _glthread_LOCK_MUTEX(tmp->mutex); + pipe_mutex_init(tmp->mutex); + pipe_mutex_lock(tmp->mutex); tmp->refCount = 1; tmp->info = *info; tmp->num_fences = 0; @@ -77,7 +77,7 @@ driFenceMgrCreate(const struct _DriFenceMgrCreateInfo *info) for (i=0; iinfo.num_classes; ++i) { DRMINITLISTHEAD(&tmp->heads[i]); } - _glthread_UNLOCK_MUTEX(tmp->mutex); + pipe_mutex_unlock(tmp->mutex); return tmp; out_err: @@ -95,13 +95,13 @@ driFenceMgrUnrefUnlock(struct _DriFenceMgr **pMgr) if (--mgr->refCount == 0) free(mgr); else - _glthread_UNLOCK_MUTEX(mgr->mutex); + pipe_mutex_unlock(mgr->mutex); } void driFenceMgrUnReference(struct _DriFenceMgr **pMgr) { - _glthread_LOCK_MUTEX((*pMgr)->mutex); + pipe_mutex_lock((*pMgr)->mutex); driFenceMgrUnrefUnlock(pMgr); } @@ -143,9 +143,9 @@ driSignalPreviousFencesLocked(struct _DriFenceMgr *mgr, */ ++entry->refCount; - _glthread_UNLOCK_MUTEX(mgr->mutex); - _glthread_LOCK_MUTEX(entry->mutex); - _glthread_LOCK_MUTEX(mgr->mutex); + pipe_mutex_unlock(mgr->mutex); + pipe_mutex_lock(entry->mutex); + pipe_mutex_lock(mgr->mutex); prev = list->prev; @@ -157,7 +157,7 @@ driSignalPreviousFencesLocked(struct _DriFenceMgr *mgr, * Somebody else removed the entry from the list. */ - _glthread_UNLOCK_MUTEX(entry->mutex); + pipe_mutex_unlock(entry->mutex); driFenceUnReferenceLocked(&entry); return; } @@ -167,7 +167,7 @@ driSignalPreviousFencesLocked(struct _DriFenceMgr *mgr, DRMLISTDELINIT(list); mgr->info.unreference(mgr, &entry->private); } - _glthread_UNLOCK_MUTEX(entry->mutex); + pipe_mutex_unlock(entry->mutex); driFenceUnReferenceLocked(&entry); list = prev; } @@ -181,7 +181,7 @@ driFenceFinish(struct _DriFenceObject *fence, uint32_t fence_type, struct _DriFenceMgr *mgr = fence->mgr; int ret = 0; - _glthread_LOCK_MUTEX(fence->mutex); + pipe_mutex_lock(fence->mutex); if ((fence->signaled_type & fence_type) == fence_type) goto out0; @@ -190,16 +190,16 @@ driFenceFinish(struct _DriFenceObject *fence, uint32_t fence_type, if (ret) goto out0; - _glthread_LOCK_MUTEX(mgr->mutex); - _glthread_UNLOCK_MUTEX(fence->mutex); + pipe_mutex_lock(mgr->mutex); + pipe_mutex_unlock(fence->mutex); driSignalPreviousFencesLocked(mgr, &fence->head, fence->fence_class, fence_type); - _glthread_UNLOCK_MUTEX(mgr->mutex); + pipe_mutex_unlock(mgr->mutex); return 0; out0: - _glthread_UNLOCK_MUTEX(fence->mutex); + pipe_mutex_unlock(fence->mutex); return ret; } @@ -207,9 +207,9 @@ uint32_t driFenceSignaledTypeCached(struct _DriFenceObject *fence) { uint32_t ret; - _glthread_LOCK_MUTEX(fence->mutex); + pipe_mutex_lock(fence->mutex); ret = fence->signaled_type; - _glthread_UNLOCK_MUTEX(fence->mutex); + pipe_mutex_unlock(fence->mutex); return ret; } @@ -221,7 +221,7 @@ driFenceSignaledType(struct _DriFenceObject *fence, uint32_t flush_type, int ret = 0; struct _DriFenceMgr *mgr; - _glthread_LOCK_MUTEX(fence->mutex); + pipe_mutex_lock(fence->mutex); mgr = fence->mgr; *signaled = fence->signaled_type; if ((fence->signaled_type & flush_type) == flush_type) @@ -236,25 +236,25 @@ driFenceSignaledType(struct _DriFenceObject *fence, uint32_t flush_type, if ((fence->signaled_type | *signaled) == fence->signaled_type) goto out0; - _glthread_LOCK_MUTEX(mgr->mutex); - _glthread_UNLOCK_MUTEX(fence->mutex); + pipe_mutex_lock(mgr->mutex); + pipe_mutex_unlock(fence->mutex); driSignalPreviousFencesLocked(mgr, &fence->head, fence->fence_class, *signaled); - _glthread_UNLOCK_MUTEX(mgr->mutex); + pipe_mutex_unlock(mgr->mutex); return 0; out0: - _glthread_UNLOCK_MUTEX(fence->mutex); + pipe_mutex_unlock(fence->mutex); return ret; } struct _DriFenceObject * driFenceReference(struct _DriFenceObject *fence) { - _glthread_LOCK_MUTEX(fence->mgr->mutex); + pipe_mutex_lock(fence->mgr->mutex); ++fence->refCount; - _glthread_UNLOCK_MUTEX(fence->mgr->mutex); + pipe_mutex_unlock(fence->mgr->mutex); return fence; } @@ -267,7 +267,7 @@ driFenceUnReference(struct _DriFenceObject **pFence) return; mgr = (*pFence)->mgr; - _glthread_LOCK_MUTEX(mgr->mutex); + pipe_mutex_lock(mgr->mutex); ++mgr->refCount; driFenceUnReferenceLocked(pFence); driFenceMgrUnrefUnlock(&mgr); @@ -294,15 +294,15 @@ struct _DriFenceObject return NULL; } - _glthread_INIT_MUTEX(fence->mutex); - _glthread_LOCK_MUTEX(fence->mutex); - _glthread_LOCK_MUTEX(mgr->mutex); + pipe_mutex_init(fence->mutex); + pipe_mutex_lock(fence->mutex); + pipe_mutex_lock(mgr->mutex); fence->refCount = 1; DRMLISTADDTAIL(&fence->head, &mgr->heads[fence_class]); fence->mgr = mgr; ++mgr->refCount; ++mgr->num_fences; - _glthread_UNLOCK_MUTEX(mgr->mutex); + pipe_mutex_unlock(mgr->mutex); fence->fence_class = fence_class; fence->fence_type = fence_type; fence->signaled_type = 0; @@ -312,7 +312,7 @@ struct _DriFenceObject memcpy(fence->private, private, private_size); } - _glthread_UNLOCK_MUTEX(fence->mutex); + pipe_mutex_unlock(fence->mutex); return fence; } diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_mallocpool.c b/src/gallium/winsys/drm/intel/common/ws_dri_mallocpool.c index a80555c9c7..60924eac9e 100644 --- a/src/gallium/winsys/drm/intel/common/ws_dri_mallocpool.c +++ b/src/gallium/winsys/drm/intel/common/ws_dri_mallocpool.c @@ -33,7 +33,7 @@ #include #include #include "pipe/p_debug.h" -#include "glthread.h" +#include "pipe/p_thread.h" #include "ws_dri_bufpool.h" #include "ws_dri_bufmgr.h" @@ -60,14 +60,14 @@ pool_destroy(struct _DriBufferPool *pool, void *private) static int pool_waitIdle(struct _DriBufferPool *pool, void *private, - _glthread_Mutex *mutex, int lazy) + pipe_mutex *mutex, int lazy) { return 0; } static int pool_map(struct _DriBufferPool *pool, void *private, unsigned flags, - int hint, _glthread_Mutex *mutex, void **virtual) + int hint, pipe_mutex *mutex, void **virtual) { *virtual = (void *)((unsigned long *)private + 2); return 0; diff --git a/src/gallium/winsys/drm/intel/common/ws_dri_slabpool.c b/src/gallium/winsys/drm/intel/common/ws_dri_slabpool.c index dfcf6d6b19..391cea50a7 100644 --- a/src/gallium/winsys/drm/intel/common/ws_dri_slabpool.c +++ b/src/gallium/winsys/drm/intel/common/ws_dri_slabpool.c @@ -37,7 +37,7 @@ #include "ws_dri_bufpool.h" #include "ws_dri_fencemgr.h" #include "ws_dri_bufmgr.h" -#include "glthread.h" +#include "pipe/p_thread.h" #define DRI_SLABPOOL_ALLOC_RETRIES 100 @@ -53,7 +53,7 @@ struct _DriSlabBuffer { uint32_t start; uint32_t fenceType; int unFenced; - _glthread_Cond event; + pipe_condvar event; }; struct _DriKernelBO { @@ -84,7 +84,7 @@ struct _DriSlabSizeHeader { uint32_t numDelayed; struct _DriSlabPool *slabPool; uint32_t bufSize; - _glthread_Mutex mutex; + pipe_mutex mutex; }; struct _DriFreeSlabManager { @@ -94,7 +94,7 @@ struct _DriFreeSlabManager { drmMMListHead timeoutList; drmMMListHead unCached; drmMMListHead cached; - _glthread_Mutex mutex; + pipe_mutex mutex; }; @@ -196,7 +196,7 @@ driSetKernelBOFree(struct _DriFreeSlabManager *fMan, { struct timeval time; - _glthread_LOCK_MUTEX(fMan->mutex); + pipe_mutex_lock(fMan->mutex); gettimeofday(&time, NULL); driTimeAdd(&time, &fMan->slabTimeout); @@ -210,7 +210,7 @@ driSetKernelBOFree(struct _DriFreeSlabManager *fMan, DRMLISTADDTAIL(&kbo->timeoutHead, &fMan->timeoutList); driFreeTimeoutKBOsLocked(fMan, &time); - _glthread_UNLOCK_MUTEX(fMan->mutex); + pipe_mutex_unlock(fMan->mutex); } /* @@ -237,7 +237,7 @@ driAllocKernelBO(struct _DriSlabSizeHeader *header) size = (size <= slabPool->maxSlabSize) ? size : slabPool->maxSlabSize; size = (size + slabPool->pageSize - 1) & ~(slabPool->pageSize - 1); - _glthread_LOCK_MUTEX(fMan->mutex); + pipe_mutex_lock(fMan->mutex); kbo = NULL; @@ -269,7 +269,7 @@ driAllocKernelBO(struct _DriSlabSizeHeader *header) DRMLISTDELINIT(&kbo->timeoutHead); } - _glthread_UNLOCK_MUTEX(fMan->mutex); + pipe_mutex_unlock(fMan->mutex); if (kbo) { uint64_t new_mask = kbo->bo.proposedFlags ^ slabPool->proposedFlags; @@ -360,7 +360,7 @@ driAllocSlab(struct _DriSlabSizeHeader *header) buf->start = i* header->bufSize; buf->mapCount = 0; buf->isSlabBuffer = 1; - _glthread_INIT_COND(buf->event); + pipe_condvar_init(buf->event); DRMLISTADDTAIL(&buf->head, &slab->freeBuffers); slab->numFree++; buf++; @@ -494,23 +494,23 @@ driSlabAllocBuffer(struct _DriSlabSizeHeader *header) drmMMListHead *list; int count = DRI_SLABPOOL_ALLOC_RETRIES; - _glthread_LOCK_MUTEX(header->mutex); + pipe_mutex_lock(header->mutex); while(header->slabs.next == &header->slabs && count > 0) { driSlabCheckFreeLocked(header, 0); if (header->slabs.next != &header->slabs) break; - _glthread_UNLOCK_MUTEX(header->mutex); + pipe_mutex_unlock(header->mutex); if (count != DRI_SLABPOOL_ALLOC_RETRIES) usleep(1); - _glthread_LOCK_MUTEX(header->mutex); + pipe_mutex_lock(header->mutex); (void) driAllocSlab(header); count--; } list = header->slabs.next; if (list == &header->slabs) { - _glthread_UNLOCK_MUTEX(header->mutex); + pipe_mutex_unlock(header->mutex); return NULL; } slab = DRMLISTENTRY(struct _DriSlab, list, head); @@ -520,7 +520,7 @@ driSlabAllocBuffer(struct _DriSlabSizeHeader *header) list = slab->freeBuffers.next; DRMLISTDELINIT(list); - _glthread_UNLOCK_MUTEX(header->mutex); + pipe_mutex_unlock(header->mutex); buf = DRMLISTENTRY(struct _DriSlabBuffer, list, head); return buf; } @@ -618,7 +618,7 @@ pool_destroy(struct _DriBufferPool *driPool, void *private) slab = buf->parent; header = slab->header; - _glthread_LOCK_MUTEX(header->mutex); + pipe_mutex_lock(header->mutex); buf->unFenced = 0; buf->mapCount = 0; @@ -631,18 +631,18 @@ pool_destroy(struct _DriBufferPool *driPool, void *private) driSlabFreeBufferLocked(buf); } - _glthread_UNLOCK_MUTEX(header->mutex); + pipe_mutex_unlock(header->mutex); return 0; } static int pool_waitIdle(struct _DriBufferPool *driPool, void *private, - _glthread_Mutex *mutex, int lazy) + pipe_mutex *mutex, int lazy) { struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; while(buf->unFenced) - _glthread_COND_WAIT(buf->event, *mutex); + pipe_condvar_wait(buf->event, *mutex); if (!buf->fence) return 0; @@ -655,7 +655,7 @@ pool_waitIdle(struct _DriBufferPool *driPool, void *private, static int pool_map(struct _DriBufferPool *pool, void *private, unsigned flags, - int hint, _glthread_Mutex *mutex, void **virtual) + int hint, pipe_mutex *mutex, void **virtual) { struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; int busy; @@ -689,7 +689,7 @@ pool_unmap(struct _DriBufferPool *pool, void *private) --buf->mapCount; if (buf->mapCount == 0 && buf->isSlabBuffer) - _glthread_COND_BROADCAST(buf->event); + pipe_condvar_broadcast(buf->event); return 0; } @@ -760,7 +760,7 @@ pool_fence(struct _DriBufferPool *pool, void *private, buf->fenceType = bo->fenceFlags; buf->unFenced = 0; - _glthread_COND_BROADCAST(buf->event); + pipe_condvar_broadcast(buf->event); return 0; } @@ -775,7 +775,7 @@ pool_kernel(struct _DriBufferPool *pool, void *private) static int pool_validate(struct _DriBufferPool *pool, void *private, - _glthread_Mutex *mutex) + pipe_mutex *mutex) { struct _DriSlabBuffer *buf = (struct _DriSlabBuffer *) private; @@ -783,7 +783,7 @@ pool_validate(struct _DriBufferPool *pool, void *private, return 0; while(buf->mapCount != 0) - _glthread_COND_WAIT(buf->event, *mutex); + pipe_condvar_wait(buf->event, *mutex); buf->unFenced = 1; return 0; @@ -799,8 +799,8 @@ driInitFreeSlabManager(uint32_t checkIntervalMsec, uint32_t slabTimeoutMsec) if (!tmp) return NULL; - _glthread_INIT_MUTEX(tmp->mutex); - _glthread_LOCK_MUTEX(tmp->mutex); + pipe_mutex_init(tmp->mutex); + pipe_mutex_lock(tmp->mutex); tmp->slabTimeout.tv_usec = slabTimeoutMsec*1000; tmp->slabTimeout.tv_sec = tmp->slabTimeout.tv_usec / 1000000; tmp->slabTimeout.tv_usec -= tmp->slabTimeout.tv_sec*1000000; @@ -814,7 +814,7 @@ driInitFreeSlabManager(uint32_t checkIntervalMsec, uint32_t slabTimeoutMsec) DRMINITLISTHEAD(&tmp->timeoutList); DRMINITLISTHEAD(&tmp->unCached); DRMINITLISTHEAD(&tmp->cached); - _glthread_UNLOCK_MUTEX(tmp->mutex); + pipe_mutex_unlock(tmp->mutex); return tmp; } @@ -827,9 +827,9 @@ driFinishFreeSlabManager(struct _DriFreeSlabManager *fMan) time = fMan->nextCheck; driTimeAdd(&time, &fMan->checkInterval); - _glthread_LOCK_MUTEX(fMan->mutex); + pipe_mutex_lock(fMan->mutex); driFreeTimeoutKBOsLocked(fMan, &time); - _glthread_UNLOCK_MUTEX(fMan->mutex); + pipe_mutex_unlock(fMan->mutex); assert(fMan->timeoutList.next == &fMan->timeoutList); assert(fMan->unCached.next == &fMan->unCached); @@ -842,8 +842,8 @@ static void driInitSizeHeader(struct _DriSlabPool *pool, uint32_t size, struct _DriSlabSizeHeader *header) { - _glthread_INIT_MUTEX(header->mutex); - _glthread_LOCK_MUTEX(header->mutex); + pipe_mutex_init(header->mutex); + pipe_mutex_lock(header->mutex); DRMINITLISTHEAD(&header->slabs); DRMINITLISTHEAD(&header->freeSlabs); @@ -853,7 +853,7 @@ driInitSizeHeader(struct _DriSlabPool *pool, uint32_t size, header->slabPool = pool; header->bufSize = size; - _glthread_UNLOCK_MUTEX(header->mutex); + pipe_mutex_unlock(header->mutex); } static void @@ -862,7 +862,7 @@ driFinishSizeHeader(struct _DriSlabSizeHeader *header) drmMMListHead *list, *next; struct _DriSlabBuffer *buf; - _glthread_LOCK_MUTEX(header->mutex); + pipe_mutex_lock(header->mutex); for (list = header->delayedBuffers.next, next = list->next; list != &header->delayedBuffers; list = next, next = list->next) { @@ -875,7 +875,7 @@ driFinishSizeHeader(struct _DriSlabSizeHeader *header) header->numDelayed--; driSlabFreeBufferLocked(buf); } - _glthread_UNLOCK_MUTEX(header->mutex); + pipe_mutex_unlock(header->mutex); } static void diff --git a/src/gallium/winsys/drm/intel/dri/intel_lock.c b/src/gallium/winsys/drm/intel/dri/intel_lock.c index 406284c98f..18c7bba0d5 100644 --- a/src/gallium/winsys/drm/intel/dri/intel_lock.c +++ b/src/gallium/winsys/drm/intel/dri/intel_lock.c @@ -27,7 +27,7 @@ #include "main/glheader.h" -#include "glapi/glthread.h" +#include "pipe/p_lthread.h" #include #include "state_tracker/st_public.h" #include "intel_context.h" @@ -35,7 +35,7 @@ -_glthread_DECLARE_STATIC_MUTEX( lockMutex ); +pipe_static_mutex( lockMutex ); static void @@ -72,7 +72,7 @@ void LOCK_HARDWARE( struct intel_context *intel ) { char __ret = 0; - _glthread_LOCK_MUTEX(lockMutex); + pipe_mutex_lock(lockMutex); assert(!intel->locked); DRM_CAS(intel->driHwLock, intel->hHWContext, @@ -96,7 +96,7 @@ void UNLOCK_HARDWARE( struct intel_context *intel ) DRM_UNLOCK(intel->driFd, intel->driHwLock, intel->hHWContext); - _glthread_UNLOCK_MUTEX(lockMutex); + pipe_mutex_unlock(lockMutex); DBG(LOCK, "%s - unlocked\n", __progname); } diff --git a/src/gallium/winsys/xlib/glxapi.c b/src/gallium/winsys/xlib/glxapi.c index c2ccce6f52..c059fc3edb 100644 --- a/src/gallium/winsys/xlib/glxapi.c +++ b/src/gallium/winsys/xlib/glxapi.c @@ -37,6 +37,7 @@ #include "main/glheader.h" #include "glapi/glapi.h" #include "glxapi.h" +#include "pipe/p_thread.h" extern struct _glxapi_table *_real_GetGLXDispatchTable(void); @@ -127,26 +128,13 @@ get_dispatch(Display *dpy) /** * GLX API current context. */ -#if defined(GLX_USE_TLS) -PUBLIC __thread void * CurrentContext - __attribute__((tls_model("initial-exec"))); -#elif defined(THREADS) -static _glthread_TSD ContextTSD; /**< Per-thread context pointer */ -#else -static GLXContext CurrentContext = 0; -#endif +pipe_tsd ContextTSD; static void SetCurrentContext(GLXContext c) { -#if defined(GLX_USE_TLS) - CurrentContext = c; -#elif defined(THREADS) - _glthread_SetTSD(&ContextTSD, c); -#else - CurrentContext = c; -#endif + pipe_tsd_set(&ContextTSD, c); } @@ -238,13 +226,7 @@ glXGetConfig(Display *dpy, XVisualInfo *visinfo, int attrib, int *value) GLXContext PUBLIC glXGetCurrentContext(void) { -#if defined(GLX_USE_TLS) - return CurrentContext; -#elif defined(THREADS) - return (GLXContext) _glthread_GetTSD(&ContextTSD); -#else - return CurrentContext; -#endif + return (GLXContext) pipe_tsd_get(&ContextTSD); } diff --git a/src/gallium/winsys/xlib/xm_api.c b/src/gallium/winsys/xlib/xm_api.c index 7256340420..edcadff9c5 100644 --- a/src/gallium/winsys/xlib/xm_api.c +++ b/src/gallium/winsys/xlib/xm_api.c @@ -62,7 +62,6 @@ #include "xmesaP.h" #include "main/context.h" #include "main/framebuffer.h" -#include "glapi/glthread.h" #include "state_tracker/st_public.h" #include "state_tracker/st_context.h" @@ -75,7 +74,7 @@ /** * Global X driver lock */ -_glthread_Mutex _xmesa_lock; +pipe_mutex _xmesa_lock; int xmesa_mode; @@ -245,10 +244,10 @@ xmesa_get_window_size(XMesaDisplay *dpy, XMesaBuffer b, #else Status stat; - _glthread_LOCK_MUTEX(_xmesa_lock); + pipe_mutex_lock(_xmesa_lock); XSync(b->xm_visual->display, 0); /* added for Chromium */ stat = get_drawable_size(dpy, b->drawable, width, height); - _glthread_UNLOCK_MUTEX(_xmesa_lock); + pipe_mutex_unlock(_xmesa_lock); if (!stat) { /* probably querying a window that's recently been destroyed */ @@ -779,7 +778,7 @@ XMesaContext XMesaCreateContext( XMesaVisual v, XMesaContext share_list ) uint pf; if (firstTime) { - _glthread_INIT_MUTEX(_xmesa_lock); + pipe_mutex_init(_xmesa_lock); firstTime = GL_FALSE; } diff --git a/src/gallium/winsys/xlib/xmesaP.h b/src/gallium/winsys/xlib/xmesaP.h index 9b15b2ddf9..fcaeee52bc 100644 --- a/src/gallium/winsys/xlib/xmesaP.h +++ b/src/gallium/winsys/xlib/xmesaP.h @@ -35,9 +35,10 @@ #include "state_tracker/st_context.h" #include "state_tracker/st_public.h" +#include "pipe/p_thread.h" -extern _glthread_Mutex _xmesa_lock; +extern pipe_mutex _xmesa_lock; extern XMesaBuffer XMesaBufferList; -- cgit v1.2.3 From c6739e8cea287e17b248120e1a76480f1a25c082 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 28 Aug 2008 21:06:06 +0900 Subject: pipebuffer: Check buffer over- & underflows when mapping/unmapping too. --- src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c | 68 +++++++++++++++++----- 1 file changed, 52 insertions(+), 16 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index 4cb8c3bb55..5d9f6d3f39 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -138,12 +138,30 @@ check_random_pattern(const uint8_t *dst, size_t size, static void -pb_debug_buffer_destroy(struct pb_buffer *_buf) +pb_debug_buffer_fill(struct pb_debug_buffer *buf) { - struct pb_debug_buffer *buf = pb_debug_buffer(_buf); uint8_t *map; - assert(!buf->base.base.refcount); + map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + assert(map); + if(map) { + fill_random_pattern(map, buf->underflow_size); + fill_random_pattern(map + buf->underflow_size + buf->base.base.size, + buf->overflow_size); + pb_unmap(buf->buffer); + } +} + + +/** + * Check for under/over flows. + * + * Should be called with the buffer unmaped. + */ +static void +pb_debug_buffer_check(struct pb_debug_buffer *buf) +{ + uint8_t *map; map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_READ); assert(map); @@ -154,9 +172,9 @@ pb_debug_buffer_destroy(struct pb_buffer *_buf) underflow = !check_random_pattern(map, buf->underflow_size, &min_ofs, &max_ofs); if(underflow) { - debug_printf("buffer underflow (%u of %u bytes) detected\n", - buf->underflow_size - min_ofs, - buf->underflow_size); + debug_printf("buffer underflow (%u of %u bytes) detected\n", + buf->underflow_size - min_ofs, + buf->underflow_size); } overflow = !check_random_pattern(map + buf->underflow_size + buf->base.base.size, @@ -169,9 +187,27 @@ pb_debug_buffer_destroy(struct pb_buffer *_buf) } debug_assert(!underflow && !overflow); - + + /* re-fill if not aborted */ + if(underflow) + fill_random_pattern(map, buf->underflow_size); + if(overflow) + fill_random_pattern(map + buf->underflow_size + buf->base.base.size, + buf->overflow_size); + pb_unmap(buf->buffer); } +} + + +static void +pb_debug_buffer_destroy(struct pb_buffer *_buf) +{ + struct pb_debug_buffer *buf = pb_debug_buffer(_buf); + + assert(!buf->base.base.refcount); + + pb_debug_buffer_check(buf); pb_reference(&buf->buffer, NULL); FREE(buf); @@ -183,9 +219,14 @@ pb_debug_buffer_map(struct pb_buffer *_buf, unsigned flags) { struct pb_debug_buffer *buf = pb_debug_buffer(_buf); - void *map = pb_map(buf->buffer, flags); + void *map; + + pb_debug_buffer_check(buf); + + map = pb_map(buf->buffer, flags); if(!map) return NULL; + return (uint8_t *)map + buf->underflow_size; } @@ -195,6 +236,8 @@ pb_debug_buffer_unmap(struct pb_buffer *_buf) { struct pb_debug_buffer *buf = pb_debug_buffer(_buf); pb_unmap(buf->buffer); + + pb_debug_buffer_check(buf); } @@ -227,7 +270,6 @@ pb_debug_manager_create_buffer(struct pb_manager *_mgr, struct pb_debug_buffer *buf; struct pb_desc real_desc; size_t real_size; - uint8_t *map; buf = CALLOC_STRUCT(pb_debug_buffer); if(!buf) @@ -262,13 +304,7 @@ pb_debug_manager_create_buffer(struct pb_manager *_mgr, buf->underflow_size = mgr->band_size; buf->overflow_size = buf->buffer->base.size - buf->underflow_size - size; - map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); - assert(map); - if(map) { - fill_random_pattern(map, buf->underflow_size); - fill_random_pattern(map + buf->underflow_size + size, buf->overflow_size); - pb_unmap(buf->buffer); - } + pb_debug_buffer_fill(buf); return &buf->base; } -- cgit v1.2.3 From c46c07f6c60edb60514756bd6af7918f70931e53 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 28 Aug 2008 22:03:42 +0900 Subject: pipebuffer: Fix/add detail to the under- overflow report messages. --- src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index 5d9f6d3f39..5f1ed3e5a8 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -129,7 +129,7 @@ check_random_pattern(const uint8_t *dst, size_t size, for(i = 0; i < size; ++i) { if(*dst++ != random_pattern[i % sizeof(random_pattern)]) { *min_ofs = MIN2(*min_ofs, i); - *max_ofs = MIN2(*max_ofs, i); + *max_ofs = MAX2(*max_ofs, i); result = FALSE; } } @@ -172,18 +172,21 @@ pb_debug_buffer_check(struct pb_debug_buffer *buf) underflow = !check_random_pattern(map, buf->underflow_size, &min_ofs, &max_ofs); if(underflow) { - debug_printf("buffer underflow (%u of %u bytes) detected\n", + debug_printf("buffer underflow (offset -%u%s to -%u bytes) detected\n", buf->underflow_size - min_ofs, - buf->underflow_size); + min_ofs == 0 ? "+" : "", + buf->underflow_size - max_ofs); } overflow = !check_random_pattern(map + buf->underflow_size + buf->base.base.size, buf->overflow_size, &min_ofs, &max_ofs); if(overflow) { - debug_printf("buffer overflow (%u of %u bytes) detected\n", + debug_printf("buffer overflow (size %u plus offset %u to %u%s bytes) detected\n", + buf->base.base.size, + min_ofs, max_ofs, - buf->overflow_size); + max_ofs == buf->overflow_size - 1 ? "+" : ""); } debug_assert(!underflow && !overflow); -- cgit v1.2.3 From ef823dd543e1d98da98d188c53c8fc3285924364 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 31 Aug 2008 17:40:08 +0900 Subject: util: Fix compiler errors in the release build of C++ sources. --- src/gallium/auxiliary/util/u_memory.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h index 8fbbb4e55d..857102719d 100644 --- a/src/gallium/auxiliary/util/u_memory.h +++ b/src/gallium/auxiliary/util/u_memory.h @@ -39,7 +39,12 @@ #include "pipe/p_debug.h" - /* Define ENOMEM for WINCE */ +#ifdef __cplusplus +extern "C" { +#endif + + +/* Define ENOMEM for WINCE */ #if (_WIN32_WCE < 600) #ifndef ENOMEM #define ENOMEM 12 @@ -47,7 +52,6 @@ #endif - #if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && defined(DEBUG) /* memory debugging */ @@ -220,4 +224,9 @@ mem_dup(const void *src, uint size) +#ifdef __cplusplus +} +#endif + + #endif /* U_MEMORY_H */ -- cgit v1.2.3 From a4d04d71a46c88c5cbe08336111267ae9d0b7f11 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 1 Sep 2008 14:11:12 +0100 Subject: add u_timed_winsys.[ch] --- src/gallium/auxiliary/util/Makefile | 3 +- src/gallium/auxiliary/util/u_timed_winsys.c | 355 ++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_timed_winsys.h | 41 ++++ 3 files changed, 398 insertions(+), 1 deletion(-) create mode 100644 src/gallium/auxiliary/util/u_timed_winsys.c create mode 100644 src/gallium/auxiliary/util/u_timed_winsys.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 6e5fd26c05..61f4344d17 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -16,7 +16,8 @@ C_SOURCES = \ u_simple_shaders.c \ u_snprintf.c \ u_tile.c \ - u_time.c + u_time.c \ + u_timed_winsys.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/util/u_timed_winsys.c b/src/gallium/auxiliary/util/u_timed_winsys.c new file mode 100644 index 0000000000..e91d69ce1b --- /dev/null +++ b/src/gallium/auxiliary/util/u_timed_winsys.c @@ -0,0 +1,355 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ +/* + * Authors: Keith Whitwell + */ + +#include "pipe/p_winsys.h" +#include "u_timed_winsys.h" +#include "util/u_memory.h" +#include + + + +struct timed_winsys { + struct pipe_winsys base; + struct pipe_winsys *backend; + unsigned long long last_dump; + struct { + const char *name_key; + double total; + unsigned calls; + } funcs[13]; +}; + + +static struct timed_winsys *timed_winsys( struct pipe_winsys *winsys ) +{ + return (struct timed_winsys *)winsys; +} + + +static unsigned long long get_time( void ) +{ + struct timeval systime; + gettimeofday( &systime, NULL ); + return (((unsigned long long) systime.tv_sec) * 1000000LL) + systime.tv_usec; +} + + +static unsigned long long time_start( void ) +{ + return get_time(); +} + + +static void time_display( struct pipe_winsys *winsys ) +{ + struct timed_winsys *tws = timed_winsys(winsys); + unsigned i; + double overall = 0; + + for (i = 0; i < Elements(tws->funcs); i++) { + if (tws->funcs[i].name_key) { + debug_printf("*** %-25s %5.3fms (%d calls, avg %.3fms)\n", + tws->funcs[i].name_key, + tws->funcs[i].total, + tws->funcs[i].calls, + tws->funcs[i].total / tws->funcs[i].calls); + overall += tws->funcs[i].total; + tws->funcs[i].calls = 0; + tws->funcs[i].total = 0; + } + } + + debug_printf("*** %-25s %5.3fms\n", + "OVERALL WINSYS", + overall); +} + +static void time_finish( struct pipe_winsys *winsys, + long long startval, + unsigned idx, + const char *name ) +{ + struct timed_winsys *tws = timed_winsys(winsys); + unsigned long long endval = get_time(); + double elapsed = (endval - startval)/1000.0; + + if (endval - startval > 1000LL) + debug_printf("*** %s %.3f\n", name, elapsed ); + + assert( tws->funcs[idx].name_key == name || + tws->funcs[idx].name_key == NULL); + + tws->funcs[idx].name_key = name; + tws->funcs[idx].total += elapsed; + tws->funcs[idx].calls++; + + if (endval - tws->last_dump > 10LL * 1000LL * 1000LL) { + time_display( winsys ); + tws->last_dump = endval; + } +} + + +/* Pipe has no concept of pools, but the psb driver passes a flag that + * can be mapped onto pools in the backend. + */ +static struct pipe_buffer * +timed_buffer_create(struct pipe_winsys *winsys, + unsigned alignment, + unsigned usage, + unsigned size ) +{ + struct pipe_winsys *backend = timed_winsys(winsys)->backend; + unsigned long long start = time_start(); + + struct pipe_buffer *buf = backend->buffer_create( backend, alignment, usage, size ); + + time_finish(winsys, start, 0, __FUNCTION__); + + return buf; +} + + + + +static struct pipe_buffer * +timed_user_buffer_create(struct pipe_winsys *winsys, + void *data, + unsigned bytes) +{ + struct pipe_winsys *backend = timed_winsys(winsys)->backend; + unsigned long long start = time_start(); + + struct pipe_buffer *buf = backend->user_buffer_create( backend, data, bytes ); + + time_finish(winsys, start, 1, __FUNCTION__); + + return buf; +} + + +static void * +timed_buffer_map(struct pipe_winsys *winsys, + struct pipe_buffer *buf, + unsigned flags) +{ + struct pipe_winsys *backend = timed_winsys(winsys)->backend; + unsigned long long start = time_start(); + + void *map = backend->buffer_map( backend, buf, flags ); + + time_finish(winsys, start, 2, __FUNCTION__); + + return map; +} + + +static void +timed_buffer_unmap(struct pipe_winsys *winsys, + struct pipe_buffer *buf) +{ + struct pipe_winsys *backend = timed_winsys(winsys)->backend; + unsigned long long start = time_start(); + + backend->buffer_unmap( backend, buf ); + + time_finish(winsys, start, 3, __FUNCTION__); +} + + +static void +timed_buffer_destroy(struct pipe_winsys *winsys, + struct pipe_buffer *buf) +{ + struct pipe_winsys *backend = timed_winsys(winsys)->backend; + unsigned long long start = time_start(); + + backend->buffer_destroy( backend, buf ); + + time_finish(winsys, start, 4, __FUNCTION__); +} + + +static void +timed_flush_frontbuffer( struct pipe_winsys *winsys, + struct pipe_surface *surf, + void *context_private) +{ + struct pipe_winsys *backend = timed_winsys(winsys)->backend; + unsigned long long start = time_start(); + + backend->flush_frontbuffer( backend, surf, context_private ); + + time_finish(winsys, start, 5, __FUNCTION__); +} + + + + +static struct pipe_surface * +timed_surface_alloc(struct pipe_winsys *winsys) +{ + struct pipe_winsys *backend = timed_winsys(winsys)->backend; + unsigned long long start = time_start(); + + struct pipe_surface *surf = backend->surface_alloc( backend ); + + time_finish(winsys, start, 6, __FUNCTION__); + + return surf; +} + + + +static int +timed_surface_alloc_storage(struct pipe_winsys *winsys, + struct pipe_surface *surf, + unsigned width, unsigned height, + enum pipe_format format, + unsigned flags, + unsigned tex_usage) +{ + struct pipe_winsys *backend = timed_winsys(winsys)->backend; + unsigned long long start = time_start(); + + int ret = backend->surface_alloc_storage( backend, surf, width, height, + format, flags, tex_usage ); + + time_finish(winsys, start, 7, __FUNCTION__); + + return ret; +} + + +static void +timed_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) +{ + struct pipe_winsys *backend = timed_winsys(winsys)->backend; + unsigned long long start = time_start(); + + backend->surface_release( backend, s ); + + time_finish(winsys, start, 8, __FUNCTION__); +} + + + +static const char * +timed_get_name( struct pipe_winsys *winsys ) +{ + struct pipe_winsys *backend = timed_winsys(winsys)->backend; + unsigned long long start = time_start(); + + const char *ret = backend->get_name( backend ); + + time_finish(winsys, start, 9, __FUNCTION__); + + return ret; +} + +static void +timed_fence_reference(struct pipe_winsys *winsys, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + struct pipe_winsys *backend = timed_winsys(winsys)->backend; + unsigned long long start = time_start(); + + backend->fence_reference( backend, ptr, fence ); + + time_finish(winsys, start, 10, __FUNCTION__); +} + + +static int +timed_fence_signalled( struct pipe_winsys *winsys, + struct pipe_fence_handle *fence, + unsigned flag ) +{ + struct pipe_winsys *backend = timed_winsys(winsys)->backend; + unsigned long long start = time_start(); + + int ret = backend->fence_signalled( backend, fence, flag ); + + time_finish(winsys, start, 11, __FUNCTION__); + + return ret; +} + +static int +timed_fence_finish( struct pipe_winsys *winsys, + struct pipe_fence_handle *fence, + unsigned flag ) +{ + struct pipe_winsys *backend = timed_winsys(winsys)->backend; + unsigned long long start = time_start(); + + int ret = backend->fence_finish( backend, fence, flag ); + + time_finish(winsys, start, 12, __FUNCTION__); + + return ret; +} + +static void +timed_winsys_destroy( struct pipe_winsys *winsys ) +{ + struct pipe_winsys *backend = timed_winsys(winsys)->backend; + backend->destroy( backend ); + FREE(winsys); +} + + + +struct pipe_winsys *u_timed_winsys_create( struct pipe_winsys *backend ) +{ + struct timed_winsys *ws = CALLOC_STRUCT(timed_winsys); + + ws->base.user_buffer_create = timed_user_buffer_create; + ws->base.buffer_map = timed_buffer_map; + ws->base.buffer_unmap = timed_buffer_unmap; + ws->base.buffer_destroy = timed_buffer_destroy; + ws->base.buffer_create = timed_buffer_create; + ws->base.flush_frontbuffer = timed_flush_frontbuffer; + ws->base.get_name = timed_get_name; + ws->base.surface_alloc = timed_surface_alloc; + ws->base.surface_alloc_storage = timed_surface_alloc_storage; + ws->base.surface_release = timed_surface_release; + ws->base.fence_reference = timed_fence_reference; + ws->base.fence_signalled = timed_fence_signalled; + ws->base.fence_finish = timed_fence_finish; + ws->base.destroy = timed_winsys_destroy; + + ws->backend = backend; + + return &ws->base; +} + diff --git a/src/gallium/auxiliary/util/u_timed_winsys.h b/src/gallium/auxiliary/util/u_timed_winsys.h new file mode 100644 index 0000000000..542365112d --- /dev/null +++ b/src/gallium/auxiliary/util/u_timed_winsys.h @@ -0,0 +1,41 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ +/* + * Authors: Keith Whitwell + */ + + +#ifndef U_TIMED_WINSYS_H +#define U_TIMED_WINSYS_H + + +struct pipe_winsys; +struct pipe_winsys *u_timed_winsys_create( struct pipe_winsys *backend ); + + +#endif -- cgit v1.2.3 From c118c654fa238f983f1550149466727fa81e51ab Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 1 Sep 2008 14:23:47 +0100 Subject: util: add func to return time as uint64 microseconds --- src/gallium/auxiliary/util/u_time.c | 20 ++++++++++++++++++++ src/gallium/auxiliary/util/u_time.h | 3 +++ 2 files changed, 23 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c index 49dce75289..bf7d1d1c8d 100644 --- a/src/gallium/auxiliary/util/u_time.c +++ b/src/gallium/auxiliary/util/u_time.c @@ -136,6 +136,26 @@ util_time_diff(const struct util_time *t1, } + +uint64_t +util_time_micros( void ) +{ + struct util_time t1; + + util_time_get(&t1); + +#if defined(PIPE_OS_LINUX) + return t1.tv.tv_usec + t1.tv.tv_sec*1000000LL; +#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) + util_time_get_frequency(); + return t1.counter*INT64_C(1000000)/frequency; +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) + return t1.counter/10; +#endif +} + + + /** * Compare two time values. * diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h index f9963ce0e2..35d97d16c7 100644 --- a/src/gallium/auxiliary/util/u_time.h +++ b/src/gallium/auxiliary/util/u_time.h @@ -74,6 +74,9 @@ util_time_add(const struct util_time *t1, int64_t usecs, struct util_time *t2); +uint64_t +util_time_micros( void ); + int64_t util_time_diff(const struct util_time *t1, const struct util_time *t2); -- cgit v1.2.3 From 5e184894d24fbf01ed826c39ea921c01ae6d9caf Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 1 Sep 2008 14:24:07 +0100 Subject: util: make timed_winsys os independent --- src/gallium/auxiliary/util/u_timed_winsys.c | 45 ++++++++++++----------------- 1 file changed, 18 insertions(+), 27 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_timed_winsys.c b/src/gallium/auxiliary/util/u_timed_winsys.c index e91d69ce1b..8beb3b4c88 100644 --- a/src/gallium/auxiliary/util/u_timed_winsys.c +++ b/src/gallium/auxiliary/util/u_timed_winsys.c @@ -32,14 +32,13 @@ #include "pipe/p_winsys.h" #include "u_timed_winsys.h" #include "util/u_memory.h" -#include - +#include "util/u_time.h" struct timed_winsys { struct pipe_winsys base; struct pipe_winsys *backend; - unsigned long long last_dump; + uint64_t last_dump; struct { const char *name_key; double total; @@ -54,17 +53,9 @@ static struct timed_winsys *timed_winsys( struct pipe_winsys *winsys ) } -static unsigned long long get_time( void ) -{ - struct timeval systime; - gettimeofday( &systime, NULL ); - return (((unsigned long long) systime.tv_sec) * 1000000LL) + systime.tv_usec; -} - - -static unsigned long long time_start( void ) +static uint64_t time_start( void ) { - return get_time(); + return util_time_micros(); } @@ -98,7 +89,7 @@ static void time_finish( struct pipe_winsys *winsys, const char *name ) { struct timed_winsys *tws = timed_winsys(winsys); - unsigned long long endval = get_time(); + uint64_t endval = util_time_micros(); double elapsed = (endval - startval)/1000.0; if (endval - startval > 1000LL) @@ -128,7 +119,7 @@ timed_buffer_create(struct pipe_winsys *winsys, unsigned size ) { struct pipe_winsys *backend = timed_winsys(winsys)->backend; - unsigned long long start = time_start(); + uint64_t start = time_start(); struct pipe_buffer *buf = backend->buffer_create( backend, alignment, usage, size ); @@ -146,7 +137,7 @@ timed_user_buffer_create(struct pipe_winsys *winsys, unsigned bytes) { struct pipe_winsys *backend = timed_winsys(winsys)->backend; - unsigned long long start = time_start(); + uint64_t start = time_start(); struct pipe_buffer *buf = backend->user_buffer_create( backend, data, bytes ); @@ -162,7 +153,7 @@ timed_buffer_map(struct pipe_winsys *winsys, unsigned flags) { struct pipe_winsys *backend = timed_winsys(winsys)->backend; - unsigned long long start = time_start(); + uint64_t start = time_start(); void *map = backend->buffer_map( backend, buf, flags ); @@ -177,7 +168,7 @@ timed_buffer_unmap(struct pipe_winsys *winsys, struct pipe_buffer *buf) { struct pipe_winsys *backend = timed_winsys(winsys)->backend; - unsigned long long start = time_start(); + uint64_t start = time_start(); backend->buffer_unmap( backend, buf ); @@ -190,7 +181,7 @@ timed_buffer_destroy(struct pipe_winsys *winsys, struct pipe_buffer *buf) { struct pipe_winsys *backend = timed_winsys(winsys)->backend; - unsigned long long start = time_start(); + uint64_t start = time_start(); backend->buffer_destroy( backend, buf ); @@ -204,7 +195,7 @@ timed_flush_frontbuffer( struct pipe_winsys *winsys, void *context_private) { struct pipe_winsys *backend = timed_winsys(winsys)->backend; - unsigned long long start = time_start(); + uint64_t start = time_start(); backend->flush_frontbuffer( backend, surf, context_private ); @@ -218,7 +209,7 @@ static struct pipe_surface * timed_surface_alloc(struct pipe_winsys *winsys) { struct pipe_winsys *backend = timed_winsys(winsys)->backend; - unsigned long long start = time_start(); + uint64_t start = time_start(); struct pipe_surface *surf = backend->surface_alloc( backend ); @@ -238,7 +229,7 @@ timed_surface_alloc_storage(struct pipe_winsys *winsys, unsigned tex_usage) { struct pipe_winsys *backend = timed_winsys(winsys)->backend; - unsigned long long start = time_start(); + uint64_t start = time_start(); int ret = backend->surface_alloc_storage( backend, surf, width, height, format, flags, tex_usage ); @@ -253,7 +244,7 @@ static void timed_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) { struct pipe_winsys *backend = timed_winsys(winsys)->backend; - unsigned long long start = time_start(); + uint64_t start = time_start(); backend->surface_release( backend, s ); @@ -266,7 +257,7 @@ static const char * timed_get_name( struct pipe_winsys *winsys ) { struct pipe_winsys *backend = timed_winsys(winsys)->backend; - unsigned long long start = time_start(); + uint64_t start = time_start(); const char *ret = backend->get_name( backend ); @@ -281,7 +272,7 @@ timed_fence_reference(struct pipe_winsys *winsys, struct pipe_fence_handle *fence) { struct pipe_winsys *backend = timed_winsys(winsys)->backend; - unsigned long long start = time_start(); + uint64_t start = time_start(); backend->fence_reference( backend, ptr, fence ); @@ -295,7 +286,7 @@ timed_fence_signalled( struct pipe_winsys *winsys, unsigned flag ) { struct pipe_winsys *backend = timed_winsys(winsys)->backend; - unsigned long long start = time_start(); + uint64_t start = time_start(); int ret = backend->fence_signalled( backend, fence, flag ); @@ -310,7 +301,7 @@ timed_fence_finish( struct pipe_winsys *winsys, unsigned flag ) { struct pipe_winsys *backend = timed_winsys(winsys)->backend; - unsigned long long start = time_start(); + uint64_t start = time_start(); int ret = backend->fence_finish( backend, fence, flag ); -- cgit v1.2.3 From f4d707b40e8dde8cdf68f5c4595b838c138fcf9b Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 31 Aug 2008 23:56:59 +0900 Subject: draw: Put INLINES where appropriate. In the hope of MSVC inline some more functions, but without much result. --- src/gallium/auxiliary/draw/draw_pt_vcache.c | 149 ++++++++++++++++------------ 1 file changed, 85 insertions(+), 64 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index e8467b2ae3..80d7200ca6 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -63,7 +63,8 @@ struct vcache_frontend { unsigned opt; }; -static void vcache_flush( struct vcache_frontend *vcache ) +static INLINE void +vcache_flush( struct vcache_frontend *vcache ) { if (vcache->middle_prim != vcache->output_prim) { vcache->middle_prim = vcache->output_prim; @@ -86,7 +87,8 @@ static void vcache_flush( struct vcache_frontend *vcache ) vcache->draw_count = 0; } -static void vcache_check_flush( struct vcache_frontend *vcache ) +static INLINE void +vcache_check_flush( struct vcache_frontend *vcache ) { if ( vcache->draw_count + 6 >= DRAW_MAX || vcache->fetch_count + 4 >= FETCH_MAX ) @@ -96,9 +98,10 @@ static void vcache_check_flush( struct vcache_frontend *vcache ) } -static INLINE void vcache_elt( struct vcache_frontend *vcache, - unsigned felt, - ushort flags ) +static INLINE void +vcache_elt( struct vcache_frontend *vcache, + unsigned felt, + ushort flags ) { unsigned idx = felt % CACHE_MAX; @@ -115,10 +118,11 @@ static INLINE void vcache_elt( struct vcache_frontend *vcache, -static void vcache_triangle( struct vcache_frontend *vcache, - unsigned i0, - unsigned i1, - unsigned i2 ) +static INLINE void +vcache_triangle( struct vcache_frontend *vcache, + unsigned i0, + unsigned i1, + unsigned i2 ) { vcache_elt(vcache, i0, 0); vcache_elt(vcache, i1, 0); @@ -127,11 +131,12 @@ static void vcache_triangle( struct vcache_frontend *vcache, } -static void vcache_triangle_flags( struct vcache_frontend *vcache, - ushort flags, - unsigned i0, - unsigned i1, - unsigned i2 ) +static INLINE void +vcache_triangle_flags( struct vcache_frontend *vcache, + ushort flags, + unsigned i0, + unsigned i1, + unsigned i2 ) { vcache_elt(vcache, i0, flags); vcache_elt(vcache, i1, 0); @@ -139,9 +144,10 @@ static void vcache_triangle_flags( struct vcache_frontend *vcache, vcache_check_flush(vcache); } -static void vcache_line( struct vcache_frontend *vcache, - unsigned i0, - unsigned i1 ) +static INLINE void +vcache_line( struct vcache_frontend *vcache, + unsigned i0, + unsigned i1 ) { vcache_elt(vcache, i0, 0); vcache_elt(vcache, i1, 0); @@ -149,10 +155,11 @@ static void vcache_line( struct vcache_frontend *vcache, } -static void vcache_line_flags( struct vcache_frontend *vcache, - ushort flags, - unsigned i0, - unsigned i1 ) +static INLINE void +vcache_line_flags( struct vcache_frontend *vcache, + ushort flags, + unsigned i0, + unsigned i1 ) { vcache_elt(vcache, i0, flags); vcache_elt(vcache, i1, 0); @@ -160,28 +167,31 @@ static void vcache_line_flags( struct vcache_frontend *vcache, } -static void vcache_point( struct vcache_frontend *vcache, - unsigned i0 ) +static INLINE void +vcache_point( struct vcache_frontend *vcache, + unsigned i0 ) { vcache_elt(vcache, i0, 0); vcache_check_flush(vcache); } -static void vcache_quad( struct vcache_frontend *vcache, - unsigned i0, - unsigned i1, - unsigned i2, - unsigned i3 ) +static INLINE void +vcache_quad( struct vcache_frontend *vcache, + unsigned i0, + unsigned i1, + unsigned i2, + unsigned i3 ) { vcache_triangle( vcache, i0, i1, i3 ); vcache_triangle( vcache, i1, i2, i3 ); } -static void vcache_ef_quad( struct vcache_frontend *vcache, - unsigned i0, - unsigned i1, - unsigned i2, - unsigned i3 ) +static INLINE void +vcache_ef_quad( struct vcache_frontend *vcache, + unsigned i0, + unsigned i1, + unsigned i2, + unsigned i3 ) { vcache_triangle_flags( vcache, ( DRAW_PIPE_RESET_STIPPLE | @@ -213,10 +223,11 @@ static void vcache_ef_quad( struct vcache_frontend *vcache, #define FUNC vcache_run #include "draw_pt_vcache_tmp.h" -static void rebase_uint_elts( const unsigned *src, - unsigned count, - int delta, - ushort *dest ) +static INLINE void +rebase_uint_elts( const unsigned *src, + unsigned count, + int delta, + ushort *dest ) { unsigned i; @@ -224,9 +235,10 @@ static void rebase_uint_elts( const unsigned *src, dest[i] = (ushort)(src[i] + delta); } -static void rebase_ushort_elts( const ushort *src, - unsigned count, - int delta, +static INLINE void +rebase_ushort_elts( const ushort *src, + unsigned count, + int delta, ushort *dest ) { unsigned i; @@ -235,10 +247,11 @@ static void rebase_ushort_elts( const ushort *src, dest[i] = (ushort)(src[i] + delta); } -static void rebase_ubyte_elts( const ubyte *src, - unsigned count, - int delta, - ushort *dest ) +static INLINE void +rebase_ubyte_elts( const ubyte *src, + unsigned count, + int delta, + ushort *dest ) { unsigned i; @@ -248,9 +261,10 @@ static void rebase_ubyte_elts( const ubyte *src, -static void translate_uint_elts( const unsigned *src, - unsigned count, - ushort *dest ) +static INLINE void +translate_uint_elts( const unsigned *src, + unsigned count, + ushort *dest ) { unsigned i; @@ -258,9 +272,10 @@ static void translate_uint_elts( const unsigned *src, dest[i] = (ushort)(src[i]); } -static void translate_ushort_elts( const ushort *src, - unsigned count, - ushort *dest ) +static INLINE void +translate_ushort_elts( const ushort *src, + unsigned count, + ushort *dest ) { unsigned i; @@ -268,9 +283,10 @@ static void translate_ushort_elts( const ushort *src, dest[i] = (ushort)(src[i]); } -static void translate_ubyte_elts( const ubyte *src, - unsigned count, - ushort *dest ) +static INLINE void +translate_ubyte_elts( const ubyte *src, + unsigned count, + ushort *dest ) { unsigned i; @@ -282,7 +298,8 @@ static void translate_ubyte_elts( const ubyte *src, #if 0 -static enum pipe_format format_from_get_elt( pt_elt_func get_elt ) +static INLINE enum pipe_format +format_from_get_elt( pt_elt_func get_elt ) { switch (draw->pt.user.eltSize) { case 1: return PIPE_FORMAT_R8_UNORM; @@ -293,10 +310,11 @@ static enum pipe_format format_from_get_elt( pt_elt_func get_elt ) } #endif -static void vcache_check_run( struct draw_pt_front_end *frontend, - pt_elt_func get_elt, - const void *elts, - unsigned draw_count ) +static INLINE void +vcache_check_run( struct draw_pt_front_end *frontend, + pt_elt_func get_elt, + const void *elts, + unsigned draw_count ) { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; struct draw_context *draw = vcache->draw; @@ -416,10 +434,11 @@ static void vcache_check_run( struct draw_pt_front_end *frontend, -static void vcache_prepare( struct draw_pt_front_end *frontend, - unsigned prim, - struct draw_pt_middle_end *middle, - unsigned opt ) +static void +vcache_prepare( struct draw_pt_front_end *frontend, + unsigned prim, + struct draw_pt_middle_end *middle, + unsigned opt ) { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; @@ -448,14 +467,16 @@ static void vcache_prepare( struct draw_pt_front_end *frontend, -static void vcache_finish( struct draw_pt_front_end *frontend ) +static void +vcache_finish( struct draw_pt_front_end *frontend ) { struct vcache_frontend *vcache = (struct vcache_frontend *)frontend; vcache->middle->finish( vcache->middle ); vcache->middle = NULL; } -static void vcache_destroy( struct draw_pt_front_end *frontend ) +static void +vcache_destroy( struct draw_pt_front_end *frontend ) { FREE(frontend); } -- cgit v1.2.3 From 5c198f660a1812d9b3970408695d04bdd74a5d1e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 2 Sep 2008 02:16:43 +0900 Subject: pipebuffer: Comment the slab code. Remove the freeSlabs list. The freeSlabs list is not really needed as we free empty slabs immediately. Time based cached is done separately. --- src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c | 104 ++++++++++++++++------ 1 file changed, 77 insertions(+), 27 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index ac0296b26a..8698c4cff6 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -30,6 +30,8 @@ * @file * S-lab pool implementation. * + * @sa http://en.wikipedia.org/wiki/Slab_allocation + * * @author Thomas Hellstrom * @author Jose Fonseca */ @@ -49,46 +51,96 @@ struct pb_slab; + +/** + * Buffer in a slab. + * + * Sub-allocation of a contiguous buffer. + */ struct pb_slab_buffer { struct pb_buffer base; struct pb_slab *slab; + struct list_head head; + unsigned mapCount; + + /** Offset relative to the start of the slab buffer. */ size_t start; + + /** Use when validating, to signal that all mappings are finished */ + /* TODO: Actually validation does not reach this stage yet */ pipe_condvar event; }; + +/** + * Slab -- a contiguous piece of memory. + */ struct pb_slab { struct list_head head; struct list_head freeBuffers; size_t numBuffers; size_t numFree; + struct pb_slab_buffer *buffers; struct pb_slab_manager *mgr; + /** Buffer from the provider */ struct pb_buffer *bo; + void *virtual; }; + +/** + * It adds/removes slabs as needed in order to meet the allocation/destruction + * of individual buffers. + */ struct pb_slab_manager { struct pb_manager base; + /** From where we get our buffers */ struct pb_manager *provider; + + /** Size of the buffers we hand on downstream */ size_t bufSize; + + /** Size of the buffers we request upstream */ size_t slabSize; + + /** + * Alignment, usage to be used to allocate the slab buffers. + * + * We can only provide buffers which are consistent (in alignment, usage) + * with this description. + */ struct pb_desc desc; + /** + * Partial slabs + * + * Full slabs are not stored in any list. Empty slabs are destroyed + * immediatly. + */ struct list_head slabs; - struct list_head freeSlabs; pipe_mutex mutex; }; + /** + * Wrapper around several slabs, therefore capable of handling buffers of + * multiple sizes. + * + * This buffer manager just dispatches buffer allocations to the appropriate slab + * manager, according to the requested buffer size, or by passes the slab + * managers altogether for even greater sizes. + * * The data of this structure remains constant after * initialization and thus needs no mutex protection. */ @@ -97,12 +149,17 @@ struct pb_slab_range_manager struct pb_manager base; struct pb_manager *provider; + size_t minBufSize; size_t maxBufSize; + + /** @sa pb_slab_manager::desc */ struct pb_desc desc; unsigned numBuckets; size_t *bucketSizes; + + /** Array of pb_slab_manager, one for each bucket size */ struct pb_manager **buckets; }; @@ -156,29 +213,15 @@ pb_slab_buffer_destroy(struct pb_buffer *_buf) if (slab->head.next == &slab->head) LIST_ADDTAIL(&slab->head, &mgr->slabs); + /* If the slab becomes totally empty, free it */ if (slab->numFree == slab->numBuffers) { list = &slab->head; - LIST_DEL(list); - LIST_ADDTAIL(list, &mgr->freeSlabs); + LIST_DELINIT(list); + pb_reference(&slab->bo, NULL); + FREE(slab->buffers); + FREE(slab); } - if (mgr->slabs.next == &mgr->slabs || slab->numFree - != slab->numBuffers) { - - struct list_head *next; - - for (list = mgr->freeSlabs.next, next = list->next; list - != &mgr->freeSlabs; list = next, next = list->next) { - - slab = LIST_ENTRY(struct pb_slab, list, head); - - LIST_DELINIT(list); - pb_reference(&slab->bo, NULL); - FREE(slab->buffers); - FREE(slab); - } - } - pipe_mutex_unlock(mgr->mutex); } @@ -225,6 +268,11 @@ pb_slab_buffer_vtbl = { }; +/** + * Create a new slab. + * + * Called when we ran out of free slabs. + */ static enum pipe_error pb_slab_create(struct pb_slab_manager *mgr) { @@ -238,17 +286,14 @@ pb_slab_create(struct pb_slab_manager *mgr) if (!slab) return PIPE_ERROR_OUT_OF_MEMORY; - /* - * FIXME: We should perhaps allow some variation in slabsize in order - * to efficiently reuse slabs. - */ - slab->bo = mgr->provider->create_buffer(mgr->provider, mgr->slabSize, &mgr->desc); if(!slab->bo) { ret = PIPE_ERROR_OUT_OF_MEMORY; goto out_err0; } + /* Note down the slab virtual address. All mappings are accessed directly + * through this address so it is required that the buffer is pinned. */ slab->virtual = pb_map(slab->bo, PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE); @@ -256,7 +301,6 @@ pb_slab_create(struct pb_slab_manager *mgr) ret = PIPE_ERROR_OUT_OF_MEMORY; goto out_err1; } - pb_unmap(slab->bo); numBuffers = slab->bo->base.size / mgr->bufSize; @@ -289,6 +333,7 @@ pb_slab_create(struct pb_slab_manager *mgr) buf++; } + /* Add this slab to the list of partial slabs */ LIST_ADDTAIL(&slab->head, &mgr->slabs); return PIPE_OK; @@ -329,6 +374,8 @@ pb_slab_manager_create_buffer(struct pb_manager *_mgr, return NULL; pipe_mutex_lock(mgr->mutex); + + /* Create a new slab, if we run out of partial slabs */ if (mgr->slabs.next == &mgr->slabs) { (void) pb_slab_create(mgr); if (mgr->slabs.next == &mgr->slabs) { @@ -336,8 +383,12 @@ pb_slab_manager_create_buffer(struct pb_manager *_mgr, return NULL; } } + + /* Allocate the buffer from a partial (or just created) slab */ list = mgr->slabs.next; slab = LIST_ENTRY(struct pb_slab, list, head); + + /* If totally full remove from the partial slab list */ if (--slab->numFree == 0) LIST_DELINIT(list); @@ -386,7 +437,6 @@ pb_slab_manager_create(struct pb_manager *provider, mgr->desc = *desc; LIST_INITHEAD(&mgr->slabs); - LIST_INITHEAD(&mgr->freeSlabs); pipe_mutex_init(mgr->mutex); -- cgit v1.2.3 From 038d53cbdb9e504388141c25859bce12f7e8f87e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 2 Sep 2008 02:51:06 +0900 Subject: pipebuffer: Add missing break statement to cache lookup logic. Second loop was never run. Spotted by Keith. --- src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index e2b8fe0f98..1ec422fb19 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -249,17 +249,25 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, buf = curr_buf; else if(util_time_timeout(&curr_buf->start, &curr_buf->end, &now)) _pb_cache_buffer_destroy(curr_buf); + else + /* This buffer (and all hereafter) are still hot in cache */ + break; curr = next; next = curr->next; } /* keep searching in the hot buffers */ - while(!buf && curr != &mgr->delayed) { - curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); - if(pb_cache_is_buffer_compat(curr_buf, size, desc)) - buf = curr_buf; - curr = next; - next = curr->next; + if(!buf) { + while(curr != &mgr->delayed) { + curr_buf = LIST_ENTRY(struct pb_cache_buffer, curr, head); + if(pb_cache_is_buffer_compat(curr_buf, size, desc)) { + buf = curr_buf; + break; + } + /* no need to check the timeout here */ + curr = next; + next = curr->next; + } } if(buf) { -- cgit v1.2.3 From f3a7463feefcf1f22c1309e1f5b0bfe381859686 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 1 Sep 2008 15:30:26 -0600 Subject: gallium: include u_pointer,h, not p_pointer.h --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index f4ca282dd9..6d4c081e04 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -27,7 +27,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" -#include "pipe/p_pointer.h" +#include "util/u_pointer.h" #include "rtasm_execmem.h" #include "rtasm_x86sse.h" -- cgit v1.2.3 From feea0c9d958bc1645b09b288cd4d4756d0d6e61a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 2 Sep 2008 18:04:08 -0600 Subject: gallium: fix out of bounds array errors in SSE execution 1. #define MAX_INPUTS/OUTPUTS/TEMPS/etc with better values. 2. Add assertions in aos_get_x86() to check register file indexes 3. Assert that constant regs haven't changed after running SSE code. --- src/gallium/auxiliary/draw/draw_vs_aos.c | 11 +++++++++++ src/gallium/auxiliary/draw/draw_vs_aos.h | 10 +++++----- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 760fcb389f..a556477a76 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -32,6 +32,7 @@ #include "util/u_memory.h" #include "util/u_math.h" #include "pipe/p_shader_tokens.h" +#include "pipe/p_debug.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi/tgsi_exec.h" @@ -119,21 +120,26 @@ static struct x86_reg get_reg_ptr(struct aos_compilation *cp, switch (file) { case TGSI_FILE_INPUT: + assert(idx < MAX_INPUTS); return x86_make_disp(ptr, Offset(struct aos_machine, input[idx])); case TGSI_FILE_OUTPUT: return x86_make_disp(ptr, Offset(struct aos_machine, output[idx])); case TGSI_FILE_TEMPORARY: + assert(idx < MAX_TEMPS); return x86_make_disp(ptr, Offset(struct aos_machine, temp[idx])); case AOS_FILE_INTERNAL: + assert(idx < MAX_INTERNALS); return x86_make_disp(ptr, Offset(struct aos_machine, internal[idx])); case TGSI_FILE_IMMEDIATE: + assert(idx < MAX_IMMEDIATES); /* just a sanity check */ return x86_make_disp(aos_get_x86(cp, 0, X86_IMMEDIATES), idx * 4 * sizeof(float)); case TGSI_FILE_CONSTANT: + assert(idx < MAX_CONSTANTS); /* just a sanity check */ return x86_make_disp(aos_get_x86(cp, 1, X86_CONSTANTS), idx * 4 * sizeof(float)); default: @@ -2108,6 +2114,11 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, start, count, output_buffer ); + + /* Sanity spot checks to make sure we didn't trash our constants */ + assert(machine->internal[IMM_ONES][0] == 1.0f); + assert(machine->internal[IMM_IDENTITY][0] == 0.0f); + assert(machine->internal[IMM_NEGS][0] == -1.0f); } diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 64e021ff6b..7fe6f79db0 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -51,11 +51,11 @@ struct x86_function; #define W 3 #define MAX_INPUTS PIPE_MAX_ATTRIBS -#define MAX_OUTPUTS PIPE_MAX_ATTRIBS -#define MAX_TEMPS PIPE_MAX_ATTRIBS /* say */ -#define MAX_CONSTANTS PIPE_MAX_ATTRIBS /* say */ -#define MAX_IMMEDIATES PIPE_MAX_ATTRIBS /* say */ -#define MAX_INTERNALS 8 +#define MAX_OUTPUTS PIPE_MAX_SHADER_OUTPUTS +#define MAX_TEMPS TGSI_EXEC_NUM_TEMPS +#define MAX_CONSTANTS 1024 /** only used for sanity checking */ +#define MAX_IMMEDIATES 1024 /** only used for sanity checking */ +#define MAX_INTERNALS 8 /** see IMM_x values below */ #define AOS_FILE_INTERNAL TGSI_FILE_COUNT -- cgit v1.2.3 From e3509fd4d09a19293ac3f2e0c92d758bc3ef308c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 2 Sep 2008 18:05:09 -0600 Subject: gallium: increase string buffer size to 16000 to avoid truncated output of long shaders --- src/gallium/auxiliary/tgsi/tgsi_dump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index a168c94928..58693db13a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -517,7 +517,7 @@ tgsi_dump( const struct tgsi_token *tokens, uint flags ) { - static char str[4096]; + static char str[16000]; uint len; char *p = str; -- cgit v1.2.3 From 82086f5d21295d5ceffb0fd9963de7de4112964b Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 3 Sep 2008 10:30:09 +0900 Subject: draw: Describe the steps in emit_load_R32G32B32A32. --- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index ab3c5b94a5..26297c74f8 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -55,9 +55,13 @@ static void emit_load_R32G32B32( struct aos_compilation *cp, struct x86_reg src_ptr ) { sse_movss(cp->func, data, x86_make_disp(src_ptr, 8)); + /* data = z ? ? ? */ sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) ); + /* data = z ? 0 1 */ sse_shufps(cp->func, data, data, SHUF(Y,Z,X,W) ); + /* data = ? 0 z 1 */ sse_movlps(cp->func, data, src_ptr); + /* data = x y z 1 */ } static void emit_load_R32G32( struct aos_compilation *cp, -- cgit v1.2.3 From f637a96e85a51a66f2c53b91118a6815bb61d6e6 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 3 Sep 2008 11:48:05 +0900 Subject: gallium: Have pipe_buffer_* receive a pipe_screen instead of a pipe_context. We want to use the pipe_buffer_* inlines everywhere, but a pipe context is not always available nor is it needed. --- src/gallium/auxiliary/util/u_draw_quad.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_shader.c | 2 +- src/gallium/drivers/cell/ppu/cell_texture.c | 4 +-- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 2 +- src/gallium/drivers/i915simple/i915_texture.c | 8 ++--- src/gallium/drivers/i965simple/brw_state_pool.c | 2 +- src/gallium/drivers/i965simple/brw_tex_layout.c | 4 +-- src/gallium/drivers/softpipe/sp_context.c | 2 +- src/gallium/drivers/softpipe/sp_state_fs.c | 2 +- src/gallium/drivers/softpipe/sp_texture.c | 6 ++-- src/gallium/include/pipe/p_inlines.h | 28 +++++++-------- src/gallium/state_trackers/python/p_context.i | 2 +- src/gallium/state_trackers/python/st_device.c | 2 +- .../state_trackers/python/st_softpipe_winsys.c | 2 +- src/gallium/winsys/drm/intel/dri/intel_screen.c | 2 +- src/gallium/winsys/egl_xlib/sw_winsys.c | 2 +- src/gallium/winsys/gdi/wmesa.c | 2 +- src/gallium/winsys/xlib/xm_winsys.c | 2 +- src/gallium/winsys/xlib/xm_winsys_aub.c | 2 +- src/mesa/state_tracker/st_atom_constbuf.c | 8 ++--- src/mesa/state_tracker/st_cb_bitmap.c | 8 ++--- src/mesa/state_tracker/st_cb_bufferobjects.c | 18 +++++----- src/mesa/state_tracker/st_cb_clear.c | 8 ++--- src/mesa/state_tracker/st_cb_drawpixels.c | 8 ++--- src/mesa/state_tracker/st_context.c | 2 +- src/mesa/state_tracker/st_draw.c | 40 +++++++++++----------- src/mesa/state_tracker/st_gen_mipmap.c | 8 ++--- 27 files changed, 89 insertions(+), 89 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index bf143815d8..d643ee9ab7 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -127,6 +127,6 @@ util_draw_texquad(struct pipe_context *pipe, util_draw_vertex_buffer(pipe, vbuf, PIPE_PRIM_TRIANGLE_FAN, 4, 2); } - pipe_buffer_reference(pipe->winsys, &vbuf, NULL); + pipe_buffer_reference(pipe->screen, &vbuf, NULL); } } diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index 86bcad05e9..3d1b887da9 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -166,7 +166,7 @@ cell_set_constant_buffer(struct pipe_context *pipe, assert(index == 0); /* note: reference counting */ - pipe_buffer_reference(ws, + winsys_buffer_reference(ws, &cell->constants[shader].buffer, buf->buffer); cell->constants[shader].size = buf->size; diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 5a0942bbd6..5c01aa21b8 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -130,7 +130,7 @@ cell_texture_release_screen(struct pipe_screen *screen, DBG("%s deleting %p\n", __FUNCTION__, (void *) spt); */ - pipe_buffer_reference(screen->winsys, &spt->buffer, NULL); + pipe_buffer_reference(screen, &spt->buffer, NULL); FREE(spt); } @@ -161,7 +161,7 @@ cell_get_tex_surface_screen(struct pipe_screen *screen, if (ps) { assert(ps->refcount); assert(ps->winsys); - pipe_buffer_reference(ws, &ps->buffer, spt->buffer); + winsys_buffer_reference(ws, &ps->buffer, spt->buffer); ps->format = pt->format; ps->block = pt->block; ps->width = pt->width[level]; diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index e4ece55098..9397a2ca1a 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -124,7 +124,7 @@ i915_vbuf_render_allocate_vertices( struct vbuf_render *render, if (i915_render->vbo_size > size + i915_render->vbo_offset && !i915->vbo_flushed) { } else { i915->vbo_flushed = 0; - pipe_buffer_reference(winsys, &i915_render->vbo, NULL); + winsys_buffer_reference(winsys, &i915_render->vbo, NULL); } if (!i915_render->vbo) { diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index a853a5a3f6..bd87217063 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -645,7 +645,7 @@ i915_texture_release(struct pipe_screen *screen, DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); */ - pipe_buffer_reference(screen->winsys, &tex->buffer, NULL); + pipe_buffer_reference(screen, &tex->buffer, NULL); for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) if (tex->image_offset[i]) @@ -684,7 +684,7 @@ i915_get_tex_surface(struct pipe_screen *screen, ps->refcount = 1; ps->winsys = ws; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(ws, &ps->buffer, tex->buffer); + pipe_buffer_reference(screen, &ps->buffer, tex->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; @@ -728,7 +728,7 @@ i915_texture_blanket(struct pipe_screen * screen, i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1); i915_miptree_set_image_offset(tex, 0, 0, 0, 0); - pipe_buffer_reference(screen->winsys, &tex->buffer, buffer); + pipe_buffer_reference(screen, &tex->buffer, buffer); return &tex->base; } @@ -756,7 +756,7 @@ i915_tex_surface_release(struct pipe_screen *screen, } pipe_texture_reference(&surf->texture, NULL); - pipe_buffer_reference(screen->winsys, &surf->buffer, NULL); + pipe_buffer_reference(screen, &surf->buffer, NULL); FREE(surf); } diff --git a/src/gallium/drivers/i965simple/brw_state_pool.c b/src/gallium/drivers/i965simple/brw_state_pool.c index 78d4c0e411..d0dc1ef74d 100644 --- a/src/gallium/drivers/i965simple/brw_state_pool.c +++ b/src/gallium/drivers/i965simple/brw_state_pool.c @@ -103,7 +103,7 @@ static void brw_destroy_pool( struct brw_context *brw, { struct brw_mem_pool *pool = &brw->pool[pool_id]; - pipe_buffer_reference( pool->brw->pipe.winsys, + winsys_buffer_reference( pool->brw->pipe.winsys, &pool->buffer, NULL ); } diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 05eda9d1f2..cc0c665e02 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -330,7 +330,7 @@ brw_texture_release_screen(struct pipe_screen *screen, DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); */ - pipe_buffer_reference(ws, &tex->buffer, NULL); + winsys_buffer_reference(ws, &tex->buffer, NULL); for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) if (tex->image_offset[i]) @@ -369,7 +369,7 @@ brw_get_tex_surface_screen(struct pipe_screen *screen, if (ps) { assert(ps->format); assert(ps->refcount); - pipe_buffer_reference(ws, &ps->buffer, tex->buffer); + winsys_buffer_reference(ws, &ps->buffer, tex->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index dda90f760a..6f12390cf7 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -113,7 +113,7 @@ static void softpipe_destroy( struct pipe_context *pipe ) for (i = 0; i < Elements(softpipe->constants); i++) { if (softpipe->constants[i].buffer) { - pipe_buffer_reference(ws, &softpipe->constants[i].buffer, NULL); + winsys_buffer_reference(ws, &softpipe->constants[i].buffer, NULL); } } diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 1be461b3a4..e5b609cf6c 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -152,7 +152,7 @@ softpipe_set_constant_buffer(struct pipe_context *pipe, assert(index == 0); /* note: reference counting */ - pipe_buffer_reference(ws, + winsys_buffer_reference(ws, &softpipe->constants[shader].buffer, buf ? buf->buffer : NULL); softpipe->constants[shader].size = buf ? buf->size : 0; diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 3a737d6f72..c283e3e410 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -192,7 +192,7 @@ softpipe_texture_blanket(struct pipe_screen * screen, spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); spt->stride[0] = stride[0]; - pipe_buffer_reference(screen->winsys, &spt->buffer, buffer); + pipe_buffer_reference(screen, &spt->buffer, buffer); return &spt->base; } @@ -208,7 +208,7 @@ softpipe_texture_release(struct pipe_screen *screen, if (--(*pt)->refcount <= 0) { struct softpipe_texture *spt = softpipe_texture(*pt); - pipe_buffer_reference(screen->winsys, &spt->buffer, NULL); + pipe_buffer_reference(screen, &spt->buffer, NULL); FREE(spt); } *pt = NULL; @@ -231,7 +231,7 @@ softpipe_get_tex_surface(struct pipe_screen *screen, if (ps) { assert(ps->refcount); assert(ps->winsys); - pipe_buffer_reference(ws, &ps->buffer, spt->buffer); + pipe_buffer_reference(screen, &ps->buffer, spt->buffer); ps->format = pt->format; ps->block = pt->block; ps->width = pt->width[level]; diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h index 1e4b98edb4..d70de8e301 100644 --- a/src/gallium/include/pipe/p_inlines.h +++ b/src/gallium/include/pipe/p_inlines.h @@ -109,7 +109,7 @@ pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf) /* XXX: thread safety issues! */ static INLINE void -pipe_buffer_reference(struct pipe_winsys *winsys, +winsys_buffer_reference(struct pipe_winsys *winsys, struct pipe_buffer **ptr, struct pipe_buffer *buf) { @@ -164,48 +164,48 @@ pipe_texture_release(struct pipe_texture **ptr) */ static INLINE struct pipe_buffer * -pipe_buffer_create( struct pipe_context *pipe, +pipe_buffer_create( struct pipe_screen *screen, unsigned alignment, unsigned usage, unsigned size ) { - return pipe->winsys->buffer_create(pipe->winsys, alignment, usage, size); + return screen->winsys->buffer_create(screen->winsys, alignment, usage, size); } static INLINE struct pipe_buffer * -pipe_user_buffer_create( struct pipe_context *pipe, void *ptr, unsigned size ) +pipe_user_buffer_create( struct pipe_screen *screen, void *ptr, unsigned size ) { - return pipe->winsys->user_buffer_create(pipe->winsys, ptr, size); + return screen->winsys->user_buffer_create(screen->winsys, ptr, size); } static INLINE void -pipe_buffer_destroy( struct pipe_context *pipe, struct pipe_buffer *buf ) +pipe_buffer_destroy( struct pipe_screen *screen, struct pipe_buffer *buf ) { - pipe->winsys->buffer_destroy(pipe->winsys, buf); + screen->winsys->buffer_destroy(screen->winsys, buf); } static INLINE void * -pipe_buffer_map(struct pipe_context *pipe, +pipe_buffer_map(struct pipe_screen *screen, struct pipe_buffer *buf, unsigned usage) { - return pipe->winsys->buffer_map(pipe->winsys, buf, usage); + return screen->winsys->buffer_map(screen->winsys, buf, usage); } static INLINE void -pipe_buffer_unmap(struct pipe_context *pipe, +pipe_buffer_unmap(struct pipe_screen *screen, struct pipe_buffer *buf) { - pipe->winsys->buffer_unmap(pipe->winsys, buf); + screen->winsys->buffer_unmap(screen->winsys, buf); } /* XXX when we're using this everywhere, get rid of - * pipe_buffer_reference() above. + * winsys_buffer_reference() above. */ static INLINE void -pipe_reference_buffer(struct pipe_context *pipe, +pipe_buffer_reference(struct pipe_screen *screen, struct pipe_buffer **ptr, struct pipe_buffer *buf) { - pipe_buffer_reference(pipe->winsys, ptr, buf); + winsys_buffer_reference(screen->winsys, ptr, buf); } diff --git a/src/gallium/state_trackers/python/p_context.i b/src/gallium/state_trackers/python/p_context.i index 0b2621f7c3..231e07cd63 100644 --- a/src/gallium/state_trackers/python/p_context.i +++ b/src/gallium/state_trackers/python/p_context.i @@ -248,7 +248,7 @@ struct st_context { util_draw_vertex_buffer(pipe, vbuf, prim, num_verts, num_attribs); error2: - pipe_buffer_reference(pipe->winsys, &vbuf, NULL); + pipe_buffer_reference(pipe->screen, &vbuf, NULL); error1: ; } diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c index f71d85dd9b..bd71755f0b 100644 --- a/src/gallium/state_trackers/python/st_device.c +++ b/src/gallium/state_trackers/python/st_device.c @@ -293,7 +293,7 @@ st_buffer_destroy(struct st_buffer *st_buf) { if(st_buf) { struct pipe_winsys *winsys = st_buf->st_dev->screen->winsys; - pipe_buffer_reference(winsys, &st_buf->buffer, NULL); + pipe_buffer_reference(pipe->screen, &st_buf->buffer, NULL); FREE(st_buf); } } diff --git a/src/gallium/state_trackers/python/st_softpipe_winsys.c b/src/gallium/state_trackers/python/st_softpipe_winsys.c index 2d4f5434b3..f62113a469 100644 --- a/src/gallium/state_trackers/python/st_softpipe_winsys.c +++ b/src/gallium/state_trackers/python/st_softpipe_winsys.c @@ -221,7 +221,7 @@ st_softpipe_surface_release(struct pipe_winsys *winsys, surf->refcount--; if (surf->refcount == 0) { if (surf->buffer) - pipe_buffer_reference(winsys, &surf->buffer, NULL); + winsys_buffer_reference(winsys, &surf->buffer, NULL); free(surf); } *s = NULL; diff --git a/src/gallium/winsys/drm/intel/dri/intel_screen.c b/src/gallium/winsys/drm/intel/dri/intel_screen.c index 46d4861e77..3a486481f5 100644 --- a/src/gallium/winsys/drm/intel/dri/intel_screen.c +++ b/src/gallium/winsys/drm/intel/dri/intel_screen.c @@ -83,7 +83,7 @@ intelCreateSurface(struct intel_screen *intelScreen, struct pipe_winsys *winsys, buffer); /* Unref the buffer we don't need it anyways */ - pipe_buffer_reference(screen->winsys, &buffer, NULL); + pipe_buffer_reference(screen, &buffer, NULL); surface = screen->get_tex_surface(screen, texture, diff --git a/src/gallium/winsys/egl_xlib/sw_winsys.c b/src/gallium/winsys/egl_xlib/sw_winsys.c index ae81d7f801..2fd190da52 100644 --- a/src/gallium/winsys/egl_xlib/sw_winsys.c +++ b/src/gallium/winsys/egl_xlib/sw_winsys.c @@ -216,7 +216,7 @@ surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) surf->refcount--; if (surf->refcount == 0) { if (surf->buffer) - pipe_buffer_reference(winsys, &surf->buffer, NULL); + winsys_buffer_reference(winsys, &surf->buffer, NULL); free(surf); } *s = NULL; diff --git a/src/gallium/winsys/gdi/wmesa.c b/src/gallium/winsys/gdi/wmesa.c index 730fb1b541..ed3dd2b927 100644 --- a/src/gallium/winsys/gdi/wmesa.c +++ b/src/gallium/winsys/gdi/wmesa.c @@ -463,7 +463,7 @@ wm_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) surf->refcount--; if (surf->refcount == 0) { if (surf->buffer) - pipe_buffer_reference(winsys, &surf->buffer, NULL); + winsys_buffer_reference(winsys, &surf->buffer, NULL); free(surf); } *s = NULL; diff --git a/src/gallium/winsys/xlib/xm_winsys.c b/src/gallium/winsys/xlib/xm_winsys.c index 68ead7f528..70f01e0ef8 100644 --- a/src/gallium/winsys/xlib/xm_winsys.c +++ b/src/gallium/winsys/xlib/xm_winsys.c @@ -543,7 +543,7 @@ xm_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) surf->refcount--; if (surf->refcount == 0) { if (surf->buffer) - pipe_buffer_reference(winsys, &surf->buffer, NULL); + winsys_buffer_reference(winsys, &surf->buffer, NULL); free(surf); } *s = NULL; diff --git a/src/gallium/winsys/xlib/xm_winsys_aub.c b/src/gallium/winsys/xlib/xm_winsys_aub.c index 35c4ebc4ba..b7c10b6bca 100644 --- a/src/gallium/winsys/xlib/xm_winsys_aub.c +++ b/src/gallium/winsys/xlib/xm_winsys_aub.c @@ -308,7 +308,7 @@ aub_i915_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) surf->refcount--; if (surf->refcount == 0) { if (surf->buffer) - pipe_buffer_reference(winsys, &surf->buffer, NULL); + winsys_buffer_reference(winsys, &surf->buffer, NULL); free(surf); } *s = NULL; diff --git a/src/mesa/state_tracker/st_atom_constbuf.c b/src/mesa/state_tracker/st_atom_constbuf.c index d3aadf5074..d02e51cb9a 100644 --- a/src/mesa/state_tracker/st_atom_constbuf.c +++ b/src/mesa/state_tracker/st_atom_constbuf.c @@ -73,8 +73,8 @@ void st_upload_constants( struct st_context *st, /* We always need to get a new buffer, to keep the drivers simple and * avoid gratuitous rendering synchronization. */ - pipe_reference_buffer(pipe, &cbuf->buffer, NULL ); - cbuf->buffer = pipe_buffer_create(pipe, 16, PIPE_BUFFER_USAGE_CONSTANT, + pipe_buffer_reference(pipe->screen, &cbuf->buffer, NULL ); + cbuf->buffer = pipe_buffer_create(pipe->screen, 16, PIPE_BUFFER_USAGE_CONSTANT, paramBytes ); if (0) @@ -86,10 +86,10 @@ void st_upload_constants( struct st_context *st, /* load Mesa constants into the constant buffer */ if (cbuf->buffer) { - void *map = pipe_buffer_map(pipe, cbuf->buffer, + void *map = pipe_buffer_map(pipe->screen, cbuf->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); memcpy(map, params->ParameterValues, paramBytes); - pipe_buffer_unmap(pipe, cbuf->buffer); + pipe_buffer_unmap(pipe->screen, cbuf->buffer); } cbuf->size = paramBytes; diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index a0c305d66f..694104f9cf 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -378,7 +378,7 @@ setup_bitmap_vertex_data(struct st_context *st, void *buf; if (!st->bitmap.vbuf) { - st->bitmap.vbuf = pipe_buffer_create(pipe, 32, PIPE_BUFFER_USAGE_VERTEX, + st->bitmap.vbuf = pipe_buffer_create(pipe->screen, 32, PIPE_BUFFER_USAGE_VERTEX, sizeof(st->bitmap.vertices)); } @@ -418,9 +418,9 @@ setup_bitmap_vertex_data(struct st_context *st, } /* put vertex data into vbuf */ - buf = pipe_buffer_map(pipe, st->bitmap.vbuf, PIPE_BUFFER_USAGE_CPU_WRITE); + buf = pipe_buffer_map(pipe->screen, st->bitmap.vbuf, PIPE_BUFFER_USAGE_CPU_WRITE); memcpy(buf, st->bitmap.vertices, sizeof(st->bitmap.vertices)); - pipe_buffer_unmap(pipe, st->bitmap.vbuf); + pipe_buffer_unmap(pipe->screen, st->bitmap.vbuf); } @@ -779,7 +779,7 @@ st_destroy_bitmap(struct st_context *st) } if (st->bitmap.vbuf) { - pipe_buffer_destroy(pipe, st->bitmap.vbuf); + pipe_buffer_destroy(pipe->screen, st->bitmap.vbuf); st->bitmap.vbuf = NULL; } diff --git a/src/mesa/state_tracker/st_cb_bufferobjects.c b/src/mesa/state_tracker/st_cb_bufferobjects.c index af79aefa96..07fa2afce0 100644 --- a/src/mesa/state_tracker/st_cb_bufferobjects.c +++ b/src/mesa/state_tracker/st_cb_bufferobjects.c @@ -78,7 +78,7 @@ st_bufferobj_free(GLcontext *ctx, struct gl_buffer_object *obj) struct st_buffer_object *st_obj = st_buffer_object(obj); if (st_obj->buffer) - pipe_reference_buffer(pipe, &st_obj->buffer, NULL); + pipe_buffer_reference(pipe->screen, &st_obj->buffer, NULL); free(st_obj); } @@ -105,9 +105,9 @@ st_bufferobj_subdata(GLcontext *ctx, if (offset >= st_obj->size || size > (st_obj->size - offset)) return; - map = pipe_buffer_map(pipe, st_obj->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + map = pipe_buffer_map(pipe->screen, st_obj->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); memcpy(map + offset, data, size); - pipe_buffer_unmap(pipe, st_obj->buffer); + pipe_buffer_unmap(pipe->screen, st_obj->buffer); } @@ -128,9 +128,9 @@ st_bufferobj_get_subdata(GLcontext *ctx, if (offset >= st_obj->size || size > (st_obj->size - offset)) return; - map = pipe_buffer_map(pipe, st_obj->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map = pipe_buffer_map(pipe->screen, st_obj->buffer, PIPE_BUFFER_USAGE_CPU_READ); memcpy(data, map + offset, size); - pipe_buffer_unmap(pipe, st_obj->buffer); + pipe_buffer_unmap(pipe->screen, st_obj->buffer); } @@ -171,9 +171,9 @@ st_bufferobj_data(GLcontext *ctx, buffer_usage = 0; } - pipe_reference_buffer( pipe, &st_obj->buffer, NULL ); + pipe_buffer_reference( pipe->screen, &st_obj->buffer, NULL ); - st_obj->buffer = pipe_buffer_create( pipe, 32, buffer_usage, size ); + st_obj->buffer = pipe_buffer_create( pipe->screen, 32, buffer_usage, size ); st_obj->size = size; @@ -207,7 +207,7 @@ st_bufferobj_map(GLcontext *ctx, GLenum target, GLenum access, break; } - obj->Pointer = pipe_buffer_map(pipe, st_obj->buffer, flags); + obj->Pointer = pipe_buffer_map(pipe->screen, st_obj->buffer, flags); return obj->Pointer; } @@ -221,7 +221,7 @@ st_bufferobj_unmap(GLcontext *ctx, GLenum target, struct gl_buffer_object *obj) struct pipe_context *pipe = st_context(ctx)->pipe; struct st_buffer_object *st_obj = st_buffer_object(obj); - pipe_buffer_unmap(pipe, st_obj->buffer); + pipe_buffer_unmap(pipe->screen, st_obj->buffer); obj->Pointer = NULL; return GL_TRUE; } diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index e475f022d3..013b9a9c9c 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -116,7 +116,7 @@ st_destroy_clear(struct st_context *st) st->clear.vs = NULL; } if (st->clear.vbuf) { - pipe_buffer_destroy(pipe, st->clear.vbuf); + pipe_buffer_destroy(pipe->screen, st->clear.vbuf); st->clear.vbuf = NULL; } } @@ -152,7 +152,7 @@ draw_quad(GLcontext *ctx, void *buf; if (!st->clear.vbuf) { - st->clear.vbuf = pipe_buffer_create(pipe, 32, PIPE_BUFFER_USAGE_VERTEX, + st->clear.vbuf = pipe_buffer_create(pipe->screen, 32, PIPE_BUFFER_USAGE_VERTEX, sizeof(st->clear.vertices)); } @@ -180,9 +180,9 @@ draw_quad(GLcontext *ctx, } /* put vertex data into vbuf */ - buf = pipe_buffer_map(pipe, st->clear.vbuf, PIPE_BUFFER_USAGE_CPU_WRITE); + buf = pipe_buffer_map(pipe->screen, st->clear.vbuf, PIPE_BUFFER_USAGE_CPU_WRITE); memcpy(buf, st->clear.vertices, sizeof(st->clear.vertices)); - pipe_buffer_unmap(pipe, st->clear.vbuf); + pipe_buffer_unmap(pipe->screen, st->clear.vbuf); /* draw */ util_draw_vertex_buffer(pipe, st->clear.vbuf, diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 4ec7c752df..00bbcae32a 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -487,17 +487,17 @@ draw_quad(GLcontext *ctx, GLfloat x0, GLfloat y0, GLfloat z, ubyte *map; /* allocate/load buffer object with vertex data */ - buf = pipe_buffer_create(pipe, 32, PIPE_BUFFER_USAGE_VERTEX, + buf = pipe_buffer_create(pipe->screen, 32, PIPE_BUFFER_USAGE_VERTEX, sizeof(verts)); - map = pipe_buffer_map(pipe, buf, PIPE_BUFFER_USAGE_CPU_WRITE); + map = pipe_buffer_map(pipe->screen, buf, PIPE_BUFFER_USAGE_CPU_WRITE); memcpy(map, verts, sizeof(verts)); - pipe_buffer_unmap(pipe, buf); + pipe_buffer_unmap(pipe->screen, buf); util_draw_vertex_buffer(pipe, buf, PIPE_PRIM_QUADS, 4, /* verts */ 3); /* attribs/vert */ - pipe_buffer_reference(pipe->winsys, &buf, NULL); + pipe_buffer_reference(pipe->screen, &buf, NULL); } } diff --git a/src/mesa/state_tracker/st_context.c b/src/mesa/state_tracker/st_context.c index 83b0be06da..08d4db7f7f 100644 --- a/src/mesa/state_tracker/st_context.c +++ b/src/mesa/state_tracker/st_context.c @@ -196,7 +196,7 @@ static void st_destroy_context_priv( struct st_context *st ) for (i = 0; i < Elements(st->state.constants); i++) { if (st->state.constants[i].buffer) { - pipe_reference_buffer(st->pipe, &st->state.constants[i].buffer, NULL); + pipe_buffer_reference(st->pipe->screen, &st->state.constants[i].buffer, NULL); } } diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 2c80701186..bdf8648ef7 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -229,7 +229,7 @@ setup_edgeflags(GLcontext *ctx, GLenum primMode, GLint start, GLint count, if (!vec) return NULL; - map = pipe_buffer_map(pipe, stobj->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map = pipe_buffer_map(pipe->screen, stobj->buffer, PIPE_BUFFER_USAGE_CPU_READ); map = ADD_POINTERS(map, array->Ptr); for (i = 0; i < count; i++) { @@ -239,7 +239,7 @@ setup_edgeflags(GLcontext *ctx, GLenum primMode, GLint start, GLint count, map += array->StrideB; } - pipe_buffer_unmap(pipe, stobj->buffer); + pipe_buffer_unmap(pipe->screen, stobj->buffer); pipe->set_edgeflags(pipe, vec); @@ -373,13 +373,13 @@ setup_interleaved_attribs(GLcontext *ctx, get_user_arrays_bounds(vp, arrays, max_index, &low, &high); /*printf("user buffer range: %p %p %d\n", low, high, high-low);*/ vbuffer->buffer = - pipe_user_buffer_create(pipe, (void *) low, high - low); + pipe_user_buffer_create(pipe->screen, (void *) low, high - low); vbuffer->buffer_offset = 0; offset0 = low; } else { vbuffer->buffer = NULL; - pipe_reference_buffer(pipe, &vbuffer->buffer, stobj->buffer); + pipe_buffer_reference(pipe->screen, &vbuffer->buffer, stobj->buffer); vbuffer->buffer_offset = (unsigned) arrays[mesaAttr]->Ptr; offset0 = arrays[mesaAttr]->Ptr; } @@ -432,7 +432,7 @@ setup_non_interleaved_attribs(GLcontext *ctx, /*printf("stobj %u = %p\n", attr, (void*) stobj);*/ vbuffer[attr].buffer = NULL; - pipe_reference_buffer(pipe, &vbuffer[attr].buffer, stobj->buffer); + pipe_buffer_reference(pipe->screen, &vbuffer[attr].buffer, stobj->buffer); vbuffer[attr].buffer_offset = (unsigned) arrays[mesaAttr]->Ptr; velements[attr].src_offset = 0; } @@ -451,13 +451,13 @@ setup_non_interleaved_attribs(GLcontext *ctx, bytes = arrays[mesaAttr]->Size * _mesa_sizeof_type(arrays[mesaAttr]->Type); } - vbuffer[attr].buffer = pipe_user_buffer_create(pipe, + vbuffer[attr].buffer = pipe_user_buffer_create(pipe->screen, (void *) arrays[mesaAttr]->Ptr, bytes); } else { /* no array, use ctx->Current.Attrib[] value */ bytes = sizeof(ctx->Current.Attrib[0]); - vbuffer[attr].buffer = pipe_user_buffer_create(pipe, + vbuffer[attr].buffer = pipe_user_buffer_create(pipe->screen, (void *) ctx->Current.Attrib[mesaAttr], bytes); stride = 0; } @@ -581,12 +581,12 @@ st_draw_vbo(GLcontext *ctx, if (bufobj && bufobj->Name) { /* elements/indexes are in a real VBO */ struct st_buffer_object *stobj = st_buffer_object(bufobj); - pipe_reference_buffer(pipe, &indexBuf, stobj->buffer); + pipe_buffer_reference(pipe->screen, &indexBuf, stobj->buffer); indexOffset = (unsigned) ib->ptr / indexSize; } else { /* element/indicies are in user space memory */ - indexBuf = pipe_user_buffer_create(pipe, (void *) ib->ptr, + indexBuf = pipe_user_buffer_create(pipe->screen, (void *) ib->ptr, ib->count * indexSize); indexOffset = 0; } @@ -621,7 +621,7 @@ st_draw_vbo(GLcontext *ctx, } } - pipe_reference_buffer(pipe, &indexBuf, NULL); + pipe_buffer_reference(pipe->screen, &indexBuf, NULL); } else { /* non-indexed */ @@ -637,7 +637,7 @@ st_draw_vbo(GLcontext *ctx, /* unreference buffers (frees wrapped user-space buffer objects) */ for (attr = 0; attr < num_vbuffers; attr++) { - pipe_reference_buffer(pipe, &vbuffer[attr].buffer, NULL); + pipe_buffer_reference(pipe->screen, &vbuffer[attr].buffer, NULL); assert(!vbuffer[attr].buffer); } pipe->set_vertex_buffers(pipe, vp->num_inputs, vbuffer); @@ -750,7 +750,7 @@ st_feedback_draw_vbo(GLcontext *ctx, assert(stobj->buffer); vbuffers[attr].buffer = NULL; - pipe_reference_buffer(pipe, &vbuffers[attr].buffer, stobj->buffer); + pipe_buffer_reference(pipe->screen, &vbuffers[attr].buffer, stobj->buffer); vbuffers[attr].buffer_offset = (unsigned) arrays[0]->Ptr;/* in bytes */ velements[attr].src_offset = arrays[mesaAttr]->Ptr - arrays[0]->Ptr; } @@ -762,7 +762,7 @@ st_feedback_draw_vbo(GLcontext *ctx, /* wrap user data */ vbuffers[attr].buffer - = pipe_user_buffer_create(pipe, (void *) arrays[mesaAttr]->Ptr, + = pipe_user_buffer_create(pipe->screen, (void *) arrays[mesaAttr]->Ptr, bytes); vbuffers[attr].buffer_offset = 0; velements[attr].src_offset = 0; @@ -784,7 +784,7 @@ st_feedback_draw_vbo(GLcontext *ctx, #endif /* map the attrib buffer */ - map = pipe_buffer_map(pipe, vbuffers[attr].buffer, + map = pipe_buffer_map(pipe->screen, vbuffers[attr].buffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(draw, attr, map); } @@ -812,7 +812,7 @@ st_feedback_draw_vbo(GLcontext *ctx, return; } - map = pipe_buffer_map(pipe, index_buffer_handle, + map = pipe_buffer_map(pipe->screen, index_buffer_handle, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer(draw, indexSize, map); } @@ -823,7 +823,7 @@ st_feedback_draw_vbo(GLcontext *ctx, /* map constant buffers */ - mapped_constants = pipe_buffer_map(pipe, + mapped_constants = pipe_buffer_map(pipe->screen, st->state.constants[PIPE_SHADER_VERTEX].buffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_constant_buffer(st->draw, mapped_constants, @@ -837,20 +837,20 @@ st_feedback_draw_vbo(GLcontext *ctx, /* unmap constant buffers */ - pipe_buffer_unmap(pipe, st->state.constants[PIPE_SHADER_VERTEX].buffer); + pipe_buffer_unmap(pipe->screen, st->state.constants[PIPE_SHADER_VERTEX].buffer); /* * unmap vertex/index buffers */ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (draw->pt.vertex_buffer[i].buffer) { - pipe_buffer_unmap(pipe, draw->pt.vertex_buffer[i].buffer); - pipe_reference_buffer(pipe, &draw->pt.vertex_buffer[i].buffer, NULL); + pipe_buffer_unmap(pipe->screen, draw->pt.vertex_buffer[i].buffer); + pipe_buffer_reference(pipe->screen, &draw->pt.vertex_buffer[i].buffer, NULL); draw_set_mapped_vertex_buffer(draw, i, NULL); } } if (ib) { - pipe_buffer_unmap(pipe, index_buffer_handle); + pipe_buffer_unmap(pipe->screen, index_buffer_handle); draw_set_mapped_element_buffer(draw, 0, NULL); } } diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 6db9bc0dd5..b9d114b1c9 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -128,10 +128,10 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, PIPE_BUFFER_USAGE_CPU_WRITE); - srcData = (ubyte *) pipe_buffer_map(pipe, srcSurf->buffer, + srcData = (ubyte *) pipe_buffer_map(pipe->screen, srcSurf->buffer, PIPE_BUFFER_USAGE_CPU_READ) + srcSurf->offset; - dstData = (ubyte *) pipe_buffer_map(pipe, dstSurf->buffer, + dstData = (ubyte *) pipe_buffer_map(pipe->screen, dstSurf->buffer, PIPE_BUFFER_USAGE_CPU_WRITE) + dstSurf->offset; @@ -144,8 +144,8 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, dstSurf->stride, /* stride in bytes */ dstData); - pipe_buffer_unmap(pipe, srcSurf->buffer); - pipe_buffer_unmap(pipe, dstSurf->buffer); + pipe_buffer_unmap(pipe->screen, srcSurf->buffer); + pipe_buffer_unmap(pipe->screen, dstSurf->buffer); pipe_surface_reference(&srcSurf, NULL); pipe_surface_reference(&dstSurf, NULL); -- cgit v1.2.3 From a563a27c07eee9f0278c8473e27853fa2d1ad119 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 3 Sep 2008 13:32:09 -0600 Subject: gallium: silence warnings --- src/gallium/auxiliary/translate/translate_generic.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate_generic.c b/src/gallium/auxiliary/translate/translate_generic.c index 4d336f47ea..8d39b64c6c 100644 --- a/src/gallium/auxiliary/translate/translate_generic.c +++ b/src/gallium/auxiliary/translate/translate_generic.c @@ -415,6 +415,12 @@ static fetch_func get_fetch_func( enum pipe_format format ) static emit_func get_emit_func( enum pipe_format format ) { + /* silence warnings */ + (void) emit_R32G32B32A32_FIXED; + (void) emit_R32G32B32_FIXED; + (void) emit_R32G32_FIXED; + (void) emit_R32_FIXED; + switch (format) { case PIPE_FORMAT_R64_FLOAT: return &emit_R64_FLOAT; -- cgit v1.2.3 From a3e39a4aa950ae0f9d4c3106bdf438c6529bd63c Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 4 Sep 2008 09:32:54 +0900 Subject: gallium: Use pipe_buffer_* inlines as much as possible. --- src/gallium/auxiliary/util/u_blit.c | 22 ++++++++--------- src/gallium/auxiliary/util/u_draw_quad.c | 10 ++++---- src/gallium/auxiliary/util/u_gen_mipmap.c | 40 +++++++++++++++---------------- 3 files changed, 36 insertions(+), 36 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 05399f9885..9adf72944e 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -138,10 +138,10 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) /* fragment shader */ ctx->fs = util_make_fragment_tex_shader(pipe, &ctx->frag_shader); - ctx->vbuf = pipe->winsys->buffer_create(pipe->winsys, - 32, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(ctx->vertices)); + ctx->vbuf = pipe_buffer_create(pipe->screen, + 32, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(ctx->vertices)); if (!ctx->vbuf) { FREE(ctx); ctx->pipe->delete_fs_state(ctx->pipe, ctx->fs); @@ -174,7 +174,7 @@ util_destroy_blit(struct blit_state *ctx) FREE((void*) ctx->vert_shader.tokens); FREE((void*) ctx->frag_shader.tokens); - pipe->winsys->buffer_destroy(pipe->winsys, ctx->vbuf); + pipe_buffer_reference(pipe->screen, &ctx->vbuf, NULL); FREE(ctx); } @@ -214,12 +214,12 @@ setup_vertex_data(struct blit_state *ctx, ctx->vertices[3][1][0] = 0.0f; ctx->vertices[3][1][1] = 1.0f; - buf = ctx->pipe->winsys->buffer_map(ctx->pipe->winsys, ctx->vbuf, - PIPE_BUFFER_USAGE_CPU_WRITE); + buf = pipe_buffer_map(ctx->pipe->screen, ctx->vbuf, + PIPE_BUFFER_USAGE_CPU_WRITE); memcpy(buf, ctx->vertices, sizeof(ctx->vertices)); - ctx->pipe->winsys->buffer_unmap(ctx->pipe->winsys, ctx->vbuf); + pipe_buffer_unmap(ctx->pipe->screen, ctx->vbuf); } @@ -259,12 +259,12 @@ setup_vertex_data_tex(struct blit_state *ctx, ctx->vertices[3][1][0] = s0; ctx->vertices[3][1][1] = t1; - buf = ctx->pipe->winsys->buffer_map(ctx->pipe->winsys, ctx->vbuf, - PIPE_BUFFER_USAGE_CPU_WRITE); + buf = pipe_buffer_map(ctx->pipe->screen, ctx->vbuf, + PIPE_BUFFER_USAGE_CPU_WRITE); memcpy(buf, ctx->vertices, sizeof(ctx->vertices)); - ctx->pipe->winsys->buffer_unmap(ctx->pipe->winsys, ctx->vbuf); + pipe_buffer_unmap(ctx->pipe->screen, ctx->vbuf); } /** * Copy pixel block from src surface to dst surface. diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index d643ee9ab7..8ecae71b64 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -86,11 +86,11 @@ util_draw_texquad(struct pipe_context *pipe, vertexBytes = 4 * (4 * numAttribs * sizeof(float)); /* XXX create one-time */ - vbuf = pipe->winsys->buffer_create(pipe->winsys, 32, - PIPE_BUFFER_USAGE_VERTEX, vertexBytes); + vbuf = pipe_buffer_create(pipe->screen, 32, + PIPE_BUFFER_USAGE_VERTEX, vertexBytes); if (vbuf) { - float *v = (float *) pipe->winsys->buffer_map(pipe->winsys, vbuf, - PIPE_BUFFER_USAGE_CPU_WRITE); + float *v = (float *) pipe_buffer_map(pipe->screen, vbuf, + PIPE_BUFFER_USAGE_CPU_WRITE); if (v) { /* * Load vertex buffer @@ -123,7 +123,7 @@ util_draw_texquad(struct pipe_context *pipe, v[28] = 0.0; v[29] = 1.0; - pipe->winsys->buffer_unmap(pipe->winsys, vbuf); + pipe_buffer_unmap(pipe->screen, vbuf); util_draw_vertex_buffer(pipe, vbuf, PIPE_PRIM_TRIANGLE_FAN, 4, 2); } diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index c1e2c19f87..8c983da309 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -595,19 +595,19 @@ make_1d_mipmap(struct gen_mipmap_state *ctx, dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, PIPE_BUFFER_USAGE_CPU_WRITE); - srcMap = ((ubyte *) winsys->buffer_map(winsys, srcSurf->buffer, - PIPE_BUFFER_USAGE_CPU_READ) + srcMap = ((ubyte *) pipe_buffer_map(screen, srcSurf->buffer, + PIPE_BUFFER_USAGE_CPU_READ) + srcSurf->offset); - dstMap = ((ubyte *) winsys->buffer_map(winsys, dstSurf->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE) + dstMap = ((ubyte *) pipe_buffer_map(screen, dstSurf->buffer, + PIPE_BUFFER_USAGE_CPU_WRITE) + dstSurf->offset); reduce_1d(pt->format, srcSurf->width, srcMap, dstSurf->width, dstMap); - winsys->buffer_unmap(winsys, srcSurf->buffer); - winsys->buffer_unmap(winsys, dstSurf->buffer); + pipe_buffer_unmap(screen, srcSurf->buffer); + pipe_buffer_unmap(screen, dstSurf->buffer); pipe_surface_reference(&srcSurf, NULL); pipe_surface_reference(&dstSurf, NULL); @@ -639,11 +639,11 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, PIPE_BUFFER_USAGE_CPU_WRITE); - srcMap = ((ubyte *) winsys->buffer_map(winsys, srcSurf->buffer, - PIPE_BUFFER_USAGE_CPU_READ) + srcMap = ((ubyte *) pipe_buffer_map(screen, srcSurf->buffer, + PIPE_BUFFER_USAGE_CPU_READ) + srcSurf->offset); - dstMap = ((ubyte *) winsys->buffer_map(winsys, dstSurf->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE) + dstMap = ((ubyte *) pipe_buffer_map(screen, dstSurf->buffer, + PIPE_BUFFER_USAGE_CPU_WRITE) + dstSurf->offset); reduce_2d(pt->format, @@ -652,8 +652,8 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, dstSurf->width, dstSurf->height, dstSurf->stride, dstMap); - winsys->buffer_unmap(winsys, srcSurf->buffer); - winsys->buffer_unmap(winsys, dstSurf->buffer); + pipe_buffer_unmap(screen, srcSurf->buffer); + pipe_buffer_unmap(screen, dstSurf->buffer); pipe_surface_reference(&srcSurf, NULL); pipe_surface_reference(&dstSurf, NULL); @@ -759,10 +759,10 @@ util_create_gen_mipmap(struct pipe_context *pipe, /* fragment shader */ ctx->fs = util_make_fragment_tex_shader(pipe, &ctx->frag_shader); - ctx->vbuf = pipe->winsys->buffer_create(pipe->winsys, - 32, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(ctx->vertices)); + ctx->vbuf = pipe_buffer_create(pipe->screen, + 32, + PIPE_BUFFER_USAGE_VERTEX, + sizeof(ctx->vertices)); if (!ctx->vbuf) { FREE(ctx); return NULL; @@ -805,12 +805,12 @@ set_vertex_data(struct gen_mipmap_state *ctx, float width, float height) ctx->vertices[3][1][0] = 0.0f; ctx->vertices[3][1][1] = 1.0f; - buf = ctx->pipe->winsys->buffer_map(ctx->pipe->winsys, ctx->vbuf, - PIPE_BUFFER_USAGE_CPU_WRITE); + buf = pipe_buffer_map(ctx->pipe->screen, ctx->vbuf, + PIPE_BUFFER_USAGE_CPU_WRITE); memcpy(buf, ctx->vertices, sizeof(ctx->vertices)); - ctx->pipe->winsys->buffer_unmap(ctx->pipe->winsys, ctx->vbuf); + pipe_buffer_unmap(ctx->pipe->screen, ctx->vbuf); } @@ -829,7 +829,7 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx) FREE((void*) ctx->vert_shader.tokens); FREE((void*) ctx->frag_shader.tokens); - pipe->winsys->buffer_destroy(pipe->winsys, ctx->vbuf); + pipe_buffer_reference(pipe->winsys, &ctx->vbuf, NULL); FREE(ctx); } -- cgit v1.2.3 From 135a0dd75cb91285ca0fcec657f0256066e0a73e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 4 Sep 2008 11:14:35 +0900 Subject: gallium: Fix typo. --- src/gallium/auxiliary/util/u_gen_mipmap.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 8c983da309..b19a649bbc 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -580,7 +580,6 @@ make_1d_mipmap(struct gen_mipmap_state *ctx, { struct pipe_context *pipe = ctx->pipe; struct pipe_screen *screen = pipe->screen; - struct pipe_winsys *winsys = pipe->winsys; const uint zslice = 0; uint dstLevel; @@ -622,7 +621,6 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, { struct pipe_context *pipe = ctx->pipe; struct pipe_screen *screen = pipe->screen; - struct pipe_winsys *winsys = pipe->winsys; const uint zslice = 0; uint dstLevel; @@ -829,7 +827,7 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx) FREE((void*) ctx->vert_shader.tokens); FREE((void*) ctx->frag_shader.tokens); - pipe_buffer_reference(pipe->winsys, &ctx->vbuf, NULL); + pipe_buffer_reference(pipe->screen, &ctx->vbuf, NULL); FREE(ctx); } -- cgit v1.2.3 From ce8c08c2b03d8dc363deb14124372a6bbb4efd99 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 4 Sep 2008 10:31:23 -0600 Subject: gallium: new util_surface_copy() and util_surface_fill() helpers These are plug-in fallbacks for the pipe->surface_copy() and pipe->surface_fill() functions. --- src/gallium/auxiliary/util/u_rect.c | 178 ++++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_rect.h | 18 ++++ 2 files changed, 196 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c index b31ab5415f..f5619ef791 100644 --- a/src/gallium/auxiliary/util/u_rect.c +++ b/src/gallium/auxiliary/util/u_rect.c @@ -32,6 +32,8 @@ #include "pipe/p_defines.h" #include "pipe/p_format.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" #include "util/u_rect.h" @@ -148,3 +150,179 @@ pipe_fill_rect(ubyte * dst, break; } } + + + +/** + * Fallback function for pipe->surface_copy(). + * Note: (X,Y)=(0,0) is always the upper-left corner. + * if do_flip, flip the image vertically on its way from src rect to dst rect. + * XXX should probably put this in new u_surface.c file... + */ +void +util_surface_copy(struct pipe_context *pipe, + boolean do_flip, + struct pipe_surface *dst, + unsigned dst_x, unsigned dst_y, + struct pipe_surface *src, + unsigned src_x, unsigned src_y, + unsigned w, unsigned h) +{ + struct pipe_screen *screen = pipe->screen; + struct pipe_surface *new_src = NULL, *new_dst = NULL; + void *dst_map; + const void *src_map; + + assert(dst->block.size == src->block.size); + assert(dst->block.width == src->block.width); + assert(dst->block.height == src->block.height); + + if ((src->usage & PIPE_BUFFER_USAGE_CPU_READ) == 0) { + /* Need to create new src surface which is CPU readable */ + assert(src->texture); + if (!src->texture) + return; + new_src = screen->get_tex_surface(screen, + src->texture, + src->face, + src->level, + src->zslice, + PIPE_BUFFER_USAGE_CPU_READ); + src = new_src; + } + + if ((dst->usage & PIPE_BUFFER_USAGE_CPU_WRITE) == 0) { + /* Need to create new dst surface which is CPU writable */ + assert(dst->texture); + if (!dst->texture) + return; + new_dst = screen->get_tex_surface(screen, + dst->texture, + dst->face, + dst->level, + dst->zslice, + PIPE_BUFFER_USAGE_CPU_WRITE); + dst = new_dst; + } + + src_map = pipe->screen->surface_map(screen, + src, PIPE_BUFFER_USAGE_CPU_READ); + dst_map = pipe->screen->surface_map(screen, + dst, PIPE_BUFFER_USAGE_CPU_WRITE); + + assert(src_map); + assert(dst_map); + + if (src_map && dst_map) { + /* If do_flip, invert src_y position and pass negative src stride */ + pipe_copy_rect(dst_map, + &dst->block, + dst->stride, + dst_x, dst_y, + w, h, + src_map, + do_flip ? -(int) src->stride : src->stride, + src_x, src_y); + } + + pipe->screen->surface_unmap(pipe->screen, src); + pipe->screen->surface_unmap(pipe->screen, dst); + + if (new_src) + screen->tex_surface_release(screen, &new_src); + if (new_dst) + screen->tex_surface_release(screen, &new_dst); +} + + + +static void * +get_pointer(struct pipe_surface *dst, void *dst_map, unsigned x, unsigned y) +{ + return (char *)dst_map + + y / dst->block.height * dst->stride + + x / dst->block.width * dst->block.size; +} + + +#define UBYTE_TO_USHORT(B) ((B) | ((B) << 8)) + + +/** + * Fallback for pipe->surface_fill() function. + * XXX should probably put this in new u_surface.c file... + */ +void +util_surface_fill(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, unsigned value) +{ + struct pipe_screen *screen = pipe->screen; + struct pipe_surface *new_dst = NULL; + void *dst_map; + + if ((dst->usage & PIPE_BUFFER_USAGE_CPU_WRITE) == 0) { + /* Need to create new dst surface which is CPU writable */ + assert(dst->texture); + if (!dst->texture) + return; + new_dst = screen->get_tex_surface(screen, + dst->texture, + dst->face, + dst->level, + dst->zslice, + PIPE_BUFFER_USAGE_CPU_WRITE); + dst = new_dst; + } + + dst_map = pipe->screen->surface_map(screen, + dst, PIPE_BUFFER_USAGE_CPU_WRITE); + + assert(dst_map); + + if (dst_map) { + assert(dst->stride > 0); + + switch (dst->block.size) { + case 1: + case 2: + case 4: + pipe_fill_rect(dst_map, &dst->block, dst->stride, + dstx, dsty, width, height, value); + break; + case 8: + { + /* expand the 4-byte clear value to an 8-byte value */ + ushort *row = (ushort *) get_pointer(dst, dst_map, dstx, dsty); + ushort val0 = UBYTE_TO_USHORT((value >> 0) & 0xff); + ushort val1 = UBYTE_TO_USHORT((value >> 8) & 0xff); + ushort val2 = UBYTE_TO_USHORT((value >> 16) & 0xff); + ushort val3 = UBYTE_TO_USHORT((value >> 24) & 0xff); + unsigned i, j; + val0 = (val0 << 8) | val0; + val1 = (val1 << 8) | val1; + val2 = (val2 << 8) | val2; + val3 = (val3 << 8) | val3; + for (i = 0; i < height; i++) { + for (j = 0; j < width; j++) { + row[j*4+0] = val0; + row[j*4+1] = val1; + row[j*4+2] = val2; + row[j*4+3] = val3; + } + row += dst->stride/2; + } + } + break; + default: + assert(0); + break; + } + } + + pipe->screen->surface_unmap(pipe->screen, dst); + + if (new_dst) + screen->tex_surface_release(screen, &new_dst); +} diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h index fba4808864..59e842e16d 100644 --- a/src/gallium/auxiliary/util/u_rect.h +++ b/src/gallium/auxiliary/util/u_rect.h @@ -37,6 +37,9 @@ #include "pipe/p_format.h" +struct pipe_context; +struct pipe_surface; + extern void pipe_copy_rect(ubyte * dst, const struct pipe_format_block *block, @@ -50,5 +53,20 @@ pipe_fill_rect(ubyte * dst, const struct pipe_format_block *block, unsigned width, unsigned height, uint32_t value); +extern void +util_surface_copy(struct pipe_context *pipe, + boolean do_flip, + struct pipe_surface *dst, + unsigned dst_x, unsigned dst_y, + struct pipe_surface *src, + unsigned src_x, unsigned src_y, + unsigned w, unsigned h); + +extern void +util_surface_fill(struct pipe_context *pipe, + struct pipe_surface *dst, + unsigned dstx, unsigned dsty, + unsigned width, unsigned height, unsigned value); + #endif /* U_RECT_H */ -- cgit v1.2.3 From 50524c284528d467082266dfa18112f3cdc6202d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 5 Sep 2008 10:27:03 +0900 Subject: gallium: Pass 512 bytes max to EngDebugPrint. --- src/gallium/auxiliary/util/p_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 7d1dba5a24..d56449e7ec 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -76,7 +76,7 @@ void _debug_vprintf(const char *format, va_list ap) /* EngDebugPrint does not handle float point arguments, so we need to use * our own vsnprintf implementation. It is also very slow, so buffer until * we find a newline. */ - static char buf[512 + 1] = {'\0'}; + static char buf[512] = {'\0'}; size_t len = strlen(buf); int ret = util_vsnprintf(buf + len, sizeof(buf) - len, format, ap); if(ret > (int)(sizeof(buf) - len - 1) || util_strchr(buf + len, '\n')) { -- cgit v1.2.3 From b8a7eef242f6bb97d90f6e0303d270b2cbc58421 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 5 Sep 2008 10:29:17 +0900 Subject: tgsi: Refactor tgsi_dump to avoid using string buffers when dumping. This fixes a stack overflow when dumping shaders. It ended up being pretty much as the original code Michal had before, before I went on a cleanup rampage on it and took things that ended up needing... --- src/gallium/auxiliary/tgsi/tgsi_dump.c | 160 +++++++++++++++++---------------- 1 file changed, 83 insertions(+), 77 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 58693db13a..afc8ffa553 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -37,30 +37,40 @@ struct dump_ctx uint instno; - struct util_strbuf *sbuf; + void (*printf)(struct dump_ctx *ctx, const char *format, ...); }; +static void +dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...) +{ + va_list ap; + (void)ctx; + va_start(ap, format); + debug_vprintf(format, ap); + va_end(ap); +} + static void dump_enum( - struct util_strbuf *sbuf, + struct dump_ctx *ctx, uint e, const char **enums, uint enum_count ) { if (e >= enum_count) - util_strbuf_printf( sbuf, "%u", e ); + ctx->printf( ctx, "%u", e ); else - util_strbuf_printf( sbuf, "%s", enums[e] ); + ctx->printf( ctx, "%s", enums[e] ); } -#define EOL() util_strbuf_printf( sbuf, "\n" ) -#define TXT(S) util_strbuf_printf( sbuf, "%s", S ) -#define CHR(C) util_strbuf_printf( sbuf, "%c", C ) -#define UIX(I) util_strbuf_printf( sbuf, "0x%x", I ) -#define UID(I) util_strbuf_printf( sbuf, "%u", I ) -#define SID(I) util_strbuf_printf( sbuf, "%d", I ) -#define FLT(F) util_strbuf_printf( sbuf, "%10.4f", F ) -#define ENM(E,ENUMS) dump_enum( sbuf, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) +#define EOL() ctx->printf( ctx, "\n" ) +#define TXT(S) ctx->printf( ctx, "%s", S ) +#define CHR(C) ctx->printf( ctx, "%c", C ) +#define UIX(I) ctx->printf( ctx, "0x%x", I ) +#define UID(I) ctx->printf( ctx, "%u", I ) +#define SID(I) ctx->printf( ctx, "%d", I ) +#define FLT(F) ctx->printf( ctx, "%10.4f", F ) +#define ENM(E,ENUMS) dump_enum( ctx, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) static const char *processor_type_names[] = { @@ -148,7 +158,7 @@ static const char *modulate_names[TGSI_MODULATE_COUNT] = static void _dump_register( - struct util_strbuf *sbuf, + struct dump_ctx *ctx, uint file, int first, int last ) @@ -165,7 +175,7 @@ _dump_register( static void _dump_register_ind( - struct util_strbuf *sbuf, + struct dump_ctx *ctx, uint file, int index, uint ind_file, @@ -187,7 +197,7 @@ _dump_register_ind( static void _dump_writemask( - struct util_strbuf *sbuf, + struct dump_ctx *ctx, uint writemask ) { if (writemask != TGSI_WRITEMASK_XYZW) { @@ -208,18 +218,17 @@ iter_declaration( struct tgsi_iterate_context *iter, struct tgsi_full_declaration *decl ) { - struct dump_ctx *ctx = (struct dump_ctx *) iter; - struct util_strbuf *sbuf = ctx->sbuf; + struct dump_ctx *ctx = (struct dump_ctx *)iter; TXT( "DCL " ); _dump_register( - sbuf, + ctx, decl->Declaration.File, decl->DeclarationRange.First, decl->DeclarationRange.Last ); _dump_writemask( - sbuf, + ctx, decl->Declaration.UsageMask ); if (decl->Declaration.Semantic) { @@ -245,17 +254,11 @@ void tgsi_dump_declaration( const struct tgsi_full_declaration *decl ) { - static char str[1024]; - struct util_strbuf sbuf; struct dump_ctx ctx; - util_strbuf_init(&sbuf, str, sizeof(str)); - - ctx.sbuf = &sbuf; + ctx.printf = dump_ctx_printf; iter_declaration( &ctx.iter, (struct tgsi_full_declaration *)decl ); - - debug_printf("%s", str); } static boolean @@ -264,7 +267,6 @@ iter_immediate( struct tgsi_full_immediate *imm ) { struct dump_ctx *ctx = (struct dump_ctx *) iter; - struct util_strbuf *sbuf = ctx->sbuf; uint i; @@ -295,17 +297,11 @@ void tgsi_dump_immediate( const struct tgsi_full_immediate *imm ) { - static char str[1024]; - struct util_strbuf sbuf; struct dump_ctx ctx; - util_strbuf_init(&sbuf, str, sizeof(str)); - - ctx.sbuf = &sbuf; + ctx.printf = dump_ctx_printf; iter_immediate( &ctx.iter, (struct tgsi_full_immediate *)imm ); - - debug_printf("%s", str); } static boolean @@ -314,7 +310,6 @@ iter_instruction( struct tgsi_full_instruction *inst ) { struct dump_ctx *ctx = (struct dump_ctx *) iter; - struct util_strbuf *sbuf = ctx->sbuf; uint instno = ctx->instno++; uint i; @@ -345,12 +340,12 @@ iter_instruction( CHR( ' ' ); _dump_register( - sbuf, + ctx, dst->DstRegister.File, dst->DstRegister.Index, dst->DstRegister.Index ); ENM( dst->DstRegisterExtModulate.Modulate, modulate_names ); - _dump_writemask( sbuf, dst->DstRegister.WriteMask ); + _dump_writemask( ctx, dst->DstRegister.WriteMask ); first_reg = FALSE; } @@ -377,7 +372,7 @@ iter_instruction( if (src->SrcRegister.Indirect) { _dump_register_ind( - sbuf, + ctx, src->SrcRegister.File, src->SrcRegister.Index, src->SrcRegisterInd.File, @@ -385,7 +380,7 @@ iter_instruction( } else { _dump_register( - sbuf, + ctx, src->SrcRegister.File, src->SrcRegister.Index, src->SrcRegister.Index ); @@ -460,18 +455,12 @@ tgsi_dump_instruction( const struct tgsi_full_instruction *inst, uint instno ) { - static char str[1024]; - struct util_strbuf sbuf; struct dump_ctx ctx; - util_strbuf_init(&sbuf, str, sizeof(str)); - ctx.instno = instno; - ctx.sbuf = &sbuf; + ctx.printf = dump_ctx_printf; iter_instruction( &ctx.iter, (struct tgsi_full_instruction *)inst ); - - debug_printf("%s", str); } static boolean @@ -479,7 +468,6 @@ prolog( struct tgsi_iterate_context *iter ) { struct dump_ctx *ctx = (struct dump_ctx *) iter; - struct util_strbuf *sbuf = ctx->sbuf; ENM( iter->processor.Processor, processor_type_names ); UID( iter->version.MajorVersion ); CHR( '.' ); @@ -489,17 +477,12 @@ prolog( } void -tgsi_dump_str( +tgsi_dump( const struct tgsi_token *tokens, - uint flags, - char *str, - size_t size) + uint flags ) { - struct util_strbuf sbuf; struct dump_ctx ctx; - util_strbuf_init(&sbuf, str, size); - ctx.iter.prolog = prolog; ctx.iter.iterate_instruction = iter_instruction; ctx.iter.iterate_declaration = iter_declaration; @@ -507,34 +490,57 @@ tgsi_dump_str( ctx.iter.epilog = NULL; ctx.instno = 0; - ctx.sbuf = &sbuf; + ctx.printf = dump_ctx_printf; tgsi_iterate_shader( tokens, &ctx.iter ); } +struct str_dump_ctx +{ + struct dump_ctx base; + char *str; + char *ptr; + size_t left; +}; + +static void +str_dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...) +{ + struct str_dump_ctx *sctx = (struct str_dump_ctx *)ctx; + + if(sctx->left > 1) { + size_t written; + va_list ap; + va_start(ap, format); + written = util_vsnprintf(sctx->ptr, sctx->left, format, ap); + va_end(ap); + sctx->ptr += written; + sctx->left -= written; + } +} + void -tgsi_dump( +tgsi_dump_str( const struct tgsi_token *tokens, - uint flags ) + uint flags, + char *str, + size_t size) { - static char str[16000]; - uint len; - char *p = str; - - tgsi_dump_str(tokens, flags, str, sizeof(str)); - - /* Workaround output buffer size limitations. - */ - len = strlen( str ); - while (len > 256) { - char piggy_bank; - - piggy_bank = p[256]; - p[256] = '\0'; - debug_printf( "%s", p ); - p[256] = piggy_bank; - p += 256; - len -= 256; - } - debug_printf( "%s", p ); + struct str_dump_ctx ctx; + + ctx.base.iter.prolog = prolog; + ctx.base.iter.iterate_instruction = iter_instruction; + ctx.base.iter.iterate_declaration = iter_declaration; + ctx.base.iter.iterate_immediate = iter_immediate; + ctx.base.iter.epilog = NULL; + + ctx.base.instno = 0; + ctx.base.printf = &str_dump_ctx_printf; + + ctx.str = str; + ctx.str[0] = 0; + ctx.ptr = str; + ctx.left = size; + + tgsi_iterate_shader( tokens, &ctx.base.iter ); } -- cgit v1.2.3 From 7071e774e49f0e7b5997ab634f5523efb2666952 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 5 Sep 2008 10:09:48 -0600 Subject: gallium: new util_unpack_color_ub() function --- src/gallium/auxiliary/util/u_pack_color.h | 157 ++++++++++++++++++++++++++++++ 1 file changed, 157 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 39e4ae9d07..43cb7e56ec 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -141,6 +141,161 @@ util_pack_color_ub(ubyte r, ubyte g, ubyte b, ubyte a, } +/** + * Unpack RGBA from a packed pixel, returning values as ubytes in [0,255]. + */ +static INLINE void +util_unpack_color_ub(enum pipe_format format, const void *src, + ubyte *r, ubyte *g, ubyte *b, ubyte *a) +{ + switch (format) { + case PIPE_FORMAT_R8G8B8A8_UNORM: + { + uint p = ((const uint *) src)[0]; + *r = (ubyte) ((p >> 24) & 0xff); + *g = (ubyte) ((p >> 16) & 0xff); + *b = (ubyte) ((p >> 8) & 0xff); + *a = (ubyte) ((p >> 0) & 0xff); + } + return; + case PIPE_FORMAT_R8G8B8X8_UNORM: + { + uint p = ((const uint *) src)[0]; + *r = (ubyte) ((p >> 24) & 0xff); + *g = (ubyte) ((p >> 16) & 0xff); + *b = (ubyte) ((p >> 8) & 0xff); + *a = (ubyte) 0xff; + } + return; + case PIPE_FORMAT_A8R8G8B8_UNORM: + { + uint p = ((const uint *) src)[0]; + *r = (ubyte) ((p >> 16) & 0xff); + *g = (ubyte) ((p >> 8) & 0xff); + *b = (ubyte) ((p >> 0) & 0xff); + *a = (ubyte) ((p >> 24) & 0xff); + } + return; + case PIPE_FORMAT_X8R8G8B8_UNORM: + { + uint p = ((const uint *) src)[0]; + *r = (ubyte) ((p >> 16) & 0xff); + *g = (ubyte) ((p >> 8) & 0xff); + *b = (ubyte) ((p >> 0) & 0xff); + *a = (ubyte) 0xff; + } + return; + case PIPE_FORMAT_B8G8R8A8_UNORM: + { + uint p = ((const uint *) src)[0]; + *r = (ubyte) ((p >> 8) & 0xff); + *g = (ubyte) ((p >> 16) & 0xff); + *b = (ubyte) ((p >> 24) & 0xff); + *a = (ubyte) ((p >> 0) & 0xff); + } + return; + case PIPE_FORMAT_B8G8R8X8_UNORM: + { + uint p = ((const uint *) src)[0]; + *r = (ubyte) ((p >> 8) & 0xff); + *g = (ubyte) ((p >> 16) & 0xff); + *b = (ubyte) ((p >> 24) & 0xff); + *a = (ubyte) 0xff; + } + return; + case PIPE_FORMAT_R5G6B5_UNORM: + { + ushort p = ((const ushort *) src)[0]; + *r = (ubyte) (((p >> 8) & 0xf8) | ((p >> 13) & 0x7)); + *g = (ubyte) (((p >> 3) & 0xfc) | ((p >> 9) & 0x3)); + *b = (ubyte) (((p << 3) & 0xf8) | ((p >> 2) & 0x7)); + *a = (ubyte) 0xff; + } + return; + case PIPE_FORMAT_A1R5G5B5_UNORM: + { + ushort p = ((const ushort *) src)[0]; + *r = (ubyte) (((p >> 7) & 0xf8) | ((p >> 12) & 0x7)); + *g = (ubyte) (((p >> 2) & 0xf8) | ((p >> 7) & 0x7)); + *b = (ubyte) (((p << 3) & 0xf8) | ((p >> 2) & 0x7)); + *a = (ubyte) (0xff * (p >> 15)); + } + return; + case PIPE_FORMAT_A4R4G4B4_UNORM: + { + ushort p = ((const ushort *) src)[0]; + *r = (ubyte) (((p >> 4) & 0xf0) | ((p >> 8) & 0xf)); + *g = (ubyte) (((p >> 0) & 0xf0) | ((p >> 4) & 0xf)); + *b = (ubyte) (((p << 4) & 0xf0) | ((p >> 0) & 0xf)); + *a = (ubyte) (((p >> 8) & 0xf0) | ((p >> 12) & 0xf)); + } + return; + case PIPE_FORMAT_A8_UNORM: + { + ubyte p = ((const ubyte *) src)[0]; + *r = *g = *b = (ubyte) 0xff; + *a = p; + } + return; + case PIPE_FORMAT_L8_UNORM: + { + ubyte p = ((const ubyte *) src)[0]; + *r = *g = *b = p; + *a = (ubyte) 0xff; + } + return; + case PIPE_FORMAT_I8_UNORM: + { + ubyte p = ((const ubyte *) src)[0]; + *r = *g = *b = *a = p; + } + return; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + { + const float *p = (const float *) src; + *r = float_to_ubyte(p[0]); + *g = float_to_ubyte(p[1]); + *b = float_to_ubyte(p[2]); + *a = float_to_ubyte(p[3]); + } + return; + case PIPE_FORMAT_R32G32B32_FLOAT: + { + const float *p = (const float *) src; + *r = float_to_ubyte(p[0]); + *g = float_to_ubyte(p[1]); + *b = float_to_ubyte(p[2]); + *a = (ubyte) 0xff; + } + return; + + case PIPE_FORMAT_R32G32_FLOAT: + { + const float *p = (const float *) src; + *r = float_to_ubyte(p[0]); + *g = float_to_ubyte(p[1]); + *b = *a = (ubyte) 0xff; + } + return; + + case PIPE_FORMAT_R32_FLOAT: + { + const float *p = (const float *) src; + *r = float_to_ubyte(p[0]); + *g = *b = *a = (ubyte) 0xff; + } + return; + + /* XXX lots more cases to add */ + default: + debug_print_format("gallium: unhandled format in util_unpack_color_ub()", + format); + assert(0); + } +} + + + /** * Note rgba outside [0,1] will be clamped for int pixel formats. */ @@ -157,6 +312,8 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest) a = float_to_ubyte(rgba[3]); } + printf("%s %s\n", __FUNCTION__, pf_name(format)); + switch (format) { case PIPE_FORMAT_R8G8B8A8_UNORM: { -- cgit v1.2.3 From 2877727c9bb5496caf3c01625513900b03953fcc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 5 Sep 2008 10:10:18 -0600 Subject: gallium: remove debug code from prev commit --- src/gallium/auxiliary/util/u_pack_color.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h index 43cb7e56ec..e0e8aa8e9f 100644 --- a/src/gallium/auxiliary/util/u_pack_color.h +++ b/src/gallium/auxiliary/util/u_pack_color.h @@ -312,8 +312,6 @@ util_pack_color(const float rgba[4], enum pipe_format format, void *dest) a = float_to_ubyte(rgba[3]); } - printf("%s %s\n", __FUNCTION__, pf_name(format)); - switch (format) { case PIPE_FORMAT_R8G8B8A8_UNORM: { -- cgit v1.2.3 From 86a15954bf2adad0ab0dc5713a5bb446c9584103 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 8 Sep 2008 11:09:23 +0900 Subject: util: Rip-off trace's os-independent stream code. --- src/gallium/auxiliary/util/Makefile | 2 + src/gallium/auxiliary/util/SConscript | 2 + src/gallium/auxiliary/util/u_stream.h | 56 +++++++++ src/gallium/auxiliary/util/u_stream_stdc.c | 104 ++++++++++++++++ src/gallium/auxiliary/util/u_stream_wd.c | 183 +++++++++++++++++++++++++++++ 5 files changed, 347 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_stream.h create mode 100644 src/gallium/auxiliary/util/u_stream_stdc.c create mode 100644 src/gallium/auxiliary/util/u_stream_wd.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 61f4344d17..d3951e4e7d 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -15,6 +15,8 @@ C_SOURCES = \ u_rect.c \ u_simple_shaders.c \ u_snprintf.c \ + u_stream_stdc.c \ + u_stream_wd.c \ u_tile.c \ u_time.c \ u_timed_winsys.c diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index ce3fad7068..e65c17b1cc 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -16,6 +16,8 @@ util = env.ConvenienceLibrary( 'u_rect.c', 'u_simple_shaders.c', 'u_snprintf.c', + 'u_stream_stdc.c', + 'u_stream_wd.c', 'u_tile.c', 'u_time.c', ]) diff --git a/src/gallium/auxiliary/util/u_stream.h b/src/gallium/auxiliary/util/u_stream.h new file mode 100644 index 0000000000..516e634a99 --- /dev/null +++ b/src/gallium/auxiliary/util/u_stream.h @@ -0,0 +1,56 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Cross-platform sequential access stream abstraction. + */ + +#ifndef U_STREAM_H +#define U_STREAM_H + + +#include "pipe/p_compiler.h" + + +struct util_stream; + + +struct util_stream * +util_stream_create(const char *filename); + +boolean +util_stream_write(struct util_stream *stream, const void *data, size_t size); + +void +util_stream_flush(struct util_stream *stream); + +void +util_stream_close(struct util_stream *stream); + + +#endif /* U_STREAM_H */ diff --git a/src/gallium/auxiliary/util/u_stream_stdc.c b/src/gallium/auxiliary/util/u_stream_stdc.c new file mode 100644 index 0000000000..03c845b6a7 --- /dev/null +++ b/src/gallium/auxiliary/util/u_stream_stdc.c @@ -0,0 +1,104 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Stream implementation based on the Standard C Library. + */ + +#include "pipe/p_config.h" + +#if defined(PIPE_OS_LINUX) + +#include + +#include "util/u_memory.h" + +#include "u_stream.h" + + +struct util_stream +{ + FILE *file; +}; + + +struct util_stream * +util_stream_create(const char *filename) +{ + struct util_stream *stream; + + stream = CALLOC_STRUCT(util_stream); + if(!stream) + goto error1; + + stream->file = fopen(filename, "w"); + if(!stream->file) + goto error2; + + return stream; + +error2: + FREE(stream); +error1: + return NULL; +} + + +boolean +util_stream_write(struct util_stream *stream, const void *data, size_t size) +{ + if(!stream) + return FALSE; + + return fwrite(data, size, 1, stream->file) == size ? TRUE : FALSE; +} + + +void +util_stream_flush(struct util_stream *stream) +{ + if(!stream) + return; + + fflush(stream->file); +} + + +void +util_stream_close(struct util_stream *stream) +{ + if(!stream) + return; + + fclose(stream->file); + + FREE(stream); +} + + +#endif diff --git a/src/gallium/auxiliary/util/u_stream_wd.c b/src/gallium/auxiliary/util/u_stream_wd.c new file mode 100644 index 0000000000..c8bb2ad011 --- /dev/null +++ b/src/gallium/auxiliary/util/u_stream_wd.c @@ -0,0 +1,183 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Stream implementation for the Windows Display driver. + */ + +#include "pipe/p_config.h" + +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) + +#include +#include + +#include "util/u_memory.h" +#include "util/u_string.h" + +#include "u_stream.h" + + +#define MAP_FILE_SIZE (4*1024*1024) + + +struct util_stream +{ + char filename[MAX_PATH + 1]; + WCHAR wFileName[MAX_PATH + 1]; + ULONG_PTR iFile; + char *pMap; + size_t written; + unsigned suffix; +}; + + +static INLINE boolean +util_stream_map(struct util_stream *stream) +{ + ULONG BytesInUnicodeString; + static char filename[MAX_PATH + 1]; + unsigned filename_len; + + filename_len = util_snprintf(filename, + sizeof(filename), + "\\??\\%s.%04x", + stream->filename, + stream->suffix++); + + EngMultiByteToUnicodeN( + stream->wFileName, + sizeof(stream->wFileName), + &BytesInUnicodeString, + filename, + filename_len); + + stream->pMap = EngMapFile(stream->wFileName, MAP_FILE_SIZE, &stream->iFile); + if(!stream->pMap) + return FALSE; + + memset(stream->pMap, 0, MAP_FILE_SIZE); + stream->written = 0; + + return TRUE; +} + + +static INLINE void +util_stream_unmap(struct util_stream *stream) +{ + EngUnmapFile(stream->iFile); + if(stream->written < MAP_FILE_SIZE) { + /* Truncate file size */ + stream->pMap = EngMapFile(stream->wFileName, stream->written, &stream->iFile); + if(stream->pMap) + EngUnmapFile(stream->iFile); + } + + stream->pMap = NULL; +} + + +struct util_stream * +util_stream_create(const char *filename) +{ + struct util_stream *stream; + + stream = CALLOC_STRUCT(util_stream); + if(!stream) + goto error1; + + strncpy(stream->filename, filename, sizeof(stream->filename)); + + if(!util_stream_map(stream)) + goto error2; + + return stream; + +error2: + FREE(stream); +error1: + return NULL; +} + + +static INLINE void +util_stream_copy(struct util_stream *stream, const char *data, size_t size) +{ + assert(stream->written + size <= MAP_FILE_SIZE); + memcpy(stream->pMap + stream->written, data, size); + stream->written += size; +} + + +boolean +util_stream_write(struct util_stream *stream, const void *data, size_t size) +{ + if(!stream) + return FALSE; + + if(!stream->pMap) + return FALSE; + + while(stream->written + size > MAP_FILE_SIZE) { + size_t step = MAP_FILE_SIZE - stream->written; + util_stream_copy(stream, data, step); + data = (const char *)data + step; + size -= step; + + util_stream_unmap(stream); + if(!util_stream_map(stream)) + return FALSE; + } + + util_stream_copy(stream, data, size); + + return TRUE; +} + + +void +util_stream_flush(struct util_stream *stream) +{ + (void)stream; +} + + +void +util_stream_close(struct util_stream *stream) +{ + if(!stream) + return; + + util_stream_unmap(stream); + + FREE(stream); +} + + +#endif -- cgit v1.2.3 From 1da0a13389ce9709586058a8807c0c4120e520a2 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 8 Sep 2008 22:21:33 +0900 Subject: util: Dump surfaces to BMP. This allows quick inspection of surfaces in mass scale. --- src/gallium/auxiliary/util/p_debug.c | 109 +++++++++++++++++++++++++++++++++++ src/gallium/include/pipe/p_debug.h | 3 + 2 files changed, 112 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index d56449e7ec..131e9b026c 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -55,7 +55,11 @@ #include "pipe/p_format.h" #include "pipe/p_state.h" #include "pipe/p_inlines.h" +#include "util/u_memory.h" #include "util/u_string.h" +#include "util/u_stream.h" +#include "util/u_math.h" +#include "util/u_tile.h" #ifdef PIPE_SUBSYSTEM_WINDOWS_DISPLAY @@ -584,4 +588,109 @@ error2: error1: ; } + + +#pragma pack(push,2) +struct bmp_file_header { + uint16_t bfType; + uint32_t bfSize; + uint16_t bfReserved1; + uint16_t bfReserved2; + uint32_t bfOffBits; +}; +#pragma pack(pop) + +struct bmp_info_header { + uint32_t biSize; + int32_t biWidth; + int32_t biHeight; + uint16_t biPlanes; + uint16_t biBitCount; + uint32_t biCompression; + uint32_t biSizeImage; + int32_t biXPelsPerMeter; + int32_t biYPelsPerMeter; + uint32_t biClrUsed; + uint32_t biClrImportant; +}; + +struct bmp_rgb_quad { + uint8_t rgbBlue; + uint8_t rgbGreen; + uint8_t rgbRed; + uint8_t rgbAlpha; +}; + +void +debug_dump_surface_bmp(const char *filename, + struct pipe_surface *surface) +{ + struct util_stream *stream; + unsigned surface_usage; + struct bmp_file_header bmfh; + struct bmp_info_header bmih; + float *rgba; + unsigned x, y; + + if (!surface) + goto error1; + + rgba = MALLOC(surface->width*4*sizeof(float)); + if(!rgba) + goto error1; + + bmfh.bfType = 0x4d42; + bmfh.bfSize = 14 + 40 + surface->height*surface->width*4; + bmfh.bfReserved1 = 0; + bmfh.bfReserved2 = 0; + bmfh.bfOffBits = 14 + 40; + + bmih.biSize = 40; + bmih.biWidth = surface->width; + bmih.biHeight = surface->height; + bmih.biPlanes = 1; + bmih.biBitCount = 32; + bmih.biCompression = 0; + bmih.biSizeImage = surface->height*surface->width*4; + bmih.biXPelsPerMeter = 0; + bmih.biYPelsPerMeter = 0; + bmih.biClrUsed = 0; + bmih.biClrImportant = 0; + + stream = util_stream_create(filename); + if(!stream) + goto error2; + + util_stream_write(stream, &bmfh, 14); + util_stream_write(stream, &bmih, 40); + + /* XXX: force mappable surface */ + surface_usage = surface->usage; + surface->usage |= PIPE_BUFFER_USAGE_CPU_READ; + + y = surface->height; + while(y--) { + pipe_get_tile_rgba(surface, + 0, y, surface->width, 1, + rgba); + for(x = 0; x < surface->width; ++x) + { + struct bmp_rgb_quad pixel; + pixel.rgbRed = float_to_ubyte(rgba[x*4 + 0]); + pixel.rgbGreen = float_to_ubyte(rgba[x*4 + 1]); + pixel.rgbBlue = float_to_ubyte(rgba[x*4 + 2]); + pixel.rgbAlpha = float_to_ubyte(rgba[x*4 + 3]); + util_stream_write(stream, &pixel, 4); + } + } + + surface->usage = surface_usage; + + util_stream_close(stream); +error2: + FREE(rgba); +error1: + ; +} + #endif diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index 6478ae2f08..cb6196aa9f 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -340,9 +340,12 @@ void debug_dump_image(const char *prefix, const void *data); void debug_dump_surface(const char *prefix, struct pipe_surface *surface); +void debug_dump_surface_bmp(const char *filename, + struct pipe_surface *surface); #else #define debug_dump_image(prefix, format, cpp, width, height, stride, data) ((void)0) #define debug_dump_surface(prefix, surface) ((void)0) +#define debug_dump_surface_bmp(filename, surface) ((void)0) #endif -- cgit v1.2.3 From a4a739eb58f70368ef87c195ea77629c1526e71f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 8 Sep 2008 22:56:38 +0900 Subject: util: Allow to define the maximum file size. This avoids splitting the bitmaps in many files. --- src/gallium/auxiliary/util/p_debug.c | 2 +- src/gallium/auxiliary/util/u_stream.h | 7 ++++- src/gallium/auxiliary/util/u_stream_stdc.c | 4 ++- src/gallium/auxiliary/util/u_stream_wd.c | 43 +++++++++++++++++++++--------- 4 files changed, 40 insertions(+), 16 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 131e9b026c..b6cff281e6 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -657,7 +657,7 @@ debug_dump_surface_bmp(const char *filename, bmih.biClrUsed = 0; bmih.biClrImportant = 0; - stream = util_stream_create(filename); + stream = util_stream_create(filename, bmfh.bfSize); if(!stream) goto error2; diff --git a/src/gallium/auxiliary/util/u_stream.h b/src/gallium/auxiliary/util/u_stream.h index 516e634a99..a9d0f0121a 100644 --- a/src/gallium/auxiliary/util/u_stream.h +++ b/src/gallium/auxiliary/util/u_stream.h @@ -40,8 +40,13 @@ struct util_stream; +/** + * Create a stream + * @param filename relative or absolute path (necessary for windows) + * @param optional maximum file size (0 for a growable size). + */ struct util_stream * -util_stream_create(const char *filename); +util_stream_create(const char *filename, size_t max_size); boolean util_stream_write(struct util_stream *stream, const void *data, size_t size); diff --git a/src/gallium/auxiliary/util/u_stream_stdc.c b/src/gallium/auxiliary/util/u_stream_stdc.c index 03c845b6a7..dfb6a0e647 100644 --- a/src/gallium/auxiliary/util/u_stream_stdc.c +++ b/src/gallium/auxiliary/util/u_stream_stdc.c @@ -48,10 +48,12 @@ struct util_stream struct util_stream * -util_stream_create(const char *filename) +util_stream_create(const char *filename, size_t max_size) { struct util_stream *stream; + (void)max_size; + stream = CALLOC_STRUCT(util_stream); if(!stream) goto error1; diff --git a/src/gallium/auxiliary/util/u_stream_wd.c b/src/gallium/auxiliary/util/u_stream_wd.c index c8bb2ad011..1e135c6ba0 100644 --- a/src/gallium/auxiliary/util/u_stream_wd.c +++ b/src/gallium/auxiliary/util/u_stream_wd.c @@ -50,6 +50,8 @@ struct util_stream { char filename[MAX_PATH + 1]; WCHAR wFileName[MAX_PATH + 1]; + boolean growable; + size_t map_size; ULONG_PTR iFile; char *pMap; size_t written; @@ -64,11 +66,17 @@ util_stream_map(struct util_stream *stream) static char filename[MAX_PATH + 1]; unsigned filename_len; - filename_len = util_snprintf(filename, - sizeof(filename), - "\\??\\%s.%04x", - stream->filename, - stream->suffix++); + if(stream->growable) + filename_len = util_snprintf(filename, + sizeof(filename), + "\\??\\%s.%04x", + stream->filename, + stream->suffix++); + else + filename_len = util_snprintf(filename, + sizeof(filename), + "\\??\\%s", + stream->filename); EngMultiByteToUnicodeN( stream->wFileName, @@ -77,11 +85,11 @@ util_stream_map(struct util_stream *stream) filename, filename_len); - stream->pMap = EngMapFile(stream->wFileName, MAP_FILE_SIZE, &stream->iFile); + stream->pMap = EngMapFile(stream->wFileName, stream->map_size, &stream->iFile); if(!stream->pMap) return FALSE; - memset(stream->pMap, 0, MAP_FILE_SIZE); + memset(stream->pMap, 0, stream->map_size); stream->written = 0; return TRUE; @@ -92,7 +100,7 @@ static INLINE void util_stream_unmap(struct util_stream *stream) { EngUnmapFile(stream->iFile); - if(stream->written < MAP_FILE_SIZE) { + if(stream->written < stream->map_size) { /* Truncate file size */ stream->pMap = EngMapFile(stream->wFileName, stream->written, &stream->iFile); if(stream->pMap) @@ -104,7 +112,7 @@ util_stream_unmap(struct util_stream *stream) struct util_stream * -util_stream_create(const char *filename) +util_stream_create(const char *filename, size_t max_size) { struct util_stream *stream; @@ -114,6 +122,15 @@ util_stream_create(const char *filename) strncpy(stream->filename, filename, sizeof(stream->filename)); + if(max_size) { + stream->growable = FALSE; + stream->map_size = max_size; + } + else { + stream->growable = TRUE; + stream->map_size = MAP_FILE_SIZE; + } + if(!util_stream_map(stream)) goto error2; @@ -129,7 +146,7 @@ error1: static INLINE void util_stream_copy(struct util_stream *stream, const char *data, size_t size) { - assert(stream->written + size <= MAP_FILE_SIZE); + assert(stream->written + size <= stream->map_size); memcpy(stream->pMap + stream->written, data, size); stream->written += size; } @@ -144,14 +161,14 @@ util_stream_write(struct util_stream *stream, const void *data, size_t size) if(!stream->pMap) return FALSE; - while(stream->written + size > MAP_FILE_SIZE) { - size_t step = MAP_FILE_SIZE - stream->written; + while(stream->written + size > stream->map_size) { + size_t step = stream->map_size - stream->written; util_stream_copy(stream, data, step); data = (const char *)data + step; size -= step; util_stream_unmap(stream); - if(!util_stream_map(stream)) + if(!stream->growable || !util_stream_map(stream)) return FALSE; } -- cgit v1.2.3 From 5a25628bd20684ac67adcb542647a0a2d7649a37 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 5 Sep 2008 17:08:50 +0200 Subject: tgsi: Cleanup code. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 87 +++++++++++++++------------------- 1 file changed, 37 insertions(+), 50 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 626724ad4e..4681b29f52 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -601,12 +601,10 @@ static void PIPE_CDECL cos4f( float *store ) { - const unsigned X = 0; - - store[X + 0] = cosf( store[X + 0] ); - store[X + 1] = cosf( store[X + 1] ); - store[X + 2] = cosf( store[X + 2] ); - store[X + 3] = cosf( store[X + 3] ); + store[0] = cosf( store[0] ); + store[1] = cosf( store[1] ); + store[2] = cosf( store[2] ); + store[3] = cosf( store[3] ); } static void @@ -624,18 +622,16 @@ static void PIPE_CDECL ex24f( float *store ) { - const unsigned X = 0; - #if FAST_MATH - store[X + 0] = util_fast_exp2( store[X + 0] ); - store[X + 1] = util_fast_exp2( store[X + 1] ); - store[X + 2] = util_fast_exp2( store[X + 2] ); - store[X + 3] = util_fast_exp2( store[X + 3] ); + store[0] = util_fast_exp2( store[0] ); + store[1] = util_fast_exp2( store[1] ); + store[2] = util_fast_exp2( store[2] ); + store[3] = util_fast_exp2( store[3] ); #else - store[X + 0] = powf( 2.0f, store[X + 0] ); - store[X + 1] = powf( 2.0f, store[X + 1] ); - store[X + 2] = powf( 2.0f, store[X + 2] ); - store[X + 3] = powf( 2.0f, store[X + 3] ); + store[0] = powf( 2.0f, store[0] ); + store[1] = powf( 2.0f, store[1] ); + store[2] = powf( 2.0f, store[2] ); + store[3] = powf( 2.0f, store[3] ); #endif } @@ -665,12 +661,10 @@ static void PIPE_CDECL flr4f( float *store ) { - const unsigned X = 0; - - store[X + 0] = floorf( store[X + 0] ); - store[X + 1] = floorf( store[X + 1] ); - store[X + 2] = floorf( store[X + 2] ); - store[X + 3] = floorf( store[X + 3] ); + store[0] = floorf( store[0] ); + store[1] = floorf( store[1] ); + store[2] = floorf( store[2] ); + store[3] = floorf( store[3] ); } static void @@ -688,12 +682,10 @@ static void PIPE_CDECL frc4f( float *store ) { - const unsigned X = 0; - - store[X + 0] -= floorf( store[X + 0] ); - store[X + 1] -= floorf( store[X + 1] ); - store[X + 2] -= floorf( store[X + 2] ); - store[X + 3] -= floorf( store[X + 3] ); + store[0] -= floorf( store[0] ); + store[1] -= floorf( store[1] ); + store[2] -= floorf( store[2] ); + store[3] -= floorf( store[3] ); } static void @@ -711,12 +703,10 @@ static void PIPE_CDECL lg24f( float *store ) { - const unsigned X = 0; - - store[X + 0] = util_fast_log2( store[X + 0] ); - store[X + 1] = util_fast_log2( store[X + 1] ); - store[X + 2] = util_fast_log2( store[X + 2] ); - store[X + 3] = util_fast_log2( store[X + 3] ); + store[0] = util_fast_log2( store[0] ); + store[1] = util_fast_log2( store[1] ); + store[2] = util_fast_log2( store[2] ); + store[3] = util_fast_log2( store[3] ); } static void @@ -770,18 +760,16 @@ static void PIPE_CDECL pow4f( float *store ) { - const unsigned X = 0; - #if FAST_MATH - store[X + 0] = util_fast_pow( store[X + 0], store[X + 4] ); - store[X + 1] = util_fast_pow( store[X + 1], store[X + 5] ); - store[X + 2] = util_fast_pow( store[X + 2], store[X + 6] ); - store[X + 3] = util_fast_pow( store[X + 3], store[X + 7] ); + store[0] = util_fast_pow( store[0], store[4] ); + store[1] = util_fast_pow( store[1], store[5] ); + store[2] = util_fast_pow( store[2], store[6] ); + store[3] = util_fast_pow( store[3], store[7] ); #else - store[X + 0] = powf( store[X + 0], store[X + 4] ); - store[X + 1] = powf( store[X + 1], store[X + 5] ); - store[X + 2] = powf( store[X + 2], store[X + 6] ); - store[X + 3] = powf( store[X + 3], store[X + 7] ); + store[0] = powf( store[0], store[4] ); + store[1] = powf( store[1], store[5] ); + store[2] = powf( store[2], store[6] ); + store[3] = powf( store[3], store[7] ); #endif } @@ -877,12 +865,10 @@ static void PIPE_CDECL sin4f( float *store ) { - const unsigned X = 0; - - store[X + 0] = sinf( store[X + 0] ); - store[X + 1] = sinf( store[X + 1] ); - store[X + 2] = sinf( store[X + 2] ); - store[X + 3] = sinf( store[X + 3] ); + store[0] = sinf( store[0] ); + store[1] = sinf( store[1] ); + store[2] = sinf( store[2] ); + store[3] = sinf( store[3] ); } static void @@ -2416,3 +2402,4 @@ tgsi_emit_sse2( } #endif /* PIPE_ARCH_X86 */ + -- cgit v1.2.3 From 67c213499a9a533d84bc40ef5ef02e3350fcfc75 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 9 Sep 2008 11:25:05 +0200 Subject: util: Enable u_stream_std.c for PIPE_SUBSYSTEM_WINDOWS_USER. --- src/gallium/auxiliary/util/u_stream_stdc.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_stream_stdc.c b/src/gallium/auxiliary/util/u_stream_stdc.c index dfb6a0e647..ca80bef0f3 100644 --- a/src/gallium/auxiliary/util/u_stream_stdc.c +++ b/src/gallium/auxiliary/util/u_stream_stdc.c @@ -32,7 +32,7 @@ #include "pipe/p_config.h" -#if defined(PIPE_OS_LINUX) +#if defined(PIPE_OS_LINUX) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) #include -- cgit v1.2.3 From bfe45670aef35c358e7d22b8a9cb8cd6532b0e3d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 9 Sep 2008 21:16:36 +0900 Subject: util: Ensure we always have a full qualified file name on windows display. --- src/gallium/auxiliary/util/u_stream_wd.c | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_stream_wd.c b/src/gallium/auxiliary/util/u_stream_wd.c index 1e135c6ba0..864489e775 100644 --- a/src/gallium/auxiliary/util/u_stream_wd.c +++ b/src/gallium/auxiliary/util/u_stream_wd.c @@ -69,13 +69,13 @@ util_stream_map(struct util_stream *stream) if(stream->growable) filename_len = util_snprintf(filename, sizeof(filename), - "\\??\\%s.%04x", + "%s.%04x", stream->filename, stream->suffix++); else filename_len = util_snprintf(filename, sizeof(filename), - "\\??\\%s", + "%s", stream->filename); EngMultiByteToUnicodeN( @@ -111,6 +111,28 @@ util_stream_unmap(struct util_stream *stream) } +static INLINE void +util_stream_full_qualified_filename(char *dst, size_t size, const char *src) +{ + boolean need_drive, need_root; + + if((('A' <= src[0] && src[0] <= 'Z') || ('a' <= src[0] && src[0] <= 'z')) && src[1] == ':') { + need_drive = FALSE; + need_root = src[2] == '\\' ? FALSE : TRUE; + } + else { + need_drive = TRUE; + need_root = src[0] == '\\' ? FALSE : TRUE; + } + + util_snprintf(dst, size, + "\\??\\%s%s%s", + need_drive ? "C:" : "", + need_root ? "\\" : "", + src); +} + + struct util_stream * util_stream_create(const char *filename, size_t max_size) { @@ -120,7 +142,9 @@ util_stream_create(const char *filename, size_t max_size) if(!stream) goto error1; - strncpy(stream->filename, filename, sizeof(stream->filename)); + util_stream_full_qualified_filename(stream->filename, + sizeof(stream->filename), + filename); if(max_size) { stream->growable = FALSE; -- cgit v1.2.3 From dc1834a873726b9920c2cae6ffad86e09d4637ee Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 10 Sep 2008 10:32:52 +0900 Subject: tgsi: Verify constants are set before attempting to read them. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index fb573fe1f0..df002939c6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -957,6 +957,7 @@ fetch_src_file_channel( case TGSI_EXTSWIZZLE_W: switch( file ) { case TGSI_FILE_CONSTANT: + assert(mach->Consts); chan->f[0] = mach->Consts[index->i[0]][swizzle]; chan->f[1] = mach->Consts[index->i[1]][swizzle]; chan->f[2] = mach->Consts[index->i[2]][swizzle]; -- cgit v1.2.3 From eb5b16d278e0f7ee0121049e43dfee1d52f2b0f7 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 10 Sep 2008 10:33:03 +0900 Subject: tgsi: Fix newline pos. --- src/gallium/auxiliary/tgsi/tgsi_sanity.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 0b5bdd6ba1..c659027296 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -322,7 +322,7 @@ epilog( /* Print totals, if any. */ if (ctx->errors || ctx->warnings) - debug_printf( "\n%u errors, %u warnings", ctx->errors, ctx->warnings ); + debug_printf( "%u errors, %u warnings\n", ctx->errors, ctx->warnings ); return TRUE; } -- cgit v1.2.3 From bb5becf1e289b2c9240d98299e9447a9673da9fc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 5 Sep 2008 13:54:14 -0600 Subject: gallium: comments, assertions, etc --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 36 +++++++++++++++++++++++++---- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 20 +++++++++------- 2 files changed, 43 insertions(+), 13 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 285ddc0e3f..fe5beba456 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -300,7 +300,9 @@ void _name (struct spe_function *p, int imm) \ #include "rtasm_ppc_spe.h" -/* +/** + * Initialize an spe_function. + * \param code_size size of instruction buffer to allocate, in bytes. */ void spe_init_func(struct spe_function *p, unsigned code_size) { @@ -324,10 +326,14 @@ void spe_release_func(struct spe_function *p) } +/** + * Alloate a SPE register. + * \return register index or -1 if none left. + */ int spe_allocate_available_register(struct spe_function *p) { unsigned i; - for (i = 0; i < 128; i++) { + for (i = 0; i < SPE_NUM_REGS; i++) { const uint64_t mask = (1ULL << (i % 64)); const unsigned idx = i / 64; @@ -341,11 +347,15 @@ int spe_allocate_available_register(struct spe_function *p) } +/** + * Mark the given SPE register as "allocated". + */ int spe_allocate_register(struct spe_function *p, int reg) { const unsigned idx = reg / 64; const unsigned bit = reg % 64; + assert(reg < SPE_NUM_REGS); assert((p->regs[idx] & (1ULL << bit)) != 0); p->regs[idx] &= ~(1ULL << bit); @@ -353,57 +363,73 @@ int spe_allocate_register(struct spe_function *p, int reg) } +/** + * Mark the given SPE register as "unallocated". + */ void spe_release_register(struct spe_function *p, int reg) { const unsigned idx = reg / 64; const unsigned bit = reg % 64; + assert(reg < SPE_NUM_REGS); assert((p->regs[idx] & (1ULL << bit)) == 0); p->regs[idx] |= (1ULL << bit); } +/** + * For branch instructions: + * \param d if 1, disable interupts if branch is taken + * \param e if 1, enable interupts if branch is taken + * If d and e are both zero, don't change interupt status (right?) + */ - +/** Branch Indirect to address in rA */ void spe_bi(struct spe_function *p, unsigned rA, int d, int e) { emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4)); } +/** Interupt Return */ void spe_iret(struct spe_function *p, unsigned rA, int d, int e) { emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4)); } +/** Branch indirect and set link on external data */ void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4)); } +/** Branch indirect and set link. Save PC in rT, jump to rA. */ void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4)); } -void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, - int e) +/** Branch indirect if zero word. If rT.word[0]==0, jump to rA. */ +void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4)); } +/** Branch indirect if non-zero word. If rT.word[0]!=0, jump to rA. */ void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4)); } +/** Branch indirect if zero halfword. If rT.halfword[1]==0, jump to rA. */ void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4)); } +/** Branch indirect if non-zero halfword. If rT.halfword[1]!=0, jump to rA. */ void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4)); diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 1cacc717b1..7dd754ba77 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -32,13 +32,17 @@ #ifndef RTASM_PPC_SPE_H #define RTASM_PPC_SPE_H -struct spe_function { - /** - * - */ - uint32_t *store; - uint32_t *csr; - const char *fn; +/** 4 bytes per instruction */ +#define SPE_INST_SIZE 4 + +/** number of general-purpose SIMD registers */ +#define SPE_NUM_REGS 128 + +struct spe_function +{ + uint32_t *store; /**< instruction buffer */ + uint32_t *csr; /**< next free pos in instruction buffer */ + const char *fn; /**< unused */ /** * Mask of used / unused registers @@ -50,7 +54,7 @@ struct spe_function { * spe_allocate_register, spe_allocate_available_register, * spe_release_register */ - uint64_t regs[2]; + uint64_t regs[SPE_NUM_REGS / 64]; }; extern void spe_init_func(struct spe_function *p, unsigned code_size); -- cgit v1.2.3 From ee582fd3a7a9ddbcb5595249201cf213a6c6f014 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 10 Sep 2008 17:11:48 -0600 Subject: gallium: assorted additions and fixes to Cell SPE rtasm code Fix incorrect opcode for fsmbi. Added "macro" functions for loading floats/ints, register complement, zero, move. Added #defines for return address and stack pointer registers. Added assertions to check that the instruction buffer doesn't overflow. --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 88 +++++++++++++++++++++++------ src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 38 +++++++++++-- 2 files changed, 105 insertions(+), 21 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index fe5beba456..61010e4333 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -151,8 +151,8 @@ static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rB = rB; inst.inst.rA = rA; inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); } @@ -165,8 +165,8 @@ static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rB = rB; inst.inst.rA = rA; inst.inst.rC = rC; - *p->csr = inst.bits; - p->csr++; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); } @@ -178,8 +178,8 @@ static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT, inst.inst.i7 = imm; inst.inst.rA = rA; inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); } @@ -192,8 +192,8 @@ static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT, inst.inst.i8 = imm; inst.inst.rA = rA; inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); } @@ -206,8 +206,8 @@ static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, inst.inst.i10 = imm; inst.inst.rA = rA; inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); } @@ -218,8 +218,8 @@ static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, inst.inst.op = op; inst.inst.i16 = imm; inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); } @@ -230,8 +230,8 @@ static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, inst.inst.op = op; inst.inst.i18 = imm; inst.inst.rT = rT; - *p->csr = inst.bits; - p->csr++; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); } @@ -307,8 +307,9 @@ void _name (struct spe_function *p, int imm) \ void spe_init_func(struct spe_function *p, unsigned code_size) { p->store = align_malloc(code_size, 16); - p->csr = p->store; - + p->num_inst = 0; + p->max_inst = code_size / SPE_INST_SIZE; + /* Conservatively treat R0 - R2 and R80 - R127 as non-volatile. */ p->regs[0] = ~7; @@ -318,11 +319,11 @@ void spe_init_func(struct spe_function *p, unsigned code_size) void spe_release_func(struct spe_function *p) { + assert(p->num_inst <= p->max_inst); if (p->store != NULL) { align_free(p->store); } p->store = NULL; - p->csr = NULL; } @@ -337,6 +338,7 @@ int spe_allocate_available_register(struct spe_function *p) const uint64_t mask = (1ULL << (i % 64)); const unsigned idx = i / 64; + assert(idx < 2); if ((p->regs[idx] & mask) != 0) { p->regs[idx] &= ~mask; return i; @@ -371,6 +373,8 @@ void spe_release_register(struct spe_function *p, int reg) const unsigned idx = reg / 64; const unsigned bit = reg % 64; + assert(idx < 2); + assert(reg < SPE_NUM_REGS); assert((p->regs[idx] & (1ULL << bit)) == 0); @@ -458,4 +462,54 @@ EMIT_R (spe_mfspr, 0x00c); EMIT_R (spe_mtspr, 0x10c); #endif + +/** + ** Helper / "macro" instructions. + ** Use somewhat verbose names as a reminder that these aren't native + ** SPE instructions. + **/ + + +void +spe_load_float(struct spe_function *p, unsigned rT, float x) +{ + union { + float f; + unsigned u; + } bits; + bits.f = x; + spe_ilhu(p, rT, bits.u >> 16); + spe_iohl(p, rT, bits.u & 0xffff); +} + + +void +spe_load_int(struct spe_function *p, unsigned rT, int i) +{ + spe_ilhu(p, rT, i >> 16); + spe_iohl(p, rT, i & 0xffff); +} + + +void +spe_complement(struct spe_function *p, unsigned rT) +{ + spe_nor(p, rT, rT, rT); +} + + +void +spe_move(struct spe_function *p, unsigned rT, unsigned rA) +{ + spe_ori(p, rT, rA, 0); +} + + +void +spe_zero(struct spe_function *p, unsigned rT) +{ + spe_xor(p, rT, rT, rT); +} + + #endif /* GALLIUM_CELL */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 7dd754ba77..dee8c55c4a 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -25,6 +25,7 @@ /** * \file * Real-time assembly generation interface for Cell B.E. SPEs. + * For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf * * \author Ian Romanick */ @@ -38,11 +39,18 @@ /** number of general-purpose SIMD registers */ #define SPE_NUM_REGS 128 +/** Return Address register */ +#define SPE_REG_RA 0 + +/** Stack Pointer register */ +#define SPE_REG_SP 1 + + struct spe_function { - uint32_t *store; /**< instruction buffer */ - uint32_t *csr; /**< next free pos in instruction buffer */ - const char *fn; /**< unused */ + uint32_t *store; /**< instruction buffer */ + uint num_inst; + uint max_inst; /** * Mask of used / unused registers @@ -123,7 +131,8 @@ EMIT_RI16(spe_ilhu, 0x082); EMIT_RI16(spe_il, 0x081); EMIT_RI18(spe_ila, 0x021); EMIT_RI16(spe_iohl, 0x0c1); -EMIT_RI16(spe_fsmbi, 0x0c5); +EMIT_RI16(spe_fsmbi, 0x065); + /* Integer and logical instructions @@ -275,6 +284,27 @@ extern void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e); +/** Load/splat immediate float into rT. */ +extern void +spe_load_float(struct spe_function *p, unsigned rT, float x); + +/** Load/splat immediate int into rT. */ +extern void +spe_load_int(struct spe_function *p, unsigned rT, int i); + +/** Complement/invert all bits in rT. */ +extern void +spe_complement(struct spe_function *p, unsigned rT); + +/** rT = rA. */ +extern void +spe_move(struct spe_function *p, unsigned rT, unsigned rA); + +/** rT = {0,0,0,0}. */ +extern void +spe_zero(struct spe_function *p, unsigned rT); + + /* Floating-point instructions */ EMIT_RR (spe_fa, 0x2c4); -- cgit v1.2.3 From 178bbaff80d079606a1135bd65f1a85bac9774c4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 11 Sep 2008 17:07:30 -0600 Subject: gallium: add special cases in spe_load_float(), spe_load_int(), added spe_splat() --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 45 +++++++++++++++++++++++------ src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 4 +++ 2 files changed, 40 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 61010e4333..a04cc6c4ff 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -473,21 +473,48 @@ EMIT_R (spe_mtspr, 0x10c); void spe_load_float(struct spe_function *p, unsigned rT, float x) { - union { - float f; - unsigned u; - } bits; - bits.f = x; - spe_ilhu(p, rT, bits.u >> 16); - spe_iohl(p, rT, bits.u & 0xffff); + if (x == 0.0f) { + spe_il(p, rT, 0x0); + } + else if (x == 0.5f) { + spe_ilhu(p, rT, 0x3f00); + } + else if (x == 1.0f) { + spe_ilhu(p, rT, 0x3f80); + } + else if (x == -1.0f) { + spe_ilhu(p, rT, 0xbf80); + } + else { + union { + float f; + unsigned u; + } bits; + bits.f = x; + spe_ilhu(p, rT, bits.u >> 16); + spe_iohl(p, rT, bits.u & 0xffff); + } } void spe_load_int(struct spe_function *p, unsigned rT, int i) { - spe_ilhu(p, rT, i >> 16); - spe_iohl(p, rT, i & 0xffff); + if (-32768 <= i && i <= 32767) { + spe_il(p, rT, i); + } + else { + spe_ilhu(p, rT, i >> 16); + spe_iohl(p, rT, i & 0xffff); + } +} + + +void +spe_splat(struct spe_function *p, unsigned rT, unsigned rA) +{ + spe_ila(p, rT, 66051); + spe_shufb(p, rT, rA, rA, rT); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index dee8c55c4a..d95e5aace3 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -292,6 +292,10 @@ spe_load_float(struct spe_function *p, unsigned rT, float x); extern void spe_load_int(struct spe_function *p, unsigned rT, int i); +/** Replicate word 0 of rA across rT. */ +extern void +spe_splat(struct spe_function *p, unsigned rT, unsigned rA); + /** Complement/invert all bits in rT. */ extern void spe_complement(struct spe_function *p, unsigned rT); -- cgit v1.2.3 From be5d8bd07886157fe524b8715509cd03ade2fda9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 12 Sep 2008 08:21:43 -0600 Subject: gallium: initial PPC/Altivec codegen --- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 365 ++++++++++++++++++++++++++++++++ src/gallium/auxiliary/rtasm/rtasm_ppc.h | 181 ++++++++++++++++ 2 files changed, 546 insertions(+) create mode 100644 src/gallium/auxiliary/rtasm/rtasm_ppc.c create mode 100644 src/gallium/auxiliary/rtasm/rtasm_ppc.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c new file mode 100644 index 0000000000..534a23568d --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -0,0 +1,365 @@ +/************************************************************************** + * + * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * PPC code generation. + * \author Brian Paul + */ + + +#include "util/u_memory.h" +#include "pipe/p_debug.h" +#include "rtasm_ppc.h" + + +void +ppc_init_func(struct ppc_function *p, unsigned max_inst) +{ + p->store = align_malloc(max_inst * PPC_INST_SIZE, 16); + p->num_inst = 0; + p->max_inst = max_inst; + p->vec_used = ~0; +} + + +void +ppc_release_func(struct ppc_function *p) +{ + assert(p->num_inst <= p->max_inst); + if (p->store != NULL) { + align_free(p->store); + } + p->store = NULL; +} + + +/** + * Alloate a vector register. + * \return register index or -1 if none left. + */ +int +ppc_allocate_vec_register(struct ppc_function *p, int reg) +{ + unsigned i; + for (i = 0; i < PPC_NUM_VEC_REGS; i++) { + const uint64_t mask = 1 << i; + if ((p->vec_used & mask) != 0) { + p->vec_used &= ~mask; + return i; + } + } + + return -1; +} + + +/** + * Mark the given vector register as "unallocated". + */ +void +ppc_release_vec_register(struct ppc_function *p, int reg) +{ + assert(reg < PPC_NUM_VEC_REGS); + assert((p->vec_used & (1 << reg)) == 0); + + p->vec_used |= (1 << reg); +} + + + +union vx_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned vD:5; + unsigned vA:5; + unsigned vB:5; + unsigned op2:11; + } inst; +}; + +union vxr_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned vD:5; + unsigned vA:5; + unsigned vB:5; + unsigned rC:1; + unsigned op2:10; + } inst; +}; + +union va_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned vD:5; + unsigned vA:5; + unsigned vB:5; + unsigned vC:5; + unsigned op2:6; + } inst; +}; + + +static inline void +emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +{ + union vx_inst inst; + inst.inst.op = 4; + inst.inst.vD = vD; + inst.inst.vA = vA; + inst.inst.vB = vB; + inst.inst.op2 = op2; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +}; + +static inline void +emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +{ + union vxr_inst inst; + inst.inst.op = 4; + inst.inst.vD = vD; + inst.inst.vA = vA; + inst.inst.vB = vB; + inst.inst.rC = 0; + inst.inst.op2 = op2; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +}; + +static inline void +emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) +{ + union va_inst inst; + inst.inst.op = 4; + inst.inst.vD = vD; + inst.inst.vA = vA; + inst.inst.vB = vB; + inst.inst.vC = vC; + inst.inst.op2 = op2; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +}; + + + +/** + ** float vector arithmetic + **/ + +/** vector float add */ +void +ppc_vaddfp(struct ppc_function *p,uint vD, uint vA, uint vB) +{ + emit_vx(p, 10, vD, vA, vB); +} + +/** vector float substract */ +void +ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 74, vD, vA, vB); +} + +/** vector float min */ +void +ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1098, vD, vA, vB); +} + +/** vector float max */ +void +ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1034, vD, vA, vB); +} + +/** vector float mult add */ +void +ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) +{ + emit_va(p, 46, vD, vA, vB, vC); +} + +/** vector float compare greater than */ +void +ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vxr(p, 710, vD, vA, vB); +} + +/** vector float compare greater than or equal to */ +void +ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vxr(p, 454, vD, vA, vB); +} + +/** vector float compare equal */ +void +ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vxr(p, 198, vD, vA, vB); +} + +/** vector float 2^x */ +void +ppc_vexptefp(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 394, vD, 0, vB); +} + +/** vector float log2(x) */ +void +ppc_vlogefp(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 458, vD, 0, vB); +} + +/** vector float reciprocol */ +void +ppc_vrefp(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 266, vD, 0, vB); +} + +/** vector float reciprocol sqrt estimate */ +void +ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 330, vD, 0, vB); +} + +/** vector float round to negative infinity */ +void +ppc_vrfim(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 714, vD, 0, vB); +} + +/** vector float round to positive infinity */ +void +ppc_vrfip(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 650, vD, 0, vB); +} + +/** vector float round to nearest int */ +void +ppc_vrfin(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 522, vD, 0, vB); +} + +/** vector float round to int toward zero */ +void +ppc_vrfiz(struct ppc_function *p, uint vD, uint vB) +{ + emit_vx(p, 586, vD, 0, vB); +} + + + +/** + ** bitwise operations + **/ + + +/** vector and */ +void +ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1028, vD, vA, vB); +} + +/** vector and complement */ +void +ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1092, vD, vA, vB); +} + +/** vector or */ +void +ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1156, vD, vA, vB); +} + +/** vector nor */ +void +ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1284, vD, vA, vB); +} + +/** vector xor */ +void +ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 1220, vD, vA, vB); +} + + +/** + ** Vector shuffle / select / splat / etc + **/ + +/** vector permute */ +void +ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) +{ + emit_va(p, 43, vD, vA, vB, vC); +} + +/** vector select */ +void +ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) +{ + emit_va(p, 42, vD, vA, vB, vC); +} + +/** vector splat byte */ +void +ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm) +{ + emit_vx(p, 42, vD, imm, vB); +} + +/** vector splat half word */ +void +ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm) +{ + emit_vx(p, 588, vD, imm, vB); +} + +/** vector splat word */ +void +ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm) +{ + emit_vx(p, 652, vD, imm, vB); +} diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h new file mode 100644 index 0000000000..ed14e943df --- /dev/null +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -0,0 +1,181 @@ +/************************************************************************** + * + * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN + * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * PPC code generation. + * \author Brian Paul + */ + + +#ifndef RTASM_PPC_H +#define RTASM_PPC_H + + +#include "pipe/p_compiler.h" + + +#define PPC_INST_SIZE 4 /**< 4 bytes / instruction */ + +#define PPC_NUM_VEC_REGS 32 + + +struct ppc_function +{ + uint32_t *store; /**< instruction buffer */ + uint num_inst; + uint max_inst; + uint32_t vec_used; /** used/free vector registers bitmask */ + uint32_t reg_used; /** used/free general-purpose registers bitmask */ +}; + + + +extern void ppc_init_func(struct ppc_function *p, unsigned max_inst); +extern void ppc_release_func(struct ppc_function *p); + +extern int ppc_allocate_vec_register(struct ppc_function *p, int reg); +extern void ppc_release_vec_register(struct ppc_function *p, int reg); + + +/** + ** float vector arithmetic + **/ + +/** vector float add */ +extern void +ppc_vaddfp(struct ppc_function *p,uint vD, uint vA, uint vB); + +/** vector float substract */ +extern void +ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float min */ +extern void +ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float max */ +extern void +ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float mult add */ +extern void +ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC); + +/** vector float compare greater than */ +extern void +ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float compare greater than or equal to */ +extern void +ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float compare equal */ +extern void +ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector float 2^x */ +extern void +ppc_vexptefp(struct ppc_function *p, uint vD, uint vB); + +/** vector float log2(x) */ +extern void +ppc_vlogefp(struct ppc_function *p, uint vD, uint vB); + +/** vector float reciprocol */ +extern void +ppc_vrefp(struct ppc_function *p, uint vD, uint vB); + +/** vector float reciprocol sqrt estimate */ +extern void +ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB); + +/** vector float round to negative infinity */ +extern void +ppc_vrfim(struct ppc_function *p, uint vD, uint vB); + +/** vector float round to positive infinity */ +extern void +ppc_vrfip(struct ppc_function *p, uint vD, uint vB); + +/** vector float round to nearest int */ +extern void +ppc_vrfin(struct ppc_function *p, uint vD, uint vB); + +/** vector float round to int toward zero */ +extern void +ppc_vrfiz(struct ppc_function *p, uint vD, uint vB); + + + +/** + ** bitwise operations + **/ + + +/** vector and */ +extern void +ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector and complement */ +extern void +ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector or */ +extern void +ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector nor */ +extern void +ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB); + +/** vector xor */ +extern void +ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB); + + +/** + ** Vector shuffle / select / splat / etc + **/ + +/** vector permute */ +extern void +ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC); + +/** vector select */ +extern void +ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC); + +/** vector splat byte */ +extern void +ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm); + +/** vector splat half word */ +extern void +ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm); + +/** vector splat word */ +extern void +ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm); + + +#endif /* RTASM_PPC_H */ -- cgit v1.2.3 From b71f4150c8be662d777da22ed0554663a9d1c84d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 12 Sep 2008 08:22:15 -0600 Subject: gallium: minor optimization to spe_load_int() --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index a04cc6c4ff..62e3adb357 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -505,7 +505,8 @@ spe_load_int(struct spe_function *p, unsigned rT, int i) } else { spe_ilhu(p, rT, i >> 16); - spe_iohl(p, rT, i & 0xffff); + if (i & 0xffff) + spe_iohl(p, rT, i & 0xffff); } } -- cgit v1.2.3 From 50f78fcc2e3da24fa6dc076f0985355b3f64e9fd Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 12 Sep 2008 11:01:31 -0600 Subject: gallium: silence warning --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index df002939c6..1a5294eabc 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1674,6 +1674,7 @@ exec_declaration( break; default: + eval = NULL; assert( 0 ); } -- cgit v1.2.3 From 31d2e5b954ece02070555b51c06ee427cf951b1f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 12 Sep 2008 11:02:18 -0600 Subject: gallium: use new compare32() function to fix warnings about type punning and aliasing --- src/gallium/auxiliary/tgsi/tgsi_build.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 74614d3688..38fcaf8829 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -793,10 +793,14 @@ tgsi_default_instruction_ext_nv( void ) return instruction_ext_nv; } -union token_u32 + +/** test for inequality of 32-bit values pointed to by a and b */ +static INLINE boolean +compare32(const void *a, const void *b) { - unsigned u32; -}; + return *((uint32_t *) a) != *((uint32_t *) b); +} + unsigned tgsi_compare_instruction_ext_nv( @@ -805,7 +809,7 @@ tgsi_compare_instruction_ext_nv( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct tgsi_instruction_ext_nv @@ -864,7 +868,7 @@ tgsi_compare_instruction_ext_label( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct tgsi_instruction_ext_label @@ -905,7 +909,7 @@ tgsi_compare_instruction_ext_texture( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct tgsi_instruction_ext_texture @@ -1027,7 +1031,7 @@ tgsi_compare_src_register_ext_swz( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct tgsi_src_register_ext_swz @@ -1095,7 +1099,7 @@ tgsi_compare_src_register_ext_mod( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct tgsi_src_register_ext_mod @@ -1241,7 +1245,7 @@ tgsi_compare_dst_register_ext_concode( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct tgsi_dst_register_ext_concode @@ -1299,7 +1303,7 @@ tgsi_compare_dst_register_ext_modulate( { a.Padding = b.Padding = 0; a.Extended = b.Extended = 0; - return ((union token_u32 *) &a)->u32 != ((union token_u32 *) &b)->u32; + return compare32(&a, &b); } struct tgsi_dst_register_ext_modulate -- cgit v1.2.3 From bd34b8a4febb7aadec0545250fd8b6b06ad774e8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 12 Sep 2008 11:40:31 -0600 Subject: gallium: use copy_token() function to avoid type punning/aliasing problems This fixes parsing errors seen with optimized builds on PPC (which led to crashes). The memcpy() is heavy-handed, but works. A lighter uint assignment could be used on x86... --- src/gallium/auxiliary/tgsi/tgsi_parse.c | 54 ++++++++++++++++++++------------- 1 file changed, 33 insertions(+), 21 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c index 3757486ba9..2cd56e413a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_parse.c +++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c @@ -88,16 +88,33 @@ tgsi_parse_end_of_tokens( 1 + ctx->FullHeader.Header.HeaderSize + ctx->FullHeader.Header.BodySize; } + +/** + * This function is used to avoid and work-around type punning/aliasing + * warnings. The warnings seem harmless on x86 but on PPC they cause + * real failures. + */ +static INLINE void +copy_token(void *dst, const void *src) +{ + memcpy(dst, src, 4); +} + + +/** + * Get next 4-byte token, return it at address specified by 'token' + */ static void next_token( struct tgsi_parse_context *ctx, void *token ) { assert( !tgsi_parse_end_of_tokens( ctx ) ); - - *(struct tgsi_token *) token = ctx->Tokens[ctx->Position++]; + copy_token(token, &ctx->Tokens[ctx->Position]); + ctx->Position++; } + void tgsi_parse_token( struct tgsi_parse_context *ctx ) @@ -116,7 +133,7 @@ tgsi_parse_token( struct tgsi_full_declaration *decl = &ctx->FullToken.FullDeclaration; *decl = tgsi_default_full_declaration(); - decl->Declaration = *(struct tgsi_declaration *) &token; + copy_token(&decl->Declaration, &token); next_token( ctx, &decl->DeclarationRange ); @@ -132,8 +149,7 @@ tgsi_parse_token( struct tgsi_full_immediate *imm = &ctx->FullToken.FullImmediate; *imm = tgsi_default_full_immediate(); - imm->Immediate = *(struct tgsi_immediate *) &token; - + copy_token(&imm->Immediate, &token); assert( !imm->Immediate.Extended ); switch (imm->Immediate.DataType) { @@ -158,8 +174,7 @@ tgsi_parse_token( unsigned extended; *inst = tgsi_default_full_instruction(); - inst->Instruction = *(struct tgsi_instruction *) &token; - + copy_token(&inst->Instruction, &token); extended = inst->Instruction.Extended; while( extended ) { @@ -169,18 +184,15 @@ tgsi_parse_token( switch( token.Type ) { case TGSI_INSTRUCTION_EXT_TYPE_NV: - inst->InstructionExtNv = - *(struct tgsi_instruction_ext_nv *) &token; + copy_token(&inst->InstructionExtNv, &token); break; case TGSI_INSTRUCTION_EXT_TYPE_LABEL: - inst->InstructionExtLabel = - *(struct tgsi_instruction_ext_label *) &token; + copy_token(&inst->InstructionExtLabel, &token); break; case TGSI_INSTRUCTION_EXT_TYPE_TEXTURE: - inst->InstructionExtTexture = - *(struct tgsi_instruction_ext_texture *) &token; + copy_token(&inst->InstructionExtTexture, &token); break; default: @@ -212,13 +224,13 @@ tgsi_parse_token( switch( token.Type ) { case TGSI_DST_REGISTER_EXT_TYPE_CONDCODE: - inst->FullDstRegisters[i].DstRegisterExtConcode = - *(struct tgsi_dst_register_ext_concode *) &token; + copy_token(&inst->FullDstRegisters[i].DstRegisterExtConcode, + &token); break; case TGSI_DST_REGISTER_EXT_TYPE_MODULATE: - inst->FullDstRegisters[i].DstRegisterExtModulate = - *(struct tgsi_dst_register_ext_modulate *) &token; + copy_token(&inst->FullDstRegisters[i].DstRegisterExtModulate, + &token); break; default: @@ -245,13 +257,13 @@ tgsi_parse_token( switch( token.Type ) { case TGSI_SRC_REGISTER_EXT_TYPE_SWZ: - inst->FullSrcRegisters[i].SrcRegisterExtSwz = - *(struct tgsi_src_register_ext_swz *) &token; + copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtSwz, + &token); break; case TGSI_SRC_REGISTER_EXT_TYPE_MOD: - inst->FullSrcRegisters[i].SrcRegisterExtMod = - *(struct tgsi_src_register_ext_mod *) &token; + copy_token(&inst->FullSrcRegisters[i].SrcRegisterExtMod, + &token); break; default: -- cgit v1.2.3 From 9defef29c59c580da1f6312f737822cd2efc9e28 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 12 Sep 2008 11:42:33 -0600 Subject: gallium: avoid redundant tgsi_exec_machine_bind_shader() calls on draw exec path tgsi_exec_machine_bind_shader() isn't cheap so avoiding unecessary calls is a big win. A similar change should be done for softpipe's fragment exec path but extra care needs to be taken with the texture sampler state/params. --- src/gallium/auxiliary/draw/draw_vs_exec.c | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 44563803f9..8041705b9c 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -46,6 +46,7 @@ struct exec_vertex_shader { struct draw_vertex_shader base; struct tgsi_exec_machine *machine; + struct tgsi_token *machine_tokens; }; static struct exec_vertex_shader *exec_vertex_shader( struct draw_vertex_shader *vs ) @@ -62,12 +63,16 @@ vs_exec_prepare( struct draw_vertex_shader *shader, { struct exec_vertex_shader *evs = exec_vertex_shader(shader); - /* specify the vertex program to interpret/execute */ - tgsi_exec_machine_bind_shader(evs->machine, - shader->state.tokens, - PIPE_MAX_SAMPLERS, - NULL /*samplers*/ ); - + /* Specify the vertex program to interpret/execute. + * Avoid rebinding when possible. + */ + if (evs->machine_tokens != shader->state.tokens) { + tgsi_exec_machine_bind_shader(evs->machine, + shader->state.tokens, + PIPE_MAX_SAMPLERS, + NULL /*samplers*/ ); + evs->machine_tokens = shader->state.tokens; + } } -- cgit v1.2.3 From 31a112cad4d2e515bc668b58abd4e402b4362c70 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 12 Sep 2008 21:08:01 -0600 Subject: gallium: added spe_splat_word() --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 25 +++++++++++++++++++++++++ src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 4 ++++ 2 files changed, 29 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 62e3adb357..89f8e24ce6 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -540,4 +540,29 @@ spe_zero(struct spe_function *p, unsigned rT) } +void +spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word) +{ + assert(word >= 0); + assert(word <= 3); + + if (word == 0) { + int tmp1 = rT; + spe_ila(p, tmp1, 66051); + spe_shufb(p, rT, rA, rA, tmp1); + } + else { + /* XXX review this, we may not need the rotqbyi instruction */ + int tmp1 = rT; + int tmp2 = spe_allocate_available_register(p); + + spe_ila(p, tmp1, 66051); + spe_rotqbyi(p, tmp2, rA, 4 * word); + spe_shufb(p, rT, tmp2, tmp2, tmp1); + + spe_release_register(p, tmp2); + } +} + + #endif /* GALLIUM_CELL */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index d95e5aace3..7a3ab9ace5 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -308,6 +308,10 @@ spe_move(struct spe_function *p, unsigned rT, unsigned rA); extern void spe_zero(struct spe_function *p, unsigned rT); +/** rT = splat(rA, word) */ +extern void +spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word); + /* Floating-point instructions */ -- cgit v1.2.3 From 8b5013d232bf6846717fac093465e8a39064e0b6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 12 Sep 2008 21:52:47 -0600 Subject: gallium: added print/dump code to SPE code emitter --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 128 ++++++++++++++++++++++------ src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 10 +++ 2 files changed, 113 insertions(+), 25 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 89f8e24ce6..8718be9ded 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -27,12 +27,16 @@ * Real-time assembly generation interface for Cell B.E. SPEs. * * \author Ian Romanick + * \author Brian Paul */ + +#include #include "pipe/p_compiler.h" #include "util/u_memory.h" #include "rtasm_ppc_spe.h" + #ifdef GALLIUM_CELL /** * SPE instruction types @@ -143,8 +147,25 @@ union spe_inst_RI18 { /*@}*/ +static void +indent(const struct spe_function *p) +{ + int i; + for (i = 0; i < p->indent; i++) { + putchar(' '); + } +} + + +static const char * +rem_prefix(const char *longname) +{ + return longname + 4; +} + + static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, unsigned rB) + unsigned rA, unsigned rB, const char *name) { union spe_inst_RR inst; inst.inst.op = op; @@ -153,11 +174,15 @@ static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rT = rT; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + printf("%s\tr%d, r%d, r%d\n", rem_prefix(name), rT, rA, rB); + } } static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, unsigned rB, unsigned rC) + unsigned rA, unsigned rB, unsigned rC, const char *name) { union spe_inst_RRR inst; inst.inst.op = op; @@ -167,11 +192,15 @@ static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rC = rC; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + printf("%s\tr%d, r%d, r%d, r%d\n", rem_prefix(name), rT, rA, rB, rB); + } } static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm) + unsigned rA, int imm, const char *name) { union spe_inst_RI7 inst; inst.inst.op = op; @@ -180,12 +209,16 @@ static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rT = rT; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + printf("%s\tr%d, r%d, 0x%x\n", rem_prefix(name), rT, rA, imm); + } } static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm) + unsigned rA, int imm, const char *name) { union spe_inst_RI8 inst; inst.inst.op = op; @@ -194,12 +227,16 @@ static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rT = rT; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + printf("%s\tr%d, r%d, 0x%x\n", rem_prefix(name), rT, rA, imm); + } } static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm) + unsigned rA, int imm, const char *name) { union spe_inst_RI10 inst; inst.inst.op = op; @@ -208,11 +245,15 @@ static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rT = rT; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + printf("%s\tr%d, r%d, 0x%x\n", rem_prefix(name), rT, rA, imm); + } } static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, - int imm) + int imm, const char *name) { union spe_inst_RI16 inst; inst.inst.op = op; @@ -220,11 +261,15 @@ static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rT = rT; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + printf("%s\tr%d, 0x%x\n", rem_prefix(name), rT, imm); + } } static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, - int imm) + int imm, const char *name) { union spe_inst_RI18 inst; inst.inst.op = op; @@ -232,6 +277,10 @@ static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rT = rT; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); + if (p->print) { + indent(p); + printf("%s\tr%d, 0x%x\n", rem_prefix(name), rT, imm); + } } @@ -240,61 +289,61 @@ static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, #define EMIT_(_name, _op) \ void _name (struct spe_function *p, unsigned rT) \ { \ - emit_RR(p, _op, rT, 0, 0); \ + emit_RR(p, _op, rT, 0, 0, __FUNCTION__); \ } #define EMIT_R(_name, _op) \ void _name (struct spe_function *p, unsigned rT, unsigned rA) \ { \ - emit_RR(p, _op, rT, rA, 0); \ + emit_RR(p, _op, rT, rA, 0, __FUNCTION__); \ } #define EMIT_RR(_name, _op) \ void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) \ { \ - emit_RR(p, _op, rT, rA, rB); \ + emit_RR(p, _op, rT, rA, rB, __FUNCTION__); \ } #define EMIT_RRR(_name, _op) \ void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB, unsigned rC) \ { \ - emit_RRR(p, _op, rT, rA, rB, rC); \ + emit_RRR(p, _op, rT, rA, rB, rC, __FUNCTION__); \ } #define EMIT_RI7(_name, _op) \ void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ { \ - emit_RI7(p, _op, rT, rA, imm); \ + emit_RI7(p, _op, rT, rA, imm, __FUNCTION__); \ } #define EMIT_RI8(_name, _op, bias) \ void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ { \ - emit_RI8(p, _op, rT, rA, bias - imm); \ + emit_RI8(p, _op, rT, rA, bias - imm, __FUNCTION__); \ } #define EMIT_RI10(_name, _op) \ void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ { \ - emit_RI10(p, _op, rT, rA, imm); \ + emit_RI10(p, _op, rT, rA, imm, __FUNCTION__); \ } #define EMIT_RI16(_name, _op) \ void _name (struct spe_function *p, unsigned rT, int imm) \ { \ - emit_RI16(p, _op, rT, imm); \ + emit_RI16(p, _op, rT, imm, __FUNCTION__); \ } #define EMIT_RI18(_name, _op) \ void _name (struct spe_function *p, unsigned rT, int imm) \ { \ - emit_RI18(p, _op, rT, imm); \ + emit_RI18(p, _op, rT, imm, __FUNCTION__); \ } #define EMIT_I16(_name, _op) \ void _name (struct spe_function *p, int imm) \ { \ - emit_RI16(p, _op, 0, imm); \ + emit_RI16(p, _op, 0, imm, __FUNCTION__); \ } #include "rtasm_ppc_spe.h" @@ -314,6 +363,9 @@ void spe_init_func(struct spe_function *p, unsigned code_size) */ p->regs[0] = ~7; p->regs[1] = (1U << (80 - 64)) - 1; + + p->print = false; + p->indent = 0; } @@ -382,6 +434,32 @@ void spe_release_register(struct spe_function *p, int reg) } +void +spe_print_code(struct spe_function *p, boolean enable) +{ + p->print = enable; +} + + +void +spe_indent(struct spe_function *p, int spaces) +{ + p->indent += spaces; +} + + +extern void +spe_comment(struct spe_function *p, int rel_indent, const char *s) +{ + if (p->print) { + p->indent += rel_indent; + indent(p); + p->indent -= rel_indent; + printf("%s\n", s); + } +} + + /** * For branch instructions: * \param d if 1, disable interupts if branch is taken @@ -392,51 +470,51 @@ void spe_release_register(struct spe_function *p, int reg) /** Branch Indirect to address in rA */ void spe_bi(struct spe_function *p, unsigned rA, int d, int e) { - emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Interupt Return */ void spe_iret(struct spe_function *p, unsigned rA, int d, int e) { - emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect and set link on external data */ void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { - emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect and set link. Save PC in rT, jump to rA. */ void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { - emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect if zero word. If rT.word[0]==0, jump to rA. */ void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { - emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect if non-zero word. If rT.word[0]!=0, jump to rA. */ void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { - emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect if zero halfword. If rT.halfword[1]==0, jump to rA. */ void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { - emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect if non-zero halfword. If rT.halfword[1]!=0, jump to rA. */ void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) { - emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4)); + emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 7a3ab9ace5..2579045232 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -28,6 +28,7 @@ * For details, see /opt/cell/sdk/docs/arch/SPU_ISA_v1.2_27Jan2007_pub.pdf * * \author Ian Romanick + * \author Brian Paul */ #ifndef RTASM_PPC_SPE_H @@ -63,8 +64,12 @@ struct spe_function * spe_release_register */ uint64_t regs[SPE_NUM_REGS / 64]; + + boolean print; /**< print/dump instructions as they're emitted? */ + int indent; /**< number of spaces to indent */ }; + extern void spe_init_func(struct spe_function *p, unsigned code_size); extern void spe_release_func(struct spe_function *p); @@ -72,6 +77,11 @@ extern int spe_allocate_available_register(struct spe_function *p); extern int spe_allocate_register(struct spe_function *p, int reg); extern void spe_release_register(struct spe_function *p, int reg); +extern void spe_print_code(struct spe_function *p, boolean enable); +extern void spe_indent(struct spe_function *p, int spaces); +extern void spe_comment(struct spe_function *p, int rel_indent, const char *s); + + #endif /* RTASM_PPC_SPE_H */ #ifndef EMIT_ -- cgit v1.2.3 From 809e81c0b424839cb742ff2502b1010c0258b368 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Sat, 13 Sep 2008 15:21:58 -0600 Subject: gallium: add another value check to util_fast_pow() Fixes glitches seen in morph3d demo. --- src/gallium/auxiliary/util/u_math.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 9b4ca39371..0b10622ee7 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -272,8 +272,10 @@ util_fast_log2(float val) static INLINE float util_fast_pow(float x, float y) { - /* XXX this test may need adjustment */ - if (y >= 3.0 && -0.02f <= x && x <= 0.02f) + /* XXX these tests may need adjustment */ + if (y >= 3.0f && (-0.02f <= x && x <= 0.02f)) + return 0.0f; + if (y >= 50.0f && (-0.9f <= x && x <= 0.9f)) return 0.0f; return util_fast_exp2(util_fast_log2(x) * y); } -- cgit v1.2.3 From 75c19eb5a1caf0c36e04270174579d0d7fec9ccb Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Sat, 13 Sep 2008 15:20:31 -0600 Subject: gallium: add another value check to util_fast_pow() Fixes glitches seen in morph3d demo. --- src/gallium/auxiliary/util/u_math.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 9b4ca39371..0b10622ee7 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -272,8 +272,10 @@ util_fast_log2(float val) static INLINE float util_fast_pow(float x, float y) { - /* XXX this test may need adjustment */ - if (y >= 3.0 && -0.02f <= x && x <= 0.02f) + /* XXX these tests may need adjustment */ + if (y >= 3.0f && (-0.02f <= x && x <= 0.02f)) + return 0.0f; + if (y >= 50.0f && (-0.9f <= x && x <= 0.9f)) return 0.0f; return util_fast_exp2(util_fast_log2(x) * y); } -- cgit v1.2.3 From e852232ebf256d1587ae0d456c366553749fd275 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Sun, 14 Sep 2008 19:04:53 +0200 Subject: draw: Silence compiler warnings on Windows. --- src/gallium/auxiliary/draw/draw_vs_exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 8041705b9c..79a19d6be2 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -46,7 +46,7 @@ struct exec_vertex_shader { struct draw_vertex_shader base; struct tgsi_exec_machine *machine; - struct tgsi_token *machine_tokens; + const struct tgsi_token *machine_tokens; }; static struct exec_vertex_shader *exec_vertex_shader( struct draw_vertex_shader *vs ) -- cgit v1.2.3 From 367774a62aa0627c5589e91ab7b411634113c815 Mon Sep 17 00:00:00 2001 From: Jonathan White Date: Mon, 15 Sep 2008 11:56:21 -0600 Subject: Fixed emit_RRR --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 8718be9ded..74cd4176e7 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -194,7 +194,7 @@ static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, assert(p->num_inst <= p->max_inst); if (p->print) { indent(p); - printf("%s\tr%d, r%d, r%d, r%d\n", rem_prefix(name), rT, rA, rB, rB); + printf("%s\tr%d, r%d, r%d, r%d\n", rem_prefix(name), rT, rA, rB, rC); } } -- cgit v1.2.3 From ae3373441dd4548702f23fe44bd04830e4902241 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 15 Sep 2008 15:10:02 -0600 Subject: gallium: emit SPU instructions in assembler-compatible syntax --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 74cd4176e7..870ae802c5 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -176,7 +176,7 @@ static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, assert(p->num_inst <= p->max_inst); if (p->print) { indent(p); - printf("%s\tr%d, r%d, r%d\n", rem_prefix(name), rT, rA, rB); + printf("%s\t$%d, $%d, $%d\n", rem_prefix(name), rT, rA, rB); } } @@ -194,7 +194,7 @@ static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, assert(p->num_inst <= p->max_inst); if (p->print) { indent(p); - printf("%s\tr%d, r%d, r%d, r%d\n", rem_prefix(name), rT, rA, rB, rC); + printf("%s\t$%d, $%d, $%d, $%d\n", rem_prefix(name), rT, rA, rB, rC); } } @@ -211,7 +211,7 @@ static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT, assert(p->num_inst <= p->max_inst); if (p->print) { indent(p); - printf("%s\tr%d, r%d, 0x%x\n", rem_prefix(name), rT, rA, imm); + printf("%s\t$%d, $%d, 0x%x\n", rem_prefix(name), rT, rA, imm); } } @@ -229,7 +229,7 @@ static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT, assert(p->num_inst <= p->max_inst); if (p->print) { indent(p); - printf("%s\tr%d, r%d, 0x%x\n", rem_prefix(name), rT, rA, imm); + printf("%s\t$%d, $%d, 0x%x\n", rem_prefix(name), rT, rA, imm); } } @@ -247,7 +247,11 @@ static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, assert(p->num_inst <= p->max_inst); if (p->print) { indent(p); - printf("%s\tr%d, r%d, 0x%x\n", rem_prefix(name), rT, rA, imm); + if (strcmp(name, "spe_lqd") == 0 || + strcmp(name, "spe_stqd") == 0) + printf("%s\t$%d, 0x%x($%d)\n", rem_prefix(name), rT, imm, rA); + else + printf("%s\t$%d, $%d, 0x%x\n", rem_prefix(name), rT, rA, imm); } } @@ -263,7 +267,7 @@ static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, assert(p->num_inst <= p->max_inst); if (p->print) { indent(p); - printf("%s\tr%d, 0x%x\n", rem_prefix(name), rT, imm); + printf("%s\t$%d, 0x%x\n", rem_prefix(name), rT, imm); } } @@ -279,7 +283,7 @@ static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, assert(p->num_inst <= p->max_inst); if (p->print) { indent(p); - printf("%s\tr%d, 0x%x\n", rem_prefix(name), rT, imm); + printf("%s\t$%d, 0x%x\n", rem_prefix(name), rT, imm); } } @@ -455,7 +459,7 @@ spe_comment(struct spe_function *p, int rel_indent, const char *s) p->indent += rel_indent; indent(p); p->indent -= rel_indent; - printf("%s\n", s); + printf("# %s\n", s); } } -- cgit v1.2.3 From ad16ecbbe4fe8c1bcb18ed8fbbd672c68a0b17fa Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Tue, 16 Sep 2008 16:16:54 +0200 Subject: tgsi: Make tgsi_sanity.c compile with make --- src/gallium/auxiliary/tgsi/Makefile | 1 + src/gallium/auxiliary/tgsi/tgsi_sanity.c | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index 806a2bd4c5..c5d2082087 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -4,6 +4,7 @@ include $(TOP)/configs/current LIBNAME = tgsi C_SOURCES = \ + tgsi_sanity.c \ tgsi_build.c \ tgsi_dump.c \ tgsi_exec.c \ diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index c659027296..20b32477be 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -152,6 +152,12 @@ check_register_usage( { if (!check_file_name( ctx, file )) return FALSE; + + if (index < 0 || index > MAX_REGISTERS) { + report_error( ctx, "%s[%i]: Invalid index %s", file_names[file], index, name ); + return FALSE; + } + if (indirect_access) { if (!is_any_register_declared( ctx, file )) report_error( ctx, "%s: Undeclared %s register", file_names[file], name ); -- cgit v1.2.3 From 8cdab20c9a0d8794d5d85dbeef478b982ce39506 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 16 Sep 2008 12:52:19 -0600 Subject: gallium: fix info entries for KIL, KILP KIL takes 1 src register. KILP uses no registers (uses cond codes). --- src/gallium/auxiliary/tgsi/tgsi_info.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c index a4899cd4c2..68c7a6b7f5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_info.c +++ b/src/gallium/auxiliary/tgsi/tgsi_info.c @@ -69,7 +69,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 1, 1, 0, 0, "COS" }, { 1, 1, 0, 0, "DDX" }, { 1, 1, 0, 0, "DDY" }, - { 0, 1, 0, 0, "KILP" }, + { 0, 0, 0, 0, "KILP" }, { 1, 1, 0, 0, "PK2H" }, { 1, 1, 0, 0, "PK2US" }, { 1, 1, 0, 0, "PK4B" }, @@ -146,7 +146,7 @@ static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] = { 0, 1, 0, 0, "CALLNZ" }, { 0, 1, 0, 0, "IFC" }, { 0, 1, 0, 0, "BREAKC" }, - { 0, 0, 0, 0, "KIL" }, + { 0, 1, 0, 0, "KIL" }, { 0, 0, 0, 0, "END" }, { 1, 1, 0, 0, "SWZ" } }; -- cgit v1.2.3 From f10e7f0d288530a31a7ed3bc976a537fe640ce69 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 17 Sep 2008 09:47:51 -0600 Subject: gallium: fix lack of surface reference counting in cso_set/save/restore_framebuffer() Fixes asst problems with FBO / render to texture. --- src/gallium/auxiliary/cso_cache/cso_context.c | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index f22ba40824..b1ccfc0374 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -765,12 +765,30 @@ void cso_restore_vertex_shader(struct cso_context *ctx) } +/** + * Copy framebuffer state from src to dst with refcounting of surfaces. + */ +static void +copy_framebuffer_state(struct pipe_framebuffer_state *dst, + const struct pipe_framebuffer_state *src) +{ + uint i; + + dst->width = src->width; + dst->height = src->height; + dst->num_cbufs = src->num_cbufs; + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); + } + pipe_surface_reference(&dst->zsbuf, src->zsbuf); +} + enum pipe_error cso_set_framebuffer(struct cso_context *ctx, const struct pipe_framebuffer_state *fb) { if (memcmp(&ctx->fb, fb, sizeof(*fb)) != 0) { - ctx->fb = *fb; + copy_framebuffer_state(&ctx->fb, fb); ctx->pipe->set_framebuffer_state(ctx->pipe, fb); } return PIPE_OK; @@ -778,13 +796,13 @@ enum pipe_error cso_set_framebuffer(struct cso_context *ctx, void cso_save_framebuffer(struct cso_context *ctx) { - ctx->fb_saved = ctx->fb; + copy_framebuffer_state(&ctx->fb_saved, &ctx->fb); } void cso_restore_framebuffer(struct cso_context *ctx) { if (memcmp(&ctx->fb, &ctx->fb_saved, sizeof(ctx->fb))) { - ctx->fb = ctx->fb_saved; + copy_framebuffer_state(&ctx->fb, &ctx->fb_saved); ctx->pipe->set_framebuffer_state(ctx->pipe, &ctx->fb); } } -- cgit v1.2.3 From e6a120fefea44078b3a8d4292d83671e6c41357f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 17 Sep 2008 13:14:57 -0600 Subject: gallium: fix tgsi sanity checker with respect to END. Subroutine code may be found after the END instruction so it's not always the last instruction. At least check for presence of exactly one END instruction though. --- src/gallium/auxiliary/tgsi/tgsi_sanity.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 20b32477be..11659247c0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -180,12 +180,10 @@ iter_instruction( const struct tgsi_opcode_info *info; uint i; - /* There must be no other instructions after END. - */ - if (ctx->index_of_END != ~0) { - report_error( ctx, "Unexpected instruction after END" ); - } - else if (inst->Instruction.Opcode == TGSI_OPCODE_END) { + if (inst->Instruction.Opcode == TGSI_OPCODE_END) { + if (ctx->index_of_END != ~0) { + report_error( ctx, "Too many END instructions" ); + } ctx->index_of_END = ctx->num_instructions; } @@ -307,10 +305,10 @@ epilog( struct sanity_check_ctx *ctx = (struct sanity_check_ctx *) iter; uint file; - /* There must be an END instruction at the end. + /* There must be an END instruction somewhere. */ - if (ctx->index_of_END == ~0 || ctx->index_of_END != ctx->num_instructions - 1) { - report_error( ctx, "Expected END at end of instruction sequence" ); + if (ctx->index_of_END == ~0) { + report_error( ctx, "Missing END instruction" ); } /* Check if all declared registers were used. -- cgit v1.2.3 From a06d38a74e865a0373a7314aad26b25c27ef8c57 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 17 Sep 2008 16:51:53 -0600 Subject: gallium: fix wide point / point coord semantic info (generic, not fog) --- src/gallium/auxiliary/draw/draw_pipe_wide_point.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c index 4f1326053d..e1af9e56a2 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c +++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c @@ -222,8 +222,8 @@ static void widepoint_first_point( struct draw_stage *stage, /* find fragment shader PointCoord/Fog input */ wide->point_coord_fs_input = 0; /* XXX fix this! */ - /* setup extra vp output */ - draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_FOG; + /* setup extra vp output (point coord implemented as a texcoord) */ + draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC; draw->extra_vp_outputs.semantic_index = 0; draw->extra_vp_outputs.slot = draw->vs.num_vs_outputs; } -- cgit v1.2.3 From 1672e8e05996d48e51a1998bd7e9b08b78e012f5 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 18 Sep 2008 11:10:09 +0900 Subject: pipebuffer: New callback to flush all temporary-held buffers. Used mostly to aid debugging memory issues or to clean up resources when the drivers are long lived. --- .../auxiliary/pipebuffer/pb_buffer_malloc.c | 14 +++++++++--- src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 14 ++++++++---- src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c | 16 +++++++++++++ src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 11 ++++++--- src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c | 11 +++++++++ .../auxiliary/pipebuffer/pb_bufmgr_fenced.c | 14 ++++++++++++ src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 10 ++++++++- src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c | 8 +++++++ src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c | 26 ++++++++++++++++++++++ 9 files changed, 113 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c index 20fc87b39d..1bf22a2ec0 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c @@ -129,7 +129,7 @@ pb_malloc_buffer_create(size_t size, static struct pb_buffer * -pb_malloc_buffer_create_buffer(struct pb_manager *mgr, +pb_malloc_bufmgr_create_buffer(struct pb_manager *mgr, size_t size, const struct pb_desc *desc) { @@ -137,6 +137,13 @@ pb_malloc_buffer_create_buffer(struct pb_manager *mgr, } +static void +pb_malloc_bufmgr_flush(struct pb_manager *mgr) +{ + /* No-op */ +} + + static void pb_malloc_bufmgr_destroy(struct pb_manager *mgr) { @@ -146,8 +153,9 @@ pb_malloc_bufmgr_destroy(struct pb_manager *mgr) static struct pb_manager pb_malloc_bufmgr = { - pb_malloc_buffer_create_buffer, - pb_malloc_bufmgr_destroy + pb_malloc_bufmgr_destroy, + pb_malloc_bufmgr_create_buffer, + pb_malloc_bufmgr_flush }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 32867029ee..cafbee045a 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -69,13 +69,22 @@ struct pipe_winsys; */ struct pb_manager { + void + (*destroy)( struct pb_manager *mgr ); + struct pb_buffer * (*create_buffer)( struct pb_manager *mgr, size_t size, const struct pb_desc *desc); + /** + * Flush all temporary-held buffers. + * + * Used mostly to aid debugging memory issues or to clean up resources when + * the drivers are long lived. + */ void - (*destroy)( struct pb_manager *mgr ); + (*flush)( struct pb_manager *mgr ); }; @@ -153,9 +162,6 @@ struct pb_manager * pb_cache_manager_create(struct pb_manager *provider, unsigned usecs); -void -pb_cache_flush(struct pb_manager *mgr); - /** * Fenced buffer manager. diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c index 2afaeafa1a..c956924cc7 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c @@ -75,6 +75,21 @@ pb_alt_manager_create_buffer(struct pb_manager *_mgr, } +static void +pb_alt_manager_flush(struct pb_manager *_mgr) +{ + struct pb_alt_manager *mgr = pb_alt_manager(_mgr); + + assert(mgr->provider1->flush); + if(mgr->provider1->flush) + mgr->provider1->flush(mgr->provider1); + + assert(mgr->provider2->flush); + if(mgr->provider2->flush) + mgr->provider2->flush(mgr->provider2); +} + + static void pb_alt_manager_destroy(struct pb_manager *mgr) { @@ -97,6 +112,7 @@ pb_alt_manager_create(struct pb_manager *provider1, mgr->base.destroy = pb_alt_manager_destroy; mgr->base.create_buffer = pb_alt_manager_create_buffer; + mgr->base.flush = pb_alt_manager_flush; mgr->provider1 = provider1; mgr->provider2 = provider2; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 1ec422fb19..8f118874ec 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -306,8 +306,8 @@ pb_cache_manager_create_buffer(struct pb_manager *_mgr, } -void -pb_cache_flush(struct pb_manager *_mgr) +static void +pb_cache_manager_flush(struct pb_manager *_mgr) { struct pb_cache_manager *mgr = pb_cache_manager(_mgr); struct list_head *curr, *next; @@ -323,13 +323,17 @@ pb_cache_flush(struct pb_manager *_mgr) next = curr->next; } pipe_mutex_unlock(mgr->mutex); + + assert(mgr->provider->flush); + if(mgr->provider->flush) + mgr->provider->flush(mgr->provider); } static void pb_cache_manager_destroy(struct pb_manager *mgr) { - pb_cache_flush(mgr); + pb_cache_manager_flush(mgr); FREE(mgr); } @@ -349,6 +353,7 @@ pb_cache_manager_create(struct pb_manager *provider, mgr->base.destroy = pb_cache_manager_destroy; mgr->base.create_buffer = pb_cache_manager_create_buffer; + mgr->base.flush = pb_cache_manager_flush; mgr->provider = provider; mgr->usecs = usecs; LIST_INITHEAD(&mgr->delayed); diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index 5f1ed3e5a8..1675e6e182 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -313,6 +313,16 @@ pb_debug_manager_create_buffer(struct pb_manager *_mgr, } +static void +pb_debug_manager_flush(struct pb_manager *_mgr) +{ + struct pb_debug_manager *mgr = pb_debug_manager(_mgr); + assert(mgr->provider->flush); + if(mgr->provider->flush) + mgr->provider->flush(mgr->provider); +} + + static void pb_debug_manager_destroy(struct pb_manager *_mgr) { @@ -336,6 +346,7 @@ pb_debug_manager_create(struct pb_manager *provider, size_t band_size) mgr->base.destroy = pb_debug_manager_destroy; mgr->base.create_buffer = pb_debug_manager_create_buffer; + mgr->base.flush = pb_debug_manager_flush; mgr->provider = provider; mgr->band_size = band_size; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c index 8fc63ce648..633ee70a75 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c @@ -94,6 +94,19 @@ fenced_bufmgr_create_buffer(struct pb_manager *mgr, } +static void +fenced_bufmgr_flush(struct pb_manager *mgr) +{ + struct fenced_pb_manager *fenced_mgr = fenced_pb_manager(mgr); + + fenced_buffer_list_check_free(fenced_mgr->fenced_list, TRUE); + + assert(fenced_mgr->provider->flush); + if(fenced_mgr->provider->flush) + fenced_mgr->provider->flush(fenced_mgr->provider); +} + + static void fenced_bufmgr_destroy(struct pb_manager *mgr) { @@ -123,6 +136,7 @@ fenced_bufmgr_create(struct pb_manager *provider, fenced_mgr->base.destroy = fenced_bufmgr_destroy; fenced_mgr->base.create_buffer = fenced_bufmgr_create_buffer; + fenced_mgr->base.flush = fenced_bufmgr_flush; fenced_mgr->provider = provider; fenced_mgr->fenced_list = fenced_buffer_list_create(winsys); diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index e8c7f8e1f8..fe80ca30ee 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -199,6 +199,13 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr, } +static void +mm_bufmgr_flush(struct pb_manager *mgr) +{ + /* No-op */ +} + + static void mm_bufmgr_destroy(struct pb_manager *mgr) { @@ -230,8 +237,9 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, if (!mm) return NULL; - mm->base.create_buffer = mm_bufmgr_create_buffer; mm->base.destroy = mm_bufmgr_destroy; + mm->base.create_buffer = mm_bufmgr_create_buffer; + mm->base.flush = mm_bufmgr_flush; mm->size = size; mm->align2 = align2; /* 64-byte alignment */ diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c index 3ef72c5bbb..61ac291ed7 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -202,6 +202,13 @@ pool_bufmgr_create_buffer(struct pb_manager *mgr, } +static void +pool_bufmgr_flush(struct pb_manager *mgr) +{ + /* No-op */ +} + + static void pool_bufmgr_destroy(struct pb_manager *mgr) { @@ -238,6 +245,7 @@ pool_bufmgr_create(struct pb_manager *provider, pool->base.destroy = pool_bufmgr_destroy; pool->base.create_buffer = pool_bufmgr_create_buffer; + pool->base.flush = pool_bufmgr_flush; LIST_INITHEAD(&pool->free); diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index 8698c4cff6..2a80154920 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -406,6 +406,17 @@ pb_slab_manager_create_buffer(struct pb_manager *_mgr, } +static void +pb_slab_manager_flush(struct pb_manager *_mgr) +{ + struct pb_slab_manager *mgr = pb_slab_manager(_mgr); + + assert(mgr->provider->flush); + if(mgr->provider->flush) + mgr->provider->flush(mgr->provider); +} + + static void pb_slab_manager_destroy(struct pb_manager *_mgr) { @@ -430,6 +441,7 @@ pb_slab_manager_create(struct pb_manager *provider, mgr->base.destroy = pb_slab_manager_destroy; mgr->base.create_buffer = pb_slab_manager_create_buffer; + mgr->base.flush = pb_slab_manager_flush; mgr->provider = provider; mgr->bufSize = bufSize; @@ -465,6 +477,19 @@ pb_slab_range_manager_create_buffer(struct pb_manager *_mgr, } +static void +pb_slab_range_manager_flush(struct pb_manager *_mgr) +{ + struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr); + + /* Individual slabs don't hold any temporary buffers so no need to call them */ + + assert(mgr->provider->flush); + if(mgr->provider->flush) + mgr->provider->flush(mgr->provider); +} + + static void pb_slab_range_manager_destroy(struct pb_manager *_mgr) { @@ -499,6 +524,7 @@ pb_slab_range_manager_create(struct pb_manager *provider, mgr->base.destroy = pb_slab_range_manager_destroy; mgr->base.create_buffer = pb_slab_range_manager_create_buffer; + mgr->base.flush = pb_slab_range_manager_flush; mgr->provider = provider; mgr->minBufSize = minBufSize; -- cgit v1.2.3 From f8bba34d4e12ef4c620cac881a4b697a1e668377 Mon Sep 17 00:00:00 2001 From: Robert Ellison Date: Thu, 18 Sep 2008 01:29:41 -0600 Subject: CELL: finish fragment ops blending (except for unusual D3D modes) - Added new "macro" functions spe_float_min() and spe_float_max() to rtasm_ppc_spe.{ch}. These emit instructions that cause the minimum or maximum of each element in a vector of floats to be saved in the destination register. - Major changes to cell_gen_fragment.c to implement all the blending modes (except for the mysterious D3D-based PIPE_BLENDFACTOR_SRC1_COLOR, PIPE_BLENDFACTOR_SRC1_ALPHA, PIPE_BLENDFACTOR_INV_SRC1_COLOR, and PIPE_BLENDFACTOR_INV_SRC1_ALPHA). - Some revamping of code in cell_gen_fragment.c: use the new spe_float_min() and spe_float_max() functions (instead of expanding these calculations inline via macros); create and use an inline utility function for handling "optional" register allocation (for the {1,1,1,1} vector, and the blend color vectors) instead of expanding with macros; use the Float Multiply and Subtract (fnms) instruction to simplify and optimize many blending calculations. --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 41 +- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 8 + src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 546 ++++++++++++++--------- 3 files changed, 377 insertions(+), 218 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 870ae802c5..12e0826fb9 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -384,7 +384,7 @@ void spe_release_func(struct spe_function *p) /** - * Alloate a SPE register. + * Allocate a SPE register. * \return register index or -1 if none left. */ int spe_allocate_available_register(struct spe_function *p) @@ -646,5 +646,44 @@ spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word) } } +/* For each 32-bit float element of rA and rB, choose the smaller of the + * two, compositing them into the rT register. + * + * The Float Compare Greater Than (fcgt) instruction will put 1s into + * compare_reg where rA > rB, and 0s where rA <= rB. + * + * Then the Select Bits (selb) instruction will take bits from rA where + * compare_reg is 0, and from rB where compare_reg is 1; i.e., from rA + * where rA <= rB and from rB where rB > rA, which is exactly the + * "min" operation. + * + * The compare_reg could in many cases be the same as rT, unless + * rT == rA || rt == rB. But since this is common in constructions + * like "x = min(x, a)", we always allocate a new register to be safe. + */ +void +spe_float_min(struct spe_function *p, unsigned int rT, unsigned int rA, unsigned int rB) +{ + unsigned int compare_reg = spe_allocate_available_register(p); + spe_fcgt(p, compare_reg, rA, rB); + spe_selb(p, rT, rA, rB, compare_reg); + spe_release_register(p, compare_reg); +} + +/* For each 32-bit float element of rA and rB, choose the greater of the + * two, compositing them into the rT register. + * + * The logic is similar to that of spe_float_min() above; the only + * difference is that the registers on spe_selb() have been reversed, + * so that the larger of the two is selected instead of the smaller. + */ +void +spe_float_max(struct spe_function *p, unsigned int rT, unsigned int rA, unsigned int rB) +{ + unsigned int compare_reg = spe_allocate_available_register(p); + spe_fcgt(p, compare_reg, rA, rB); + spe_selb(p, rT, rB, rA, compare_reg); + spe_release_register(p, compare_reg); +} #endif /* GALLIUM_CELL */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 2579045232..4ef05ea27d 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -322,6 +322,14 @@ spe_zero(struct spe_function *p, unsigned rT); extern void spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word); +/** rT = float min(rA, rB) */ +extern void +spe_float_min(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB); + +/** rT = float max(rA, rB) */ +extern void +spe_float_max(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB); + /* Floating-point instructions */ diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 2c80dd712e..9d25e820ad 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -229,35 +229,26 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, spe_release_register(f, amask_reg); } -/* This is a convenient and oft-used sequence. It chooses - * the smaller of each element of reg1 and reg2, and combines them - * into the result register, as follows: - * - * The Float Compare Greater Than (fcgt) instruction will put - * 1s into compare_reg where reg1 > reg2, and 0s where reg1 <= reg2. - * - * Then the Select Bits (selb) instruction will take bits from - * reg1 where compare_reg is 0, and from reg2 where compare_reg is - * 1. Ergo, result_reg will have the bits from reg1 where reg1 <= reg2, - * and the bits from reg2 where reg1 > reg2, which is exactly the - * MIN operation. +/* This pair of functions is used inline to allocate and deallocate + * optional constant registers. Once a constant is discovered to be + * needed, we will likely need it again, so we don't want to deallocate + * it and have to allocate and load it again unnecessarily. */ -#define FLOAT_VECTOR_MIN(f, result_reg, reg1, reg2) {\ - int compare_reg = spe_allocate_available_register(f); \ - spe_fcgt(f, compare_reg, reg1, reg2); \ - spe_selb(f, result_reg, reg1, reg2, compare_reg); \ - spe_release_register(f, compare_reg); \ +static inline void +setup_const_register(struct spe_function *f, boolean *is_already_set, unsigned int *r, float value) +{ + if (*is_already_set) return; + *r = spe_allocate_available_register(f); + spe_load_float(f, *r, value); + *is_already_set = true; } -/* The FLOAT_VECTOR_MAX sequence is similar to the FLOAT_VECTOR_MIN - * sequence above, except that the registers specified when selecting - * bits are reversed. - */ -#define FLOAT_VECTOR_MAX(f, result_reg, reg1, reg2) {\ - int compare_reg = spe_allocate_available_register(f); \ - spe_fcgt(f, compare_reg, reg1, reg2); \ - spe_selb(f, result_reg, reg2, reg1, compare_reg); \ - spe_release_register(f, compare_reg); \ +static inline void +release_const_register(struct spe_function *f, boolean *is_already_set, unsigned int r) +{ + if (!*is_already_set) return; + spe_release_register(f, r); + *is_already_set = false; } /** @@ -294,51 +285,15 @@ gen_blend(const struct pipe_blend_state *blend, int tmp_reg = spe_allocate_available_register(f); - /* These values might or might not eventually get put into - * registers. We avoid allocating them and setting them until - * they're actually needed; then we avoid setting them more than - * once, and release them at the end of code generation. + /* Optional constant registers we might or might not end up using; + * if we do use them, make sure we only allocate them once by + * keeping a flag on each one. */ - boolean one_reg_set = false; - int one_reg; -#define SET_ONE_REG_IF_UNSET(f) if (!one_reg_set) {\ - one_reg = spe_allocate_available_register(f); \ - spe_load_float(f, one_reg, 1.0f); \ - one_reg_set = true; \ -} -#define RELEASE_ONE_REG_IF_USED(f) if (one_reg_set) {\ - spe_release_register(f, one_reg); \ -} - - boolean const_color_set = false; - int constR_reg, constG_reg, constB_reg; -#define SET_CONST_COLOR_IF_UNSET(f, blend_color) if (!const_color_set) {\ - constR_reg = spe_allocate_available_register(f); \ - constG_reg = spe_allocate_available_register(f); \ - constG_reg = spe_allocate_available_register(f); \ - spe_load_float(f, constR_reg, blend_color->color[0]); \ - spe_load_float(f, constG_reg, blend_color->color[1]); \ - spe_load_float(f, constB_reg, blend_color->color[2]); \ - const_color_set = true;\ -} -#define RELEASE_CONST_COLOR_IF_USED(f) if (const_color_set) {\ - spe_release_register(f, constR_reg); \ - spe_release_register(f, constG_reg); \ - spe_release_register(f, constB_reg); \ -} - - boolean const_alpha_set = false; - int constA_reg; -#define SET_CONST_ALPHA_IF_UNSET(f, blend_color) if (!const_alpha_set) {\ - constA_reg = spe_allocate_available_register(f); \ - spe_load_float(f, constA_reg, blend_color->color[3]); \ - const_alpha_set = true; \ -} -#define RELEASE_CONST_ALPHA_IF_USED(f) if (const_alpha_set) {\ - spe_release_register(f, constA_reg); \ -} - - /* Real code starts here */ + boolean one_reg_set = false; + unsigned int one_reg; + boolean constR_reg_set = false, constG_reg_set = false, + constB_reg_set = false, constA_reg_set = false; + unsigned int constR_reg, constG_reg, constB_reg, constA_reg; ASSERT(blend->blend_enable); @@ -419,10 +374,11 @@ gen_blend(const struct pipe_blend_state *blend, spe_release_register(f, mask_reg); } - /* * Compute Src RGB terms. We're actually looking for the value - * of (the appropriate RGB factors) * (the incoming source RGB color). + * of (the appropriate RGB factors) * (the incoming source RGB color), + * because in some cases (like PIPE_BLENDFACTOR_ONE and + * PIPE_BLENDFACTOR_ZERO) we can avoid doing unnecessary math. */ switch (blend->rgb_src_factor) { case PIPE_BLENDFACTOR_ONE: @@ -450,18 +406,13 @@ gen_blend(const struct pipe_blend_state *blend, spe_fm(f, term1B_reg, fragB_reg, fragA_reg); break; case PIPE_BLENDFACTOR_INV_SRC_COLOR: - /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B)) */ - /* we'll need the optional constant {1,1,1,1} register */ - SET_ONE_REG_IF_UNSET(f) - /* tmp = 1 - R */ - spe_fs(f, tmp_reg, one_reg, fragR_reg); - /* term = R * tmp */ - spe_fm(f, term1R_reg, fragR_reg, tmp_reg); - /* repeat for G and B */ - spe_fs(f, tmp_reg, one_reg, fragG_reg); - spe_fm(f, term1G_reg, fragG_reg, tmp_reg); - spe_fs(f, tmp_reg, one_reg, fragB_reg); - spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + /* factors = (1-R,1-G,1-B), so term = (R*(1-R), G*(1-G), B*(1-B)) + * or in other words term = (R-R*R, G-G*G, B-B*B) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, fragR_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, fragG_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, fragB_reg, fragB_reg); break; case PIPE_BLENDFACTOR_DST_COLOR: /* factors = (Rfb,Gfb,Bfb), so term = (R*Rfb, G*Gfb, B*Bfb) */ @@ -470,30 +421,22 @@ gen_blend(const struct pipe_blend_state *blend, spe_fm(f, term1B_reg, fragB_reg, fbB_reg); break; case PIPE_BLENDFACTOR_INV_DST_COLOR: - /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb)) */ - /* we'll need the optional constant {1,1,1,1} register */ - SET_ONE_REG_IF_UNSET(f) - /* tmp = 1 - Rfb */ - spe_fs(f, tmp_reg, one_reg, fbR_reg); - /* term = R * tmp */ - spe_fm(f, term1R_reg, fragR_reg, tmp_reg); - /* repeat for G and B */ - spe_fs(f, tmp_reg, one_reg, fbG_reg); - spe_fm(f, term1G_reg, fragG_reg, tmp_reg); - spe_fs(f, tmp_reg, one_reg, fbB_reg); - spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (R*(1-Rfb),G*(1-Gfb),B*(1-Bfb)) + * or term = (R-R*Rfb, G-G*Gfb, B-B*Bfb) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, fbR_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, fbG_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, fbB_reg, fragB_reg); break; case PIPE_BLENDFACTOR_INV_SRC_ALPHA: - /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A)) */ - /* we'll need the optional constant {1,1,1,1} register */ - SET_ONE_REG_IF_UNSET(f) - /* tmp = 1 - A */ - spe_fs(f, tmp_reg, one_reg, fragA_reg); - /* term = R * tmp */ - spe_fm(f, term1R_reg, fragR_reg, tmp_reg); - /* repeat for G and B with the same (1-A) factor */ - spe_fm(f, term1G_reg, fragG_reg, tmp_reg); - spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + /* factors = (1-A,1-A,1-A), so term = (R*(1-A),G*(1-A),B*(1-A)) + * or term = (R-R*A,G-G*A,B-B*A) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, fragA_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, fragA_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, fragA_reg, fragB_reg); break; case PIPE_BLENDFACTOR_DST_ALPHA: /* factors = (Afb, Afb, Afb), so term = (R*Afb, G*Afb, B*Afb) */ @@ -502,19 +445,19 @@ gen_blend(const struct pipe_blend_state *blend, spe_fm(f, term1B_reg, fragB_reg, fbA_reg); break; case PIPE_BLENDFACTOR_INV_DST_ALPHA: - /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb)) */ - /* we'll need the optional constant {1,1,1,1} register */ - SET_ONE_REG_IF_UNSET(f) - /* tmp = 1 - A */ - spe_fs(f, tmp_reg, one_reg, fbA_reg); - /* term = R * tmp, G*tmp, and B*tmp */ - spe_fm(f, term1R_reg, fragR_reg, tmp_reg); - spe_fm(f, term1G_reg, fragG_reg, tmp_reg); - spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (R*(1-Afb),G*(1-Afb),B*(1-Afb)) + * or term = (R-R*Afb,G-G*Afb,b-B*Afb) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term1R_reg, fragR_reg, fbA_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, fbA_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, fbA_reg, fragB_reg); break; case PIPE_BLENDFACTOR_CONST_COLOR: - /* We'll need the optional blend color registers */ - SET_CONST_COLOR_IF_UNSET(f,blend_color) + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]); /* now, factor = (Rc,Gc,Bc), so term = (R*Rc,G*Gc,B*Bc) */ spe_fm(f, term1R_reg, fragR_reg, constR_reg); spe_fm(f, term1G_reg, fragG_reg, constG_reg); @@ -522,55 +465,61 @@ gen_blend(const struct pipe_blend_state *blend, break; case PIPE_BLENDFACTOR_CONST_ALPHA: /* we'll need the optional constant alpha register */ - SET_CONST_ALPHA_IF_UNSET(f, blend_color) + setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]); /* factor = (Ac,Ac,Ac), so term = (R*Ac,G*Ac,B*Ac) */ spe_fm(f, term1R_reg, fragR_reg, constA_reg); spe_fm(f, term1G_reg, fragG_reg, constA_reg); spe_fm(f, term1B_reg, fragB_reg, constA_reg); break; case PIPE_BLENDFACTOR_INV_CONST_COLOR: - /* We need both the optional {1,1,1,1} register, and the optional - * constant color registers + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]); + /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc)) + * or term = (R-R*Rc, G-G*Gc, B-B*Bc) + * fnms(a,b,c,d) computes a = d - b*c */ - SET_ONE_REG_IF_UNSET(f) - SET_CONST_COLOR_IF_UNSET(f, blend_color) - /* factor = (1-Rc,1-Gc,1-Bc), so term = (R*(1-Rc),G*(1-Gc),B*(1-Bc)) */ - spe_fs(f, tmp_reg, one_reg, constR_reg); - spe_fm(f, term1R_reg, fragR_reg, tmp_reg); - spe_fs(f, tmp_reg, one_reg, constG_reg); - spe_fm(f, term1G_reg, fragG_reg, tmp_reg); - spe_fs(f, tmp_reg, one_reg, constB_reg); - spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + spe_fnms(f, term1R_reg, fragR_reg, constR_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, constG_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, constB_reg, fragB_reg); break; case PIPE_BLENDFACTOR_INV_CONST_ALPHA: - /* We need the optional {1,1,1,1} register and the optional - * constant alpha register + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]); + /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac)) + * or term = (R-R*Ac,G-G*Ac,B-B*Ac) + * fnms(a,b,c,d) computes a = d - b*c */ - SET_ONE_REG_IF_UNSET(f) - SET_CONST_ALPHA_IF_UNSET(f, blend_color) - /* factor = (1-Ac,1-Ac,1-Ac), so term = (R*(1-Ac),G*(1-Ac),B*(1-Ac)) */ - spe_fs(f, tmp_reg, one_reg, constA_reg); - spe_fm(f, term1R_reg, fragR_reg, tmp_reg); - spe_fm(f, term1G_reg, fragG_reg, tmp_reg); - spe_fm(f, term1B_reg, fragB_reg, tmp_reg); + spe_fnms(f, term1R_reg, fragR_reg, constA_reg, fragR_reg); + spe_fnms(f, term1G_reg, fragG_reg, constA_reg, fragG_reg); + spe_fnms(f, term1B_reg, fragB_reg, constA_reg, fragB_reg); break; case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* We'll need the optional {1,1,1,1} register */ - SET_ONE_REG_IF_UNSET(f) + setup_const_register(f, &one_reg_set, &one_reg, 1.0f); /* factor = (min(A,1-Afb),min(A,1-Afb),min(A,1-Afb)), so * term = (R*min(A,1-Afb), G*min(A,1-Afb), B*min(A,1-Afb)) + * We could expand the term (as a*min(b,c) == min(a*b,a*c) + * as long as a is positive), but then we'd have to do three + * spe_float_min() functions instead of one, so this is simpler. */ /* tmp = 1 - Afb */ spe_fs(f, tmp_reg, one_reg, fbA_reg); /* tmp = min(A,tmp) */ - FLOAT_VECTOR_MIN(f, tmp_reg, fragA_reg, tmp_reg) + spe_float_min(f, tmp_reg, fragA_reg, tmp_reg); /* term = R*tmp */ spe_fm(f, term1R_reg, fragR_reg, tmp_reg); spe_fm(f, term1G_reg, fragG_reg, tmp_reg); spe_fm(f, term1B_reg, fragB_reg, tmp_reg); break; - /* non-OpenGL cases? */ + /* These are special D3D cases involving a second color output + * from the fragment shader. I'm not sure we can support them + * yet... XXX + */ case PIPE_BLENDFACTOR_SRC1_COLOR: case PIPE_BLENDFACTOR_SRC1_ALPHA: case PIPE_BLENDFACTOR_INV_SRC1_COLOR: @@ -581,132 +530,293 @@ gen_blend(const struct pipe_blend_state *blend, } /* - * Compute Src Alpha term + * Compute Src Alpha term. Like the above, we're looking for + * the full term A*factor, not just the factor itself, because + * in many cases we can avoid doing unnecessary multiplies. */ switch (blend->alpha_src_factor) { + case PIPE_BLENDFACTOR_ZERO: + /* factor = 0, so term = 0 */ + spe_load_float(f, term1A_reg, 0.0f); + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* fall through */ case PIPE_BLENDFACTOR_ONE: + /* factor = 1, so term = A */ spe_move(f, term1A_reg, fragA_reg); break; + case PIPE_BLENDFACTOR_SRC_COLOR: + /* factor = A, so term = A*A */ spe_fm(f, term1A_reg, fragA_reg, fragA_reg); break; case PIPE_BLENDFACTOR_SRC_ALPHA: spe_fm(f, term1A_reg, fragA_reg, fragA_reg); break; - /* XXX more cases */ + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factor = 1-A, so term = A*(1-A) = A-A*A */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term1A_reg, fragA_reg, fragA_reg, fragA_reg); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_DST_COLOR: + /* factor = Afb, so term = A*Afb */ + spe_fm(f, term1A_reg, fragA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factor = 1-Afb, so term = A*(1-Afb) = A - A*Afb */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term1A_reg, fragA_reg, fbA_reg, fragA_reg); + break; + + case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We need the optional constA_reg register */ + setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]); + /* factor = Ac, so term = A*Ac */ + spe_fm(f, term1A_reg, fragA_reg, constA_reg); + break; + + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need the optional constA_reg register */ + setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]); + /* factor = 1-Ac, so term = A*(1-Ac) = A-A*Ac */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term1A_reg, fragA_reg, constA_reg, fragA_reg); + break; + + /* These are special D3D cases involving a second color output + * from the fragment shader. I'm not sure we can support them + * yet... XXX + */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: default: ASSERT(0); } /* - * Compute Dest RGB terms + * Compute Dest RGB term. Like the above, we're looking for + * the full term (Rfb,Gfb,Bfb)*(factor), not just the factor itself, because + * in many cases we can avoid doing unnecessary multiplies. */ switch (blend->rgb_dst_factor) { case PIPE_BLENDFACTOR_ONE: + /* factors = (1,1,1), so term = (Rfb,Gfb,Bfb) */ spe_move(f, term2R_reg, fbR_reg); spe_move(f, term2G_reg, fbG_reg); spe_move(f, term2B_reg, fbB_reg); break; case PIPE_BLENDFACTOR_ZERO: - spe_zero(f, term2R_reg); - spe_zero(f, term2G_reg); - spe_zero(f, term2B_reg); + /* factor s= (0,0,0), so term = (0,0,0) */ + spe_load_float(f, term2R_reg, 0.0f); + spe_load_float(f, term2G_reg, 0.0f); + spe_load_float(f, term2B_reg, 0.0f); break; case PIPE_BLENDFACTOR_SRC_COLOR: + /* factors = (R,G,B), so term = (R*Rfb, G*Gfb, B*Bfb) */ spe_fm(f, term2R_reg, fbR_reg, fragR_reg); spe_fm(f, term2G_reg, fbG_reg, fragG_reg); spe_fm(f, term2B_reg, fbB_reg, fragB_reg); break; + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factors = (1-R,1-G,1-B), so term = (Rfb*(1-R), Gfb*(1-G), Bfb*(1-B)) + * or in other words term = (Rfb-Rfb*R, Gfb-Gfb*G, Bfb-Bfb*B) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fragR_reg, fbR_reg, fbR_reg); + spe_fnms(f, term2G_reg, fragG_reg, fbG_reg, fbG_reg); + spe_fnms(f, term2B_reg, fragB_reg, fbB_reg, fbB_reg); + break; case PIPE_BLENDFACTOR_SRC_ALPHA: + /* factors = (A,A,A), so term = (Rfb*A, Gfb*A, Bfb*A) */ spe_fm(f, term2R_reg, fbR_reg, fragA_reg); spe_fm(f, term2G_reg, fbG_reg, fragA_reg); spe_fm(f, term2B_reg, fbB_reg, fragA_reg); break; case PIPE_BLENDFACTOR_INV_SRC_ALPHA: -#if 0 - /* one = {1.0, 1.0, 1.0, 1.0} */ - if (!one_reg_set) { - one_reg = spe_allocate_available_register(f); - spe_load_float(f, one_reg, 1.0f); - one_reg_set = true; - } - /* tmp = one - fragA */ - spe_fs(f, tmp_reg, one_reg, fragA_reg); - /* term = fb * tmp */ - spe_fm(f, term2R_reg, fbR_reg, tmp_reg); - spe_fm(f, term2G_reg, fbG_reg, tmp_reg); - spe_fm(f, term2B_reg, fbB_reg, tmp_reg); -#else - /* Compute: term2x = fbx * (1.0 - fragA) - * Which is: term2x = fbx - fbx * fragA - * Use fnms t,a,b,c which computes t=c-a*b - */ + /* factors = (1-A,1-A,1-A) so term = (Rfb-Rfb*A,Gfb-Gfb*A,Bfb-Bfb*A) */ + /* fnms(a,b,c,d) computes a = d - b*c */ spe_fnms(f, term2R_reg, fbR_reg, fragA_reg, fbR_reg); spe_fnms(f, term2G_reg, fbG_reg, fragA_reg, fbG_reg); spe_fnms(f, term2B_reg, fbB_reg, fragA_reg, fbB_reg); -#endif break; - /* XXX more cases */ - // GL_ONE_MINUS_SRC_COLOR - // GL_DST_COLOR - // GL_ONE_MINUS_DST_COLOR - // GL_DST_ALPHA - // GL_CONSTANT_COLOR - // GL_ONE_MINUS_CONSTANT_COLOR - // GL_CONSTANT_ALPHA - // GL_ONE_MINUS_CONSTANT_ALPHA + case PIPE_BLENDFACTOR_DST_COLOR: + /* factors = (Rfb,Gfb,Bfb), so term = (Rfb*Rfb, Gfb*Gfb, Bfb*Bfb) */ + spe_fm(f, term2R_reg, fbR_reg, fbR_reg); + spe_fm(f, term2G_reg, fbG_reg, fbG_reg); + spe_fm(f, term2B_reg, fbB_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factors = (1-Rfb,1-Gfb,1-Bfb), so term = (Rfb*(1-Rfb),Gfb*(1-Gfb),Bfb*(1-Bfb)) + * or term = (Rfb-Rfb*Rfb, Gfb-Gfb*Gfb, Bfb-Bfb*Bfb) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fbR_reg, fbR_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, fbG_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, fbB_reg, fbB_reg); + break; + + case PIPE_BLENDFACTOR_DST_ALPHA: + /* factors = (Afb, Afb, Afb), so term = (Rfb*Afb, Gfb*Afb, Bfb*Afb) */ + spe_fm(f, term2R_reg, fbR_reg, fbA_reg); + spe_fm(f, term2G_reg, fbG_reg, fbA_reg); + spe_fm(f, term2B_reg, fbB_reg, fbA_reg); + break; + case PIPE_BLENDFACTOR_INV_DST_ALPHA: + /* factors = (1-Afb, 1-Afb, 1-Afb), so term = (Rfb*(1-Afb),Gfb*(1-Afb),Bfb*(1-Afb)) + * or term = (Rfb-Rfb*Afb,Gfb-Gfb*Afb,Bfb-Bfb*Afb) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fbR_reg, fbA_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, fbA_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, fbA_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]); + /* now, factor = (Rc,Gc,Bc), so term = (Rfb*Rc,Gfb*Gc,Bfb*Bc) */ + spe_fm(f, term2R_reg, fbR_reg, constR_reg); + spe_fm(f, term2G_reg, fbG_reg, constG_reg); + spe_fm(f, term2B_reg, fbB_reg, constB_reg); + break; + case PIPE_BLENDFACTOR_CONST_ALPHA: + /* we'll need the optional constant alpha register */ + setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]); + /* factor = (Ac,Ac,Ac), so term = (Rfb*Ac,Gfb*Ac,Bfb*Ac) */ + spe_fm(f, term2R_reg, fbR_reg, constA_reg); + spe_fm(f, term2G_reg, fbG_reg, constA_reg); + spe_fm(f, term2B_reg, fbB_reg, constA_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]); + /* factor = (1-Rc,1-Gc,1-Bc), so term = (Rfb*(1-Rc),Gfb*(1-Gc),Bfb*(1-Bc)) + * or term = (Rfb-Rfb*Rc, Gfb-Gfb*Gc, Bfb-Bfb*Bc) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fbR_reg, constR_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, constG_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, constB_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: + /* We need the optional constant color registers */ + setup_const_register(f, &constR_reg_set, &constR_reg, blend_color->color[0]); + setup_const_register(f, &constG_reg_set, &constG_reg, blend_color->color[1]); + setup_const_register(f, &constB_reg_set, &constB_reg, blend_color->color[2]); + /* factor = (1-Ac,1-Ac,1-Ac), so term = (Rfb*(1-Ac),Gfb*(1-Ac),Bfb*(1-Ac)) + * or term = (Rfb-Rfb*Ac,Gfb-Gfb*Ac,Bfb-Bfb*Ac) + * fnms(a,b,c,d) computes a = d - b*c + */ + spe_fnms(f, term2R_reg, fbR_reg, constA_reg, fbR_reg); + spe_fnms(f, term2G_reg, fbG_reg, constA_reg, fbG_reg); + spe_fnms(f, term2B_reg, fbB_reg, constA_reg, fbB_reg); + break; + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest RGB */ + ASSERT(0); + break; + + /* These are special D3D cases involving a second color output + * from the fragment shader. I'm not sure we can support them + * yet... XXX + */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: + default: ASSERT(0); } /* - * Compute Dest Alpha term + * Compute Dest Alpha term. Like the above, we're looking for + * the full term Afb*factor, not just the factor itself, because + * in many cases we can avoid doing unnecessary multiplies. */ switch (blend->alpha_dst_factor) { case PIPE_BLENDFACTOR_ONE: + /* factor = 1, so term = Afb */ spe_move(f, term2A_reg, fbA_reg); break; case PIPE_BLENDFACTOR_ZERO: - spe_zero(f, term2A_reg); + /* factor = 0, so term = 0 */ + spe_load_float(f, term2A_reg, 0.0f); break; - case PIPE_BLENDFACTOR_SRC_ALPHA: + + case PIPE_BLENDFACTOR_SRC_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_SRC_COLOR: + /* factor = A, so term = Afb*A */ spe_fm(f, term2A_reg, fbA_reg, fragA_reg); break; - case PIPE_BLENDFACTOR_INV_SRC_ALPHA: -#if 0 - /* one = {1.0, 1.0, 1.0, 1.0} */ - if (!one_reg_set) { - one_reg = spe_allocate_available_register(f); - spe_load_float(f, one_reg, 1.0f); - one_reg_set = true; - } - /* tmp = one - fragA */ - spe_fs(f, tmp_reg, one_reg, fragA_reg); - /* termA = fbA * tmp */ - spe_fm(f, term2A_reg, fbA_reg, tmp_reg); -#else - /* Compute: term2A = fbA * (1.0 - fragA) - * Which is: term2A = fbA - fbA * fragA - * Use fnms t,a,b,c which computes t=c-a*b - */ + + case PIPE_BLENDFACTOR_INV_SRC_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_SRC_COLOR: + /* factor = 1-A, so term = Afb*(1-A) = Afb-Afb*A */ + /* fnms(a,b,c,d) computes a = d - b*c */ spe_fnms(f, term2A_reg, fbA_reg, fragA_reg, fbA_reg); -#endif break; - /* XXX more cases */ - // GL_ONE_MINUS_SRC_COLOR - // GL_DST_COLOR - // GL_ONE_MINUS_DST_COLOR - // GL_DST_ALPHA - // GL_CONSTANT_COLOR - // GL_ONE_MINUS_CONSTANT_COLOR - // GL_CONSTANT_ALPHA - // GL_ONE_MINUS_CONSTANT_ALPHA + + case PIPE_BLENDFACTOR_DST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_DST_COLOR: + /* factor = Afb, so term = Afb*Afb */ + spe_fm(f, term2A_reg, fbA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_INV_DST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_DST_COLOR: + /* factor = 1-Afb, so term = Afb*(1-Afb) = Afb - Afb*Afb */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term2A_reg, fbA_reg, fbA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_CONST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_CONST_COLOR: + /* We need the optional constA_reg register */ + setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]); + /* factor = Ac, so term = Afb*Ac */ + spe_fm(f, term2A_reg, fbA_reg, constA_reg); + break; + + case PIPE_BLENDFACTOR_INV_CONST_ALPHA: /* fall through */ + case PIPE_BLENDFACTOR_INV_CONST_COLOR: + /* We need the optional constA_reg register */ + setup_const_register(f, &constA_reg_set, &constA_reg, blend_color->color[3]); + /* factor = 1-Ac, so term = Afb*(1-Ac) = Afb-Afb*Ac */ + /* fnms(a,b,c,d) computes a = d - b*c */ + spe_fnms(f, term2A_reg, fbA_reg, constA_reg, fbA_reg); + break; + + case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: /* not supported for dest alpha */ + ASSERT(0); + break; + + /* These are special D3D cases involving a second color output + * from the fragment shader. I'm not sure we can support them + * yet... XXX + */ + case PIPE_BLENDFACTOR_SRC1_COLOR: + case PIPE_BLENDFACTOR_SRC1_ALPHA: + case PIPE_BLENDFACTOR_INV_SRC1_COLOR: + case PIPE_BLENDFACTOR_INV_SRC1_ALPHA: default: ASSERT(0); } /* - * Combine Src/Dest RGB terms + * Combine Src/Dest RGB terms as per the blend equation. */ switch (blend->rgb_func) { case PIPE_BLEND_ADD: @@ -725,14 +835,14 @@ gen_blend(const struct pipe_blend_state *blend, spe_fs(f, fragB_reg, term2B_reg, term1B_reg); break; case PIPE_BLEND_MIN: - FLOAT_VECTOR_MIN(f, fragR_reg, term1R_reg, term2R_reg) - FLOAT_VECTOR_MIN(f, fragG_reg, term1G_reg, term2G_reg) - FLOAT_VECTOR_MIN(f, fragB_reg, term1B_reg, term2B_reg) + spe_float_min(f, fragR_reg, term1R_reg, term2R_reg); + spe_float_min(f, fragG_reg, term1G_reg, term2G_reg); + spe_float_min(f, fragB_reg, term1B_reg, term2B_reg); break; case PIPE_BLEND_MAX: - FLOAT_VECTOR_MAX(f, fragR_reg, term1R_reg, term2R_reg) - FLOAT_VECTOR_MAX(f, fragG_reg, term1G_reg, term2G_reg) - FLOAT_VECTOR_MAX(f, fragB_reg, term1B_reg, term2B_reg) + spe_float_max(f, fragR_reg, term1R_reg, term2R_reg); + spe_float_max(f, fragG_reg, term1G_reg, term2G_reg); + spe_float_max(f, fragB_reg, term1B_reg, term2B_reg); break; default: ASSERT(0); @@ -752,10 +862,10 @@ gen_blend(const struct pipe_blend_state *blend, spe_fs(f, fragA_reg, term2A_reg, term1A_reg); break; case PIPE_BLEND_MIN: - FLOAT_VECTOR_MIN(f, fragA_reg, term1A_reg, term2A_reg) + spe_float_min(f, fragA_reg, term1A_reg, term2A_reg); break; case PIPE_BLEND_MAX: - FLOAT_VECTOR_MAX(f, fragA_reg, term1A_reg, term2A_reg) + spe_float_max(f, fragA_reg, term1A_reg, term2A_reg); break; default: ASSERT(0); @@ -779,9 +889,11 @@ gen_blend(const struct pipe_blend_state *blend, spe_release_register(f, tmp_reg); /* Free any optional registers that actually got used */ - RELEASE_ONE_REG_IF_USED(f) - RELEASE_CONST_COLOR_IF_USED(f) - RELEASE_CONST_ALPHA_IF_USED(f) + release_const_register(f, &one_reg_set, one_reg); + release_const_register(f, &constR_reg_set, constR_reg); + release_const_register(f, &constG_reg_set, constG_reg); + release_const_register(f, &constB_reg_set, constB_reg); + release_const_register(f, &constA_reg_set, constA_reg); } -- cgit v1.2.3 From 5e1ef85dc430a4439cd60b66262eab9062dd5f4f Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 18 Sep 2008 14:48:45 +0200 Subject: tgsi: Make tgsi dumps look more like mesa shader dumps. --- src/gallium/auxiliary/tgsi/tgsi_dump.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index afc8ffa553..3177f54952 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -68,6 +68,7 @@ dump_enum( #define CHR(C) ctx->printf( ctx, "%c", C ) #define UIX(I) ctx->printf( ctx, "0x%x", I ) #define UID(I) ctx->printf( ctx, "%u", I ) +#define INSTID(I) ctx->printf( ctx, "% 3u", I ) #define SID(I) ctx->printf( ctx, "%d", I ) #define FLT(F) ctx->printf( ctx, "%10.4f", F ) #define ENM(E,ENUMS) dump_enum( ctx, E, ENUMS, sizeof( ENUMS ) / sizeof( *ENUMS ) ) @@ -315,8 +316,8 @@ iter_instruction( uint i; boolean first_reg = TRUE; - UID( instno ); - CHR( ':' ); + INSTID( instno ); + TXT( ": " ); TXT( tgsi_get_opcode_info( inst->Instruction.Opcode )->mnemonic ); switch (inst->Instruction.Saturate) { -- cgit v1.2.3 From f68d2a0febca38bc7b31f9ab9718e944935b48bc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 18 Sep 2008 08:10:34 -0600 Subject: gallium: fix surface object memory leak in cso module --- src/gallium/auxiliary/cso_cache/cso_context.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index b1ccfc0374..68508f24de 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -80,6 +80,10 @@ struct cso_context { }; +static void +free_framebuffer_state(struct pipe_framebuffer_state *fb); + + static boolean delete_blend_state(struct cso_context *ctx, void *state) { struct cso_blend *cso = (struct cso_blend *)state; @@ -252,6 +256,9 @@ void cso_release_all( struct cso_context *ctx ) pipe_texture_reference(&ctx->textures_saved[i], NULL); } + free_framebuffer_state(&ctx->fb); + free_framebuffer_state(&ctx->fb_saved); + if (ctx->cache) { cso_cache_delete( ctx->cache ); ctx->cache = NULL; @@ -784,6 +791,18 @@ copy_framebuffer_state(struct pipe_framebuffer_state *dst, } +static void +free_framebuffer_state(struct pipe_framebuffer_state *fb) +{ + uint i; + + for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { + pipe_surface_reference(&fb->cbufs[i], NULL); + } + pipe_surface_reference(&fb->zsbuf, NULL); +} + + enum pipe_error cso_set_framebuffer(struct cso_context *ctx, const struct pipe_framebuffer_state *fb) { @@ -804,6 +823,7 @@ void cso_restore_framebuffer(struct cso_context *ctx) if (memcmp(&ctx->fb, &ctx->fb_saved, sizeof(ctx->fb))) { copy_framebuffer_state(&ctx->fb, &ctx->fb_saved); ctx->pipe->set_framebuffer_state(ctx->pipe, &ctx->fb); + free_framebuffer_state(&ctx->fb_saved); } } -- cgit v1.2.3 From 451888ee8f9d9749a2d52351374a14525c07ef2b Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 18 Sep 2008 22:24:45 +0900 Subject: util: Add missing p_debug.h include. --- src/gallium/auxiliary/util/u_math.h | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 0b10622ee7..196aeb28fa 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -40,6 +40,7 @@ #include "pipe/p_compiler.h" +#include "pipe/p_debug.h" #ifdef __cplusplus -- cgit v1.2.3 From eb5c288df48fa818ecb2bbd71f263ec68ebbef8a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 18 Sep 2008 23:00:18 +0900 Subject: util: A few more memory debugging checks. --- src/gallium/auxiliary/util/p_debug_mem.c | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/p_debug_mem.c index ed18c6540e..9511479cbb 100644 --- a/src/gallium/auxiliary/util/p_debug_mem.c +++ b/src/gallium/auxiliary/util/p_debug_mem.c @@ -122,8 +122,12 @@ debug_malloc(const char *file, unsigned line, const char *function, struct debug_memory_footer *ftr; hdr = real_malloc(sizeof(*hdr) + size + sizeof(*ftr)); - if(!hdr) + if(!hdr) { + debug_printf("%s:%u:%s: out of memory when trying to allocate %lu bytes\n", + file, line, function, + (long unsigned)size); return NULL; + } hdr->no = last_no++; hdr->file = file; @@ -219,8 +223,12 @@ debug_realloc(const char *file, unsigned line, const char *function, /* alloc new */ new_hdr = real_malloc(sizeof(*new_hdr) + new_size + sizeof(*new_ftr)); - if(!new_hdr) + if(!new_hdr) { + debug_printf("%s:%u:%s: out of memory when trying to allocate %lu bytes\n", + file, line, function, + (long unsigned)new_size); return NULL; + } new_hdr->no = old_hdr->no; new_hdr->file = old_hdr->file; new_hdr->line = old_hdr->line; @@ -261,8 +269,19 @@ debug_memory_end(unsigned long start_no) for (; entry != &list; entry = entry->prev) { struct debug_memory_header *hdr; void *ptr; + struct debug_memory_footer *ftr; + hdr = LIST_ENTRY(struct debug_memory_header, entry, head); ptr = data_from_header(hdr); + ftr = footer_from_header(hdr); + + if(hdr->magic != DEBUG_MEMORY_MAGIC) { + debug_printf("%s:%u:%s: bad or corrupted memory %p\n", + hdr->file, hdr->line, hdr->function, + ptr); + debug_assert(0); + } + if((start_no <= hdr->no && hdr->no < last_no) || (last_no < start_no && (hdr->no < last_no || start_no <= hdr->no))) { debug_printf("%s:%u:%s: %u bytes at %p not freed\n", @@ -270,7 +289,15 @@ debug_memory_end(unsigned long start_no) hdr->size, ptr); total_size += hdr->size; } + + if(ftr->magic != DEBUG_MEMORY_MAGIC) { + debug_printf("%s:%u:%s: buffer overflow %p\n", + hdr->file, hdr->line, hdr->function, + ptr); + debug_assert(0); + } } + if(total_size) { debug_printf("Total of %u KB of system memory apparently leaked\n", (total_size + 1023)/1024); -- cgit v1.2.3 From 0b8e19ffc51c29543796d4f1e3243e97d8c32671 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Thu, 18 Sep 2008 16:28:16 +0200 Subject: tgsi: Build tgsi_text with make --- src/gallium/auxiliary/tgsi/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index c5d2082087..c7155a9316 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -13,6 +13,7 @@ C_SOURCES = \ tgsi_parse.c \ tgsi_scan.c \ tgsi_sse2.c \ + tgsi_text.c \ tgsi_transform.c \ tgsi_util.c -- cgit v1.2.3 From a57fbe53dcb54694da9c9b4be1533c9d800079d2 Mon Sep 17 00:00:00 2001 From: Robert Ellison Date: Fri, 19 Sep 2008 01:55:00 -0600 Subject: CELL: add codegen for logic op, color mask - rtasm_ppc_spe.c, rtasm_ppc_spe.h: added a new macro function "spe_load_uint" for loading and splatting unsigned integers in a register; it will use "ila" for values 18 bits or less, "ilh" for word values that are symmetric across halfwords, "ilhu" for values that have zeroes in their bottom halfwords, or "ilhu" followed by "iohl" for general 32-bit values. Of the 15 color masks of interest, 4 are 18 bits or less, 2 are symmetric across halfwords, 3 are zero in the bottom halfword, and 6 require two instructions to load. - cell_gen_fragment.c: added full codegen for logic op and color mask. --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 23 +++- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 4 + src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 143 ++++++++++++++++++++++- 3 files changed, 163 insertions(+), 7 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 12e0826fb9..f60bfba3f5 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -592,11 +592,32 @@ spe_load_int(struct spe_function *p, unsigned rT, int i) } } +void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui) +{ + /* If the whole value is in the lower 18 bits, use ila, which + * doesn't sign-extend. Otherwise, if the two halfwords of + * the constant are identical, use ilh. Otherwise, we have + * to use ilhu followed by iohl. + */ + if ((ui & 0xfffc0000) == ui) { + spe_ila(p, rT, ui); + } + else if ((ui >> 16) == (ui & 0xffff)) { + spe_ilh(p, rT, ui & 0xffff); + } + else { + spe_ilhu(p, rT, ui >> 16); + if (ui & 0xffff) + spe_iohl(p, rT, ui & 0xffff); + } +} + void spe_splat(struct spe_function *p, unsigned rT, unsigned rA) { - spe_ila(p, rT, 66051); + /* Duplicate bytes 0, 1, 2, and 3 across the whole register */ + spe_ila(p, rT, 0x00010203); spe_shufb(p, rT, rA, rA, rT); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 4ef05ea27d..09400b3fb2 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -302,6 +302,10 @@ spe_load_float(struct spe_function *p, unsigned rT, float x); extern void spe_load_int(struct spe_function *p, unsigned rT, int i); +/** Load/splat immediate unsigned int into rT. */ +extern void +spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui); + /** Replicate word 0 of rA across rT. */ extern void spe_splat(struct spe_function *p, unsigned rT, unsigned rA); diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 9d25e820ad..899d8423b2 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -902,8 +902,69 @@ gen_logicop(const struct pipe_blend_state *blend, struct spe_function *f, int fragRGBA_reg, int fbRGBA_reg) { - /* XXX to-do */ - /* operate on 32-bit packed pixels, not float colors */ + /* We've got four 32-bit RGBA packed pixels in each of + * fragRGBA_reg and fbRGBA_reg, not sets of floating-point + * reds, greens, blues, and alphas. + * */ + ASSERT(blend->logicop_enable); + + switch(blend->logicop_func) { + case PIPE_LOGICOP_CLEAR: /* 0 */ + spe_zero(f, fragRGBA_reg); + break; + case PIPE_LOGICOP_NOR: /* ~(s | d) */ + spe_nor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_AND_INVERTED: /* ~s & d */ + /* andc R, A, B computes R = A & ~B */ + spe_andc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); + break; + case PIPE_LOGICOP_COPY_INVERTED: /* ~s */ + spe_complement(f, fragRGBA_reg); + break; + case PIPE_LOGICOP_AND_REVERSE: /* s & ~d */ + /* andc R, A, B computes R = A & ~B */ + spe_andc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_INVERT: /* ~d */ + /* Note that (A nor A) == ~(A|A) == ~A */ + spe_nor(f, fragRGBA_reg, fbRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_XOR: /* s ^ d */ + spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_NAND: /* ~(s & d) */ + spe_nand(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_AND: /* s & d */ + spe_and(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_EQUIV: /* ~(s ^ d) */ + spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + spe_complement(f, fragRGBA_reg); + break; + case PIPE_LOGICOP_NOOP: /* d */ + spe_move(f, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_OR_INVERTED: /* ~s | d */ + /* orc R, A, B computes R = A | ~B */ + spe_orc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); + break; + case PIPE_LOGICOP_COPY: /* s */ + break; + case PIPE_LOGICOP_OR_REVERSE: /* s | ~d */ + /* orc R, A, B computes R = A | ~B */ + spe_orc(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_OR: /* s | d */ + spe_or(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); + break; + case PIPE_LOGICOP_SET: /* 1 */ + spe_load_int(f, fragRGBA_reg, 0xffffffff); + break; + default: + ASSERT(0); + } } @@ -912,11 +973,81 @@ gen_colormask(uint colormask, struct spe_function *f, int fragRGBA_reg, int fbRGBA_reg) { - /* XXX to-do */ - /* operate on 32-bit packed pixels, not float colors */ -} + /* We've got four 32-bit RGBA packed pixels in each of + * fragRGBA_reg and fbRGBA_reg, not sets of floating-point + * reds, greens, blues, and alphas. + * */ + + /* The color mask operation can prevent any set of color + * components in the incoming fragment from being written to the frame + * buffer; we do this by replacing the masked components of the + * fragment with the frame buffer values. + * + * There are only 16 possibilities, with a unique mask for + * each of the possibilities. (Technically, there are only 15 + * possibilities, since we shouldn't be called for the one mask + * that does nothing, but the complete implementation is here + * anyway to avoid confusion.) + * + * We implement this via a constant static array which we'll index + * into to get the correct mask. + * + * We're dependent on the mask values being low-order bits, + * with particular values for each bit; so we start with a + * few assertions, which will fail if any of the values were + * to change. + */ + ASSERT(PIPE_MASK_R == 0x1); + ASSERT(PIPE_MASK_G == 0x2); + ASSERT(PIPE_MASK_B == 0x4); + ASSERT(PIPE_MASK_A == 0x8); + /* Here's the list of all possible colormasks, indexed by the + * value of the combined mask specifier. + */ + static const unsigned int colormasks[16] = { + 0x00000000, /* 0: all colors masked */ + 0xff000000, /* 1: PIPE_MASK_R */ + 0x00ff0000, /* 2: PIPE_MASK_G */ + 0xffff0000, /* 3: PIPE_MASK_R | PIPE_MASK_G */ + 0x0000ff00, /* 4: PIPE_MASK_B */ + 0xff00ff00, /* 5: PIPE_MASK_R | PIPE_MASK_B */ + 0x00ffff00, /* 6: PIPE_MASK_G | PIPE_MASK_B */ + 0xffffff00, /* 7: PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B */ + 0x000000ff, /* 8: PIPE_MASK_A */ + 0xff0000ff, /* 9: PIPE_MASK_R | PIPE_MASK_A */ + 0x00ff00ff, /* 10: PIPE_MASK_G | PIPE_MASK_A */ + 0xffff00ff, /* 11: PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_A */ + 0x0000ffff, /* 12: PIPE_MASK_B | PIPE_MASK_A */ + 0xff00ffff, /* 13: PIPE_MASK_R | PIPE_MASK_B | PIPE_MASK_A */ + 0x00ffffff, /* 14: PIPE_MASK_G | PIPE_MASK_B | PIPE_MASK_A */ + 0xffffffff /* 15: PIPE_MASK_R | PIPE_MASK_G | PIPE_MASK_B | PIPE_MASK_A */ + }; + + /* Get a temporary register to hold the mask */ + int colormask_reg = spe_allocate_available_register(f); + + /* Look up the desired mask directly and load it into the mask register. + * This will load the same mask into each of the four words in the + * mask register. + */ + spe_load_uint(f, colormask_reg, colormasks[colormask]); + + /* Use the mask register to select between the fragment color + * values and the frame buffer color values. Wherever the + * mask has a 0 bit, the current frame buffer color should override + * the fragment color. Wherever the mask has a 1 bit, the + * fragment color should persevere. The Select Bits (selb rt, rA, rB, rM) + * instruction will select bits from its first operand rA wherever the + * the mask bits rM are 0, and from its second operand rB wherever the + * mask bits rM are 1. That means that the frame buffer color is the + * first operand, and the fragment color the second. + */ + spe_selb(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg, colormask_reg); + /* Release the temporary register and we're done */ + spe_release_register(f, colormask_reg); +} /** * Generate code to pack a quad of float colors into a four 32-bit integers. @@ -1223,7 +1354,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) gen_logicop(blend, f, rgba_reg, fbRGBA_reg); } - if (blend->colormask != 0xf) { + if (blend->colormask != PIPE_MASK_RGBA) { gen_colormask(blend->colormask, f, rgba_reg, fbRGBA_reg); } -- cgit v1.2.3 From dda5c0c611d55449a7079c9efeaccc417552c5db Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 19 Sep 2008 23:25:29 +0900 Subject: util: Use OpenGL rasterization rules in blits and mipmap generation. --- src/gallium/auxiliary/util/u_blit.c | 1 + src/gallium/auxiliary/util/u_gen_mipmap.c | 1 + 2 files changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 9adf72944e..d28201ac8d 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -104,6 +104,7 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) ctx->rasterizer.cull_mode = PIPE_WINDING_NONE; ctx->rasterizer.bypass_clipping = 1; /*ctx->rasterizer.bypass_vs = 1;*/ + ctx->rasterizer.gl_rasterization_rules = 1; /* samplers */ memset(&ctx->sampler, 0, sizeof(ctx->sampler)); diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index b19a649bbc..9d305ad763 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -725,6 +725,7 @@ util_create_gen_mipmap(struct pipe_context *pipe, ctx->rasterizer.cull_mode = PIPE_WINDING_NONE; ctx->rasterizer.bypass_clipping = 1; /*ctx->rasterizer.bypass_vs = 1;*/ + ctx->rasterizer.gl_rasterization_rules = 1; /* sampler state */ memset(&ctx->sampler, 0, sizeof(ctx->sampler)); -- cgit v1.2.3 From 0838b702750d85b0284a97be211fa379e9f8d8d8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 19 Sep 2008 09:36:29 -0600 Subject: cell: change spe_complement() to take a src and dst reg, like other instructions --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 14 ++++++++------ src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 4 ++-- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 4 ++-- src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 4 ++-- 4 files changed, 14 insertions(+), 12 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index f60bfba3f5..85280f680a 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -623,9 +623,9 @@ spe_splat(struct spe_function *p, unsigned rT, unsigned rA) void -spe_complement(struct spe_function *p, unsigned rT) +spe_complement(struct spe_function *p, unsigned rT, unsigned rA) { - spe_nor(p, rT, rT, rT); + spe_nor(p, rT, rA, rA); } @@ -667,7 +667,8 @@ spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word) } } -/* For each 32-bit float element of rA and rB, choose the smaller of the +/** + * For each 32-bit float element of rA and rB, choose the smaller of the * two, compositing them into the rT register. * * The Float Compare Greater Than (fcgt) instruction will put 1s into @@ -683,7 +684,7 @@ spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word) * like "x = min(x, a)", we always allocate a new register to be safe. */ void -spe_float_min(struct spe_function *p, unsigned int rT, unsigned int rA, unsigned int rB) +spe_float_min(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) { unsigned int compare_reg = spe_allocate_available_register(p); spe_fcgt(p, compare_reg, rA, rB); @@ -691,7 +692,8 @@ spe_float_min(struct spe_function *p, unsigned int rT, unsigned int rA, unsigned spe_release_register(p, compare_reg); } -/* For each 32-bit float element of rA and rB, choose the greater of the +/** + * For each 32-bit float element of rA and rB, choose the greater of the * two, compositing them into the rT register. * * The logic is similar to that of spe_float_min() above; the only @@ -699,7 +701,7 @@ spe_float_min(struct spe_function *p, unsigned int rT, unsigned int rA, unsigned * so that the larger of the two is selected instead of the smaller. */ void -spe_float_max(struct spe_function *p, unsigned int rT, unsigned int rA, unsigned int rB) +spe_float_max(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) { unsigned int compare_reg = spe_allocate_available_register(p); spe_fcgt(p, compare_reg, rA, rB); diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 09400b3fb2..8a0d70fdac 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -310,9 +310,9 @@ spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui); extern void spe_splat(struct spe_function *p, unsigned rT, unsigned rA); -/** Complement/invert all bits in rT. */ +/** rT = complement_all_bits(rA). */ extern void -spe_complement(struct spe_function *p, unsigned rT); +spe_complement(struct spe_function *p, unsigned rT, unsigned rA); /** rT = rA. */ extern void diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 6f2b89c695..d835aae255 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -924,7 +924,7 @@ emit_IF(struct codegen *gen, const struct tgsi_full_instruction *inst) /* tmp = (s1_reg == 0) */ spe_ceqi(gen->f, tmp_reg, s1_reg, 0); /* tmp = !tmp */ - spe_complement(gen->f, tmp_reg); + spe_complement(gen->f, tmp_reg, tmp_reg); /* exec_mask = exec_mask & tmp */ spe_and(gen->f, exec_reg, exec_reg, tmp_reg); @@ -944,7 +944,7 @@ emit_ELSE(struct codegen *gen, const struct tgsi_full_instruction *inst) spe_comment(gen->f, -4, "ELSE:"); /* exec_mask = !exec_mask */ - spe_complement(gen->f, exec_reg); + spe_complement(gen->f, exec_reg, exec_reg); return true; } diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 899d8423b2..06a9fa102f 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -920,7 +920,7 @@ gen_logicop(const struct pipe_blend_state *blend, spe_andc(f, fragRGBA_reg, fbRGBA_reg, fragRGBA_reg); break; case PIPE_LOGICOP_COPY_INVERTED: /* ~s */ - spe_complement(f, fragRGBA_reg); + spe_complement(f, fragRGBA_reg, fragRGBA_reg); break; case PIPE_LOGICOP_AND_REVERSE: /* s & ~d */ /* andc R, A, B computes R = A & ~B */ @@ -941,7 +941,7 @@ gen_logicop(const struct pipe_blend_state *blend, break; case PIPE_LOGICOP_EQUIV: /* ~(s ^ d) */ spe_xor(f, fragRGBA_reg, fragRGBA_reg, fbRGBA_reg); - spe_complement(f, fragRGBA_reg); + spe_complement(f, fragRGBA_reg, fragRGBA_reg); break; case PIPE_LOGICOP_NOOP: /* d */ spe_move(f, fragRGBA_reg, fbRGBA_reg); -- cgit v1.2.3 From 7af5f944e5709920623c766bc572f8d587709270 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 19 Sep 2008 17:45:51 -0600 Subject: gallium: added spe_code_size() --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 7 +++++++ src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 1 + 2 files changed, 8 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 85280f680a..1c3e21b4c0 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -383,6 +383,13 @@ void spe_release_func(struct spe_function *p) } +/** Return current code size in bytes. */ +unsigned spe_code_size(const struct spe_function *p) +{ + return p->num_inst * SPE_INST_SIZE; +} + + /** * Allocate a SPE register. * \return register index or -1 if none left. diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 8a0d70fdac..4165a971a2 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -72,6 +72,7 @@ struct spe_function extern void spe_init_func(struct spe_function *p, unsigned code_size); extern void spe_release_func(struct spe_function *p); +extern unsigned spe_code_size(const struct spe_function *p); extern int spe_allocate_available_register(struct spe_function *p); extern int spe_allocate_register(struct spe_function *p, int reg); -- cgit v1.2.3 From 99cdfc997b9da10fee57cf1048a55354e1ee4244 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 19 Sep 2008 17:55:54 -0600 Subject: cell: use different opcodes for spe_move() depending on even/odd address --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 1c3e21b4c0..491141f190 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -639,7 +639,13 @@ spe_complement(struct spe_function *p, unsigned rT, unsigned rA) void spe_move(struct spe_function *p, unsigned rT, unsigned rA) { - spe_ori(p, rT, rA, 0); + /* Use different instructions depending on the instruction address + * to take advantage of the dual pipelines. + */ + if (p->num_inst & 1) + spe_shlqbyi(p, rT, rA, 0); /* odd pipe */ + else + spe_ori(p, rT, rA, 0); /* even pipe */ } -- cgit v1.2.3 From 6901d6ef24224c27e20c3e864d035db1552aeeb8 Mon Sep 17 00:00:00 2001 From: Robert Ellison Date: Tue, 23 Sep 2008 10:09:36 -0600 Subject: CELL: improve legibility of CELL_DEBUG environment variable output --- src/gallium/auxiliary/util/p_debug.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index b6cff281e6..3ed8bdfdf3 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -306,6 +306,13 @@ debug_get_flags_option(const char *name, str = _debug_get_option(name); if(!str) result = dfault; + else if (!util_strcmp(str, "help")) { + result = dfault; + while (flags->name) { + debug_printf("%s: help for %s: %s [0x%lx]\n", __FUNCTION__, name, flags->name, flags->value); + flags++; + } + } else { result = 0; while( flags->name ) { @@ -315,7 +322,12 @@ debug_get_flags_option(const char *name, } } - debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result); + if (str) { + debug_printf("%s: %s = 0x%lx (%s)\n", __FUNCTION__, name, result, str); + } + else { + debug_printf("%s: %s = 0x%lx\n", __FUNCTION__, name, result); + } return result; } -- cgit v1.2.3 From 4fe186f8dc4fc7eeab3b1069c886458cfc2e517c Mon Sep 17 00:00:00 2001 From: Alan Hourihane Date: Wed, 24 Sep 2008 20:42:57 +0100 Subject: add cso_hash_contains() function --- src/gallium/auxiliary/cso_cache/cso_hash.c | 6 ++++++ src/gallium/auxiliary/cso_cache/cso_hash.h | 6 ++++++ 2 files changed, 12 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.c b/src/gallium/auxiliary/cso_cache/cso_hash.c index 7f0044c5a7..4e7664f9bf 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.c +++ b/src/gallium/auxiliary/cso_cache/cso_hash.c @@ -431,3 +431,9 @@ struct cso_hash_iter cso_hash_erase(struct cso_hash *hash, struct cso_hash_iter --hash->data.d->size; return ret; } + +boolean cso_hash_contains(struct cso_hash *hash, unsigned key) +{ + struct cso_node **node = cso_hash_find_node(hash, key); + return (*node != hash->data.e); +} diff --git a/src/gallium/auxiliary/cso_cache/cso_hash.h b/src/gallium/auxiliary/cso_cache/cso_hash.h index 85f3e276c6..5891c325fa 100644 --- a/src/gallium/auxiliary/cso_cache/cso_hash.h +++ b/src/gallium/auxiliary/cso_cache/cso_hash.h @@ -44,6 +44,7 @@ #ifndef CSO_HASH_H #define CSO_HASH_H +#include "pipe/p_compiler.h" #ifdef __cplusplus extern "C" { @@ -95,6 +96,11 @@ struct cso_hash_iter cso_hash_first_node(struct cso_hash *hash); */ struct cso_hash_iter cso_hash_find(struct cso_hash *hash, unsigned key); +/** + * Returns true if a value with the given key exists in the hash + */ +boolean cso_hash_contains(struct cso_hash *hash, unsigned key); + int cso_hash_iter_is_null(struct cso_hash_iter iter); unsigned cso_hash_iter_key(struct cso_hash_iter iter); -- cgit v1.2.3 From 9f3e37de17a5636625f0275ca639fdc25ef1e95b Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 23 Sep 2008 16:40:49 +0900 Subject: util: Update fast_log2 article url. --- src/gallium/auxiliary/util/u_math.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 196aeb28fa..084655e6dd 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -254,7 +254,7 @@ util_fast_exp2(float x) /** - * Based on code from http://www.flipcode.com/totd/ + * Based on code from http://www.flipcode.com/archives/Fast_log_Function.shtml */ static INLINE float util_fast_log2(float val) -- cgit v1.2.3 From 938e12c1caee7e34fcc6630f17f422ebdd824ec3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 26 Sep 2008 17:06:22 -0600 Subject: gallium: SPU register comments --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 4165a971a2..61c7edeb60 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -40,10 +40,10 @@ /** number of general-purpose SIMD registers */ #define SPE_NUM_REGS 128 -/** Return Address register */ +/** Return Address register (aka $lr / Link Register) */ #define SPE_REG_RA 0 -/** Stack Pointer register */ +/** Stack Pointer register (aka $sp) */ #define SPE_REG_SP 1 -- cgit v1.2.3 From ab74b8e3549838c0c480555134f5451949bac59f Mon Sep 17 00:00:00 2001 From: Stephane Marchesin Date: Sun, 28 Sep 2008 18:33:23 +0200 Subject: Gallivm: make it compile again, add some opcodes. --- src/gallium/auxiliary/draw/draw_vs_llvm.c | 1 + src/gallium/auxiliary/gallivm/gallivm_cpu.cpp | 1 + src/gallium/auxiliary/gallivm/instructions.cpp | 1210 ++++++++++++++---------- src/gallium/auxiliary/gallivm/instructions.h | 26 +- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 60 +- 5 files changed, 792 insertions(+), 506 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 2ce30b9a02..727977bc3a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -32,6 +32,7 @@ * Brian Paul */ +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "draw_private.h" #include "draw_context.h" diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp index e64bfb1c6c..3a4a41e544 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp @@ -46,6 +46,7 @@ #include "tgsi/tgsi_dump.h" #include "util/u_memory.h" +#include "util/u_math.h" #include #include diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index a82dc30306..5fdfe09d18 100644 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -83,6 +83,7 @@ Instructions::Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicB m_llvmPow = 0; m_llvmFloor = 0; m_llvmFlog = 0; + m_llvmFexp = 0; m_llvmLit = 0; m_fmtPtr = 0; @@ -92,194 +93,247 @@ Instructions::Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicB m_mod = ParseBitcodeFile(buffer); } -llvm::Value * Instructions::add(llvm::Value *in1, llvm::Value *in2) +llvm::BasicBlock * Instructions::currentBlock() const { - return m_builder.CreateAdd(in1, in2, name("add")); + return m_builder.GetInsertBlock(); } -llvm::Value * Instructions::madd(llvm::Value *in1, llvm::Value *in2, - llvm::Value *in3) +llvm::Value * Instructions::abs(llvm::Value *in) { - Value *mulRes = mul(in1, in2); - return add(mulRes, in3); + std::vector vec = extractVector(in); + Value *xabs = callFAbs(vec[0]); + Value *yabs = callFAbs(vec[1]); + Value *zabs = callFAbs(vec[2]); + Value *wabs = callFAbs(vec[3]); + return vectorFromVals(xabs, yabs, zabs, wabs); } - -llvm::Value * Instructions::mul(llvm::Value *in1, llvm::Value *in2) + +llvm::Value * Instructions::add(llvm::Value *in1, llvm::Value *in2) { - return m_builder.CreateMul(in1, in2, name("mul")); + return m_builder.CreateAdd(in1, in2, name("add")); } -const char * Instructions::name(const char *prefix) +llvm::Value * Instructions::arl(llvm::Value *in) { - ++m_idx; - snprintf(m_name, 32, "%s%d", prefix, m_idx); - return m_name; + return floor(in); } -llvm::Value * Instructions::dp3(llvm::Value *in1, llvm::Value *in2) +void Instructions::beginLoop() { - Value *mulRes = mul(in1, in2); - Value *x = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(0), - name("extractx")); - Value *y = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(1), - name("extracty")); - Value *z = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(2), - name("extractz")); - Value *xy = m_builder.CreateAdd(x, y,name("xy")); - Value *dot3 = m_builder.CreateAdd(xy, z, name("dot3")); - return vectorFromVals(dot3, dot3, dot3, dot3); + BasicBlock *begin = BasicBlock::Create(name("loop"), m_func,0); + BasicBlock *end = BasicBlock::Create(name("endloop"), m_func,0); + + m_builder.CreateBr(begin); + Loop loop; + loop.begin = begin; + loop.end = end; + m_builder.SetInsertPoint(begin); + m_loopStack.push(loop); } -llvm::Value *Instructions::callFSqrt(llvm::Value *val) +void Instructions::bgnSub(unsigned label) { - if (!m_llvmFSqrt) { - // predeclare the intrinsic - std::vector fsqrtArgs; - fsqrtArgs.push_back(Type::FloatTy); - PAListPtr fsqrtPal; - FunctionType* fsqrtType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/fsqrtArgs, - /*isVarArg=*/false); - m_llvmFSqrt = Function::Create( - /*Type=*/fsqrtType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"llvm.sqrt.f32", m_mod); - m_llvmFSqrt->setCallingConv(CallingConv::C); - m_llvmFSqrt->setParamAttrs(fsqrtPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFSqrt, val, - name("sqrt")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; + llvm::Function *func = findFunction(label); + + Function::arg_iterator args = func->arg_begin(); + Value *ptr_INPUT = args++; + ptr_INPUT->setName("INPUT"); + m_storage->pushArguments(ptr_INPUT); + + llvm::BasicBlock *entry = BasicBlock::Create("entry", func, 0); + + m_func = func; + m_builder.SetInsertPoint(entry); } -llvm::Value * Instructions::rsq(llvm::Value *in1) +void Instructions::brk() { - Value *x = m_builder.CreateExtractElement(in1, - m_storage->constantInt(0), - name("extractx")); - Value *abs = callFAbs(x); - Value *sqrt = callFSqrt(abs); + assert(!m_loopStack.empty()); + BasicBlock *unr = BasicBlock::Create(name("unreachable"), m_func,0); + m_builder.CreateBr(m_loopStack.top().end); + m_builder.SetInsertPoint(unr); +} - Value *rsqrt = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)), - sqrt, - name("rsqrt")); - return vectorFromVals(rsqrt, rsqrt, rsqrt, rsqrt); +void Instructions::cal(int label, llvm::Value *input) +{ + std::vector params; + params.push_back(input); + llvm::Function *func = findFunction(label); + + m_builder.CreateCall(func, params.begin(), params.end()); } -llvm::Value * Instructions::vectorFromVals(llvm::Value *x, llvm::Value *y, - llvm::Value *z, llvm::Value *w) +llvm::Value * Instructions::clamp(llvm::Value *in1) { - Constant *const_vec = Constant::getNullValue(m_floatVecType); - Value *res = m_builder.CreateInsertElement(const_vec, x, - m_storage->constantInt(0), - name("vecx")); - res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1), - name("vecxy")); - res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2), - name("vecxyz")); - if (w) - res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3), - name("vecxyzw")); - return res; + // FIXME } -llvm::Value *Instructions::callFAbs(llvm::Value *val) +llvm::Value * Instructions::cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) { - if (!m_llvmFAbs) { - // predeclare the intrinsic - std::vector fabsArgs; - fabsArgs.push_back(Type::FloatTy); - PAListPtr fabsPal; - FunctionType* fabsType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/fabsArgs, - /*isVarArg=*/false); - m_llvmFAbs = Function::Create( - /*Type=*/fabsType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"fabs", m_mod); - m_llvmFAbs->setCallingConv(CallingConv::C); - m_llvmFAbs->setParamAttrs(fabsPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFAbs, val, - name("fabs")); - call->setCallingConv(CallingConv::C); + llvm::Function *func = m_mod->getFunction("cmp"); + assert(func); + + std::vector params; + params.push_back(in1); + params.push_back(in2); + params.push_back(in3); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end(), name("cmpres")); call->setTailCall(false); return call; } -llvm::Value * Instructions::lit(llvm::Value *in) +llvm::Value * Instructions::cnd(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) { - if (!m_llvmLit) { - m_llvmLit = m_mod->getFunction("lit"); - } - CallInst *call = m_builder.CreateCall(m_llvmLit, in, name("litres")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); + std::vector vec3 = extractVector(in3); + Constant *half = ConstantFP::get(APFloat(0.5f)); + + Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], half, name("xcmp")); + Value *selx = m_builder.CreateSelect(xcmp, vec2[0], vec3[0], + name("selx")); + + Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], half, name("ycmp")); + Value *sely = m_builder.CreateSelect(ycmp, vec2[1], vec3[1], + name("sely")); + + Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], half, name("zcmp")); + Value *selz = m_builder.CreateSelect(zcmp, vec2[2], vec3[2], + name("selz")); + + Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], half, name("wcmp")); + Value *selw = m_builder.CreateSelect(wcmp, vec2[3], vec3[3], + name("selw")); + + return vectorFromVals(selx, sely, selz, selw); } -llvm::Value * Instructions::sub(llvm::Value *in1, llvm::Value *in2) +llvm::Value * Instructions::cnd0(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) { - Value *res = m_builder.CreateSub(in1, in2, name("sub")); - return res; + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); + std::vector vec3 = extractVector(in3); + Constant *zero = Constant::getNullValue(Type::FloatTy); + + Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], zero, name("xcmp")); + Value *selx = m_builder.CreateSelect(xcmp, vec2[0], vec3[0], + name("selx")); + + Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], zero, name("ycmp")); + Value *sely = m_builder.CreateSelect(ycmp, vec2[1], vec3[1], + name("sely")); + + Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], zero, name("zcmp")); + Value *selz = m_builder.CreateSelect(zcmp, vec2[2], vec3[2], + name("selz")); + + Value *wcmp = m_builder.CreateFCmpOGE(vec1[3], zero, name("wcmp")); + Value *selw = m_builder.CreateSelect(wcmp, vec2[3], vec3[3], + name("selw")); + + return vectorFromVals(selx, sely, selz, selw); } -llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2) +llvm::Value * Instructions::cos(llvm::Value *in) { - if (!m_llvmPow) { - // predeclare the intrinsic - std::vector powArgs; - powArgs.push_back(Type::FloatTy); - powArgs.push_back(Type::FloatTy); - PAListPtr powPal; - FunctionType* powType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/powArgs, - /*isVarArg=*/false); - m_llvmPow = Function::Create( - /*Type=*/powType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"llvm.pow.f32", m_mod); - m_llvmPow->setCallingConv(CallingConv::C); - m_llvmPow->setParamAttrs(powPal); - } - std::vector params; - params.push_back(val1); - params.push_back(val2); - CallInst *call = m_builder.CreateCall(m_llvmPow, params.begin(), params.end(), - name("pow")); - call->setCallingConv(CallingConv::C); +#if 0 + llvm::Function *func = m_mod->getFunction("vcos"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("cosres")); call->setTailCall(false); return call; +#else + std::vector elems = extractVector(in); + Function *func = m_mod->getFunction("cosf"); + assert(func); + CallInst *cos = m_builder.CreateCall(func, elems[0], name("cosres")); + cos->setCallingConv(CallingConv::C); + cos->setTailCall(true); + return vectorFromVals(cos, cos, cos, cos); +#endif } -llvm::Value * Instructions::pow(llvm::Value *in1, llvm::Value *in2) +llvm::Value * Instructions::cross(llvm::Value *in1, llvm::Value *in2) { Value *x1 = m_builder.CreateExtractElement(in1, m_storage->constantInt(0), name("x1")); + Value *y1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(1), + name("y1")); + Value *z1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(2), + name("z1")); + Value *x2 = m_builder.CreateExtractElement(in2, m_storage->constantInt(0), name("x2")); - llvm::Value *val = callPow(x1, x2); - return vectorFromVals(val, val, val, val); + Value *y2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(1), + name("y2")); + Value *z2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(2), + name("z2")); + Value *y1z2 = mul(y1, z2); + Value *z1y2 = mul(z1, y2); + + Value *z1x2 = mul(z1, x2); + Value *x1z2 = mul(x1, z2); + + Value *x1y2 = mul(x1, y2); + Value *y1x2 = mul(y1, x2); + + return vectorFromVals(sub(y1z2, z1y2), sub(z1x2, x1z2), sub(x1y2, y1x2)); } -llvm::Value * Instructions::rcp(llvm::Value *in1) +llvm::Value * Instructions::ddx(llvm::Value *in) { - Value *x1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(0), - name("x1")); - Value *res = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)), - x1, name("rcp")); - return vectorFromVals(res, res, res, res); + // FIXME +} + +llvm::Value * Instructions::ddy(llvm::Value *in) +{ + // FIXME +} + +llvm::Value * Instructions::div(llvm::Value *in1, llvm::Value *in2) +{ + return m_builder.CreateFDiv(in1, in2, name("div")); +} + +llvm::Value * Instructions::dot2add(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) +{ + Value *mulRes = mul(in1, in2); + Value *x = m_builder.CreateExtractElement(mulRes, + m_storage->constantInt(0), + name("extractx")); + Value *y = m_builder.CreateExtractElement(mulRes, + m_storage->constantInt(1), + name("extracty")); + Value *z = m_builder.CreateExtractElement(in3, + m_storage->constantInt(2), + name("extractz")); + Value *xy = m_builder.CreateAdd(x, y,name("xy")); + Value *dot2add = m_builder.CreateAdd(xy, z, name("dot2add")); + return vectorFromVals(dot2add, dot2add, dot2add, dot2add); +} + +llvm::Value * Instructions::dp3(llvm::Value *in1, llvm::Value *in2) +{ + Value *mulRes = mul(in1, in2); + Value *x = m_builder.CreateExtractElement(mulRes, + m_storage->constantInt(0), + name("extractx")); + Value *y = m_builder.CreateExtractElement(mulRes, + m_storage->constantInt(1), + name("extracty")); + Value *z = m_builder.CreateExtractElement(mulRes, + m_storage->constantInt(2), + name("extractz")); + Value *xy = m_builder.CreateAdd(x, y,name("xy")); + Value *dot3 = m_builder.CreateAdd(xy, z, name("dot3")); + return vectorFromVals(dot3, dot3, dot3, dot3); } llvm::Value * Instructions::dp4(llvm::Value *in1, llvm::Value *in2) @@ -302,23 +356,70 @@ llvm::Value * Instructions::dph(llvm::Value *in1, llvm::Value *in2) return vectorFromVals(dph, dph, dph, dph); } -llvm::Value * Instructions::dst(llvm::Value *in1, llvm::Value *in2) +llvm::Value * Instructions::dst(llvm::Value *in1, llvm::Value *in2) +{ + Value *y1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(1), + name("y1")); + Value *z = m_builder.CreateExtractElement(in1, + m_storage->constantInt(2), + name("z")); + Value *y2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(1), + name("y2")); + Value *w = m_builder.CreateExtractElement(in2, + m_storage->constantInt(3), + name("w")); + Value *ry = m_builder.CreateMul(y1, y2, name("tyuy")); + return vectorFromVals(ConstantFP::get(APFloat(1.f)), + ry, z, w); +} + +void Instructions::elseop() +{ + assert(!m_ifStack.empty()); + BasicBlock *ifend = BasicBlock::Create(name("ifend"), m_func,0); + m_builder.CreateBr(ifend); + m_builder.SetInsertPoint(m_ifStack.top()); + currentBlock()->setName(name("ifelse")); + m_ifStack.pop(); + m_ifStack.push(ifend); +} + +void Instructions::endif() +{ + assert(!m_ifStack.empty()); + m_builder.CreateBr(m_ifStack.top()); + m_builder.SetInsertPoint(m_ifStack.top()); + m_ifStack.pop(); +} + +void Instructions::endLoop() +{ + assert(!m_loopStack.empty()); + Loop loop = m_loopStack.top(); + m_builder.CreateBr(loop.begin); + loop.end->moveAfter(currentBlock()); + m_builder.SetInsertPoint(loop.end); + m_loopStack.pop(); +} + +void Instructions::end() +{ + m_builder.CreateRetVoid(); +} + +void Instructions::endSub() +{ + m_func = 0; + m_builder.SetInsertPoint(0); +} + +llvm::Value * Instructions::exp(llvm::Value *in) { - Value *y1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(1), - name("y1")); - Value *z = m_builder.CreateExtractElement(in1, - m_storage->constantInt(2), - name("z")); - Value *y2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(1), - name("y2")); - Value *w = m_builder.CreateExtractElement(in2, - m_storage->constantInt(3), - name("w")); - Value *ry = m_builder.CreateMul(y1, y2, name("tyuy")); - return vectorFromVals(ConstantFP::get(APFloat(1.f)), - ry, z, w); + std::vector vec = extractVector(in); + return vectorFromVals(callFExp(vec[0]), callFExp(vec[1]), + callFExp(vec[2]), callFExp(vec[3])); } llvm::Value * Instructions::ex2(llvm::Value *in) @@ -330,31 +431,6 @@ llvm::Value * Instructions::ex2(llvm::Value *in) return vectorFromVals(val, val, val, val); } -llvm::Value * Instructions::callFloor(llvm::Value *val) -{ - if (!m_llvmFloor) { - // predeclare the intrinsic - std::vector floorArgs; - floorArgs.push_back(Type::FloatTy); - PAListPtr floorPal; - FunctionType* floorType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/floorArgs, - /*isVarArg=*/false); - m_llvmFloor = Function::Create( - /*Type=*/floorType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"floorf", m_mod); - m_llvmFloor->setCallingConv(CallingConv::C); - m_llvmFloor->setParamAttrs(floorPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFloor, val, - name("floorf")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - llvm::Value * Instructions::floor(llvm::Value *in) { std::vector vec = extractVector(in); @@ -362,42 +438,52 @@ llvm::Value * Instructions::floor(llvm::Value *in) callFloor(vec[2]), callFloor(vec[3])); } -llvm::Value * Instructions::arl(llvm::Value *in) -{ - return floor(in); -} - llvm::Value * Instructions::frc(llvm::Value *in) { llvm::Value *flr = floor(in); return sub(in, flr); } -llvm::Value * Instructions::callFLog(llvm::Value *val) +void Instructions::ifop(llvm::Value *in) { - if (!m_llvmFlog) { - // predeclare the intrinsic - std::vector flogArgs; - flogArgs.push_back(Type::FloatTy); - PAListPtr flogPal; - FunctionType* flogType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/flogArgs, - /*isVarArg=*/false); - m_llvmFlog = Function::Create( - /*Type=*/flogType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"logf", m_mod); - m_llvmFlog->setCallingConv(CallingConv::C); - m_llvmFlog->setParamAttrs(flogPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFlog, val, - name("logf")); - call->setCallingConv(CallingConv::C); + BasicBlock *ifthen = BasicBlock::Create(name("ifthen"), m_func,0); + BasicBlock *ifend = BasicBlock::Create(name("ifthenend"), m_func,0); + + //BasicBlock *yblock = new BasicBlock(name("yblock"), m_func,0); + //BasicBlock *zblock = new BasicBlock(name("zblock"), m_func,0); + //BasicBlock *wblock = new BasicBlock(name("wblock"), m_func,0); + + Constant *float0 = Constant::getNullValue(Type::FloatTy); + + Value *x = m_builder.CreateExtractElement(in, m_storage->constantInt(0), + name("extractx")); + Value *xcmp = m_builder.CreateFCmpUNE(x, float0, name("xcmp")); + m_builder.CreateCondBr(xcmp, ifthen, ifend); + //m_builder.SetInsertPoint(yblock); + + m_builder.SetInsertPoint(ifthen); + m_ifStack.push(ifend); +} + +llvm::Value * Instructions::kil(llvm::Value *in) +{ + llvm::Function *func = m_mod->getFunction("kil"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("kilpres")); call->setTailCall(false); return call; } +llvm::Value * Instructions::lerp(llvm::Value *in1, llvm::Value *in2, + llvm::Value *in3) +{ + llvm::Value *m = mul(in1, in2); + llvm::Value *vec1 = constVector(1.f, 1.f, 1.f, 1.f); + llvm::Value *s = sub(vec1, in1); + return add(m, mul(s, in3)); +} + llvm::Value * Instructions::lg2(llvm::Value *in) { std::vector vec = extractVector(in); @@ -407,120 +493,192 @@ llvm::Value * Instructions::lg2(llvm::Value *in) callFLog(vec[2]), callFLog(vec[3])), const_vec); } -llvm::Value * Instructions::min(llvm::Value *in1, llvm::Value *in2) +llvm::Value * Instructions::lit(llvm::Value *in) +{ + if (!m_llvmLit) { + m_llvmLit = m_mod->getFunction("lit"); + } + CallInst *call = m_builder.CreateCall(m_llvmLit, in, name("litres")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::log(llvm::Value *in) +{ + std::vector vec = extractVector(in); + return vectorFromVals(callFLog(vec[0]), callFLog(vec[1]), + callFLog(vec[2]), callFLog(vec[3])); +} + +llvm::Value * Instructions::madd(llvm::Value *in1, llvm::Value *in2, + llvm::Value *in3) +{ + Value *mulRes = mul(in1, in2); + return add(mulRes, in3); +} + +llvm::Value * Instructions::max(llvm::Value *in1, llvm::Value *in2) { std::vector vec1 = extractVector(in1); std::vector vec2 = extractVector(in2); - Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp")); + Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], + name("xcmp")); Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0], name("selx")); - Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp")); + Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], + name("ycmp")); Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1], name("sely")); - Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp")); + Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], + name("zcmp")); Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2], name("selz")); - Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp")); + Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], + name("wcmp")); Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3], name("selw")); return vectorFromVals(selx, sely, selz, selw); } -llvm::Value * Instructions::max(llvm::Value *in1, llvm::Value *in2) +llvm::Value * Instructions::min(llvm::Value *in1, llvm::Value *in2) { std::vector vec1 = extractVector(in1); std::vector vec2 = extractVector(in2); - Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], - name("xcmp")); + Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp")); Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0], name("selx")); - Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], - name("ycmp")); + Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp")); Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1], name("sely")); - Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], - name("zcmp")); + Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp")); Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2], name("selz")); - Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], - name("wcmp")); + Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp")); Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3], name("selw")); return vectorFromVals(selx, sely, selz, selw); } -void Instructions::printVector(llvm::Value *val) +llvm::Value * Instructions::mul(llvm::Value *in1, llvm::Value *in2) { - static const char *frmt = "Vector is [%f, %f, %f, %f]\x0A"; + return m_builder.CreateMul(in1, in2, name("mul")); +} - if (!m_fmtPtr) { - Constant *format = ConstantArray::get(frmt, true); - ArrayType *arrayTy = ArrayType::get(IntegerType::get(8), strlen(frmt) + 1); - GlobalVariable* globalFormat = new GlobalVariable( - /*Type=*/arrayTy, - /*isConstant=*/true, - /*Linkage=*/GlobalValue::InternalLinkage, - /*Initializer=*/0, // has initializer, specified below - /*Name=*/name(".str"), - m_mod); - globalFormat->setInitializer(format); +llvm::Value * Instructions::neg(llvm::Value *in) +{ + Value *neg = m_builder.CreateNeg(in, name("neg")); + return neg; +} - Constant* const_int0 = Constant::getNullValue(IntegerType::get(32)); - std::vector const_ptr_21_indices; - const_ptr_21_indices.push_back(const_int0); - const_ptr_21_indices.push_back(const_int0); - m_fmtPtr = ConstantExpr::getGetElementPtr(globalFormat, - &const_ptr_21_indices[0], const_ptr_21_indices.size()); - } +llvm::Value * Instructions::pow(llvm::Value *in1, llvm::Value *in2) +{ + Value *x1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("x1")); + Value *x2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(0), + name("x2")); + llvm::Value *val = callPow(x1, x2); + return vectorFromVals(val, val, val, val); +} - Function *func_printf = m_mod->getFunction("printf"); - if (!func_printf) - func_printf = declarePrintf(); - assert(func_printf); - std::vector vec = extractVector(val); - Value *dx = m_builder.CreateFPExt(vec[0], Type::DoubleTy, name("dx")); - Value *dy = m_builder.CreateFPExt(vec[1], Type::DoubleTy, name("dy")); - Value *dz = m_builder.CreateFPExt(vec[2], Type::DoubleTy, name("dz")); - Value *dw = m_builder.CreateFPExt(vec[3], Type::DoubleTy, name("dw")); - std::vector params; - params.push_back(m_fmtPtr); - params.push_back(dx); - params.push_back(dy); - params.push_back(dz); - params.push_back(dw); - CallInst *call = m_builder.CreateCall(func_printf, params.begin(), params.end(), - name("printf")); - call->setCallingConv(CallingConv::C); - call->setTailCall(true); +llvm::Value * Instructions::rcp(llvm::Value *in1) +{ + Value *x1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("x1")); + Value *res = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)), + x1, name("rcp")); + return vectorFromVals(res, res, res, res); +} + +llvm::Value * Instructions::rsq(llvm::Value *in1) +{ + Value *x = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("extractx")); + Value *abs = callFAbs(x); + Value *sqrt = callFSqrt(abs); + + Value *rsqrt = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)), + sqrt, + name("rsqrt")); + return vectorFromVals(rsqrt, rsqrt, rsqrt, rsqrt); +} + +llvm::Value * Instructions::scs(llvm::Value *in) +{ + llvm::Function *func = m_mod->getFunction("scs"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("scsres")); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::seq(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOEQ(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpOEQ(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); + + Value *zcmp = m_builder.CreateFCmpOEQ(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpOEQ(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); } -llvm::Function * Instructions::declarePrintf() +llvm::Value * Instructions::sfl(llvm::Value *in1, llvm::Value *in2) { - std::vector args; - PAListPtr params; - FunctionType* funcTy = FunctionType::get( - /*Result=*/IntegerType::get(32), - /*Params=*/args, - /*isVarArg=*/true); - Function* func_printf = Function::Create( - /*Type=*/funcTy, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"printf", m_mod); - func_printf->setCallingConv(CallingConv::C); - func_printf->setParamAttrs(params); - return func_printf; + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + return vectorFromVals(const0f, const0f, const0f, const0f); } +llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); + + Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpOGE(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); +} llvm::Value * Instructions::sgt(llvm::Value *in1, llvm::Value *in2) { @@ -543,7 +701,18 @@ llvm::Value * Instructions::sgt(llvm::Value *in1, llvm::Value *in2) return vectorFromVals(x, y, z, w); } -llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2) + +llvm::Value * Instructions::sin(llvm::Value *in) +{ + llvm::Function *func = m_mod->getFunction("vsin"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("sinres")); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::sle(llvm::Value *in1, llvm::Value *in2) { Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); Constant *const0f = Constant::getNullValue(Type::FloatTy); @@ -551,22 +720,21 @@ llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2) std::vector vec1 = extractVector(in1); std::vector vec2 = extractVector(in2); - Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], vec2[0], name("xcmp")); + Value *xcmp = m_builder.CreateFCmpOLE(vec1[0], vec2[0], name("xcmp")); Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); - Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], vec2[1], name("ycmp")); + Value *ycmp = m_builder.CreateFCmpOLE(vec1[1], vec2[1], name("ycmp")); Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); - Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], vec2[2], name("zcmp")); + Value *zcmp = m_builder.CreateFCmpOLE(vec1[2], vec2[2], name("zcmp")); Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); - Value *wcmp = m_builder.CreateFCmpOGE(vec1[3], vec2[3], name("wcmp")); + Value *wcmp = m_builder.CreateFCmpOLE(vec1[3], vec2[3], name("wcmp")); Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); return vectorFromVals(x, y, z, w); } - llvm::Value * Instructions::slt(llvm::Value *in1, llvm::Value *in2) { Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); @@ -590,169 +758,331 @@ llvm::Value * Instructions::slt(llvm::Value *in1, llvm::Value *in2) return vectorFromVals(x, y, z, w); } -llvm::Value * Instructions::cross(llvm::Value *in1, llvm::Value *in2) +llvm::Value * Instructions::sne(llvm::Value *in1, llvm::Value *in2) { - Value *x1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(0), - name("x1")); - Value *y1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(1), - name("y1")); - Value *z1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(2), - name("z1")); + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); - Value *x2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(0), - name("x2")); - Value *y2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(1), - name("y2")); - Value *z2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(2), - name("z2")); - Value *y1z2 = mul(y1, z2); - Value *z1y2 = mul(z1, y2); + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); - Value *z1x2 = mul(z1, x2); - Value *x1z2 = mul(x1, z2); + Value *xcmp = m_builder.CreateFCmpONE(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); - Value *x1y2 = mul(x1, y2); - Value *y1x2 = mul(y1, x2); + Value *ycmp = m_builder.CreateFCmpONE(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); - return vectorFromVals(sub(y1z2, z1y2), sub(z1x2, x1z2), sub(x1y2, y1x2)); + Value *zcmp = m_builder.CreateFCmpONE(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpONE(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); } +llvm::Value * Instructions::str(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); -llvm::Value * Instructions::abs(llvm::Value *in) + return vectorFromVals(const1f, const1f, const1f, const1f); +} + +llvm::Value * Instructions::sub(llvm::Value *in1, llvm::Value *in2) +{ + Value *res = m_builder.CreateSub(in1, in2, name("sub")); + return res; +} + +llvm::Value * Instructions::trunc(llvm::Value *in) { std::vector vec = extractVector(in); - Value *xabs = callFAbs(vec[0]); - Value *yabs = callFAbs(vec[1]); - Value *zabs = callFAbs(vec[2]); - Value *wabs = callFAbs(vec[3]); - return vectorFromVals(xabs, yabs, zabs, wabs); + Value *icastx = m_builder.CreateFPToSI(vec[0], IntegerType::get(32), + name("ftoix")); + Value *icasty = m_builder.CreateFPToSI(vec[1], IntegerType::get(32), + name("ftoiy")); + Value *icastz = m_builder.CreateFPToSI(vec[2], IntegerType::get(32), + name("ftoiz")); + Value *icastw = m_builder.CreateFPToSI(vec[3], IntegerType::get(32), + name("ftoiw")); + Value *fx = m_builder.CreateSIToFP(icastx, Type::FloatTy, + name("fx")); + Value *fy = m_builder.CreateSIToFP(icasty, Type::FloatTy, + name("fy")); + Value *fz = m_builder.CreateSIToFP(icastz, Type::FloatTy, + name("fz")); + Value *fw = m_builder.CreateSIToFP(icastw, Type::FloatTy, + name("fw")); + return vectorFromVals(fx, fy, fz, fw); } -void Instructions::ifop(llvm::Value *in) +llvm::Value * Instructions::x2d(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) { - BasicBlock *ifthen = BasicBlock::Create(name("ifthen"), m_func,0); - BasicBlock *ifend = BasicBlock::Create(name("ifthenend"), m_func,0); + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); + std::vector vec3 = extractVector(in3); - //BasicBlock *yblock = new BasicBlock(name("yblock"), m_func,0); - //BasicBlock *zblock = new BasicBlock(name("zblock"), m_func,0); - //BasicBlock *wblock = new BasicBlock(name("wblock"), m_func,0); + Value *x2x3 = m_builder.CreateMul( vec2[0], vec3[0], name("x2x3")); + Value *y2y3 = m_builder.CreateMul( vec2[1], vec3[1], name("y2y3")); + Value *x1px2x3 = m_builder.CreateAdd (vec1[0], x2x3, name("x1 + x2x3")); + Value *x1px2x3py2y3 = m_builder.CreateAdd (x1px2x3, y2y3, name("x1 + x2x3 + y2y3")); - Constant *float0 = Constant::getNullValue(Type::FloatTy); + Value *x2z3 = m_builder.CreateMul( vec2[0], vec3[2], name("x2z3")); + Value *y2w3 = m_builder.CreateMul( vec2[1], vec3[3], name("y2w3")); + Value *y1px2z3 = m_builder.CreateAdd (vec1[1], x2z3, name("y1 + x2z3")); + Value *y1px2z3py2w3 = m_builder.CreateAdd (y1px2z3, y2w3, name("y1 + x2z3 + y2w3")); - Value *x = m_builder.CreateExtractElement(in, m_storage->constantInt(0), - name("extractx")); - Value *xcmp = m_builder.CreateFCmpUNE(x, float0, name("xcmp")); - m_builder.CreateCondBr(xcmp, ifthen, ifend); - //m_builder.SetInsertPoint(yblock); + return vectorFromVals(x1px2x3py2y3, y1px2z3py2w3, x1px2x3py2y3, y1px2z3py2w3); +} - m_builder.SetInsertPoint(ifthen); - m_ifStack.push(ifend); +void Instructions::printVector(llvm::Value *val) +{ + static const char *frmt = "Vector is [%f, %f, %f, %f]\x0A"; + + if (!m_fmtPtr) { + Constant *format = ConstantArray::get(frmt, true); + ArrayType *arrayTy = ArrayType::get(IntegerType::get(8), strlen(frmt) + 1); + GlobalVariable* globalFormat = new GlobalVariable( + /*Type=*/arrayTy, + /*isConstant=*/true, + /*Linkage=*/GlobalValue::InternalLinkage, + /*Initializer=*/0, // has initializer, specified below + /*Name=*/name(".str"), + m_mod); + globalFormat->setInitializer(format); + + Constant* const_int0 = Constant::getNullValue(IntegerType::get(32)); + std::vector const_ptr_21_indices; + const_ptr_21_indices.push_back(const_int0); + const_ptr_21_indices.push_back(const_int0); + m_fmtPtr = ConstantExpr::getGetElementPtr(globalFormat, + &const_ptr_21_indices[0], const_ptr_21_indices.size()); + } + + Function *func_printf = m_mod->getFunction("printf"); + if (!func_printf) + func_printf = declarePrintf(); + assert(func_printf); + std::vector vec = extractVector(val); + Value *dx = m_builder.CreateFPExt(vec[0], Type::DoubleTy, name("dx")); + Value *dy = m_builder.CreateFPExt(vec[1], Type::DoubleTy, name("dy")); + Value *dz = m_builder.CreateFPExt(vec[2], Type::DoubleTy, name("dz")); + Value *dw = m_builder.CreateFPExt(vec[3], Type::DoubleTy, name("dw")); + std::vector params; + params.push_back(m_fmtPtr); + params.push_back(dx); + params.push_back(dy); + params.push_back(dz); + params.push_back(dw); + CallInst *call = m_builder.CreateCall(func_printf, params.begin(), params.end(), + name("printf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(true); } -llvm::BasicBlock * Instructions::currentBlock() const +const char * Instructions::name(const char *prefix) { - return m_builder.GetInsertBlock(); + ++m_idx; + snprintf(m_name, 32, "%s%d", prefix, m_idx); + return m_name; } -void Instructions::elseop() +llvm::Value *Instructions::callFAbs(llvm::Value *val) { - assert(!m_ifStack.empty()); - BasicBlock *ifend = BasicBlock::Create(name("ifend"), m_func,0); - m_builder.CreateBr(ifend); - m_builder.SetInsertPoint(m_ifStack.top()); - currentBlock()->setName(name("ifelse")); - m_ifStack.pop(); - m_ifStack.push(ifend); + if (!m_llvmFAbs) { + // predeclare the intrinsic + std::vector fabsArgs; + fabsArgs.push_back(Type::FloatTy); + PAListPtr fabsPal; + FunctionType* fabsType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/fabsArgs, + /*isVarArg=*/false); + m_llvmFAbs = Function::Create( + /*Type=*/fabsType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"fabs", m_mod); + m_llvmFAbs->setCallingConv(CallingConv::C); + m_llvmFAbs->setParamAttrs(fabsPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFAbs, val, + name("fabs")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; } -void Instructions::endif() +llvm::Value * Instructions::callFExp(llvm::Value *val) { - assert(!m_ifStack.empty()); - m_builder.CreateBr(m_ifStack.top()); - m_builder.SetInsertPoint(m_ifStack.top()); - m_ifStack.pop(); + if (!m_llvmFexp) { + // predeclare the intrinsic + std::vector fexpArgs; + fexpArgs.push_back(Type::FloatTy); + PAListPtr fexpPal; + FunctionType* fexpType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/fexpArgs, + /*isVarArg=*/false); + m_llvmFexp = Function::Create( + /*Type=*/fexpType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"expf", m_mod); + m_llvmFexp->setCallingConv(CallingConv::C); + m_llvmFexp->setParamAttrs(fexpPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFexp, val, + name("expf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; } -llvm::Value * Instructions::lerp(llvm::Value *in1, llvm::Value *in2, - llvm::Value *in3) +llvm::Value * Instructions::callFLog(llvm::Value *val) { - llvm::Value *m = mul(in1, in2); - llvm::Value *vec1 = constVector(1.f, 1.f, 1.f, 1.f); - llvm::Value *s = sub(vec1, in1); - return add(m, mul(s, in3)); + if (!m_llvmFlog) { + // predeclare the intrinsic + std::vector flogArgs; + flogArgs.push_back(Type::FloatTy); + PAListPtr flogPal; + FunctionType* flogType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/flogArgs, + /*isVarArg=*/false); + m_llvmFlog = Function::Create( + /*Type=*/flogType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"logf", m_mod); + m_llvmFlog->setCallingConv(CallingConv::C); + m_llvmFlog->setParamAttrs(flogPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFlog, val, + name("logf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; } -void Instructions::beginLoop() +llvm::Value * Instructions::callFloor(llvm::Value *val) { - BasicBlock *begin = BasicBlock::Create(name("loop"), m_func,0); - BasicBlock *end = BasicBlock::Create(name("endloop"), m_func,0); - - m_builder.CreateBr(begin); - Loop loop; - loop.begin = begin; - loop.end = end; - m_builder.SetInsertPoint(begin); - m_loopStack.push(loop); + if (!m_llvmFloor) { + // predeclare the intrinsic + std::vector floorArgs; + floorArgs.push_back(Type::FloatTy); + PAListPtr floorPal; + FunctionType* floorType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/floorArgs, + /*isVarArg=*/false); + m_llvmFloor = Function::Create( + /*Type=*/floorType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"floorf", m_mod); + m_llvmFloor->setCallingConv(CallingConv::C); + m_llvmFloor->setParamAttrs(floorPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFloor, val, + name("floorf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; } -void Instructions::endLoop() +llvm::Value *Instructions::callFSqrt(llvm::Value *val) { - assert(!m_loopStack.empty()); - Loop loop = m_loopStack.top(); - m_builder.CreateBr(loop.begin); - loop.end->moveAfter(currentBlock()); - m_builder.SetInsertPoint(loop.end); - m_loopStack.pop(); + if (!m_llvmFSqrt) { + // predeclare the intrinsic + std::vector fsqrtArgs; + fsqrtArgs.push_back(Type::FloatTy); + PAListPtr fsqrtPal; + FunctionType* fsqrtType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/fsqrtArgs, + /*isVarArg=*/false); + m_llvmFSqrt = Function::Create( + /*Type=*/fsqrtType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"llvm.sqrt.f32", m_mod); + m_llvmFSqrt->setCallingConv(CallingConv::C); + m_llvmFSqrt->setParamAttrs(fsqrtPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFSqrt, val, + name("sqrt")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; } -void Instructions::brk() +llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2) { - assert(!m_loopStack.empty()); - BasicBlock *unr = BasicBlock::Create(name("unreachable"), m_func,0); - m_builder.CreateBr(m_loopStack.top().end); - m_builder.SetInsertPoint(unr); + if (!m_llvmPow) { + // predeclare the intrinsic + std::vector powArgs; + powArgs.push_back(Type::FloatTy); + powArgs.push_back(Type::FloatTy); + PAListPtr powPal; + FunctionType* powType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/powArgs, + /*isVarArg=*/false); + m_llvmPow = Function::Create( + /*Type=*/powType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"llvm.pow.f32", m_mod); + m_llvmPow->setCallingConv(CallingConv::C); + m_llvmPow->setParamAttrs(powPal); + } + std::vector params; + params.push_back(val1); + params.push_back(val2); + CallInst *call = m_builder.CreateCall(m_llvmPow, params.begin(), params.end(), + name("pow")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; } -llvm::Value * Instructions::trunc(llvm::Value *in) +llvm::Value * Instructions::vectorFromVals(llvm::Value *x, llvm::Value *y, + llvm::Value *z, llvm::Value *w) { - std::vector vec = extractVector(in); - Value *icastx = m_builder.CreateFPToSI(vec[0], IntegerType::get(32), - name("ftoix")); - Value *icasty = m_builder.CreateFPToSI(vec[1], IntegerType::get(32), - name("ftoiy")); - Value *icastz = m_builder.CreateFPToSI(vec[2], IntegerType::get(32), - name("ftoiz")); - Value *icastw = m_builder.CreateFPToSI(vec[3], IntegerType::get(32), - name("ftoiw")); - Value *fx = m_builder.CreateSIToFP(icastx, Type::FloatTy, - name("fx")); - Value *fy = m_builder.CreateSIToFP(icasty, Type::FloatTy, - name("fy")); - Value *fz = m_builder.CreateSIToFP(icastz, Type::FloatTy, - name("fz")); - Value *fw = m_builder.CreateSIToFP(icastw, Type::FloatTy, - name("fw")); - return vectorFromVals(fx, fy, fz, fw); + Constant *const_vec = Constant::getNullValue(m_floatVecType); + Value *res = m_builder.CreateInsertElement(const_vec, x, + m_storage->constantInt(0), + name("vecx")); + res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1), + name("vecxy")); + res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2), + name("vecxyz")); + if (w) + res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3), + name("vecxyzw")); + return res; } -void Instructions::end() +llvm::Value * Instructions::constVector(float x, float y, float z, float w) { - m_builder.CreateRetVoid(); + std::vector vec(4); + vec[0] = ConstantFP::get(APFloat(x)); + vec[1] = ConstantFP::get(APFloat(y)); + vec[2] = ConstantFP::get(APFloat(z)); + vec[3] = ConstantFP::get(APFloat(w)); + return ConstantVector::get(m_floatVecType, vec); } -void Instructions::cal(int label, llvm::Value *input) +llvm::Function * Instructions::declarePrintf() { - std::vector params; - params.push_back(input); - llvm::Function *func = findFunction(label); - - m_builder.CreateCall(func, params.begin(), params.end()); + std::vector args; + PAListPtr params; + FunctionType* funcTy = FunctionType::get( + /*Result=*/IntegerType::get(32), + /*Params=*/args, + /*isVarArg=*/true); + Function* func_printf = Function::Create( + /*Type=*/funcTy, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"printf", m_mod); + func_printf->setCallingConv(CallingConv::C); + func_printf->setParamAttrs(params); + return func_printf; } llvm::Function * Instructions::declareFunc(int label) @@ -778,27 +1108,6 @@ llvm::Function * Instructions::declareFunc(int label) return func; } -void Instructions::bgnSub(unsigned label) -{ - llvm::Function *func = findFunction(label); - - Function::arg_iterator args = func->arg_begin(); - Value *ptr_INPUT = args++; - ptr_INPUT->setName("INPUT"); - m_storage->pushArguments(ptr_INPUT); - - llvm::BasicBlock *entry = BasicBlock::Create("entry", func, 0); - - m_func = func; - m_builder.SetInsertPoint(entry); -} - -void Instructions::endSub() -{ - m_func = 0; - m_builder.SetInsertPoint(0); -} - llvm::Function * Instructions::findFunction(int label) { llvm::Function *func = m_functions[label]; @@ -809,17 +1118,6 @@ llvm::Function * Instructions::findFunction(int label) return func; } -llvm::Value * Instructions::constVector(float x, float y, float z, float w) -{ - std::vector vec(4); - vec[0] = ConstantFP::get(APFloat(x)); - vec[1] = ConstantFP::get(APFloat(y)); - vec[2] = ConstantFP::get(APFloat(z)); - vec[3] = ConstantFP::get(APFloat(w)); - return ConstantVector::get(m_floatVecType, vec); -} - - std::vector Instructions::extractVector(llvm::Value *vec) { std::vector elems(4); @@ -834,69 +1132,7 @@ std::vector Instructions::extractVector(llvm::Value *vec) return elems; } -llvm::Value * Instructions::cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) -{ - llvm::Function *func = m_mod->getFunction("cmp"); - assert(func); - - std::vector params; - params.push_back(in1); - params.push_back(in2); - params.push_back(in3); - CallInst *call = m_builder.CreateCall(func, params.begin(), params.end(), name("cmpres")); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::cos(llvm::Value *in) -{ -#if 0 - llvm::Function *func = m_mod->getFunction("vcos"); - assert(func); - - CallInst *call = m_builder.CreateCall(func, in, name("cosres")); - call->setTailCall(false); - return call; -#else - std::vector elems = extractVector(in); - Function *func = m_mod->getFunction("cosf"); - assert(func); - CallInst *cos = m_builder.CreateCall(func, elems[0], name("cosres")); - cos->setCallingConv(CallingConv::C); - cos->setTailCall(true); - return vectorFromVals(cos, cos, cos, cos); -#endif -} - -llvm::Value * Instructions::scs(llvm::Value *in) -{ - llvm::Function *func = m_mod->getFunction("scs"); - assert(func); - - CallInst *call = m_builder.CreateCall(func, in, name("scsres")); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::kil(llvm::Value *in) -{ - llvm::Function *func = m_mod->getFunction("kil"); - assert(func); - - CallInst *call = m_builder.CreateCall(func, in, name("kilpres")); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::sin(llvm::Value *in) -{ - llvm::Function *func = m_mod->getFunction("vsin"); - assert(func); - CallInst *call = m_builder.CreateCall(func, in, name("sinres")); - call->setTailCall(false); - return call; -} #endif //MESA_LLVM diff --git a/src/gallium/auxiliary/gallivm/instructions.h b/src/gallium/auxiliary/gallivm/instructions.h index d286ce80c7..8df30f62c8 100644 --- a/src/gallium/auxiliary/gallivm/instructions.h +++ b/src/gallium/auxiliary/gallivm/instructions.h @@ -57,15 +57,22 @@ public: llvm::BasicBlock *currentBlock() const; llvm::Value *abs(llvm::Value *in1); - llvm::Value *arl(llvm::Value *in1); llvm::Value *add(llvm::Value *in1, llvm::Value *in2); + llvm::Value *arl(llvm::Value *in1); void beginLoop(); void bgnSub(unsigned); void brk(); void cal(int label, llvm::Value *input); + llvm::Value *clamp(llvm::Value *in); llvm::Value *cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); + llvm::Value *cnd(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); + llvm::Value *cnd0(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); llvm::Value *cos(llvm::Value *in); llvm::Value *cross(llvm::Value *in1, llvm::Value *in2); + llvm::Value *ddx(llvm::Value *in); + llvm::Value *ddy(llvm::Value *in); + llvm::Value *div(llvm::Value *in1, llvm::Value *in2); + llvm::Value *dot2add(llvm::Value *in, llvm::Value *in2, llvm::Value *in3); llvm::Value *dp3(llvm::Value *in1, llvm::Value *in2); llvm::Value *dp4(llvm::Value *in1, llvm::Value *in2); llvm::Value *dph(llvm::Value *in1, llvm::Value *in2); @@ -75,6 +82,7 @@ public: void endLoop(); void end(); void endSub(); + llvm::Value *exp(llvm::Value *in); llvm::Value *ex2(llvm::Value *in); llvm::Value *floor(llvm::Value *in); llvm::Value *frc(llvm::Value *in); @@ -82,32 +90,41 @@ public: llvm::Value *kil(llvm::Value *in); llvm::Value *lerp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); - llvm::Value *lit(llvm::Value *in); llvm::Value *lg2(llvm::Value *in); + llvm::Value *lit(llvm::Value *in); + llvm::Value *log(llvm::Value *in); llvm::Value *madd(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); - llvm::Value *min(llvm::Value *in1, llvm::Value *in2); llvm::Value *max(llvm::Value *in1, llvm::Value *in2); + llvm::Value *min(llvm::Value *in1, llvm::Value *in2); llvm::Value *mul(llvm::Value *in1, llvm::Value *in2); + llvm::Value *neg(llvm::Value *in); llvm::Value *pow(llvm::Value *in1, llvm::Value *in2); llvm::Value *rcp(llvm::Value *in); llvm::Value *rsq(llvm::Value *in); llvm::Value *scs(llvm::Value *in); + llvm::Value *seq(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sfl(llvm::Value *in1, llvm::Value *in2); llvm::Value *sge(llvm::Value *in1, llvm::Value *in2); llvm::Value *sgt(llvm::Value *in1, llvm::Value *in2); llvm::Value *sin(llvm::Value *in); + llvm::Value *sle(llvm::Value *in1, llvm::Value *in2); llvm::Value *slt(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sne(llvm::Value *in1, llvm::Value *in2); + llvm::Value *str(llvm::Value *in1, llvm::Value *in2); llvm::Value *sub(llvm::Value *in1, llvm::Value *in2); llvm::Value *trunc(llvm::Value *in); + llvm::Value *x2d(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); void printVector(llvm::Value *val); private: const char *name(const char *prefix); llvm::Value *callFAbs(llvm::Value *val); + llvm::Value *callFExp(llvm::Value *val); + llvm::Value *callFLog(llvm::Value *val); llvm::Value *callFloor(llvm::Value *val); llvm::Value *callFSqrt(llvm::Value *val); - llvm::Value *callFLog(llvm::Value *val); llvm::Value *callPow(llvm::Value *val1, llvm::Value *val2); llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, @@ -135,6 +152,7 @@ private: llvm::Function *m_llvmPow; llvm::Function *m_llvmFloor; llvm::Function *m_llvmFlog; + llvm::Function *m_llvmFexp; llvm::Function *m_llvmLit; llvm::Constant *m_fmtPtr; diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index cc1516a45e..398fbd67bd 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -286,9 +286,13 @@ translate_instruction(llvm::Module *module, out = instr->rsq(inputs[0]); } break; - case TGSI_OPCODE_EXP: + case TGSI_OPCODE_EXP: { + out = instr->exp(inputs[0]); + } break; - case TGSI_OPCODE_LOG: + case TGSI_OPCODE_LOG: { + out = instr->log(inputs[0]); + } break; case TGSI_OPCODE_MUL: { out = instr->mul(inputs[0], inputs[1]); @@ -338,21 +342,31 @@ translate_instruction(llvm::Module *module, out = instr->lerp(inputs[0], inputs[1], inputs[2]); } break; - case TGSI_OPCODE_CND: + case TGSI_OPCODE_CND: { + out = instr->cnd(inputs[0], inputs[1], inputs[2]); + } break; - case TGSI_OPCODE_CND0: + case TGSI_OPCODE_CND0: { + out = instr->cnd0(inputs[0], inputs[1], inputs[2]); + } break; - case TGSI_OPCODE_DOT2ADD: + case TGSI_OPCODE_DOT2ADD: { + out = instr->dot2add(inputs[0], inputs[1], inputs[2]); + } break; case TGSI_OPCODE_INDEX: break; - case TGSI_OPCODE_NEGATE: + case TGSI_OPCODE_NEGATE: { + out = instr->neg(inputs[0]); + } break; case TGSI_OPCODE_FRAC: { out = instr->frc(inputs[0]); } break; - case TGSI_OPCODE_CLAMP: + case TGSI_OPCODE_CLAMP: { + out = instr->clamp(inputs[0]); + } break; case TGSI_OPCODE_FLOOR: { out = instr->floor(inputs[0]); @@ -392,9 +406,13 @@ translate_instruction(llvm::Module *module, out = instr->cos(inputs[0]); } break; - case TGSI_OPCODE_DDX: + case TGSI_OPCODE_DDX: { + out = instr->ddx(inputs[0]); + } break; - case TGSI_OPCODE_DDY: + case TGSI_OPCODE_DDY: { + out = instr->ddy(inputs[0]); + } break; case TGSI_OPCODE_KILP: break; @@ -408,9 +426,13 @@ translate_instruction(llvm::Module *module, break; case TGSI_OPCODE_RFL: break; - case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SEQ: { + out = instr->seq(inputs[0], inputs[1]); + } break; - case TGSI_OPCODE_SFL: + case TGSI_OPCODE_SFL: { + out = instr->sfl(inputs[0], inputs[1]); + } break; case TGSI_OPCODE_SGT: { out = instr->sgt(inputs[0], inputs[1]); @@ -420,11 +442,17 @@ translate_instruction(llvm::Module *module, out = instr->sin(inputs[0]); } break; - case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SLE: { + out = instr->sle(inputs[0], inputs[1]); + } break; - case TGSI_OPCODE_SNE: + case TGSI_OPCODE_SNE: { + out = instr->sne(inputs[0], inputs[1]); + } break; - case TGSI_OPCODE_STR: + case TGSI_OPCODE_STR: { + out = instr->str(inputs[0], inputs[1]); + } break; case TGSI_OPCODE_TEX: break; @@ -438,7 +466,9 @@ translate_instruction(llvm::Module *module, break; case TGSI_OPCODE_UP4UB: break; - case TGSI_OPCODE_X2D: + case TGSI_OPCODE_X2D: { + out = instr->x2d(inputs[0], inputs[1], inputs[2]); + } break; case TGSI_OPCODE_ARA: break; -- cgit v1.2.3 From a0a06cbc5b26d7530bd5066f09efe3c1f980d35d Mon Sep 17 00:00:00 2001 From: Stephane Marchesin Date: Sun, 28 Sep 2008 19:48:26 +0200 Subject: Gallivm: more instructions. --- src/gallium/auxiliary/gallivm/instructions.cpp | 61 ++++++++++++++++++++++++-- src/gallium/auxiliary/gallivm/instructions.h | 5 +++ src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 15 ++++--- 3 files changed, 73 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index 5fdfe09d18..3eaf9aacf6 100644 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -163,9 +163,18 @@ void Instructions::cal(int label, llvm::Value *input) m_builder.CreateCall(func, params.begin(), params.end()); } +llvm::Value * Instructions::ceil(llvm::Value *in) +{ + std::vector vec = extractVector(in); + return vectorFromVals(callCeil(vec[0]), callCeil(vec[1]), + callCeil(vec[2]), callCeil(vec[3])); +} + llvm::Value * Instructions::clamp(llvm::Value *in1) { - // FIXME + llvm::Value *zero = constVector(0.0f, 0.0f, 0.0f, 0.0f); + llvm::Value *one = constVector(1.0f, 1.0f, 1.0f, 1.0f); + return min( max(zero, in1), one); } llvm::Value * Instructions::cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) @@ -289,12 +298,14 @@ llvm::Value * Instructions::cross(llvm::Value *in1, llvm::Value *in2) llvm::Value * Instructions::ddx(llvm::Value *in) { - // FIXME + // FIXME + assert(0); } llvm::Value * Instructions::ddy(llvm::Value *in) { - // FIXME + // FIXME + assert(0); } llvm::Value * Instructions::div(llvm::Value *in1, llvm::Value *in2) @@ -319,6 +330,19 @@ llvm::Value * Instructions::dot2add(llvm::Value *in1, llvm::Value *in2, llvm::Va return vectorFromVals(dot2add, dot2add, dot2add, dot2add); } +llvm::Value * Instructions::dp2(llvm::Value *in1, llvm::Value *in2) +{ + Value *mulRes = mul(in1, in2); + Value *x = m_builder.CreateExtractElement(mulRes, + m_storage->constantInt(0), + name("extractx")); + Value *y = m_builder.CreateExtractElement(mulRes, + m_storage->constantInt(1), + name("extracty")); + Value *xy = m_builder.CreateAdd(x, y,name("xy")); + return vectorFromVals(xy, xy, xy, xy); +} + llvm::Value * Instructions::dp3(llvm::Value *in1, llvm::Value *in2) { Value *mulRes = mul(in1, in2); @@ -581,6 +605,12 @@ llvm::Value * Instructions::neg(llvm::Value *in) return neg; } +llvm::Value * Instructions::nrm(llvm::Value *in) +{ + llvm::Value *v = rsq(in); + return mul(v, in); +} + llvm::Value * Instructions::pow(llvm::Value *in1, llvm::Value *in2) { Value *x1 = m_builder.CreateExtractElement(in1, @@ -887,6 +917,31 @@ const char * Instructions::name(const char *prefix) return m_name; } +llvm::Value * Instructions::callCeil(llvm::Value *val) +{ + if (!m_llvmCeil) { + // predeclare the intrinsic + std::vector ceilArgs; + ceilArgs.push_back(Type::FloatTy); + PAListPtr ceilPal; + FunctionType* ceilType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/ceilArgs, + /*isVarArg=*/false); + m_llvmCeil = Function::Create( + /*Type=*/ceilType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"ceilf", m_mod); + m_llvmCeil->setCallingConv(CallingConv::C); + m_llvmCeil->setParamAttrs(ceilPal); + } + CallInst *call = m_builder.CreateCall(m_llvmCeil, val, + name("ceilf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + llvm::Value *Instructions::callFAbs(llvm::Value *val) { if (!m_llvmFAbs) { diff --git a/src/gallium/auxiliary/gallivm/instructions.h b/src/gallium/auxiliary/gallivm/instructions.h index 8df30f62c8..c3b28e9746 100644 --- a/src/gallium/auxiliary/gallivm/instructions.h +++ b/src/gallium/auxiliary/gallivm/instructions.h @@ -63,6 +63,7 @@ public: void bgnSub(unsigned); void brk(); void cal(int label, llvm::Value *input); + llvm::Value *ceil(llvm::Value *in); llvm::Value *clamp(llvm::Value *in); llvm::Value *cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); llvm::Value *cnd(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); @@ -73,6 +74,7 @@ public: llvm::Value *ddy(llvm::Value *in); llvm::Value *div(llvm::Value *in1, llvm::Value *in2); llvm::Value *dot2add(llvm::Value *in, llvm::Value *in2, llvm::Value *in3); + llvm::Value *dp2(llvm::Value *in1, llvm::Value *in2); llvm::Value *dp3(llvm::Value *in1, llvm::Value *in2); llvm::Value *dp4(llvm::Value *in1, llvm::Value *in2); llvm::Value *dph(llvm::Value *in1, llvm::Value *in2); @@ -99,6 +101,7 @@ public: llvm::Value *min(llvm::Value *in1, llvm::Value *in2); llvm::Value *mul(llvm::Value *in1, llvm::Value *in2); llvm::Value *neg(llvm::Value *in); + llvm::Value *nrm(llvm::Value *in); llvm::Value *pow(llvm::Value *in1, llvm::Value *in2); llvm::Value *rcp(llvm::Value *in); llvm::Value *rsq(llvm::Value *in); @@ -120,6 +123,7 @@ public: private: const char *name(const char *prefix); + llvm::Value *callCeil(llvm::Value *val); llvm::Value *callFAbs(llvm::Value *val); llvm::Value *callFExp(llvm::Value *val); llvm::Value *callFLog(llvm::Value *val); @@ -147,6 +151,7 @@ private: llvm::VectorType *m_floatVecType; + llvm::Function *m_llvmCeil; llvm::Function *m_llvmFSqrt; llvm::Function *m_llvmFAbs; llvm::Function *m_llvmPow; diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 398fbd67bd..fdfbb76c16 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -498,11 +498,18 @@ translate_instruction(llvm::Module *module, break; case TGSI_OPCODE_TXB: break; - case TGSI_OPCODE_NRM: + case TGSI_OPCODE_NRM4: + case TGSI_OPCODE_NRM: { + out = instr->nrm(inputs[0]); + } break; - case TGSI_OPCODE_DIV: + case TGSI_OPCODE_DIV: { + out = instr->div(inputs[0], inputs[1]); + } break; - case TGSI_OPCODE_DP2: + case TGSI_OPCODE_DP2: { + out = instr->dp2(inputs[0], inputs[1]); + } break; case TGSI_OPCODE_TXL: break; @@ -620,8 +627,6 @@ translate_instruction(llvm::Module *module, break; case TGSI_OPCODE_M3X2: break; - case TGSI_OPCODE_NRM4: - break; case TGSI_OPCODE_CALLNZ: break; case TGSI_OPCODE_IFC: -- cgit v1.2.3 From 7379d0ef8f533b0aa760cd21b219223602002a56 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin Date: Sun, 28 Sep 2008 23:18:55 +0200 Subject: Gallivm: fix off-by-one. --- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index efddc04e81..9a3ed9f538 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -259,7 +259,7 @@ void InstructionsSoa::createBuiltins() { MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( (const char*)&soabuiltins_data[0], - (const char*)&soabuiltins_data[Elements(soabuiltins_data)]); + (const char*)&soabuiltins_data[Elements(soabuiltins_data)-1]); m_builtins = ParseBitcodeFile(buffer); std::cout<<"Builtins created at "< Date: Mon, 29 Sep 2008 19:09:39 +0900 Subject: rtasm: Implement immediate group 1 instructions. Fix SIB emition. --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 66 +++++++++++++++++++++++++----- src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 11 ++--- 2 files changed, 62 insertions(+), 15 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 6d4c081e04..3bba9dcc07 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -240,7 +240,8 @@ static void emit_modrm( struct x86_function *p, /* Oh-oh we've stumbled into the SIB thing. */ if (regmem.file == file_REG32 && - regmem.idx == reg_SP) { + regmem.idx == reg_SP && + regmem.mod != mod_REG) { emit_1ub(p, 0x24); /* simplistic! */ } @@ -435,25 +436,70 @@ void x86_call( struct x86_function *p, struct x86_reg reg) } -/* michal: - * Temporary. As I need immediate operands, and dont want to mess with the codegen, - * I load the immediate into general purpose register and use it. - */ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ) { DUMP_RI( dst, imm ); + assert(dst.file == file_REG32); assert(dst.mod == mod_REG); emit_1ub(p, 0xb8 + dst.idx); emit_1i(p, imm); } -void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm ) +/** + * Immediate group 1 instructions. + */ +static INLINE void +x86_group1_imm( struct x86_function *p, + unsigned op, struct x86_reg dst, int imm ) { - DUMP_RI( dst, imm ); + assert(dst.file == file_REG32); assert(dst.mod == mod_REG); - emit_1ub(p, 0x80); - emit_modrm_noreg(p, 0, dst); - emit_1ub(p, imm); + if(-0x80 <= imm && imm < 0x80) { + emit_1ub(p, 0x83); + emit_modrm_noreg(p, op, dst); + emit_1b(p, (char)imm); + } + else { + emit_1ub(p, 0x81); + emit_modrm_noreg(p, op, dst); + emit_1i(p, imm); + } +} + +void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 0, dst, imm); +} + +void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 1, dst, imm); +} + +void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 4, dst, imm); +} + +void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 5, dst, imm); +} + +void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 6, dst, imm); +} + +void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ) +{ + DUMP_RI( dst, imm ); + x86_group1_imm(p, 7, dst, imm); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index af94577aab..510aa1b0de 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -152,12 +152,13 @@ void x86_jmp( struct x86_function *p, int label ); /* void x86_call( struct x86_function *p, void (*label)() ); */ void x86_call( struct x86_function *p, struct x86_reg reg); -/* michal: - * Temporary. As I need immediate operands, and dont want to mess with the codegen, - * I load the immediate into general purpose register and use it. - */ void x86_mov_reg_imm( struct x86_function *p, struct x86_reg dst, int imm ); -void x86_add_reg_imm8( struct x86_function *p, struct x86_reg dst, ubyte imm ); +void x86_add_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_or_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_and_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_sub_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_xor_imm( struct x86_function *p, struct x86_reg dst, int imm ); +void x86_cmp_imm( struct x86_function *p, struct x86_reg dst, int imm ); /* Macro for sse_shufps() and sse2_pshufd(): -- cgit v1.2.3 From 906336cd7ce5ff1cf9d10fb21375b9c0bcd5fe57 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 30 Sep 2008 01:07:09 +0900 Subject: util: Header for SSE2 intrinsics portability. --- src/gallium/auxiliary/util/u_sse.h | 72 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 72 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_sse.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h new file mode 100644 index 0000000000..0c8356cd05 --- /dev/null +++ b/src/gallium/auxiliary/util/u_sse.h @@ -0,0 +1,72 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * SSE intrinsics portability header. + * + * Although the SSE intrinsics are support by all modern x86 and x86-64 + * compilers, there are some intrisincs missing in some implementations + * (especially older MSVC versions). This header abstracts that away. + */ + +#ifndef U_SSE_H_ +#define U_SSE_H_ + +#include +#include + + +/* MSVC before VC8 does not support the _mm_castxxx_yyy */ +#if defined(_MSC_VER) && _MSC_VER < 1500 + +union __declspec(align(16)) m128_types { + __m128 m128; + __m128i m128i; + __m128d m128d; +}; + +static __inline __m128 +_mm_castsi128_ps(__m128i a) +{ + union m128_types u; + u.m128i = a; + return u.m128; +} + +static __inline __m128i +_mm_castps_si128(__m128 a) +{ + union m128_types u; + u.m128 = a; + return u.m128i; +} + +#endif + + +#endif /* U_SSE_H_ */ -- cgit v1.2.3 From 5dc8e67078be8b8c42a809311debd275ac7d64a7 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 30 Sep 2008 01:12:52 +0900 Subject: tgsi: SSE2 optimized exp2, log2 and pow implementations. Special care must be taken when calling compiler generated SSE2 functions from the runtime generated SSE2: saving the xmm registers, and notify gcc the stack is not 16byte aligned. It would be more efficient to keep the stack pointer 16byte aligned, but too hairy, and not consistent in all x86 architectures. This has been tested in linux x86 and windows x86 userspace. Not tested on x86-64 because it is broken for other reasons (even without this change). --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 287 ++++++++++++++++++++++++--------- 1 file changed, 211 insertions(+), 76 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 4681b29f52..4fdad3a5c7 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -28,6 +28,7 @@ #include "pipe/p_debug.h" #include "pipe/p_shader_tokens.h" #include "util/u_math.h" +#include "util/u_sse.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_exec.h" @@ -480,10 +481,31 @@ emit_coef_dady( * Function call helpers. */ +/** + * NOTE: In gcc, if the destination uses the SSE intrinsics, then it must be + * defined with __attribute__((force_align_arg_pointer)), as we do not guarantee + * that the stack pointer is 16 byte aligned, as expected. + */ static void -emit_push_gp( - struct x86_function *func ) +emit_func_call_dst( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst, + void (PIPE_CDECL *code)() ) { + struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); + unsigned i, n, xmm; + unsigned xmm_mask; + + /* Bitmask of the xmm registers to save */ + xmm_mask = (1 << xmm_save) - 1; + xmm_mask &= ~(1 << xmm_dst); + + sse_movaps( + func, + get_temp( TEMP_R0, 0 ), + make_xmm( xmm_dst ) ); + x86_push( func, x86_make_reg( file_REG32, reg_AX) ); @@ -493,12 +515,49 @@ emit_push_gp( x86_push( func, x86_make_reg( file_REG32, reg_DX) ); -} + + for(i = 0, n = 0; i < 8; ++i) + if(xmm_mask & (1 << i)) + ++n; + + x86_sub_imm( + func, + x86_make_reg( file_REG32, reg_SP ), + n*16); + + for(i = 0, n = 0; i < 8; ++i) + if(xmm_mask & (1 << i)) { + sse_movups( + func, + x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ), + make_xmm( xmm ) ); + ++n; + } + + x86_lea( + func, + ecx, + get_temp( TEMP_R0, 0 ) ); + + x86_push( func, ecx ); + x86_mov_reg_imm( func, ecx, (unsigned long) code ); + x86_call( func, ecx ); + x86_pop(func, ecx ); + + for(i = 0, n = 0; i < 8; ++i) + if(xmm_mask & (1 << i)) { + sse_movups( + func, + make_xmm( xmm ), + x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ) ); + ++n; + } + + x86_add_imm( + func, + x86_make_reg( file_REG32, reg_SP ), + n*16); -static void -x86_pop_gp( - struct x86_function *func ) -{ /* Restore GP registers in a reverse order. */ x86_pop( @@ -510,39 +569,6 @@ x86_pop_gp( x86_pop( func, x86_make_reg( file_REG32, reg_AX) ); -} - -static void -emit_func_call_dst( - struct x86_function *func, - unsigned xmm_dst, - void (PIPE_CDECL *code)() ) -{ - sse_movaps( - func, - get_temp( TEMP_R0, 0 ), - make_xmm( xmm_dst ) ); - - emit_push_gp( - func ); - - { - struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); - - x86_lea( - func, - ecx, - get_temp( TEMP_R0, 0 ) ); - - x86_push( func, ecx ); - x86_mov_reg_imm( func, ecx, (unsigned long) code ); - x86_call( func, ecx ); - x86_pop(func, ecx ); - } - - - x86_pop_gp( - func ); sse_movaps( func, @@ -553,6 +579,7 @@ emit_func_call_dst( static void emit_func_call_dst_src( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst, unsigned xmm_src, void (PIPE_CDECL *code)() ) @@ -564,10 +591,111 @@ emit_func_call_dst_src( emit_func_call_dst( func, + xmm_save, xmm_dst, code ); } +/* + * Fast SSE2 implementation of special math functions. + */ + +#define POLY0(x, c0) _mm_set1_ps(c0) +#define POLY1(x, c0, c1) _mm_add_ps(_mm_mul_ps(POLY0(x, c1), x), _mm_set1_ps(c0)) +#define POLY2(x, c0, c1, c2) _mm_add_ps(_mm_mul_ps(POLY1(x, c1, c2), x), _mm_set1_ps(c0)) +#define POLY3(x, c0, c1, c2, c3) _mm_add_ps(_mm_mul_ps(POLY2(x, c1, c2, c3), x), _mm_set1_ps(c0)) +#define POLY4(x, c0, c1, c2, c3, c4) _mm_add_ps(_mm_mul_ps(POLY3(x, c1, c2, c3, c4), x), _mm_set1_ps(c0)) +#define POLY5(x, c0, c1, c2, c3, c4, c5) _mm_add_ps(_mm_mul_ps(POLY4(x, c1, c2, c3, c4, c5), x), _mm_set1_ps(c0)) + +#define EXP_POLY_DEGREE 3 +#define LOG_POLY_DEGREE 5 + +/** + * See http://www.devmaster.net/forums/showthread.php?p=43580 + */ +static INLINE __m128 +exp2f4(__m128 x) +{ + __m128i ipart; + __m128 fpart, expipart, expfpart; + + x = _mm_min_ps(x, _mm_set1_ps( 129.00000f)); + x = _mm_max_ps(x, _mm_set1_ps(-126.99999f)); + + /* ipart = int(x - 0.5) */ + ipart = _mm_cvtps_epi32(_mm_sub_ps(x, _mm_set1_ps(0.5f))); + + /* fpart = x - ipart */ + fpart = _mm_sub_ps(x, _mm_cvtepi32_ps(ipart)); + + /* expipart = (float) (1 << ipart) */ + expipart = _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(ipart, _mm_set1_epi32(127)), 23)); + + /* minimax polynomial fit of 2**x, in range [-0.5, 0.5[ */ +#if EXP_POLY_DEGREE == 5 + expfpart = POLY5(fpart, 9.9999994e-1f, 6.9315308e-1f, 2.4015361e-1f, 5.5826318e-2f, 8.9893397e-3f, 1.8775767e-3f); +#elif EXP_POLY_DEGREE == 4 + expfpart = POLY4(fpart, 1.0000026f, 6.9300383e-1f, 2.4144275e-1f, 5.2011464e-2f, 1.3534167e-2f); +#elif EXP_POLY_DEGREE == 3 + expfpart = POLY3(fpart, 9.9992520e-1f, 6.9583356e-1f, 2.2606716e-1f, 7.8024521e-2f); +#elif EXP_POLY_DEGREE == 2 + expfpart = POLY2(fpart, 1.0017247f, 6.5763628e-1f, 3.3718944e-1f); +#else +#error +#endif + + return _mm_mul_ps(expipart, expfpart); +} + +/** + * See http://www.devmaster.net/forums/showthread.php?p=43580 + */ +static INLINE __m128 +log2f4(__m128 x) +{ + __m128i expmask = _mm_set1_epi32(0x7f800000); + __m128i mantmask = _mm_set1_epi32(0x007fffff); + __m128 one = _mm_set1_ps(1.0f); + + __m128i i = _mm_castps_si128(x); + + /* exp = (float) exponent(x) */ + __m128 exp = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(i, expmask), 23), _mm_set1_epi32(127))); + + /* mant = (float) mantissa(x) */ + __m128 mant = _mm_or_ps(_mm_castsi128_ps(_mm_and_si128(i, mantmask)), one); + + __m128 logmant; + + /* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ + * These coefficients can be generate with + * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html + */ +#if LOG_POLY_DEGREE == 6 + logmant = POLY5(mant, 3.11578814719469302614f, -3.32419399085241980044f, 2.59883907202499966007f, -1.23152682416275988241f, 0.318212422185251071475f, -0.0344359067839062357313f); +#elif LOG_POLY_DEGREE == 5 + logmant = POLY4(mant, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f); +#elif LOG_POLY_DEGREE == 4 + logmant = POLY3(mant, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f); +#elif LOG_POLY_DEGREE == 3 + logmant = POLY2(mant, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f); +#else +#error +#endif + + /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ + logmant = _mm_mul_ps(logmant, _mm_sub_ps(mant, one)); + + return _mm_add_ps(logmant, exp); +} + +static INLINE __m128 +powf4(__m128 x, __m128 y) +{ + return exp2f4(_mm_mul_ps(log2f4(x), y)); +} + + /** * Low-level instruction translators. */ @@ -610,38 +738,35 @@ cos4f( static void emit_cos( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, + xmm_save, xmm_dst, cos4f ); } static void PIPE_CDECL +#if defined(PIPE_CC_GCC) +__attribute__((force_align_arg_pointer)) +#endif ex24f( float *store ) { -#if FAST_MATH - store[0] = util_fast_exp2( store[0] ); - store[1] = util_fast_exp2( store[1] ); - store[2] = util_fast_exp2( store[2] ); - store[3] = util_fast_exp2( store[3] ); -#else - store[0] = powf( 2.0f, store[0] ); - store[1] = powf( 2.0f, store[1] ); - store[2] = powf( 2.0f, store[2] ); - store[3] = powf( 2.0f, store[3] ); -#endif + _mm_store_ps(&store[0], exp2f4( _mm_load_ps(&store[0]) )); } static void emit_ex2( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, + xmm_save, xmm_dst, ex24f ); } @@ -670,10 +795,12 @@ flr4f( static void emit_flr( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, + xmm_save, xmm_dst, flr4f ); } @@ -691,31 +818,35 @@ frc4f( static void emit_frc( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, + xmm_save, xmm_dst, frc4f ); } static void PIPE_CDECL +#if defined(PIPE_CC_GCC) +__attribute__((force_align_arg_pointer)) +#endif lg24f( float *store ) { - store[0] = util_fast_log2( store[0] ); - store[1] = util_fast_log2( store[1] ); - store[2] = util_fast_log2( store[2] ); - store[3] = util_fast_log2( store[3] ); + _mm_store_ps(&store[0], log2f4( _mm_load_ps(&store[0]) )); } static void emit_lg2( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, + xmm_save, xmm_dst, lg24f ); } @@ -757,14 +888,14 @@ emit_neg( } static void PIPE_CDECL +#if defined(PIPE_CC_GCC) +__attribute__((force_align_arg_pointer)) +#endif pow4f( float *store ) { -#if FAST_MATH - store[0] = util_fast_pow( store[0], store[4] ); - store[1] = util_fast_pow( store[1], store[5] ); - store[2] = util_fast_pow( store[2], store[6] ); - store[3] = util_fast_pow( store[3], store[7] ); +#if 1 + _mm_store_ps(&store[0], powf4( _mm_load_ps(&store[0]), _mm_load_ps(&store[4]) )); #else store[0] = powf( store[0], store[4] ); store[1] = powf( store[1], store[5] ); @@ -776,11 +907,13 @@ pow4f( static void emit_pow( struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst, unsigned xmm_src ) { emit_func_call_dst_src( func, + xmm_save, xmm_dst, xmm_src, pow4f ); @@ -873,10 +1006,12 @@ sin4f( static void emit_sin (struct x86_function *func, + unsigned xmm_save, unsigned xmm_dst) { emit_func_call_dst( func, + xmm_save, xmm_dst, sin4f ); } @@ -1296,7 +1431,7 @@ emit_instruction( get_temp( TGSI_EXEC_TEMP_MINUS_128_I, TGSI_EXEC_TEMP_MINUS_128_C ) ); - emit_pow( func, 1, 2 ); + emit_pow( func, 3, 1, 2 ); FETCH( func, *inst, 0, 0, CHAN_X ); sse_xorps( func, @@ -1342,11 +1477,11 @@ emit_instruction( if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { emit_MOV( func, 1, 0 ); - emit_flr( func, 1 ); + emit_flr( func, 2, 1 ); /* dst.x = ex2(floor(src.x)) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) { emit_MOV( func, 2, 1 ); - emit_ex2( func, 2 ); + emit_ex2( func, 3, 2 ); STORE( func, *inst, 2, 0, CHAN_X ); } /* dst.y = src.x - floor(src.x) */ @@ -1358,7 +1493,7 @@ emit_instruction( } /* dst.z = ex2(src.x) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { - emit_ex2( func, 0 ); + emit_ex2( func, 3, 0 ); STORE( func, *inst, 0, 0, CHAN_Z ); } } @@ -1376,21 +1511,21 @@ emit_instruction( FETCH( func, *inst, 0, 0, CHAN_X ); emit_abs( func, 0 ); emit_MOV( func, 1, 0 ); - emit_lg2( func, 1 ); + emit_lg2( func, 2, 1 ); /* dst.z = lg2(abs(src.x)) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { STORE( func, *inst, 1, 0, CHAN_Z ); } if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { - emit_flr( func, 1 ); + emit_flr( func, 2, 1 ); /* dst.x = floor(lg2(abs(src.x))) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) { STORE( func, *inst, 1, 0, CHAN_X ); } /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */ if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { - emit_ex2( func, 1 ); + emit_ex2( func, 2, 1 ); emit_rcp( func, 1, 1 ); emit_mul( func, 0, 1 ); STORE( func, *inst, 0, 0, CHAN_Y ); @@ -1580,7 +1715,7 @@ emit_instruction( /* TGSI_OPCODE_FRC */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); - emit_frc( func, 0 ); + emit_frc( func, 0, 0 ); STORE( func, *inst, 0, 0, chan_index ); } break; @@ -1593,7 +1728,7 @@ emit_instruction( /* TGSI_OPCODE_FLR */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); - emit_flr( func, 0 ); + emit_flr( func, 0, 0 ); STORE( func, *inst, 0, 0, chan_index ); } break; @@ -1605,7 +1740,7 @@ emit_instruction( case TGSI_OPCODE_EXPBASE2: /* TGSI_OPCODE_EX2 */ FETCH( func, *inst, 0, 0, CHAN_X ); - emit_ex2( func, 0 ); + emit_ex2( func, 0, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 0, 0, chan_index ); } @@ -1614,7 +1749,7 @@ emit_instruction( case TGSI_OPCODE_LOGBASE2: /* TGSI_OPCODE_LG2 */ FETCH( func, *inst, 0, 0, CHAN_X ); - emit_lg2( func, 0 ); + emit_lg2( func, 0, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 0, 0, chan_index ); } @@ -1624,7 +1759,7 @@ emit_instruction( /* TGSI_OPCODE_POW */ FETCH( func, *inst, 0, 0, CHAN_X ); FETCH( func, *inst, 1, 1, CHAN_X ); - emit_pow( func, 0, 1 ); + emit_pow( func, 0, 0, 1 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 0, 0, chan_index ); } @@ -1715,7 +1850,7 @@ emit_instruction( case TGSI_OPCODE_COS: FETCH( func, *inst, 0, 0, CHAN_X ); - emit_cos( func, 0 ); + emit_cos( func, 0, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 0, 0, chan_index ); } @@ -1774,7 +1909,7 @@ emit_instruction( case TGSI_OPCODE_SIN: FETCH( func, *inst, 0, 0, CHAN_X ); - emit_sin( func, 0 ); + emit_sin( func, 0, 0 ); FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE( func, *inst, 0, 0, chan_index ); } @@ -1868,12 +2003,12 @@ emit_instruction( case TGSI_OPCODE_SCS: IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { FETCH( func, *inst, 0, 0, CHAN_X ); - emit_cos( func, 0 ); + emit_cos( func, 0, 0 ); STORE( func, *inst, 0, 0, CHAN_X ); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { FETCH( func, *inst, 0, 0, CHAN_X ); - emit_sin( func, 0 ); + emit_sin( func, 0, 0 ); STORE( func, *inst, 0, 0, CHAN_Y ); } IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { -- cgit v1.2.3 From 8415d06d90a197e16554dab98d160334fd9f9f93 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 1 Oct 2008 01:13:40 +0900 Subject: util: Fix util_fast_pow/exp2/log2. - Use a lookup table for log2. - Compute (float) (1 << ipart) by tweaking with the exponent directly to avoid integer overflow and float conversion. - Also table negative exponents to avoid float division and branching. - Implement util_fast_exp as function of util_fast_exp2. --- src/gallium/auxiliary/util/u_math.c | 21 +++++-- src/gallium/auxiliary/util/u_math.h | 112 +++++++++++++++--------------------- 2 files changed, 64 insertions(+), 69 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_math.c b/src/gallium/auxiliary/util/u_math.c index 0729114d6a..5b3cab4642 100644 --- a/src/gallium/auxiliary/util/u_math.c +++ b/src/gallium/auxiliary/util/u_math.c @@ -30,7 +30,7 @@ #include "util/u_math.h" - +/** 2^x, for x in [-1.0, 1.0[ */ float pow2_table[POW2_TABLE_SIZE]; @@ -38,9 +38,21 @@ static void init_pow2_table(void) { int i; - for (i = 0; i < POW2_TABLE_SIZE; i++) { - pow2_table[i] = (float) pow(2.0, i / POW2_TABLE_SCALE); - } + for (i = 0; i < POW2_TABLE_SIZE; i++) + pow2_table[i] = (float) pow(2.0, (i - POW2_TABLE_OFFSET) / POW2_TABLE_SCALE); +} + + +/** log2(x), for x in [1.0, 2.0[ */ +float log2_table[LOG2_TABLE_SIZE]; + + +static void +init_log2_table(void) +{ + unsigned i; + for (i = 0; i < LOG2_TABLE_SIZE; i++) + log2_table[i] = (float) log2(1.0 + i * (1.0 / LOG2_TABLE_SIZE)); } @@ -53,6 +65,7 @@ util_init_math(void) static boolean initialized = FALSE; if (!initialized) { init_pow2_table(); + init_log2_table(); initialized = TRUE; } } diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 084655e6dd..be7303e550 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -174,8 +174,10 @@ static INLINE float logf( float f ) -#define POW2_TABLE_SIZE 256 -#define POW2_TABLE_SCALE ((float) (POW2_TABLE_SIZE-1)) +#define POW2_TABLE_SIZE_LOG2 9 +#define POW2_TABLE_SIZE (1 << POW2_TABLE_SIZE_LOG2) +#define POW2_TABLE_OFFSET (POW2_TABLE_SIZE/2) +#define POW2_TABLE_SCALE ((float)(POW2_TABLE_SIZE/2)) extern float pow2_table[POW2_TABLE_SIZE]; @@ -186,98 +188,78 @@ util_init_math(void); union fi { float f; - int i; - unsigned ui; + int32_t i; + uint32_t ui; }; /** - * Fast approximation to exp(x). - * Compute with base 2 exponents: exp(x) = exp2(log2(e) * x) - * Note: log2(e) is a constant, k = 1.44269 - * So, exp(x) = exp2(k * x); + * Fast version of 2^x * Identity: exp2(a + b) = exp2(a) * exp2(b) - * Let ipart = int(k*x) - * Let fpart = k*x - ipart; - * So, exp2(k*x) = exp2(ipart) * exp2(fpart) + * Let ipart = int(x) + * Let fpart = x - ipart; + * So, exp2(x) = exp2(ipart) * exp2(fpart) * Compute exp2(ipart) with i << ipart * Compute exp2(fpart) with lookup table. */ static INLINE float -util_fast_exp(float x) +util_fast_exp2(float x) { - if (x >= 0.0f) { - float k = 1.44269f; /* = log2(e) */ - float kx = k * x; - int ipart = (int) kx; - float fpart = kx - (float) ipart; - float y = (float) (1 << ipart) - * pow2_table[(int) (fpart * POW2_TABLE_SCALE)]; - return y; - } - else { - /* exp(-x) = 1.0 / exp(x) */ - float k = -1.44269f; - float kx = k * x; - int ipart = (int) kx; - float fpart = kx - (float) ipart; - float y = (float) (1 << ipart) - * pow2_table[(int) (fpart * POW2_TABLE_SCALE)]; - return 1.0f / y; - } + int32_t ipart; + float fpart, mpart; + union fi epart; + + if(x > 129.00000f) + return 3.402823466e+38f; + + if(x < -126.99999f) + return 0.0f; + + ipart = (int32_t) x; + fpart = x - (float) ipart; + + /* same as + * epart.f = (float) (1 << ipart) + * but faster and without integer overflow for ipart > 31 */ + epart.i = (ipart + 127 ) << 23; + + mpart = pow2_table[POW2_TABLE_OFFSET + (int)(fpart * POW2_TABLE_SCALE)]; + + return epart.f * mpart; } /** - * Fast version of 2^x - * XXX the above function could be implemented in terms of this one. + * Fast approximation to exp(x). */ static INLINE float -util_fast_exp2(float x) +util_fast_exp(float x) { - if (x >= 0.0f) { - int ipart = (int) x; - float fpart = x - (float) ipart; - float y = (float) (1 << ipart) - * pow2_table[(int) (fpart * POW2_TABLE_SCALE)]; - return y; - } - else { - /* exp(-x) = 1.0 / exp(x) */ - int ipart = (int) -x; - float fpart = -x - (float) ipart; - float y = (float) (1 << ipart) - * pow2_table[(int) (fpart * POW2_TABLE_SCALE)]; - return 1.0f / y; - } + const float k = 1.44269f; /* = log2(e) */ + return util_fast_exp2(k * x); } -/** - * Based on code from http://www.flipcode.com/archives/Fast_log_Function.shtml - */ +#define LOG2_TABLE_SIZE_LOG2 8 +#define LOG2_TABLE_SIZE (1 << LOG2_TABLE_SIZE_LOG2) +extern float log2_table[LOG2_TABLE_SIZE]; + + static INLINE float -util_fast_log2(float val) +util_fast_log2(float x) { union fi num; - int log_2; - num.f = val; - log_2 = ((num.i >> 23) & 255) - 128; - num.i &= ~(255 << 23); - num.i += 127 << 23; - num.f = ((-1.0f/3) * num.f + 2) * num.f - 2.0f/3; - return num.f + log_2; + float epart, mpart; + num.f = x; + epart = (float)(((num.i & 0x7f800000) >> 23) - 127); + mpart = log2_table[(num.i & 0x007fffff) >> (23 - LOG2_TABLE_SIZE_LOG2)]; + return epart + mpart; } static INLINE float util_fast_pow(float x, float y) { - /* XXX these tests may need adjustment */ - if (y >= 3.0f && (-0.02f <= x && x <= 0.02f)) - return 0.0f; - if (y >= 50.0f && (-0.9f <= x && x <= 0.9f)) - return 0.0f; return util_fast_exp2(util_fast_log2(x) * y); } -- cgit v1.2.3 From 4ae161e9409f8b5d73306bbf382c7b27d5038ab3 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin Date: Tue, 30 Sep 2008 20:50:49 +0200 Subject: Gallivm: port to llvm 2.4. --- configs/linux-llvm | 1 + src/gallium/auxiliary/gallivm/gallivm_builtins.cpp | 252 ++++++++++----------- src/gallium/auxiliary/gallivm/instructions.cpp | 36 +-- src/gallium/auxiliary/gallivm/instructions.h | 2 +- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 6 +- src/gallium/auxiliary/gallivm/instructionssoa.h | 2 +- 6 files changed, 150 insertions(+), 149 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/configs/linux-llvm b/configs/linux-llvm index 3b32db34d8..489cfd0546 100644 --- a/configs/linux-llvm +++ b/configs/linux-llvm @@ -31,4 +31,5 @@ else LLVM_CXXFLAGS= endif +LD = g++ GL_LIB_DEPS = $(LLVM_LDFLAGS) $(LLVM_LIBS) $(EXTRA_LIB_PATH) -lX11 -lXext -lm -lpthread -lstdc++ diff --git a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp index 0fc5c4ec5c..fcc5c05794 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp @@ -1,140 +1,140 @@ static const unsigned char llvm_builtins_data[] = { -0x42,0x43,0xc0,0xde,0x21,0x0c,0x00,0x00,0x29,0x02,0x00,0x00,0x01,0x10,0x00,0x00, +0x42,0x43,0xc0,0xde,0x21,0x0c,0x00,0x00,0x27,0x02,0x00,0x00,0x01,0x10,0x00,0x00, 0x10,0x00,0x00,0x00,0x07,0x81,0x23,0x91,0x41,0xc8,0x04,0x49,0x06,0x10,0x32,0x39, 0x92,0x01,0x84,0x0c,0x25,0x05,0x08,0x19,0x1e,0x04,0x8b,0x62,0x80,0x14,0x45,0x02, 0x42,0x92,0x0b,0x42,0xa4,0x10,0x32,0x14,0x38,0x08,0x18,0x49,0x0a,0x32,0x44,0x24, 0x48,0x0a,0x90,0x21,0x23,0x44,0x72,0x80,0x8c,0x14,0x21,0x86,0x0a,0x8a,0x0a,0x64, -0x0c,0x1f,0x00,0x00,0x49,0x18,0x00,0x00,0x02,0x00,0x00,0x00,0x0b,0x04,0x00,0x0c, -0x00,0x00,0x00,0x00,0x51,0x20,0x00,0x00,0x12,0x00,0x00,0x00,0x32,0x22,0x48,0x09, -0x20,0x65,0x82,0x84,0x00,0x26,0x45,0x48,0x05,0x09,0x26,0x45,0xc6,0x05,0x42,0x52, -0x26,0x08,0xae,0x19,0x80,0x61,0x04,0x02,0x98,0x23,0x00,0x83,0x29,0x80,0x21,0x00, -0xb2,0x73,0x04,0x01,0x51,0x8a,0xf4,0x08,0x92,0xa4,0x39,0x47,0x80,0x50,0x2b,0x03, -0x00,0xa0,0x08,0x21,0x5c,0x46,0x2b,0x44,0x08,0x21,0xd4,0x40,0x14,0x01,0x80,0x11, -0x80,0x22,0x88,0x00,0x13,0xa2,0x74,0xb0,0x03,0x3c,0xb0,0x83,0x36,0x80,0x87,0x71, -0x68,0x03,0x76,0x48,0x07,0x77,0xa8,0x07,0x7c,0x68,0x83,0x73,0x70,0x87,0x7a,0xd8, -0x70,0x0f,0xe5,0xd0,0x06,0xf0,0xa0,0x07,0x73,0x20,0x07,0x7a,0x30,0x07,0x72,0xa0, -0x07,0x73,0x20,0x07,0x6d,0x90,0x0e,0x71,0xa0,0x07,0x78,0xa0,0x07,0x78,0xd0,0x06, -0xe9,0x80,0x07,0x7a,0x80,0x07,0x7a,0x80,0x07,0x6d,0x90,0x0e,0x71,0x60,0x07,0x7a, -0x10,0x07,0x76,0xa0,0x07,0x71,0x60,0x07,0x6d,0x90,0x0e,0x73,0x20,0x07,0x7a,0x30, -0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x6d,0x90,0x0e,0x76,0x40,0x07,0x7a,0x30,0x07, -0x72,0xa0,0x07,0x76,0x40,0x07,0x6d,0x60,0x0e,0x73,0x20,0x07,0x7a,0x30,0x07,0x72, -0xa0,0x07,0x73,0x20,0x07,0x6d,0x60,0x0e,0x76,0x40,0x07,0x7a,0x30,0x07,0x72,0xa0, -0x07,0x76,0x40,0x07,0x6d,0x60,0x0f,0x76,0x40,0x07,0x7a,0x60,0x07,0x74,0xa0,0x07, -0x76,0x40,0x07,0x6d,0x60,0x0f,0x71,0x20,0x07,0x78,0xa0,0x07,0x71,0x20,0x07,0x78, -0xa0,0x07,0x71,0x20,0x07,0x78,0xd0,0x06,0xe1,0x00,0x07,0x7a,0x00,0x07,0x7a,0x60, -0x07,0x74,0xd0,0x06,0xe6,0x80,0x07,0x70,0xa0,0x07,0x71,0x20,0x07,0x78,0xa0,0x07, -0x71,0x20,0x07,0x78,0xa0,0xf3,0x40,0x88,0x04,0x32,0x32,0x02,0x04,0x20,0x76,0x46, -0xfc,0x6c,0x48,0x92,0x00,0x40,0x00,0x00,0x00,0x00,0x0c,0x49,0x12,0x20,0x00,0x00, -0x00,0x00,0x80,0x21,0x89,0x02,0x00,0x01,0x00,0x00,0x00,0x30,0x24,0x59,0x00,0x20, -0x08,0x00,0x00,0x00,0x86,0x24,0x0a,0x00,0x04,0x00,0x00,0x00,0xc0,0x90,0x84,0x01, -0x02,0x00,0x00,0x00,0x00,0x18,0x92,0x1c,0x40,0x00,0x00,0x00,0x00,0x00,0x43,0x12, -0x05,0x00,0x02,0x00,0x00,0x00,0x60,0x48,0x72,0x00,0x01,0x00,0x00,0x00,0x00,0x0c, -0x49,0x14,0x00,0x08,0x00,0x00,0x00,0x80,0x21,0x49,0x01,0x00,0x41,0x00,0x00,0x00, -0x90,0x05,0x02,0x00,0x10,0x00,0x00,0x00,0x32,0x1e,0x98,0x10,0x19,0x11,0x4c,0x90, +0x0c,0x1f,0x00,0x00,0x49,0x18,0x00,0x00,0x03,0x00,0x00,0x00,0x0b,0x84,0xff,0xff, +0xff,0xff,0x1f,0xc0,0x00,0x00,0x00,0x00,0x51,0x20,0x00,0x00,0x12,0x00,0x00,0x00, +0x32,0x22,0x48,0x09,0x20,0x65,0x82,0x84,0x00,0x26,0x45,0x48,0x05,0x09,0x26,0x45, +0xc6,0x05,0x42,0x52,0x26,0x08,0xae,0x19,0x80,0x61,0x04,0x02,0x98,0x23,0x00,0x83, +0x29,0x80,0x21,0x00,0xb2,0x73,0x04,0x01,0x51,0x8a,0xf4,0x08,0x92,0xa4,0x39,0x47, +0x80,0x50,0x2b,0x03,0x00,0xa0,0x08,0x21,0x5c,0x46,0x2b,0x44,0x08,0x21,0xd4,0x40, +0x14,0x01,0x80,0x11,0x80,0x22,0x88,0x00,0x13,0x30,0x7c,0xc0,0x03,0x3b,0xf8,0x05, +0x3b,0xa0,0x83,0x36,0xa8,0x07,0x77,0x58,0x07,0x77,0x78,0x87,0x7b,0x70,0x87,0x36, +0x60,0x87,0x74,0x70,0x87,0x7a,0xc0,0x87,0x36,0x38,0x07,0x77,0xa8,0x87,0x0d,0xf7, +0x50,0x0e,0x6d,0x00,0x0f,0x7a,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60, +0x07,0x74,0xd0,0x06,0xe9,0x10,0x07,0x7a,0x80,0x07,0x7a,0x80,0x07,0x6d,0x90,0x0e, +0x78,0xa0,0x07,0x78,0xa0,0x07,0x78,0xd0,0x06,0xe9,0x10,0x07,0x76,0xa0,0x07,0x71, +0x60,0x07,0x7a,0x10,0x07,0x76,0xd0,0x06,0xe9,0x30,0x07,0x72,0xa0,0x07,0x73,0x20, +0x07,0x7a,0x30,0x07,0x72,0xd0,0x06,0xe9,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07, +0x7a,0x60,0x07,0x74,0xd0,0x06,0xe6,0x30,0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x7a, +0x30,0x07,0x72,0xd0,0x06,0xe6,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60, +0x07,0x74,0xd0,0x06,0xf6,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60,0x07, +0x74,0xd0,0x06,0xf6,0x10,0x07,0x72,0x80,0x07,0x7a,0x10,0x07,0x72,0x80,0x07,0x7a, +0x10,0x07,0x72,0x80,0x07,0x6d,0x10,0x0e,0x70,0xa0,0x07,0x70,0xa0,0x07,0x76,0x40, +0x07,0x6d,0x60,0x0e,0x78,0x00,0x07,0x7a,0x10,0x07,0x72,0x80,0x07,0x7a,0x10,0x07, +0x72,0x80,0x07,0x3a,0x0f,0x84,0x48,0x20,0x23,0x24,0x40,0x00,0x62,0x67,0x88,0x9f, +0x19,0x92,0x24,0x00,0x10,0x04,0x00,0x00,0x00,0x43,0x92,0x04,0x08,0x00,0x00,0x00, +0x00,0x60,0x48,0xa2,0x00,0x40,0x10,0x00,0x00,0x00,0x0c,0x49,0x16,0x00,0x08,0x02, +0x00,0x00,0x80,0x21,0x89,0x02,0x00,0x41,0x00,0x00,0x00,0x30,0x24,0x61,0x80,0x00, +0x00,0x00,0x00,0x00,0x86,0x24,0x07,0x10,0x00,0x00,0x00,0x00,0xc0,0x90,0x44,0x01, +0x80,0x20,0x00,0x00,0x00,0x18,0x92,0x1c,0x40,0x00,0x00,0x00,0x00,0x00,0x43,0x12, +0x05,0x00,0x82,0x00,0x00,0x00,0x60,0x48,0x52,0x00,0x40,0x10,0x00,0x00,0x00,0x64, +0x81,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x32,0x1e,0x98,0x10,0x19,0x11,0x4c,0x90, 0x8c,0x09,0x26,0x47,0xc6,0x04,0x43,0x8a,0x8a,0x59,0x8b,0x43,0x50,0xd2,0x09,0x02, 0x81,0xd2,0x73,0x50,0xc9,0x0c,0x2a,0x99,0x41,0x25,0x33,0xa8,0x64,0x56,0x28,0x66, 0x2d,0x0e,0x41,0xcf,0x2a,0x15,0x04,0x4a,0xcf,0x41,0x25,0x33,0xa8,0x64,0x06,0x95, 0xcc,0xa0,0x92,0x59,0x01,0x00,0x00,0x00,0x53,0x82,0x26,0x0c,0x04,0x00,0x00,0x00, 0x22,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x05,0x00,0x00,0x00, 0x04,0xc6,0x08,0x40,0x10,0x04,0xe1,0x70,0x18,0x23,0x00,0x41,0x10,0x84,0xc3,0x60, -0x04,0x00,0x00,0x00,0x93,0x0c,0xce,0x43,0x4c,0x31,0x3c,0x8e,0x34,0xc9,0x30,0x41, -0xc2,0x14,0x03,0x34,0x51,0x93,0x0c,0x4d,0x44,0x4c,0x31,0x44,0x8d,0x35,0x56,0x01, -0x04,0xc3,0x55,0x21,0x16,0x0e,0x04,0x00,0x0f,0x00,0x00,0x00,0x46,0x41,0x08,0xcc, -0x73,0x9b,0x05,0x21,0x30,0xcf,0x6e,0x18,0x84,0x00,0x2c,0x8b,0x35,0x04,0x80,0x39, -0x04,0x81,0x5d,0x20,0x80,0x0f,0x0c,0x43,0xe4,0xd3,0x36,0x81,0x04,0x3e,0x30,0x0c, -0x91,0x4f,0x5b,0x05,0x12,0xf8,0xc0,0x30,0x44,0x7e,0x7d,0x00,0x05,0xd1,0x4c,0x11, -0x66,0x12,0x83,0xc0,0x3c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, -0x2a,0x00,0x00,0x00,0x13,0x04,0x43,0x2c,0x10,0x00,0x00,0x00,0x08,0x00,0x00,0x00, +0x04,0x00,0x00,0x00,0xc3,0x0d,0xce,0x43,0x4c,0x37,0x3c,0x8e,0x34,0xdc,0x30,0x41, +0xc2,0x74,0x03,0x34,0x51,0xc3,0x0d,0x4d,0x44,0x4c,0x37,0x44,0x8d,0x35,0x56,0x01, +0x04,0xc3,0x55,0x21,0x16,0x0e,0x04,0x00,0x0f,0x00,0x00,0x00,0xd6,0x10,0x00,0xe6, +0x10,0x04,0x76,0x81,0x00,0x3e,0x30,0x0c,0x91,0x4f,0x1b,0x05,0x21,0x30,0x8f,0x6d, +0x13,0x48,0xe0,0x03,0xc3,0x10,0xf9,0xb4,0x55,0x20,0x81,0x0f,0x0c,0x43,0xe4,0xd7, +0x66,0x41,0x08,0xcc,0xa3,0x1f,0x40,0x41,0x34,0x53,0x84,0x99,0xc4,0x20,0x30,0x8f, +0x61,0x10,0x02,0xb0,0x2c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x27,0x00,0x00,0x00,0x13,0x04,0x43,0x2c,0x10,0x00,0x00,0x00,0x08,0x00,0x00,0x00, 0x24,0x8a,0xa0,0x0c,0x46,0x00,0x4a,0x80,0xc2,0x1c,0x84,0x55,0x55,0xd6,0x1c,0x84, 0x45,0x51,0x16,0x81,0x19,0x80,0x11,0x80,0x31,0x02,0x10,0x04,0x41,0xfc,0x03,0x00, -0x63,0x08,0x0d,0x34,0xc9,0x70,0x55,0xc2,0x2c,0x43,0x20,0x60,0x73,0x0c,0xd3,0x15, +0x63,0x08,0x0d,0x34,0xdc,0x70,0x55,0xc2,0x2c,0x43,0x20,0x60,0x73,0x0c,0xd3,0x15, 0x8d,0x21,0x34,0xd1,0x18,0x42,0xf3,0x8c,0x55,0x00,0x81,0xa0,0x6d,0x73,0x0c,0x19, -0xe7,0x60,0x87,0x52,0x38,0x10,0x00,0x00,0x13,0x00,0x00,0x00,0x17,0x60,0x20,0xc5, -0x74,0x10,0x8d,0x65,0x14,0x13,0xf3,0xd4,0xb4,0x6d,0x14,0x13,0xf3,0xd4,0xb8,0x69, -0x14,0x13,0xf3,0xd4,0xb6,0x75,0x14,0x13,0xf3,0xd4,0xba,0x35,0x0c,0x13,0xf3,0x9c, -0x80,0xe4,0x36,0x48,0x81,0x10,0xc3,0x4a,0x4c,0x54,0xd4,0x6c,0x8b,0x23,0x28,0x76, -0x41,0x4c,0xcc,0xa3,0x1b,0x07,0x21,0x00,0xcb,0x72,0x00,0x05,0xd1,0x4c,0x11,0x66, -0x18,0x83,0xc0,0x3c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, -0x81,0x00,0x00,0x00,0x13,0x04,0x4d,0x2c,0x10,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x91,0x11,0x00,0x00,0x00, -0x63,0x08,0x4d,0x64,0x16,0xc1,0x49,0x86,0xab,0x22,0x66,0x19,0x02,0x01,0x1b,0x43, -0x70,0xa2,0x59,0x82,0x61,0x0c,0xe1,0x89,0x66,0x09,0x86,0x81,0x0a,0x20,0x0b,0x34, -0x61,0x8e,0x81,0xda,0xa2,0x31,0x84,0x46,0xb2,0x8e,0xe0,0x24,0x83,0x57,0x11,0xb3, -0x0c,0x44,0xf1,0x8d,0x21,0x38,0xd2,0x2c,0x81,0x31,0x86,0xf0,0x48,0xb3,0x04,0xc6, -0x40,0x05,0x00,0x06,0x44,0x18,0x14,0x73,0x0c,0x9c,0x18,0x48,0x63,0x08,0xcd,0x64, -0x64,0x40,0x70,0x92,0xa1,0x0c,0x2a,0x62,0x96,0xe1,0x40,0xcc,0x60,0x0c,0xc1,0x99, -0x66,0x09,0x92,0x31,0x84,0x67,0x9a,0x25,0x48,0x06,0x2a,0x80,0x33,0x38,0xd0,0x00, -0x99,0x63,0x18,0x83,0x34,0x98,0xc6,0x10,0x1a,0xc8,0xd6,0x80,0xe0,0x24,0x03,0x1b, -0x54,0xc4,0x2c,0x83,0xb2,0xb4,0xc1,0x18,0x82,0x03,0xcd,0x12,0x30,0x63,0x08,0x0f, -0x34,0x4b,0xc0,0x0c,0x54,0x00,0x6e,0xa0,0xbc,0xc1,0x32,0xc7,0xa0,0x06,0x70,0x00, -0x61,0x1c,0x84,0x03,0x01,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x76,0x52,0x4c,0xcc, -0x73,0xd3,0x24,0x05,0x64,0xec,0xcd,0x8d,0xcc,0xe5,0x87,0x46,0xc6,0x50,0x8a,0x89, -0x79,0xee,0xdb,0x54,0x8a,0x89,0x79,0xee,0xdd,0x1a,0x88,0x89,0x79,0x68,0x73,0x20, -0x26,0xe6,0xa9,0xed,0x81,0x98,0x98,0xc7,0x36,0x0b,0x62,0x62,0x9e,0xdb,0x32,0x88, -0x89,0x79,0x72,0xd3,0x20,0x26,0xe6,0xd9,0x8d,0x83,0x98,0x98,0xa7,0xb7,0x95,0x62, -0x62,0x9e,0xbb,0x27,0x2d,0x20,0x63,0x6f,0x6e,0x64,0x2e,0x3a,0x34,0x35,0x56,0x62, -0x08,0x4e,0x53,0xd9,0xba,0xb5,0x14,0x02,0xf3,0xe0,0xf5,0x25,0x2c,0x82,0xd3,0x0c, -0xbe,0xe0,0x34,0xd3,0x8d,0x9b,0x88,0x21,0x38,0xcd,0x60,0xd7,0x24,0x01,0x63,0xec, -0xcd,0x8d,0xcc,0x45,0x87,0x44,0x80,0x8c,0xbd,0xb9,0x91,0xb9,0xfc,0xc4,0xd0,0x90, -0x02,0x8c,0xb1,0x37,0x37,0x32,0x97,0x1f,0x73,0x29,0x26,0xe6,0xc1,0x71,0x7b,0x29, -0x26,0xe6,0xc1,0x77,0xfb,0x28,0x04,0xe6,0xa9,0x6f,0x52,0x01,0x32,0xf6,0xe6,0x46, -0xe6,0xa2,0x13,0x73,0x63,0x18,0x83,0xc0,0x3c,0xb6,0x41,0x08,0x4e,0x33,0x58,0x47, -0x31,0x31,0x4f,0x5d,0x1f,0xc3,0x22,0x38,0xcd,0xe0,0x0b,0x4e,0x33,0xe1,0xbc,0xa5, -0x18,0x82,0xd3,0x0c,0x77,0x6e,0x20,0xc5,0xc4,0x3c,0xb5,0x4e,0x3a,0x40,0xc6,0xde, -0xdc,0xc8,0x5c,0x7e,0x64,0x70,0x2c,0xa4,0x98,0x98,0xa7,0xee,0xed,0x82,0x10,0x9c, -0xa6,0xba,0x81,0x44,0x70,0x9a,0xc1,0x17,0x9c,0x66,0x32,0x93,0x42,0x60,0x1e,0x7b, -0xb7,0x98,0x62,0x62,0x9e,0xbc,0x36,0x16,0x43,0x70,0x9a,0x0a,0xa7,0x6d,0xa4,0x98, -0x98,0xc7,0xbe,0x8d,0xa4,0x98,0x98,0xc7,0xce,0x0d,0xc6,0x10,0x9c,0x66,0xc0,0x7b, -0x12,0x02,0x32,0xf6,0xe6,0x46,0xe6,0xa2,0x33,0x13,0x73,0x06,0x8b,0xe0,0x34,0x83, -0x2f,0x38,0xcd,0x64,0xd3,0xe6,0x61,0x08,0x4e,0x53,0xd5,0xf6,0x01,0x14,0x44,0x33, -0x45,0x18,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x4a,0x00,0x00,0x00, -0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x24,0xca,0x60,0x04, -0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0xb9,0x61,0x0c,0x04,0x10,0x1e,0xe1,0x19,0xc6, -0x40,0x02,0xe1,0x11,0x1e,0x00,0x00,0x00,0x63,0x08,0xcd,0x63,0x15,0xc1,0x31,0x84, -0x06,0xb2,0x8b,0xe0,0x18,0x42,0x13,0x59,0x46,0x70,0x0c,0xa1,0x71,0x6c,0x23,0x38, -0x16,0x02,0x04,0xc7,0x64,0x61,0x1a,0x37,0x16,0x01,0x04,0x48,0x35,0xc7,0x20,0x79, -0xcf,0x58,0x04,0x10,0x20,0xd5,0x1c,0xc3,0x07,0x06,0xd0,0x58,0x04,0x10,0x20,0xd5, -0x1c,0x43,0x18,0x88,0x41,0x34,0x16,0x01,0x04,0x48,0x35,0xc7,0x30,0x06,0x64,0xe0, -0x98,0x47,0xd0,0xc0,0x80,0xa0,0x89,0x01,0x41,0x23,0x03,0x82,0x63,0x21,0x40,0x70, -0x50,0x66,0x70,0x06,0x68,0x90,0x06,0x58,0x06,0xe1,0x40,0x00,0x25,0x00,0x00,0x00, -0x56,0x52,0x4c,0xcc,0x73,0xd3,0x56,0x41,0x4c,0xcc,0x53,0xdb,0x05,0x31,0x31,0xcf, -0x6d,0x19,0xc4,0xc4,0x3c,0xba,0x6d,0x10,0x13,0xf3,0xf4,0xd6,0x41,0x08,0xc0,0xb2, -0x18,0x46,0x21,0x38,0x4d,0x85,0x9b,0x46,0x21,0x38,0x4d,0xb5,0x9b,0x8a,0x21,0x00, -0xcb,0x82,0xdf,0x66,0x62,0x08,0x4e,0x53,0xdd,0xb7,0x9d,0x18,0x82,0xd3,0x54,0xb7, -0x6e,0x28,0x86,0xe0,0x34,0xd5,0xdd,0xdb,0x47,0x31,0x31,0x4f,0x9d,0x9b,0x87,0x21, -0x00,0xcb,0x52,0xdf,0x06,0x62,0x08,0xc0,0xb2,0xd4,0xbc,0x59,0x10,0x82,0xd3,0x54, -0x96,0x62,0x08,0x4e,0x53,0xe1,0xb6,0x85,0x14,0x13,0xf3,0xd8,0xb4,0x8d,0x14,0x13, -0xf3,0xd8,0xb9,0x89,0x18,0x02,0xb0,0x2c,0xf6,0x6d,0x24,0x86,0x00,0x2c,0x8b,0xcd, -0x1b,0x87,0x21,0x38,0x4d,0x55,0xd3,0xd6,0x30,0x54,0xc0,0x72,0x00,0x05,0xd1,0x4c, -0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x19,0x00,0x00,0x00, -0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0x4a,0x60,0x04, -0x80,0xc2,0x0c,0x00,0x00,0x00,0x00,0x00,0x63,0x08,0xcd,0x33,0x16,0x01,0x04,0x48, -0x34,0xc7,0x00,0x49,0xcf,0x58,0x04,0x10,0x28,0xd1,0x1c,0xc3,0x44,0x39,0x58,0x85, -0x03,0x01,0x00,0x00,0x0a,0x00,0x00,0x00,0x16,0x41,0x4c,0xcc,0x63,0xdb,0x04,0x31, -0x31,0x4f,0x6e,0x0d,0x43,0x05,0x2c,0x07,0x50,0x10,0xcd,0x14,0x61,0x56,0x41,0x4c, -0xcc,0xd3,0x1b,0x45,0x21,0x00,0xcb,0xb2,0x9b,0x04,0x21,0x00,0xcb,0x02,0x00,0x00, -0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x1b,0x00,0x00,0x00,0x13,0x04,0x41,0x2c, -0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80, -0xc2,0x0c,0x00,0x00,0x63,0x08,0xcd,0x33,0x16,0x01,0x04,0xca,0x34,0xc7,0x20,0x51, -0xcf,0x1c,0x43,0x45,0x41,0x73,0x0c,0x16,0x15,0xcd,0x31,0x5c,0x94,0x83,0x58,0x38, -0x10,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x86,0x51,0x4c,0xcc,0x53,0xe7,0x76,0x51, -0x4c,0xcc,0x53,0xdb,0x36,0x41,0x4c,0xcc,0x63,0x5b,0x05,0x31,0x31,0x8f,0x6e,0x0d, -0x43,0x05,0x2c,0x66,0x41,0x4c,0xcc,0xd3,0x1f,0x40,0x41,0x34,0x53,0x84,0x19,0x05, -0x21,0x00,0xcb,0x02,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x2f,0x00,0x00,0x00, -0x13,0x04,0x45,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0xca,0xa0,0x04, -0x46,0x00,0x8a,0x80,0xc0,0x08,0x00,0x00,0x63,0x08,0x0d,0x34,0xc9,0x30,0x49,0xc4, -0x2c,0x03,0x11,0x50,0x63,0x08,0xcd,0x33,0xc9,0x50,0x49,0xc4,0x2c,0x03,0x21,0x58, -0x63,0x08,0x4d,0x34,0xc9,0x70,0x49,0xc4,0x2c,0x03,0x31,0x60,0x63,0x08,0x8d,0x33, -0xc9,0x90,0x49,0x84,0x69,0x22,0x70,0xc3,0x27,0x1c,0x08,0x00,0x1a,0x00,0x00,0x00, -0x96,0x51,0x4c,0xcc,0x53,0xdf,0x66,0x41,0x08,0xcc,0x83,0xdb,0x04,0x31,0x31,0x4f, -0x6d,0x15,0xc4,0xc4,0x3c,0xb7,0x61,0x10,0x02,0xf3,0xf0,0x47,0x20,0xb9,0x0d,0x52, -0x20,0xc4,0xb0,0x12,0x13,0x15,0x35,0xdb,0xe2,0x08,0x8a,0x5d,0x10,0x13,0xf3,0xec, -0x37,0x90,0x2c,0x4e,0xf4,0x47,0x87,0x54,0xd7,0x17,0x70,0x2c,0x4e,0xf4,0x47,0x87, -0x74,0x02,0xc8,0xe2,0x44,0x7f,0x74,0x48,0xb9,0x69,0x14,0x02,0xf3,0xd4,0xb8,0x6d, -0x18,0x11,0x31,0x55,0xc0,0x62,0x0d,0x43,0x05,0x2c,0x07,0x50,0x10,0xcd,0x14,0x61, -0x46,0x31,0x08,0xcc,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x71,0x20,0x00,0x00, -0x12,0x00,0x00,0x00,0x66,0x40,0x54,0x82,0x23,0x59,0xc2,0x20,0x09,0x92,0x1d,0x18, -0x4f,0x84,0x34,0x53,0x61,0x03,0xc4,0xe3,0x58,0x85,0x05,0x14,0xbe,0x34,0x45,0xb5, -0x21,0x10,0x82,0x23,0x15,0x46,0x30,0x2c,0xc8,0x64,0x02,0x06,0xf0,0x3c,0x91,0x73, -0x19,0x00,0xe1,0x4b,0x53,0x64,0x0a,0x84,0x84,0x34,0x85,0x31,0x10,0x0a,0xb2,0x3c, -0x56,0x30,0x08,0xcc,0x63,0x0b,0x44,0x25,0x21,0x0d,0x00,0x00,0x00,0x00,0x00,0x00}; +0xe7,0x60,0x87,0x52,0x38,0x10,0x00,0x00,0x10,0x00,0x00,0x00,0x27,0x50,0x20,0x05, +0xd1,0x0c,0x17,0x60,0x20,0xc5,0x74,0x10,0x8d,0x65,0x14,0x13,0xf3,0xd4,0xb4,0x6d, +0x14,0x13,0xf3,0xd4,0xb8,0x69,0x14,0x13,0xf3,0xd4,0xb6,0x75,0x14,0x13,0xf3,0xd4, +0xba,0x35,0x0c,0x13,0xf3,0xd8,0x05,0x31,0x31,0x8f,0x6e,0x1c,0x84,0x00,0x2c,0xcb, +0x01,0x14,0x44,0x33,0x45,0x98,0x61,0x0c,0x02,0xf3,0x00,0x00,0x00,0x00,0x00,0x00, +0x61,0x20,0x00,0x00,0x81,0x00,0x00,0x00,0x13,0x04,0x4d,0x2c,0x10,0x00,0x00,0x00, +0x04,0x00,0x00,0x00,0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x91, +0x11,0x00,0x00,0x00,0x63,0x08,0x4d,0x64,0x16,0xc1,0xe1,0x86,0xab,0x22,0x66,0x19, +0x02,0x01,0x1b,0x43,0x70,0xa2,0x59,0x82,0x61,0x0c,0xe1,0x89,0x66,0x09,0x86,0x81, +0x0a,0x20,0x0b,0x34,0x61,0x8e,0x81,0xda,0xa2,0x31,0x84,0x46,0xb2,0x8e,0xe0,0x70, +0x83,0x57,0x11,0xb3,0x0c,0x44,0xf1,0x8d,0x21,0x38,0xd2,0x2c,0x81,0x31,0x86,0xf0, +0x48,0xb3,0x04,0xc6,0x40,0x05,0x00,0x06,0x44,0x18,0x14,0x73,0x0c,0x9c,0x18,0x48, +0x63,0x08,0xcd,0x64,0x64,0x40,0x70,0xb8,0xa1,0x0c,0x2a,0x62,0x96,0xe1,0x40,0xcc, +0x60,0x0c,0xc1,0x99,0x66,0x09,0x92,0x31,0x84,0x67,0x9a,0x25,0x48,0x06,0x2a,0x80, +0x33,0x38,0xd0,0x00,0x99,0x63,0x18,0x83,0x34,0x98,0xc6,0x10,0x1a,0xc8,0xd6,0x80, +0xe0,0x70,0x03,0x1b,0x54,0xc4,0x2c,0x83,0xb2,0xb4,0xc1,0x18,0x82,0x03,0xcd,0x12, +0x30,0x63,0x08,0x0f,0x34,0x4b,0xc0,0x0c,0x54,0x00,0x6e,0xa0,0xbc,0xc1,0x32,0xc7, +0xa0,0x06,0x70,0x00,0x61,0x1c,0x84,0x03,0x01,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, +0x76,0x52,0x4c,0xcc,0x73,0xd3,0x24,0x05,0x64,0xec,0xcd,0x8d,0xcc,0xe5,0x87,0x46, +0xc6,0x50,0x8a,0x89,0x79,0xee,0xdb,0x54,0x8a,0x89,0x79,0xee,0xdd,0x1a,0x88,0x89, +0x79,0x68,0x73,0x20,0x26,0xe6,0xa9,0xed,0x81,0x98,0x98,0xc7,0x36,0x0b,0x62,0x62, +0x9e,0xdb,0x32,0x88,0x89,0x79,0x72,0xd3,0x20,0x26,0xe6,0xd9,0x8d,0x83,0x98,0x98, +0xa7,0xb7,0x95,0x62,0x62,0x9e,0xbb,0x27,0x2d,0x20,0x63,0x6f,0x6e,0x64,0x2e,0x3a, +0x34,0x35,0x56,0x62,0x08,0x4e,0x53,0xd9,0xba,0xb5,0x14,0x02,0xf3,0xe0,0xf5,0x25, +0x2c,0x82,0xd3,0x0c,0xbe,0xe0,0x34,0xd3,0x8d,0x9b,0x88,0x21,0x38,0xcd,0x60,0xd7, +0x24,0x01,0x63,0xec,0xcd,0x8d,0xcc,0x45,0x87,0x44,0x80,0x8c,0xbd,0xb9,0x91,0xb9, +0xfc,0xc4,0xd0,0x90,0x02,0x8c,0xb1,0x37,0x37,0x32,0x97,0x1f,0x73,0x29,0x26,0xe6, +0xc1,0x71,0x7b,0x29,0x26,0xe6,0xc1,0x77,0xfb,0x28,0x04,0xe6,0xa9,0x6f,0x52,0x01, +0x32,0xf6,0xe6,0x46,0xe6,0xa2,0x13,0x73,0x63,0x18,0x83,0xc0,0x3c,0xb6,0x41,0x08, +0x4e,0x33,0x58,0x47,0x31,0x31,0x4f,0x5d,0x1f,0xc3,0x22,0x38,0xcd,0xe0,0x0b,0x4e, +0x33,0xe1,0xbc,0xa5,0x18,0x82,0xd3,0x0c,0x77,0x6e,0x20,0xc5,0xc4,0x3c,0xb5,0x4e, +0x3a,0x40,0xc6,0xde,0xdc,0xc8,0x5c,0x7e,0x64,0x70,0x2c,0xa4,0x98,0x98,0xa7,0xee, +0x6f,0x20,0x11,0x9c,0x66,0xf0,0x05,0xa7,0x99,0xec,0x82,0x10,0x9c,0xa6,0x32,0x93, +0x42,0x60,0x1e,0x7b,0xb7,0x98,0x62,0x62,0x9e,0xbc,0x36,0x16,0x43,0x70,0x9a,0x0a, +0xa7,0x6d,0xa4,0x98,0x98,0xc7,0xbe,0x8d,0xa4,0x98,0x98,0xc7,0xce,0x0d,0xc6,0x10, +0x9c,0x66,0xc0,0x7b,0x12,0x02,0x32,0xf6,0xe6,0x46,0xe6,0xa2,0x33,0x13,0x73,0x06, +0x8b,0xe0,0x34,0x83,0x2f,0x38,0xcd,0x64,0xd3,0x07,0x50,0x10,0xcd,0x14,0x61,0xe6, +0x61,0x08,0x4e,0x53,0xd5,0x36,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x4a,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x07,0x00,0x00,0x00, +0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0xb9,0x61,0x0c,0x04,0x10, +0x1e,0xe1,0x19,0xc6,0x40,0x02,0xe1,0x11,0x1e,0x00,0x00,0x00,0x63,0x08,0xcd,0x63, +0x15,0xc1,0x31,0x84,0x06,0xb2,0x8b,0xe0,0x18,0x42,0x13,0x59,0x46,0x70,0x0c,0xa1, +0x71,0x6c,0x23,0x38,0x16,0x02,0x04,0xc7,0x64,0x61,0x1a,0x37,0x16,0x01,0x04,0x48, +0x35,0xc7,0x20,0x79,0xcf,0x58,0x04,0x10,0x20,0xd5,0x1c,0xc3,0x07,0x06,0xd0,0x58, +0x04,0x10,0x20,0xd5,0x1c,0x43,0x18,0x88,0x41,0x34,0x16,0x01,0x04,0x48,0x35,0xc7, +0x30,0x06,0x64,0xe0,0x98,0x47,0xd0,0xc0,0x80,0xa0,0x89,0x01,0x41,0x23,0x03,0x82, +0x63,0x21,0x40,0x70,0x50,0x66,0x70,0x06,0x68,0x90,0x06,0x58,0x06,0xe1,0x40,0x00, +0x25,0x00,0x00,0x00,0x56,0x52,0x4c,0xcc,0x73,0xd3,0x56,0x41,0x4c,0xcc,0x53,0xdb, +0x05,0x31,0x31,0xcf,0x6d,0x19,0xc4,0xc4,0x3c,0xba,0x6d,0x10,0x13,0xf3,0xf4,0xd6, +0x41,0x08,0xc0,0xb2,0x18,0x46,0x21,0x38,0x4d,0x85,0x9b,0x46,0x21,0x38,0x4d,0xb5, +0x9b,0x8a,0x21,0x00,0xcb,0x82,0xdf,0x66,0x62,0x08,0x4e,0x53,0xdd,0xb7,0x9d,0x18, +0x82,0xd3,0x54,0xb7,0x6e,0x28,0x86,0xe0,0x34,0xd5,0xdd,0xdb,0x47,0x31,0x31,0x4f, +0x9d,0x9b,0x87,0x21,0x00,0xcb,0x52,0xdf,0x06,0x62,0x08,0xc0,0xb2,0xd4,0xbc,0x59, +0x10,0x82,0xd3,0x54,0x96,0x62,0x08,0x4e,0x53,0xe1,0xb6,0x85,0x14,0x13,0xf3,0xd8, +0xb4,0x8d,0x14,0x13,0xf3,0xd8,0xb9,0x89,0x18,0x02,0xb0,0x2c,0xf6,0x6d,0x24,0x86, +0x00,0x2c,0x8b,0xcd,0x1b,0x87,0x21,0x38,0x4d,0x55,0xd3,0xd6,0x30,0x54,0xc0,0x72, +0x00,0x05,0xd1,0x4c,0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x19,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00, +0x24,0x4a,0x60,0x04,0x80,0xc2,0x0c,0x00,0x00,0x00,0x00,0x00,0x63,0x08,0xcd,0x33, +0x16,0x01,0x04,0x48,0x34,0xc7,0x00,0x49,0xcf,0x58,0x04,0x10,0x28,0xd1,0x1c,0xc3, +0x44,0x39,0x58,0x85,0x03,0x01,0x00,0x00,0x0a,0x00,0x00,0x00,0x26,0x41,0x08,0xc0, +0xb2,0x18,0x45,0x21,0x00,0xcb,0xb2,0x5b,0x04,0x31,0x31,0x8f,0x6d,0x13,0xc4,0xc4, +0x3c,0xb9,0x35,0x0c,0x15,0xb0,0x58,0x05,0x31,0x31,0x4f,0x7f,0x00,0x05,0xd1,0x4c, +0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x1b,0x00,0x00,0x00, +0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0xca,0x60,0x04, +0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x00,0x63,0x08,0xcd,0x33,0x16,0x01,0x04,0xca, +0x34,0xc7,0x20,0x51,0xcf,0x1c,0x43,0x45,0x41,0x73,0x0c,0x16,0x15,0xcd,0x31,0x5c, +0x94,0x83,0x58,0x38,0x10,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x76,0x51,0x4c,0xcc, +0x53,0xdb,0x86,0x51,0x4c,0xcc,0x53,0xe7,0x36,0x41,0x4c,0xcc,0x63,0x5b,0x05,0x31, +0x31,0x8f,0x6e,0x16,0xc4,0xc4,0x3c,0xbd,0x51,0x10,0x02,0xb0,0x2c,0xd6,0x30,0x54, +0xc0,0x72,0x00,0x05,0xd1,0x4c,0x11,0x06,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x2c,0x00,0x00,0x00,0x13,0x04,0x45,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00, +0x24,0xca,0xa0,0x04,0x46,0x00,0x8a,0x80,0xc0,0x08,0x00,0x00,0x63,0x08,0x0d,0x34, +0xdc,0x30,0x49,0xc4,0x2c,0x03,0x11,0x50,0x63,0x08,0xcd,0x33,0xdc,0x50,0x49,0xc4, +0x2c,0x03,0x21,0x58,0x63,0x08,0x4d,0x34,0xdc,0x70,0x49,0xc4,0x2c,0x03,0x31,0x60, +0x63,0x08,0x8d,0x33,0xdc,0x90,0x49,0x84,0x69,0x22,0x70,0xc3,0x27,0x1c,0x08,0x00, +0x17,0x00,0x00,0x00,0x96,0x51,0x4c,0xcc,0x53,0xdf,0x66,0x41,0x08,0xcc,0x83,0xdb, +0x04,0x31,0x31,0x4f,0x6d,0x15,0xc4,0xc4,0x3c,0xb7,0x61,0x10,0x02,0xf3,0xf0,0x76, +0x41,0x4c,0xcc,0xb3,0x1f,0x81,0x11,0x11,0x13,0x15,0x35,0x37,0x90,0x2c,0x4e,0xf4, +0x47,0x87,0x54,0xd7,0x17,0x70,0x2c,0x4e,0xf4,0x47,0x87,0x74,0x02,0xc8,0xe2,0x44, +0x7f,0x74,0x48,0xb9,0x69,0x14,0x02,0xf3,0xd4,0xb8,0x6d,0x18,0x11,0x31,0x55,0xc0, +0x62,0x0d,0x43,0x05,0x2c,0x07,0x50,0x10,0xcd,0x14,0x61,0x46,0x31,0x08,0xcc,0x03, +0x00,0x00,0x00,0x00,0x71,0x20,0x00,0x00,0x12,0x00,0x00,0x00,0x66,0x40,0x54,0x82, +0x23,0x19,0xc3,0xa0,0x20,0x8b,0x1d,0x18,0x4f,0x84,0x34,0x53,0x61,0x03,0xc4,0xe3, +0x58,0x85,0x05,0x14,0xbe,0x34,0x45,0xb5,0x21,0x10,0x82,0x23,0x15,0x46,0x30,0x2c, +0xc8,0x64,0x02,0x06,0xf0,0x3c,0x91,0x73,0x19,0x00,0xe1,0x4b,0x53,0x64,0x0a,0x84, +0x84,0x34,0x85,0x25,0x0c,0x92,0x20,0x59,0xc1,0x20,0x30,0x8f,0x2d,0x10,0x95,0x84, +0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index 3eaf9aacf6..599975d5ad 100644 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -923,7 +923,7 @@ llvm::Value * Instructions::callCeil(llvm::Value *val) // predeclare the intrinsic std::vector ceilArgs; ceilArgs.push_back(Type::FloatTy); - PAListPtr ceilPal; + AttrListPtr ceilPal; FunctionType* ceilType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/ceilArgs, @@ -933,7 +933,7 @@ llvm::Value * Instructions::callCeil(llvm::Value *val) /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"ceilf", m_mod); m_llvmCeil->setCallingConv(CallingConv::C); - m_llvmCeil->setParamAttrs(ceilPal); + m_llvmCeil->setAttributes(ceilPal); } CallInst *call = m_builder.CreateCall(m_llvmCeil, val, name("ceilf")); @@ -948,7 +948,7 @@ llvm::Value *Instructions::callFAbs(llvm::Value *val) // predeclare the intrinsic std::vector fabsArgs; fabsArgs.push_back(Type::FloatTy); - PAListPtr fabsPal; + AttrListPtr fabsPal; FunctionType* fabsType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/fabsArgs, @@ -958,7 +958,7 @@ llvm::Value *Instructions::callFAbs(llvm::Value *val) /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"fabs", m_mod); m_llvmFAbs->setCallingConv(CallingConv::C); - m_llvmFAbs->setParamAttrs(fabsPal); + m_llvmFAbs->setAttributes(fabsPal); } CallInst *call = m_builder.CreateCall(m_llvmFAbs, val, name("fabs")); @@ -973,7 +973,7 @@ llvm::Value * Instructions::callFExp(llvm::Value *val) // predeclare the intrinsic std::vector fexpArgs; fexpArgs.push_back(Type::FloatTy); - PAListPtr fexpPal; + AttrListPtr fexpPal; FunctionType* fexpType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/fexpArgs, @@ -983,7 +983,7 @@ llvm::Value * Instructions::callFExp(llvm::Value *val) /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"expf", m_mod); m_llvmFexp->setCallingConv(CallingConv::C); - m_llvmFexp->setParamAttrs(fexpPal); + m_llvmFexp->setAttributes(fexpPal); } CallInst *call = m_builder.CreateCall(m_llvmFexp, val, name("expf")); @@ -998,7 +998,7 @@ llvm::Value * Instructions::callFLog(llvm::Value *val) // predeclare the intrinsic std::vector flogArgs; flogArgs.push_back(Type::FloatTy); - PAListPtr flogPal; + AttrListPtr flogPal; FunctionType* flogType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/flogArgs, @@ -1008,7 +1008,7 @@ llvm::Value * Instructions::callFLog(llvm::Value *val) /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"logf", m_mod); m_llvmFlog->setCallingConv(CallingConv::C); - m_llvmFlog->setParamAttrs(flogPal); + m_llvmFlog->setAttributes(flogPal); } CallInst *call = m_builder.CreateCall(m_llvmFlog, val, name("logf")); @@ -1023,7 +1023,7 @@ llvm::Value * Instructions::callFloor(llvm::Value *val) // predeclare the intrinsic std::vector floorArgs; floorArgs.push_back(Type::FloatTy); - PAListPtr floorPal; + AttrListPtr floorPal; FunctionType* floorType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/floorArgs, @@ -1033,7 +1033,7 @@ llvm::Value * Instructions::callFloor(llvm::Value *val) /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"floorf", m_mod); m_llvmFloor->setCallingConv(CallingConv::C); - m_llvmFloor->setParamAttrs(floorPal); + m_llvmFloor->setAttributes(floorPal); } CallInst *call = m_builder.CreateCall(m_llvmFloor, val, name("floorf")); @@ -1048,7 +1048,7 @@ llvm::Value *Instructions::callFSqrt(llvm::Value *val) // predeclare the intrinsic std::vector fsqrtArgs; fsqrtArgs.push_back(Type::FloatTy); - PAListPtr fsqrtPal; + AttrListPtr fsqrtPal; FunctionType* fsqrtType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/fsqrtArgs, @@ -1058,7 +1058,7 @@ llvm::Value *Instructions::callFSqrt(llvm::Value *val) /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"llvm.sqrt.f32", m_mod); m_llvmFSqrt->setCallingConv(CallingConv::C); - m_llvmFSqrt->setParamAttrs(fsqrtPal); + m_llvmFSqrt->setAttributes(fsqrtPal); } CallInst *call = m_builder.CreateCall(m_llvmFSqrt, val, name("sqrt")); @@ -1074,7 +1074,7 @@ llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2) std::vector powArgs; powArgs.push_back(Type::FloatTy); powArgs.push_back(Type::FloatTy); - PAListPtr powPal; + AttrListPtr powPal; FunctionType* powType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/powArgs, @@ -1084,7 +1084,7 @@ llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2) /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"llvm.pow.f32", m_mod); m_llvmPow->setCallingConv(CallingConv::C); - m_llvmPow->setParamAttrs(powPal); + m_llvmPow->setAttributes(powPal); } std::vector params; params.push_back(val1); @@ -1126,7 +1126,7 @@ llvm::Value * Instructions::constVector(float x, float y, float z, float w) llvm::Function * Instructions::declarePrintf() { std::vector args; - PAListPtr params; + AttrListPtr params; FunctionType* funcTy = FunctionType::get( /*Result=*/IntegerType::get(32), /*Params=*/args, @@ -1136,7 +1136,7 @@ llvm::Function * Instructions::declarePrintf() /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"printf", m_mod); func_printf->setCallingConv(CallingConv::C); - func_printf->setParamAttrs(params); + func_printf->setAttributes(params); return func_printf; } @@ -1148,7 +1148,7 @@ llvm::Function * Instructions::declareFunc(int label) args.push_back(vecPtr); args.push_back(vecPtr); args.push_back(vecPtr); - PAListPtr params; + AttrListPtr params; FunctionType *funcType = FunctionType::get( /*Result=*/Type::VoidTy, /*Params=*/args, @@ -1159,7 +1159,7 @@ llvm::Function * Instructions::declareFunc(int label) /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/name.c_str(), m_mod); func->setCallingConv(CallingConv::C); - func->setParamAttrs(params); + func->setAttributes(params); return func; } diff --git a/src/gallium/auxiliary/gallivm/instructions.h b/src/gallium/auxiliary/gallivm/instructions.h index c3b28e9746..e18571251e 100644 --- a/src/gallium/auxiliary/gallivm/instructions.h +++ b/src/gallium/auxiliary/gallivm/instructions.h @@ -146,7 +146,7 @@ private: llvm::Module *m_mod; llvm::Function *m_func; char m_name[32]; - llvm::IRBuilder m_builder; + llvm::IRBuilder<> m_builder; int m_idx; llvm::VectorType *m_floatVecType; diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index 9a3ed9f538..5863f37095 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -259,7 +259,7 @@ void InstructionsSoa::createBuiltins() { MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( (const char*)&soabuiltins_data[0], - (const char*)&soabuiltins_data[Elements(soabuiltins_data)-1]); + (const char*)&soabuiltins_data[Elements(soabuiltins_data)]); m_builtins = ParseBitcodeFile(buffer); std::cout<<"Builtins created at "<getFunctionType(), GlobalValue::ExternalLinkage, originalFunc->getName(), currentModule()); func->setCallingConv(CallingConv::C); - const PAListPtr pal; - func->setParamAttrs(pal); + const AttrListPtr pal; + func->setAttributes(pal); currentModule()->dump(); } else { DenseMap val; diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h index 3e20b652dd..20cab3b3bb 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.h +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h @@ -96,7 +96,7 @@ private: const std::vector in3); void injectFunction(llvm::Function *originalFunc, int op = TGSI_OPCODE_LAST); private: - llvm::IRBuilder m_builder; + llvm::IRBuilder<> m_builder; StorageSoa *m_storage; std::map m_functionsMap; -- cgit v1.2.3 From 5e585719ebab17959d972e2e69c04203ecd3f2f3 Mon Sep 17 00:00:00 2001 From: Jonathan White Date: Tue, 30 Sep 2008 14:07:09 -0600 Subject: cell: Moved X86 checks to wrap #include section so that Cell targets will compile again. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 4fdad3a5c7..94b2df2dbb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -25,6 +25,8 @@ * **************************************************************************/ +#ifdef PIPE_ARCH_X86 + #include "pipe/p_debug.h" #include "pipe/p_shader_tokens.h" #include "util/u_math.h" @@ -36,8 +38,6 @@ #include "rtasm/rtasm_x86sse.h" -#ifdef PIPE_ARCH_X86 - /* for 1/sqrt() * * This costs about 100fps (close to 10%) in gears: -- cgit v1.2.3 From a6ff215777da2181d7099284f2da28eff78273a9 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin Date: Wed, 1 Oct 2008 00:00:58 +0200 Subject: Gallivm: add slt. glxgears should be running, except it isn't. --- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 9 ++ src/gallium/auxiliary/gallivm/instructionssoa.h | 2 + src/gallium/auxiliary/gallivm/soabuiltins.c | 155 +++++++++++++--------- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 1 + 4 files changed, 101 insertions(+), 66 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index 5863f37095..a658072551 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -181,6 +181,7 @@ void InstructionsSoa::createFunctionMap() m_functionsMap[TGSI_OPCODE_POWER] = "pow"; m_functionsMap[TGSI_OPCODE_LIT] = "lit"; m_functionsMap[TGSI_OPCODE_RSQ] = "rsq"; + m_functionsMap[TGSI_OPCODE_SLT] = "slt"; } void InstructionsSoa::createDependencies() @@ -280,6 +281,14 @@ std::vector InstructionsSoa::dp3(const std::vector i return callBuiltin(func, in1, in2); } + +std::vector InstructionsSoa::slt(const std::vector in1, + const std::vector in2) +{ + llvm::Function *func = function(TGSI_OPCODE_SLT); + return callBuiltin(func, in1, in2); +} + llvm::Value * InstructionsSoa::allocaTemp() { VectorType *vector = VectorType::get(Type::FloatTy, 4); diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h index 20cab3b3bb..3817fdc904 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.h +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h @@ -69,6 +69,8 @@ public: std::vector pow(const std::vector in1, const std::vector in2); std::vector rsq(const std::vector in1); + std::vector slt(const std::vector in1, + const std::vector in2); std::vector sub(const std::vector in1, const std::vector in2); void end(); diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c index 78f84510e2..cb85e1734e 100644 --- a/src/gallium/auxiliary/gallivm/soabuiltins.c +++ b/src/gallium/auxiliary/gallivm/soabuiltins.c @@ -36,6 +36,8 @@ typedef __attribute__(( ext_vector_type(4) )) float float4; extern float fabsf(float val); +/* helpers */ + float4 absvec(float4 vec) { float4 res; @@ -47,6 +49,58 @@ float4 absvec(float4 vec) return res; } +float4 maxvec(float4 a, float4 b) +{ + return (float4){(a.x > b.x) ? a.x : b.x, + (a.y > b.y) ? a.y : b.y, + (a.z > b.z) ? a.z : b.z, + (a.w > b.w) ? a.w : b.w}; +} + +float4 minvec(float4 a, float4 b) +{ + return (float4){(a.x < b.x) ? a.x : b.x, + (a.y < b.y) ? a.y : b.y, + (a.z < b.z) ? a.z : b.z, + (a.w < b.w) ? a.w : b.w}; +} + +extern float powf(float num, float p); +extern float sqrtf(float x); + +float4 powvec(float4 vec, float4 q) +{ + float4 p; + p.x = powf(vec.x, q.x); + p.y = powf(vec.y, q.y); + p.z = powf(vec.z, q.z); + p.w = powf(vec.w, q.w); + return p; +} + +float4 sqrtvec(float4 vec) +{ + float4 p; + p.x = sqrtf(vec.x); + p.y = sqrtf(vec.y); + p.z = sqrtf(vec.z); + p.w = sqrtf(vec.w); + return p; +} + +float4 sltvec(float4 v1, float4 v2) +{ + float4 p; + p.x = (v1.x < v2.x) ? 1.0 : 0.0; + p.y = (v1.y < v2.y) ? 1.0 : 0.0; + p.z = (v1.z < v2.z) ? 1.0 : 0.0; + p.w = (v1.w < v2.w) ? 1.0 : 0.0; + return p; +} + + +/* instructions */ + void abs(float4 *res, float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) { @@ -69,7 +123,6 @@ void dp3(float4 *res, res[3] = dot; } - void dp4(float4 *res, float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) @@ -83,35 +136,25 @@ void dp4(float4 *res, res[3] = dot; } -extern float powf(float num, float p); -extern float sqrtf(float x); - -float4 powvec(float4 vec, float4 q) -{ - float4 p; - p.x = powf(vec.x, q.x); - p.y = powf(vec.y, q.y); - p.z = powf(vec.z, q.z); - p.w = powf(vec.w, q.w); - return p; -} - -void pow(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, - float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +void lit(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) { - res[0] = powvec(tmp0x, tmp1x); - res[1] = res[0]; - res[2] = res[0]; - res[3] = res[0]; -} + const float4 zerovec = (float4) {0.0, 0.0, 0.0, 0.0}; + const float4 min128 = (float4) {-128.f, -128.f, -128.f, -128.f}; + const float4 plus128 = (float4) {128.f, 128.f, 128.f, 128.f}; -float4 minvec(float4 a, float4 b) -{ - return (float4){(a.x < b.x) ? a.x : b.x, - (a.y < b.y) ? a.y : b.y, - (a.z < b.z) ? a.z : b.z, - (a.w < b.w) ? a.w : b.w}; + res[0] = (float4){1.0, 1.0, 1.0, 1.0}; + if (tmp0x.x > 0) { + float4 tmpy = maxvec(tmp0y, zerovec); + float4 tmpw = minvec(tmp0w, plus128); + tmpw = maxvec(tmpw, min128); + res[1] = tmp0x; + res[2] = powvec(tmpy, tmpw); + } else { + res[1] = zerovec; + res[2] = zerovec; + } + res[3] = (float4){1.0, 1.0, 1.0, 1.0}; } void min(float4 *res, @@ -125,14 +168,6 @@ void min(float4 *res, } -float4 maxvec(float4 a, float4 b) -{ - return (float4){(a.x > b.x) ? a.x : b.x, - (a.y > b.y) ? a.y : b.y, - (a.z > b.z) ? a.z : b.z, - (a.w > b.w) ? a.w : b.w}; -} - void max(float4 *res, float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) @@ -143,37 +178,14 @@ void max(float4 *res, res[3] = maxvec(tmp0w, tmp1w); } - -void lit(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) -{ - const float4 zerovec = (float4) {0.0, 0.0, 0.0, 0.0}; - const float4 min128 = (float4) {-128.f, -128.f, -128.f, -128.f}; - const float4 plus128 = (float4) {128.f, 128.f, 128.f, 128.f}; - - res[0] = (float4){1.0, 1.0, 1.0, 1.0}; - if (tmp0x.x > 0) { - float4 tmpy = maxvec(tmp0y, zerovec); - float4 tmpw = minvec(tmp0w, plus128); - tmpw = maxvec(tmpw, min128); - res[1] = tmp0x; - res[2] = powvec(tmpy, tmpw); - } else { - res[1] = zerovec; - res[2] = zerovec; - } - res[3] = (float4){1.0, 1.0, 1.0, 1.0}; -} - - -float4 sqrtvec(float4 vec) +void pow(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) { - float4 p; - p.x = sqrtf(vec.x); - p.y = sqrtf(vec.y); - p.z = sqrtf(vec.z); - p.w = sqrtf(vec.w); - return p; + res[0] = powvec(tmp0x, tmp1x); + res[1] = res[0]; + res[2] = res[0]; + res[3] = res[0]; } void rsq(float4 *res, @@ -185,3 +197,14 @@ void rsq(float4 *res, res[2] = onevec/sqrtvec(absvec(tmp0z)); res[3] = onevec/sqrtvec(absvec(tmp0w)); } + +void slt(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + res[0] = sltvec(tmp0x, tmp1x); + res[1] = sltvec(tmp0y, tmp1y); + res[2] = sltvec(tmp0z, tmp1z); + res[3] = sltvec(tmp0w, tmp1w); +} + diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index fdfbb76c16..7292c0e366 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -767,6 +767,7 @@ translate_instructionir(llvm::Module *module, } break; case TGSI_OPCODE_SLT: { + out = instr->slt(inputs[0], inputs[1]); } break; case TGSI_OPCODE_SGE: { -- cgit v1.2.3 From cbfce4175bf72788842bb45fa11c7e19caa8e6a8 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 1 Oct 2008 08:27:20 +0900 Subject: tgsi: Include p_config.h. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 94b2df2dbb..79f424b692 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -25,6 +25,8 @@ * **************************************************************************/ +#include "pipe/p_config.h" + #ifdef PIPE_ARCH_X86 #include "pipe/p_debug.h" -- cgit v1.2.3 From cb8a3ba433190b7af254349b00d356b31e813a1a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 1 Oct 2008 08:28:05 +0900 Subject: util: No-op u_sse.h outside PIPE_ARCH_X86/X86_64. --- src/gallium/auxiliary/util/u_sse.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h index 0c8356cd05..68e56f0816 100644 --- a/src/gallium/auxiliary/util/u_sse.h +++ b/src/gallium/auxiliary/util/u_sse.h @@ -37,6 +37,10 @@ #ifndef U_SSE_H_ #define U_SSE_H_ +#include "pipe/p_config.h" + +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) + #include #include @@ -66,7 +70,8 @@ _mm_castps_si128(__m128 a) return u.m128i; } -#endif +#endif /* defined(_MSC_VER) && _MSC_VER < 1500 */ +#endif /* PIPE_ARCH_X86 || PIPE_ARCH_X86_64 */ #endif /* U_SSE_H_ */ -- cgit v1.2.3 From dd7e5a498066e4ebdb7ad40773de48e5bc993164 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 1 Oct 2008 13:34:38 +0100 Subject: draw: add streamlined paths for fetching linear verts --- src/gallium/auxiliary/draw/draw_vs_aos.c | 44 +++++---- src/gallium/auxiliary/draw/draw_vs_aos.h | 19 ++-- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 137 +++++++++++++++++++++------- 3 files changed, 134 insertions(+), 66 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index a556477a76..4c794e0e23 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -92,9 +92,9 @@ struct x86_reg aos_get_x86( struct aos_compilation *cp, assert(which_reg == 1); offset = Offset(struct aos_machine, constants); break; - case X86_ATTRIBS: + case X86_BUFFERS: assert(which_reg == 0); - offset = Offset(struct aos_machine, attrib); + offset = Offset(struct aos_machine, buffer); break; default: assert(0); @@ -1939,6 +1939,8 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, save_fpu_state( &cp ); set_fpu_round_nearest( &cp ); + aos_init_inputs( &cp, linear ); + /* Note address for loop jump */ label = x86_get_label(cp.func); @@ -2018,13 +2020,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, /* Incr index */ - if (linear) { - x86_inc(cp.func, cp.idx_EBX); - } - else { - x86_lea(cp.func, cp.idx_EBX, x86_make_disp(cp.idx_EBX, 4)); - } - + aos_incr_inputs( &cp, linear ); } /* decr count, loop if not zero */ @@ -2065,14 +2061,10 @@ static void vaos_set_buffer( struct draw_vs_varient *varient, unsigned stride ) { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; - unsigned i; - for (i = 0; i < vaos->base.key.nr_inputs; i++) { - if (vaos->base.key.element[i].in.buffer == buf) { - vaos->attrib[i].input_ptr = ((char *)ptr + - vaos->base.key.element[i].in.offset); - vaos->attrib[i].input_stride = stride; - } + if (buf < vaos->nr_vb) { + vaos->buffer[buf].base_ptr = (char *)ptr; + vaos->buffer[buf].stride = stride; } } @@ -2089,7 +2081,7 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; machine->constants = vaos->draw->vs.aligned_constants; machine->immediates = vaos->base.vs->immediates; - machine->attrib = vaos->attrib; + machine->buffer = vaos->buffer; vaos->gen_run_elts( machine, elts, @@ -2108,7 +2100,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; machine->constants = vaos->draw->vs.aligned_constants; machine->immediates = vaos->base.vs->immediates; - machine->attrib = vaos->attrib; + machine->buffer = vaos->buffer; vaos->gen_run_linear( machine, start, @@ -2127,7 +2119,7 @@ static void vaos_destroy( struct draw_vs_varient *varient ) { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; - FREE( vaos->attrib ); + FREE( vaos->buffer ); x86_release_func( &vaos->func[0] ); x86_release_func( &vaos->func[1] ); @@ -2140,6 +2132,7 @@ static void vaos_destroy( struct draw_vs_varient *varient ) static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, const struct draw_vs_varient_key *key ) { + unsigned i; struct draw_vs_varient_aos_sse *vaos = CALLOC_STRUCT(draw_vs_varient_aos_sse); if (!vaos) @@ -2154,10 +2147,15 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, vaos->draw = vs->draw; - vaos->attrib = MALLOC( key->nr_inputs * sizeof(vaos->attrib[0]) ); - if (!vaos->attrib) + for (i = 0; i < key->nr_inputs; i++) + vaos->nr_vb = MAX2( vaos->nr_vb, key->element[i].in.buffer + 1 ); + + vaos->buffer = MALLOC( vaos->nr_vb * sizeof(vaos->buffer[0]) ); + if (!vaos->buffer) goto fail; + debug_printf("nr_vb: %d\n", vaos->nr_vb); + #if 0 tgsi_dump(vs->state.tokens, 0); #endif @@ -2179,8 +2177,8 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, return &vaos->base; fail: - if (vaos && vaos->attrib) - FREE(vaos->attrib); + if (vaos && vaos->buffer) + FREE(vaos->buffer); if (vaos) x86_release_func( &vaos->func[0] ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 7fe6f79db0..306392e5d6 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -87,9 +87,10 @@ struct lit_info { #define MAX_SHINE_TAB 4 #define MAX_LIT_INFO 16 -struct aos_attrib { - const void *input_ptr; - unsigned input_stride; +struct aos_buffer { + const void *base_ptr; + unsigned stride; + void *ptr; /* updated per vertex */ }; @@ -123,7 +124,7 @@ struct aos_machine { const float (*immediates)[4]; /* points to shader data */ const float (*constants)[4]; /* points to draw data */ - const struct aos_attrib *attrib; /* points to ? */ + const struct aos_buffer *buffer; /* points to ? */ }; @@ -179,8 +180,9 @@ struct x86_reg aos_get_shader_reg( struct aos_compilation *cp, unsigned file, unsigned idx ); -boolean aos_fetch_inputs( struct aos_compilation *cp, - boolean linear ); +boolean aos_init_inputs( struct aos_compilation *cp, boolean linear ); +boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ); +boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear ); boolean aos_emit_outputs( struct aos_compilation *cp ); @@ -210,7 +212,7 @@ do { \ #define X86_NULL 0 #define X86_IMMEDIATES 1 #define X86_CONSTANTS 2 -#define X86_ATTRIBS 3 +#define X86_BUFFERS 3 struct x86_reg aos_get_x86( struct aos_compilation *cp, unsigned which_reg, @@ -232,7 +234,8 @@ struct draw_vs_varient_aos_sse { struct draw_vs_varient base; struct draw_context *draw; - struct aos_attrib *attrib; + struct aos_buffer *buffer; + unsigned nr_vb; vaos_run_linear_func gen_run_linear; vaos_run_elts_func gen_run_elts; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index 26297c74f8..8e08b9285f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -95,28 +95,6 @@ static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp, -static void get_src_ptr( struct aos_compilation *cp, - struct x86_reg src, - struct x86_reg elt, - unsigned a ) -{ - struct x86_reg attrib = x86_make_disp(aos_get_x86( cp, 0, X86_ATTRIBS ), - a * sizeof(struct aos_attrib)); - - struct x86_reg input_ptr = x86_make_disp(attrib, - Offset(struct aos_attrib, input_ptr)); - - struct x86_reg input_stride = x86_make_disp(attrib, - Offset(struct aos_attrib, input_stride)); - - /* Calculate pointer to current attrib: - */ - x86_mov(cp->func, src, input_stride); - x86_imul(cp->func, src, elt); - x86_add(cp->func, src, input_ptr); -} - - /* Extended swizzles? Maybe later. */ static void emit_swizzle( struct aos_compilation *cp, @@ -128,22 +106,44 @@ static void emit_swizzle( struct aos_compilation *cp, } + +static boolean get_buffer_ptr( struct aos_compilation *cp, + unsigned buf_idx, + struct x86_reg elt, + struct x86_reg ptr) +{ + struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ), + buf_idx * sizeof(struct aos_buffer)); + + struct x86_reg buf_base_ptr = x86_make_disp(buf, + Offset(struct aos_buffer, base_ptr)); + + struct x86_reg buf_stride = x86_make_disp(buf, + Offset(struct aos_buffer, stride)); + + /* Calculate pointer to current attrib: + */ + x86_mov(cp->func, ptr, buf_stride); + x86_imul(cp->func, ptr, elt); + x86_add(cp->func, ptr, buf_base_ptr); + + return TRUE; +} + + + + static boolean load_input( struct aos_compilation *cp, unsigned idx, - boolean linear ) + struct x86_reg bufptr ) { unsigned format = cp->vaos->base.key.element[idx].in.format; - struct x86_reg src = cp->tmp_EAX; + unsigned offset = cp->vaos->base.key.element[idx].in.offset; struct x86_reg dataXMM = aos_get_xmm_reg(cp); /* Figure out source pointer address: */ - get_src_ptr(cp, - src, - linear ? cp->idx_EBX : x86_deref(cp->idx_EBX), - idx); - - src = x86_deref(src); + struct x86_reg src = x86_make_disp(bufptr, offset); aos_adopt_xmm_reg( cp, dataXMM, @@ -179,20 +179,87 @@ static boolean load_input( struct aos_compilation *cp, return TRUE; } - -boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ) +static boolean load_inputs( struct aos_compilation *cp, + unsigned buffer, + struct x86_reg ptr ) { unsigned i; - + for (i = 0; i < cp->vaos->base.key.nr_inputs; i++) { - if (!load_input( cp, i, linear )) + if (cp->vaos->base.key.element[i].in.buffer == buffer) { + + if (!load_input( cp, i, ptr )) + return FALSE; + + cp->insn_counter++; + } + } + + return TRUE; +} + +boolean aos_init_inputs( struct aos_compilation *cp, boolean linear ) +{ + if (linear && cp->vaos->nr_vb == 1) { + + struct x86_reg elt = cp->idx_EBX; + struct x86_reg ptr = cp->tmp_EAX; + + if (!get_buffer_ptr( cp, 0, elt, ptr )) return FALSE; - cp->insn_counter++; + + /* In the linear, single buffer case, keep the buffer pointer + * instead of the index number. + */ + x86_mov( cp->func, elt, ptr ); + } + + return TRUE; +} + +boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ) +{ + if (linear && cp->vaos->nr_vb == 1) { + + load_inputs( cp, 0, cp->idx_EBX ); + + } + else { + struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX); + unsigned j; + + for (j = 0; j < cp->vaos->nr_vb; j++) { + struct x86_reg ptr = cp->tmp_EAX; + + if (!get_buffer_ptr( cp, j, elt, ptr )) + return FALSE; + + cp->insn_counter++; + + if (!load_inputs( cp, j, ptr )) + return FALSE; + } } return TRUE; } +boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear ) +{ + if (linear && cp->vaos->nr_vb == 1) { + struct x86_reg stride = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ), + (0 * sizeof(struct aos_buffer) + + Offset(struct aos_buffer, stride))); + + x86_add(cp->func, cp->idx_EBX, stride); + } + else if (linear) { + x86_inc(cp->func, cp->idx_EBX); + } + else { + x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4)); + } +} -- cgit v1.2.3 From 102daee1b8971cf39235e220b9524bec1e4a7089 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 2 Oct 2008 12:46:01 +0100 Subject: rtasm: add prefetch instructions --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 26 ++++++++++++++++++++++++++ src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 5 +++++ 2 files changed, 31 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 6d4c081e04..9085f4cc0e 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -629,6 +629,32 @@ void x86_and( struct x86_function *p, * SSE instructions */ +void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + emit_modrm_noreg(p, 0, ptr); +} + +void sse_prefetch0( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + emit_modrm_noreg(p, 1, ptr); +} + +void sse_prefetch1( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + emit_modrm_noreg(p, 2, ptr); +} + + + void sse_movss( struct x86_function *p, struct x86_reg dst, diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index af94577aab..2d7715f965 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -184,6 +184,11 @@ void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg ar void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + +void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr); +void sse_prefetch0( struct x86_function *p, struct x86_reg ptr); +void sse_prefetch1( struct x86_function *p, struct x86_reg ptr); + void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -- cgit v1.2.3 From af9cfea9cc80411351f9879d8eeb525bf7b4ca50 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 1 Oct 2008 18:40:01 +0100 Subject: draw: don't keep refetching constant inputs --- .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 37 +++--- src/gallium/auxiliary/draw/draw_vs.h | 4 +- src/gallium/auxiliary/draw/draw_vs_aos.c | 26 ++++- src/gallium/auxiliary/draw/draw_vs_aos.h | 2 + src/gallium/auxiliary/draw/draw_vs_aos_io.c | 127 +++++++++++++++------ src/gallium/auxiliary/draw/draw_vs_varient.c | 10 +- 6 files changed, 144 insertions(+), 62 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 73fc70c1bc..a0e08dd10a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -79,6 +79,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs; const struct vertex_info *vinfo; unsigned i; + unsigned nr_vbs = 0; if (!draw->render->set_primitive( draw->render, @@ -102,7 +103,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, fse->key.viewport = !draw->identity_viewport; fse->key.clip = !draw->bypass_clipping; - fse->key.pad = 0; + fse->key.const_vbuffers = 0; memset(fse->key.element, 0, fse->key.nr_elements * sizeof(fse->key.element[0])); @@ -116,9 +117,16 @@ static void fse_prepare( struct draw_pt_middle_end *middle, */ fse->key.element[i].in.buffer = src->vertex_buffer_index; fse->key.element[i].in.offset = src->src_offset; + nr_vbs = MAX2(nr_vbs, src->vertex_buffer_index + 1); } + for (i = 0; i < 5 && i < nr_vbs; i++) { + if (draw->pt.vertex_buffer[i].pitch == 0) + fse->key.const_vbuffers |= (1<key.const_vbuffers); + { unsigned dst_offset = 0; @@ -162,13 +170,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, } } - - /* Would normally look up a vertex shader and peruse its list of - * varients somehow. We omitted that step and put all the - * hardcoded "shaders" into an array. We're just making the - * assumption that this happens to be a matching shader... ie - * you're running isosurf, aren't you? - */ + fse->active = draw_vs_lookup_varient( draw->vs.vertex_shader, &fse->key ); @@ -177,18 +179,17 @@ static void fse_prepare( struct draw_pt_middle_end *middle, return ; } + if (0) debug_printf("%s: found const_vbuffers: %x\n", __FUNCTION__, + fse->active->key.const_vbuffers); + /* Now set buffer pointers: */ - for (i = 0; i < num_vs_inputs; i++) { - unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index; - - fse->active->set_input( fse->active, - i, - - ((const ubyte *) draw->pt.user.vbuffer[buf] + - draw->pt.vertex_buffer[buf].buffer_offset), - - draw->pt.vertex_buffer[buf].pitch ); + for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { + fse->active->set_buffer( fse->active, + i, + ((const ubyte *) draw->pt.user.vbuffer[i] + + draw->pt.vertex_buffer[i].buffer_offset), + draw->pt.vertex_buffer[i].pitch ); } *max_vertices = (draw->render->max_vertex_buffer_bytes / diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 45992d1986..68c24abad3 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -64,7 +64,7 @@ struct draw_vs_varient_key { unsigned nr_outputs:8; unsigned viewport:1; unsigned clip:1; - unsigned pad:5; + unsigned const_vbuffers:5; struct draw_varient_element element[PIPE_MAX_ATTRIBS]; }; @@ -76,7 +76,7 @@ struct draw_vs_varient { struct draw_vertex_shader *vs; - void (*set_input)( struct draw_vs_varient *, + void (*set_buffer)( struct draw_vs_varient *, unsigned i, const void *ptr, unsigned stride ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 4c794e0e23..87232865e2 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -196,6 +196,18 @@ static void spill( struct aos_compilation *cp, unsigned idx ) } +void aos_spill_all( struct aos_compilation *cp ) +{ + unsigned i; + + for (i = 0; i < 8; i++) { + if (cp->xmm[i].dirty) + spill(cp, i); + aos_release_xmm_reg(cp, i); + } +} + + static struct x86_reg get_xmm_writable( struct aos_compilation *cp, struct x86_reg reg ) { @@ -1941,6 +1953,9 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, aos_init_inputs( &cp, linear ); + cp.x86_reg[0] = 0; + cp.x86_reg[1] = 0; + /* Note address for loop jump */ label = x86_get_label(cp.func); @@ -2066,6 +2081,8 @@ static void vaos_set_buffer( struct draw_vs_varient *varient, vaos->buffer[buf].base_ptr = (char *)ptr; vaos->buffer[buf].stride = stride; } + + if (0) debug_printf("%s %d/%d: %p %d\n", __FUNCTION__, buf, vaos->nr_vb, ptr, stride); } @@ -2078,6 +2095,8 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; struct aos_machine *machine = vaos->draw->vs.aos_machine; + if (0) debug_printf("%s %d\n", __FUNCTION__, count); + machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; machine->constants = vaos->draw->vs.aligned_constants; machine->immediates = vaos->base.vs->immediates; @@ -2097,6 +2116,9 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; struct aos_machine *machine = vaos->draw->vs.aos_machine; + if (0) debug_printf("%s %d %d const: %x\n", __FUNCTION__, start, count, + vaos->base.key.const_vbuffers); + machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; machine->constants = vaos->draw->vs.aligned_constants; machine->immediates = vaos->base.vs->immediates; @@ -2140,7 +2162,7 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, vaos->base.key = *key; vaos->base.vs = vs; - vaos->base.set_input = vaos_set_buffer; + vaos->base.set_buffer = vaos_set_buffer; vaos->base.destroy = vaos_destroy; vaos->base.run_linear = vaos_run_linear; vaos->base.run_elts = vaos_run_elts; @@ -2154,7 +2176,7 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, if (!vaos->buffer) goto fail; - debug_printf("nr_vb: %d\n", vaos->nr_vb); + debug_printf("nr_vb: %d const: %x\n", vaos->nr_vb, vaos->base.key.const_vbuffers); #if 0 tgsi_dump(vs->state.tokens, 0); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 306392e5d6..264387517b 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -176,6 +176,8 @@ void aos_adopt_xmm_reg( struct aos_compilation *cp, unsigned idx, unsigned dirty ); +void aos_spill_all( struct aos_compilation *cp ); + struct x86_reg aos_get_shader_reg( struct aos_compilation *cp, unsigned file, unsigned idx ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index 8e08b9285f..b0c51d7fa1 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -108,29 +108,45 @@ static void emit_swizzle( struct aos_compilation *cp, static boolean get_buffer_ptr( struct aos_compilation *cp, - unsigned buf_idx, - struct x86_reg elt, - struct x86_reg ptr) + boolean linear, + unsigned buf_idx, + struct x86_reg elt, + struct x86_reg ptr) { struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ), buf_idx * sizeof(struct aos_buffer)); - struct x86_reg buf_base_ptr = x86_make_disp(buf, - Offset(struct aos_buffer, base_ptr)); - struct x86_reg buf_stride = x86_make_disp(buf, Offset(struct aos_buffer, stride)); + if (linear) { + struct x86_reg buf_ptr = x86_make_disp(buf, + Offset(struct aos_buffer, ptr)); - /* Calculate pointer to current attrib: - */ - x86_mov(cp->func, ptr, buf_stride); - x86_imul(cp->func, ptr, elt); - x86_add(cp->func, ptr, buf_base_ptr); - return TRUE; -} + /* Calculate pointer to current attrib: + */ + x86_mov(cp->func, ptr, buf_ptr); + x86_mov(cp->func, elt, buf_stride); + x86_add(cp->func, elt, ptr); + sse_prefetchnta(cp->func, x86_deref(elt)); + x86_mov(cp->func, buf_ptr, elt); + } + else { + struct x86_reg buf_base_ptr = x86_make_disp(buf, + Offset(struct aos_buffer, base_ptr)); + + + /* Calculate pointer to current attrib: + */ + x86_mov(cp->func, ptr, buf_stride); + x86_imul(cp->func, ptr, elt); + x86_add(cp->func, ptr, buf_base_ptr); + } + cp->insn_counter++; + return TRUE; +} static boolean load_input( struct aos_compilation *cp, @@ -200,18 +216,57 @@ static boolean load_inputs( struct aos_compilation *cp, boolean aos_init_inputs( struct aos_compilation *cp, boolean linear ) { - if (linear && cp->vaos->nr_vb == 1) { + unsigned i; + for (i = 0; i < cp->vaos->nr_vb; i++) { + struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ), + i * sizeof(struct aos_buffer)); - struct x86_reg elt = cp->idx_EBX; - struct x86_reg ptr = cp->tmp_EAX; + struct x86_reg buf_base_ptr = x86_make_disp(buf, + Offset(struct aos_buffer, base_ptr)); - if (!get_buffer_ptr( cp, 0, elt, ptr )) - return FALSE; + if (cp->vaos->base.key.const_vbuffers & (1<tmp_EAX; - /* In the linear, single buffer case, keep the buffer pointer - * instead of the index number. - */ - x86_mov( cp->func, elt, ptr ); + x86_mov(cp->func, ptr, buf_base_ptr); + + /* Load all inputs for this constant vertex buffer + */ + load_inputs( cp, i, x86_deref(ptr) ); + + /* Then just force them out to aos_machine.input[] + */ + aos_spill_all( cp ); + + } + else if (linear) { + + struct x86_reg elt = cp->idx_EBX; + struct x86_reg ptr = cp->tmp_EAX; + + struct x86_reg buf_stride = x86_make_disp(buf, + Offset(struct aos_buffer, stride)); + + struct x86_reg buf_ptr = x86_make_disp(buf, + Offset(struct aos_buffer, ptr)); + + + /* Calculate pointer to current attrib: + */ + x86_mov(cp->func, ptr, buf_stride); + x86_imul(cp->func, ptr, elt); + x86_add(cp->func, ptr, buf_base_ptr); + + + /* In the linear case, keep the buffer pointer instead of the + * index number. + */ + if (cp->vaos->nr_vb == 1) + x86_mov( cp->func, elt, ptr ); + else + x86_mov( cp->func, buf_ptr, ptr ); + + cp->insn_counter++; + } } return TRUE; @@ -219,23 +274,22 @@ boolean aos_init_inputs( struct aos_compilation *cp, boolean linear ) boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ) { - if (linear && cp->vaos->nr_vb == 1) { - - load_inputs( cp, 0, cp->idx_EBX ); + unsigned j; - } - else { - struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX); - unsigned j; - - for (j = 0; j < cp->vaos->nr_vb; j++) { + for (j = 0; j < cp->vaos->nr_vb; j++) { + if (cp->vaos->base.key.const_vbuffers & (1<vaos->nr_vb == 1) { + load_inputs( cp, 0, cp->idx_EBX ); + } + else { + struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX); struct x86_reg ptr = cp->tmp_EAX; - if (!get_buffer_ptr( cp, j, elt, ptr )) + if (!get_buffer_ptr( cp, linear, j, elt, ptr )) return FALSE; - cp->insn_counter++; - if (!load_inputs( cp, j, ptr )) return FALSE; } @@ -252,13 +306,16 @@ boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear ) Offset(struct aos_buffer, stride))); x86_add(cp->func, cp->idx_EBX, stride); + sse_prefetchnta(cp->func, x86_deref(cp->idx_EBX)); } else if (linear) { - x86_inc(cp->func, cp->idx_EBX); + /* Nothing to do */ } else { x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4)); } + + return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 4daf05dae7..7ee567d478 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -64,10 +64,10 @@ struct draw_vs_varient_generic { -static void vsvg_set_input( struct draw_vs_varient *varient, - unsigned buffer, - const void *ptr, - unsigned stride ) +static void vsvg_set_buffer( struct draw_vs_varient *varient, + unsigned buffer, + const void *ptr, + unsigned stride ) { struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; @@ -265,7 +265,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, vsvg->base.key = *key; vsvg->base.vs = vs; - vsvg->base.set_input = vsvg_set_input; + vsvg->base.set_buffer = vsvg_set_buffer; vsvg->base.run_elts = vsvg_run_elts; vsvg->base.run_linear = vsvg_run_linear; vsvg->base.destroy = vsvg_destroy; -- cgit v1.2.3 From 918a444913435bdee33214e25811875100f873b0 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 2 Oct 2008 12:53:11 +0100 Subject: draw: modify prefetching slightly --- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index b0c51d7fa1..dd79bc799a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -54,6 +54,7 @@ static void emit_load_R32G32B32( struct aos_compilation *cp, struct x86_reg data, struct x86_reg src_ptr ) { +#if 1 sse_movss(cp->func, data, x86_make_disp(src_ptr, 8)); /* data = z ? ? ? */ sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) ); @@ -62,6 +63,16 @@ static void emit_load_R32G32B32( struct aos_compilation *cp, /* data = ? 0 z 1 */ sse_movlps(cp->func, data, src_ptr); /* data = x y z 1 */ +#else + sse_movups(cp->func, data, src_ptr); + /* data = x y z ? */ + sse2_pshufd(cp->func, data, data, SHUF(W,X,Y,Z) ); + /* data = ? x y z */ + sse_movss(cp->func, data, aos_get_internal_xmm( cp, IMM_ONES ) ); + /* data = 1 x y z */ + sse2_pshufd(cp->func, data, data, SHUF(Y,Z,W,X) ); + /* data = x y z 1 */ +#endif } static void emit_load_R32G32( struct aos_compilation *cp, @@ -128,7 +139,7 @@ static boolean get_buffer_ptr( struct aos_compilation *cp, x86_mov(cp->func, ptr, buf_ptr); x86_mov(cp->func, elt, buf_stride); x86_add(cp->func, elt, ptr); - sse_prefetchnta(cp->func, x86_deref(elt)); + if (buf_idx == 0) sse_prefetchnta(cp->func, x86_make_disp(elt, 192)); x86_mov(cp->func, buf_ptr, elt); } else { @@ -306,7 +317,7 @@ boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear ) Offset(struct aos_buffer, stride))); x86_add(cp->func, cp->idx_EBX, stride); - sse_prefetchnta(cp->func, x86_deref(cp->idx_EBX)); + sse_prefetchnta(cp->func, x86_make_disp(cp->idx_EBX, 192)); } else if (linear) { /* Nothing to do */ @@ -327,7 +338,7 @@ static void emit_store_R32G32B32A32( struct aos_compilation *cp, struct x86_reg dst_ptr, struct x86_reg dataXMM ) { - sse_movups(cp->func, dst_ptr, dataXMM); + sse_movaps(cp->func, dst_ptr, dataXMM); } static void emit_store_R32G32B32( struct aos_compilation *cp, @@ -430,7 +441,7 @@ boolean aos_emit_outputs( struct aos_compilation *cp ) if (data.file != file_XMM) { struct x86_reg tmp = aos_get_xmm_reg( cp ); - sse_movups(cp->func, tmp, data); + sse_movaps(cp->func, tmp, data); data = tmp; } -- cgit v1.2.3 From 3f477e111a96493ff2863af06a98e8849ffbc6d8 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin Date: Sun, 28 Sep 2008 18:33:23 +0200 Subject: Gallivm: make it compile again, add some opcodes. --- src/gallium/auxiliary/draw/draw_vs_llvm.c | 1 + src/gallium/auxiliary/gallivm/gallivm_cpu.cpp | 1 + src/gallium/auxiliary/gallivm/instructions.cpp | 1210 ++++++++++++++---------- src/gallium/auxiliary/gallivm/instructions.h | 26 +- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 60 +- 5 files changed, 792 insertions(+), 506 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c index 2ce30b9a02..727977bc3a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_llvm.c +++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c @@ -32,6 +32,7 @@ * Brian Paul */ +#include "util/u_memory.h" #include "pipe/p_shader_tokens.h" #include "draw_private.h" #include "draw_context.h" diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp index e64bfb1c6c..3a4a41e544 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp @@ -46,6 +46,7 @@ #include "tgsi/tgsi_dump.h" #include "util/u_memory.h" +#include "util/u_math.h" #include #include diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index a82dc30306..5fdfe09d18 100644 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -83,6 +83,7 @@ Instructions::Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicB m_llvmPow = 0; m_llvmFloor = 0; m_llvmFlog = 0; + m_llvmFexp = 0; m_llvmLit = 0; m_fmtPtr = 0; @@ -92,194 +93,247 @@ Instructions::Instructions(llvm::Module *mod, llvm::Function *func, llvm::BasicB m_mod = ParseBitcodeFile(buffer); } -llvm::Value * Instructions::add(llvm::Value *in1, llvm::Value *in2) +llvm::BasicBlock * Instructions::currentBlock() const { - return m_builder.CreateAdd(in1, in2, name("add")); + return m_builder.GetInsertBlock(); } -llvm::Value * Instructions::madd(llvm::Value *in1, llvm::Value *in2, - llvm::Value *in3) +llvm::Value * Instructions::abs(llvm::Value *in) { - Value *mulRes = mul(in1, in2); - return add(mulRes, in3); + std::vector vec = extractVector(in); + Value *xabs = callFAbs(vec[0]); + Value *yabs = callFAbs(vec[1]); + Value *zabs = callFAbs(vec[2]); + Value *wabs = callFAbs(vec[3]); + return vectorFromVals(xabs, yabs, zabs, wabs); } - -llvm::Value * Instructions::mul(llvm::Value *in1, llvm::Value *in2) + +llvm::Value * Instructions::add(llvm::Value *in1, llvm::Value *in2) { - return m_builder.CreateMul(in1, in2, name("mul")); + return m_builder.CreateAdd(in1, in2, name("add")); } -const char * Instructions::name(const char *prefix) +llvm::Value * Instructions::arl(llvm::Value *in) { - ++m_idx; - snprintf(m_name, 32, "%s%d", prefix, m_idx); - return m_name; + return floor(in); } -llvm::Value * Instructions::dp3(llvm::Value *in1, llvm::Value *in2) +void Instructions::beginLoop() { - Value *mulRes = mul(in1, in2); - Value *x = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(0), - name("extractx")); - Value *y = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(1), - name("extracty")); - Value *z = m_builder.CreateExtractElement(mulRes, - m_storage->constantInt(2), - name("extractz")); - Value *xy = m_builder.CreateAdd(x, y,name("xy")); - Value *dot3 = m_builder.CreateAdd(xy, z, name("dot3")); - return vectorFromVals(dot3, dot3, dot3, dot3); + BasicBlock *begin = BasicBlock::Create(name("loop"), m_func,0); + BasicBlock *end = BasicBlock::Create(name("endloop"), m_func,0); + + m_builder.CreateBr(begin); + Loop loop; + loop.begin = begin; + loop.end = end; + m_builder.SetInsertPoint(begin); + m_loopStack.push(loop); } -llvm::Value *Instructions::callFSqrt(llvm::Value *val) +void Instructions::bgnSub(unsigned label) { - if (!m_llvmFSqrt) { - // predeclare the intrinsic - std::vector fsqrtArgs; - fsqrtArgs.push_back(Type::FloatTy); - PAListPtr fsqrtPal; - FunctionType* fsqrtType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/fsqrtArgs, - /*isVarArg=*/false); - m_llvmFSqrt = Function::Create( - /*Type=*/fsqrtType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"llvm.sqrt.f32", m_mod); - m_llvmFSqrt->setCallingConv(CallingConv::C); - m_llvmFSqrt->setParamAttrs(fsqrtPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFSqrt, val, - name("sqrt")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; + llvm::Function *func = findFunction(label); + + Function::arg_iterator args = func->arg_begin(); + Value *ptr_INPUT = args++; + ptr_INPUT->setName("INPUT"); + m_storage->pushArguments(ptr_INPUT); + + llvm::BasicBlock *entry = BasicBlock::Create("entry", func, 0); + + m_func = func; + m_builder.SetInsertPoint(entry); } -llvm::Value * Instructions::rsq(llvm::Value *in1) +void Instructions::brk() { - Value *x = m_builder.CreateExtractElement(in1, - m_storage->constantInt(0), - name("extractx")); - Value *abs = callFAbs(x); - Value *sqrt = callFSqrt(abs); + assert(!m_loopStack.empty()); + BasicBlock *unr = BasicBlock::Create(name("unreachable"), m_func,0); + m_builder.CreateBr(m_loopStack.top().end); + m_builder.SetInsertPoint(unr); +} - Value *rsqrt = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)), - sqrt, - name("rsqrt")); - return vectorFromVals(rsqrt, rsqrt, rsqrt, rsqrt); +void Instructions::cal(int label, llvm::Value *input) +{ + std::vector params; + params.push_back(input); + llvm::Function *func = findFunction(label); + + m_builder.CreateCall(func, params.begin(), params.end()); } -llvm::Value * Instructions::vectorFromVals(llvm::Value *x, llvm::Value *y, - llvm::Value *z, llvm::Value *w) +llvm::Value * Instructions::clamp(llvm::Value *in1) { - Constant *const_vec = Constant::getNullValue(m_floatVecType); - Value *res = m_builder.CreateInsertElement(const_vec, x, - m_storage->constantInt(0), - name("vecx")); - res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1), - name("vecxy")); - res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2), - name("vecxyz")); - if (w) - res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3), - name("vecxyzw")); - return res; + // FIXME } -llvm::Value *Instructions::callFAbs(llvm::Value *val) +llvm::Value * Instructions::cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) { - if (!m_llvmFAbs) { - // predeclare the intrinsic - std::vector fabsArgs; - fabsArgs.push_back(Type::FloatTy); - PAListPtr fabsPal; - FunctionType* fabsType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/fabsArgs, - /*isVarArg=*/false); - m_llvmFAbs = Function::Create( - /*Type=*/fabsType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"fabs", m_mod); - m_llvmFAbs->setCallingConv(CallingConv::C); - m_llvmFAbs->setParamAttrs(fabsPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFAbs, val, - name("fabs")); - call->setCallingConv(CallingConv::C); + llvm::Function *func = m_mod->getFunction("cmp"); + assert(func); + + std::vector params; + params.push_back(in1); + params.push_back(in2); + params.push_back(in3); + CallInst *call = m_builder.CreateCall(func, params.begin(), params.end(), name("cmpres")); call->setTailCall(false); return call; } -llvm::Value * Instructions::lit(llvm::Value *in) +llvm::Value * Instructions::cnd(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) { - if (!m_llvmLit) { - m_llvmLit = m_mod->getFunction("lit"); - } - CallInst *call = m_builder.CreateCall(m_llvmLit, in, name("litres")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); + std::vector vec3 = extractVector(in3); + Constant *half = ConstantFP::get(APFloat(0.5f)); + + Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], half, name("xcmp")); + Value *selx = m_builder.CreateSelect(xcmp, vec2[0], vec3[0], + name("selx")); + + Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], half, name("ycmp")); + Value *sely = m_builder.CreateSelect(ycmp, vec2[1], vec3[1], + name("sely")); + + Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], half, name("zcmp")); + Value *selz = m_builder.CreateSelect(zcmp, vec2[2], vec3[2], + name("selz")); + + Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], half, name("wcmp")); + Value *selw = m_builder.CreateSelect(wcmp, vec2[3], vec3[3], + name("selw")); + + return vectorFromVals(selx, sely, selz, selw); } -llvm::Value * Instructions::sub(llvm::Value *in1, llvm::Value *in2) +llvm::Value * Instructions::cnd0(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) { - Value *res = m_builder.CreateSub(in1, in2, name("sub")); - return res; + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); + std::vector vec3 = extractVector(in3); + Constant *zero = Constant::getNullValue(Type::FloatTy); + + Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], zero, name("xcmp")); + Value *selx = m_builder.CreateSelect(xcmp, vec2[0], vec3[0], + name("selx")); + + Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], zero, name("ycmp")); + Value *sely = m_builder.CreateSelect(ycmp, vec2[1], vec3[1], + name("sely")); + + Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], zero, name("zcmp")); + Value *selz = m_builder.CreateSelect(zcmp, vec2[2], vec3[2], + name("selz")); + + Value *wcmp = m_builder.CreateFCmpOGE(vec1[3], zero, name("wcmp")); + Value *selw = m_builder.CreateSelect(wcmp, vec2[3], vec3[3], + name("selw")); + + return vectorFromVals(selx, sely, selz, selw); } -llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2) +llvm::Value * Instructions::cos(llvm::Value *in) { - if (!m_llvmPow) { - // predeclare the intrinsic - std::vector powArgs; - powArgs.push_back(Type::FloatTy); - powArgs.push_back(Type::FloatTy); - PAListPtr powPal; - FunctionType* powType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/powArgs, - /*isVarArg=*/false); - m_llvmPow = Function::Create( - /*Type=*/powType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"llvm.pow.f32", m_mod); - m_llvmPow->setCallingConv(CallingConv::C); - m_llvmPow->setParamAttrs(powPal); - } - std::vector params; - params.push_back(val1); - params.push_back(val2); - CallInst *call = m_builder.CreateCall(m_llvmPow, params.begin(), params.end(), - name("pow")); - call->setCallingConv(CallingConv::C); +#if 0 + llvm::Function *func = m_mod->getFunction("vcos"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("cosres")); call->setTailCall(false); return call; +#else + std::vector elems = extractVector(in); + Function *func = m_mod->getFunction("cosf"); + assert(func); + CallInst *cos = m_builder.CreateCall(func, elems[0], name("cosres")); + cos->setCallingConv(CallingConv::C); + cos->setTailCall(true); + return vectorFromVals(cos, cos, cos, cos); +#endif } -llvm::Value * Instructions::pow(llvm::Value *in1, llvm::Value *in2) +llvm::Value * Instructions::cross(llvm::Value *in1, llvm::Value *in2) { Value *x1 = m_builder.CreateExtractElement(in1, m_storage->constantInt(0), name("x1")); + Value *y1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(1), + name("y1")); + Value *z1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(2), + name("z1")); + Value *x2 = m_builder.CreateExtractElement(in2, m_storage->constantInt(0), name("x2")); - llvm::Value *val = callPow(x1, x2); - return vectorFromVals(val, val, val, val); + Value *y2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(1), + name("y2")); + Value *z2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(2), + name("z2")); + Value *y1z2 = mul(y1, z2); + Value *z1y2 = mul(z1, y2); + + Value *z1x2 = mul(z1, x2); + Value *x1z2 = mul(x1, z2); + + Value *x1y2 = mul(x1, y2); + Value *y1x2 = mul(y1, x2); + + return vectorFromVals(sub(y1z2, z1y2), sub(z1x2, x1z2), sub(x1y2, y1x2)); } -llvm::Value * Instructions::rcp(llvm::Value *in1) +llvm::Value * Instructions::ddx(llvm::Value *in) { - Value *x1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(0), - name("x1")); - Value *res = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)), - x1, name("rcp")); - return vectorFromVals(res, res, res, res); + // FIXME +} + +llvm::Value * Instructions::ddy(llvm::Value *in) +{ + // FIXME +} + +llvm::Value * Instructions::div(llvm::Value *in1, llvm::Value *in2) +{ + return m_builder.CreateFDiv(in1, in2, name("div")); +} + +llvm::Value * Instructions::dot2add(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) +{ + Value *mulRes = mul(in1, in2); + Value *x = m_builder.CreateExtractElement(mulRes, + m_storage->constantInt(0), + name("extractx")); + Value *y = m_builder.CreateExtractElement(mulRes, + m_storage->constantInt(1), + name("extracty")); + Value *z = m_builder.CreateExtractElement(in3, + m_storage->constantInt(2), + name("extractz")); + Value *xy = m_builder.CreateAdd(x, y,name("xy")); + Value *dot2add = m_builder.CreateAdd(xy, z, name("dot2add")); + return vectorFromVals(dot2add, dot2add, dot2add, dot2add); +} + +llvm::Value * Instructions::dp3(llvm::Value *in1, llvm::Value *in2) +{ + Value *mulRes = mul(in1, in2); + Value *x = m_builder.CreateExtractElement(mulRes, + m_storage->constantInt(0), + name("extractx")); + Value *y = m_builder.CreateExtractElement(mulRes, + m_storage->constantInt(1), + name("extracty")); + Value *z = m_builder.CreateExtractElement(mulRes, + m_storage->constantInt(2), + name("extractz")); + Value *xy = m_builder.CreateAdd(x, y,name("xy")); + Value *dot3 = m_builder.CreateAdd(xy, z, name("dot3")); + return vectorFromVals(dot3, dot3, dot3, dot3); } llvm::Value * Instructions::dp4(llvm::Value *in1, llvm::Value *in2) @@ -302,23 +356,70 @@ llvm::Value * Instructions::dph(llvm::Value *in1, llvm::Value *in2) return vectorFromVals(dph, dph, dph, dph); } -llvm::Value * Instructions::dst(llvm::Value *in1, llvm::Value *in2) +llvm::Value * Instructions::dst(llvm::Value *in1, llvm::Value *in2) +{ + Value *y1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(1), + name("y1")); + Value *z = m_builder.CreateExtractElement(in1, + m_storage->constantInt(2), + name("z")); + Value *y2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(1), + name("y2")); + Value *w = m_builder.CreateExtractElement(in2, + m_storage->constantInt(3), + name("w")); + Value *ry = m_builder.CreateMul(y1, y2, name("tyuy")); + return vectorFromVals(ConstantFP::get(APFloat(1.f)), + ry, z, w); +} + +void Instructions::elseop() +{ + assert(!m_ifStack.empty()); + BasicBlock *ifend = BasicBlock::Create(name("ifend"), m_func,0); + m_builder.CreateBr(ifend); + m_builder.SetInsertPoint(m_ifStack.top()); + currentBlock()->setName(name("ifelse")); + m_ifStack.pop(); + m_ifStack.push(ifend); +} + +void Instructions::endif() +{ + assert(!m_ifStack.empty()); + m_builder.CreateBr(m_ifStack.top()); + m_builder.SetInsertPoint(m_ifStack.top()); + m_ifStack.pop(); +} + +void Instructions::endLoop() +{ + assert(!m_loopStack.empty()); + Loop loop = m_loopStack.top(); + m_builder.CreateBr(loop.begin); + loop.end->moveAfter(currentBlock()); + m_builder.SetInsertPoint(loop.end); + m_loopStack.pop(); +} + +void Instructions::end() +{ + m_builder.CreateRetVoid(); +} + +void Instructions::endSub() +{ + m_func = 0; + m_builder.SetInsertPoint(0); +} + +llvm::Value * Instructions::exp(llvm::Value *in) { - Value *y1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(1), - name("y1")); - Value *z = m_builder.CreateExtractElement(in1, - m_storage->constantInt(2), - name("z")); - Value *y2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(1), - name("y2")); - Value *w = m_builder.CreateExtractElement(in2, - m_storage->constantInt(3), - name("w")); - Value *ry = m_builder.CreateMul(y1, y2, name("tyuy")); - return vectorFromVals(ConstantFP::get(APFloat(1.f)), - ry, z, w); + std::vector vec = extractVector(in); + return vectorFromVals(callFExp(vec[0]), callFExp(vec[1]), + callFExp(vec[2]), callFExp(vec[3])); } llvm::Value * Instructions::ex2(llvm::Value *in) @@ -330,31 +431,6 @@ llvm::Value * Instructions::ex2(llvm::Value *in) return vectorFromVals(val, val, val, val); } -llvm::Value * Instructions::callFloor(llvm::Value *val) -{ - if (!m_llvmFloor) { - // predeclare the intrinsic - std::vector floorArgs; - floorArgs.push_back(Type::FloatTy); - PAListPtr floorPal; - FunctionType* floorType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/floorArgs, - /*isVarArg=*/false); - m_llvmFloor = Function::Create( - /*Type=*/floorType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"floorf", m_mod); - m_llvmFloor->setCallingConv(CallingConv::C); - m_llvmFloor->setParamAttrs(floorPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFloor, val, - name("floorf")); - call->setCallingConv(CallingConv::C); - call->setTailCall(false); - return call; -} - llvm::Value * Instructions::floor(llvm::Value *in) { std::vector vec = extractVector(in); @@ -362,42 +438,52 @@ llvm::Value * Instructions::floor(llvm::Value *in) callFloor(vec[2]), callFloor(vec[3])); } -llvm::Value * Instructions::arl(llvm::Value *in) -{ - return floor(in); -} - llvm::Value * Instructions::frc(llvm::Value *in) { llvm::Value *flr = floor(in); return sub(in, flr); } -llvm::Value * Instructions::callFLog(llvm::Value *val) +void Instructions::ifop(llvm::Value *in) { - if (!m_llvmFlog) { - // predeclare the intrinsic - std::vector flogArgs; - flogArgs.push_back(Type::FloatTy); - PAListPtr flogPal; - FunctionType* flogType = FunctionType::get( - /*Result=*/Type::FloatTy, - /*Params=*/flogArgs, - /*isVarArg=*/false); - m_llvmFlog = Function::Create( - /*Type=*/flogType, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"logf", m_mod); - m_llvmFlog->setCallingConv(CallingConv::C); - m_llvmFlog->setParamAttrs(flogPal); - } - CallInst *call = m_builder.CreateCall(m_llvmFlog, val, - name("logf")); - call->setCallingConv(CallingConv::C); + BasicBlock *ifthen = BasicBlock::Create(name("ifthen"), m_func,0); + BasicBlock *ifend = BasicBlock::Create(name("ifthenend"), m_func,0); + + //BasicBlock *yblock = new BasicBlock(name("yblock"), m_func,0); + //BasicBlock *zblock = new BasicBlock(name("zblock"), m_func,0); + //BasicBlock *wblock = new BasicBlock(name("wblock"), m_func,0); + + Constant *float0 = Constant::getNullValue(Type::FloatTy); + + Value *x = m_builder.CreateExtractElement(in, m_storage->constantInt(0), + name("extractx")); + Value *xcmp = m_builder.CreateFCmpUNE(x, float0, name("xcmp")); + m_builder.CreateCondBr(xcmp, ifthen, ifend); + //m_builder.SetInsertPoint(yblock); + + m_builder.SetInsertPoint(ifthen); + m_ifStack.push(ifend); +} + +llvm::Value * Instructions::kil(llvm::Value *in) +{ + llvm::Function *func = m_mod->getFunction("kil"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("kilpres")); call->setTailCall(false); return call; } +llvm::Value * Instructions::lerp(llvm::Value *in1, llvm::Value *in2, + llvm::Value *in3) +{ + llvm::Value *m = mul(in1, in2); + llvm::Value *vec1 = constVector(1.f, 1.f, 1.f, 1.f); + llvm::Value *s = sub(vec1, in1); + return add(m, mul(s, in3)); +} + llvm::Value * Instructions::lg2(llvm::Value *in) { std::vector vec = extractVector(in); @@ -407,120 +493,192 @@ llvm::Value * Instructions::lg2(llvm::Value *in) callFLog(vec[2]), callFLog(vec[3])), const_vec); } -llvm::Value * Instructions::min(llvm::Value *in1, llvm::Value *in2) +llvm::Value * Instructions::lit(llvm::Value *in) +{ + if (!m_llvmLit) { + m_llvmLit = m_mod->getFunction("lit"); + } + CallInst *call = m_builder.CreateCall(m_llvmLit, in, name("litres")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::log(llvm::Value *in) +{ + std::vector vec = extractVector(in); + return vectorFromVals(callFLog(vec[0]), callFLog(vec[1]), + callFLog(vec[2]), callFLog(vec[3])); +} + +llvm::Value * Instructions::madd(llvm::Value *in1, llvm::Value *in2, + llvm::Value *in3) +{ + Value *mulRes = mul(in1, in2); + return add(mulRes, in3); +} + +llvm::Value * Instructions::max(llvm::Value *in1, llvm::Value *in2) { std::vector vec1 = extractVector(in1); std::vector vec2 = extractVector(in2); - Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp")); + Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], + name("xcmp")); Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0], name("selx")); - Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp")); + Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], + name("ycmp")); Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1], name("sely")); - Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp")); + Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], + name("zcmp")); Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2], name("selz")); - Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp")); + Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], + name("wcmp")); Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3], name("selw")); return vectorFromVals(selx, sely, selz, selw); } -llvm::Value * Instructions::max(llvm::Value *in1, llvm::Value *in2) +llvm::Value * Instructions::min(llvm::Value *in1, llvm::Value *in2) { std::vector vec1 = extractVector(in1); std::vector vec2 = extractVector(in2); - Value *xcmp = m_builder.CreateFCmpOGT(vec1[0], vec2[0], - name("xcmp")); + Value *xcmp = m_builder.CreateFCmpOLT(vec1[0], vec2[0], name("xcmp")); Value *selx = m_builder.CreateSelect(xcmp, vec1[0], vec2[0], name("selx")); - Value *ycmp = m_builder.CreateFCmpOGT(vec1[1], vec2[1], - name("ycmp")); + Value *ycmp = m_builder.CreateFCmpOLT(vec1[1], vec2[1], name("ycmp")); Value *sely = m_builder.CreateSelect(ycmp, vec1[1], vec2[1], name("sely")); - Value *zcmp = m_builder.CreateFCmpOGT(vec1[2], vec2[2], - name("zcmp")); + Value *zcmp = m_builder.CreateFCmpOLT(vec1[2], vec2[2], name("zcmp")); Value *selz = m_builder.CreateSelect(zcmp, vec1[2], vec2[2], name("selz")); - Value *wcmp = m_builder.CreateFCmpOGT(vec1[3], vec2[3], - name("wcmp")); + Value *wcmp = m_builder.CreateFCmpOLT(vec1[3], vec2[3], name("wcmp")); Value *selw = m_builder.CreateSelect(wcmp, vec1[3], vec2[3], name("selw")); return vectorFromVals(selx, sely, selz, selw); } -void Instructions::printVector(llvm::Value *val) +llvm::Value * Instructions::mul(llvm::Value *in1, llvm::Value *in2) { - static const char *frmt = "Vector is [%f, %f, %f, %f]\x0A"; + return m_builder.CreateMul(in1, in2, name("mul")); +} - if (!m_fmtPtr) { - Constant *format = ConstantArray::get(frmt, true); - ArrayType *arrayTy = ArrayType::get(IntegerType::get(8), strlen(frmt) + 1); - GlobalVariable* globalFormat = new GlobalVariable( - /*Type=*/arrayTy, - /*isConstant=*/true, - /*Linkage=*/GlobalValue::InternalLinkage, - /*Initializer=*/0, // has initializer, specified below - /*Name=*/name(".str"), - m_mod); - globalFormat->setInitializer(format); +llvm::Value * Instructions::neg(llvm::Value *in) +{ + Value *neg = m_builder.CreateNeg(in, name("neg")); + return neg; +} - Constant* const_int0 = Constant::getNullValue(IntegerType::get(32)); - std::vector const_ptr_21_indices; - const_ptr_21_indices.push_back(const_int0); - const_ptr_21_indices.push_back(const_int0); - m_fmtPtr = ConstantExpr::getGetElementPtr(globalFormat, - &const_ptr_21_indices[0], const_ptr_21_indices.size()); - } +llvm::Value * Instructions::pow(llvm::Value *in1, llvm::Value *in2) +{ + Value *x1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("x1")); + Value *x2 = m_builder.CreateExtractElement(in2, + m_storage->constantInt(0), + name("x2")); + llvm::Value *val = callPow(x1, x2); + return vectorFromVals(val, val, val, val); +} - Function *func_printf = m_mod->getFunction("printf"); - if (!func_printf) - func_printf = declarePrintf(); - assert(func_printf); - std::vector vec = extractVector(val); - Value *dx = m_builder.CreateFPExt(vec[0], Type::DoubleTy, name("dx")); - Value *dy = m_builder.CreateFPExt(vec[1], Type::DoubleTy, name("dy")); - Value *dz = m_builder.CreateFPExt(vec[2], Type::DoubleTy, name("dz")); - Value *dw = m_builder.CreateFPExt(vec[3], Type::DoubleTy, name("dw")); - std::vector params; - params.push_back(m_fmtPtr); - params.push_back(dx); - params.push_back(dy); - params.push_back(dz); - params.push_back(dw); - CallInst *call = m_builder.CreateCall(func_printf, params.begin(), params.end(), - name("printf")); - call->setCallingConv(CallingConv::C); - call->setTailCall(true); +llvm::Value * Instructions::rcp(llvm::Value *in1) +{ + Value *x1 = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("x1")); + Value *res = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)), + x1, name("rcp")); + return vectorFromVals(res, res, res, res); +} + +llvm::Value * Instructions::rsq(llvm::Value *in1) +{ + Value *x = m_builder.CreateExtractElement(in1, + m_storage->constantInt(0), + name("extractx")); + Value *abs = callFAbs(x); + Value *sqrt = callFSqrt(abs); + + Value *rsqrt = m_builder.CreateFDiv(ConstantFP::get(APFloat(1.f)), + sqrt, + name("rsqrt")); + return vectorFromVals(rsqrt, rsqrt, rsqrt, rsqrt); +} + +llvm::Value * Instructions::scs(llvm::Value *in) +{ + llvm::Function *func = m_mod->getFunction("scs"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("scsres")); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::seq(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOEQ(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpOEQ(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); + + Value *zcmp = m_builder.CreateFCmpOEQ(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpOEQ(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); } -llvm::Function * Instructions::declarePrintf() +llvm::Value * Instructions::sfl(llvm::Value *in1, llvm::Value *in2) { - std::vector args; - PAListPtr params; - FunctionType* funcTy = FunctionType::get( - /*Result=*/IntegerType::get(32), - /*Params=*/args, - /*isVarArg=*/true); - Function* func_printf = Function::Create( - /*Type=*/funcTy, - /*Linkage=*/GlobalValue::ExternalLinkage, - /*Name=*/"printf", m_mod); - func_printf->setCallingConv(CallingConv::C); - func_printf->setParamAttrs(params); - return func_printf; + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + return vectorFromVals(const0f, const0f, const0f, const0f); } +llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); + + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); + + Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); + + Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); + + Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpOGE(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); +} llvm::Value * Instructions::sgt(llvm::Value *in1, llvm::Value *in2) { @@ -543,7 +701,18 @@ llvm::Value * Instructions::sgt(llvm::Value *in1, llvm::Value *in2) return vectorFromVals(x, y, z, w); } -llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2) + +llvm::Value * Instructions::sin(llvm::Value *in) +{ + llvm::Function *func = m_mod->getFunction("vsin"); + assert(func); + + CallInst *call = m_builder.CreateCall(func, in, name("sinres")); + call->setTailCall(false); + return call; +} + +llvm::Value * Instructions::sle(llvm::Value *in1, llvm::Value *in2) { Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); Constant *const0f = Constant::getNullValue(Type::FloatTy); @@ -551,22 +720,21 @@ llvm::Value * Instructions::sge(llvm::Value *in1, llvm::Value *in2) std::vector vec1 = extractVector(in1); std::vector vec2 = extractVector(in2); - Value *xcmp = m_builder.CreateFCmpOGE(vec1[0], vec2[0], name("xcmp")); + Value *xcmp = m_builder.CreateFCmpOLE(vec1[0], vec2[0], name("xcmp")); Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); - Value *ycmp = m_builder.CreateFCmpOGE(vec1[1], vec2[1], name("ycmp")); + Value *ycmp = m_builder.CreateFCmpOLE(vec1[1], vec2[1], name("ycmp")); Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); - Value *zcmp = m_builder.CreateFCmpOGE(vec1[2], vec2[2], name("zcmp")); + Value *zcmp = m_builder.CreateFCmpOLE(vec1[2], vec2[2], name("zcmp")); Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); - Value *wcmp = m_builder.CreateFCmpOGE(vec1[3], vec2[3], name("wcmp")); + Value *wcmp = m_builder.CreateFCmpOLE(vec1[3], vec2[3], name("wcmp")); Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); return vectorFromVals(x, y, z, w); } - llvm::Value * Instructions::slt(llvm::Value *in1, llvm::Value *in2) { Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); @@ -590,169 +758,331 @@ llvm::Value * Instructions::slt(llvm::Value *in1, llvm::Value *in2) return vectorFromVals(x, y, z, w); } -llvm::Value * Instructions::cross(llvm::Value *in1, llvm::Value *in2) +llvm::Value * Instructions::sne(llvm::Value *in1, llvm::Value *in2) { - Value *x1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(0), - name("x1")); - Value *y1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(1), - name("y1")); - Value *z1 = m_builder.CreateExtractElement(in1, - m_storage->constantInt(2), - name("z1")); + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); + Constant *const0f = Constant::getNullValue(Type::FloatTy); - Value *x2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(0), - name("x2")); - Value *y2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(1), - name("y2")); - Value *z2 = m_builder.CreateExtractElement(in2, - m_storage->constantInt(2), - name("z2")); - Value *y1z2 = mul(y1, z2); - Value *z1y2 = mul(z1, y2); + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); - Value *z1x2 = mul(z1, x2); - Value *x1z2 = mul(x1, z2); + Value *xcmp = m_builder.CreateFCmpONE(vec1[0], vec2[0], name("xcmp")); + Value *x = m_builder.CreateSelect(xcmp, const1f, const0f, name("xsel")); - Value *x1y2 = mul(x1, y2); - Value *y1x2 = mul(y1, x2); + Value *ycmp = m_builder.CreateFCmpONE(vec1[1], vec2[1], name("ycmp")); + Value *y = m_builder.CreateSelect(ycmp, const1f, const0f, name("ysel")); - return vectorFromVals(sub(y1z2, z1y2), sub(z1x2, x1z2), sub(x1y2, y1x2)); + Value *zcmp = m_builder.CreateFCmpONE(vec1[2], vec2[2], name("zcmp")); + Value *z = m_builder.CreateSelect(zcmp, const1f, const0f, name("zsel")); + + Value *wcmp = m_builder.CreateFCmpONE(vec1[3], vec2[3], name("wcmp")); + Value *w = m_builder.CreateSelect(wcmp, const1f, const0f, name("wsel")); + + return vectorFromVals(x, y, z, w); } +llvm::Value * Instructions::str(llvm::Value *in1, llvm::Value *in2) +{ + Constant *const1f = ConstantFP::get(APFloat(1.000000e+00f)); -llvm::Value * Instructions::abs(llvm::Value *in) + return vectorFromVals(const1f, const1f, const1f, const1f); +} + +llvm::Value * Instructions::sub(llvm::Value *in1, llvm::Value *in2) +{ + Value *res = m_builder.CreateSub(in1, in2, name("sub")); + return res; +} + +llvm::Value * Instructions::trunc(llvm::Value *in) { std::vector vec = extractVector(in); - Value *xabs = callFAbs(vec[0]); - Value *yabs = callFAbs(vec[1]); - Value *zabs = callFAbs(vec[2]); - Value *wabs = callFAbs(vec[3]); - return vectorFromVals(xabs, yabs, zabs, wabs); + Value *icastx = m_builder.CreateFPToSI(vec[0], IntegerType::get(32), + name("ftoix")); + Value *icasty = m_builder.CreateFPToSI(vec[1], IntegerType::get(32), + name("ftoiy")); + Value *icastz = m_builder.CreateFPToSI(vec[2], IntegerType::get(32), + name("ftoiz")); + Value *icastw = m_builder.CreateFPToSI(vec[3], IntegerType::get(32), + name("ftoiw")); + Value *fx = m_builder.CreateSIToFP(icastx, Type::FloatTy, + name("fx")); + Value *fy = m_builder.CreateSIToFP(icasty, Type::FloatTy, + name("fy")); + Value *fz = m_builder.CreateSIToFP(icastz, Type::FloatTy, + name("fz")); + Value *fw = m_builder.CreateSIToFP(icastw, Type::FloatTy, + name("fw")); + return vectorFromVals(fx, fy, fz, fw); } -void Instructions::ifop(llvm::Value *in) +llvm::Value * Instructions::x2d(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) { - BasicBlock *ifthen = BasicBlock::Create(name("ifthen"), m_func,0); - BasicBlock *ifend = BasicBlock::Create(name("ifthenend"), m_func,0); + std::vector vec1 = extractVector(in1); + std::vector vec2 = extractVector(in2); + std::vector vec3 = extractVector(in3); - //BasicBlock *yblock = new BasicBlock(name("yblock"), m_func,0); - //BasicBlock *zblock = new BasicBlock(name("zblock"), m_func,0); - //BasicBlock *wblock = new BasicBlock(name("wblock"), m_func,0); + Value *x2x3 = m_builder.CreateMul( vec2[0], vec3[0], name("x2x3")); + Value *y2y3 = m_builder.CreateMul( vec2[1], vec3[1], name("y2y3")); + Value *x1px2x3 = m_builder.CreateAdd (vec1[0], x2x3, name("x1 + x2x3")); + Value *x1px2x3py2y3 = m_builder.CreateAdd (x1px2x3, y2y3, name("x1 + x2x3 + y2y3")); - Constant *float0 = Constant::getNullValue(Type::FloatTy); + Value *x2z3 = m_builder.CreateMul( vec2[0], vec3[2], name("x2z3")); + Value *y2w3 = m_builder.CreateMul( vec2[1], vec3[3], name("y2w3")); + Value *y1px2z3 = m_builder.CreateAdd (vec1[1], x2z3, name("y1 + x2z3")); + Value *y1px2z3py2w3 = m_builder.CreateAdd (y1px2z3, y2w3, name("y1 + x2z3 + y2w3")); - Value *x = m_builder.CreateExtractElement(in, m_storage->constantInt(0), - name("extractx")); - Value *xcmp = m_builder.CreateFCmpUNE(x, float0, name("xcmp")); - m_builder.CreateCondBr(xcmp, ifthen, ifend); - //m_builder.SetInsertPoint(yblock); + return vectorFromVals(x1px2x3py2y3, y1px2z3py2w3, x1px2x3py2y3, y1px2z3py2w3); +} - m_builder.SetInsertPoint(ifthen); - m_ifStack.push(ifend); +void Instructions::printVector(llvm::Value *val) +{ + static const char *frmt = "Vector is [%f, %f, %f, %f]\x0A"; + + if (!m_fmtPtr) { + Constant *format = ConstantArray::get(frmt, true); + ArrayType *arrayTy = ArrayType::get(IntegerType::get(8), strlen(frmt) + 1); + GlobalVariable* globalFormat = new GlobalVariable( + /*Type=*/arrayTy, + /*isConstant=*/true, + /*Linkage=*/GlobalValue::InternalLinkage, + /*Initializer=*/0, // has initializer, specified below + /*Name=*/name(".str"), + m_mod); + globalFormat->setInitializer(format); + + Constant* const_int0 = Constant::getNullValue(IntegerType::get(32)); + std::vector const_ptr_21_indices; + const_ptr_21_indices.push_back(const_int0); + const_ptr_21_indices.push_back(const_int0); + m_fmtPtr = ConstantExpr::getGetElementPtr(globalFormat, + &const_ptr_21_indices[0], const_ptr_21_indices.size()); + } + + Function *func_printf = m_mod->getFunction("printf"); + if (!func_printf) + func_printf = declarePrintf(); + assert(func_printf); + std::vector vec = extractVector(val); + Value *dx = m_builder.CreateFPExt(vec[0], Type::DoubleTy, name("dx")); + Value *dy = m_builder.CreateFPExt(vec[1], Type::DoubleTy, name("dy")); + Value *dz = m_builder.CreateFPExt(vec[2], Type::DoubleTy, name("dz")); + Value *dw = m_builder.CreateFPExt(vec[3], Type::DoubleTy, name("dw")); + std::vector params; + params.push_back(m_fmtPtr); + params.push_back(dx); + params.push_back(dy); + params.push_back(dz); + params.push_back(dw); + CallInst *call = m_builder.CreateCall(func_printf, params.begin(), params.end(), + name("printf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(true); } -llvm::BasicBlock * Instructions::currentBlock() const +const char * Instructions::name(const char *prefix) { - return m_builder.GetInsertBlock(); + ++m_idx; + snprintf(m_name, 32, "%s%d", prefix, m_idx); + return m_name; } -void Instructions::elseop() +llvm::Value *Instructions::callFAbs(llvm::Value *val) { - assert(!m_ifStack.empty()); - BasicBlock *ifend = BasicBlock::Create(name("ifend"), m_func,0); - m_builder.CreateBr(ifend); - m_builder.SetInsertPoint(m_ifStack.top()); - currentBlock()->setName(name("ifelse")); - m_ifStack.pop(); - m_ifStack.push(ifend); + if (!m_llvmFAbs) { + // predeclare the intrinsic + std::vector fabsArgs; + fabsArgs.push_back(Type::FloatTy); + PAListPtr fabsPal; + FunctionType* fabsType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/fabsArgs, + /*isVarArg=*/false); + m_llvmFAbs = Function::Create( + /*Type=*/fabsType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"fabs", m_mod); + m_llvmFAbs->setCallingConv(CallingConv::C); + m_llvmFAbs->setParamAttrs(fabsPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFAbs, val, + name("fabs")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; } -void Instructions::endif() +llvm::Value * Instructions::callFExp(llvm::Value *val) { - assert(!m_ifStack.empty()); - m_builder.CreateBr(m_ifStack.top()); - m_builder.SetInsertPoint(m_ifStack.top()); - m_ifStack.pop(); + if (!m_llvmFexp) { + // predeclare the intrinsic + std::vector fexpArgs; + fexpArgs.push_back(Type::FloatTy); + PAListPtr fexpPal; + FunctionType* fexpType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/fexpArgs, + /*isVarArg=*/false); + m_llvmFexp = Function::Create( + /*Type=*/fexpType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"expf", m_mod); + m_llvmFexp->setCallingConv(CallingConv::C); + m_llvmFexp->setParamAttrs(fexpPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFexp, val, + name("expf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; } -llvm::Value * Instructions::lerp(llvm::Value *in1, llvm::Value *in2, - llvm::Value *in3) +llvm::Value * Instructions::callFLog(llvm::Value *val) { - llvm::Value *m = mul(in1, in2); - llvm::Value *vec1 = constVector(1.f, 1.f, 1.f, 1.f); - llvm::Value *s = sub(vec1, in1); - return add(m, mul(s, in3)); + if (!m_llvmFlog) { + // predeclare the intrinsic + std::vector flogArgs; + flogArgs.push_back(Type::FloatTy); + PAListPtr flogPal; + FunctionType* flogType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/flogArgs, + /*isVarArg=*/false); + m_llvmFlog = Function::Create( + /*Type=*/flogType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"logf", m_mod); + m_llvmFlog->setCallingConv(CallingConv::C); + m_llvmFlog->setParamAttrs(flogPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFlog, val, + name("logf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; } -void Instructions::beginLoop() +llvm::Value * Instructions::callFloor(llvm::Value *val) { - BasicBlock *begin = BasicBlock::Create(name("loop"), m_func,0); - BasicBlock *end = BasicBlock::Create(name("endloop"), m_func,0); - - m_builder.CreateBr(begin); - Loop loop; - loop.begin = begin; - loop.end = end; - m_builder.SetInsertPoint(begin); - m_loopStack.push(loop); + if (!m_llvmFloor) { + // predeclare the intrinsic + std::vector floorArgs; + floorArgs.push_back(Type::FloatTy); + PAListPtr floorPal; + FunctionType* floorType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/floorArgs, + /*isVarArg=*/false); + m_llvmFloor = Function::Create( + /*Type=*/floorType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"floorf", m_mod); + m_llvmFloor->setCallingConv(CallingConv::C); + m_llvmFloor->setParamAttrs(floorPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFloor, val, + name("floorf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; } -void Instructions::endLoop() +llvm::Value *Instructions::callFSqrt(llvm::Value *val) { - assert(!m_loopStack.empty()); - Loop loop = m_loopStack.top(); - m_builder.CreateBr(loop.begin); - loop.end->moveAfter(currentBlock()); - m_builder.SetInsertPoint(loop.end); - m_loopStack.pop(); + if (!m_llvmFSqrt) { + // predeclare the intrinsic + std::vector fsqrtArgs; + fsqrtArgs.push_back(Type::FloatTy); + PAListPtr fsqrtPal; + FunctionType* fsqrtType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/fsqrtArgs, + /*isVarArg=*/false); + m_llvmFSqrt = Function::Create( + /*Type=*/fsqrtType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"llvm.sqrt.f32", m_mod); + m_llvmFSqrt->setCallingConv(CallingConv::C); + m_llvmFSqrt->setParamAttrs(fsqrtPal); + } + CallInst *call = m_builder.CreateCall(m_llvmFSqrt, val, + name("sqrt")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; } -void Instructions::brk() +llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2) { - assert(!m_loopStack.empty()); - BasicBlock *unr = BasicBlock::Create(name("unreachable"), m_func,0); - m_builder.CreateBr(m_loopStack.top().end); - m_builder.SetInsertPoint(unr); + if (!m_llvmPow) { + // predeclare the intrinsic + std::vector powArgs; + powArgs.push_back(Type::FloatTy); + powArgs.push_back(Type::FloatTy); + PAListPtr powPal; + FunctionType* powType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/powArgs, + /*isVarArg=*/false); + m_llvmPow = Function::Create( + /*Type=*/powType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"llvm.pow.f32", m_mod); + m_llvmPow->setCallingConv(CallingConv::C); + m_llvmPow->setParamAttrs(powPal); + } + std::vector params; + params.push_back(val1); + params.push_back(val2); + CallInst *call = m_builder.CreateCall(m_llvmPow, params.begin(), params.end(), + name("pow")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; } -llvm::Value * Instructions::trunc(llvm::Value *in) +llvm::Value * Instructions::vectorFromVals(llvm::Value *x, llvm::Value *y, + llvm::Value *z, llvm::Value *w) { - std::vector vec = extractVector(in); - Value *icastx = m_builder.CreateFPToSI(vec[0], IntegerType::get(32), - name("ftoix")); - Value *icasty = m_builder.CreateFPToSI(vec[1], IntegerType::get(32), - name("ftoiy")); - Value *icastz = m_builder.CreateFPToSI(vec[2], IntegerType::get(32), - name("ftoiz")); - Value *icastw = m_builder.CreateFPToSI(vec[3], IntegerType::get(32), - name("ftoiw")); - Value *fx = m_builder.CreateSIToFP(icastx, Type::FloatTy, - name("fx")); - Value *fy = m_builder.CreateSIToFP(icasty, Type::FloatTy, - name("fy")); - Value *fz = m_builder.CreateSIToFP(icastz, Type::FloatTy, - name("fz")); - Value *fw = m_builder.CreateSIToFP(icastw, Type::FloatTy, - name("fw")); - return vectorFromVals(fx, fy, fz, fw); + Constant *const_vec = Constant::getNullValue(m_floatVecType); + Value *res = m_builder.CreateInsertElement(const_vec, x, + m_storage->constantInt(0), + name("vecx")); + res = m_builder.CreateInsertElement(res, y, m_storage->constantInt(1), + name("vecxy")); + res = m_builder.CreateInsertElement(res, z, m_storage->constantInt(2), + name("vecxyz")); + if (w) + res = m_builder.CreateInsertElement(res, w, m_storage->constantInt(3), + name("vecxyzw")); + return res; } -void Instructions::end() +llvm::Value * Instructions::constVector(float x, float y, float z, float w) { - m_builder.CreateRetVoid(); + std::vector vec(4); + vec[0] = ConstantFP::get(APFloat(x)); + vec[1] = ConstantFP::get(APFloat(y)); + vec[2] = ConstantFP::get(APFloat(z)); + vec[3] = ConstantFP::get(APFloat(w)); + return ConstantVector::get(m_floatVecType, vec); } -void Instructions::cal(int label, llvm::Value *input) +llvm::Function * Instructions::declarePrintf() { - std::vector params; - params.push_back(input); - llvm::Function *func = findFunction(label); - - m_builder.CreateCall(func, params.begin(), params.end()); + std::vector args; + PAListPtr params; + FunctionType* funcTy = FunctionType::get( + /*Result=*/IntegerType::get(32), + /*Params=*/args, + /*isVarArg=*/true); + Function* func_printf = Function::Create( + /*Type=*/funcTy, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"printf", m_mod); + func_printf->setCallingConv(CallingConv::C); + func_printf->setParamAttrs(params); + return func_printf; } llvm::Function * Instructions::declareFunc(int label) @@ -778,27 +1108,6 @@ llvm::Function * Instructions::declareFunc(int label) return func; } -void Instructions::bgnSub(unsigned label) -{ - llvm::Function *func = findFunction(label); - - Function::arg_iterator args = func->arg_begin(); - Value *ptr_INPUT = args++; - ptr_INPUT->setName("INPUT"); - m_storage->pushArguments(ptr_INPUT); - - llvm::BasicBlock *entry = BasicBlock::Create("entry", func, 0); - - m_func = func; - m_builder.SetInsertPoint(entry); -} - -void Instructions::endSub() -{ - m_func = 0; - m_builder.SetInsertPoint(0); -} - llvm::Function * Instructions::findFunction(int label) { llvm::Function *func = m_functions[label]; @@ -809,17 +1118,6 @@ llvm::Function * Instructions::findFunction(int label) return func; } -llvm::Value * Instructions::constVector(float x, float y, float z, float w) -{ - std::vector vec(4); - vec[0] = ConstantFP::get(APFloat(x)); - vec[1] = ConstantFP::get(APFloat(y)); - vec[2] = ConstantFP::get(APFloat(z)); - vec[3] = ConstantFP::get(APFloat(w)); - return ConstantVector::get(m_floatVecType, vec); -} - - std::vector Instructions::extractVector(llvm::Value *vec) { std::vector elems(4); @@ -834,69 +1132,7 @@ std::vector Instructions::extractVector(llvm::Value *vec) return elems; } -llvm::Value * Instructions::cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) -{ - llvm::Function *func = m_mod->getFunction("cmp"); - assert(func); - - std::vector params; - params.push_back(in1); - params.push_back(in2); - params.push_back(in3); - CallInst *call = m_builder.CreateCall(func, params.begin(), params.end(), name("cmpres")); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::cos(llvm::Value *in) -{ -#if 0 - llvm::Function *func = m_mod->getFunction("vcos"); - assert(func); - - CallInst *call = m_builder.CreateCall(func, in, name("cosres")); - call->setTailCall(false); - return call; -#else - std::vector elems = extractVector(in); - Function *func = m_mod->getFunction("cosf"); - assert(func); - CallInst *cos = m_builder.CreateCall(func, elems[0], name("cosres")); - cos->setCallingConv(CallingConv::C); - cos->setTailCall(true); - return vectorFromVals(cos, cos, cos, cos); -#endif -} - -llvm::Value * Instructions::scs(llvm::Value *in) -{ - llvm::Function *func = m_mod->getFunction("scs"); - assert(func); - - CallInst *call = m_builder.CreateCall(func, in, name("scsres")); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::kil(llvm::Value *in) -{ - llvm::Function *func = m_mod->getFunction("kil"); - assert(func); - - CallInst *call = m_builder.CreateCall(func, in, name("kilpres")); - call->setTailCall(false); - return call; -} - -llvm::Value * Instructions::sin(llvm::Value *in) -{ - llvm::Function *func = m_mod->getFunction("vsin"); - assert(func); - CallInst *call = m_builder.CreateCall(func, in, name("sinres")); - call->setTailCall(false); - return call; -} #endif //MESA_LLVM diff --git a/src/gallium/auxiliary/gallivm/instructions.h b/src/gallium/auxiliary/gallivm/instructions.h index d286ce80c7..8df30f62c8 100644 --- a/src/gallium/auxiliary/gallivm/instructions.h +++ b/src/gallium/auxiliary/gallivm/instructions.h @@ -57,15 +57,22 @@ public: llvm::BasicBlock *currentBlock() const; llvm::Value *abs(llvm::Value *in1); - llvm::Value *arl(llvm::Value *in1); llvm::Value *add(llvm::Value *in1, llvm::Value *in2); + llvm::Value *arl(llvm::Value *in1); void beginLoop(); void bgnSub(unsigned); void brk(); void cal(int label, llvm::Value *input); + llvm::Value *clamp(llvm::Value *in); llvm::Value *cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); + llvm::Value *cnd(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); + llvm::Value *cnd0(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); llvm::Value *cos(llvm::Value *in); llvm::Value *cross(llvm::Value *in1, llvm::Value *in2); + llvm::Value *ddx(llvm::Value *in); + llvm::Value *ddy(llvm::Value *in); + llvm::Value *div(llvm::Value *in1, llvm::Value *in2); + llvm::Value *dot2add(llvm::Value *in, llvm::Value *in2, llvm::Value *in3); llvm::Value *dp3(llvm::Value *in1, llvm::Value *in2); llvm::Value *dp4(llvm::Value *in1, llvm::Value *in2); llvm::Value *dph(llvm::Value *in1, llvm::Value *in2); @@ -75,6 +82,7 @@ public: void endLoop(); void end(); void endSub(); + llvm::Value *exp(llvm::Value *in); llvm::Value *ex2(llvm::Value *in); llvm::Value *floor(llvm::Value *in); llvm::Value *frc(llvm::Value *in); @@ -82,32 +90,41 @@ public: llvm::Value *kil(llvm::Value *in); llvm::Value *lerp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); - llvm::Value *lit(llvm::Value *in); llvm::Value *lg2(llvm::Value *in); + llvm::Value *lit(llvm::Value *in); + llvm::Value *log(llvm::Value *in); llvm::Value *madd(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); - llvm::Value *min(llvm::Value *in1, llvm::Value *in2); llvm::Value *max(llvm::Value *in1, llvm::Value *in2); + llvm::Value *min(llvm::Value *in1, llvm::Value *in2); llvm::Value *mul(llvm::Value *in1, llvm::Value *in2); + llvm::Value *neg(llvm::Value *in); llvm::Value *pow(llvm::Value *in1, llvm::Value *in2); llvm::Value *rcp(llvm::Value *in); llvm::Value *rsq(llvm::Value *in); llvm::Value *scs(llvm::Value *in); + llvm::Value *seq(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sfl(llvm::Value *in1, llvm::Value *in2); llvm::Value *sge(llvm::Value *in1, llvm::Value *in2); llvm::Value *sgt(llvm::Value *in1, llvm::Value *in2); llvm::Value *sin(llvm::Value *in); + llvm::Value *sle(llvm::Value *in1, llvm::Value *in2); llvm::Value *slt(llvm::Value *in1, llvm::Value *in2); + llvm::Value *sne(llvm::Value *in1, llvm::Value *in2); + llvm::Value *str(llvm::Value *in1, llvm::Value *in2); llvm::Value *sub(llvm::Value *in1, llvm::Value *in2); llvm::Value *trunc(llvm::Value *in); + llvm::Value *x2d(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); void printVector(llvm::Value *val); private: const char *name(const char *prefix); llvm::Value *callFAbs(llvm::Value *val); + llvm::Value *callFExp(llvm::Value *val); + llvm::Value *callFLog(llvm::Value *val); llvm::Value *callFloor(llvm::Value *val); llvm::Value *callFSqrt(llvm::Value *val); - llvm::Value *callFLog(llvm::Value *val); llvm::Value *callPow(llvm::Value *val1, llvm::Value *val2); llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, @@ -135,6 +152,7 @@ private: llvm::Function *m_llvmPow; llvm::Function *m_llvmFloor; llvm::Function *m_llvmFlog; + llvm::Function *m_llvmFexp; llvm::Function *m_llvmLit; llvm::Constant *m_fmtPtr; diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index cc1516a45e..398fbd67bd 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -286,9 +286,13 @@ translate_instruction(llvm::Module *module, out = instr->rsq(inputs[0]); } break; - case TGSI_OPCODE_EXP: + case TGSI_OPCODE_EXP: { + out = instr->exp(inputs[0]); + } break; - case TGSI_OPCODE_LOG: + case TGSI_OPCODE_LOG: { + out = instr->log(inputs[0]); + } break; case TGSI_OPCODE_MUL: { out = instr->mul(inputs[0], inputs[1]); @@ -338,21 +342,31 @@ translate_instruction(llvm::Module *module, out = instr->lerp(inputs[0], inputs[1], inputs[2]); } break; - case TGSI_OPCODE_CND: + case TGSI_OPCODE_CND: { + out = instr->cnd(inputs[0], inputs[1], inputs[2]); + } break; - case TGSI_OPCODE_CND0: + case TGSI_OPCODE_CND0: { + out = instr->cnd0(inputs[0], inputs[1], inputs[2]); + } break; - case TGSI_OPCODE_DOT2ADD: + case TGSI_OPCODE_DOT2ADD: { + out = instr->dot2add(inputs[0], inputs[1], inputs[2]); + } break; case TGSI_OPCODE_INDEX: break; - case TGSI_OPCODE_NEGATE: + case TGSI_OPCODE_NEGATE: { + out = instr->neg(inputs[0]); + } break; case TGSI_OPCODE_FRAC: { out = instr->frc(inputs[0]); } break; - case TGSI_OPCODE_CLAMP: + case TGSI_OPCODE_CLAMP: { + out = instr->clamp(inputs[0]); + } break; case TGSI_OPCODE_FLOOR: { out = instr->floor(inputs[0]); @@ -392,9 +406,13 @@ translate_instruction(llvm::Module *module, out = instr->cos(inputs[0]); } break; - case TGSI_OPCODE_DDX: + case TGSI_OPCODE_DDX: { + out = instr->ddx(inputs[0]); + } break; - case TGSI_OPCODE_DDY: + case TGSI_OPCODE_DDY: { + out = instr->ddy(inputs[0]); + } break; case TGSI_OPCODE_KILP: break; @@ -408,9 +426,13 @@ translate_instruction(llvm::Module *module, break; case TGSI_OPCODE_RFL: break; - case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SEQ: { + out = instr->seq(inputs[0], inputs[1]); + } break; - case TGSI_OPCODE_SFL: + case TGSI_OPCODE_SFL: { + out = instr->sfl(inputs[0], inputs[1]); + } break; case TGSI_OPCODE_SGT: { out = instr->sgt(inputs[0], inputs[1]); @@ -420,11 +442,17 @@ translate_instruction(llvm::Module *module, out = instr->sin(inputs[0]); } break; - case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SLE: { + out = instr->sle(inputs[0], inputs[1]); + } break; - case TGSI_OPCODE_SNE: + case TGSI_OPCODE_SNE: { + out = instr->sne(inputs[0], inputs[1]); + } break; - case TGSI_OPCODE_STR: + case TGSI_OPCODE_STR: { + out = instr->str(inputs[0], inputs[1]); + } break; case TGSI_OPCODE_TEX: break; @@ -438,7 +466,9 @@ translate_instruction(llvm::Module *module, break; case TGSI_OPCODE_UP4UB: break; - case TGSI_OPCODE_X2D: + case TGSI_OPCODE_X2D: { + out = instr->x2d(inputs[0], inputs[1], inputs[2]); + } break; case TGSI_OPCODE_ARA: break; -- cgit v1.2.3 From 0116ea34e1308a233e406a5d26f09217a69a5ed6 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin Date: Sun, 28 Sep 2008 19:48:26 +0200 Subject: Gallivm: more instructions. --- src/gallium/auxiliary/gallivm/instructions.cpp | 61 ++++++++++++++++++++++++-- src/gallium/auxiliary/gallivm/instructions.h | 5 +++ src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 15 ++++--- 3 files changed, 73 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index 5fdfe09d18..3eaf9aacf6 100644 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -163,9 +163,18 @@ void Instructions::cal(int label, llvm::Value *input) m_builder.CreateCall(func, params.begin(), params.end()); } +llvm::Value * Instructions::ceil(llvm::Value *in) +{ + std::vector vec = extractVector(in); + return vectorFromVals(callCeil(vec[0]), callCeil(vec[1]), + callCeil(vec[2]), callCeil(vec[3])); +} + llvm::Value * Instructions::clamp(llvm::Value *in1) { - // FIXME + llvm::Value *zero = constVector(0.0f, 0.0f, 0.0f, 0.0f); + llvm::Value *one = constVector(1.0f, 1.0f, 1.0f, 1.0f); + return min( max(zero, in1), one); } llvm::Value * Instructions::cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3) @@ -289,12 +298,14 @@ llvm::Value * Instructions::cross(llvm::Value *in1, llvm::Value *in2) llvm::Value * Instructions::ddx(llvm::Value *in) { - // FIXME + // FIXME + assert(0); } llvm::Value * Instructions::ddy(llvm::Value *in) { - // FIXME + // FIXME + assert(0); } llvm::Value * Instructions::div(llvm::Value *in1, llvm::Value *in2) @@ -319,6 +330,19 @@ llvm::Value * Instructions::dot2add(llvm::Value *in1, llvm::Value *in2, llvm::Va return vectorFromVals(dot2add, dot2add, dot2add, dot2add); } +llvm::Value * Instructions::dp2(llvm::Value *in1, llvm::Value *in2) +{ + Value *mulRes = mul(in1, in2); + Value *x = m_builder.CreateExtractElement(mulRes, + m_storage->constantInt(0), + name("extractx")); + Value *y = m_builder.CreateExtractElement(mulRes, + m_storage->constantInt(1), + name("extracty")); + Value *xy = m_builder.CreateAdd(x, y,name("xy")); + return vectorFromVals(xy, xy, xy, xy); +} + llvm::Value * Instructions::dp3(llvm::Value *in1, llvm::Value *in2) { Value *mulRes = mul(in1, in2); @@ -581,6 +605,12 @@ llvm::Value * Instructions::neg(llvm::Value *in) return neg; } +llvm::Value * Instructions::nrm(llvm::Value *in) +{ + llvm::Value *v = rsq(in); + return mul(v, in); +} + llvm::Value * Instructions::pow(llvm::Value *in1, llvm::Value *in2) { Value *x1 = m_builder.CreateExtractElement(in1, @@ -887,6 +917,31 @@ const char * Instructions::name(const char *prefix) return m_name; } +llvm::Value * Instructions::callCeil(llvm::Value *val) +{ + if (!m_llvmCeil) { + // predeclare the intrinsic + std::vector ceilArgs; + ceilArgs.push_back(Type::FloatTy); + PAListPtr ceilPal; + FunctionType* ceilType = FunctionType::get( + /*Result=*/Type::FloatTy, + /*Params=*/ceilArgs, + /*isVarArg=*/false); + m_llvmCeil = Function::Create( + /*Type=*/ceilType, + /*Linkage=*/GlobalValue::ExternalLinkage, + /*Name=*/"ceilf", m_mod); + m_llvmCeil->setCallingConv(CallingConv::C); + m_llvmCeil->setParamAttrs(ceilPal); + } + CallInst *call = m_builder.CreateCall(m_llvmCeil, val, + name("ceilf")); + call->setCallingConv(CallingConv::C); + call->setTailCall(false); + return call; +} + llvm::Value *Instructions::callFAbs(llvm::Value *val) { if (!m_llvmFAbs) { diff --git a/src/gallium/auxiliary/gallivm/instructions.h b/src/gallium/auxiliary/gallivm/instructions.h index 8df30f62c8..c3b28e9746 100644 --- a/src/gallium/auxiliary/gallivm/instructions.h +++ b/src/gallium/auxiliary/gallivm/instructions.h @@ -63,6 +63,7 @@ public: void bgnSub(unsigned); void brk(); void cal(int label, llvm::Value *input); + llvm::Value *ceil(llvm::Value *in); llvm::Value *clamp(llvm::Value *in); llvm::Value *cmp(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); llvm::Value *cnd(llvm::Value *in1, llvm::Value *in2, llvm::Value *in3); @@ -73,6 +74,7 @@ public: llvm::Value *ddy(llvm::Value *in); llvm::Value *div(llvm::Value *in1, llvm::Value *in2); llvm::Value *dot2add(llvm::Value *in, llvm::Value *in2, llvm::Value *in3); + llvm::Value *dp2(llvm::Value *in1, llvm::Value *in2); llvm::Value *dp3(llvm::Value *in1, llvm::Value *in2); llvm::Value *dp4(llvm::Value *in1, llvm::Value *in2); llvm::Value *dph(llvm::Value *in1, llvm::Value *in2); @@ -99,6 +101,7 @@ public: llvm::Value *min(llvm::Value *in1, llvm::Value *in2); llvm::Value *mul(llvm::Value *in1, llvm::Value *in2); llvm::Value *neg(llvm::Value *in); + llvm::Value *nrm(llvm::Value *in); llvm::Value *pow(llvm::Value *in1, llvm::Value *in2); llvm::Value *rcp(llvm::Value *in); llvm::Value *rsq(llvm::Value *in); @@ -120,6 +123,7 @@ public: private: const char *name(const char *prefix); + llvm::Value *callCeil(llvm::Value *val); llvm::Value *callFAbs(llvm::Value *val); llvm::Value *callFExp(llvm::Value *val); llvm::Value *callFLog(llvm::Value *val); @@ -147,6 +151,7 @@ private: llvm::VectorType *m_floatVecType; + llvm::Function *m_llvmCeil; llvm::Function *m_llvmFSqrt; llvm::Function *m_llvmFAbs; llvm::Function *m_llvmPow; diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 398fbd67bd..fdfbb76c16 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -498,11 +498,18 @@ translate_instruction(llvm::Module *module, break; case TGSI_OPCODE_TXB: break; - case TGSI_OPCODE_NRM: + case TGSI_OPCODE_NRM4: + case TGSI_OPCODE_NRM: { + out = instr->nrm(inputs[0]); + } break; - case TGSI_OPCODE_DIV: + case TGSI_OPCODE_DIV: { + out = instr->div(inputs[0], inputs[1]); + } break; - case TGSI_OPCODE_DP2: + case TGSI_OPCODE_DP2: { + out = instr->dp2(inputs[0], inputs[1]); + } break; case TGSI_OPCODE_TXL: break; @@ -620,8 +627,6 @@ translate_instruction(llvm::Module *module, break; case TGSI_OPCODE_M3X2: break; - case TGSI_OPCODE_NRM4: - break; case TGSI_OPCODE_CALLNZ: break; case TGSI_OPCODE_IFC: -- cgit v1.2.3 From fdcaf569d446db830a6eafd9c7f7c1b1030c0a93 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin Date: Sun, 28 Sep 2008 23:18:55 +0200 Subject: Gallivm: fix off-by-one. --- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index efddc04e81..9a3ed9f538 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -259,7 +259,7 @@ void InstructionsSoa::createBuiltins() { MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( (const char*)&soabuiltins_data[0], - (const char*)&soabuiltins_data[Elements(soabuiltins_data)]); + (const char*)&soabuiltins_data[Elements(soabuiltins_data)-1]); m_builtins = ParseBitcodeFile(buffer); std::cout<<"Builtins created at "< Date: Tue, 30 Sep 2008 20:50:49 +0200 Subject: Gallivm: port to llvm 2.4. --- configs/linux-llvm | 1 + src/gallium/auxiliary/gallivm/gallivm_builtins.cpp | 252 ++++++++++----------- src/gallium/auxiliary/gallivm/instructions.cpp | 36 +-- src/gallium/auxiliary/gallivm/instructions.h | 2 +- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 6 +- src/gallium/auxiliary/gallivm/instructionssoa.h | 2 +- 6 files changed, 150 insertions(+), 149 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/configs/linux-llvm b/configs/linux-llvm index 3b32db34d8..489cfd0546 100644 --- a/configs/linux-llvm +++ b/configs/linux-llvm @@ -31,4 +31,5 @@ else LLVM_CXXFLAGS= endif +LD = g++ GL_LIB_DEPS = $(LLVM_LDFLAGS) $(LLVM_LIBS) $(EXTRA_LIB_PATH) -lX11 -lXext -lm -lpthread -lstdc++ diff --git a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp index 0fc5c4ec5c..fcc5c05794 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp @@ -1,140 +1,140 @@ static const unsigned char llvm_builtins_data[] = { -0x42,0x43,0xc0,0xde,0x21,0x0c,0x00,0x00,0x29,0x02,0x00,0x00,0x01,0x10,0x00,0x00, +0x42,0x43,0xc0,0xde,0x21,0x0c,0x00,0x00,0x27,0x02,0x00,0x00,0x01,0x10,0x00,0x00, 0x10,0x00,0x00,0x00,0x07,0x81,0x23,0x91,0x41,0xc8,0x04,0x49,0x06,0x10,0x32,0x39, 0x92,0x01,0x84,0x0c,0x25,0x05,0x08,0x19,0x1e,0x04,0x8b,0x62,0x80,0x14,0x45,0x02, 0x42,0x92,0x0b,0x42,0xa4,0x10,0x32,0x14,0x38,0x08,0x18,0x49,0x0a,0x32,0x44,0x24, 0x48,0x0a,0x90,0x21,0x23,0x44,0x72,0x80,0x8c,0x14,0x21,0x86,0x0a,0x8a,0x0a,0x64, -0x0c,0x1f,0x00,0x00,0x49,0x18,0x00,0x00,0x02,0x00,0x00,0x00,0x0b,0x04,0x00,0x0c, -0x00,0x00,0x00,0x00,0x51,0x20,0x00,0x00,0x12,0x00,0x00,0x00,0x32,0x22,0x48,0x09, -0x20,0x65,0x82,0x84,0x00,0x26,0x45,0x48,0x05,0x09,0x26,0x45,0xc6,0x05,0x42,0x52, -0x26,0x08,0xae,0x19,0x80,0x61,0x04,0x02,0x98,0x23,0x00,0x83,0x29,0x80,0x21,0x00, -0xb2,0x73,0x04,0x01,0x51,0x8a,0xf4,0x08,0x92,0xa4,0x39,0x47,0x80,0x50,0x2b,0x03, -0x00,0xa0,0x08,0x21,0x5c,0x46,0x2b,0x44,0x08,0x21,0xd4,0x40,0x14,0x01,0x80,0x11, -0x80,0x22,0x88,0x00,0x13,0xa2,0x74,0xb0,0x03,0x3c,0xb0,0x83,0x36,0x80,0x87,0x71, -0x68,0x03,0x76,0x48,0x07,0x77,0xa8,0x07,0x7c,0x68,0x83,0x73,0x70,0x87,0x7a,0xd8, -0x70,0x0f,0xe5,0xd0,0x06,0xf0,0xa0,0x07,0x73,0x20,0x07,0x7a,0x30,0x07,0x72,0xa0, -0x07,0x73,0x20,0x07,0x6d,0x90,0x0e,0x71,0xa0,0x07,0x78,0xa0,0x07,0x78,0xd0,0x06, -0xe9,0x80,0x07,0x7a,0x80,0x07,0x7a,0x80,0x07,0x6d,0x90,0x0e,0x71,0x60,0x07,0x7a, -0x10,0x07,0x76,0xa0,0x07,0x71,0x60,0x07,0x6d,0x90,0x0e,0x73,0x20,0x07,0x7a,0x30, -0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x6d,0x90,0x0e,0x76,0x40,0x07,0x7a,0x30,0x07, -0x72,0xa0,0x07,0x76,0x40,0x07,0x6d,0x60,0x0e,0x73,0x20,0x07,0x7a,0x30,0x07,0x72, -0xa0,0x07,0x73,0x20,0x07,0x6d,0x60,0x0e,0x76,0x40,0x07,0x7a,0x30,0x07,0x72,0xa0, -0x07,0x76,0x40,0x07,0x6d,0x60,0x0f,0x76,0x40,0x07,0x7a,0x60,0x07,0x74,0xa0,0x07, -0x76,0x40,0x07,0x6d,0x60,0x0f,0x71,0x20,0x07,0x78,0xa0,0x07,0x71,0x20,0x07,0x78, -0xa0,0x07,0x71,0x20,0x07,0x78,0xd0,0x06,0xe1,0x00,0x07,0x7a,0x00,0x07,0x7a,0x60, -0x07,0x74,0xd0,0x06,0xe6,0x80,0x07,0x70,0xa0,0x07,0x71,0x20,0x07,0x78,0xa0,0x07, -0x71,0x20,0x07,0x78,0xa0,0xf3,0x40,0x88,0x04,0x32,0x32,0x02,0x04,0x20,0x76,0x46, -0xfc,0x6c,0x48,0x92,0x00,0x40,0x00,0x00,0x00,0x00,0x0c,0x49,0x12,0x20,0x00,0x00, -0x00,0x00,0x80,0x21,0x89,0x02,0x00,0x01,0x00,0x00,0x00,0x30,0x24,0x59,0x00,0x20, -0x08,0x00,0x00,0x00,0x86,0x24,0x0a,0x00,0x04,0x00,0x00,0x00,0xc0,0x90,0x84,0x01, -0x02,0x00,0x00,0x00,0x00,0x18,0x92,0x1c,0x40,0x00,0x00,0x00,0x00,0x00,0x43,0x12, -0x05,0x00,0x02,0x00,0x00,0x00,0x60,0x48,0x72,0x00,0x01,0x00,0x00,0x00,0x00,0x0c, -0x49,0x14,0x00,0x08,0x00,0x00,0x00,0x80,0x21,0x49,0x01,0x00,0x41,0x00,0x00,0x00, -0x90,0x05,0x02,0x00,0x10,0x00,0x00,0x00,0x32,0x1e,0x98,0x10,0x19,0x11,0x4c,0x90, +0x0c,0x1f,0x00,0x00,0x49,0x18,0x00,0x00,0x03,0x00,0x00,0x00,0x0b,0x84,0xff,0xff, +0xff,0xff,0x1f,0xc0,0x00,0x00,0x00,0x00,0x51,0x20,0x00,0x00,0x12,0x00,0x00,0x00, +0x32,0x22,0x48,0x09,0x20,0x65,0x82,0x84,0x00,0x26,0x45,0x48,0x05,0x09,0x26,0x45, +0xc6,0x05,0x42,0x52,0x26,0x08,0xae,0x19,0x80,0x61,0x04,0x02,0x98,0x23,0x00,0x83, +0x29,0x80,0x21,0x00,0xb2,0x73,0x04,0x01,0x51,0x8a,0xf4,0x08,0x92,0xa4,0x39,0x47, +0x80,0x50,0x2b,0x03,0x00,0xa0,0x08,0x21,0x5c,0x46,0x2b,0x44,0x08,0x21,0xd4,0x40, +0x14,0x01,0x80,0x11,0x80,0x22,0x88,0x00,0x13,0x30,0x7c,0xc0,0x03,0x3b,0xf8,0x05, +0x3b,0xa0,0x83,0x36,0xa8,0x07,0x77,0x58,0x07,0x77,0x78,0x87,0x7b,0x70,0x87,0x36, +0x60,0x87,0x74,0x70,0x87,0x7a,0xc0,0x87,0x36,0x38,0x07,0x77,0xa8,0x87,0x0d,0xf7, +0x50,0x0e,0x6d,0x00,0x0f,0x7a,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60, +0x07,0x74,0xd0,0x06,0xe9,0x10,0x07,0x7a,0x80,0x07,0x7a,0x80,0x07,0x6d,0x90,0x0e, +0x78,0xa0,0x07,0x78,0xa0,0x07,0x78,0xd0,0x06,0xe9,0x10,0x07,0x76,0xa0,0x07,0x71, +0x60,0x07,0x7a,0x10,0x07,0x76,0xd0,0x06,0xe9,0x30,0x07,0x72,0xa0,0x07,0x73,0x20, +0x07,0x7a,0x30,0x07,0x72,0xd0,0x06,0xe9,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07, +0x7a,0x60,0x07,0x74,0xd0,0x06,0xe6,0x30,0x07,0x72,0xa0,0x07,0x73,0x20,0x07,0x7a, +0x30,0x07,0x72,0xd0,0x06,0xe6,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60, +0x07,0x74,0xd0,0x06,0xf6,0x60,0x07,0x74,0xa0,0x07,0x76,0x40,0x07,0x7a,0x60,0x07, +0x74,0xd0,0x06,0xf6,0x10,0x07,0x72,0x80,0x07,0x7a,0x10,0x07,0x72,0x80,0x07,0x7a, +0x10,0x07,0x72,0x80,0x07,0x6d,0x10,0x0e,0x70,0xa0,0x07,0x70,0xa0,0x07,0x76,0x40, +0x07,0x6d,0x60,0x0e,0x78,0x00,0x07,0x7a,0x10,0x07,0x72,0x80,0x07,0x7a,0x10,0x07, +0x72,0x80,0x07,0x3a,0x0f,0x84,0x48,0x20,0x23,0x24,0x40,0x00,0x62,0x67,0x88,0x9f, +0x19,0x92,0x24,0x00,0x10,0x04,0x00,0x00,0x00,0x43,0x92,0x04,0x08,0x00,0x00,0x00, +0x00,0x60,0x48,0xa2,0x00,0x40,0x10,0x00,0x00,0x00,0x0c,0x49,0x16,0x00,0x08,0x02, +0x00,0x00,0x80,0x21,0x89,0x02,0x00,0x41,0x00,0x00,0x00,0x30,0x24,0x61,0x80,0x00, +0x00,0x00,0x00,0x00,0x86,0x24,0x07,0x10,0x00,0x00,0x00,0x00,0xc0,0x90,0x44,0x01, +0x80,0x20,0x00,0x00,0x00,0x18,0x92,0x1c,0x40,0x00,0x00,0x00,0x00,0x00,0x43,0x12, +0x05,0x00,0x82,0x00,0x00,0x00,0x60,0x48,0x52,0x00,0x40,0x10,0x00,0x00,0x00,0x64, +0x81,0x00,0x00,0x00,0x10,0x00,0x00,0x00,0x32,0x1e,0x98,0x10,0x19,0x11,0x4c,0x90, 0x8c,0x09,0x26,0x47,0xc6,0x04,0x43,0x8a,0x8a,0x59,0x8b,0x43,0x50,0xd2,0x09,0x02, 0x81,0xd2,0x73,0x50,0xc9,0x0c,0x2a,0x99,0x41,0x25,0x33,0xa8,0x64,0x56,0x28,0x66, 0x2d,0x0e,0x41,0xcf,0x2a,0x15,0x04,0x4a,0xcf,0x41,0x25,0x33,0xa8,0x64,0x06,0x95, 0xcc,0xa0,0x92,0x59,0x01,0x00,0x00,0x00,0x53,0x82,0x26,0x0c,0x04,0x00,0x00,0x00, 0x22,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x05,0x00,0x00,0x00, 0x04,0xc6,0x08,0x40,0x10,0x04,0xe1,0x70,0x18,0x23,0x00,0x41,0x10,0x84,0xc3,0x60, -0x04,0x00,0x00,0x00,0x93,0x0c,0xce,0x43,0x4c,0x31,0x3c,0x8e,0x34,0xc9,0x30,0x41, -0xc2,0x14,0x03,0x34,0x51,0x93,0x0c,0x4d,0x44,0x4c,0x31,0x44,0x8d,0x35,0x56,0x01, -0x04,0xc3,0x55,0x21,0x16,0x0e,0x04,0x00,0x0f,0x00,0x00,0x00,0x46,0x41,0x08,0xcc, -0x73,0x9b,0x05,0x21,0x30,0xcf,0x6e,0x18,0x84,0x00,0x2c,0x8b,0x35,0x04,0x80,0x39, -0x04,0x81,0x5d,0x20,0x80,0x0f,0x0c,0x43,0xe4,0xd3,0x36,0x81,0x04,0x3e,0x30,0x0c, -0x91,0x4f,0x5b,0x05,0x12,0xf8,0xc0,0x30,0x44,0x7e,0x7d,0x00,0x05,0xd1,0x4c,0x11, -0x66,0x12,0x83,0xc0,0x3c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, -0x2a,0x00,0x00,0x00,0x13,0x04,0x43,0x2c,0x10,0x00,0x00,0x00,0x08,0x00,0x00,0x00, +0x04,0x00,0x00,0x00,0xc3,0x0d,0xce,0x43,0x4c,0x37,0x3c,0x8e,0x34,0xdc,0x30,0x41, +0xc2,0x74,0x03,0x34,0x51,0xc3,0x0d,0x4d,0x44,0x4c,0x37,0x44,0x8d,0x35,0x56,0x01, +0x04,0xc3,0x55,0x21,0x16,0x0e,0x04,0x00,0x0f,0x00,0x00,0x00,0xd6,0x10,0x00,0xe6, +0x10,0x04,0x76,0x81,0x00,0x3e,0x30,0x0c,0x91,0x4f,0x1b,0x05,0x21,0x30,0x8f,0x6d, +0x13,0x48,0xe0,0x03,0xc3,0x10,0xf9,0xb4,0x55,0x20,0x81,0x0f,0x0c,0x43,0xe4,0xd7, +0x66,0x41,0x08,0xcc,0xa3,0x1f,0x40,0x41,0x34,0x53,0x84,0x99,0xc4,0x20,0x30,0x8f, +0x61,0x10,0x02,0xb0,0x2c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x27,0x00,0x00,0x00,0x13,0x04,0x43,0x2c,0x10,0x00,0x00,0x00,0x08,0x00,0x00,0x00, 0x24,0x8a,0xa0,0x0c,0x46,0x00,0x4a,0x80,0xc2,0x1c,0x84,0x55,0x55,0xd6,0x1c,0x84, 0x45,0x51,0x16,0x81,0x19,0x80,0x11,0x80,0x31,0x02,0x10,0x04,0x41,0xfc,0x03,0x00, -0x63,0x08,0x0d,0x34,0xc9,0x70,0x55,0xc2,0x2c,0x43,0x20,0x60,0x73,0x0c,0xd3,0x15, +0x63,0x08,0x0d,0x34,0xdc,0x70,0x55,0xc2,0x2c,0x43,0x20,0x60,0x73,0x0c,0xd3,0x15, 0x8d,0x21,0x34,0xd1,0x18,0x42,0xf3,0x8c,0x55,0x00,0x81,0xa0,0x6d,0x73,0x0c,0x19, -0xe7,0x60,0x87,0x52,0x38,0x10,0x00,0x00,0x13,0x00,0x00,0x00,0x17,0x60,0x20,0xc5, -0x74,0x10,0x8d,0x65,0x14,0x13,0xf3,0xd4,0xb4,0x6d,0x14,0x13,0xf3,0xd4,0xb8,0x69, -0x14,0x13,0xf3,0xd4,0xb6,0x75,0x14,0x13,0xf3,0xd4,0xba,0x35,0x0c,0x13,0xf3,0x9c, -0x80,0xe4,0x36,0x48,0x81,0x10,0xc3,0x4a,0x4c,0x54,0xd4,0x6c,0x8b,0x23,0x28,0x76, -0x41,0x4c,0xcc,0xa3,0x1b,0x07,0x21,0x00,0xcb,0x72,0x00,0x05,0xd1,0x4c,0x11,0x66, -0x18,0x83,0xc0,0x3c,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, -0x81,0x00,0x00,0x00,0x13,0x04,0x4d,0x2c,0x10,0x00,0x00,0x00,0x04,0x00,0x00,0x00, -0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x91,0x11,0x00,0x00,0x00, -0x63,0x08,0x4d,0x64,0x16,0xc1,0x49,0x86,0xab,0x22,0x66,0x19,0x02,0x01,0x1b,0x43, -0x70,0xa2,0x59,0x82,0x61,0x0c,0xe1,0x89,0x66,0x09,0x86,0x81,0x0a,0x20,0x0b,0x34, -0x61,0x8e,0x81,0xda,0xa2,0x31,0x84,0x46,0xb2,0x8e,0xe0,0x24,0x83,0x57,0x11,0xb3, -0x0c,0x44,0xf1,0x8d,0x21,0x38,0xd2,0x2c,0x81,0x31,0x86,0xf0,0x48,0xb3,0x04,0xc6, -0x40,0x05,0x00,0x06,0x44,0x18,0x14,0x73,0x0c,0x9c,0x18,0x48,0x63,0x08,0xcd,0x64, -0x64,0x40,0x70,0x92,0xa1,0x0c,0x2a,0x62,0x96,0xe1,0x40,0xcc,0x60,0x0c,0xc1,0x99, -0x66,0x09,0x92,0x31,0x84,0x67,0x9a,0x25,0x48,0x06,0x2a,0x80,0x33,0x38,0xd0,0x00, -0x99,0x63,0x18,0x83,0x34,0x98,0xc6,0x10,0x1a,0xc8,0xd6,0x80,0xe0,0x24,0x03,0x1b, -0x54,0xc4,0x2c,0x83,0xb2,0xb4,0xc1,0x18,0x82,0x03,0xcd,0x12,0x30,0x63,0x08,0x0f, -0x34,0x4b,0xc0,0x0c,0x54,0x00,0x6e,0xa0,0xbc,0xc1,0x32,0xc7,0xa0,0x06,0x70,0x00, -0x61,0x1c,0x84,0x03,0x01,0x00,0x00,0x00,0x4e,0x00,0x00,0x00,0x76,0x52,0x4c,0xcc, -0x73,0xd3,0x24,0x05,0x64,0xec,0xcd,0x8d,0xcc,0xe5,0x87,0x46,0xc6,0x50,0x8a,0x89, -0x79,0xee,0xdb,0x54,0x8a,0x89,0x79,0xee,0xdd,0x1a,0x88,0x89,0x79,0x68,0x73,0x20, -0x26,0xe6,0xa9,0xed,0x81,0x98,0x98,0xc7,0x36,0x0b,0x62,0x62,0x9e,0xdb,0x32,0x88, -0x89,0x79,0x72,0xd3,0x20,0x26,0xe6,0xd9,0x8d,0x83,0x98,0x98,0xa7,0xb7,0x95,0x62, -0x62,0x9e,0xbb,0x27,0x2d,0x20,0x63,0x6f,0x6e,0x64,0x2e,0x3a,0x34,0x35,0x56,0x62, -0x08,0x4e,0x53,0xd9,0xba,0xb5,0x14,0x02,0xf3,0xe0,0xf5,0x25,0x2c,0x82,0xd3,0x0c, -0xbe,0xe0,0x34,0xd3,0x8d,0x9b,0x88,0x21,0x38,0xcd,0x60,0xd7,0x24,0x01,0x63,0xec, -0xcd,0x8d,0xcc,0x45,0x87,0x44,0x80,0x8c,0xbd,0xb9,0x91,0xb9,0xfc,0xc4,0xd0,0x90, -0x02,0x8c,0xb1,0x37,0x37,0x32,0x97,0x1f,0x73,0x29,0x26,0xe6,0xc1,0x71,0x7b,0x29, -0x26,0xe6,0xc1,0x77,0xfb,0x28,0x04,0xe6,0xa9,0x6f,0x52,0x01,0x32,0xf6,0xe6,0x46, -0xe6,0xa2,0x13,0x73,0x63,0x18,0x83,0xc0,0x3c,0xb6,0x41,0x08,0x4e,0x33,0x58,0x47, -0x31,0x31,0x4f,0x5d,0x1f,0xc3,0x22,0x38,0xcd,0xe0,0x0b,0x4e,0x33,0xe1,0xbc,0xa5, -0x18,0x82,0xd3,0x0c,0x77,0x6e,0x20,0xc5,0xc4,0x3c,0xb5,0x4e,0x3a,0x40,0xc6,0xde, -0xdc,0xc8,0x5c,0x7e,0x64,0x70,0x2c,0xa4,0x98,0x98,0xa7,0xee,0xed,0x82,0x10,0x9c, -0xa6,0xba,0x81,0x44,0x70,0x9a,0xc1,0x17,0x9c,0x66,0x32,0x93,0x42,0x60,0x1e,0x7b, -0xb7,0x98,0x62,0x62,0x9e,0xbc,0x36,0x16,0x43,0x70,0x9a,0x0a,0xa7,0x6d,0xa4,0x98, -0x98,0xc7,0xbe,0x8d,0xa4,0x98,0x98,0xc7,0xce,0x0d,0xc6,0x10,0x9c,0x66,0xc0,0x7b, -0x12,0x02,0x32,0xf6,0xe6,0x46,0xe6,0xa2,0x33,0x13,0x73,0x06,0x8b,0xe0,0x34,0x83, -0x2f,0x38,0xcd,0x64,0xd3,0xe6,0x61,0x08,0x4e,0x53,0xd5,0xf6,0x01,0x14,0x44,0x33, -0x45,0x18,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x4a,0x00,0x00,0x00, -0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x07,0x00,0x00,0x00,0x24,0xca,0x60,0x04, -0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0xb9,0x61,0x0c,0x04,0x10,0x1e,0xe1,0x19,0xc6, -0x40,0x02,0xe1,0x11,0x1e,0x00,0x00,0x00,0x63,0x08,0xcd,0x63,0x15,0xc1,0x31,0x84, -0x06,0xb2,0x8b,0xe0,0x18,0x42,0x13,0x59,0x46,0x70,0x0c,0xa1,0x71,0x6c,0x23,0x38, -0x16,0x02,0x04,0xc7,0x64,0x61,0x1a,0x37,0x16,0x01,0x04,0x48,0x35,0xc7,0x20,0x79, -0xcf,0x58,0x04,0x10,0x20,0xd5,0x1c,0xc3,0x07,0x06,0xd0,0x58,0x04,0x10,0x20,0xd5, -0x1c,0x43,0x18,0x88,0x41,0x34,0x16,0x01,0x04,0x48,0x35,0xc7,0x30,0x06,0x64,0xe0, -0x98,0x47,0xd0,0xc0,0x80,0xa0,0x89,0x01,0x41,0x23,0x03,0x82,0x63,0x21,0x40,0x70, -0x50,0x66,0x70,0x06,0x68,0x90,0x06,0x58,0x06,0xe1,0x40,0x00,0x25,0x00,0x00,0x00, -0x56,0x52,0x4c,0xcc,0x73,0xd3,0x56,0x41,0x4c,0xcc,0x53,0xdb,0x05,0x31,0x31,0xcf, -0x6d,0x19,0xc4,0xc4,0x3c,0xba,0x6d,0x10,0x13,0xf3,0xf4,0xd6,0x41,0x08,0xc0,0xb2, -0x18,0x46,0x21,0x38,0x4d,0x85,0x9b,0x46,0x21,0x38,0x4d,0xb5,0x9b,0x8a,0x21,0x00, -0xcb,0x82,0xdf,0x66,0x62,0x08,0x4e,0x53,0xdd,0xb7,0x9d,0x18,0x82,0xd3,0x54,0xb7, -0x6e,0x28,0x86,0xe0,0x34,0xd5,0xdd,0xdb,0x47,0x31,0x31,0x4f,0x9d,0x9b,0x87,0x21, -0x00,0xcb,0x52,0xdf,0x06,0x62,0x08,0xc0,0xb2,0xd4,0xbc,0x59,0x10,0x82,0xd3,0x54, -0x96,0x62,0x08,0x4e,0x53,0xe1,0xb6,0x85,0x14,0x13,0xf3,0xd8,0xb4,0x8d,0x14,0x13, -0xf3,0xd8,0xb9,0x89,0x18,0x02,0xb0,0x2c,0xf6,0x6d,0x24,0x86,0x00,0x2c,0x8b,0xcd, -0x1b,0x87,0x21,0x38,0x4d,0x55,0xd3,0xd6,0x30,0x54,0xc0,0x72,0x00,0x05,0xd1,0x4c, -0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x19,0x00,0x00,0x00, -0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0x4a,0x60,0x04, -0x80,0xc2,0x0c,0x00,0x00,0x00,0x00,0x00,0x63,0x08,0xcd,0x33,0x16,0x01,0x04,0x48, -0x34,0xc7,0x00,0x49,0xcf,0x58,0x04,0x10,0x28,0xd1,0x1c,0xc3,0x44,0x39,0x58,0x85, -0x03,0x01,0x00,0x00,0x0a,0x00,0x00,0x00,0x16,0x41,0x4c,0xcc,0x63,0xdb,0x04,0x31, -0x31,0x4f,0x6e,0x0d,0x43,0x05,0x2c,0x07,0x50,0x10,0xcd,0x14,0x61,0x56,0x41,0x4c, -0xcc,0xd3,0x1b,0x45,0x21,0x00,0xcb,0xb2,0x9b,0x04,0x21,0x00,0xcb,0x02,0x00,0x00, -0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x1b,0x00,0x00,0x00,0x13,0x04,0x41,0x2c, -0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80, -0xc2,0x0c,0x00,0x00,0x63,0x08,0xcd,0x33,0x16,0x01,0x04,0xca,0x34,0xc7,0x20,0x51, -0xcf,0x1c,0x43,0x45,0x41,0x73,0x0c,0x16,0x15,0xcd,0x31,0x5c,0x94,0x83,0x58,0x38, -0x10,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x86,0x51,0x4c,0xcc,0x53,0xe7,0x76,0x51, -0x4c,0xcc,0x53,0xdb,0x36,0x41,0x4c,0xcc,0x63,0x5b,0x05,0x31,0x31,0x8f,0x6e,0x0d, -0x43,0x05,0x2c,0x66,0x41,0x4c,0xcc,0xd3,0x1f,0x40,0x41,0x34,0x53,0x84,0x19,0x05, -0x21,0x00,0xcb,0x02,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x2f,0x00,0x00,0x00, -0x13,0x04,0x45,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0xca,0xa0,0x04, -0x46,0x00,0x8a,0x80,0xc0,0x08,0x00,0x00,0x63,0x08,0x0d,0x34,0xc9,0x30,0x49,0xc4, -0x2c,0x03,0x11,0x50,0x63,0x08,0xcd,0x33,0xc9,0x50,0x49,0xc4,0x2c,0x03,0x21,0x58, -0x63,0x08,0x4d,0x34,0xc9,0x70,0x49,0xc4,0x2c,0x03,0x31,0x60,0x63,0x08,0x8d,0x33, -0xc9,0x90,0x49,0x84,0x69,0x22,0x70,0xc3,0x27,0x1c,0x08,0x00,0x1a,0x00,0x00,0x00, -0x96,0x51,0x4c,0xcc,0x53,0xdf,0x66,0x41,0x08,0xcc,0x83,0xdb,0x04,0x31,0x31,0x4f, -0x6d,0x15,0xc4,0xc4,0x3c,0xb7,0x61,0x10,0x02,0xf3,0xf0,0x47,0x20,0xb9,0x0d,0x52, -0x20,0xc4,0xb0,0x12,0x13,0x15,0x35,0xdb,0xe2,0x08,0x8a,0x5d,0x10,0x13,0xf3,0xec, -0x37,0x90,0x2c,0x4e,0xf4,0x47,0x87,0x54,0xd7,0x17,0x70,0x2c,0x4e,0xf4,0x47,0x87, -0x74,0x02,0xc8,0xe2,0x44,0x7f,0x74,0x48,0xb9,0x69,0x14,0x02,0xf3,0xd4,0xb8,0x6d, -0x18,0x11,0x31,0x55,0xc0,0x62,0x0d,0x43,0x05,0x2c,0x07,0x50,0x10,0xcd,0x14,0x61, -0x46,0x31,0x08,0xcc,0x03,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x71,0x20,0x00,0x00, -0x12,0x00,0x00,0x00,0x66,0x40,0x54,0x82,0x23,0x59,0xc2,0x20,0x09,0x92,0x1d,0x18, -0x4f,0x84,0x34,0x53,0x61,0x03,0xc4,0xe3,0x58,0x85,0x05,0x14,0xbe,0x34,0x45,0xb5, -0x21,0x10,0x82,0x23,0x15,0x46,0x30,0x2c,0xc8,0x64,0x02,0x06,0xf0,0x3c,0x91,0x73, -0x19,0x00,0xe1,0x4b,0x53,0x64,0x0a,0x84,0x84,0x34,0x85,0x31,0x10,0x0a,0xb2,0x3c, -0x56,0x30,0x08,0xcc,0x63,0x0b,0x44,0x25,0x21,0x0d,0x00,0x00,0x00,0x00,0x00,0x00}; +0xe7,0x60,0x87,0x52,0x38,0x10,0x00,0x00,0x10,0x00,0x00,0x00,0x27,0x50,0x20,0x05, +0xd1,0x0c,0x17,0x60,0x20,0xc5,0x74,0x10,0x8d,0x65,0x14,0x13,0xf3,0xd4,0xb4,0x6d, +0x14,0x13,0xf3,0xd4,0xb8,0x69,0x14,0x13,0xf3,0xd4,0xb6,0x75,0x14,0x13,0xf3,0xd4, +0xba,0x35,0x0c,0x13,0xf3,0xd8,0x05,0x31,0x31,0x8f,0x6e,0x1c,0x84,0x00,0x2c,0xcb, +0x01,0x14,0x44,0x33,0x45,0x98,0x61,0x0c,0x02,0xf3,0x00,0x00,0x00,0x00,0x00,0x00, +0x61,0x20,0x00,0x00,0x81,0x00,0x00,0x00,0x13,0x04,0x4d,0x2c,0x10,0x00,0x00,0x00, +0x04,0x00,0x00,0x00,0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x91, +0x11,0x00,0x00,0x00,0x63,0x08,0x4d,0x64,0x16,0xc1,0xe1,0x86,0xab,0x22,0x66,0x19, +0x02,0x01,0x1b,0x43,0x70,0xa2,0x59,0x82,0x61,0x0c,0xe1,0x89,0x66,0x09,0x86,0x81, +0x0a,0x20,0x0b,0x34,0x61,0x8e,0x81,0xda,0xa2,0x31,0x84,0x46,0xb2,0x8e,0xe0,0x70, +0x83,0x57,0x11,0xb3,0x0c,0x44,0xf1,0x8d,0x21,0x38,0xd2,0x2c,0x81,0x31,0x86,0xf0, +0x48,0xb3,0x04,0xc6,0x40,0x05,0x00,0x06,0x44,0x18,0x14,0x73,0x0c,0x9c,0x18,0x48, +0x63,0x08,0xcd,0x64,0x64,0x40,0x70,0xb8,0xa1,0x0c,0x2a,0x62,0x96,0xe1,0x40,0xcc, +0x60,0x0c,0xc1,0x99,0x66,0x09,0x92,0x31,0x84,0x67,0x9a,0x25,0x48,0x06,0x2a,0x80, +0x33,0x38,0xd0,0x00,0x99,0x63,0x18,0x83,0x34,0x98,0xc6,0x10,0x1a,0xc8,0xd6,0x80, +0xe0,0x70,0x03,0x1b,0x54,0xc4,0x2c,0x83,0xb2,0xb4,0xc1,0x18,0x82,0x03,0xcd,0x12, +0x30,0x63,0x08,0x0f,0x34,0x4b,0xc0,0x0c,0x54,0x00,0x6e,0xa0,0xbc,0xc1,0x32,0xc7, +0xa0,0x06,0x70,0x00,0x61,0x1c,0x84,0x03,0x01,0x00,0x00,0x00,0x4e,0x00,0x00,0x00, +0x76,0x52,0x4c,0xcc,0x73,0xd3,0x24,0x05,0x64,0xec,0xcd,0x8d,0xcc,0xe5,0x87,0x46, +0xc6,0x50,0x8a,0x89,0x79,0xee,0xdb,0x54,0x8a,0x89,0x79,0xee,0xdd,0x1a,0x88,0x89, +0x79,0x68,0x73,0x20,0x26,0xe6,0xa9,0xed,0x81,0x98,0x98,0xc7,0x36,0x0b,0x62,0x62, +0x9e,0xdb,0x32,0x88,0x89,0x79,0x72,0xd3,0x20,0x26,0xe6,0xd9,0x8d,0x83,0x98,0x98, +0xa7,0xb7,0x95,0x62,0x62,0x9e,0xbb,0x27,0x2d,0x20,0x63,0x6f,0x6e,0x64,0x2e,0x3a, +0x34,0x35,0x56,0x62,0x08,0x4e,0x53,0xd9,0xba,0xb5,0x14,0x02,0xf3,0xe0,0xf5,0x25, +0x2c,0x82,0xd3,0x0c,0xbe,0xe0,0x34,0xd3,0x8d,0x9b,0x88,0x21,0x38,0xcd,0x60,0xd7, +0x24,0x01,0x63,0xec,0xcd,0x8d,0xcc,0x45,0x87,0x44,0x80,0x8c,0xbd,0xb9,0x91,0xb9, +0xfc,0xc4,0xd0,0x90,0x02,0x8c,0xb1,0x37,0x37,0x32,0x97,0x1f,0x73,0x29,0x26,0xe6, +0xc1,0x71,0x7b,0x29,0x26,0xe6,0xc1,0x77,0xfb,0x28,0x04,0xe6,0xa9,0x6f,0x52,0x01, +0x32,0xf6,0xe6,0x46,0xe6,0xa2,0x13,0x73,0x63,0x18,0x83,0xc0,0x3c,0xb6,0x41,0x08, +0x4e,0x33,0x58,0x47,0x31,0x31,0x4f,0x5d,0x1f,0xc3,0x22,0x38,0xcd,0xe0,0x0b,0x4e, +0x33,0xe1,0xbc,0xa5,0x18,0x82,0xd3,0x0c,0x77,0x6e,0x20,0xc5,0xc4,0x3c,0xb5,0x4e, +0x3a,0x40,0xc6,0xde,0xdc,0xc8,0x5c,0x7e,0x64,0x70,0x2c,0xa4,0x98,0x98,0xa7,0xee, +0x6f,0x20,0x11,0x9c,0x66,0xf0,0x05,0xa7,0x99,0xec,0x82,0x10,0x9c,0xa6,0x32,0x93, +0x42,0x60,0x1e,0x7b,0xb7,0x98,0x62,0x62,0x9e,0xbc,0x36,0x16,0x43,0x70,0x9a,0x0a, +0xa7,0x6d,0xa4,0x98,0x98,0xc7,0xbe,0x8d,0xa4,0x98,0x98,0xc7,0xce,0x0d,0xc6,0x10, +0x9c,0x66,0xc0,0x7b,0x12,0x02,0x32,0xf6,0xe6,0x46,0xe6,0xa2,0x33,0x13,0x73,0x06, +0x8b,0xe0,0x34,0x83,0x2f,0x38,0xcd,0x64,0xd3,0x07,0x50,0x10,0xcd,0x14,0x61,0xe6, +0x61,0x08,0x4e,0x53,0xd5,0x36,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x4a,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x07,0x00,0x00,0x00, +0x24,0xca,0x60,0x04,0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0xb9,0x61,0x0c,0x04,0x10, +0x1e,0xe1,0x19,0xc6,0x40,0x02,0xe1,0x11,0x1e,0x00,0x00,0x00,0x63,0x08,0xcd,0x63, +0x15,0xc1,0x31,0x84,0x06,0xb2,0x8b,0xe0,0x18,0x42,0x13,0x59,0x46,0x70,0x0c,0xa1, +0x71,0x6c,0x23,0x38,0x16,0x02,0x04,0xc7,0x64,0x61,0x1a,0x37,0x16,0x01,0x04,0x48, +0x35,0xc7,0x20,0x79,0xcf,0x58,0x04,0x10,0x20,0xd5,0x1c,0xc3,0x07,0x06,0xd0,0x58, +0x04,0x10,0x20,0xd5,0x1c,0x43,0x18,0x88,0x41,0x34,0x16,0x01,0x04,0x48,0x35,0xc7, +0x30,0x06,0x64,0xe0,0x98,0x47,0xd0,0xc0,0x80,0xa0,0x89,0x01,0x41,0x23,0x03,0x82, +0x63,0x21,0x40,0x70,0x50,0x66,0x70,0x06,0x68,0x90,0x06,0x58,0x06,0xe1,0x40,0x00, +0x25,0x00,0x00,0x00,0x56,0x52,0x4c,0xcc,0x73,0xd3,0x56,0x41,0x4c,0xcc,0x53,0xdb, +0x05,0x31,0x31,0xcf,0x6d,0x19,0xc4,0xc4,0x3c,0xba,0x6d,0x10,0x13,0xf3,0xf4,0xd6, +0x41,0x08,0xc0,0xb2,0x18,0x46,0x21,0x38,0x4d,0x85,0x9b,0x46,0x21,0x38,0x4d,0xb5, +0x9b,0x8a,0x21,0x00,0xcb,0x82,0xdf,0x66,0x62,0x08,0x4e,0x53,0xdd,0xb7,0x9d,0x18, +0x82,0xd3,0x54,0xb7,0x6e,0x28,0x86,0xe0,0x34,0xd5,0xdd,0xdb,0x47,0x31,0x31,0x4f, +0x9d,0x9b,0x87,0x21,0x00,0xcb,0x52,0xdf,0x06,0x62,0x08,0xc0,0xb2,0xd4,0xbc,0x59, +0x10,0x82,0xd3,0x54,0x96,0x62,0x08,0x4e,0x53,0xe1,0xb6,0x85,0x14,0x13,0xf3,0xd8, +0xb4,0x8d,0x14,0x13,0xf3,0xd8,0xb9,0x89,0x18,0x02,0xb0,0x2c,0xf6,0x6d,0x24,0x86, +0x00,0x2c,0x8b,0xcd,0x1b,0x87,0x21,0x38,0x4d,0x55,0xd3,0xd6,0x30,0x54,0xc0,0x72, +0x00,0x05,0xd1,0x4c,0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x19,0x00,0x00,0x00,0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00, +0x24,0x4a,0x60,0x04,0x80,0xc2,0x0c,0x00,0x00,0x00,0x00,0x00,0x63,0x08,0xcd,0x33, +0x16,0x01,0x04,0x48,0x34,0xc7,0x00,0x49,0xcf,0x58,0x04,0x10,0x28,0xd1,0x1c,0xc3, +0x44,0x39,0x58,0x85,0x03,0x01,0x00,0x00,0x0a,0x00,0x00,0x00,0x26,0x41,0x08,0xc0, +0xb2,0x18,0x45,0x21,0x00,0xcb,0xb2,0x5b,0x04,0x31,0x31,0x8f,0x6d,0x13,0xc4,0xc4, +0x3c,0xb9,0x35,0x0c,0x15,0xb0,0x58,0x05,0x31,0x31,0x4f,0x7f,0x00,0x05,0xd1,0x4c, +0x11,0x06,0x00,0x00,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00,0x1b,0x00,0x00,0x00, +0x13,0x04,0x41,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00,0x24,0xca,0x60,0x04, +0xa0,0x04,0x8a,0x80,0xc2,0x0c,0x00,0x00,0x63,0x08,0xcd,0x33,0x16,0x01,0x04,0xca, +0x34,0xc7,0x20,0x51,0xcf,0x1c,0x43,0x45,0x41,0x73,0x0c,0x16,0x15,0xcd,0x31,0x5c, +0x94,0x83,0x58,0x38,0x10,0x00,0x00,0x00,0x0b,0x00,0x00,0x00,0x76,0x51,0x4c,0xcc, +0x53,0xdb,0x86,0x51,0x4c,0xcc,0x53,0xe7,0x36,0x41,0x4c,0xcc,0x63,0x5b,0x05,0x31, +0x31,0x8f,0x6e,0x16,0xc4,0xc4,0x3c,0xbd,0x51,0x10,0x02,0xb0,0x2c,0xd6,0x30,0x54, +0xc0,0x72,0x00,0x05,0xd1,0x4c,0x11,0x06,0x00,0x00,0x00,0x00,0x61,0x20,0x00,0x00, +0x2c,0x00,0x00,0x00,0x13,0x04,0x45,0x2c,0x10,0x00,0x00,0x00,0x03,0x00,0x00,0x00, +0x24,0xca,0xa0,0x04,0x46,0x00,0x8a,0x80,0xc0,0x08,0x00,0x00,0x63,0x08,0x0d,0x34, +0xdc,0x30,0x49,0xc4,0x2c,0x03,0x11,0x50,0x63,0x08,0xcd,0x33,0xdc,0x50,0x49,0xc4, +0x2c,0x03,0x21,0x58,0x63,0x08,0x4d,0x34,0xdc,0x70,0x49,0xc4,0x2c,0x03,0x31,0x60, +0x63,0x08,0x8d,0x33,0xdc,0x90,0x49,0x84,0x69,0x22,0x70,0xc3,0x27,0x1c,0x08,0x00, +0x17,0x00,0x00,0x00,0x96,0x51,0x4c,0xcc,0x53,0xdf,0x66,0x41,0x08,0xcc,0x83,0xdb, +0x04,0x31,0x31,0x4f,0x6d,0x15,0xc4,0xc4,0x3c,0xb7,0x61,0x10,0x02,0xf3,0xf0,0x76, +0x41,0x4c,0xcc,0xb3,0x1f,0x81,0x11,0x11,0x13,0x15,0x35,0x37,0x90,0x2c,0x4e,0xf4, +0x47,0x87,0x54,0xd7,0x17,0x70,0x2c,0x4e,0xf4,0x47,0x87,0x74,0x02,0xc8,0xe2,0x44, +0x7f,0x74,0x48,0xb9,0x69,0x14,0x02,0xf3,0xd4,0xb8,0x6d,0x18,0x11,0x31,0x55,0xc0, +0x62,0x0d,0x43,0x05,0x2c,0x07,0x50,0x10,0xcd,0x14,0x61,0x46,0x31,0x08,0xcc,0x03, +0x00,0x00,0x00,0x00,0x71,0x20,0x00,0x00,0x12,0x00,0x00,0x00,0x66,0x40,0x54,0x82, +0x23,0x19,0xc3,0xa0,0x20,0x8b,0x1d,0x18,0x4f,0x84,0x34,0x53,0x61,0x03,0xc4,0xe3, +0x58,0x85,0x05,0x14,0xbe,0x34,0x45,0xb5,0x21,0x10,0x82,0x23,0x15,0x46,0x30,0x2c, +0xc8,0x64,0x02,0x06,0xf0,0x3c,0x91,0x73,0x19,0x00,0xe1,0x4b,0x53,0x64,0x0a,0x84, +0x84,0x34,0x85,0x25,0x0c,0x92,0x20,0x59,0xc1,0x20,0x30,0x8f,0x2d,0x10,0x95,0x84, +0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index 3eaf9aacf6..599975d5ad 100644 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -923,7 +923,7 @@ llvm::Value * Instructions::callCeil(llvm::Value *val) // predeclare the intrinsic std::vector ceilArgs; ceilArgs.push_back(Type::FloatTy); - PAListPtr ceilPal; + AttrListPtr ceilPal; FunctionType* ceilType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/ceilArgs, @@ -933,7 +933,7 @@ llvm::Value * Instructions::callCeil(llvm::Value *val) /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"ceilf", m_mod); m_llvmCeil->setCallingConv(CallingConv::C); - m_llvmCeil->setParamAttrs(ceilPal); + m_llvmCeil->setAttributes(ceilPal); } CallInst *call = m_builder.CreateCall(m_llvmCeil, val, name("ceilf")); @@ -948,7 +948,7 @@ llvm::Value *Instructions::callFAbs(llvm::Value *val) // predeclare the intrinsic std::vector fabsArgs; fabsArgs.push_back(Type::FloatTy); - PAListPtr fabsPal; + AttrListPtr fabsPal; FunctionType* fabsType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/fabsArgs, @@ -958,7 +958,7 @@ llvm::Value *Instructions::callFAbs(llvm::Value *val) /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"fabs", m_mod); m_llvmFAbs->setCallingConv(CallingConv::C); - m_llvmFAbs->setParamAttrs(fabsPal); + m_llvmFAbs->setAttributes(fabsPal); } CallInst *call = m_builder.CreateCall(m_llvmFAbs, val, name("fabs")); @@ -973,7 +973,7 @@ llvm::Value * Instructions::callFExp(llvm::Value *val) // predeclare the intrinsic std::vector fexpArgs; fexpArgs.push_back(Type::FloatTy); - PAListPtr fexpPal; + AttrListPtr fexpPal; FunctionType* fexpType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/fexpArgs, @@ -983,7 +983,7 @@ llvm::Value * Instructions::callFExp(llvm::Value *val) /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"expf", m_mod); m_llvmFexp->setCallingConv(CallingConv::C); - m_llvmFexp->setParamAttrs(fexpPal); + m_llvmFexp->setAttributes(fexpPal); } CallInst *call = m_builder.CreateCall(m_llvmFexp, val, name("expf")); @@ -998,7 +998,7 @@ llvm::Value * Instructions::callFLog(llvm::Value *val) // predeclare the intrinsic std::vector flogArgs; flogArgs.push_back(Type::FloatTy); - PAListPtr flogPal; + AttrListPtr flogPal; FunctionType* flogType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/flogArgs, @@ -1008,7 +1008,7 @@ llvm::Value * Instructions::callFLog(llvm::Value *val) /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"logf", m_mod); m_llvmFlog->setCallingConv(CallingConv::C); - m_llvmFlog->setParamAttrs(flogPal); + m_llvmFlog->setAttributes(flogPal); } CallInst *call = m_builder.CreateCall(m_llvmFlog, val, name("logf")); @@ -1023,7 +1023,7 @@ llvm::Value * Instructions::callFloor(llvm::Value *val) // predeclare the intrinsic std::vector floorArgs; floorArgs.push_back(Type::FloatTy); - PAListPtr floorPal; + AttrListPtr floorPal; FunctionType* floorType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/floorArgs, @@ -1033,7 +1033,7 @@ llvm::Value * Instructions::callFloor(llvm::Value *val) /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"floorf", m_mod); m_llvmFloor->setCallingConv(CallingConv::C); - m_llvmFloor->setParamAttrs(floorPal); + m_llvmFloor->setAttributes(floorPal); } CallInst *call = m_builder.CreateCall(m_llvmFloor, val, name("floorf")); @@ -1048,7 +1048,7 @@ llvm::Value *Instructions::callFSqrt(llvm::Value *val) // predeclare the intrinsic std::vector fsqrtArgs; fsqrtArgs.push_back(Type::FloatTy); - PAListPtr fsqrtPal; + AttrListPtr fsqrtPal; FunctionType* fsqrtType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/fsqrtArgs, @@ -1058,7 +1058,7 @@ llvm::Value *Instructions::callFSqrt(llvm::Value *val) /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"llvm.sqrt.f32", m_mod); m_llvmFSqrt->setCallingConv(CallingConv::C); - m_llvmFSqrt->setParamAttrs(fsqrtPal); + m_llvmFSqrt->setAttributes(fsqrtPal); } CallInst *call = m_builder.CreateCall(m_llvmFSqrt, val, name("sqrt")); @@ -1074,7 +1074,7 @@ llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2) std::vector powArgs; powArgs.push_back(Type::FloatTy); powArgs.push_back(Type::FloatTy); - PAListPtr powPal; + AttrListPtr powPal; FunctionType* powType = FunctionType::get( /*Result=*/Type::FloatTy, /*Params=*/powArgs, @@ -1084,7 +1084,7 @@ llvm::Value * Instructions::callPow(llvm::Value *val1, llvm::Value *val2) /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"llvm.pow.f32", m_mod); m_llvmPow->setCallingConv(CallingConv::C); - m_llvmPow->setParamAttrs(powPal); + m_llvmPow->setAttributes(powPal); } std::vector params; params.push_back(val1); @@ -1126,7 +1126,7 @@ llvm::Value * Instructions::constVector(float x, float y, float z, float w) llvm::Function * Instructions::declarePrintf() { std::vector args; - PAListPtr params; + AttrListPtr params; FunctionType* funcTy = FunctionType::get( /*Result=*/IntegerType::get(32), /*Params=*/args, @@ -1136,7 +1136,7 @@ llvm::Function * Instructions::declarePrintf() /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/"printf", m_mod); func_printf->setCallingConv(CallingConv::C); - func_printf->setParamAttrs(params); + func_printf->setAttributes(params); return func_printf; } @@ -1148,7 +1148,7 @@ llvm::Function * Instructions::declareFunc(int label) args.push_back(vecPtr); args.push_back(vecPtr); args.push_back(vecPtr); - PAListPtr params; + AttrListPtr params; FunctionType *funcType = FunctionType::get( /*Result=*/Type::VoidTy, /*Params=*/args, @@ -1159,7 +1159,7 @@ llvm::Function * Instructions::declareFunc(int label) /*Linkage=*/GlobalValue::ExternalLinkage, /*Name=*/name.c_str(), m_mod); func->setCallingConv(CallingConv::C); - func->setParamAttrs(params); + func->setAttributes(params); return func; } diff --git a/src/gallium/auxiliary/gallivm/instructions.h b/src/gallium/auxiliary/gallivm/instructions.h index c3b28e9746..e18571251e 100644 --- a/src/gallium/auxiliary/gallivm/instructions.h +++ b/src/gallium/auxiliary/gallivm/instructions.h @@ -146,7 +146,7 @@ private: llvm::Module *m_mod; llvm::Function *m_func; char m_name[32]; - llvm::IRBuilder m_builder; + llvm::IRBuilder<> m_builder; int m_idx; llvm::VectorType *m_floatVecType; diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index 9a3ed9f538..5863f37095 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -259,7 +259,7 @@ void InstructionsSoa::createBuiltins() { MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( (const char*)&soabuiltins_data[0], - (const char*)&soabuiltins_data[Elements(soabuiltins_data)-1]); + (const char*)&soabuiltins_data[Elements(soabuiltins_data)]); m_builtins = ParseBitcodeFile(buffer); std::cout<<"Builtins created at "<getFunctionType(), GlobalValue::ExternalLinkage, originalFunc->getName(), currentModule()); func->setCallingConv(CallingConv::C); - const PAListPtr pal; - func->setParamAttrs(pal); + const AttrListPtr pal; + func->setAttributes(pal); currentModule()->dump(); } else { DenseMap val; diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h index 3e20b652dd..20cab3b3bb 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.h +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h @@ -96,7 +96,7 @@ private: const std::vector in3); void injectFunction(llvm::Function *originalFunc, int op = TGSI_OPCODE_LAST); private: - llvm::IRBuilder m_builder; + llvm::IRBuilder<> m_builder; StorageSoa *m_storage; std::map m_functionsMap; -- cgit v1.2.3 From 8bdb4d2b2fdb12d0ba5249c289d349e35d893d00 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin Date: Wed, 1 Oct 2008 00:00:58 +0200 Subject: Gallivm: add slt. glxgears should be running, except it isn't. --- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 9 ++ src/gallium/auxiliary/gallivm/instructionssoa.h | 2 + src/gallium/auxiliary/gallivm/soabuiltins.c | 155 +++++++++++++--------- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 1 + 4 files changed, 101 insertions(+), 66 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index 5863f37095..a658072551 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -181,6 +181,7 @@ void InstructionsSoa::createFunctionMap() m_functionsMap[TGSI_OPCODE_POWER] = "pow"; m_functionsMap[TGSI_OPCODE_LIT] = "lit"; m_functionsMap[TGSI_OPCODE_RSQ] = "rsq"; + m_functionsMap[TGSI_OPCODE_SLT] = "slt"; } void InstructionsSoa::createDependencies() @@ -280,6 +281,14 @@ std::vector InstructionsSoa::dp3(const std::vector i return callBuiltin(func, in1, in2); } + +std::vector InstructionsSoa::slt(const std::vector in1, + const std::vector in2) +{ + llvm::Function *func = function(TGSI_OPCODE_SLT); + return callBuiltin(func, in1, in2); +} + llvm::Value * InstructionsSoa::allocaTemp() { VectorType *vector = VectorType::get(Type::FloatTy, 4); diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h index 20cab3b3bb..3817fdc904 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.h +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h @@ -69,6 +69,8 @@ public: std::vector pow(const std::vector in1, const std::vector in2); std::vector rsq(const std::vector in1); + std::vector slt(const std::vector in1, + const std::vector in2); std::vector sub(const std::vector in1, const std::vector in2); void end(); diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c index 78f84510e2..cb85e1734e 100644 --- a/src/gallium/auxiliary/gallivm/soabuiltins.c +++ b/src/gallium/auxiliary/gallivm/soabuiltins.c @@ -36,6 +36,8 @@ typedef __attribute__(( ext_vector_type(4) )) float float4; extern float fabsf(float val); +/* helpers */ + float4 absvec(float4 vec) { float4 res; @@ -47,6 +49,58 @@ float4 absvec(float4 vec) return res; } +float4 maxvec(float4 a, float4 b) +{ + return (float4){(a.x > b.x) ? a.x : b.x, + (a.y > b.y) ? a.y : b.y, + (a.z > b.z) ? a.z : b.z, + (a.w > b.w) ? a.w : b.w}; +} + +float4 minvec(float4 a, float4 b) +{ + return (float4){(a.x < b.x) ? a.x : b.x, + (a.y < b.y) ? a.y : b.y, + (a.z < b.z) ? a.z : b.z, + (a.w < b.w) ? a.w : b.w}; +} + +extern float powf(float num, float p); +extern float sqrtf(float x); + +float4 powvec(float4 vec, float4 q) +{ + float4 p; + p.x = powf(vec.x, q.x); + p.y = powf(vec.y, q.y); + p.z = powf(vec.z, q.z); + p.w = powf(vec.w, q.w); + return p; +} + +float4 sqrtvec(float4 vec) +{ + float4 p; + p.x = sqrtf(vec.x); + p.y = sqrtf(vec.y); + p.z = sqrtf(vec.z); + p.w = sqrtf(vec.w); + return p; +} + +float4 sltvec(float4 v1, float4 v2) +{ + float4 p; + p.x = (v1.x < v2.x) ? 1.0 : 0.0; + p.y = (v1.y < v2.y) ? 1.0 : 0.0; + p.z = (v1.z < v2.z) ? 1.0 : 0.0; + p.w = (v1.w < v2.w) ? 1.0 : 0.0; + return p; +} + + +/* instructions */ + void abs(float4 *res, float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) { @@ -69,7 +123,6 @@ void dp3(float4 *res, res[3] = dot; } - void dp4(float4 *res, float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) @@ -83,35 +136,25 @@ void dp4(float4 *res, res[3] = dot; } -extern float powf(float num, float p); -extern float sqrtf(float x); - -float4 powvec(float4 vec, float4 q) -{ - float4 p; - p.x = powf(vec.x, q.x); - p.y = powf(vec.y, q.y); - p.z = powf(vec.z, q.z); - p.w = powf(vec.w, q.w); - return p; -} - -void pow(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, - float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +void lit(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) { - res[0] = powvec(tmp0x, tmp1x); - res[1] = res[0]; - res[2] = res[0]; - res[3] = res[0]; -} + const float4 zerovec = (float4) {0.0, 0.0, 0.0, 0.0}; + const float4 min128 = (float4) {-128.f, -128.f, -128.f, -128.f}; + const float4 plus128 = (float4) {128.f, 128.f, 128.f, 128.f}; -float4 minvec(float4 a, float4 b) -{ - return (float4){(a.x < b.x) ? a.x : b.x, - (a.y < b.y) ? a.y : b.y, - (a.z < b.z) ? a.z : b.z, - (a.w < b.w) ? a.w : b.w}; + res[0] = (float4){1.0, 1.0, 1.0, 1.0}; + if (tmp0x.x > 0) { + float4 tmpy = maxvec(tmp0y, zerovec); + float4 tmpw = minvec(tmp0w, plus128); + tmpw = maxvec(tmpw, min128); + res[1] = tmp0x; + res[2] = powvec(tmpy, tmpw); + } else { + res[1] = zerovec; + res[2] = zerovec; + } + res[3] = (float4){1.0, 1.0, 1.0, 1.0}; } void min(float4 *res, @@ -125,14 +168,6 @@ void min(float4 *res, } -float4 maxvec(float4 a, float4 b) -{ - return (float4){(a.x > b.x) ? a.x : b.x, - (a.y > b.y) ? a.y : b.y, - (a.z > b.z) ? a.z : b.z, - (a.w > b.w) ? a.w : b.w}; -} - void max(float4 *res, float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) @@ -143,37 +178,14 @@ void max(float4 *res, res[3] = maxvec(tmp0w, tmp1w); } - -void lit(float4 *res, - float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w) -{ - const float4 zerovec = (float4) {0.0, 0.0, 0.0, 0.0}; - const float4 min128 = (float4) {-128.f, -128.f, -128.f, -128.f}; - const float4 plus128 = (float4) {128.f, 128.f, 128.f, 128.f}; - - res[0] = (float4){1.0, 1.0, 1.0, 1.0}; - if (tmp0x.x > 0) { - float4 tmpy = maxvec(tmp0y, zerovec); - float4 tmpw = minvec(tmp0w, plus128); - tmpw = maxvec(tmpw, min128); - res[1] = tmp0x; - res[2] = powvec(tmpy, tmpw); - } else { - res[1] = zerovec; - res[2] = zerovec; - } - res[3] = (float4){1.0, 1.0, 1.0, 1.0}; -} - - -float4 sqrtvec(float4 vec) +void pow(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) { - float4 p; - p.x = sqrtf(vec.x); - p.y = sqrtf(vec.y); - p.z = sqrtf(vec.z); - p.w = sqrtf(vec.w); - return p; + res[0] = powvec(tmp0x, tmp1x); + res[1] = res[0]; + res[2] = res[0]; + res[3] = res[0]; } void rsq(float4 *res, @@ -185,3 +197,14 @@ void rsq(float4 *res, res[2] = onevec/sqrtvec(absvec(tmp0z)); res[3] = onevec/sqrtvec(absvec(tmp0w)); } + +void slt(float4 *res, + float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, + float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) +{ + res[0] = sltvec(tmp0x, tmp1x); + res[1] = sltvec(tmp0y, tmp1y); + res[2] = sltvec(tmp0z, tmp1z); + res[3] = sltvec(tmp0w, tmp1w); +} + diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index fdfbb76c16..7292c0e366 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -767,6 +767,7 @@ translate_instructionir(llvm::Module *module, } break; case TGSI_OPCODE_SLT: { + out = instr->slt(inputs[0], inputs[1]); } break; case TGSI_OPCODE_SGE: { -- cgit v1.2.3 From a15699c3f54edb5d5b42960e7568e587b752e407 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 1 Oct 2008 13:34:38 +0100 Subject: draw: add streamlined paths for fetching linear verts --- src/gallium/auxiliary/draw/draw_vs_aos.c | 44 +++++---- src/gallium/auxiliary/draw/draw_vs_aos.h | 19 ++-- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 137 +++++++++++++++++++++------- 3 files changed, 134 insertions(+), 66 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index a556477a76..4c794e0e23 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -92,9 +92,9 @@ struct x86_reg aos_get_x86( struct aos_compilation *cp, assert(which_reg == 1); offset = Offset(struct aos_machine, constants); break; - case X86_ATTRIBS: + case X86_BUFFERS: assert(which_reg == 0); - offset = Offset(struct aos_machine, attrib); + offset = Offset(struct aos_machine, buffer); break; default: assert(0); @@ -1939,6 +1939,8 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, save_fpu_state( &cp ); set_fpu_round_nearest( &cp ); + aos_init_inputs( &cp, linear ); + /* Note address for loop jump */ label = x86_get_label(cp.func); @@ -2018,13 +2020,7 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, /* Incr index */ - if (linear) { - x86_inc(cp.func, cp.idx_EBX); - } - else { - x86_lea(cp.func, cp.idx_EBX, x86_make_disp(cp.idx_EBX, 4)); - } - + aos_incr_inputs( &cp, linear ); } /* decr count, loop if not zero */ @@ -2065,14 +2061,10 @@ static void vaos_set_buffer( struct draw_vs_varient *varient, unsigned stride ) { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; - unsigned i; - for (i = 0; i < vaos->base.key.nr_inputs; i++) { - if (vaos->base.key.element[i].in.buffer == buf) { - vaos->attrib[i].input_ptr = ((char *)ptr + - vaos->base.key.element[i].in.offset); - vaos->attrib[i].input_stride = stride; - } + if (buf < vaos->nr_vb) { + vaos->buffer[buf].base_ptr = (char *)ptr; + vaos->buffer[buf].stride = stride; } } @@ -2089,7 +2081,7 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; machine->constants = vaos->draw->vs.aligned_constants; machine->immediates = vaos->base.vs->immediates; - machine->attrib = vaos->attrib; + machine->buffer = vaos->buffer; vaos->gen_run_elts( machine, elts, @@ -2108,7 +2100,7 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; machine->constants = vaos->draw->vs.aligned_constants; machine->immediates = vaos->base.vs->immediates; - machine->attrib = vaos->attrib; + machine->buffer = vaos->buffer; vaos->gen_run_linear( machine, start, @@ -2127,7 +2119,7 @@ static void vaos_destroy( struct draw_vs_varient *varient ) { struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; - FREE( vaos->attrib ); + FREE( vaos->buffer ); x86_release_func( &vaos->func[0] ); x86_release_func( &vaos->func[1] ); @@ -2140,6 +2132,7 @@ static void vaos_destroy( struct draw_vs_varient *varient ) static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, const struct draw_vs_varient_key *key ) { + unsigned i; struct draw_vs_varient_aos_sse *vaos = CALLOC_STRUCT(draw_vs_varient_aos_sse); if (!vaos) @@ -2154,10 +2147,15 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, vaos->draw = vs->draw; - vaos->attrib = MALLOC( key->nr_inputs * sizeof(vaos->attrib[0]) ); - if (!vaos->attrib) + for (i = 0; i < key->nr_inputs; i++) + vaos->nr_vb = MAX2( vaos->nr_vb, key->element[i].in.buffer + 1 ); + + vaos->buffer = MALLOC( vaos->nr_vb * sizeof(vaos->buffer[0]) ); + if (!vaos->buffer) goto fail; + debug_printf("nr_vb: %d\n", vaos->nr_vb); + #if 0 tgsi_dump(vs->state.tokens, 0); #endif @@ -2179,8 +2177,8 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, return &vaos->base; fail: - if (vaos && vaos->attrib) - FREE(vaos->attrib); + if (vaos && vaos->buffer) + FREE(vaos->buffer); if (vaos) x86_release_func( &vaos->func[0] ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 7fe6f79db0..306392e5d6 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -87,9 +87,10 @@ struct lit_info { #define MAX_SHINE_TAB 4 #define MAX_LIT_INFO 16 -struct aos_attrib { - const void *input_ptr; - unsigned input_stride; +struct aos_buffer { + const void *base_ptr; + unsigned stride; + void *ptr; /* updated per vertex */ }; @@ -123,7 +124,7 @@ struct aos_machine { const float (*immediates)[4]; /* points to shader data */ const float (*constants)[4]; /* points to draw data */ - const struct aos_attrib *attrib; /* points to ? */ + const struct aos_buffer *buffer; /* points to ? */ }; @@ -179,8 +180,9 @@ struct x86_reg aos_get_shader_reg( struct aos_compilation *cp, unsigned file, unsigned idx ); -boolean aos_fetch_inputs( struct aos_compilation *cp, - boolean linear ); +boolean aos_init_inputs( struct aos_compilation *cp, boolean linear ); +boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ); +boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear ); boolean aos_emit_outputs( struct aos_compilation *cp ); @@ -210,7 +212,7 @@ do { \ #define X86_NULL 0 #define X86_IMMEDIATES 1 #define X86_CONSTANTS 2 -#define X86_ATTRIBS 3 +#define X86_BUFFERS 3 struct x86_reg aos_get_x86( struct aos_compilation *cp, unsigned which_reg, @@ -232,7 +234,8 @@ struct draw_vs_varient_aos_sse { struct draw_vs_varient base; struct draw_context *draw; - struct aos_attrib *attrib; + struct aos_buffer *buffer; + unsigned nr_vb; vaos_run_linear_func gen_run_linear; vaos_run_elts_func gen_run_elts; diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index 26297c74f8..8e08b9285f 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -95,28 +95,6 @@ static void emit_load_R8G8B8A8_UNORM( struct aos_compilation *cp, -static void get_src_ptr( struct aos_compilation *cp, - struct x86_reg src, - struct x86_reg elt, - unsigned a ) -{ - struct x86_reg attrib = x86_make_disp(aos_get_x86( cp, 0, X86_ATTRIBS ), - a * sizeof(struct aos_attrib)); - - struct x86_reg input_ptr = x86_make_disp(attrib, - Offset(struct aos_attrib, input_ptr)); - - struct x86_reg input_stride = x86_make_disp(attrib, - Offset(struct aos_attrib, input_stride)); - - /* Calculate pointer to current attrib: - */ - x86_mov(cp->func, src, input_stride); - x86_imul(cp->func, src, elt); - x86_add(cp->func, src, input_ptr); -} - - /* Extended swizzles? Maybe later. */ static void emit_swizzle( struct aos_compilation *cp, @@ -128,22 +106,44 @@ static void emit_swizzle( struct aos_compilation *cp, } + +static boolean get_buffer_ptr( struct aos_compilation *cp, + unsigned buf_idx, + struct x86_reg elt, + struct x86_reg ptr) +{ + struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ), + buf_idx * sizeof(struct aos_buffer)); + + struct x86_reg buf_base_ptr = x86_make_disp(buf, + Offset(struct aos_buffer, base_ptr)); + + struct x86_reg buf_stride = x86_make_disp(buf, + Offset(struct aos_buffer, stride)); + + /* Calculate pointer to current attrib: + */ + x86_mov(cp->func, ptr, buf_stride); + x86_imul(cp->func, ptr, elt); + x86_add(cp->func, ptr, buf_base_ptr); + + return TRUE; +} + + + + static boolean load_input( struct aos_compilation *cp, unsigned idx, - boolean linear ) + struct x86_reg bufptr ) { unsigned format = cp->vaos->base.key.element[idx].in.format; - struct x86_reg src = cp->tmp_EAX; + unsigned offset = cp->vaos->base.key.element[idx].in.offset; struct x86_reg dataXMM = aos_get_xmm_reg(cp); /* Figure out source pointer address: */ - get_src_ptr(cp, - src, - linear ? cp->idx_EBX : x86_deref(cp->idx_EBX), - idx); - - src = x86_deref(src); + struct x86_reg src = x86_make_disp(bufptr, offset); aos_adopt_xmm_reg( cp, dataXMM, @@ -179,20 +179,87 @@ static boolean load_input( struct aos_compilation *cp, return TRUE; } - -boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ) +static boolean load_inputs( struct aos_compilation *cp, + unsigned buffer, + struct x86_reg ptr ) { unsigned i; - + for (i = 0; i < cp->vaos->base.key.nr_inputs; i++) { - if (!load_input( cp, i, linear )) + if (cp->vaos->base.key.element[i].in.buffer == buffer) { + + if (!load_input( cp, i, ptr )) + return FALSE; + + cp->insn_counter++; + } + } + + return TRUE; +} + +boolean aos_init_inputs( struct aos_compilation *cp, boolean linear ) +{ + if (linear && cp->vaos->nr_vb == 1) { + + struct x86_reg elt = cp->idx_EBX; + struct x86_reg ptr = cp->tmp_EAX; + + if (!get_buffer_ptr( cp, 0, elt, ptr )) return FALSE; - cp->insn_counter++; + + /* In the linear, single buffer case, keep the buffer pointer + * instead of the index number. + */ + x86_mov( cp->func, elt, ptr ); + } + + return TRUE; +} + +boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ) +{ + if (linear && cp->vaos->nr_vb == 1) { + + load_inputs( cp, 0, cp->idx_EBX ); + + } + else { + struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX); + unsigned j; + + for (j = 0; j < cp->vaos->nr_vb; j++) { + struct x86_reg ptr = cp->tmp_EAX; + + if (!get_buffer_ptr( cp, j, elt, ptr )) + return FALSE; + + cp->insn_counter++; + + if (!load_inputs( cp, j, ptr )) + return FALSE; + } } return TRUE; } +boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear ) +{ + if (linear && cp->vaos->nr_vb == 1) { + struct x86_reg stride = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ), + (0 * sizeof(struct aos_buffer) + + Offset(struct aos_buffer, stride))); + + x86_add(cp->func, cp->idx_EBX, stride); + } + else if (linear) { + x86_inc(cp->func, cp->idx_EBX); + } + else { + x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4)); + } +} -- cgit v1.2.3 From 66d4beb874606baab95fb6539de895eb373b0ccb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 2 Oct 2008 12:46:01 +0100 Subject: rtasm: add prefetch instructions --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 26 ++++++++++++++++++++++++++ src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 5 +++++ 2 files changed, 31 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 3bba9dcc07..a5abbcde49 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -675,6 +675,32 @@ void x86_and( struct x86_function *p, * SSE instructions */ +void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + emit_modrm_noreg(p, 0, ptr); +} + +void sse_prefetch0( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + emit_modrm_noreg(p, 1, ptr); +} + +void sse_prefetch1( struct x86_function *p, struct x86_reg ptr) +{ + DUMP_R( ptr ); + assert(ptr.mod != mod_REG); + emit_2ub(p, 0x0f, 0x18); + emit_modrm_noreg(p, 2, ptr); +} + + + void sse_movss( struct x86_function *p, struct x86_reg dst, diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 510aa1b0de..86091e7f6b 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -185,6 +185,11 @@ void sse2_pshufd( struct x86_function *p, struct x86_reg dest, struct x86_reg ar void sse2_rcpps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse2_rcpss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); + +void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr); +void sse_prefetch0( struct x86_function *p, struct x86_reg ptr); +void sse_prefetch1( struct x86_function *p, struct x86_reg ptr); + void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -- cgit v1.2.3 From 21f98ad30aaeab5085d12278830f485e61b47cc1 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 1 Oct 2008 18:40:01 +0100 Subject: draw: don't keep refetching constant inputs --- .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 37 +++--- src/gallium/auxiliary/draw/draw_vs.h | 4 +- src/gallium/auxiliary/draw/draw_vs_aos.c | 26 ++++- src/gallium/auxiliary/draw/draw_vs_aos.h | 2 + src/gallium/auxiliary/draw/draw_vs_aos_io.c | 127 +++++++++++++++------ src/gallium/auxiliary/draw/draw_vs_varient.c | 10 +- 6 files changed, 144 insertions(+), 62 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 73fc70c1bc..a0e08dd10a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -79,6 +79,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, unsigned num_vs_inputs = draw->vs.vertex_shader->info.num_inputs; const struct vertex_info *vinfo; unsigned i; + unsigned nr_vbs = 0; if (!draw->render->set_primitive( draw->render, @@ -102,7 +103,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, fse->key.viewport = !draw->identity_viewport; fse->key.clip = !draw->bypass_clipping; - fse->key.pad = 0; + fse->key.const_vbuffers = 0; memset(fse->key.element, 0, fse->key.nr_elements * sizeof(fse->key.element[0])); @@ -116,9 +117,16 @@ static void fse_prepare( struct draw_pt_middle_end *middle, */ fse->key.element[i].in.buffer = src->vertex_buffer_index; fse->key.element[i].in.offset = src->src_offset; + nr_vbs = MAX2(nr_vbs, src->vertex_buffer_index + 1); } + for (i = 0; i < 5 && i < nr_vbs; i++) { + if (draw->pt.vertex_buffer[i].pitch == 0) + fse->key.const_vbuffers |= (1<key.const_vbuffers); + { unsigned dst_offset = 0; @@ -162,13 +170,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, } } - - /* Would normally look up a vertex shader and peruse its list of - * varients somehow. We omitted that step and put all the - * hardcoded "shaders" into an array. We're just making the - * assumption that this happens to be a matching shader... ie - * you're running isosurf, aren't you? - */ + fse->active = draw_vs_lookup_varient( draw->vs.vertex_shader, &fse->key ); @@ -177,18 +179,17 @@ static void fse_prepare( struct draw_pt_middle_end *middle, return ; } + if (0) debug_printf("%s: found const_vbuffers: %x\n", __FUNCTION__, + fse->active->key.const_vbuffers); + /* Now set buffer pointers: */ - for (i = 0; i < num_vs_inputs; i++) { - unsigned buf = draw->pt.vertex_element[i].vertex_buffer_index; - - fse->active->set_input( fse->active, - i, - - ((const ubyte *) draw->pt.user.vbuffer[buf] + - draw->pt.vertex_buffer[buf].buffer_offset), - - draw->pt.vertex_buffer[buf].pitch ); + for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { + fse->active->set_buffer( fse->active, + i, + ((const ubyte *) draw->pt.user.vbuffer[i] + + draw->pt.vertex_buffer[i].buffer_offset), + draw->pt.vertex_buffer[i].pitch ); } *max_vertices = (draw->render->max_vertex_buffer_bytes / diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 45992d1986..68c24abad3 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -64,7 +64,7 @@ struct draw_vs_varient_key { unsigned nr_outputs:8; unsigned viewport:1; unsigned clip:1; - unsigned pad:5; + unsigned const_vbuffers:5; struct draw_varient_element element[PIPE_MAX_ATTRIBS]; }; @@ -76,7 +76,7 @@ struct draw_vs_varient { struct draw_vertex_shader *vs; - void (*set_input)( struct draw_vs_varient *, + void (*set_buffer)( struct draw_vs_varient *, unsigned i, const void *ptr, unsigned stride ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 4c794e0e23..87232865e2 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -196,6 +196,18 @@ static void spill( struct aos_compilation *cp, unsigned idx ) } +void aos_spill_all( struct aos_compilation *cp ) +{ + unsigned i; + + for (i = 0; i < 8; i++) { + if (cp->xmm[i].dirty) + spill(cp, i); + aos_release_xmm_reg(cp, i); + } +} + + static struct x86_reg get_xmm_writable( struct aos_compilation *cp, struct x86_reg reg ) { @@ -1941,6 +1953,9 @@ static boolean build_vertex_program( struct draw_vs_varient_aos_sse *varient, aos_init_inputs( &cp, linear ); + cp.x86_reg[0] = 0; + cp.x86_reg[1] = 0; + /* Note address for loop jump */ label = x86_get_label(cp.func); @@ -2066,6 +2081,8 @@ static void vaos_set_buffer( struct draw_vs_varient *varient, vaos->buffer[buf].base_ptr = (char *)ptr; vaos->buffer[buf].stride = stride; } + + if (0) debug_printf("%s %d/%d: %p %d\n", __FUNCTION__, buf, vaos->nr_vb, ptr, stride); } @@ -2078,6 +2095,8 @@ static void PIPE_CDECL vaos_run_elts( struct draw_vs_varient *varient, struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; struct aos_machine *machine = vaos->draw->vs.aos_machine; + if (0) debug_printf("%s %d\n", __FUNCTION__, count); + machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; machine->constants = vaos->draw->vs.aligned_constants; machine->immediates = vaos->base.vs->immediates; @@ -2097,6 +2116,9 @@ static void PIPE_CDECL vaos_run_linear( struct draw_vs_varient *varient, struct draw_vs_varient_aos_sse *vaos = (struct draw_vs_varient_aos_sse *)varient; struct aos_machine *machine = vaos->draw->vs.aos_machine; + if (0) debug_printf("%s %d %d const: %x\n", __FUNCTION__, start, count, + vaos->base.key.const_vbuffers); + machine->internal[IMM_PSIZE][0] = vaos->draw->rasterizer->point_size; machine->constants = vaos->draw->vs.aligned_constants; machine->immediates = vaos->base.vs->immediates; @@ -2140,7 +2162,7 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, vaos->base.key = *key; vaos->base.vs = vs; - vaos->base.set_input = vaos_set_buffer; + vaos->base.set_buffer = vaos_set_buffer; vaos->base.destroy = vaos_destroy; vaos->base.run_linear = vaos_run_linear; vaos->base.run_elts = vaos_run_elts; @@ -2154,7 +2176,7 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, if (!vaos->buffer) goto fail; - debug_printf("nr_vb: %d\n", vaos->nr_vb); + debug_printf("nr_vb: %d const: %x\n", vaos->nr_vb, vaos->base.key.const_vbuffers); #if 0 tgsi_dump(vs->state.tokens, 0); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.h b/src/gallium/auxiliary/draw/draw_vs_aos.h index 306392e5d6..264387517b 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.h +++ b/src/gallium/auxiliary/draw/draw_vs_aos.h @@ -176,6 +176,8 @@ void aos_adopt_xmm_reg( struct aos_compilation *cp, unsigned idx, unsigned dirty ); +void aos_spill_all( struct aos_compilation *cp ); + struct x86_reg aos_get_shader_reg( struct aos_compilation *cp, unsigned file, unsigned idx ); diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index 8e08b9285f..b0c51d7fa1 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -108,29 +108,45 @@ static void emit_swizzle( struct aos_compilation *cp, static boolean get_buffer_ptr( struct aos_compilation *cp, - unsigned buf_idx, - struct x86_reg elt, - struct x86_reg ptr) + boolean linear, + unsigned buf_idx, + struct x86_reg elt, + struct x86_reg ptr) { struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ), buf_idx * sizeof(struct aos_buffer)); - struct x86_reg buf_base_ptr = x86_make_disp(buf, - Offset(struct aos_buffer, base_ptr)); - struct x86_reg buf_stride = x86_make_disp(buf, Offset(struct aos_buffer, stride)); + if (linear) { + struct x86_reg buf_ptr = x86_make_disp(buf, + Offset(struct aos_buffer, ptr)); - /* Calculate pointer to current attrib: - */ - x86_mov(cp->func, ptr, buf_stride); - x86_imul(cp->func, ptr, elt); - x86_add(cp->func, ptr, buf_base_ptr); - return TRUE; -} + /* Calculate pointer to current attrib: + */ + x86_mov(cp->func, ptr, buf_ptr); + x86_mov(cp->func, elt, buf_stride); + x86_add(cp->func, elt, ptr); + sse_prefetchnta(cp->func, x86_deref(elt)); + x86_mov(cp->func, buf_ptr, elt); + } + else { + struct x86_reg buf_base_ptr = x86_make_disp(buf, + Offset(struct aos_buffer, base_ptr)); + + + /* Calculate pointer to current attrib: + */ + x86_mov(cp->func, ptr, buf_stride); + x86_imul(cp->func, ptr, elt); + x86_add(cp->func, ptr, buf_base_ptr); + } + cp->insn_counter++; + return TRUE; +} static boolean load_input( struct aos_compilation *cp, @@ -200,18 +216,57 @@ static boolean load_inputs( struct aos_compilation *cp, boolean aos_init_inputs( struct aos_compilation *cp, boolean linear ) { - if (linear && cp->vaos->nr_vb == 1) { + unsigned i; + for (i = 0; i < cp->vaos->nr_vb; i++) { + struct x86_reg buf = x86_make_disp(aos_get_x86( cp, 0, X86_BUFFERS ), + i * sizeof(struct aos_buffer)); - struct x86_reg elt = cp->idx_EBX; - struct x86_reg ptr = cp->tmp_EAX; + struct x86_reg buf_base_ptr = x86_make_disp(buf, + Offset(struct aos_buffer, base_ptr)); - if (!get_buffer_ptr( cp, 0, elt, ptr )) - return FALSE; + if (cp->vaos->base.key.const_vbuffers & (1<tmp_EAX; - /* In the linear, single buffer case, keep the buffer pointer - * instead of the index number. - */ - x86_mov( cp->func, elt, ptr ); + x86_mov(cp->func, ptr, buf_base_ptr); + + /* Load all inputs for this constant vertex buffer + */ + load_inputs( cp, i, x86_deref(ptr) ); + + /* Then just force them out to aos_machine.input[] + */ + aos_spill_all( cp ); + + } + else if (linear) { + + struct x86_reg elt = cp->idx_EBX; + struct x86_reg ptr = cp->tmp_EAX; + + struct x86_reg buf_stride = x86_make_disp(buf, + Offset(struct aos_buffer, stride)); + + struct x86_reg buf_ptr = x86_make_disp(buf, + Offset(struct aos_buffer, ptr)); + + + /* Calculate pointer to current attrib: + */ + x86_mov(cp->func, ptr, buf_stride); + x86_imul(cp->func, ptr, elt); + x86_add(cp->func, ptr, buf_base_ptr); + + + /* In the linear case, keep the buffer pointer instead of the + * index number. + */ + if (cp->vaos->nr_vb == 1) + x86_mov( cp->func, elt, ptr ); + else + x86_mov( cp->func, buf_ptr, ptr ); + + cp->insn_counter++; + } } return TRUE; @@ -219,23 +274,22 @@ boolean aos_init_inputs( struct aos_compilation *cp, boolean linear ) boolean aos_fetch_inputs( struct aos_compilation *cp, boolean linear ) { - if (linear && cp->vaos->nr_vb == 1) { - - load_inputs( cp, 0, cp->idx_EBX ); + unsigned j; - } - else { - struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX); - unsigned j; - - for (j = 0; j < cp->vaos->nr_vb; j++) { + for (j = 0; j < cp->vaos->nr_vb; j++) { + if (cp->vaos->base.key.const_vbuffers & (1<vaos->nr_vb == 1) { + load_inputs( cp, 0, cp->idx_EBX ); + } + else { + struct x86_reg elt = linear ? cp->idx_EBX : x86_deref(cp->idx_EBX); struct x86_reg ptr = cp->tmp_EAX; - if (!get_buffer_ptr( cp, j, elt, ptr )) + if (!get_buffer_ptr( cp, linear, j, elt, ptr )) return FALSE; - cp->insn_counter++; - if (!load_inputs( cp, j, ptr )) return FALSE; } @@ -252,13 +306,16 @@ boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear ) Offset(struct aos_buffer, stride))); x86_add(cp->func, cp->idx_EBX, stride); + sse_prefetchnta(cp->func, x86_deref(cp->idx_EBX)); } else if (linear) { - x86_inc(cp->func, cp->idx_EBX); + /* Nothing to do */ } else { x86_lea(cp->func, cp->idx_EBX, x86_make_disp(cp->idx_EBX, 4)); } + + return TRUE; } diff --git a/src/gallium/auxiliary/draw/draw_vs_varient.c b/src/gallium/auxiliary/draw/draw_vs_varient.c index 4daf05dae7..7ee567d478 100644 --- a/src/gallium/auxiliary/draw/draw_vs_varient.c +++ b/src/gallium/auxiliary/draw/draw_vs_varient.c @@ -64,10 +64,10 @@ struct draw_vs_varient_generic { -static void vsvg_set_input( struct draw_vs_varient *varient, - unsigned buffer, - const void *ptr, - unsigned stride ) +static void vsvg_set_buffer( struct draw_vs_varient *varient, + unsigned buffer, + const void *ptr, + unsigned stride ) { struct draw_vs_varient_generic *vsvg = (struct draw_vs_varient_generic *)varient; @@ -265,7 +265,7 @@ struct draw_vs_varient *draw_vs_varient_generic( struct draw_vertex_shader *vs, vsvg->base.key = *key; vsvg->base.vs = vs; - vsvg->base.set_input = vsvg_set_input; + vsvg->base.set_buffer = vsvg_set_buffer; vsvg->base.run_elts = vsvg_run_elts; vsvg->base.run_linear = vsvg_run_linear; vsvg->base.destroy = vsvg_destroy; -- cgit v1.2.3 From 22eb067c8863cbd9078f136706effd5df3375dbb Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 2 Oct 2008 12:53:11 +0100 Subject: draw: modify prefetching slightly --- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index b0c51d7fa1..dd79bc799a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -54,6 +54,7 @@ static void emit_load_R32G32B32( struct aos_compilation *cp, struct x86_reg data, struct x86_reg src_ptr ) { +#if 1 sse_movss(cp->func, data, x86_make_disp(src_ptr, 8)); /* data = z ? ? ? */ sse_shufps(cp->func, data, aos_get_internal_xmm( cp, IMM_IDENTITY ), SHUF(X,Y,Z,W) ); @@ -62,6 +63,16 @@ static void emit_load_R32G32B32( struct aos_compilation *cp, /* data = ? 0 z 1 */ sse_movlps(cp->func, data, src_ptr); /* data = x y z 1 */ +#else + sse_movups(cp->func, data, src_ptr); + /* data = x y z ? */ + sse2_pshufd(cp->func, data, data, SHUF(W,X,Y,Z) ); + /* data = ? x y z */ + sse_movss(cp->func, data, aos_get_internal_xmm( cp, IMM_ONES ) ); + /* data = 1 x y z */ + sse2_pshufd(cp->func, data, data, SHUF(Y,Z,W,X) ); + /* data = x y z 1 */ +#endif } static void emit_load_R32G32( struct aos_compilation *cp, @@ -128,7 +139,7 @@ static boolean get_buffer_ptr( struct aos_compilation *cp, x86_mov(cp->func, ptr, buf_ptr); x86_mov(cp->func, elt, buf_stride); x86_add(cp->func, elt, ptr); - sse_prefetchnta(cp->func, x86_deref(elt)); + if (buf_idx == 0) sse_prefetchnta(cp->func, x86_make_disp(elt, 192)); x86_mov(cp->func, buf_ptr, elt); } else { @@ -306,7 +317,7 @@ boolean aos_incr_inputs( struct aos_compilation *cp, boolean linear ) Offset(struct aos_buffer, stride))); x86_add(cp->func, cp->idx_EBX, stride); - sse_prefetchnta(cp->func, x86_deref(cp->idx_EBX)); + sse_prefetchnta(cp->func, x86_make_disp(cp->idx_EBX, 192)); } else if (linear) { /* Nothing to do */ @@ -327,7 +338,7 @@ static void emit_store_R32G32B32A32( struct aos_compilation *cp, struct x86_reg dst_ptr, struct x86_reg dataXMM ) { - sse_movups(cp->func, dst_ptr, dataXMM); + sse_movaps(cp->func, dst_ptr, dataXMM); } static void emit_store_R32G32B32( struct aos_compilation *cp, @@ -430,7 +441,7 @@ boolean aos_emit_outputs( struct aos_compilation *cp ) if (data.file != file_XMM) { struct x86_reg tmp = aos_get_xmm_reg( cp ); - sse_movups(cp->func, tmp, data); + sse_movaps(cp->func, tmp, data); data = tmp; } -- cgit v1.2.3 From 6965532e14717f71a6f4353fb683c5070c6b7d7a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 3 Oct 2008 13:50:34 +0100 Subject: rtasm: add sse_movntps --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 12 ++++++++++++ src/gallium/auxiliary/rtasm/rtasm_x86sse.h | 2 ++ 2 files changed, 14 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index 9085f4cc0e..cc5871f873 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -653,6 +653,18 @@ void sse_prefetch1( struct x86_function *p, struct x86_reg ptr) emit_modrm_noreg(p, 2, ptr); } +void sse_movntps( struct x86_function *p, + struct x86_reg dst, + struct x86_reg src) +{ + DUMP_RR( dst, reg ); + + assert(dst.mod != mod_REG); + assert(src.mod == mod_REG); + emit_2ub(p, 0x0f, 0x2b); + emit_modrm(p, src, dst); +} + diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h index 2d7715f965..af79f07dd3 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h @@ -189,6 +189,8 @@ void sse_prefetchnta( struct x86_function *p, struct x86_reg ptr); void sse_prefetch0( struct x86_function *p, struct x86_reg ptr); void sse_prefetch1( struct x86_function *p, struct x86_reg ptr); +void sse_movntps( struct x86_function *p, struct x86_reg dst, struct x86_reg src); + void sse_addps( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_addss( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); void sse_cvtps2pi( struct x86_function *p, struct x86_reg dst, struct x86_reg src ); -- cgit v1.2.3 From afaa53040bd01ca86762e7d7b1a5a65810767921 Mon Sep 17 00:00:00 2001 From: Robert Ellison Date: Fri, 3 Oct 2008 18:00:43 -0600 Subject: CELL: changes to generate SPU code for stenciling This set of code changes are for stencil code generation support. Both one-sided and two-sided stenciling are supported. In addition to the raw code generation changes, these changes had to be made elsewhere in the system: - Added new "register set" feature to the SPE assembly generation. A "register set" is a way to allocate multiple registers and free them all at the same time, delegating register allocation management to the spe_function unit. It's quite useful in complex register allocation schemes (like stenciling). - Added and improved SPE macro calculations. These are operations between registers and unsigned integer immediates. In many cases, the calculation can be performed with a single instruction; the macros will generate the single instruction if possible, or generate a register load and register-to-register operation if not. These macro functions are: spe_load_uint() (which has new ways to load a value in a single instruction), spe_and_uint(), spe_xor_uint(), spe_compare_equal_uint(), and spe_compare_greater_uint(). - Added facing to fragment generation. While rendering, the rasterizer needs to be able to determine front- and back-facing fragments, in order to correctly apply two-sided stencil. That requires these changes: - Added front_winding field to the cell_command_render block, so that the state tracker could communicate to the rasterizer what it considered to be the front-facing direction. - Added fragment facing as an input to the fragment function. - Calculated facing is passed during emit_quad(). --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 246 +++++- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 41 +- src/gallium/drivers/cell/common.h | 1 + src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 881 ++++++++++++++++++--- src/gallium/drivers/cell/ppu/cell_render.c | 1 + src/gallium/drivers/cell/ppu/cell_vbuf.c | 1 + src/gallium/drivers/cell/spu/spu_main.h | 3 +- src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 19 +- src/gallium/drivers/cell/spu/spu_per_fragment_op.h | 3 +- src/gallium/drivers/cell/spu/spu_render.c | 4 +- src/gallium/drivers/cell/spu/spu_tri.c | 35 +- src/gallium/drivers/cell/spu/spu_tri.h | 2 +- 12 files changed, 1091 insertions(+), 146 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 491141f190..8a87e9abb1 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -359,14 +359,21 @@ void _name (struct spe_function *p, int imm) \ */ void spe_init_func(struct spe_function *p, unsigned code_size) { + register unsigned int i; + p->store = align_malloc(code_size, 16); p->num_inst = 0; p->max_inst = code_size / SPE_INST_SIZE; + p->set_count = 0; + memset(p->regs, 0, SPE_NUM_REGS * sizeof(p->regs[0])); + /* Conservatively treat R0 - R2 and R80 - R127 as non-volatile. */ - p->regs[0] = ~7; - p->regs[1] = (1U << (80 - 64)) - 1; + p->regs[0] = p->regs[1] = p->regs[2] = 1; + for (i = 80; i <= 127; i++) { + p->regs[i] = 1; + } p->print = false; p->indent = 0; @@ -398,12 +405,8 @@ int spe_allocate_available_register(struct spe_function *p) { unsigned i; for (i = 0; i < SPE_NUM_REGS; i++) { - const uint64_t mask = (1ULL << (i % 64)); - const unsigned idx = i / 64; - - assert(idx < 2); - if ((p->regs[idx] & mask) != 0) { - p->regs[idx] &= ~mask; + if (p->regs[i] == 0) { + p->regs[i] = 1; return i; } } @@ -417,31 +420,68 @@ int spe_allocate_available_register(struct spe_function *p) */ int spe_allocate_register(struct spe_function *p, int reg) { - const unsigned idx = reg / 64; - const unsigned bit = reg % 64; - assert(reg < SPE_NUM_REGS); - assert((p->regs[idx] & (1ULL << bit)) != 0); - - p->regs[idx] &= ~(1ULL << bit); + assert(p->regs[reg] == 0); + p->regs[reg] = 1; return reg; } /** - * Mark the given SPE register as "unallocated". + * Mark the given SPE register as "unallocated". Note that this should + * only be used on registers allocated in the current register set; an + * assertion will fail if an attempt is made to deallocate a register + * allocated in an earlier register set. */ void spe_release_register(struct spe_function *p, int reg) { - const unsigned idx = reg / 64; - const unsigned bit = reg % 64; + assert(reg < SPE_NUM_REGS); + assert(p->regs[reg] == 1); - assert(idx < 2); + p->regs[reg] = 0; +} - assert(reg < SPE_NUM_REGS); - assert((p->regs[idx] & (1ULL << bit)) == 0); +/** + * Start a new set of registers. This can be called if + * it will be difficult later to determine exactly what + * registers were actually allocated during a code generation + * sequence, and you really just want to deallocate all of them. + */ +void spe_allocate_register_set(struct spe_function *p) +{ + register unsigned int i; + + /* Keep track of the set count. If it ever wraps around to 0, + * we're in trouble. + */ + p->set_count++; + assert(p->set_count > 0); + + /* Increment the allocation count of all registers currently + * allocated. Then any registers that are allocated in this set + * will be the only ones with a count of 1; they'll all be released + * when the register set is released. + */ + for (i = 0; i < SPE_NUM_REGS; i++) { + if (p->regs[i] > 0) p->regs[i]++; + } +} + +void spe_release_register_set(struct spe_function *p) +{ + unsigned int i; + + /* If the set count drops below zero, we're in trouble. */ + assert(p->set_count > 0); + p->set_count--; - p->regs[idx] |= (1ULL << bit); + /* Drop the allocation level of all registers. Any allocated + * during this register set will drop to 0 and then become + * available. + */ + for (i = 0; i < SPE_NUM_REGS; i++) { + if (p->regs[i] > 0) p->regs[i]--; + } } @@ -603,8 +643,10 @@ void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui) { /* If the whole value is in the lower 18 bits, use ila, which * doesn't sign-extend. Otherwise, if the two halfwords of - * the constant are identical, use ilh. Otherwise, we have - * to use ilhu followed by iohl. + * the constant are identical, use ilh. Otherwise, if every byte of + * the desired value is 0x00 or 0xff, we can use Form Select Mask for + * Bytes Immediate (fsmbi) to load the value in a single instruction. + * Otherwise, in the general case, we have to use ilhu followed by iohl. */ if ((ui & 0xfffc0000) == ui) { spe_ila(p, rT, ui); @@ -612,13 +654,171 @@ void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui) else if ((ui >> 16) == (ui & 0xffff)) { spe_ilh(p, rT, ui & 0xffff); } + else if ( + ((ui & 0x000000ff) == 0 || (ui & 0x000000ff) == 0x000000ff) && + ((ui & 0x0000ff00) == 0 || (ui & 0x0000ff00) == 0x0000ff00) && + ((ui & 0x00ff0000) == 0 || (ui & 0x00ff0000) == 0x00ff0000) && + ((ui & 0xff000000) == 0 || (ui & 0xff000000) == 0xff000000) + ) { + unsigned int mask = 0; + /* fsmbi duplicates each bit in the given mask eight times, + * using a 16-bit value to initialize a 16-byte quadword. + * Each 4-bit nybble of the mask corresponds to a full word + * of the result; look at the value and figure out the mask + * (replicated for each word in the quadword), and then + * form the "select mask" to get the value. + */ + if ((ui & 0x000000ff) == 0x000000ff) mask |= 0x1111; + if ((ui & 0x0000ff00) == 0x0000ff00) mask |= 0x2222; + if ((ui & 0x00ff0000) == 0x00ff0000) mask |= 0x4444; + if ((ui & 0xff000000) == 0xff000000) mask |= 0x8888; + spe_fsmbi(p, rT, mask); + } else { + /* The general case: this usually uses two instructions, but + * may use only one if the low-order 16 bits of each word are 0. + */ spe_ilhu(p, rT, ui >> 16); if (ui & 0xffff) spe_iohl(p, rT, ui & 0xffff); } } +/* This function is constructed identically to spe_sor_uint() below. + * Changes to one should be made in the other. + */ +void spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +{ + /* If we can, emit a single instruction, either And Byte Immediate + * (which uses the same constant across each byte), And Halfword Immediate + * (which sign-extends a 10-bit immediate to 16 bits and uses that + * across each halfword), or And Word Immediate (which sign-extends + * a 10-bit immediate to 32 bits). + * + * Otherwise, we'll need to use a temporary register. + */ + register unsigned int tmp; + + /* If the upper 23 bits are all 0s or all 1s, sign extension + * will work and we can use And Word Immediate + */ + tmp = ui & 0xfffffe00; + if (tmp == 0xfffffe00 || tmp == 0) { + spe_andi(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric along halfword boundaries and + * the upper 7 bits of each halfword are all 0s or 1s, we + * can use And Halfword Immediate + */ + tmp = ui & 0xfe00fe00; + if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) { + spe_andhi(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric in each byte, then we can use + * the And Byte Immediate instruction. + */ + tmp = ui & 0x000000ff; + if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) { + spe_andbi(p, rT, rA, tmp); + return; + } + + /* Otherwise, we'll have to use a temporary register. */ + unsigned int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_and(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); +} + +/* This function is constructed identically to spe_and_uint() above. + * Changes to one should be made in the other. + */ +void spe_xor_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +{ + /* If we can, emit a single instruction, either Exclusive Or Byte + * Immediate (which uses the same constant across each byte), Exclusive + * Or Halfword Immediate (which sign-extends a 10-bit immediate to + * 16 bits and uses that across each halfword), or Exclusive Or Word + * Immediate (which sign-extends a 10-bit immediate to 32 bits). + * + * Otherwise, we'll need to use a temporary register. + */ + register unsigned int tmp; + + /* If the upper 23 bits are all 0s or all 1s, sign extension + * will work and we can use Exclusive Or Word Immediate + */ + tmp = ui & 0xfffffe00; + if (tmp == 0xfffffe00 || tmp == 0) { + spe_xori(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric along halfword boundaries and + * the upper 7 bits of each halfword are all 0s or 1s, we + * can use Exclusive Or Halfword Immediate + */ + tmp = ui & 0xfe00fe00; + if ((tmp == 0xfe00fe00 || tmp == 0) && ((ui >> 16) == (ui & 0x0000ffff))) { + spe_xorhi(p, rT, rA, ui & 0x000003ff); + return; + } + + /* If the ui field is symmetric in each byte, then we can use + * the Exclusive Or Byte Immediate instruction. + */ + tmp = ui & 0x000000ff; + if ((ui >> 24) == tmp && ((ui >> 16) & 0xff) == tmp && ((ui >> 8) & 0xff) == tmp) { + spe_xorbi(p, rT, rA, tmp); + return; + } + + /* Otherwise, we'll have to use a temporary register. */ + unsigned int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_xor(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); +} + +void +spe_compare_equal_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +{ + /* If the comparison value is 9 bits or less, it fits inside a + * Compare Equal Word Immediate instruction. + */ + if ((ui & 0x000001ff) == ui) { + spe_ceqi(p, rT, rA, ui); + } + /* Otherwise, we're going to have to load a word first. */ + else { + unsigned int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_ceq(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); + } +} + +void +spe_compare_greater_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +{ + /* If the comparison value is 10 bits or less, it fits inside a + * Compare Logical Greater Than Word Immediate instruction. + */ + if ((ui & 0x000003ff) == ui) { + spe_clgti(p, rT, rA, ui); + } + /* Otherwise, we're going to have to load a word first. */ + else { + unsigned int tmp_reg = spe_allocate_available_register(p); + spe_load_uint(p, tmp_reg, ui); + spe_clgt(p, rT, rA, tmp_reg); + spe_release_register(p, tmp_reg); + } +} void spe_splat(struct spe_function *p, unsigned rT, unsigned rA) diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 61c7edeb60..cd2e245409 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -53,17 +53,26 @@ struct spe_function uint num_inst; uint max_inst; - /** - * Mask of used / unused registers - * - * Each set bit corresponds to an available register. Each cleared bit - * corresponds to an allocated register. + /** + * The "set count" reflects the number of nested register sets + * are allowed. In the unlikely case that we exceed the set count, + * register allocation will start to be confused, which is critical + * enough that we check for it. + */ + unsigned char set_count; + + /** + * Flags for used and unused registers. Each byte corresponds to a + * register; a 0 in that byte means that the register is available. + * A value of 1 means that the register was allocated in the current + * register set. Any other value N means that the register was allocated + * N register sets ago. * * \sa * spe_allocate_register, spe_allocate_available_register, - * spe_release_register + * spe_allocate_register_set, spe_release_register_set, spe_release_register, */ - uint64_t regs[SPE_NUM_REGS / 64]; + unsigned char regs[SPE_NUM_REGS]; boolean print; /**< print/dump instructions as they're emitted? */ int indent; /**< number of spaces to indent */ @@ -77,6 +86,8 @@ extern unsigned spe_code_size(const struct spe_function *p); extern int spe_allocate_available_register(struct spe_function *p); extern int spe_allocate_register(struct spe_function *p, int reg); extern void spe_release_register(struct spe_function *p, int reg); +extern void spe_allocate_register_set(struct spe_function *p); +extern void spe_release_register_set(struct spe_function *p); extern void spe_print_code(struct spe_function *p, boolean enable); extern void spe_indent(struct spe_function *p, int spaces); @@ -307,6 +318,22 @@ spe_load_int(struct spe_function *p, unsigned rT, int i); extern void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui); +/** And immediate value into rT. */ +extern void +spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui); + +/** Xor immediate value into rT. */ +extern void +spe_xor_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui); + +/** Compare equal with immediate value. */ +extern void +spe_compare_equal_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui); + +/** Compare greater with immediate value. */ +extern void +spe_compare_greater_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui); + /** Replicate word 0 of rA across rT. */ extern void spe_splat(struct spe_function *p, unsigned rT, unsigned rA); diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 99329fd8e2..c223bc1744 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -227,6 +227,7 @@ struct cell_command_render float xmin, ymin, xmax, ymax; /* XXX another dummy field */ uint min_index; boolean inline_verts; + uint front_winding; /* the rasterizer needs to be able to determine facing to apply front/back-facing stencil */ }; diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 653afc235d..f920ae13b4 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -54,10 +54,12 @@ * \param ifragZ_reg register containing integer fragment Z values (in) * \param ifbZ_reg register containing integer frame buffer Z values (in/out) * \param zmask_reg register containing result of Z test/comparison (out) + * + * Returns true if the Z-buffer needs to be updated. */ -static void -gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa, - struct spe_function *f, +static boolean +gen_depth_test(struct spe_function *f, + const struct pipe_depth_stencil_alpha_state *dsa, int mask_reg, int ifragZ_reg, int ifbZ_reg, int zmask_reg) { /* NOTE: we use clgt below, not cgt, because we want to compare _unsigned_ @@ -132,7 +134,10 @@ gen_depth_test(const struct pipe_depth_stencil_alpha_state *dsa, * framebufferZ = (ztest_passed ? fragmentZ : framebufferZ; */ spe_selb(f, ifbZ_reg, ifbZ_reg, ifragZ_reg, mask_reg); + return true; } + + return false; } @@ -238,22 +243,34 @@ gen_alpha_test(const struct pipe_depth_stencil_alpha_state *dsa, * it and have to allocate and load it again unnecessarily. */ static inline void -setup_const_register(struct spe_function *f, boolean *is_already_set, unsigned int *r, float value) +setup_optional_register(struct spe_function *f, boolean *is_already_set, unsigned int *r) { if (*is_already_set) return; *r = spe_allocate_available_register(f); - spe_load_float(f, *r, value); - *is_already_set = true; } static inline void -release_const_register(struct spe_function *f, boolean *is_already_set, unsigned int r) +release_optional_register(struct spe_function *f, boolean *is_already_set, unsigned int r) { if (!*is_already_set) return; spe_release_register(f, r); *is_already_set = false; } +static inline void +setup_const_register(struct spe_function *f, boolean *is_already_set, unsigned int *r, float value) +{ + if (*is_already_set) return; + setup_optional_register(f, is_already_set, r); + spe_load_float(f, *r, value); +} + +static inline void +release_const_register(struct spe_function *f, boolean *is_already_set, unsigned int r) +{ + release_optional_register(f, is_already_set, r); +} + /** * Generate SPE code to implement the given blend mode for a quad of pixels. * \param f SPE function to append instruction onto. @@ -1117,6 +1134,633 @@ gen_colormask(struct spe_function *f, spe_release_register(f, colormask_reg); } +/* This function is annoyingly similar to gen_depth_test(), above, except + * that instead of comparing two varying values (i.e. fragment and buffer), + * we're comparing a varying value with a static value. As such, we have + * access to the Compare Immediate instructions where we don't in + * gen_depth_test(), which is what makes us very different. + * + * The return value in the stencil_pass_reg is a bitmask of valid + * fragments that also passed the stencil test. The bitmask of valid + * fragments that failed would be found in (mask_reg & ~stencil_pass_reg). + */ +static void +gen_stencil_test(struct spe_function *f, const struct pipe_stencil_state *state, + unsigned int mask_reg, unsigned int fbS_reg, + unsigned int stencil_pass_reg) +{ + /* Generate code that puts the set of passing fragments into the stencil_pass_reg + * register, taking into account whether each fragment was active to begin with. + */ + switch (state->func) { + case PIPE_FUNC_EQUAL: + /* stencil_pass = mask & (s == reference) */ + spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_and(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + /* stencil_fail = mask & ~stencil_pass */ + break; + + case PIPE_FUNC_NOTEQUAL: + /* stencil_pass = mask & ~(s == reference) */ + spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_andc(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + break; + + case PIPE_FUNC_GREATER: + /* stencil_pass = mask & (s > reference) */ + spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_and(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + break; + + case PIPE_FUNC_LESS: { + /* stencil_pass = mask & (reference > s) */ + /* There's no convenient Compare Less Than Immediate instruction, so + * we'll have to do this one the harder way, by loading a register and + * comparing directly. Compare Logical Greater Than Word (clgt) + * treats its operands as unsigned - no sign extension. + */ + unsigned int tmp_reg = spe_allocate_available_register(f); + spe_load_uint(f, tmp_reg, state->ref_value); + spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); + spe_and(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_reg); + break; + } + + case PIPE_FUNC_LEQUAL: + /* stencil_pass = mask & (s <= reference) = mask & ~(s > reference) */ + spe_compare_greater_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); + spe_andc(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + break; + + case PIPE_FUNC_GEQUAL: { + /* stencil_pass = mask & (s >= reference) = mask & ~(reference > s) */ + /* As above, we have to do this by loading a register */ + unsigned int tmp_reg = spe_allocate_available_register(f); + spe_load_uint(f, tmp_reg, state->ref_value); + spe_clgt(f, stencil_pass_reg, tmp_reg, fbS_reg); + spe_andc(f, stencil_pass_reg, mask_reg, stencil_pass_reg); + spe_release_register(f, tmp_reg); + break; + } + + case PIPE_FUNC_NEVER: + /* stencil_pass = mask & 0 = 0 */ + spe_load_uint(f, stencil_pass_reg, 0); + spe_move(f, stencil_pass_reg, mask_reg); /* zmask = mask */ + break; + + case PIPE_FUNC_ALWAYS: + /* stencil_pass = mask & 1 = mask */ + spe_move(f, stencil_pass_reg, mask_reg); + break; + } + + /* The fragments that passed the stencil test are now in stencil_pass_reg. + * The fragments that failed would be (mask_reg & ~stencil_pass_reg). + */ +} + +/* This function generates code that calculates a set of new stencil values + * given the earlier values and the operation to apply. It does not + * apply any tests. It is intended to be called up to 3 times + * (for the stencil fail operation, for the stencil pass-z fail operation, + * and for the stencil pass-z pass operation) to collect up to three + * possible sets of values, and for the caller to combine them based + * on the result of the tests. + * + * stencil_max_value should be (2^n - 1) where n is the number of bits + * in the stencil buffer - in other words, it should be usable as a mask. + */ +static void +gen_stencil_values(struct spe_function *f, unsigned int stencil_op, + unsigned int stencil_ref_value, unsigned int stencil_max_value, + unsigned int fbS_reg, unsigned int newS_reg) +{ + /* The code below assumes that newS_reg and fbS_reg are not the same + * register; if they can be, the calculations below will have to use + * an additional temporary register. For now, mark the assumption + * with an assertion that will fail if they are the same. + */ + ASSERT(fbS_reg != newS_reg); + + /* The code also assumes the the stencil_max_value is of the form + * 2^n-1 and can therefore be used as a mask for the valid bits in + * addition to a maximum. Make sure this is the case as well. + * The clever math below exploits the fact that incrementing a + * binary number serves to flip all the bits of a number starting at + * the LSB and continuing to (and including) the first zero bit + * found. That means that a number and its increment will always + * have at least one bit in common (the high order bit, if nothing + * else) *unless* the number is zero, *or* the number is of a form + * consisting of some number of 1s in the low-order bits followed + * by nothing but 0s in the high-order bits. The latter case + * implies it's of the form 2^n-1. + */ + ASSERT(stencil_max_value > 0 && ((stencil_max_value + 1) & stencil_max_value) == 0); + + switch(stencil_op) { + case PIPE_STENCIL_OP_KEEP: + /* newS = S */ + spe_move(f, newS_reg, fbS_reg); + break; + + case PIPE_STENCIL_OP_ZERO: + /* newS = 0 */ + spe_zero(f, newS_reg); + break; + + case PIPE_STENCIL_OP_REPLACE: + /* newS = stencil reference value */ + spe_load_uint(f, newS_reg, stencil_ref_value); + break; + + case PIPE_STENCIL_OP_INCR: { + /* newS = (s == max ? max : s + 1) */ + unsigned int equals_reg = spe_allocate_available_register(f); + + spe_compare_equal_uint(f, equals_reg, fbS_reg, stencil_max_value); + /* Add Word Immediate computes rT = rA + 10-bit signed immediate */ + spe_ai(f, newS_reg, fbS_reg, 1); + /* Select from the current value or the new value based on the equality test */ + spe_selb(f, newS_reg, fbS_reg, newS_reg, equals_reg); + + spe_release_register(f, equals_reg); + break; + } + case PIPE_STENCIL_OP_DECR: { + /* newS = (s == 0 ? 0 : s - 1) */ + unsigned int equals_reg = spe_allocate_available_register(f); + + spe_compare_equal_uint(f, equals_reg, fbS_reg, 0); + /* Add Word Immediate with a (-1) value works */ + spe_ai(f, newS_reg, fbS_reg, -1); + /* Select from the current value or the new value based on the equality test */ + spe_selb(f, newS_reg, fbS_reg, newS_reg, equals_reg); + + spe_release_register(f, equals_reg); + break; + } + case PIPE_STENCIL_OP_INCR_WRAP: + /* newS = (s == max ? 0 : s + 1), but since max is 2^n-1, we can + * do a normal add and mask off the correct bits + */ + spe_ai(f, newS_reg, fbS_reg, 1); + spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); + break; + + case PIPE_STENCIL_OP_DECR_WRAP: + /* newS = (s == 0 ? max : s - 1), but we'll pull the same mask trick as above */ + spe_ai(f, newS_reg, fbS_reg, -1); + spe_and_uint(f, newS_reg, newS_reg, stencil_max_value); + break; + + case PIPE_STENCIL_OP_INVERT: + /* newS = ~s. We take advantage of the mask/max value to invert only + * the valid bits for the field so we don't have to do an extra "and". + */ + spe_xor_uint(f, newS_reg, fbS_reg, stencil_max_value); + break; + + default: + ASSERT(0); + } +} + + +/* This function generates code to get all the necessary possible + * stencil values. For each of the output registers (fail_reg, + * zfail_reg, and zpass_reg), it either allocates a new register + * and calculates a new set of values based on the stencil operation, + * or it reuses a register allocation and calculation done for an + * earlier (matching) operation, or it reuses the fbS_reg register + * (if the stencil operation is KEEP, which doesn't change the + * stencil buffer). + * + * Since this function allocates a variable number of registers, + * to avoid incurring complex logic to free them, they should + * be allocated after a spe_allocate_register_set() call + * and released by the corresponding spe_release_register_set() call. + */ +static void +gen_get_stencil_values(struct spe_function *f, const struct pipe_depth_stencil_alpha_state *dsa, + unsigned int fbS_reg, + unsigned int *fail_reg, unsigned int *zfail_reg, + unsigned int *zpass_reg, unsigned int *back_fail_reg, + unsigned int *back_zfail_reg, unsigned int *back_zpass_reg) +{ + unsigned zfail_op, back_zfail_op; + + /* Stenciling had better be enabled here */ + ASSERT(dsa->stencil[0].enabled); + + /* If the depth test is not enabled, it is treated as though it always + * passes. In particular, that means that the "zfail_op" (and the backfacing + * counterpart, if active) are not considered - a failing stencil test will + * trigger the "fail_op", and a passing stencil test will trigger the + * "zpass_op". + * + * By overriding the operations in this case to be PIPE_STENCIL_OP_KEEP, + * we keep them from being calculated. + */ + if (dsa->depth.enabled) { + zfail_op = dsa->stencil[0].zfail_op; + back_zfail_op = dsa->stencil[1].zfail_op; + } + else { + zfail_op = PIPE_STENCIL_OP_KEEP; + back_zfail_op = PIPE_STENCIL_OP_KEEP; + } + + /* One-sided or front-facing stencil */ + if (dsa->stencil[0].fail_op == PIPE_STENCIL_OP_KEEP) { + *fail_reg = fbS_reg; + } + else { + *fail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[0].fail_op, dsa->stencil[0].ref_value, + 0xff, fbS_reg, *fail_reg); + } + + if (zfail_op == PIPE_STENCIL_OP_KEEP) { + *zfail_reg = fbS_reg; + } + else if (zfail_op == dsa->stencil[0].fail_op) { + *zfail_reg = *fail_reg; + } + else { + *zfail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[0].zfail_op, dsa->stencil[0].ref_value, + 0xff, fbS_reg, *zfail_reg); + } + + if (dsa->stencil[0].zpass_op == PIPE_STENCIL_OP_KEEP) { + *zpass_reg = fbS_reg; + } + else if (dsa->stencil[0].zpass_op == dsa->stencil[0].fail_op) { + *zpass_reg = *fail_reg; + } + else if (dsa->stencil[0].zpass_op == zfail_op) { + *zpass_reg = *zfail_reg; + } + else { + *zpass_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[0].zpass_op, dsa->stencil[0].ref_value, + 0xff, fbS_reg, *zpass_reg); + } + + /* If two-sided stencil is enabled, we have more work to do. */ + if (!dsa->stencil[1].enabled) { + /* This just flags that the registers need not be deallocated later */ + *back_fail_reg = fbS_reg; + *back_zfail_reg = fbS_reg; + *back_zpass_reg = fbS_reg; + } + else { + /* Same calculations as above, but for the back stencil */ + if (dsa->stencil[1].fail_op == PIPE_STENCIL_OP_KEEP) { + *back_fail_reg = fbS_reg; + } + else if (dsa->stencil[1].fail_op == dsa->stencil[0].fail_op) { + *back_fail_reg = *fail_reg; + } + else if (dsa->stencil[1].fail_op == zfail_op) { + *back_fail_reg = *zfail_reg; + } + else if (dsa->stencil[1].fail_op == dsa->stencil[0].zpass_op) { + *back_fail_reg = *zpass_reg; + } + else { + *back_fail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[1].fail_op, dsa->stencil[1].ref_value, + 0xff, fbS_reg, *back_fail_reg); + } + + if (back_zfail_op == PIPE_STENCIL_OP_KEEP) { + *back_zfail_reg = fbS_reg; + } + else if (back_zfail_op == dsa->stencil[0].fail_op) { + *back_zfail_reg = *fail_reg; + } + else if (back_zfail_op == zfail_op) { + *back_zfail_reg = *zfail_reg; + } + else if (back_zfail_op == dsa->stencil[0].zpass_op) { + *back_zfail_reg = *zpass_reg; + } + else if (back_zfail_op == dsa->stencil[1].fail_op) { + *back_zfail_reg = *back_fail_reg; + } + else { + *back_zfail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[1].zfail_op, dsa->stencil[1].ref_value, + 0xff, fbS_reg, *back_zfail_reg); + } + + if (dsa->stencil[1].zpass_op == PIPE_STENCIL_OP_KEEP) { + *back_zpass_reg = fbS_reg; + } + else if (dsa->stencil[1].zpass_op == dsa->stencil[0].fail_op) { + *back_zpass_reg = *fail_reg; + } + else if (dsa->stencil[1].zpass_op == zfail_op) { + *back_zpass_reg = *zfail_reg; + } + else if (dsa->stencil[1].zpass_op == dsa->stencil[0].zpass_op) { + *back_zpass_reg = *zpass_reg; + } + else if (dsa->stencil[1].zpass_op == dsa->stencil[1].fail_op) { + *back_zpass_reg = *back_fail_reg; + } + else if (dsa->stencil[1].zpass_op == back_zfail_op) { + *back_zpass_reg = *back_zfail_reg; + } + else { + *back_zfail_reg = spe_allocate_available_register(f); + gen_stencil_values(f, dsa->stencil[1].zpass_op, dsa->stencil[1].ref_value, + 0xff, fbS_reg, *back_zpass_reg); + } + } /* End of calculations for back-facing stencil */ +} + +static boolean +gen_stencil_depth_test(struct spe_function *f, + const struct pipe_depth_stencil_alpha_state *dsa, + const int const facing_reg, + const int mask_reg, const int fragZ_reg, + const int fbZ_reg, const int fbS_reg) +{ + /* True if we've generated code that could require writeback to the + * depth and/or stencil buffers + */ + boolean modified_buffers = false; + + boolean need_to_calculate_stencil_values; + boolean need_to_writemask_stencil_values; + + /* Registers. We may or may not actually allocate these, depending + * on whether the state values indicate that we need them. + */ + unsigned int stencil_pass_reg, stencil_fail_reg; + unsigned int stencil_fail_values, stencil_pass_depth_fail_values, stencil_pass_depth_pass_values; + unsigned int stencil_writemask_reg; + unsigned int zmask_reg; + unsigned int newS_reg; + + /* Stenciling is quite complex: up to six different configurable stencil + * operations/calculations can be required (three each for front-facing + * and back-facing fragments). Many of those operations will likely + * be identical, so there's good reason to try to avoid calculating + * the same values more than once (which unfortunately makes the code less + * straightforward). + * + * To make register management easier, we start a new + * register set; we can release all the registers in the set at + * once, and avoid having to keep track of exactly which registers + * we allocate. We can still allocate and free registers as + * desired (if we know we no longer need a register), but we don't + * have to spend the complexity to track the more difficult variant + * register usage scenarios. + */ + spe_allocate_register_set(f); + + /* Calculate the writemask. If the writemask is trivial (either + * all 0s, meaning that we don't need to calculate any stencil values + * because they're not going to change the stencil anyway, or all 1s, + * meaning that we have to calculate the stencil values but do not + * need to mask them), we can avoid generating code. Don't forget + * that we need to consider backfacing stencil, if enabled. + */ + if (dsa->stencil[0].write_mask == 0x0 && (!dsa->stencil[1].enabled || dsa->stencil[1].write_mask == 0x00)) { + /* Trivial: don't need to calculate stencil values, and don't need to + * write them back to the framebuffer. + */ + need_to_calculate_stencil_values = false; + need_to_writemask_stencil_values = false; + } + else if (dsa->stencil[0].write_mask == 0xff && (!dsa->stencil[1].enabled || dsa->stencil[1].write_mask == 0x00)) { + /* Still trivial, but a little less so. We need to write the stencil + * values, but we don't need to mask them. + */ + need_to_calculate_stencil_values = true; + need_to_writemask_stencil_values = false; + } + else { + /* The general case: calculate, mask, and write */ + need_to_calculate_stencil_values = true; + need_to_writemask_stencil_values = true; + + /* While we're here, generate code that calculates what the + * writemask should be. If backface stenciling is enabled, + * and the backface writemask is not the same as the frontface + * writemask, we'll have to generate code that merges the + * two masks into a single effective mask based on fragment facing. + */ + stencil_writemask_reg = spe_allocate_available_register(f); + spe_load_uint(f, stencil_writemask_reg, dsa->stencil[0].write_mask); + if (dsa->stencil[1].enabled && dsa->stencil[0].write_mask != dsa->stencil[1].write_mask) { + unsigned int back_write_mask_reg = spe_allocate_available_register(f); + spe_load_uint(f, back_write_mask_reg, dsa->stencil[1].write_mask); + spe_selb(f, stencil_writemask_reg, stencil_writemask_reg, back_write_mask_reg, facing_reg); + spe_release_register(f, back_write_mask_reg); + } + } + + /* At least one-sided stenciling must be on. Generate code that + * runs the stencil test on the basic/front-facing stencil, leaving + * the mask of passing stencil bits in stencil_pass_reg. This mask will + * be used both to mask the set of active pixels, and also to + * determine how the stencil buffer changes. + * + * This test will *not* change the value in mask_reg (because we don't + * yet know whether to apply the two-sided stencil or one-sided stencil). + */ + stencil_pass_reg = spe_allocate_available_register(f); + gen_stencil_test(f, &dsa->stencil[0], mask_reg, fbS_reg, stencil_pass_reg); + + /* If two-sided stenciling is on, generate code to run the stencil + * test on the backfacing stencil as well, and combine the two results + * into the one correct result based on facing. + */ + if (dsa->stencil[1].enabled) { + unsigned int temp_reg = spe_allocate_available_register(f); + gen_stencil_test(f, &dsa->stencil[1], mask_reg, fbS_reg, temp_reg); + spe_selb(f, stencil_pass_reg, stencil_pass_reg, temp_reg, facing_reg); + spe_release_register(f, temp_reg); + } + + /* Generate code that, given the mask of valid fragments and the + * mask of valid fragments that passed the stencil test, computes + * the mask of valid fragments that failed the stencil test. We + * have to do this before we run a depth test (because the + * depth test should not be performed on fragments that failed the + * stencil test, and because the depth test will update the + * mask of valid fragments based on the results of the depth test). + */ + stencil_fail_reg = spe_allocate_available_register(f); + spe_andc(f, stencil_fail_reg, mask_reg, stencil_pass_reg); + /* Now remove the stenciled-out pixels from the valid fragment mask, + * so we can later use the valid fragment mask in the depth test. + */ + spe_and(f, mask_reg, mask_reg, stencil_pass_reg); + + /* We may not need to calculate stencil values, if the writemask is off */ + if (need_to_calculate_stencil_values) { + unsigned int back_stencil_fail_values, back_stencil_pass_depth_fail_values, back_stencil_pass_depth_pass_values; + unsigned int front_stencil_fail_values, front_stencil_pass_depth_fail_values, front_stencil_pass_depth_pass_values; + + /* Generate code that calculates exactly which stencil values we need, + * without calculating the same value twice (say, if two different + * stencil ops have the same value). This code will work for one-sided + * and two-sided stenciling (so that we take into account that operations + * may match between front and back stencils), and will also take into + * account whether the depth test is enabled (if the depth test is off, + * we don't need any of the zfail results, because the depth test always + * is considered to pass if it is disabled). Any register value that + * does not need to be calculated will come back with the same value + * that's in fbS_reg. + * + * This function will allocate a variant number of registers that + * will be released as part of the register set. + */ + gen_get_stencil_values(f, dsa, fbS_reg, + &front_stencil_fail_values, &front_stencil_pass_depth_fail_values, + &front_stencil_pass_depth_pass_values, &back_stencil_fail_values, + &back_stencil_pass_depth_fail_values, &back_stencil_pass_depth_pass_values); + + /* Tricky, tricky, tricky - the things we do to create optimal + * code... + * + * The various stencil values registers may overlap with each other + * and with fbS_reg arbitrarily (as any particular operation is + * only calculated once and stored in one register, no matter + * how many times it is used). So we can't change the values + * within those registers directly - if we change a value in a + * register that's being referenced by two different calculations, + * we've just unwittingly changed the second value as well... + * + * Avoid this by allocating new registers to hold the results + * (there may be 2, if the depth test is off, or 3, if it is on). + * These will be released as part of the register set. + */ + if (!dsa->stencil[1].enabled) { + /* The easy case: if two-sided stenciling is *not* enabled, we + * just use the front-sided values. + */ + stencil_fail_values = front_stencil_fail_values; + stencil_pass_depth_fail_values = front_stencil_pass_depth_fail_values; + stencil_pass_depth_pass_values = front_stencil_pass_depth_pass_values; + } + else { /* two-sided stencil enabled */ + /* Allocate new registers for the needed merged values */ + stencil_fail_values = spe_allocate_available_register(f); + spe_selb(f, stencil_fail_values, front_stencil_fail_values, back_stencil_fail_values, facing_reg); + if (dsa->depth.enabled) { + stencil_pass_depth_fail_values = spe_allocate_available_register(f); + spe_selb(f, stencil_pass_depth_fail_values, front_stencil_pass_depth_fail_values, back_stencil_pass_depth_fail_values, facing_reg); + } + else { + stencil_pass_depth_fail_values = fbS_reg; + } + stencil_pass_depth_pass_values = spe_allocate_available_register(f); + spe_selb(f, stencil_pass_depth_pass_values, front_stencil_pass_depth_pass_values, back_stencil_pass_depth_pass_values, facing_reg); + } + } + + /* We now have all the stencil values we need. We also need + * the results of the depth test to figure out which + * stencil values will become the new stencil values. (Even if + * we aren't actually calculating stencil values, we need to apply + * the depth test if it's enabled.) + * + * The code generated by gen_depth_test() returns the results of the + * test in the given register, but also alters the mask_reg based + * on the results of the test. + */ + if (dsa->depth.enabled) { + zmask_reg = spe_allocate_available_register(f); + modified_buffers |= gen_depth_test(f, dsa, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); + } + + if (need_to_calculate_stencil_values) { + /* If we need to writemask the stencil values before going into + * the stencil buffer, we'll have to use a new register to + * hold the new values. If not, we can just keep using the + * current register. + */ + if (need_to_writemask_stencil_values) { + newS_reg = spe_allocate_available_register(f); + spe_move(f, newS_reg, fbS_reg); + modified_buffers = true; + } + else { + newS_reg = fbS_reg; + } + + /* Merge in the selected stencil fail values */ + if (stencil_fail_values != fbS_reg) { + spe_selb(f, newS_reg, newS_reg, stencil_fail_values, stencil_fail_reg); + } + + /* Same for the stencil pass/depth fail values. If this calculation + * is not needed (say, if depth test is off), then the + * stencil_pass_depth_fail_values register will be equal to fbS_reg + * and we'll skip the calculation. + */ + if (stencil_pass_depth_fail_values != fbS_reg) { + /* We don't actually have a stencil pass/depth fail mask yet. + * Calculate it here from the stencil passing mask and the + * depth passing mask. Note that zmask_reg *must* have been + * set above if we're here. + */ + unsigned int stencil_pass_depth_fail_mask = spe_allocate_available_register(f); + spe_andc(f, stencil_pass_depth_fail_mask, stencil_pass_reg, zmask_reg); + + spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_fail_values, stencil_pass_depth_fail_mask); + + spe_release_register(f, stencil_pass_depth_fail_mask); + } + + /* Same for the stencil pass/depth pass mask */ + if (stencil_pass_depth_pass_values != fbS_reg) { + unsigned int stencil_pass_depth_pass_mask = spe_allocate_available_register(f); + spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg); + + spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask); + spe_release_register(f, stencil_pass_depth_pass_mask); + } + + /* Almost done. If we need to writemask, do it now, leaving the + * results in the fbS_reg register passed in. If we don't need + * to writemask, then the results are *already* in the fbS_reg, + * so there's nothing more to do. + */ + + if (need_to_writemask_stencil_values) { + /* The Select Bytes command makes a fine writemask. Where + * the mask is 0, the first (original) values are retained, + * effectively masking out changes. Where the mask is 1, the + * second (new) values are retained, incorporating changes. + */ + spe_selb(f, fbS_reg, fbS_reg, newS_reg, stencil_writemask_reg); + } + } /* done calculating stencil values */ + + /* The stencil and/or depth values have been applied, and the + * mask_reg, fbS_reg, and fbZ_reg values have been updated. + * We're all done, except that we've allocated a fair number + * of registers that we didn't bother tracking. Release all + * those registers as part of the register set, and go home. + */ + spe_release_register_set(f); + + /* Return true if we could have modified the stencil and/or + * depth buffers. + */ + return modified_buffers; +} + + /** * Generate SPE code to implement the fragment operations (alpha test, * depth test, stencil test, blending, colormask, and final @@ -1156,6 +1800,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) const int fragB_reg = 10; /* vector float */ const int fragA_reg = 11; /* vector float */ const int mask_reg = 12; /* vector uint */ + const int facing_reg = 13; /* uint */ /* offset of quad from start of tile * XXX assuming 4-byte pixels for color AND Z/stencil!!!! @@ -1183,6 +1828,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) spe_allocate_register(f, fragB_reg); spe_allocate_register(f, fragA_reg); spe_allocate_register(f, mask_reg); + spe_allocate_register(f, facing_reg); quad_offset_reg = spe_allocate_available_register(f); fbRGBA_reg = spe_allocate_available_register(f); @@ -1195,6 +1841,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) ASSERT(TILE_SIZE == 32); + spe_comment(f, 0, "Computing tile location in memory"); spe_rotmi(f, y2_reg, y_reg, -1); /* y2 = y / 2 */ spe_rotmi(f, x2_reg, x_reg, -1); /* x2 = x / 2 */ spe_shli(f, y2_reg, y2_reg, 4); /* y2 *= 16 */ @@ -1205,124 +1852,164 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) spe_release_register(f, y2_reg); } - if (dsa->alpha.enabled) { gen_alpha_test(dsa, f, mask_reg, fragA_reg); } + /* If we need the stencil buffers (because one- or two-sided stencil is + * enabled) or the depth buffer (because the depth test is enabled), + * go grab them. Note that if either one- or two-sided stencil is + * enabled, dsa->stencil[0].enabled will be true. + */ if (dsa->depth.enabled || dsa->stencil[0].enabled) { const enum pipe_format zs_format = cell->framebuffer.zsbuf->format; boolean write_depth_stencil; - int fbZ_reg = spe_allocate_available_register(f); /* Z values */ - int fbS_reg = spe_allocate_available_register(f); /* Stencil values */ + /* We may or may not need to allocate a register for Z or stencil values */ + boolean fbS_reg_set = false, fbZ_reg_set = false; + unsigned int fbS_reg, fbZ_reg = 0; + + spe_comment(f, 0, "Loading Z/stencil tile"); /* fetch quad of depth/stencil values from tile at (x,y) */ /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ + /* XXX Not sure this is allowed if we've only got a 16-bit Z buffer... */ spe_lqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); - if (dsa->depth.enabled) { - /* Extract Z bits from fbZS_reg into fbZ_reg */ - if (zs_format == PIPE_FORMAT_S8Z24_UNORM || - zs_format == PIPE_FORMAT_X8Z24_UNORM) { - int mask_reg = spe_allocate_available_register(f); - spe_fsmbi(f, mask_reg, 0x7777); /* mask[0,1,2,3] = 0x00ffffff */ - spe_and(f, fbZ_reg, fbZS_reg, mask_reg); /* fbZ = fbZS & mask */ - spe_release_register(f, mask_reg); - /* OK, fbZ_reg has four 24-bit Z values now */ - } - else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || - zs_format == PIPE_FORMAT_Z24X8_UNORM) { - spe_rotmi(f, fbZ_reg, fbZS_reg, -8); /* fbZ = fbZS >> 8 */ - /* OK, fbZ_reg has four 24-bit Z values now */ - } - else if (zs_format == PIPE_FORMAT_Z32_UNORM) { - spe_move(f, fbZ_reg, fbZS_reg); - /* OK, fbZ_reg has four 32-bit Z values now */ - } - else if (zs_format == PIPE_FORMAT_Z16_UNORM) { - spe_move(f, fbZ_reg, fbZS_reg); - /* OK, fbZ_reg has four 16-bit Z values now */ - } - else { - ASSERT(0); /* invalid format */ - } - - /* Convert fragZ values from float[4] to 16, 24 or 32-bit uint[4] */ - if (zs_format == PIPE_FORMAT_S8Z24_UNORM || - zs_format == PIPE_FORMAT_X8Z24_UNORM || - zs_format == PIPE_FORMAT_Z24S8_UNORM || - zs_format == PIPE_FORMAT_Z24X8_UNORM) { - /* scale/convert fragZ from float in [0,1] to uint in [0, ~0] */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - /* fragZ = fragZ >> 8 */ - spe_rotmi(f, fragZ_reg, fragZ_reg, -8); - } - else if (zs_format == PIPE_FORMAT_Z32_UNORM) { - /* scale/convert fragZ from float in [0,1] to uint in [0, ~0] */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - } - else if (zs_format == PIPE_FORMAT_Z16_UNORM) { - /* scale/convert fragZ from float in [0,1] to uint in [0, ~0] */ - spe_cfltu(f, fragZ_reg, fragZ_reg, 32); - /* fragZ = fragZ >> 16 */ - spe_rotmi(f, fragZ_reg, fragZ_reg, -16); - } - } - else { - /* no Z test, but set Z to zero so we don't OR-in garbage below */ - spe_load_uint(f, fbZ_reg, 0); /* XXX set to zero for now */ + /* From the Z/stencil buffer format, pull out the bits we need for + * Z and/or stencil. We'll also convert the incoming fragment Z + * value in fragZ_reg from a floating point value in [0.0..1.0] to + * an unsigned integer value with the appropriate resolution. + */ + switch(zs_format) { + + case PIPE_FORMAT_S8Z24_UNORM: /* fall through */ + case PIPE_FORMAT_X8Z24_UNORM: + if (dsa->depth.enabled) { + /* We need the Z part at least */ + setup_optional_register(f, &fbZ_reg_set, &fbZ_reg); + /* four 24-bit Z values in the low-order bits */ + spe_and_uint(f, fbZ_reg, fbZS_reg, 0x00ffffff); + + /* Incoming fragZ_reg value is a float in 0.0...1.0; convert + * to a 24-bit unsigned integer + */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + spe_rotmi(f, fragZ_reg, fragZ_reg, -8); + } + if (dsa->stencil[0].enabled) { + setup_optional_register(f, &fbS_reg_set, &fbS_reg); + /* four 8-bit Z values in the high-order bits */ + spe_rotmi(f, fbS_reg, fbZS_reg, -24); + } + break; + + case PIPE_FORMAT_Z24S8_UNORM: /* fall through */ + case PIPE_FORMAT_Z24X8_UNORM: + if (dsa->depth.enabled) { + setup_optional_register(f, &fbZ_reg_set, &fbZ_reg); + /* shift by 8 to get the upper 24-bit values */ + spe_rotmi(f, fbS_reg, fbZS_reg, -8); + + /* Incoming fragZ_reg value is a float in 0.0...1.0; convert + * to a 24-bit unsigned integer + */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + spe_rotmi(f, fragZ_reg, fragZ_reg, -8); + } + if (dsa->stencil[0].enabled) { + setup_optional_register(f, &fbS_reg_set, &fbS_reg); + /* 8-bit stencil in the low-order bits - mask them out */ + spe_and_uint(f, fbS_reg, fbZS_reg, 0x000000ff); + } + break; + + case PIPE_FORMAT_Z32_UNORM: + if (dsa->depth.enabled) { + setup_optional_register(f, &fbZ_reg_set, &fbZ_reg); + /* Copy over 4 32-bit values */ + spe_move(f, fbZ_reg, fbZS_reg); + + /* Incoming fragZ_reg value is a float in 0.0...1.0; convert + * to a 32-bit unsigned integer + */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + } + /* No stencil, so can't do anything there */ + break; + + case PIPE_FORMAT_Z16_UNORM: + if (dsa->depth.enabled) { + /* XXX Not sure this is correct, but it was here before, so we're + * going with it for now + */ + setup_optional_register(f, &fbZ_reg_set, &fbZ_reg); + /* Copy over 4 32-bit values */ + spe_move(f, fbZ_reg, fbZS_reg); + + /* Incoming fragZ_reg value is a float in 0.0...1.0; convert + * to a 16-bit unsigned integer + */ + spe_cfltu(f, fragZ_reg, fragZ_reg, 32); + spe_rotmi(f, fragZ_reg, fragZ_reg, -16); + } + /* No stencil */ + break; + + default: + ASSERT(0); /* invalid format */ } - + /* If stencil is enabled, use the stencil-specific code + * generator to generate both the stencil and depth (if needed) + * tests. Otherwise, if only depth is enabled, generate + * a quick depth test. The test generators themselves will + * report back whether the depth/stencil buffer has to be + * written back. + */ if (dsa->stencil[0].enabled) { - /* Extract Stencil bit sfrom fbZS_reg into fbS_reg */ - if (zs_format == PIPE_FORMAT_S8Z24_UNORM || - zs_format == PIPE_FORMAT_X8Z24_UNORM) { - /* XXX extract with a shift */ - ASSERT(0); - } - else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || - zs_format == PIPE_FORMAT_Z24X8_UNORM) { - /* XXX extract with a mask */ - ASSERT(0); - } - } - else { - /* no stencil test, but set to zero so we don't OR-in garbage below */ - spe_load_uint(f, fbS_reg, 0); /* XXX set to zero for now */ - } + /* This will perform the stencil and depth tests, and update + * the mask_reg, fbZ_reg, and fbS_reg as required by the + * tests. + */ + ASSERT(fbS_reg_set); + ASSERT(fbZ_reg_set); + spe_comment(f, 0, "Perform stencil test"); - if (dsa->stencil[0].enabled) { - /* XXX this may involve depth testing too */ - // gen_stencil_test(dsa, f, ... ); - ASSERT(0); + write_depth_stencil = gen_stencil_depth_test(f, dsa, facing_reg, mask_reg, fragZ_reg, fbZ_reg, fbS_reg); } else if (dsa->depth.enabled) { int zmask_reg = spe_allocate_available_register(f); - gen_depth_test(dsa, f, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); + spe_comment(f, 0, "Perform depth test"); + write_depth_stencil = gen_depth_test(f, dsa, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); spe_release_register(f, zmask_reg); } - - /* do we need to write Z and/or Stencil back into framebuffer? */ - write_depth_stencil = (dsa->depth.writemask | - dsa->stencil[0].write_mask | - dsa->stencil[1].write_mask); + else { + write_depth_stencil = false; + } if (write_depth_stencil) { /* Merge latest Z and Stencil values into fbZS_reg. * fbZ_reg has four Z vals in bits [23..0] or bits [15..0]. * fbS_reg has four 8-bit Z values in bits [7..0]. */ + spe_comment(f, 0, "Storing depth/stencil values"); if (zs_format == PIPE_FORMAT_S8Z24_UNORM || zs_format == PIPE_FORMAT_X8Z24_UNORM) { - spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ - spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + if (fbS_reg_set) { + spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ + spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + } + else { + spe_move(f, fbZS_reg, fbZ_reg); + } } else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || zs_format == PIPE_FORMAT_Z24X8_UNORM) { spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */ - spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + if (fbS_reg_set) { + spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ + } } else if (zs_format == PIPE_FORMAT_Z32_UNORM) { spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ @@ -1341,11 +2028,10 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) spe_stqx(f, fbZS_reg, depth_tile_reg, quad_offset_reg); } - spe_release_register(f, fbZ_reg); - spe_release_register(f, fbS_reg); + release_optional_register(f, &fbZ_reg_set, fbZ_reg); + release_optional_register(f, &fbS_reg_set, fbS_reg); } - /* Get framebuffer quad/colors. We'll need these for blending, * color masking, and to obey the quad/pixel mask. * Load: fbRGBA_reg = memory[color_tile + quad_offset] @@ -1354,8 +2040,8 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) */ spe_lqx(f, fbRGBA_reg, color_tile_reg, quad_offset_reg); - if (blend->blend_enable) { + spe_comment(f, 0, "Perform blending"); gen_blend(blend, blend_color, f, color_format, fragR_reg, fragG_reg, fragB_reg, fragA_reg, fbRGBA_reg); } @@ -1369,19 +2055,21 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) int rgba_reg = spe_allocate_available_register(f); /* Pack four float colors as four 32-bit int colors */ + spe_comment(f, 0, "Convert fragment colors to framebuffer colors"); gen_pack_colors(f, color_format, fragR_reg, fragG_reg, fragB_reg, fragA_reg, rgba_reg); if (blend->logicop_enable) { + spe_comment(f, 0, "Compute logic op"); gen_logicop(blend, f, rgba_reg, fbRGBA_reg); } if (blend->colormask != PIPE_MASK_RGBA) { + spe_comment(f, 0, "Compute color mask"); gen_colormask(f, blend->colormask, color_format, rgba_reg, fbRGBA_reg); } - /* Mix fragment colors with framebuffer colors using the quad/pixel mask: * if (mask[i]) * rgba[i] = rgba[i]; @@ -1393,6 +2081,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) /* Store updated quad in tile: * memory[color_tile + quad_offset] = rgba_reg; */ + spe_comment(f, 0, "Store framebuffer colors"); spe_stqx(f, rgba_reg, color_tile_reg, quad_offset_reg); spe_release_register(f, rgba_reg); diff --git a/src/gallium/drivers/cell/ppu/cell_render.c b/src/gallium/drivers/cell/ppu/cell_render.c index dd25ae880e..79cb8df82f 100644 --- a/src/gallium/drivers/cell/ppu/cell_render.c +++ b/src/gallium/drivers/cell/ppu/cell_render.c @@ -152,6 +152,7 @@ cell_flush_prim_buffer(struct cell_context *cell) struct cell_command_render *render = &cell_global.command[i].render; render->prim_type = PIPE_PRIM_TRIANGLES; render->num_verts = cell->prim_buffer.num_verts; + render->front_winding = cell->rasterizer->front_winding; render->vertex_size = cell->vertex_info->size * 4; render->xmin = cell->prim_buffer.xmin; render->ymin = cell->prim_buffer.ymin; diff --git a/src/gallium/drivers/cell/ppu/cell_vbuf.c b/src/gallium/drivers/cell/ppu/cell_vbuf.c index aa63435b93..578ddf62dc 100644 --- a/src/gallium/drivers/cell/ppu/cell_vbuf.c +++ b/src/gallium/drivers/cell/ppu/cell_vbuf.c @@ -214,6 +214,7 @@ cell_vbuf_draw(struct vbuf_render *vbr, render->opcode = CELL_CMD_RENDER; render->prim_type = cvbr->prim; + render->front_winding = cell->rasterizer->front_winding; render->num_indexes = nr_indices; render->min_index = min_index; diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 29a305232e..1cd577c23c 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -73,7 +73,8 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y, vector float fragGreen, vector float fragBlue, vector float fragAlpha, - vector unsigned int mask); + vector unsigned int mask, + uint facing); /** Function for running fragment program */ typedef void (*spu_fragment_program_func)(vector float *inputs, diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index f107764fb2..d252fa6dc1 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -57,7 +57,8 @@ spu_fallback_fragment_ops(uint x, uint y, vector float fragG, vector float fragB, vector float fragA, - vector unsigned int mask) + vector unsigned int mask, + uint facing) { vector float frag_aos[4]; unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */ @@ -433,23 +434,23 @@ spu_fallback_fragment_ops(uint x, uint y, /* Form bitmask depending on color buffer format and colormask bits */ switch (spu.fb.color_format) { case PIPE_FORMAT_A8R8G8B8_UNORM: - if (spu.blend.colormask & (1<<0)) + if (spu.blend.colormask & PIPE_MASK_R) cmask |= 0x00ff0000; /* red */ - if (spu.blend.colormask & (1<<1)) + if (spu.blend.colormask & PIPE_MASK_G) cmask |= 0x0000ff00; /* green */ - if (spu.blend.colormask & (1<<2)) + if (spu.blend.colormask & PIPE_MASK_B) cmask |= 0x000000ff; /* blue */ - if (spu.blend.colormask & (1<<3)) + if (spu.blend.colormask & PIPE_MASK_A) cmask |= 0xff000000; /* alpha */ break; case PIPE_FORMAT_B8G8R8A8_UNORM: - if (spu.blend.colormask & (1<<0)) + if (spu.blend.colormask & PIPE_MASK_R) cmask |= 0x0000ff00; /* red */ - if (spu.blend.colormask & (1<<1)) + if (spu.blend.colormask & PIPE_MASK_G) cmask |= 0x00ff0000; /* green */ - if (spu.blend.colormask & (1<<2)) + if (spu.blend.colormask & PIPE_MASK_B) cmask |= 0xff000000; /* blue */ - if (spu.blend.colormask & (1<<3)) + if (spu.blend.colormask & PIPE_MASK_A) cmask |= 0x000000ff; /* alpha */ break; default: diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h index f817abf046..a61689c83a 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h @@ -38,7 +38,8 @@ spu_fallback_fragment_ops(uint x, uint y, vector float fragGreen, vector float fragBlue, vector float fragAlpha, - vector unsigned int mask); + vector unsigned int mask, + uint facing); #endif /* SPU_PER_FRAGMENT_OP */ diff --git a/src/gallium/drivers/cell/spu/spu_render.c b/src/gallium/drivers/cell/spu/spu_render.c index 305dc98881..82dbeb26b7 100644 --- a/src/gallium/drivers/cell/spu/spu_render.c +++ b/src/gallium/drivers/cell/spu/spu_render.c @@ -279,7 +279,7 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) v1 = (const float *) (vertices + indexes[j+1] * vertex_size); v2 = (const float *) (vertices + indexes[j+2] * vertex_size); - drawn += tri_draw(v0, v1, v2, tx, ty); + drawn += tri_draw(v0, v1, v2, tx, ty, render->front_winding); } //printf("SPU %u: drew %u of %u\n", spu.init.id, drawn, render->num_indexes/3); @@ -297,5 +297,3 @@ cmd_render(const struct cell_command_render *render, uint *pos_incr) printf("SPU %u: RENDER done\n", spu.init.id); } - - diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 0a8fb56a62..6039cd80b2 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -118,6 +118,8 @@ struct setup_stage { float oneoverarea; + uint facing; + uint tx, ty; int cliprect_minx, cliprect_maxx, cliprect_miny, cliprect_maxy; @@ -274,7 +276,7 @@ eval_z(float x, float y) * overall. */ static INLINE void -emit_quad( int x, int y, mask_t mask ) +emit_quad( int x, int y, mask_t mask) { /* If any bits in mask are set... */ if (spu_extract(spu_orx(mask), 0)) { @@ -344,7 +346,8 @@ emit_quad( int x, int y, mask_t mask ) fragZ, soa_frag[0], soa_frag[1], soa_frag[2], soa_frag[3], - mask); + mask, + setup.facing); } } @@ -379,7 +382,8 @@ emit_quad( int x, int y, mask_t mask ) outputs[0*4+1], outputs[0*4+2], outputs[0*4+3], - mask); + mask, + setup.facing); } } } @@ -483,7 +487,7 @@ static void flush_spans( void ) */ for (x = block(minleft); x <= block(maxright); x += 2) { #if 1 - emit_quad( x, setup.span.y, calculate_mask( x ) ); + emit_quad( x, setup.span.y, calculate_mask( x )); #endif } @@ -902,13 +906,28 @@ static void subtriangle( struct edge *eleft, eright->sy += lines; } +static float +determinant( const float *v0, + const float *v1, + const float *v2 ) +{ + /* edge vectors e = v0 - v2, f = v1 - v2 */ + const float ex = v0[0] - v2[0]; + const float ey = v0[1] - v2[1]; + const float fx = v1[0] - v2[0]; + const float fy = v1[1] - v2[1]; + + /* det = cross(e,f).z */ + return ex * fy - ey * fx; +} + /** * Draw triangle into tile at (tx, ty) (tile coords) * The tile data should have already been fetched. */ boolean -tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) +tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty, uint front_winding) { setup.tx = tx; setup.ty = ty; @@ -919,6 +938,12 @@ tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty) setup.cliprect_maxx = (tx + 1) * TILE_SIZE; setup.cliprect_maxy = (ty + 1) * TILE_SIZE; + /* Before we sort vertices, determine the facing of the triangle, + * which will be needed for front/back-face stencil application + */ + float det = determinant(v0, v1, v2); + setup.facing = (det > 0.0) ^ (front_winding == PIPE_WINDING_CW); + if (!setup_sort_vertices((struct vertex_header *) v0, (struct vertex_header *) v1, (struct vertex_header *) v2)) { diff --git a/src/gallium/drivers/cell/spu/spu_tri.h b/src/gallium/drivers/cell/spu/spu_tri.h index aa694dd7c9..abc3d35160 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.h +++ b/src/gallium/drivers/cell/spu/spu_tri.h @@ -31,7 +31,7 @@ extern boolean -tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty); +tri_draw(const float *v0, const float *v1, const float *v2, uint tx, uint ty, uint front_winding); #endif /* SPU_TRI_H */ -- cgit v1.2.3 From 53d4706c6c0922160f310834daaec5718ff1c511 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Wed, 10 Sep 2008 11:39:43 +0100 Subject: make draw's vertex_info struct smaller/quicker to compare with memcmp() --- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 4 +- src/gallium/auxiliary/draw/draw_pt_emit.c | 4 +- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 4 +- .../auxiliary/draw/draw_pt_fetch_shade_emit.c | 6 +-- src/gallium/auxiliary/draw/draw_vertex.c | 6 +-- src/gallium/auxiliary/draw/draw_vertex.h | 44 ++++++++++++++++++---- src/gallium/drivers/i915simple/i915_prim_emit.c | 4 +- .../drivers/i915simple/i915_state_derived.c | 4 +- src/gallium/drivers/softpipe/sp_setup.c | 12 +++--- 9 files changed, 59 insertions(+), 29 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index c0cf4269db..9825e116c3 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -231,9 +231,9 @@ vbuf_set_prim( struct vbuf_stage *vbuf, uint prim ) unsigned emit_sz = 0; unsigned src_buffer = 0; unsigned output_format; - unsigned src_offset = (vbuf->vinfo->src_index[i] * 4 * sizeof(float) ); + unsigned src_offset = (vbuf->vinfo->attrib[i].src_index * 4 * sizeof(float) ); - switch (vbuf->vinfo->emit[i]) { + switch (vbuf->vinfo->attrib[i].emit) { case EMIT_4F: output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit_sz = 4 * sizeof(float); diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index d4eca80588..d520b05869 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -84,11 +84,11 @@ void draw_pt_emit_prepare( struct pt_emit *emit, unsigned emit_sz = 0; unsigned src_buffer = 0; unsigned output_format; - unsigned src_offset = (vinfo->src_index[i] * 4 * sizeof(float) ); + unsigned src_offset = (vinfo->attrib[i].src_index * 4 * sizeof(float) ); - switch (vinfo->emit[i]) { + switch (vinfo->attrib[i].emit) { case EMIT_4F: output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit_sz = 4 * sizeof(float); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 5a4db6cfe5..3966ad48ba 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -121,7 +121,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, memset(&key, 0, sizeof(key)); for (i = 0; i < vinfo->num_attribs; i++) { - const struct pipe_vertex_element *src = &draw->pt.vertex_element[vinfo->src_index[i]]; + const struct pipe_vertex_element *src = &draw->pt.vertex_element[vinfo->attrib[i].src_index]; unsigned emit_sz = 0; unsigned input_format = src->src_format; @@ -129,7 +129,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, unsigned input_offset = src->src_offset; unsigned output_format; - switch (vinfo->emit[i]) { + switch (vinfo->attrib[i].emit) { case EMIT_4F: output_format = PIPE_FORMAT_R32G32B32A32_FLOAT; emit_sz = 4 * sizeof(float); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index a0e08dd10a..f7e6a1a8ee 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -133,7 +133,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, for (i = 0; i < vinfo->num_attribs; i++) { unsigned emit_sz = 0; - switch (vinfo->emit[i]) { + switch (vinfo->attrib[i].emit) { case EMIT_4F: emit_sz = 4 * sizeof(float); break; @@ -161,8 +161,8 @@ static void fse_prepare( struct draw_pt_middle_end *middle, * numbers, not to positions in the hw vertex description -- * that's handled by the output_offset field. */ - fse->key.element[i].out.format = vinfo->emit[i]; - fse->key.element[i].out.vs_output = vinfo->src_index[i]; + fse->key.element[i].out.format = vinfo->attrib[i].emit; + fse->key.element[i].out.vs_output = vinfo->attrib[i].src_index; fse->key.element[i].out.offset = dst_offset; dst_offset += emit_sz; diff --git a/src/gallium/auxiliary/draw/draw_vertex.c b/src/gallium/auxiliary/draw/draw_vertex.c index 1446f785c5..3214213e44 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.c +++ b/src/gallium/auxiliary/draw/draw_vertex.c @@ -49,7 +49,7 @@ draw_compute_vertex_size(struct vertex_info *vinfo) vinfo->size = 0; for (i = 0; i < vinfo->num_attribs; i++) { - switch (vinfo->emit[i]) { + switch (vinfo->attrib[i].emit) { case EMIT_OMIT: break; case EMIT_4UB: @@ -81,8 +81,8 @@ draw_dump_emitted_vertex(const struct vertex_info *vinfo, const uint8_t *data) unsigned i, j; for (i = 0; i < vinfo->num_attribs; i++) { - j = vinfo->src_index[i]; - switch (vinfo->emit[i]) { + j = vinfo->attrib[i].src_index; + switch (vinfo->attrib[i].emit) { case EMIT_OMIT: debug_printf("EMIT_OMIT:"); break; diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index 16c65c4317..dca6158128 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -75,12 +75,41 @@ struct vertex_info { uint num_attribs; uint hwfmt[4]; /**< hardware format info for this format */ - enum interp_mode interp_mode[PIPE_MAX_SHADER_INPUTS]; - enum attrib_emit emit[PIPE_MAX_SHADER_INPUTS]; /**< EMIT_x */ - uint src_index[PIPE_MAX_SHADER_INPUTS]; /**< map to post-xform attribs */ uint size; /**< total vertex size in dwords */ + + /* Keep this small and at the end of the struct to allow quick + * memcmp() comparisons. + */ + struct { + ubyte interp_mode:4; /**< INTERP_x */ + ubyte emit:4; /**< EMIT_x */ + ubyte src_index; /**< map to post-xform attribs */ + } attrib[PIPE_MAX_SHADER_INPUTS]; }; +static inline int +draw_vinfo_size( const struct vertex_info *a ) +{ + return ((const char *)&a->attrib[a->num_attribs] - + (const char *)a); +} + +static inline int +draw_vinfo_compare( const struct vertex_info *a, + const struct vertex_info *b ) +{ + unsigned sizea = draw_vinfo_size( a ); + return memcmp( a, b, sizea ); +} + +static inline void +draw_vinfo_copy( struct vertex_info *dst, + const struct vertex_info *src ) +{ + unsigned size = draw_vinfo_size( src ); + memcpy( dst, src, size ); +} + /** @@ -91,14 +120,15 @@ struct vertex_info */ static INLINE uint draw_emit_vertex_attr(struct vertex_info *vinfo, - enum attrib_emit emit, enum interp_mode interp, + enum attrib_emit emit, + enum interp_mode interp, /* only used by softpipe??? */ uint src_index) { const uint n = vinfo->num_attribs; assert(n < PIPE_MAX_SHADER_INPUTS); - vinfo->emit[n] = emit; - vinfo->interp_mode[n] = interp; - vinfo->src_index[n] = src_index; + vinfo->attrib[n].emit = emit; + vinfo->attrib[n].interp_mode = interp; + vinfo->attrib[n].src_index = src_index; vinfo->num_attribs++; return n; } diff --git a/src/gallium/drivers/i915simple/i915_prim_emit.c b/src/gallium/drivers/i915simple/i915_prim_emit.c index d194c2fb15..8f1f58b2dd 100644 --- a/src/gallium/drivers/i915simple/i915_prim_emit.c +++ b/src/gallium/drivers/i915simple/i915_prim_emit.c @@ -77,9 +77,9 @@ emit_hw_vertex( struct i915_context *i915, assert(!i915->dirty); for (i = 0; i < vinfo->num_attribs; i++) { - const uint j = vinfo->src_index[i]; + const uint j = vinfo->attrib[i].src_index; const float *attrib = vertex->data[j]; - switch (vinfo->emit[i]) { + switch (vinfo->attrib[i].emit) { case EMIT_1F: OUT_BATCH( fui(attrib[0]) ); count++; diff --git a/src/gallium/drivers/i915simple/i915_state_derived.c b/src/gallium/drivers/i915simple/i915_state_derived.c index 488615067c..178d4e8781 100644 --- a/src/gallium/drivers/i915simple/i915_state_derived.c +++ b/src/gallium/drivers/i915simple/i915_state_derived.c @@ -88,12 +88,12 @@ static void calculate_vertex_layout( struct i915_context *i915 ) if (needW) { draw_emit_vertex_attr(&vinfo, EMIT_4F, INTERP_LINEAR, src); vinfo.hwfmt[0] |= S4_VFMT_XYZW; - vinfo.emit[0] = EMIT_4F; + vinfo.attrib[0].emit = EMIT_4F; } else { draw_emit_vertex_attr(&vinfo, EMIT_3F, INTERP_LINEAR, src); vinfo.hwfmt[0] |= S4_VFMT_XYZ; - vinfo.emit[0] = EMIT_3F; + vinfo.attrib[0].emit = EMIT_3F; } /* hardware point size */ diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index bc8263c33e..13d8017393 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -773,10 +773,10 @@ static void setup_tri_coefficients( struct setup_context *setup ) /* setup interpolation for all the remaining attributes: */ for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->src_index[fragSlot]; + const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; - switch (vinfo->interp_mode[fragSlot]) { + switch (vinfo->attrib[fragSlot].interp_mode) { case INTERP_CONSTANT: for (j = 0; j < NUM_CHANNELS; j++) const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); @@ -1084,10 +1084,10 @@ setup_line_coefficients(struct setup_context *setup, /* setup interpolation for all the remaining attributes: */ for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->src_index[fragSlot]; + const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; - switch (vinfo->interp_mode[fragSlot]) { + switch (vinfo->attrib[fragSlot].interp_mode) { case INTERP_CONSTANT: for (j = 0; j < NUM_CHANNELS; j++) const_coeff(setup, &setup->coef[fragSlot], vertSlot, j); @@ -1331,10 +1331,10 @@ setup_point( struct setup_context *setup, const_coeff(setup, &setup->posCoef, 0, 3); for (fragSlot = 0; fragSlot < spfs->info.num_inputs; fragSlot++) { - const uint vertSlot = vinfo->src_index[fragSlot]; + const uint vertSlot = vinfo->attrib[fragSlot].src_index; uint j; - switch (vinfo->interp_mode[fragSlot]) { + switch (vinfo->attrib[fragSlot].interp_mode) { case INTERP_CONSTANT: /* fall-through */ case INTERP_LINEAR: -- cgit v1.2.3 From 7053f8c902e904495dffbbf6ea55f414cec780e7 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 6 Oct 2008 11:54:22 +0100 Subject: rtasm: fix debug build --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index cc5871f873..dd26d4d9ed 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -657,7 +657,7 @@ void sse_movntps( struct x86_function *p, struct x86_reg dst, struct x86_reg src) { - DUMP_RR( dst, reg ); + DUMP_RR( dst, src ); assert(dst.mod != mod_REG); assert(src.mod == mod_REG); -- cgit v1.2.3 From 9b827018133868e84ddc0998a5b5387584c7478c Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Mon, 6 Oct 2008 13:23:56 +0200 Subject: draw: Fix compiler errors on Windows. --- src/gallium/auxiliary/draw/draw_vertex.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index dca6158128..a943607d7e 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -87,14 +87,14 @@ struct vertex_info } attrib[PIPE_MAX_SHADER_INPUTS]; }; -static inline int +static INLINE int draw_vinfo_size( const struct vertex_info *a ) { return ((const char *)&a->attrib[a->num_attribs] - (const char *)a); } -static inline int +static INLINE int draw_vinfo_compare( const struct vertex_info *a, const struct vertex_info *b ) { @@ -102,7 +102,7 @@ draw_vinfo_compare( const struct vertex_info *a, return memcmp( a, b, sizea ); } -static inline void +static INLINE void draw_vinfo_copy( struct vertex_info *dst, const struct vertex_info *src ) { -- cgit v1.2.3 From f7ee3c979261b4a2b77365b47c7147f69fbfd606 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 6 Oct 2008 18:31:56 -0600 Subject: gallium: replace assertion with conditional/recovery code The assertion failed when we ran out of exec memory. Found with conform texcombine test. --- src/gallium/auxiliary/rtasm/rtasm_x86sse.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c index dd26d4d9ed..ad9d8f8ced 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c +++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c @@ -370,7 +370,11 @@ void x86_jcc( struct x86_function *p, DUMP_I(cc); if (offset < 0) { - assert(p->csr - p->store > -offset); + /*assert(p->csr - p->store > -offset);*/ + if (p->csr - p->store <= -offset) { + /* probably out of memory (using the error_overflow buffer) */ + return; + } } if (offset <= 127 && offset >= -128) { -- cgit v1.2.3 From 4d7394f89292131323fc8e39efa511a2eeb8cc60 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 7 Oct 2008 14:25:09 +0900 Subject: gallium: Introduce PIPE_ARCH_SSE define for SSE support. Besides meaning x86 and x86-64 architecture, it also depends on SSE2 support enabled on gcc. This fixes the linux-debug build. --- src/gallium/auxiliary/draw/draw_vs_sse.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 2 +- src/gallium/auxiliary/util/u_sse.h | 2 +- src/gallium/drivers/softpipe/sp_fs_sse.c | 2 +- src/gallium/include/pipe/p_config.h | 8 ++++++++ 5 files changed, 12 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 0efabd9de8..b11ae31662 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -37,7 +37,7 @@ #include "draw_vs.h" -#if defined(PIPE_ARCH_X86) +#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE) #include "pipe/p_shader_tokens.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 79f424b692..f79170b9d6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -27,7 +27,7 @@ #include "pipe/p_config.h" -#ifdef PIPE_ARCH_X86 +#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE) #include "pipe/p_debug.h" #include "pipe/p_shader_tokens.h" diff --git a/src/gallium/auxiliary/util/u_sse.h b/src/gallium/auxiliary/util/u_sse.h index 68e56f0816..e2a8491e62 100644 --- a/src/gallium/auxiliary/util/u_sse.h +++ b/src/gallium/auxiliary/util/u_sse.h @@ -39,7 +39,7 @@ #include "pipe/p_config.h" -#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) +#if defined(PIPE_ARCH_SSE) #include #include diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 496ed43df2..0111469405 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -40,7 +40,7 @@ #include "tgsi/tgsi_sse2.h" -#ifdef PIPE_ARCH_X86 +#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE) #include "rtasm/rtasm_x86sse.h" diff --git a/src/gallium/include/pipe/p_config.h b/src/gallium/include/pipe/p_config.h index af3746c026..ef05547819 100644 --- a/src/gallium/include/pipe/p_config.h +++ b/src/gallium/include/pipe/p_config.h @@ -85,6 +85,14 @@ #define PIPE_ARCH_X86_64 #endif +#if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) +#if defined(PIPE_CC_GCC) && !defined(__SSE2__) +/* #warning SSE2 support requires -msse -msse2 compiler options */ +#else +#define PIPE_ARCH_SSE +#endif +#endif + #if 0 /* FIXME */ #define PIPE_ARCH_PPC #endif -- cgit v1.2.3 From c48da7d78b4e7bdbe056b3c9668756d49019be06 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 6 Oct 2008 12:22:55 +0100 Subject: draw: add switch for drivers to force vertex data passthrough --- src/gallium/auxiliary/draw/draw_context.c | 8 +++++++ src/gallium/auxiliary/draw/draw_context.h | 3 +++ src/gallium/auxiliary/draw/draw_private.h | 3 +++ src/gallium/auxiliary/draw/draw_pt.c | 38 +++++++++++++++---------------- 4 files changed, 33 insertions(+), 19 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 36751c2621..41a4cba1dd 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -272,6 +272,14 @@ draw_enable_point_sprites(struct draw_context *draw, boolean enable) } +void +draw_set_force_passthrough( struct draw_context *draw, boolean enable ) +{ + draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE ); + draw->force_passthrough = enable; +} + + /** * Ask the draw module for the location/slot of the given vertex attribute in * a post-transformed vertex. diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 0ab3681b64..3eeb453531 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -160,6 +160,9 @@ void draw_set_render( struct draw_context *draw, void draw_set_driver_clipping( struct draw_context *draw, boolean bypass_clipping ); +void draw_set_force_passthrough( struct draw_context *draw, + boolean enable ); + /******************************************************************************* * Draw pipeline */ diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 626a2e3e30..37c4c87f87 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -163,12 +163,15 @@ struct draw_context struct { boolean bypass_clipping; + boolean bypass_vs; } driver; boolean flushing; /**< debugging/sanity */ boolean suspend_flushing; /**< internally set */ boolean bypass_clipping; /**< set if either api or driver bypass_clipping true */ + boolean force_passthrough; /**< never clip or shade */ + /* pipe state that we need: */ const struct pipe_rasterizer_state *rasterizer; struct pipe_viewport_state viewport; diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 669c11c993..87ec6ae20c 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -69,26 +69,26 @@ draw_pt_arrays(struct draw_context *draw, return TRUE; } - - if (!draw->render) { - opt |= PT_PIPELINE; - } - - if (draw_need_pipeline(draw, - draw->rasterizer, - prim)) { - opt |= PT_PIPELINE; - } - - if (!draw->bypass_clipping && !draw->pt.test_fse) { - opt |= PT_CLIPTEST; + if (!draw->force_passthrough) { + if (!draw->render) { + opt |= PT_PIPELINE; + } + + if (draw_need_pipeline(draw, + draw->rasterizer, + prim)) { + opt |= PT_PIPELINE; + } + + if (!draw->bypass_clipping && !draw->pt.test_fse) { + opt |= PT_CLIPTEST; + } + + if (!draw->rasterizer->bypass_vs) { + opt |= PT_SHADE; + } } - - if (!draw->rasterizer->bypass_vs) { - opt |= PT_SHADE; - } - - + if (opt == 0) middle = draw->pt.middle.fetch_emit; else if (opt == PT_SHADE && !draw->pt.no_fse) -- cgit v1.2.3 From 23cc303994eb630c56b1224dfdac51dcea41ed03 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 7 Oct 2008 16:44:24 +0100 Subject: draw: don't assume output buffer pointer is aligned --- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index dd79bc799a..39f75b50b7 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -338,7 +338,7 @@ static void emit_store_R32G32B32A32( struct aos_compilation *cp, struct x86_reg dst_ptr, struct x86_reg dataXMM ) { - sse_movaps(cp->func, dst_ptr, dataXMM); + sse_movups(cp->func, dst_ptr, dataXMM); } static void emit_store_R32G32B32( struct aos_compilation *cp, -- cgit v1.2.3 From 94ba48bd85ec5c62e1a303d8bb3fc25c8e153247 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin Date: Tue, 7 Oct 2008 21:11:01 +0200 Subject: Gallivm: fix the constant layout, this gets a bunch of progs/ working. Notably, gears doesn't. --- src/gallium/auxiliary/gallivm/gallivm_cpu.cpp | 5 +- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 5 ++ src/gallium/auxiliary/gallivm/instructionssoa.h | 1 + src/gallium/auxiliary/gallivm/storagesoa.cpp | 71 +++++++++++++++++++---- src/gallium/auxiliary/gallivm/storagesoa.h | 7 ++- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 10 ++-- 6 files changed, 78 insertions(+), 21 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp index 3a4a41e544..3a2f2878a3 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp @@ -158,8 +158,8 @@ void gallivm_cpu_jit_compile(struct gallivm_cpu_engine *cpu, struct gallivm_prog llvm::ExistingModuleProvider *mp = new llvm::ExistingModuleProvider(mod); llvm::ExecutionEngine *ee = cpu->engine; assert(ee); - /*FIXME : remove */ - ee->DisableLazyCompilation(); + /*FIXME : why was this disabled ? we need it for pow/sqrt/... */ + ee->DisableLazyCompilation(false); ee->addModuleProvider(mp); llvm::Function *func = func_for_shader(prog); @@ -202,7 +202,6 @@ int gallivm_cpu_vs_exec(struct gallivm_prog *prog, unsigned int i, j; unsigned slot; vertex_shader_runner runner = reinterpret_cast(prog->function); - assert(runner); for (i = 0; i < count; i += MAX_TGSI_VERTICES) { diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index a658072551..1143ee0b0b 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -171,6 +171,11 @@ std::vector InstructionsSoa::extractVector(llvm::Value *vector) return res; } +llvm::IRBuilder<>* InstructionsSoa::getIRBuilder() +{ + return &m_builder; +} + void InstructionsSoa::createFunctionMap() { m_functionsMap[TGSI_OPCODE_ABS] = "abs"; diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.h b/src/gallium/auxiliary/gallivm/instructionssoa.h index 3817fdc904..d6831e0a6b 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.h +++ b/src/gallium/auxiliary/gallivm/instructionssoa.h @@ -76,6 +76,7 @@ public: void end(); std::vector extractVector(llvm::Value *vector); + llvm::IRBuilder<>* getIRBuilder(); private: const char * name(const char *prefix) const; llvm::Value *vectorFromVals(llvm::Value *x, llvm::Value *y, diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp index 78d754371f..646b9d7ca0 100644 --- a/src/gallium/auxiliary/gallivm/storagesoa.cpp +++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp @@ -91,19 +91,29 @@ void StorageSoa::declareImmediates() for (unsigned int i = 0; i < m_immediatesToFlush.size(); ++i) { std::vector vec = m_immediatesToFlush[i]; std::vector vals(4); + float val; std::vector channelArray; - vals[0] = vec[0]; vals[1] = vec[0]; vals[2] = vec[0]; vals[3] = vec[0]; - llvm::Constant *xChannel = createConstGlobalVector(vals); + val = vec[0]; + llvm::Constant *xChannel = createConstGlobalFloat(val); + val = vec[1]; + llvm::Constant *yChannel = createConstGlobalFloat(val); + val = vec[2]; + llvm::Constant *zChannel = createConstGlobalFloat(val); + val = vec[3]; + llvm::Constant *wChannel = createConstGlobalFloat(val); - vals[0] = vec[1]; vals[1] = vec[1]; vals[2] = vec[1]; vals[3] = vec[1]; +// vals[0] = vec[0]; vals[1] = vec[1]; vals[2] = vec[2]; vals[3] = vec[3]; +// llvm::Constant *xChannel = createConstGlobalVector(vec[0]); + +/* vals[0] = vec[1]; vals[1] = vec[1]; vals[2] = vec[1]; vals[3] = vec[1]; llvm::Constant *yChannel = createConstGlobalVector(vals); vals[0] = vec[2]; vals[1] = vec[2]; vals[2] = vec[2]; vals[3] = vec[2]; llvm::Constant *zChannel = createConstGlobalVector(vals); vals[0] = vec[3]; vals[1] = vec[3]; vals[2] = vec[3]; vals[3] = vec[3]; - llvm::Constant *wChannel = createConstGlobalVector(vals); + llvm::Constant *wChannel = createConstGlobalVector(vals);*/ channelArray.push_back(xChannel); channelArray.push_back(yChannel); channelArray.push_back(zChannel); @@ -144,22 +154,54 @@ std::vector StorageSoa::inputElement(llvm::Value *idx) return res; } -std::vector StorageSoa::constElement(llvm::Value *idx) +llvm::Value* StorageSoa::unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value* vector, int cc) +{ + std::vector x(4); + x[0] = m_builder->CreateExtractElement(vector, + constantInt(cc), + name("x")); + + VectorType *vectorType = VectorType::get(Type::FloatTy, 4); + Constant *constVector = Constant::getNullValue(vectorType); + Value *res = m_builder->CreateInsertElement(constVector, x[0], + constantInt(0), + name("vecx")); + res = m_builder->CreateInsertElement(res, x[0], constantInt(1), + name("vecxx")); + res = m_builder->CreateInsertElement(res, x[0], constantInt(2), + name("vecxxx")); + res = m_builder->CreateInsertElement(res, x[0], constantInt(3), + name("vecxxxx")); + return res; +} + +std::vector StorageSoa::constElement(llvm::IRBuilder<>* m_builder, llvm::Value *idx) { std::vector res(4); + std::vector res2(4); llvm::Value *xChannel, *yChannel, *zChannel, *wChannel; xChannel = elementPointer(m_consts, idx, 0); - yChannel = elementPointer(m_consts, idx, 1); +/* yChannel = elementPointer(m_consts, idx, 1); zChannel = elementPointer(m_consts, idx, 2); - wChannel = elementPointer(m_consts, idx, 3); + wChannel = elementPointer(m_consts, idx, 3);*/ res[0] = alignedArrayLoad(xChannel); +/* res[1] = alignedArrayLoad(xChannel); + res[2] = alignedArrayLoad(xChannel); + res[3] = alignedArrayLoad(xChannel);*/ + + + res2[0]=unpackConstElement(m_builder, res[0],0); + res2[1]=unpackConstElement(m_builder, res[0],1); + res2[2]=unpackConstElement(m_builder, res[0],2); + res2[3]=unpackConstElement(m_builder, res[0],3); +/*res[0] = alignedArrayLoad(xChannel); res[1] = alignedArrayLoad(yChannel); res[2] = alignedArrayLoad(zChannel); - res[3] = alignedArrayLoad(wChannel); + res[3] = alignedArrayLoad(wChannel);*/ - return res; + return res2; } std::vector StorageSoa::outputElement(llvm::Value *idx) @@ -260,6 +302,12 @@ llvm::Module * StorageSoa::currentModule() const return m_block->getParent()->getParent(); } +llvm::Constant * StorageSoa::createConstGlobalFloat(const float val) +{ + Constant*c = ConstantFP::get(APFloat(val)); + return c; +} + llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector &vec) { VectorType *vectorType = VectorType::get(Type::FloatTy, 4); @@ -278,7 +326,7 @@ llvm::Constant * StorageSoa::createConstGlobalVector(const std::vector &v } std::vector StorageSoa::load(enum tgsi_file_type type, int idx, int swizzle, - llvm::Value *indIdx) + llvm::IRBuilder<>* m_builder,llvm::Value *indIdx) { std::vector val(4); @@ -302,7 +350,8 @@ std::vector StorageSoa::load(enum tgsi_file_type type, int idx, in val = tempElement(realIndex); break; case TGSI_FILE_CONSTANT: - val = constElement(realIndex); + val = constElement(m_builder, realIndex); + printf("constant COUCOU index %d\n",realIndex); break; case TGSI_FILE_IMMEDIATE: val = immediateElement(realIndex); diff --git a/src/gallium/auxiliary/gallivm/storagesoa.h b/src/gallium/auxiliary/gallivm/storagesoa.h index ae2fc7c6ae..f21ca6ec43 100644 --- a/src/gallium/auxiliary/gallivm/storagesoa.h +++ b/src/gallium/auxiliary/gallivm/storagesoa.h @@ -29,6 +29,7 @@ #define STORAGESOA_H #include +#include #include #include @@ -56,7 +57,7 @@ public: std::vector load(enum tgsi_file_type type, int idx, int swizzle, - llvm::Value *indIdx =0); + llvm::IRBuilder<>* m_builder, llvm::Value *indIdx =0); void store(enum tgsi_file_type type, int idx, const std::vector &val, int mask); @@ -76,10 +77,12 @@ private: const char *name(const char *prefix) const; llvm::Value *alignedArrayLoad(llvm::Value *val); llvm::Module *currentModule() const; + llvm::Constant *createConstGlobalFloat(const float val); llvm::Constant *createConstGlobalVector(const std::vector &vec); std::vector inputElement(llvm::Value *indIdx); - std::vector constElement(llvm::Value *indIdx); + llvm::Value* unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx, int cc); + std::vector constElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx); std::vector outputElement(llvm::Value *indIdx); std::vector tempElement(llvm::Value *indIdx); std::vector immediateElement(llvm::Value *indIdx); diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 7292c0e366..1191a6cae9 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -52,7 +52,7 @@ static inline FunctionType *vertexShaderFunctionType() // pass are castable to the following: // [4 x <4 x float>] inputs, // [4 x <4 x float>] output, - // [4 x [4 x float]] consts, + // [4 x [1 x float]] consts, // [4 x <4 x float>] temps std::vector funcArgs; @@ -61,7 +61,7 @@ static inline FunctionType *vertexShaderFunctionType() PointerType *vectorArrayPtr = PointerType::get(vectorArray, 0); ArrayType *floatArray = ArrayType::get(Type::FloatTy, 4); - ArrayType *constsArray = ArrayType::get(floatArray, 4); + ArrayType *constsArray = ArrayType::get(floatArray, 1); PointerType *constsArrayPtr = PointerType::get(constsArray, 0); funcArgs.push_back(vectorArrayPtr);//inputs @@ -246,6 +246,7 @@ translate_instruction(llvm::Module *module, val = storage->constElement(src->SrcRegister.Index, indIdx); } else if (src->SrcRegister.File == TGSI_FILE_INPUT) { val = storage->inputElement(src->SrcRegister.Index, indIdx); + // FIXME we should not be generating elements for temporaries, this creates useless memory writes } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) { val = storage->tempElement(src->SrcRegister.Index); } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) { @@ -676,6 +677,7 @@ translate_instruction(llvm::Module *module, if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + // FIXME we should not be generating elements for temporaries, this creates useless memory writes } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) { storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) { @@ -707,9 +709,8 @@ translate_instructionir(llvm::Module *module, if (src->SrcRegister.Indirect) { indIdx = storage->addrElement(src->SrcRegisterInd.Index); } - val = storage->load((enum tgsi_file_type)src->SrcRegister.File, - src->SrcRegister.Index, swizzle, indIdx); + src->SrcRegister.Index, swizzle, instr->getIRBuilder(), indIdx); inputs[i] = val; } @@ -1025,7 +1026,6 @@ translate_instructionir(llvm::Module *module, /* store results */ for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; - storage->store((enum tgsi_file_type)dst->DstRegister.File, dst->DstRegister.Index, out, dst->DstRegister.WriteMask); } -- cgit v1.2.3 From 85e578bbc7032b356b436b282534c765ef35f064 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin Date: Tue, 7 Oct 2008 21:13:49 +0200 Subject: Gallivm: don't say hello, it's rude. --- src/gallium/auxiliary/gallivm/storagesoa.cpp | 1 - 1 file changed, 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp index 646b9d7ca0..d4ecf97c36 100644 --- a/src/gallium/auxiliary/gallivm/storagesoa.cpp +++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp @@ -351,7 +351,6 @@ std::vector StorageSoa::load(enum tgsi_file_type type, int idx, in break; case TGSI_FILE_CONSTANT: val = constElement(m_builder, realIndex); - printf("constant COUCOU index %d\n",realIndex); break; case TGSI_FILE_IMMEDIATE: val = immediateElement(realIndex); -- cgit v1.2.3 From f192ad5ebca138a21fd372fa268ba2b0f4f8b147 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 7 Oct 2008 14:33:16 -0600 Subject: gallium: added general-purpose key->data map/lookup container --- src/gallium/auxiliary/util/Makefile | 1 + src/gallium/auxiliary/util/SConscript | 5 +- src/gallium/auxiliary/util/u_keymap.c | 309 ++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_keymap.h | 68 ++++++++ 4 files changed, 381 insertions(+), 2 deletions(-) create mode 100644 src/gallium/auxiliary/util/u_keymap.c create mode 100644 src/gallium/auxiliary/util/u_keymap.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index d3951e4e7d..b3d1045a8f 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -10,6 +10,7 @@ C_SOURCES = \ u_gen_mipmap.c \ u_handle_table.c \ u_hash_table.c \ + u_keymap.c \ u_math.c \ u_mm.c \ u_rect.c \ diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index e65c17b1cc..8a04955a16 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -11,13 +11,14 @@ util = env.ConvenienceLibrary( 'u_gen_mipmap.c', 'u_handle_table.c', 'u_hash_table.c', + 'u_keymap.c', 'u_math.c', 'u_mm.c', 'u_rect.c', 'u_simple_shaders.c', 'u_snprintf.c', - 'u_stream_stdc.c', - 'u_stream_wd.c', + 'u_stream_stdc.c', + 'u_stream_wd.c', 'u_tile.c', 'u_time.c', ]) diff --git a/src/gallium/auxiliary/util/u_keymap.c b/src/gallium/auxiliary/util/u_keymap.c new file mode 100644 index 0000000000..01b17ddb1b --- /dev/null +++ b/src/gallium/auxiliary/util/u_keymap.c @@ -0,0 +1,309 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * Key lookup/associative container. + * + * Like Jose's u_hash_table, based on CSO cache code for now. + * + * Author: Brian Paul + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_error.h" + +#include "cso_cache/cso_hash.h" + +#include "util/u_memory.h" +#include "util/u_keymap.h" + + +struct keymap +{ + struct cso_hash *cso; + unsigned key_size; + unsigned max_entries; /* XXX not obeyed net */ + unsigned num_entries; + keymap_delete_func delete_func; +}; + + +struct keymap_item +{ + void *key, *value; +}; + + +/** + * This the default key-delete function used when the client doesn't + * provide one. + */ +static void +default_delete_func(const struct keymap *map, + const void *key, void *data, void *user) +{ + FREE((void*) data); +} + + +static INLINE struct keymap_item * +hash_table_item(struct cso_hash_iter iter) +{ + return (struct keymap_item *) cso_hash_iter_data(iter); +} + + +/** + * Return 4-byte hash key for a block of bytes. + */ +static unsigned +hash(const void *key, unsigned keySize) +{ + unsigned i, hash; + + keySize /= 4; /* convert from bytes to uints */ + + hash = 0; + for (i = 0; i < keySize; i++) { + hash ^= (i + 1) * ((const unsigned *) key)[i]; + } + + /*hash = hash ^ (hash >> 11) ^ (hash >> 22);*/ + + return hash; +} + + +/** + * Create a new map. + * \param keySize size of the keys in bytes + * \param maxEntries max number of entries to allow (~0 = infinity) + * \param deleteFunc optional callback to call when entries + * are deleted/replaced + */ +struct keymap * +util_new_keymap(unsigned keySize, unsigned maxEntries, + keymap_delete_func deleteFunc) +{ + struct keymap *map = MALLOC_STRUCT(keymap); + if (!map) + return NULL; + + map->cso = cso_hash_create(); + if (!map->cso) { + FREE(map); + return NULL; + } + + map->max_entries = maxEntries; + map->num_entries = 0; + map->key_size = keySize; + map->delete_func = deleteFunc ? deleteFunc : default_delete_func; + + return map; +} + + +/** + * Delete/free a keymap and all entries. The deleteFunc that was given at + * create time will be called for each entry. + * \param user user-provided pointer passed through to the delete callback + */ +void +util_delete_keymap(struct keymap *map, void *user) +{ + util_keymap_remove_all(map, user); + cso_hash_delete(map->cso); + FREE(map); +} + + +static INLINE struct cso_hash_iter +hash_table_find_iter(const struct keymap *map, const void *key, + unsigned key_hash) +{ + struct cso_hash_iter iter; + struct keymap_item *item; + + iter = cso_hash_find(map->cso, key_hash); + while (!cso_hash_iter_is_null(iter)) { + item = (struct keymap_item *) cso_hash_iter_data(iter); + if (!memcmp(item->key, key, map->key_size)) + break; + iter = cso_hash_iter_next(iter); + } + + return iter; +} + + +static INLINE struct keymap_item * +hash_table_find_item(const struct keymap *map, const void *key, + unsigned key_hash) +{ + struct cso_hash_iter iter = hash_table_find_iter(map, key, key_hash); + if (cso_hash_iter_is_null(iter)) { + return NULL; + } + else { + return hash_table_item(iter); + } +} + + +/** + * Insert a new key + data pointer into the table. + * Note: we create a copy of the key, but not the data! + * If the key is already present in the table, replace the existing + * entry (calling the delete callback on the previous entry). + * If the maximum capacity of the map is reached an old entry + * will be deleted (the delete callback will be called). + */ +boolean +util_keymap_insert(struct keymap *map, const void *key, + const void *data, void *user) +{ + unsigned key_hash; + struct keymap_item *item; + struct cso_hash_iter iter; + + assert(map); + + key_hash = hash(key, map->key_size); + + item = hash_table_find_item(map, key, key_hash); + if (item) { + /* call delete callback for old entry/item */ + map->delete_func(map, item->key, item->value, user); + item->value = (void *) data; + return TRUE; + } + + item = MALLOC_STRUCT(keymap_item); + if (!item) + return FALSE; + + item->key = mem_dup(key, map->key_size); + item->value = (void *) data; + + iter = cso_hash_insert(map->cso, key_hash, item); + if (cso_hash_iter_is_null(iter)) { + FREE(item); + return FALSE; + } + + map->num_entries++; + + return TRUE; +} + + +/** + * Look up a key in the map and return the associated data pointer. + */ +const void * +util_keymap_lookup(const struct keymap *map, const void *key) +{ + unsigned key_hash; + struct keymap_item *item; + + assert(map); + + key_hash = hash(key, map->key_size); + + item = hash_table_find_item(map, key, key_hash); + if (!item) + return NULL; + + return item->value; +} + + +/** + * Remove an entry from the map. + * The delete callback will be called if the given key/entry is found. + * \param user passed to the delete callback as the last param. + */ +void +util_keymap_remove(struct keymap *map, const void *key, void *user) +{ + unsigned key_hash; + struct cso_hash_iter iter; + struct keymap_item *item; + + assert(map); + + key_hash = hash(key, map->key_size); + + iter = hash_table_find_iter(map, key, key_hash); + if (cso_hash_iter_is_null(iter)) + return; + + item = hash_table_item(iter); + assert(item); + map->delete_func(map, item->key, item->value, user); + FREE(item->key); + FREE(item); + + map->num_entries--; + + cso_hash_erase(map->cso, iter); +} + + +/** + * Remove all entries from the map, calling the delete callback for each. + * \param user passed to the delete callback as the last param. + */ +void +util_keymap_remove_all(struct keymap *map, void *user) +{ + struct cso_hash_iter iter; + struct keymap_item *item; + + assert(map); + + iter = cso_hash_first_node(map->cso); + while (!cso_hash_iter_is_null(iter)) { + item = (struct keymap_item *) + cso_hash_take(map->cso, cso_hash_iter_key(iter)); + map->delete_func(map, item->key, item->value, user); + FREE(item->key); + FREE(item); + iter = cso_hash_first_node(map->cso); + } +} + + +extern void +util_keymap_info(const struct keymap *map) +{ + debug_printf("Keymap %p: %u of max %u entries\n", + (void *) map, map->num_entries, map->max_entries); +} diff --git a/src/gallium/auxiliary/util/u_keymap.h b/src/gallium/auxiliary/util/u_keymap.h new file mode 100644 index 0000000000..8d60a76fc3 --- /dev/null +++ b/src/gallium/auxiliary/util/u_keymap.h @@ -0,0 +1,68 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_KEYMAP_H +#define U_KEYMAP_H + +#include "pipe/p_compiler.h" + + +/** opaque keymap type */ +struct keymap; + + +/** Delete/callback function type */ +typedef void (*keymap_delete_func)(const struct keymap *map, + const void *key, void *data, + void *user); + + +extern struct keymap * +util_new_keymap(unsigned keySize, unsigned maxEntries, + keymap_delete_func deleteFunc); + +extern void +util_delete_keymap(struct keymap *map, void *user); + +extern boolean +util_keymap_insert(struct keymap *map, const void *key, + const void *data, void *user); + +extern const void * +util_keymap_lookup(const struct keymap *map, const void *key); + +extern void +util_keymap_remove(struct keymap *map, const void *key, void *user); + +extern void +util_keymap_remove_all(struct keymap *map, void *user); + +extern void +util_keymap_info(const struct keymap *map); + + +#endif /* U_KEYMAP_H */ -- cgit v1.2.3 From a0809c527105496f0dac234bee72d67abd2d2b17 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin Date: Tue, 7 Oct 2008 23:43:21 +0200 Subject: Gallivm: reorder the functions alphabetically so I can work on it. --- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 240 +++++++++++----------- 1 file changed, 119 insertions(+), 121 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index 1143ee0b0b..d5600fd22d 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -90,68 +90,11 @@ llvm::Value * InstructionsSoa::vectorFromVals(llvm::Value *x, llvm::Value *y, return res; } -std::vector InstructionsSoa::arl(const std::vector in) -{ - std::vector res(4); - - //Extract x's - llvm::Value *x1 = m_builder.CreateExtractElement(in[0], - m_storage->constantInt(0), - name("extractX")); - //cast it to an unsigned int - x1 = m_builder.CreateFPToUI(x1, IntegerType::get(32), name("x1IntCast")); - - res[0] = x1;//vectorFromVals(x1, x2, x3, x4); - //only x is valid. the others shouldn't be necessary - /* - res[1] = Constant::getNullValue(m_floatVecType); - res[2] = Constant::getNullValue(m_floatVecType); - res[3] = Constant::getNullValue(m_floatVecType); - */ - - return res; -} - - -std::vector InstructionsSoa::add(const std::vector in1, - const std::vector in2) -{ - std::vector res(4); - - res[0] = m_builder.CreateAdd(in1[0], in2[0], name("addx")); - res[1] = m_builder.CreateAdd(in1[1], in2[1], name("addy")); - res[2] = m_builder.CreateAdd(in1[2], in2[2], name("addz")); - res[3] = m_builder.CreateAdd(in1[3], in2[3], name("addw")); - - return res; -} - -std::vector InstructionsSoa::mul(const std::vector in1, - const std::vector in2) -{ - std::vector res(4); - - res[0] = m_builder.CreateMul(in1[0], in2[0], name("mulx")); - res[1] = m_builder.CreateMul(in1[1], in2[1], name("muly")); - res[2] = m_builder.CreateMul(in1[2], in2[2], name("mulz")); - res[3] = m_builder.CreateMul(in1[3], in2[3], name("mulw")); - - return res; -} - void InstructionsSoa::end() { m_builder.CreateRetVoid(); } -std::vector InstructionsSoa::madd(const std::vector in1, - const std::vector in2, - const std::vector in3) -{ - std::vector res = mul(in1, in2); - return add(res, in3); -} - std::vector InstructionsSoa::extractVector(llvm::Value *vector) { std::vector res(4); @@ -279,6 +222,41 @@ std::vector InstructionsSoa::abs(const std::vector i return callBuiltin(func, in1); } +std::vector InstructionsSoa::add(const std::vector in1, + const std::vector in2) +{ + std::vector res(4); + + res[0] = m_builder.CreateAdd(in1[0], in2[0], name("addx")); + res[1] = m_builder.CreateAdd(in1[1], in2[1], name("addy")); + res[2] = m_builder.CreateAdd(in1[2], in2[2], name("addz")); + res[3] = m_builder.CreateAdd(in1[3], in2[3], name("addw")); + + return res; +} + +std::vector InstructionsSoa::arl(const std::vector in) +{ + std::vector res(4); + + //Extract x's + llvm::Value *x1 = m_builder.CreateExtractElement(in[0], + m_storage->constantInt(0), + name("extractX")); + //cast it to an unsigned int + x1 = m_builder.CreateFPToUI(x1, IntegerType::get(32), name("x1IntCast")); + + res[0] = x1;//vectorFromVals(x1, x2, x3, x4); + //only x is valid. the others shouldn't be necessary + /* + res[1] = Constant::getNullValue(m_floatVecType); + res[2] = Constant::getNullValue(m_floatVecType); + res[3] = Constant::getNullValue(m_floatVecType); + */ + + return res; +} + std::vector InstructionsSoa::dp3(const std::vector in1, const std::vector in2) { @@ -286,6 +264,59 @@ std::vector InstructionsSoa::dp3(const std::vector i return callBuiltin(func, in1, in2); } +std::vector InstructionsSoa::lit(const std::vector in) +{ + llvm::Function *func = function(TGSI_OPCODE_LIT); + return callBuiltin(func, in); +} + +std::vector InstructionsSoa::madd(const std::vector in1, + const std::vector in2, + const std::vector in3) +{ + std::vector res = mul(in1, in2); + return add(res, in3); +} + +std::vector InstructionsSoa::max(const std::vector in1, + const std::vector in2) +{ + llvm::Function *func = function(TGSI_OPCODE_MAX); + return callBuiltin(func, in1, in2); +} + +std::vector InstructionsSoa::min(const std::vector in1, + const std::vector in2) +{ + llvm::Function *func = function(TGSI_OPCODE_MIN); + return callBuiltin(func, in1, in2); +} + +std::vector InstructionsSoa::mul(const std::vector in1, + const std::vector in2) +{ + std::vector res(4); + + res[0] = m_builder.CreateMul(in1[0], in2[0], name("mulx")); + res[1] = m_builder.CreateMul(in1[1], in2[1], name("muly")); + res[2] = m_builder.CreateMul(in1[2], in2[2], name("mulz")); + res[3] = m_builder.CreateMul(in1[3], in2[3], name("mulw")); + + return res; +} + +std::vector InstructionsSoa::pow(const std::vector in1, + const std::vector in2) +{ + llvm::Function *func = function(TGSI_OPCODE_POWER); + return callBuiltin(func, in1, in2); +} + +std::vector InstructionsSoa::rsq(const std::vector in) +{ + llvm::Function *func = function(TGSI_OPCODE_RSQ); + return callBuiltin(func, in); +} std::vector InstructionsSoa::slt(const std::vector in1, const std::vector in2) @@ -294,6 +325,37 @@ std::vector InstructionsSoa::slt(const std::vector i return callBuiltin(func, in1, in2); } +std::vector InstructionsSoa::sub(const std::vector in1, + const std::vector in2) +{ + std::vector res(4); + + res[0] = m_builder.CreateSub(in1[0], in2[0], name("subx")); + res[1] = m_builder.CreateSub(in1[1], in2[1], name("suby")); + res[2] = m_builder.CreateSub(in1[2], in2[2], name("subz")); + res[3] = m_builder.CreateSub(in1[3], in2[3], name("subw")); + + return res; +} + +void checkFunction(Function *func) +{ + for (Function::const_iterator BI = func->begin(), BE = func->end(); + BI != BE; ++BI) { + const BasicBlock &BB = *BI; + for (BasicBlock::const_iterator II = BB.begin(), IE = BB.end(); + II != IE; ++II) { + const Instruction &I = *II; + std::cout<< "Instr = "<setOperand(op, V); + } + } + } +} + llvm::Value * InstructionsSoa::allocaTemp() { VectorType *vector = VectorType::get(Type::FloatTy, 4); @@ -413,46 +475,6 @@ std::vector InstructionsSoa::callBuiltin(llvm::Function *func, const std return allocaToResult(allocaPtr); } -std::vector InstructionsSoa::pow(const std::vector in1, - const std::vector in2) -{ - llvm::Function *func = function(TGSI_OPCODE_POWER); - return callBuiltin(func, in1, in2); -} - -std::vector InstructionsSoa::min(const std::vector in1, - const std::vector in2) -{ - llvm::Function *func = function(TGSI_OPCODE_MIN); - return callBuiltin(func, in1, in2); -} - - -std::vector InstructionsSoa::max(const std::vector in1, - const std::vector in2) -{ - llvm::Function *func = function(TGSI_OPCODE_MAX); - return callBuiltin(func, in1, in2); -} - -void checkFunction(Function *func) -{ - for (Function::const_iterator BI = func->begin(), BE = func->end(); - BI != BE; ++BI) { - const BasicBlock &BB = *BI; - for (BasicBlock::const_iterator II = BB.begin(), IE = BB.end(); - II != IE; ++II) { - const Instruction &I = *II; - std::cout<< "Instr = "<setOperand(op, V); - } - } - } -} - void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op) { assert(originalFunc); @@ -497,28 +519,4 @@ void InstructionsSoa::injectFunction(llvm::Function *originalFunc, int op) } } -std::vector InstructionsSoa::sub(const std::vector in1, - const std::vector in2) -{ - std::vector res(4); - - res[0] = m_builder.CreateSub(in1[0], in2[0], name("subx")); - res[1] = m_builder.CreateSub(in1[1], in2[1], name("suby")); - res[2] = m_builder.CreateSub(in1[2], in2[2], name("subz")); - res[3] = m_builder.CreateSub(in1[3], in2[3], name("subw")); - - return res; -} - -std::vector InstructionsSoa::lit(const std::vector in) -{ - llvm::Function *func = function(TGSI_OPCODE_LIT); - return callBuiltin(func, in); -} - -std::vector InstructionsSoa::rsq(const std::vector in) -{ - llvm::Function *func = function(TGSI_OPCODE_RSQ); - return callBuiltin(func, in); -} -- cgit v1.2.3 From 73d00b9e93a9e8a5fecb0de224552741e389fc11 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 8 Oct 2008 16:33:04 -0600 Subject: gallium: better instruction printing for SPE code --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 46 ++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 8a87e9abb1..a6dd7ef311 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -164,6 +164,24 @@ rem_prefix(const char *longname) } +static const char * +reg_name(int reg) +{ + switch (reg) { + case SPE_REG_SP: + return "$sp"; + case SPE_REG_RA: + return "$lr"; + default: + { + static char buf[10]; + sprintf(buf, "$%d", reg); + return buf; + } + } +} + + static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, unsigned rA, unsigned rB, const char *name) { @@ -176,7 +194,8 @@ static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, assert(p->num_inst <= p->max_inst); if (p->print) { indent(p); - printf("%s\t$%d, $%d, $%d\n", rem_prefix(name), rT, rA, rB); + printf("%s\t%s, %s, %s\n", + rem_prefix(name), reg_name(rT), reg_name(rA), reg_name(rB)); } } @@ -194,7 +213,8 @@ static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, assert(p->num_inst <= p->max_inst); if (p->print) { indent(p); - printf("%s\t$%d, $%d, $%d, $%d\n", rem_prefix(name), rT, rA, rB, rC); + printf("%s\t%s, %s, %s, %s\n", rem_prefix(name), reg_name(rT), + reg_name(rA), reg_name(rB), reg_name(rC)); } } @@ -211,7 +231,8 @@ static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT, assert(p->num_inst <= p->max_inst); if (p->print) { indent(p); - printf("%s\t$%d, $%d, 0x%x\n", rem_prefix(name), rT, rA, imm); + printf("%s\t%s, %s, 0x%x\n", + rem_prefix(name), reg_name(rT), reg_name(rA), imm); } } @@ -229,7 +250,8 @@ static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT, assert(p->num_inst <= p->max_inst); if (p->print) { indent(p); - printf("%s\t$%d, $%d, 0x%x\n", rem_prefix(name), rT, rA, imm); + printf("%s\t%s, %s, 0x%x\n", + rem_prefix(name), reg_name(rT), reg_name(rA), imm); } } @@ -248,10 +270,14 @@ static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, if (p->print) { indent(p); if (strcmp(name, "spe_lqd") == 0 || - strcmp(name, "spe_stqd") == 0) - printf("%s\t$%d, 0x%x($%d)\n", rem_prefix(name), rT, imm, rA); - else - printf("%s\t$%d, $%d, 0x%x\n", rem_prefix(name), rT, rA, imm); + strcmp(name, "spe_stqd") == 0) { + printf("%s\t%s, %d(%s)\n", + rem_prefix(name), reg_name(rT), imm, reg_name(rA)); + } + else { + printf("%s\t%s, %s, 0x%x\n", + rem_prefix(name), reg_name(rT), reg_name(rA), imm); + } } } @@ -267,7 +293,7 @@ static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, assert(p->num_inst <= p->max_inst); if (p->print) { indent(p); - printf("%s\t$%d, 0x%x\n", rem_prefix(name), rT, imm); + printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm); } } @@ -283,7 +309,7 @@ static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, assert(p->num_inst <= p->max_inst); if (p->print) { indent(p); - printf("%s\t$%d, 0x%x\n", rem_prefix(name), rT, imm); + printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm); } } -- cgit v1.2.3 From 5c57cbec32136c25f104872179d979098be9a1a7 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 8 Oct 2008 16:35:40 -0600 Subject: gallium: asst. clean-ups Don't use register qualifier. Doxygen-ize comments. Remove 'extern'. --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index a6dd7ef311..c442b1f6aa 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -385,7 +385,7 @@ void _name (struct spe_function *p, int imm) \ */ void spe_init_func(struct spe_function *p, unsigned code_size) { - register unsigned int i; + unsigned int i; p->store = align_malloc(code_size, 16); p->num_inst = 0; @@ -475,7 +475,7 @@ void spe_release_register(struct spe_function *p, int reg) */ void spe_allocate_register_set(struct spe_function *p) { - register unsigned int i; + unsigned int i; /* Keep track of the set count. If it ever wraps around to 0, * we're in trouble. @@ -489,7 +489,8 @@ void spe_allocate_register_set(struct spe_function *p) * when the register set is released. */ for (i = 0; i < SPE_NUM_REGS; i++) { - if (p->regs[i] > 0) p->regs[i]++; + if (p->regs[i] > 0) + p->regs[i]++; } } @@ -506,7 +507,8 @@ void spe_release_register_set(struct spe_function *p) * available. */ for (i = 0; i < SPE_NUM_REGS; i++) { - if (p->regs[i] > 0) p->regs[i]--; + if (p->regs[i] > 0) + p->regs[i]--; } } @@ -525,7 +527,7 @@ spe_indent(struct spe_function *p, int spaces) } -extern void +void spe_comment(struct spe_function *p, int rel_indent, const char *s) { if (p->print) { @@ -710,10 +712,12 @@ void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui) } } -/* This function is constructed identically to spe_sor_uint() below. +/** + * This function is constructed identically to spe_sor_uint() below. * Changes to one should be made in the other. */ -void spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +void +spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) { /* If we can, emit a single instruction, either And Byte Immediate * (which uses the same constant across each byte), And Halfword Immediate @@ -723,7 +727,7 @@ void spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int * * Otherwise, we'll need to use a temporary register. */ - register unsigned int tmp; + unsigned int tmp; /* If the upper 23 bits are all 0s or all 1s, sign extension * will work and we can use And Word Immediate @@ -760,10 +764,12 @@ void spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int spe_release_register(p, tmp_reg); } -/* This function is constructed identically to spe_and_uint() above. +/** + * This function is constructed identically to spe_and_uint() above. * Changes to one should be made in the other. */ -void spe_xor_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +void +spe_xor_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) { /* If we can, emit a single instruction, either Exclusive Or Byte * Immediate (which uses the same constant across each byte), Exclusive @@ -773,7 +779,7 @@ void spe_xor_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int * * Otherwise, we'll need to use a temporary register. */ - register unsigned int tmp; + unsigned int tmp; /* If the upper 23 bits are all 0s or all 1s, sign extension * will work and we can use Exclusive Or Word Immediate -- cgit v1.2.3 From d48a92e88040470f93e2186f8eb23e4797a09860 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 8 Oct 2008 20:44:32 -0600 Subject: cell: implement function calls from shader code. fslight demo runs now. Used for SIN, COS, EXP2, LOG2, POW instructions. TEX next. Fixed some bugs in MIN, MAX, DP3, DP4, DPH instructions. In rtasm code: Special-case spe_lqd(), spe_stqd() functions so they take byte offsets but low-order 4 bits are shifted out. This makes things consistant with SPU assembly language conventions. Added spe_get_registers_used() function. --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 76 ++++++++++-- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 11 +- src/gallium/drivers/cell/ppu/cell_gen_fp.c | 141 +++++++++++++++-------- src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | 30 ++--- 4 files changed, 182 insertions(+), 76 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index c442b1f6aa..9274bc5e3c 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -174,9 +174,12 @@ reg_name(int reg) return "$lr"; default: { - static char buf[10]; - sprintf(buf, "$%d", reg); - return buf; + /* cycle through four buffers to handle multiple calls per printf */ + static char buf[4][10]; + static int b = 0; + b = (b + 1) % 4; + sprintf(buf[b], "$%d", reg); + return buf[b]; } } } @@ -269,15 +272,8 @@ static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, assert(p->num_inst <= p->max_inst); if (p->print) { indent(p); - if (strcmp(name, "spe_lqd") == 0 || - strcmp(name, "spe_stqd") == 0) { - printf("%s\t%s, %d(%s)\n", - rem_prefix(name), reg_name(rT), imm, reg_name(rA)); - } - else { - printf("%s\t%s, %s, 0x%x\n", - rem_prefix(name), reg_name(rT), reg_name(rA), imm); - } + printf("%s\t%s, %s, 0x%x\n", + rem_prefix(name), reg_name(rT), reg_name(rA), imm); } } @@ -379,6 +375,7 @@ void _name (struct spe_function *p, int imm) \ #include "rtasm_ppc_spe.h" + /** * Initialize an spe_function. * \param code_size size of instruction buffer to allocate, in bytes. @@ -513,6 +510,20 @@ void spe_release_register_set(struct spe_function *p) } +unsigned +spe_get_registers_used(const struct spe_function *p, ubyte used[]) +{ + unsigned i, num = 0; + /* only count registers in the range available to callers */ + for (i = 2; i < 80; i++) { + if (p->regs[i]) { + used[num++] = i; + } + } + return num; +} + + void spe_print_code(struct spe_function *p, boolean enable) { @@ -539,6 +550,46 @@ spe_comment(struct spe_function *p, int rel_indent, const char *s) } +/** + * Load quad word. + * NOTE: imm is in bytes and the least significant 4 bits must be zero! + */ +void spe_lqd(struct spe_function *p, unsigned rT, unsigned rA, int offset) +{ + const boolean pSave = p->print; + + p->print = FALSE; + assert(offset % 4 == 0); + emit_RI10(p, 0x034, rT, rA, offset >> 4, "spe_lqd"); + p->print = pSave; + + if (p->print) { + indent(p); + printf("lqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA)); + } +} + + +/** + * Store quad word. + * NOTE: imm is in bytes and the least significant 4 bits must be zero! + */ +void spe_stqd(struct spe_function *p, unsigned rT, unsigned rA, int offset) +{ + const boolean pSave = p->print; + + p->print = FALSE; + assert(offset % 4 == 0); + emit_RI10(p, 0x024, rT, rA, offset >> 4, "spe_stqd"); + p->print = pSave; + + if (p->print) { + indent(p); + printf("stqd\t%s, %d(%s)\n", reg_name(rT), offset, reg_name(rA)); + } +} + + /** * For branch instructions: * \param d if 1, disable interupts if branch is taken @@ -764,6 +815,7 @@ spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) spe_release_register(p, tmp_reg); } + /** * This function is constructed identically to spe_and_uint() above. * Changes to one should be made in the other. diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index cd2e245409..47dadb343c 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -89,6 +89,9 @@ extern void spe_release_register(struct spe_function *p, int reg); extern void spe_allocate_register_set(struct spe_function *p); extern void spe_release_register_set(struct spe_function *p); +extern unsigned +spe_get_registers_used(const struct spe_function *p, ubyte used[]); + extern void spe_print_code(struct spe_function *p, boolean enable); extern void spe_indent(struct spe_function *p, int spaces); extern void spe_comment(struct spe_function *p, int rel_indent, const char *s); @@ -128,11 +131,9 @@ extern void spe_comment(struct spe_function *p, int rel_indent, const char *s); /* Memory load / store instructions */ -EMIT_RI10(spe_lqd, 0x034); EMIT_RR (spe_lqx, 0x1c4); EMIT_RI16(spe_lqa, 0x061); EMIT_RI16(spe_lqr, 0x067); -EMIT_RI10(spe_stqd, 0x024); EMIT_RR (spe_stqx, 0x144); EMIT_RI16(spe_stqa, 0x041); EMIT_RI16(spe_stqr, 0x047); @@ -290,6 +291,12 @@ EMIT_RI16(spe_brz, 0x040); EMIT_RI16(spe_brhnz, 0x046); EMIT_RI16(spe_brhz, 0x044); +extern void +spe_lqd(struct spe_function *p, unsigned rT, unsigned rA, int offset); + +extern void +spe_stqd(struct spe_function *p, unsigned rT, unsigned rA, int offset); + extern void spe_bi(struct spe_function *p, unsigned rA, int d, int e); extern void spe_iret(struct spe_function *p, unsigned rA, int d, int e); extern void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fp.c b/src/gallium/drivers/cell/ppu/cell_gen_fp.c index 3065869d04..640ebcadbb 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fp.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fp.c @@ -84,6 +84,8 @@ struct codegen /** Index of execution mask register */ int exec_mask_reg; + int frame_size; /**< Stack frame size, in words */ + struct spe_function *f; boolean error; }; @@ -208,7 +210,7 @@ get_src_reg(struct codegen *gen, reg = get_itemp(gen); reg_is_itemp = TRUE; /* Load: reg = memory[(machine_reg) + offset] */ - spe_lqd(gen->f, reg, gen->inputs_reg, offset); + spe_lqd(gen->f, reg, gen->inputs_reg, offset * 16); } break; case TGSI_FILE_IMMEDIATE: @@ -221,7 +223,7 @@ get_src_reg(struct codegen *gen, reg = get_itemp(gen); reg_is_itemp = TRUE; /* Load: reg = memory[(machine_reg) + offset] */ - spe_lqd(gen->f, reg, gen->constants_reg, offset); + spe_lqd(gen->f, reg, gen->constants_reg, offset * 16); } break; default: @@ -325,6 +327,7 @@ store_dest_reg(struct codegen *gen, } else { /* we're not inside a condition or loop: do nothing special */ + } break; case TGSI_FILE_OUTPUT: @@ -337,17 +340,17 @@ store_dest_reg(struct codegen *gen, /* First read the current value from memory: * Load: curval = memory[(machine_reg) + offset] */ - spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset); + spe_lqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); /* Mix curval with newvalue according to exec mask: * d[i] = mask_reg[i] ? value_reg : d_reg */ spe_selb(gen->f, curval_reg, curval_reg, value_reg, exec_reg); /* Store: memory[(machine_reg) + offset] = curval */ - spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset); + spe_stqd(gen->f, curval_reg, gen->outputs_reg, offset * 16); } else { /* Store: memory[(machine_reg) + offset] = reg */ - spe_stqd(gen->f, value_reg, gen->outputs_reg, offset); + spe_stqd(gen->f, value_reg, gen->outputs_reg, offset * 16); } } break; @@ -357,6 +360,41 @@ store_dest_reg(struct codegen *gen, } + +static void +emit_prologue(struct codegen *gen) +{ + gen->frame_size = 256+128; /* XXX temporary */ + + spe_comment(gen->f, -4, "Function prologue:"); + + /* save $lr on stack # stqd $lr,16($sp) */ + spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); + + /* save stack pointer # stqd $sp,-frameSize($sp) */ + spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); + + /* adjust stack pointer # ai $sp,$sp,-frameSize */ + spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -gen->frame_size); +} + + +static void +emit_epilogue(struct codegen *gen) +{ + spe_comment(gen->f, -4, "Function epilogue:"); + + /* restore stack pointer # ai $sp,$sp,frameSize */ + spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, gen->frame_size); + + /* restore $lr # lqd $lr,16($sp) */ + spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); + + /* return from function call */ + spe_bi(gen->f, SPE_REG_RA, 0, 0); +} + + static boolean emit_MOV(struct codegen *gen, const struct tgsi_full_instruction *inst) { @@ -588,6 +626,7 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); int tmp_reg = get_itemp(gen); + /* t = x0 * x1 */ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); @@ -603,7 +642,9 @@ emit_DP3(struct codegen *gen, const struct tgsi_full_instruction *inst) for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, tmp_reg); + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); } } @@ -623,6 +664,7 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) int s1_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[0]); int s2_reg = get_src_reg(gen, CHAN_X, &inst->FullSrcRegisters[1]); int tmp_reg = get_itemp(gen); + /* t = x0 * x1 */ spe_fm(gen->f, tmp_reg, s1_reg, s2_reg); @@ -643,6 +685,8 @@ emit_DP4(struct codegen *gen, const struct tgsi_full_instruction *inst) for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, tmp_reg); store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); } } @@ -683,6 +727,8 @@ emit_DPH(struct codegen *gen, const struct tgsi_full_instruction *inst) for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { + int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + spe_move(gen->f, d_reg, tmp_reg); store_dest_reg(gen, tmp_reg, ch, &inst->FullDstRegisters[0]); } } @@ -1112,9 +1158,6 @@ emit_function_call(struct codegen *gen, uint addr; int ch; - /* XXX temporary value */ - const int frameSize = 64; /* stack frame (activation record) size */ - assert(num_args <= 3); /* lookup function address */ @@ -1136,48 +1179,45 @@ emit_function_call(struct codegen *gen, for (ch = 0; ch < 4; ch++) { if (inst->FullDstRegisters[0].DstRegister.WriteMask & (1 << ch)) { - int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - int s_regs[3]; - uint a; + int s_regs[3], d_reg; + ubyte usedRegs[SPE_NUM_REGS]; + uint a, i, numUsed; + for (a = 0; a < num_args; a++) { s_regs[a] = get_src_reg(gen, ch, &inst->FullSrcRegisters[a]); } + d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); - /* Basically: - * save registers on stack - * move parameters to registers 3, 4, 5... - * call function - * save return value (reg 3) - * restore registers from stack - */ + numUsed = spe_get_registers_used(gen->f, usedRegs); + assert(numUsed < gen->frame_size / 16 - 32); - /* XXX hack: load first function param */ - spe_move(gen->f, 3, s_regs[0]); - - /* save $lr on stack # stqd $lr,16($sp) */ - spe_stqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); - /* save stack pointer # stqd $sp,-frameSize($sp) */ - spe_stqd(gen->f, SPE_REG_SP, SPE_REG_SP, -frameSize); - - /* XXX save registers to stack here */ + /* save registers to stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + int offset = 2 + i; + spe_stqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } - /* adjust stack pointer # ai $sp,$sp,-frameSize */ - spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, -frameSize); + /* setup function arguments */ + for (a = 0; a < num_args; a++) { + spe_move(gen->f, 3 + a, s_regs[a]); + } /* branch to function, save return addr */ spe_brasl(gen->f, SPE_REG_RA, addr); - /* restore stack pointer # ai $sp,$sp,frameSize */ - spe_ai(gen->f, SPE_REG_SP, SPE_REG_SP, frameSize); - - /* XXX restore registers from stack here */ - - /* restore $lr # lqd $lr,16($sp) */ - spe_lqd(gen->f, SPE_REG_RA, SPE_REG_SP, 16); - - /* XXX hack: save function's return value */ + /* save function's return value */ spe_move(gen->f, d_reg, 3); + /* restore registers from stack */ + for (i = 0; i < numUsed; i++) { + uint reg = usedRegs[i]; + if (reg != d_reg) { + int offset = 2 + i; + spe_lqd(gen->f, reg, SPE_REG_SP, 16 * offset); + } + } + store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); } @@ -1202,10 +1242,11 @@ emit_MAX(struct codegen *gen, const struct tgsi_full_instruction *inst) int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int tmp_reg = get_itemp(gen); /* d = (s1 > s2) ? s1 : s2 */ - spe_fcgt(gen->f, d_reg, s1_reg, s2_reg); - spe_selb(gen->f, d_reg, s2_reg, s1_reg, d_reg); + spe_fcgt(gen->f, tmp_reg, s1_reg, s2_reg); + spe_selb(gen->f, d_reg, s2_reg, s1_reg, tmp_reg); store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); @@ -1230,10 +1271,11 @@ emit_MIN(struct codegen *gen, const struct tgsi_full_instruction *inst) int s1_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[0]); int s2_reg = get_src_reg(gen, ch, &inst->FullSrcRegisters[1]); int d_reg = get_dst_reg(gen, ch, &inst->FullDstRegisters[0]); + int tmp_reg = get_itemp(gen); /* d = (s2 > s1) ? s1 : s2 */ - spe_fcgt(gen->f, d_reg, s2_reg, s1_reg); - spe_selb(gen->f, d_reg, s2_reg, s1_reg, d_reg); + spe_fcgt(gen->f, tmp_reg, s2_reg, s1_reg); + spe_selb(gen->f, d_reg, s2_reg, s1_reg, tmp_reg); store_dest_reg(gen, d_reg, ch, &inst->FullDstRegisters[0]); free_itemps(gen); @@ -1346,8 +1388,7 @@ static boolean emit_END(struct codegen *gen) { spe_comment(gen->f, -4, "END:"); - /* return from function call */ - spe_bi(gen->f, SPE_REG_RA, 0, 0); + emit_epilogue(gen); return true; } @@ -1420,6 +1461,10 @@ emit_instruction(struct codegen *gen, return emit_function_call(gen, inst, "spu_sin", 1); case TGSI_OPCODE_POW: return emit_function_call(gen, inst, "spu_pow", 2); + case TGSI_OPCODE_EXPBASE2: + return emit_function_call(gen, inst, "spu_exp2", 1); + case TGSI_OPCODE_LOGBASE2: + return emit_function_call(gen, inst, "spu_log2", 1); case TGSI_OPCODE_IF: return emit_IF(gen, inst); @@ -1532,6 +1577,7 @@ emit_declaration(struct cell_context *cell, } + /** * Translate TGSI shader code to SPE instructions. This is done when * the state tracker gives us a new shader (via pipe->create_fs_state()). @@ -1571,12 +1617,14 @@ cell_gen_fragment_program(struct cell_context *cell, tgsi_parse_init(&parse, tokens); + emit_prologue(&gen); + while (!tgsi_parse_end_of_tokens(&parse) && !gen.error) { tgsi_parse_token(&parse); switch (parse.FullToken.Token.Type) { case TGSI_TOKEN_TYPE_IMMEDIATE: - if (!emit_immediate(&gen, &parse.FullToken.FullImmediate)) + if (!emit_immediate(&gen, &parse.FullToken.FullImmediate)) gen.error = true; break; @@ -1595,7 +1643,6 @@ cell_gen_fragment_program(struct cell_context *cell, } } - if (gen.error) { /* terminate the SPE code */ return emit_END(&gen); diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index 566df7f59e..18969005b0 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -73,8 +73,8 @@ emit_matrix_transpose(struct spe_function *p, int col3; - spe_lqd(p, shuf_hi, shuf_ptr, 3); - spe_lqd(p, shuf_lo, shuf_ptr, 4); + spe_lqd(p, shuf_hi, shuf_ptr, 3*16); + spe_lqd(p, shuf_lo, shuf_ptr, 4*16); spe_shufb(p, t1, row0, row2, shuf_hi); spe_shufb(p, t2, row0, row2, shuf_lo); @@ -122,13 +122,13 @@ emit_matrix_transpose(struct spe_function *p, */ switch (count) { case 4: - spe_stqd(p, col3, dest_ptr, 3); + spe_stqd(p, col3, dest_ptr, 3 * 16); case 3: - spe_stqd(p, col2, dest_ptr, 2); + spe_stqd(p, col2, dest_ptr, 2 * 16); case 2: - spe_stqd(p, col1, dest_ptr, 1); + spe_stqd(p, col1, dest_ptr, 1 * 16); case 1: - spe_stqd(p, col0, dest_ptr, 0); + spe_stqd(p, col0, dest_ptr, 0 * 16); } @@ -166,17 +166,17 @@ emit_fetch(struct spe_function *p, float scale_signed = 0.0; float scale_unsigned = 0.0; - spe_lqd(p, v0, in_ptr, 0 + offset[0]); - spe_lqd(p, v1, in_ptr, 1 + offset[0]); - spe_lqd(p, v2, in_ptr, 2 + offset[0]); - spe_lqd(p, v3, in_ptr, 3 + offset[0]); + spe_lqd(p, v0, in_ptr, (0 + offset[0]) * 16); + spe_lqd(p, v1, in_ptr, (1 + offset[0]) * 16); + spe_lqd(p, v2, in_ptr, (2 + offset[0]) * 16); + spe_lqd(p, v3, in_ptr, (3 + offset[0]) * 16); offset[0] += 4; switch (bytes) { case 1: scale_signed = 1.0f / 127.0f; scale_unsigned = 1.0f / 255.0f; - spe_lqd(p, tmp, shuf_ptr, 1); + spe_lqd(p, tmp, shuf_ptr, 1 * 16); spe_shufb(p, v0, v0, v0, tmp); spe_shufb(p, v1, v1, v1, tmp); spe_shufb(p, v2, v2, v2, tmp); @@ -185,7 +185,7 @@ emit_fetch(struct spe_function *p, case 2: scale_signed = 1.0f / 32767.0f; scale_unsigned = 1.0f / 65535.0f; - spe_lqd(p, tmp, shuf_ptr, 2); + spe_lqd(p, tmp, shuf_ptr, 2 * 16); spe_shufb(p, v0, v0, v0, tmp); spe_shufb(p, v1, v1, v1, tmp); spe_shufb(p, v2, v2, v2, tmp); @@ -241,11 +241,11 @@ emit_fetch(struct spe_function *p, switch (count) { case 1: - spe_stqd(p, float_zero, out_ptr, 1); + spe_stqd(p, float_zero, out_ptr, 1 * 16); case 2: - spe_stqd(p, float_zero, out_ptr, 2); + spe_stqd(p, float_zero, out_ptr, 2 * 16); case 3: - spe_stqd(p, float_one, out_ptr, 3); + spe_stqd(p, float_one, out_ptr, 3 * 16); } if (float_zero != -1) { -- cgit v1.2.3 From db9de99925ee7d16ef2e99d41510e7231aa25366 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin Date: Thu, 9 Oct 2008 23:32:01 +0200 Subject: Gallivm: cleanup soa storage. --- src/gallium/auxiliary/gallivm/soabuiltins.c | 1 - src/gallium/auxiliary/gallivm/storagesoa.cpp | 45 ++++++++-------------------- 2 files changed, 12 insertions(+), 34 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/soabuiltins.c b/src/gallium/auxiliary/gallivm/soabuiltins.c index cb85e1734e..b20f3c4963 100644 --- a/src/gallium/auxiliary/gallivm/soabuiltins.c +++ b/src/gallium/auxiliary/gallivm/soabuiltins.c @@ -167,7 +167,6 @@ void min(float4 *res, res[3] = minvec(tmp0w, tmp1w); } - void max(float4 *res, float4 tmp0x, float4 tmp0y, float4 tmp0z, float4 tmp0w, float4 tmp1x, float4 tmp1y, float4 tmp1z, float4 tmp1w) diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp index d4ecf97c36..4fc075cf6d 100644 --- a/src/gallium/auxiliary/gallivm/storagesoa.cpp +++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp @@ -91,29 +91,19 @@ void StorageSoa::declareImmediates() for (unsigned int i = 0; i < m_immediatesToFlush.size(); ++i) { std::vector vec = m_immediatesToFlush[i]; std::vector vals(4); - float val; std::vector channelArray; - val = vec[0]; - llvm::Constant *xChannel = createConstGlobalFloat(val); - val = vec[1]; - llvm::Constant *yChannel = createConstGlobalFloat(val); - val = vec[2]; - llvm::Constant *zChannel = createConstGlobalFloat(val); - val = vec[3]; - llvm::Constant *wChannel = createConstGlobalFloat(val); + vals[0] = vec[0]; vals[1] = vec[1]; vals[2] = vec[2]; vals[3] = vec[3]; + llvm::Constant *xChannel = createConstGlobalVector(vals); -// vals[0] = vec[0]; vals[1] = vec[1]; vals[2] = vec[2]; vals[3] = vec[3]; -// llvm::Constant *xChannel = createConstGlobalVector(vec[0]); - -/* vals[0] = vec[1]; vals[1] = vec[1]; vals[2] = vec[1]; vals[3] = vec[1]; + vals[0] = vec[1]; vals[1] = vec[1]; vals[2] = vec[1]; vals[3] = vec[1]; llvm::Constant *yChannel = createConstGlobalVector(vals); vals[0] = vec[2]; vals[1] = vec[2]; vals[2] = vec[2]; vals[3] = vec[2]; llvm::Constant *zChannel = createConstGlobalVector(vals); vals[0] = vec[3]; vals[1] = vec[3]; vals[2] = vec[3]; vals[3] = vec[3]; - llvm::Constant *wChannel = createConstGlobalVector(vals);*/ + llvm::Constant *wChannel = createConstGlobalVector(vals); channelArray.push_back(xChannel); channelArray.push_back(yChannel); channelArray.push_back(zChannel); @@ -177,29 +167,18 @@ llvm::Value* StorageSoa::unpackConstElement(llvm::IRBuilder<>* m_builder, llvm:: std::vector StorageSoa::constElement(llvm::IRBuilder<>* m_builder, llvm::Value *idx) { - std::vector res(4); + llvm::Value* res; std::vector res2(4); llvm::Value *xChannel, *yChannel, *zChannel, *wChannel; xChannel = elementPointer(m_consts, idx, 0); -/* yChannel = elementPointer(m_consts, idx, 1); - zChannel = elementPointer(m_consts, idx, 2); - wChannel = elementPointer(m_consts, idx, 3);*/ - - res[0] = alignedArrayLoad(xChannel); -/* res[1] = alignedArrayLoad(xChannel); - res[2] = alignedArrayLoad(xChannel); - res[3] = alignedArrayLoad(xChannel);*/ - - - res2[0]=unpackConstElement(m_builder, res[0],0); - res2[1]=unpackConstElement(m_builder, res[0],1); - res2[2]=unpackConstElement(m_builder, res[0],2); - res2[3]=unpackConstElement(m_builder, res[0],3); -/*res[0] = alignedArrayLoad(xChannel); - res[1] = alignedArrayLoad(yChannel); - res[2] = alignedArrayLoad(zChannel); - res[3] = alignedArrayLoad(wChannel);*/ + + res = alignedArrayLoad(xChannel); + + res2[0]=unpackConstElement(m_builder, res,0); + res2[1]=unpackConstElement(m_builder, res,1); + res2[2]=unpackConstElement(m_builder, res,2); + res2[3]=unpackConstElement(m_builder, res,3); return res2; } -- cgit v1.2.3 From ca5224945ae11d3c2e80fd39b7e08464d019bbdd Mon Sep 17 00:00:00 2001 From: Alan Hourihane Date: Fri, 10 Oct 2008 01:31:34 +0100 Subject: gallium: silence warning --- src/gallium/auxiliary/draw/draw_private.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 626a2e3e30..40a72c9dcf 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -193,7 +193,7 @@ struct draw_context const float (*aligned_constants)[4]; - float (*aligned_constant_storage)[4]; + const float (*aligned_constant_storage)[4]; unsigned const_storage_size; -- cgit v1.2.3 From 7ac1fc77661faf0897507fef0437fe69d0ba53ac Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 9 Oct 2008 19:54:46 -0600 Subject: cell: fix incorrect bitmask in spe_load_uint() --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 9274bc5e3c..cc35f0ba5b 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -727,7 +727,7 @@ void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui) * Bytes Immediate (fsmbi) to load the value in a single instruction. * Otherwise, in the general case, we have to use ilhu followed by iohl. */ - if ((ui & 0xfffc0000) == ui) { + if ((ui & 0x3ffff) == ui) { spe_ila(p, rT, ui); } else if ((ui >> 16) == (ui & 0xffff)) { -- cgit v1.2.3 From adeed0f90fdd46ea139d5c4b3b75d5dc79b2a0c7 Mon Sep 17 00:00:00 2001 From: Robert Ellison Date: Fri, 10 Oct 2008 14:13:13 -0600 Subject: CELL: fixing stencil bugs These are the defects found and fixed so far. Several more have been observed; I'm working on them. - Fixed an error in spe_load_uint() that caused incorrect values to be loaded if the given unsigned value had the low 18 bits as 0, and that caused inefficient code to be emitted if the given value had the high 14 bits as 0. - Fixed a problem in stencil code generation where optional registers weren't tracked correctly. - Fixed a problem that the stencil function NEVER was acting as ALWAYS. - Fixed several problems that could occur if stenciling were enabled but depth was disabled. - Fixed a problem with two-sided stencil writemask handling that could cause a stencil writemask to not be applied. - Fixed several state permutations that were incorrectly flagged as not requiring stencil values to be calculated. --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 4 +- src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 88 +++++++++++++++++++----- 2 files changed, 72 insertions(+), 20 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index cc35f0ba5b..9bf3b9bf0c 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -727,7 +727,7 @@ void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui) * Bytes Immediate (fsmbi) to load the value in a single instruction. * Otherwise, in the general case, we have to use ilhu followed by iohl. */ - if ((ui & 0x3ffff) == ui) { + if ((ui & 0x0003ffff) == ui) { spe_ila(p, rT, ui); } else if ((ui >> 16) == (ui & 0xffff)) { @@ -764,7 +764,7 @@ void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui) } /** - * This function is constructed identically to spe_sor_uint() below. + * This function is constructed identically to spe_xor_uint() below. * Changes to one should be made in the other. */ void diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index de170d1036..4e1e53ecdc 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -247,6 +247,7 @@ setup_optional_register(struct spe_function *f, boolean *is_already_set, unsigne { if (*is_already_set) return; *r = spe_allocate_available_register(f); + *is_already_set = true; } static inline void @@ -1157,7 +1158,6 @@ gen_stencil_test(struct spe_function *f, const struct pipe_stencil_state *state, /* stencil_pass = mask & (s == reference) */ spe_compare_equal_uint(f, stencil_pass_reg, fbS_reg, state->ref_value); spe_and(f, stencil_pass_reg, mask_reg, stencil_pass_reg); - /* stencil_fail = mask & ~stencil_pass */ break; case PIPE_FUNC_NOTEQUAL: @@ -1207,7 +1207,6 @@ gen_stencil_test(struct spe_function *f, const struct pipe_stencil_state *state, case PIPE_FUNC_NEVER: /* stencil_pass = mask & 0 = 0 */ spe_load_uint(f, stencil_pass_reg, 0); - spe_move(f, stencil_pass_reg, mask_reg); /* zmask = mask */ break; case PIPE_FUNC_ALWAYS: @@ -1483,6 +1482,10 @@ gen_get_stencil_values(struct spe_function *f, const struct pipe_depth_stencil_a } /* End of calculations for back-facing stencil */ } +/* Note that fbZ_reg may *not* be set on entry, if in fact + * the depth test is not enabled. This function must not use + * the register if depth is not enabled. + */ static boolean gen_stencil_depth_test(struct spe_function *f, const struct pipe_depth_stencil_alpha_state *dsa, @@ -1522,6 +1525,7 @@ gen_stencil_depth_test(struct spe_function *f, * have to spend the complexity to track the more difficult variant * register usage scenarios. */ + spe_comment(f, 0, "Allocating stencil register set"); spe_allocate_register_set(f); /* Calculate the writemask. If the writemask is trivial (either @@ -1538,7 +1542,7 @@ gen_stencil_depth_test(struct spe_function *f, need_to_calculate_stencil_values = false; need_to_writemask_stencil_values = false; } - else if (dsa->stencil[0].write_mask == 0xff && (!dsa->stencil[1].enabled || dsa->stencil[1].write_mask == 0x00)) { + else if (dsa->stencil[0].write_mask == 0xff && (!dsa->stencil[1].enabled || dsa->stencil[1].write_mask == 0xff)) { /* Still trivial, but a little less so. We need to write the stencil * values, but we don't need to mask them. */ @@ -1556,10 +1560,12 @@ gen_stencil_depth_test(struct spe_function *f, * writemask, we'll have to generate code that merges the * two masks into a single effective mask based on fragment facing. */ + spe_comment(f, 0, "Computing stencil writemask"); stencil_writemask_reg = spe_allocate_available_register(f); spe_load_uint(f, stencil_writemask_reg, dsa->stencil[0].write_mask); if (dsa->stencil[1].enabled && dsa->stencil[0].write_mask != dsa->stencil[1].write_mask) { unsigned int back_write_mask_reg = spe_allocate_available_register(f); + spe_comment(f, 0, "Resolving two-sided stencil writemask"); spe_load_uint(f, back_write_mask_reg, dsa->stencil[1].write_mask); spe_selb(f, stencil_writemask_reg, stencil_writemask_reg, back_write_mask_reg, facing_reg); spe_release_register(f, back_write_mask_reg); @@ -1575,6 +1581,7 @@ gen_stencil_depth_test(struct spe_function *f, * This test will *not* change the value in mask_reg (because we don't * yet know whether to apply the two-sided stencil or one-sided stencil). */ + spe_comment(f, 0, "Running basic stencil test"); stencil_pass_reg = spe_allocate_available_register(f); gen_stencil_test(f, &dsa->stencil[0], mask_reg, fbS_reg, stencil_pass_reg); @@ -1584,6 +1591,7 @@ gen_stencil_depth_test(struct spe_function *f, */ if (dsa->stencil[1].enabled) { unsigned int temp_reg = spe_allocate_available_register(f); + spe_comment(f, 0, "Running backface stencil test"); gen_stencil_test(f, &dsa->stencil[1], mask_reg, fbS_reg, temp_reg); spe_selb(f, stencil_pass_reg, stencil_pass_reg, temp_reg, facing_reg); spe_release_register(f, temp_reg); @@ -1597,6 +1605,7 @@ gen_stencil_depth_test(struct spe_function *f, * stencil test, and because the depth test will update the * mask of valid fragments based on the results of the depth test). */ + spe_comment(f, 0, "Computing stencil fail mask and updating fragment mask"); stencil_fail_reg = spe_allocate_available_register(f); spe_andc(f, stencil_fail_reg, mask_reg, stencil_pass_reg); /* Now remove the stenciled-out pixels from the valid fragment mask, @@ -1623,6 +1632,7 @@ gen_stencil_depth_test(struct spe_function *f, * This function will allocate a variant number of registers that * will be released as part of the register set. */ + spe_comment(f, 0, "Computing stencil values"); gen_get_stencil_values(f, dsa, fbS_reg, &front_stencil_fail_values, &front_stencil_pass_depth_fail_values, &front_stencil_pass_depth_pass_values, &back_stencil_fail_values, @@ -1652,6 +1662,7 @@ gen_stencil_depth_test(struct spe_function *f, stencil_pass_depth_pass_values = front_stencil_pass_depth_pass_values; } else { /* two-sided stencil enabled */ + spe_comment(f, 0, "Resolving backface stencil values"); /* Allocate new registers for the needed merged values */ stencil_fail_values = spe_allocate_available_register(f); spe_selb(f, stencil_fail_values, front_stencil_fail_values, back_stencil_fail_values, facing_reg); @@ -1678,11 +1689,13 @@ gen_stencil_depth_test(struct spe_function *f, * on the results of the test. */ if (dsa->depth.enabled) { + spe_comment(f, 0, "Running stencil depth test"); zmask_reg = spe_allocate_available_register(f); modified_buffers |= gen_depth_test(f, dsa, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); } if (need_to_calculate_stencil_values) { + /* If we need to writemask the stencil values before going into * the stencil buffer, we'll have to use a new register to * hold the new values. If not, we can just keep using the @@ -1690,8 +1703,8 @@ gen_stencil_depth_test(struct spe_function *f, */ if (need_to_writemask_stencil_values) { newS_reg = spe_allocate_available_register(f); + spe_comment(f, 0, "Saving current stencil values for writemasking"); spe_move(f, newS_reg, fbS_reg); - modified_buffers = true; } else { newS_reg = fbS_reg; @@ -1699,7 +1712,9 @@ gen_stencil_depth_test(struct spe_function *f, /* Merge in the selected stencil fail values */ if (stencil_fail_values != fbS_reg) { + spe_comment(f, 0, "Loading stencil fail values"); spe_selb(f, newS_reg, newS_reg, stencil_fail_values, stencil_fail_reg); + modified_buffers = true; } /* Same for the stencil pass/depth fail values. If this calculation @@ -1714,20 +1729,36 @@ gen_stencil_depth_test(struct spe_function *f, * set above if we're here. */ unsigned int stencil_pass_depth_fail_mask = spe_allocate_available_register(f); + spe_comment(f, 0, "Loading stencil pass/depth fail values"); spe_andc(f, stencil_pass_depth_fail_mask, stencil_pass_reg, zmask_reg); spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_fail_values, stencil_pass_depth_fail_mask); spe_release_register(f, stencil_pass_depth_fail_mask); + modified_buffers = true; } - /* Same for the stencil pass/depth pass mask */ + /* Same for the stencil pass/depth pass mask. Note that we + * *can* get here with zmask_reg being unset (if the depth + * test is off but the stencil test is on). In this case, + * we assume the depth test passes, and don't need to mask + * the stencil pass mask with the Z mask. + */ if (stencil_pass_depth_pass_values != fbS_reg) { - unsigned int stencil_pass_depth_pass_mask = spe_allocate_available_register(f); - spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg); - - spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask); - spe_release_register(f, stencil_pass_depth_pass_mask); + if (dsa->depth.enabled) { + unsigned int stencil_pass_depth_pass_mask = spe_allocate_available_register(f); + /* We'll need a separate register */ + spe_comment(f, 0, "Loading stencil pass/depth pass values"); + spe_and(f, stencil_pass_depth_pass_mask, stencil_pass_reg, zmask_reg); + spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_depth_pass_mask); + spe_release_register(f, stencil_pass_depth_pass_mask); + } + else { + /* We can use the same stencil-pass register */ + spe_comment(f, 0, "Loading stencil pass values"); + spe_selb(f, newS_reg, newS_reg, stencil_pass_depth_pass_values, stencil_pass_reg); + } + modified_buffers = true; } /* Almost done. If we need to writemask, do it now, leaving the @@ -1736,14 +1767,16 @@ gen_stencil_depth_test(struct spe_function *f, * so there's nothing more to do. */ - if (need_to_writemask_stencil_values) { + if (need_to_writemask_stencil_values && modified_buffers) { /* The Select Bytes command makes a fine writemask. Where * the mask is 0, the first (original) values are retained, * effectively masking out changes. Where the mask is 1, the * second (new) values are retained, incorporating changes. */ + spe_comment(f, 0, "Writemasking new stencil values"); spe_selb(f, fbS_reg, fbS_reg, newS_reg, stencil_writemask_reg); } + } /* done calculating stencil values */ /* The stencil and/or depth values have been applied, and the @@ -1752,6 +1785,7 @@ gen_stencil_depth_test(struct spe_function *f, * of registers that we didn't bother tracking. Release all * those registers as part of the register set, and go home. */ + spe_comment(f, 0, "Releasing stencil register set"); spe_release_register_set(f); /* Return true if we could have modified the stencil and/or @@ -1869,7 +1903,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) boolean fbS_reg_set = false, fbZ_reg_set = false; unsigned int fbS_reg, fbZ_reg = 0; - spe_comment(f, 0, "Fetch quad's Z/stencil values from tile"); + spe_comment(f, 0, "Fetching Z/stencil quad from tile"); /* fetch quad of depth/stencil values from tile at (x,y) */ /* Load: fbZS_reg = memory[depth_tile_reg + offset_reg] */ @@ -1973,13 +2007,18 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) * tests. */ ASSERT(fbS_reg_set); - ASSERT(fbZ_reg_set); spe_comment(f, 0, "Perform stencil test"); + /* Note that fbZ_reg may not be set on entry, if stenciling + * is enabled but there's no Z-buffer. The + * gen_stencil_depth_test() function must ignore the + * fbZ_reg register if depth is not enabled. + */ write_depth_stencil = gen_stencil_depth_test(f, dsa, facing_reg, mask_reg, fragZ_reg, fbZ_reg, fbS_reg); } else if (dsa->depth.enabled) { int zmask_reg = spe_allocate_available_register(f); + ASSERT(fbZ_reg_set); spe_comment(f, 0, "Perform depth test"); write_depth_stencil = gen_depth_test(f, dsa, mask_reg, fragZ_reg, fbZ_reg, zmask_reg); spe_release_register(f, zmask_reg); @@ -1996,26 +2035,39 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) spe_comment(f, 0, "Store quad's depth/stencil values in tile"); if (zs_format == PIPE_FORMAT_S8Z24_UNORM || zs_format == PIPE_FORMAT_X8Z24_UNORM) { - if (fbS_reg_set) { + if (fbS_reg_set && fbZ_reg_set) { spe_shli(f, fbS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ } + else if (fbS_reg_set) { + spe_shli(f, fbZS_reg, fbS_reg, 24); /* fbS = fbS << 24 */ + } else { spe_move(f, fbZS_reg, fbZ_reg); } } else if (zs_format == PIPE_FORMAT_Z24S8_UNORM || zs_format == PIPE_FORMAT_Z24X8_UNORM) { - spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */ - if (fbS_reg_set) { + if (fbS_reg_set && fbZ_reg_set) { + spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */ spe_or(f, fbZS_reg, fbS_reg, fbZ_reg); /* fbZS = fbS | fbZ */ } + else if (fbS_reg_set) { + spe_move(f, fbZS_reg, fbS_reg); + } + else { + spe_shli(f, fbZ_reg, fbZ_reg, 8); /* fbZ = fbZ << 8 */ + } } else if (zs_format == PIPE_FORMAT_Z32_UNORM) { - spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ + if (fbZ_reg_set) { + spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ + } } else if (zs_format == PIPE_FORMAT_Z16_UNORM) { - spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ + if (fbZ_reg_set) { + spe_move(f, fbZS_reg, fbZ_reg); /* fbZS = fbZ */ + } } else if (zs_format == PIPE_FORMAT_S8_UNORM) { ASSERT(0); /* XXX to do */ -- cgit v1.2.3 From 78c67a726fff052abeb03417283504a5dd521665 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 10 Oct 2008 14:35:56 -0600 Subject: cell: fix assertions in spe_lqd(), spe_stqd() --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 9bf3b9bf0c..5b0f6bdd48 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -559,7 +559,7 @@ void spe_lqd(struct spe_function *p, unsigned rT, unsigned rA, int offset) const boolean pSave = p->print; p->print = FALSE; - assert(offset % 4 == 0); + assert(offset % 16 == 0); emit_RI10(p, 0x034, rT, rA, offset >> 4, "spe_lqd"); p->print = pSave; @@ -579,7 +579,7 @@ void spe_stqd(struct spe_function *p, unsigned rT, unsigned rA, int offset) const boolean pSave = p->print; p->print = FALSE; - assert(offset % 4 == 0); + assert(offset % 16 == 0); emit_RI10(p, 0x024, rT, rA, offset >> 4, "spe_stqd"); p->print = pSave; -- cgit v1.2.3 From f42ef6f39d213b4c6315ba95791c16ca2b1a4b21 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 10 Oct 2008 14:44:52 -0600 Subject: cell: additional 'offset' checking in spe_lqd(), spe_stqd() --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 5b0f6bdd48..d0bacd08a6 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -552,14 +552,19 @@ spe_comment(struct spe_function *p, int rel_indent, const char *s) /** * Load quad word. - * NOTE: imm is in bytes and the least significant 4 bits must be zero! + * NOTE: offset is in bytes and the least significant 4 bits must be zero! */ void spe_lqd(struct spe_function *p, unsigned rT, unsigned rA, int offset) { const boolean pSave = p->print; - p->print = FALSE; + /* offset must be a multiple of 16 */ assert(offset % 16 == 0); + /* offset must fit in 10-bit signed int field, after shifting */ + assert((offset >> 4) <= 511); + assert((offset >> 4) >= -512); + + p->print = FALSE; emit_RI10(p, 0x034, rT, rA, offset >> 4, "spe_lqd"); p->print = pSave; @@ -572,14 +577,19 @@ void spe_lqd(struct spe_function *p, unsigned rT, unsigned rA, int offset) /** * Store quad word. - * NOTE: imm is in bytes and the least significant 4 bits must be zero! + * NOTE: offset is in bytes and the least significant 4 bits must be zero! */ void spe_stqd(struct spe_function *p, unsigned rT, unsigned rA, int offset) { const boolean pSave = p->print; - p->print = FALSE; + /* offset must be a multiple of 16 */ assert(offset % 16 == 0); + /* offset must fit in 10-bit signed int field, after shifting */ + assert((offset >> 4) <= 511); + assert((offset >> 4) >= -512); + + p->print = FALSE; emit_RI10(p, 0x024, rT, rA, offset >> 4, "spe_stqd"); p->print = pSave; -- cgit v1.2.3 From d3403b5482ee1c0faa0f42b8782ee3093a2f7b5e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 10 Oct 2008 14:57:57 -0600 Subject: cell: add emit_RI10s() which does range checking on the 10-bit signed immediate field This type of checking should be expanded to cover more instructions... --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 16 ++++++++++++++++ src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 24 ++++++++++++++---------- 2 files changed, 30 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index d0bacd08a6..dea1aed032 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -278,6 +278,16 @@ static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, } +/** As above, but do range checking on signed immediate value */ +static void emit_RI10s(struct spe_function *p, unsigned op, unsigned rT, + unsigned rA, int imm, const char *name) +{ + assert(imm <= 511); + assert(imm >= -512); + emit_RI10(p, op, rT, rA, imm, name); +} + + static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, int imm, const char *name) { @@ -354,6 +364,12 @@ void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ emit_RI10(p, _op, rT, rA, imm, __FUNCTION__); \ } +#define EMIT_RI10s(_name, _op) \ +void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +{ \ + emit_RI10s(p, _op, rT, rA, imm, __FUNCTION__); \ +} + #define EMIT_RI16(_name, _op) \ void _name (struct spe_function *p, unsigned rT, int imm) \ { \ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 47dadb343c..d6a3c02f20 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -119,6 +119,9 @@ extern void spe_comment(struct spe_function *p, int rel_indent, const char *s); #define EMIT_RI10(_name, _op) \ extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ int imm) +#define EMIT_RI10s(_name, _op) \ + extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ + int imm) #define EMIT_RI16(_name, _op) \ extern void _name (struct spe_function *p, unsigned rT, int imm) #define EMIT_RI18(_name, _op) \ @@ -163,7 +166,7 @@ EMIT_RI16(spe_fsmbi, 0x065); EMIT_RR (spe_ah, 0x0c8); EMIT_RI10(spe_ahi, 0x01d); EMIT_RR (spe_a, 0x0c0); -EMIT_RI10(spe_ai, 0x01c); +EMIT_RI10s(spe_ai, 0x01c); EMIT_RR (spe_sfh, 0x048); EMIT_RI10(spe_sfhi, 0x00d); EMIT_RR (spe_sf, 0x040); @@ -201,19 +204,19 @@ EMIT_R (spe_xshw, 0x2ae); EMIT_R (spe_xswd, 0x2a6); EMIT_RR (spe_and, 0x0c1); EMIT_RR (spe_andc, 0x2c1); -EMIT_RI10(spe_andbi, 0x016); -EMIT_RI10(spe_andhi, 0x015); -EMIT_RI10(spe_andi, 0x014); +EMIT_RI10s(spe_andbi, 0x016); +EMIT_RI10s(spe_andhi, 0x015); +EMIT_RI10s(spe_andi, 0x014); EMIT_RR (spe_or, 0x041); EMIT_RR (spe_orc, 0x2c9); -EMIT_RI10(spe_orbi, 0x006); -EMIT_RI10(spe_orhi, 0x005); -EMIT_RI10(spe_ori, 0x004); +EMIT_RI10s(spe_orbi, 0x006); +EMIT_RI10s(spe_orhi, 0x005); +EMIT_RI10s(spe_ori, 0x004); EMIT_R (spe_orx, 0x1f0); EMIT_RR (spe_xor, 0x241); -EMIT_RI10(spe_xorbi, 0x026); -EMIT_RI10(spe_xorhi, 0x025); -EMIT_RI10(spe_xori, 0x024); +EMIT_RI10s(spe_xorbi, 0x026); +EMIT_RI10s(spe_xorhi, 0x025); +EMIT_RI10s(spe_xori, 0x024); EMIT_RR (spe_nand, 0x0c9); EMIT_RR (spe_nor, 0x049); EMIT_RR (spe_eqv, 0x249); @@ -422,6 +425,7 @@ EMIT_R (spe_wrch, 0x10d); #undef EMIT_RI7 #undef EMIT_RI8 #undef EMIT_RI10 +#undef EMIT_RI10s #undef EMIT_RI16 #undef EMIT_RI18 #undef EMIT_I16 -- cgit v1.2.3 From 13f46fa1b9c3009395a0d7f30ebef127f5937451 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 7 Oct 2008 16:44:24 +0100 Subject: draw: don't assume output buffer pointer is aligned (cherry picked from commit 23cc303994eb630c56b1224dfdac51dcea41ed03) --- src/gallium/auxiliary/draw/draw_vs_aos_io.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos_io.c b/src/gallium/auxiliary/draw/draw_vs_aos_io.c index dd79bc799a..39f75b50b7 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos_io.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos_io.c @@ -338,7 +338,7 @@ static void emit_store_R32G32B32A32( struct aos_compilation *cp, struct x86_reg dst_ptr, struct x86_reg dataXMM ) { - sse_movaps(cp->func, dst_ptr, dataXMM); + sse_movups(cp->func, dst_ptr, dataXMM); } static void emit_store_R32G32B32( struct aos_compilation *cp, -- cgit v1.2.3 From 05a8f203cdea768466e5faf1dec4155e1e945c78 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 15 Oct 2008 11:56:57 -0600 Subject: gallium: fix the test in vs_exec_prepare() to avoid redundant bindings Fixes regressions seen in progs/samples/prim.c, progs/demos/ray.c --- src/gallium/auxiliary/draw/draw_vs_exec.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 79a19d6be2..13d4fcfdbf 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -46,7 +46,6 @@ struct exec_vertex_shader { struct draw_vertex_shader base; struct tgsi_exec_machine *machine; - const struct tgsi_token *machine_tokens; }; static struct exec_vertex_shader *exec_vertex_shader( struct draw_vertex_shader *vs ) @@ -66,12 +65,11 @@ vs_exec_prepare( struct draw_vertex_shader *shader, /* Specify the vertex program to interpret/execute. * Avoid rebinding when possible. */ - if (evs->machine_tokens != shader->state.tokens) { + if (evs->machine->Tokens != shader->state.tokens) { tgsi_exec_machine_bind_shader(evs->machine, shader->state.tokens, PIPE_MAX_SAMPLERS, NULL /*samplers*/ ); - evs->machine_tokens = shader->state.tokens; } } -- cgit v1.2.3 From e0c6653a5fda956119239ef921daf1e3b950dfc8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 10:35:38 -0600 Subject: cell: implement many more PPC instructions for code gen --- src/gallium/auxiliary/rtasm/Makefile | 1 + src/gallium/auxiliary/rtasm/rtasm_ppc.c | 603 ++++++++++++++++++++++++++++++-- src/gallium/auxiliary/rtasm/rtasm_ppc.h | 141 +++++++- 3 files changed, 704 insertions(+), 41 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/Makefile b/src/gallium/auxiliary/rtasm/Makefile index 39b8a4dbd7..252dc5274a 100644 --- a/src/gallium/auxiliary/rtasm/Makefile +++ b/src/gallium/auxiliary/rtasm/Makefile @@ -7,6 +7,7 @@ C_SOURCES = \ rtasm_cpu.c \ rtasm_execmem.c \ rtasm_x86sse.c \ + rtasm_ppc.c \ rtasm_ppc_spe.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index 534a23568d..4a94ed0460 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -23,10 +23,19 @@ /** * PPC code generation. + * For reference, see http://www.power.org/resources/reading/PowerISA_V2.05.pdf + * ABI info: http://www.cs.utsa.edu/~whaley/teach/cs6463FHPO/LEC/lec12_ho.pdf + * + * Other PPC refs: + * http://www-01.ibm.com/chips/techlib/techlib.nsf/techdocs/852569B20050FF778525699600719DF2 + * http://www.ibm.com/developerworks/eserver/library/es-archguide-v2.html + * http://www.freescale.com/files/product/doc/MPCFPE32B.pdf + * * \author Brian Paul */ +#include #include "util/u_memory.h" #include "pipe/p_debug.h" #include "rtasm_ppc.h" @@ -35,30 +44,125 @@ void ppc_init_func(struct ppc_function *p, unsigned max_inst) { - p->store = align_malloc(max_inst * PPC_INST_SIZE, 16); - p->num_inst = 0; - p->max_inst = max_inst; - p->vec_used = ~0; + uint i; + + p->store = align_malloc(max_inst * PPC_INST_SIZE, 16); + p->num_inst = 0; + p->max_inst = max_inst; + p->fp_used = ~0x0; + p->vec_used = ~0x0; + + /* only allow using gp registers 7..12 for now */ + p->reg_used = 0x0; + for (i = 7; i < 13; i++) + p->reg_used |= (1 << i); } void ppc_release_func(struct ppc_function *p) { - assert(p->num_inst <= p->max_inst); - if (p->store != NULL) { - align_free(p->store); - } - p->store = NULL; + assert(p->num_inst <= p->max_inst); + if (p->store != NULL) { + align_free(p->store); + } + p->store = NULL; +} + + +void (*ppc_get_func(struct ppc_function *p))(void) +{ +#if 0 + DUMP_END(); + if (DISASSEM && p->store) + debug_printf("disassemble %p %p\n", p->store, p->csr); + + if (p->store == p->error_overflow) + return (void (*)(void)) NULL; + else +#endif + return (void (*)(void)) p->store; +} + + +void +ppc_dump_func(const struct ppc_function *p) +{ + uint i; + for (i = 0; i < p->num_inst; i++) { + debug_printf("%3u: 0x%08x\n", i, p->store[i]); + } +} + + +/** + * Allocate a general purpose register. + * \return register index or -1 if none left. + */ +int +ppc_allocate_register(struct ppc_function *p) +{ + unsigned i; + for (i = 0; i < PPC_NUM_REGS; i++) { + const uint64_t mask = 1 << i; + if ((p->reg_used & mask) != 0) { + p->reg_used &= ~mask; + return i; + } + } + return -1; } /** - * Alloate a vector register. + * Mark the given general purpose register as "unallocated". + */ +void +ppc_release_register(struct ppc_function *p, int reg) +{ + assert(reg < PPC_NUM_REGS); + assert((p->reg_used & (1 << reg)) == 0); + p->reg_used |= (1 << reg); +} + + +/** + * Allocate a floating point register. * \return register index or -1 if none left. */ int -ppc_allocate_vec_register(struct ppc_function *p, int reg) +ppc_allocate_fp_register(struct ppc_function *p) +{ + unsigned i; + for (i = 0; i < PPC_NUM_FP_REGS; i++) { + const uint64_t mask = 1 << i; + if ((p->fp_used & mask) != 0) { + p->fp_used &= ~mask; + return i; + } + } + return -1; +} + + +/** + * Mark the given floating point register as "unallocated". + */ +void +ppc_release_fp_register(struct ppc_function *p, int reg) +{ + assert(reg < PPC_NUM_FP_REGS); + assert((p->fp_used & (1 << reg)) == 0); + p->fp_used |= (1 << reg); +} + + +/** + * Allocate a vector register. + * \return register index or -1 if none left. + */ +int +ppc_allocate_vec_register(struct ppc_function *p) { unsigned i; for (i = 0; i < PPC_NUM_VEC_REGS; i++) { @@ -68,7 +172,6 @@ ppc_allocate_vec_register(struct ppc_function *p, int reg) return i; } } - return -1; } @@ -81,7 +184,6 @@ ppc_release_vec_register(struct ppc_function *p, int reg) { assert(reg < PPC_NUM_VEC_REGS); assert((p->vec_used & (1 << reg)) == 0); - p->vec_used |= (1 << reg); } @@ -98,6 +200,20 @@ union vx_inst { } inst; }; +static inline void +emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +{ + union vx_inst inst; + inst.inst.op = 4; + inst.inst.vD = vD; + inst.inst.vA = vA; + inst.inst.vB = vB; + inst.inst.op2 = op2; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +}; + + union vxr_inst { uint32_t bits; struct { @@ -110,6 +226,21 @@ union vxr_inst { } inst; }; +static inline void +emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +{ + union vxr_inst inst; + inst.inst.op = 4; + inst.inst.vD = vD; + inst.inst.vA = vA; + inst.inst.vB = vB; + inst.inst.rC = 0; + inst.inst.op2 = op2; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +}; + + union va_inst { uint32_t bits; struct { @@ -122,49 +253,204 @@ union va_inst { } inst; }; - static inline void -emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) { - union vx_inst inst; + union va_inst inst; inst.inst.op = 4; inst.inst.vD = vD; inst.inst.vA = vA; inst.inst.vB = vB; + inst.inst.vC = vC; inst.inst.op2 = op2; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); }; -static inline void -emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) + +union i_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned li:24; + unsigned aa:1; + unsigned lk:1; + } inst; +}; + +static INLINE void +emit_i(struct ppc_function *p, uint op, uint li, uint aa, uint lk) { - union vxr_inst inst; - inst.inst.op = 4; - inst.inst.vD = vD; - inst.inst.vA = vA; - inst.inst.vB = vB; - inst.inst.rC = 0; + union i_inst inst; + inst.inst.op = op; + inst.inst.li = li; + inst.inst.aa = aa; + inst.inst.lk = lk; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +} + + +union xl_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned bo:5; + unsigned bi:5; + unsigned unused:3; + unsigned bh:2; + unsigned op2:10; + unsigned lk:1; + } inst; +}; + +static INLINE void +emit_xl(struct ppc_function *p, uint op, uint bo, uint bi, uint bh, + uint op2, uint lk) +{ + union xl_inst inst; + inst.inst.op = op; + inst.inst.bo = bo; + inst.inst.bi = bi; + inst.inst.unused = 0x0; + inst.inst.bh = bh; inst.inst.op2 = op2; + inst.inst.lk = lk; p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); +} + +static INLINE void +dump_xl(const char *name, uint inst) +{ + union xl_inst i; + + i.bits = inst; + debug_printf("%s = 0x%08x\n", name, inst); + debug_printf(" op: %d 0x%x\n", i.inst.op, i.inst.op); + debug_printf(" bo: %d 0x%x\n", i.inst.bo, i.inst.bo); + debug_printf(" bi: %d 0x%x\n", i.inst.bi, i.inst.bi); + debug_printf(" unused: %d 0x%x\n", i.inst.unused, i.inst.unused); + debug_printf(" bh: %d 0x%x\n", i.inst.bh, i.inst.bh); + debug_printf(" op2: %d 0x%x\n", i.inst.op2, i.inst.op2); + debug_printf(" lk: %d 0x%x\n", i.inst.lk, i.inst.lk); +} + + +union x_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned vrs:5; + unsigned ra:5; + unsigned rb:5; + unsigned op2:10; + unsigned unused:1; + } inst; }; -static inline void -emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) +static INLINE void +emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2) { - union va_inst inst; - inst.inst.op = 4; - inst.inst.vD = vD; - inst.inst.vA = vA; - inst.inst.vB = vB; - inst.inst.vC = vC; + union x_inst inst; + inst.inst.op = op; + inst.inst.vrs = vrs; + inst.inst.ra = ra; + inst.inst.rb = rb; inst.inst.op2 = op2; + inst.inst.unused = 0x0; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +} + + +union d_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned rt:5; + unsigned ra:5; + unsigned si:16; + } inst; +}; + +static inline void +emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si) +{ + union d_inst inst; + assert(si >= -32768); + assert(si <= 32767); + inst.inst.op = op; + inst.inst.rt = rt; + inst.inst.ra = ra; + inst.inst.si = (unsigned) (si & 0xffff); p->store[p->num_inst++] = inst.bits; assert(p->num_inst <= p->max_inst); }; +union a_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned frt:5; + unsigned fra:5; + unsigned frb:5; + unsigned unused:5; + unsigned op2:5; + unsigned rc:1; + } inst; +}; + +static inline void +emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2, + uint rc) +{ + union a_inst inst; + inst.inst.op = op; + inst.inst.frt = frt; + inst.inst.fra = fra; + inst.inst.frb = frb; + inst.inst.unused = 0x0; + inst.inst.op2 = op2; + inst.inst.rc = rc; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +}; + + +union xo_inst { + uint32_t bits; + struct { + unsigned op:6; + unsigned rt:5; + unsigned ra:5; + unsigned rb:5; + unsigned oe:1; + unsigned op2:9; + unsigned rc:1; + } inst; +}; + +static INLINE void +emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe, + uint op2, uint rc) +{ + union xo_inst inst; + inst.inst.op = op; + inst.inst.rt = rt; + inst.inst.ra = ra; + inst.inst.rb = rb; + inst.inst.oe = oe; + inst.inst.op2 = op2; + inst.inst.rc = rc; + p->store[p->num_inst++] = inst.bits; + assert(p->num_inst <= p->max_inst); +} + + + + /** ** float vector arithmetic @@ -172,7 +458,7 @@ emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) /** vector float add */ void -ppc_vaddfp(struct ppc_function *p,uint vD, uint vA, uint vB) +ppc_vaddfp(struct ppc_function *p, uint vD, uint vA, uint vB) { emit_vx(p, 10, vD, vA, vB); } @@ -198,11 +484,11 @@ ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB) emit_vx(p, 1034, vD, vA, vB); } -/** vector float mult add */ +/** vector float mult add: vD = vA * vB + vC */ void ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) { - emit_va(p, 46, vD, vA, vB, vC); + emit_va(p, 46, vD, vA, vC, vB); /* note arg order */ } /** vector float compare greater than */ @@ -282,13 +568,26 @@ ppc_vrfiz(struct ppc_function *p, uint vD, uint vB) emit_vx(p, 586, vD, 0, vB); } +/** vector store: store vR at mem[vA+vB] */ +void +ppc_stvx(struct ppc_function *p, uint vR, uint vA, uint vB) +{ + emit_x(p, 31, vR, vA, vB, 231); +} + +/** vector load: vR = mem[vA+vB] */ +void +ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB) +{ + emit_x(p, 31, vR, vA, vB, 103); +} + /** - ** bitwise operations + ** vector bitwise operations **/ - /** vector and */ void ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB) @@ -324,6 +623,14 @@ ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB) emit_vx(p, 1220, vD, vA, vB); } +/** Pseudo-instruction: vector move */ +void +ppc_vecmove(struct ppc_function *p, uint vD, uint vA) +{ + ppc_vor(p, vD, vA, vA); +} + + /** ** Vector shuffle / select / splat / etc @@ -363,3 +670,225 @@ ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm) { emit_vx(p, 652, vD, imm, vB); } + +/** vector splat signed immediate word */ +void +ppc_vspltisw(struct ppc_function *p, uint vD, int imm) +{ + assert(imm >= -16); + assert(imm < 15); + emit_vx(p, 908, vD, imm, 0); +} + +/** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */ +void +ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB) +{ + emit_vx(p, 388, vD, vA, vB); +} + + + + +/** + ** integer arithmetic + **/ + +/** rt = ra + imm */ +void +ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 14, rt, ra, imm); +} + +/** rt = ra + (imm << 16) */ +void +ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 15, rt, ra, imm); +} + +/** rt = ra + rb */ +void +ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb) +{ + emit_xo(p, 31, rt, ra, rb, 0, 266, 0); +} + +/** rt = ra AND ra */ +void +ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb) +{ + emit_x(p, 31, ra, rt, rb, 28); /* note argument order */ +} + +/** rt = ra AND imm */ +void +ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 28, ra, rt, imm); /* note argument order */ +} + +/** rt = ra OR ra */ +void +ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb) +{ + emit_x(p, 31, ra, rt, rb, 444); /* note argument order */ +} + +/** rt = ra OR imm */ +void +ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 24, ra, rt, imm); /* note argument order */ +} + +/** rt = ra XOR ra */ +void +ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb) +{ + emit_x(p, 31, ra, rt, rb, 316); /* note argument order */ +} + +/** rt = ra XOR imm */ +void +ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm) +{ + emit_d(p, 26, ra, rt, imm); /* note argument order */ +} + +/** pseudo instruction: move: rt = ra */ +void +ppc_mr(struct ppc_function *p, uint rt, uint ra) +{ + ppc_or(p, rt, ra, ra); +} + +/** pseudo instruction: load immediate: rt = imm */ +void +ppc_li(struct ppc_function *p, uint rt, int imm) +{ + ppc_addi(p, rt, 0, imm); +} + +/** rt = imm << 16 */ +void +ppc_lis(struct ppc_function *p, uint rt, int imm) +{ + ppc_addis(p, rt, 0, imm); +} + +/** rt = imm */ +void +ppc_load_int(struct ppc_function *p, uint rt, int imm) +{ + ppc_lis(p, rt, (imm >> 16)); /* rt = imm >> 16 */ + ppc_ori(p, rt, rt, (imm & 0xffff)); /* rt = rt | (imm & 0xffff) */ +} + + + + +/** + ** integer load/store + **/ + +/** store rs at memory[(ra)+d], + * then update ra = (ra)+d + */ +void +ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d) +{ + emit_d(p, 37, rs, ra, d); +} + +/** store rs at memory[(ra)+d] */ +void +ppc_stw(struct ppc_function *p, uint rs, uint ra, int d) +{ + emit_d(p, 36, rs, ra, d); +} + +/** Load rt = mem[(ra)+d]; then zero set high 32 bits to zero. */ +void +ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d) +{ + emit_d(p, 32, rt, ra, d); +} + + + +/** + ** Float (non-vector) arithmetic + **/ + +/** add: frt = fra + frb */ +void +ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb) +{ + emit_a(p, 63, frt, fra, frb, 21, 0); +} + +/** sub: frt = fra - frb */ +void +ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb) +{ + emit_a(p, 63, frt, fra, frb, 20, 0); +} + +/** convert to int: rt = (int) ra */ +void +ppc_fctiwz(struct ppc_function *p, uint rt, uint fra) +{ + emit_x(p, 63, rt, 0, fra, 15); +} + +/** store frs at mem[(ra)+offset] */ +void +ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset) +{ + emit_d(p, 52, frs, ra, offset); +} + +/** store frs at mem[(ra)+(rb)] */ +void +ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb) +{ + emit_x(p, 31, frs, ra, rb, 983); +} + +/** load frt = mem[(ra)+offset] */ +void +ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset) +{ + emit_d(p, 48, frt, ra, offset); +} + + + + + +/** + ** branch instructions + **/ + +/** BLR: Branch to link register (p. 35) */ +void +ppc_blr(struct ppc_function *p) +{ + emit_i(p, 18, 0, 0, 1); +} + +/** Branch Conditional to Link Register (p. 36) */ +void +ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg) +{ + emit_xl(p, 19, condOp, condReg, branchHint, 16, 0); +} + +/** Pseudo instruction: return from subroutine */ +void +ppc_return(struct ppc_function *p) +{ + ppc_bclr(p, BRANCH_COND_ALWAYS, BRANCH_HINT_SUB_RETURN, 0); +} diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index ed14e943df..6370b60494 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -36,27 +36,46 @@ #define PPC_INST_SIZE 4 /**< 4 bytes / instruction */ +#define PPC_NUM_REGS 32 +#define PPC_NUM_FP_REGS 32 #define PPC_NUM_VEC_REGS 32 +/** Stack pointer register */ +#define PPC_REG_SP 1 + +/** Branch conditions */ +#define BRANCH_COND_ALWAYS 0x14 /* binary 1z1zz (z=ignored) */ + +/** Branch hints */ +#define BRANCH_HINT_SUB_RETURN 0x0 /* binary 00 */ + struct ppc_function { uint32_t *store; /**< instruction buffer */ uint num_inst; uint max_inst; - uint32_t vec_used; /** used/free vector registers bitmask */ uint32_t reg_used; /** used/free general-purpose registers bitmask */ + uint32_t fp_used; /** used/free floating point registers bitmask */ + uint32_t vec_used; /** used/free vector registers bitmask */ }; extern void ppc_init_func(struct ppc_function *p, unsigned max_inst); extern void ppc_release_func(struct ppc_function *p); - -extern int ppc_allocate_vec_register(struct ppc_function *p, int reg); +extern void (*ppc_get_func( struct ppc_function *p ))( void ); +extern void ppc_dump_func(const struct ppc_function *p); + +extern int ppc_allocate_register(struct ppc_function *p); +extern void ppc_release_register(struct ppc_function *p, int reg); +extern int ppc_allocate_fp_register(struct ppc_function *p); +extern void ppc_release_fp_register(struct ppc_function *p, int reg); +extern int ppc_allocate_vec_register(struct ppc_function *p); extern void ppc_release_vec_register(struct ppc_function *p, int reg); + /** ** float vector arithmetic **/ @@ -126,9 +145,18 @@ extern void ppc_vrfiz(struct ppc_function *p, uint vD, uint vB); +/** vector store: store vR at mem[vA+vB] */ +extern void +ppc_stvx(struct ppc_function *p, uint vR, uint vA, uint vB); + +/** vector load: vR = mem[vA+vB] */ +extern void +ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB); + + /** - ** bitwise operations + ** vector bitwise operations **/ @@ -152,6 +180,10 @@ ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB); extern void ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB); +/** Pseudo-instruction: vector move */ +extern void +ppc_vecmove(struct ppc_function *p, uint vD, uint vA); + /** ** Vector shuffle / select / splat / etc @@ -177,5 +209,106 @@ ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm); extern void ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm); +/** vector splat signed immediate word */ +extern void +ppc_vspltisw(struct ppc_function *p, uint vD, int imm); + +/** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */ +extern void +ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB); + + + +/** + ** scalar arithmetic + **/ + +extern void +ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb); + +extern void +ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void +ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb); + +extern void +ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void +ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb); + +extern void +ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void +ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb); + +extern void +ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm); + +extern void +ppc_mr(struct ppc_function *p, uint rt, uint ra); + +extern void +ppc_li(struct ppc_function *p, uint rt, int imm); + +extern void +ppc_lis(struct ppc_function *p, uint rt, int imm); + +extern void +ppc_load_int(struct ppc_function *p, uint rt, int imm); + + + +/** + ** scalar load/store + **/ + +extern void +ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d); + +extern void +ppc_stw(struct ppc_function *p, uint rs, uint ra, int d); + +extern void +ppc_lwz(struct ppc_function *p, uint rs, uint ra, int d); + + + +/** + ** Float (non-vector) arithmetic + **/ + +extern void +ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb); + +extern void +ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb); + +extern void +ppc_fctiwz(struct ppc_function *p, uint rt, uint ra); + +extern void +ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset); + +extern void +ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb); + + + +/** + ** branch instructions + **/ + +extern void +ppc_blr(struct ppc_function *p); + +void +ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg); + +extern void +ppc_return(struct ppc_function *p); + #endif /* RTASM_PPC_H */ -- cgit v1.2.3 From 049f57f86a2cb8ff08fba819c581a034ca7ea52c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 11:06:39 -0600 Subject: gallium: added ppc_lvewx() --- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 7 +++++++ src/gallium/auxiliary/rtasm/rtasm_ppc.h | 4 ++++ 2 files changed, 11 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index 4a94ed0460..aaec2d2191 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -582,6 +582,13 @@ ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB) emit_x(p, 31, vR, vA, vB, 103); } +/** load vector element word: vR = mem_word[vA+vB] */ +void +ppc_lvewx(struct ppc_function *p, uint vR, uint vA, uint vB) +{ + emit_x(p, 31, vR, vA, vB, 71); +} + /** diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index 6370b60494..53d5746dc8 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -153,6 +153,10 @@ ppc_stvx(struct ppc_function *p, uint vR, uint vA, uint vB); extern void ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB); +/** load vector element word: vR = mem_word[vA+vB] */ +extern void +ppc_lvewx(struct ppc_function *p, uint vR, uint vA, uint vB); + /** -- cgit v1.2.3 From 70f4ad44985e3ec6dabc1b0e55a5bf85803a4cd4 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 11:07:35 -0600 Subject: gallium: TGSI to PPC code generation Based on the TGSIto SSE2 code generator. Incomplete and lots of SSE stuff still hanging around but the basic dozen or so TGSI opcodes are functioning. --- src/gallium/auxiliary/tgsi/Makefile | 1 + src/gallium/auxiliary/tgsi/tgsi_ppc.c | 2781 +++++++++++++++++++++++++++++++++ src/gallium/auxiliary/tgsi/tgsi_ppc.h | 48 + 3 files changed, 2830 insertions(+) create mode 100644 src/gallium/auxiliary/tgsi/tgsi_ppc.c create mode 100644 src/gallium/auxiliary/tgsi/tgsi_ppc.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile index c7155a9316..d7df9490cf 100644 --- a/src/gallium/auxiliary/tgsi/Makefile +++ b/src/gallium/auxiliary/tgsi/Makefile @@ -11,6 +11,7 @@ C_SOURCES = \ tgsi_info.c \ tgsi_iterate.c \ tgsi_parse.c \ + tgsi_ppc.c \ tgsi_scan.c \ tgsi_sse2.c \ tgsi_text.c \ diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c new file mode 100644 index 0000000000..112e736523 --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -0,0 +1,2781 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * TGSI to PowerPC code generation. + */ + +#include "pipe/p_config.h" + +#if defined(PIPE_ARCH_PPC) + +#include "pipe/p_debug.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_math.h" +#include "util/u_sse.h" +#include "tgsi/tgsi_parse.h" +#include "tgsi/tgsi_util.h" +#include "tgsi_exec.h" +#include "tgsi_ppc.h" +#include "rtasm/rtasm_ppc.h" + + +/* for 1/sqrt() + * + * This costs about 100fps (close to 10%) in gears: + */ +#define HIGH_PRECISION 1 + +#define FAST_MATH 1 + + +#define FOR_EACH_CHANNEL( CHAN )\ + for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) + +#define IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ + ((INST).FullDstRegisters[0].DstRegister.WriteMask & (1 << (CHAN))) + +#define IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN )\ + if (IS_DST0_CHANNEL_ENABLED( INST, CHAN )) + +#define FOR_EACH_DST0_ENABLED_CHANNEL( INST, CHAN )\ + FOR_EACH_CHANNEL( CHAN )\ + IF_IS_DST0_CHANNEL_ENABLED( INST, CHAN ) + +#define CHAN_X 0 +#define CHAN_Y 1 +#define CHAN_Z 2 +#define CHAN_W 3 + +#define TEMP_ONE_I TGSI_EXEC_TEMP_ONE_I +#define TEMP_ONE_C TGSI_EXEC_TEMP_ONE_C + +#define TEMP_R0 TGSI_EXEC_TEMP_R0 +#define TEMP_ADDR TGSI_EXEC_TEMP_ADDR + + +/** + * Context/state used during code gen. + */ +struct gen_context +{ + struct ppc_function *f; + int inputs_reg; /**< register pointing to input params */ + int outputs_reg; /**< register pointing to output params */ + int temps_reg; /**< register pointing to temporary "registers" */ + int immed_reg; /**< register pointing to immediates buffer */ + int const_reg; /**< register pointing to constants buffer */ +}; + + + +#if 0000 + +/** + * X86 utility functions. + */ + +static struct x86_reg +make_xmm( + unsigned xmm ) +{ + return x86_make_reg( + file_XMM, + (enum x86_reg_name) xmm ); +} + +/** + * X86 register mapping helpers. + */ + +static struct x86_reg +get_const_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_CX ); +} + +static struct x86_reg +get_input_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_AX ); +} + +static struct x86_reg +get_output_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_DX ); +} + +static struct x86_reg +get_temp_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_BX ); +} + +static struct x86_reg +get_coef_base( void ) +{ + return get_output_base(); +} + +static struct x86_reg +get_immediate_base( void ) +{ + return x86_make_reg( + file_REG32, + reg_DI ); +} + + +/** + * Data access helpers. + */ + + +static struct x86_reg +get_immediate( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_immediate_base(), + (vec * 4 + chan) * 4 ); +} + +static struct x86_reg +get_const( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_const_base(), + (vec * 4 + chan) * 4 ); +} + +static struct x86_reg +get_input( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_input_base(), + (vec * 4 + chan) * 16 ); +} + +static struct x86_reg +get_output( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_output_base(), + (vec * 4 + chan) * 16 ); +} + +static struct x86_reg +get_temp( + unsigned vec, + unsigned chan ) +{ + return x86_make_disp( + get_temp_base(), + (vec * 4 + chan) * 16 ); +} + +static struct x86_reg +get_coef( + unsigned vec, + unsigned chan, + unsigned member ) +{ + return x86_make_disp( + get_coef_base(), + ((vec * 3 + member) * 4 + chan) * 4 ); +} + + +static void +emit_ret( + struct x86_function *func ) +{ + x86_ret( func ); +} + +#endif + +/** + * Data fetch helpers. + */ + +#if 00 +/** + * Copy a shader constant to xmm register + * \param xmm the destination xmm register + * \param vec the src const buffer index + * \param chan src channel to fetch (X, Y, Z or W) + */ +static void +emit_const( + struct x86_function *func, + uint xmm, + int vec, + uint chan, + uint indirect, + uint indirectFile, + int indirectIndex ) +{ + if (indirect) { + struct x86_reg r0 = get_input_base(); + struct x86_reg r1 = get_output_base(); + uint i; + + assert( indirectFile == TGSI_FILE_ADDRESS ); + assert( indirectIndex == 0 ); + + x86_push( func, r0 ); + x86_push( func, r1 ); + + for (i = 0; i < QUAD_SIZE; i++) { + x86_lea( func, r0, get_const( vec, chan ) ); + x86_mov( func, r1, x86_make_disp( get_temp( TEMP_ADDR, CHAN_X ), i * 4 ) ); + + /* Quick hack to multiply by 16 -- need to add SHL to rtasm. + */ + x86_add( func, r1, r1 ); + x86_add( func, r1, r1 ); + x86_add( func, r1, r1 ); + x86_add( func, r1, r1 ); + + x86_add( func, r0, r1 ); + x86_mov( func, r1, x86_deref( r0 ) ); + x86_mov( func, x86_make_disp( get_temp( TEMP_R0, CHAN_X ), i * 4 ), r1 ); + } + + x86_pop( func, r1 ); + x86_pop( func, r0 ); + + sse_movaps( + func, + make_xmm( xmm ), + get_temp( TEMP_R0, CHAN_X ) ); + } + else { + assert( vec >= 0 ); + + sse_movss( + func, + make_xmm( xmm ), + get_const( vec, chan ) ); + sse_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); + } +} + +static void +emit_immediate( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movss( + func, + make_xmm( xmm ), + get_immediate( vec, chan ) ); + sse_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + + +/** + * Copy a shader input to xmm register + * \param xmm the destination xmm register + * \param vec the src input attrib + * \param chan src channel to fetch (X, Y, Z or W) + */ +static void +emit_inputf( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movups( + func, + make_xmm( xmm ), + get_input( vec, chan ) ); +} + +/** + * Store an xmm register to a shader output + * \param xmm the source xmm register + * \param vec the dest output attrib + * \param chan src dest channel to store (X, Y, Z or W) + */ +static void +emit_output( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movups( + func, + get_output( vec, chan ), + make_xmm( xmm ) ); +} + +/** + * Copy a shader temporary to xmm register + * \param xmm the destination xmm register + * \param vec the src temp register + * \param chan src channel to fetch (X, Y, Z or W) + */ +static void +emit_tempf( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movaps( + func, + make_xmm( xmm ), + get_temp( vec, chan ) ); +} + +/** + * Load an xmm register with an input attrib coefficient (a0, dadx or dady) + * \param xmm the destination xmm register + * \param vec the src input/attribute coefficient index + * \param chan src channel to fetch (X, Y, Z or W) + * \param member 0=a0, 1=dadx, 2=dady + */ +static void +emit_coef( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan, + unsigned member ) +{ + sse_movss( + func, + make_xmm( xmm ), + get_coef( vec, chan, member ) ); + sse_shufps( + func, + make_xmm( xmm ), + make_xmm( xmm ), + SHUF( 0, 0, 0, 0 ) ); +} + +/** + * Data store helpers. + */ + +static void +emit_inputs( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movups( + func, + get_input( vec, chan ), + make_xmm( xmm ) ); +} + +static void +emit_temps( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + sse_movaps( + func, + get_temp( vec, chan ), + make_xmm( xmm ) ); +} + +static void +emit_addrs( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + assert( vec == 0 ); + + emit_temps( + func, + xmm, + vec + TGSI_EXEC_TEMP_ADDR, + chan ); +} + +/** + * Coefficent fetch helpers. + */ + +static void +emit_coef_a0( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_coef( + func, + xmm, + vec, + chan, + 0 ); +} + +static void +emit_coef_dadx( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_coef( + func, + xmm, + vec, + chan, + 1 ); +} + +static void +emit_coef_dady( + struct x86_function *func, + unsigned xmm, + unsigned vec, + unsigned chan ) +{ + emit_coef( + func, + xmm, + vec, + chan, + 2 ); +} +#endif + + +/** + * Function call helpers. + */ + +#if 00 +/** + * NOTE: In gcc, if the destination uses the SSE intrinsics, then it must be + * defined with __attribute__((force_align_arg_pointer)), as we do not guarantee + * that the stack pointer is 16 byte aligned, as expected. + */ +static void +emit_func_call_dst( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst, + void (PIPE_CDECL *code)() ) +{ + struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); + unsigned i, n, xmm; + unsigned xmm_mask; + + /* Bitmask of the xmm registers to save */ + xmm_mask = (1 << xmm_save) - 1; + xmm_mask &= ~(1 << xmm_dst); + + sse_movaps( + func, + get_temp( TEMP_R0, 0 ), + make_xmm( xmm_dst ) ); + + x86_push( + func, + x86_make_reg( file_REG32, reg_AX) ); + x86_push( + func, + x86_make_reg( file_REG32, reg_CX) ); + x86_push( + func, + x86_make_reg( file_REG32, reg_DX) ); + + for(i = 0, n = 0; i < 8; ++i) + if(xmm_mask & (1 << i)) + ++n; + + x86_sub_imm( + func, + x86_make_reg( file_REG32, reg_SP ), + n*16); + + for(i = 0, n = 0; i < 8; ++i) + if(xmm_mask & (1 << i)) { + sse_movups( + func, + x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ), + make_xmm( xmm ) ); + ++n; + } + + x86_lea( + func, + ecx, + get_temp( TEMP_R0, 0 ) ); + + x86_push( func, ecx ); + x86_mov_reg_imm( func, ecx, (unsigned long) code ); + x86_call( func, ecx ); + x86_pop(func, ecx ); + + for(i = 0, n = 0; i < 8; ++i) + if(xmm_mask & (1 << i)) { + sse_movups( + func, + make_xmm( xmm ), + x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ) ); + ++n; + } + + x86_add_imm( + func, + x86_make_reg( file_REG32, reg_SP ), + n*16); + + /* Restore GP registers in a reverse order. + */ + x86_pop( + func, + x86_make_reg( file_REG32, reg_DX) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_CX) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_AX) ); + + sse_movaps( + func, + make_xmm( xmm_dst ), + get_temp( TEMP_R0, 0 ) ); +} + +static void +emit_func_call_dst_src( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst, + unsigned xmm_src, + void (PIPE_CDECL *code)() ) +{ + sse_movaps( + func, + get_temp( TEMP_R0, 1 ), + make_xmm( xmm_src ) ); + + emit_func_call_dst( + func, + xmm_save, + xmm_dst, + code ); +} + +/* + * Fast SSE2 implementation of special math functions. + */ + +#define POLY0(x, c0) _mm_set1_ps(c0) +#define POLY1(x, c0, c1) _mm_add_ps(_mm_mul_ps(POLY0(x, c1), x), _mm_set1_ps(c0)) +#define POLY2(x, c0, c1, c2) _mm_add_ps(_mm_mul_ps(POLY1(x, c1, c2), x), _mm_set1_ps(c0)) +#define POLY3(x, c0, c1, c2, c3) _mm_add_ps(_mm_mul_ps(POLY2(x, c1, c2, c3), x), _mm_set1_ps(c0)) +#define POLY4(x, c0, c1, c2, c3, c4) _mm_add_ps(_mm_mul_ps(POLY3(x, c1, c2, c3, c4), x), _mm_set1_ps(c0)) +#define POLY5(x, c0, c1, c2, c3, c4, c5) _mm_add_ps(_mm_mul_ps(POLY4(x, c1, c2, c3, c4, c5), x), _mm_set1_ps(c0)) + +#define EXP_POLY_DEGREE 3 +#define LOG_POLY_DEGREE 5 + +/** + * See http://www.devmaster.net/forums/showthread.php?p=43580 + */ +static INLINE __m128 +exp2f4(__m128 x) +{ + __m128i ipart; + __m128 fpart, expipart, expfpart; + + x = _mm_min_ps(x, _mm_set1_ps( 129.00000f)); + x = _mm_max_ps(x, _mm_set1_ps(-126.99999f)); + + /* ipart = int(x - 0.5) */ + ipart = _mm_cvtps_epi32(_mm_sub_ps(x, _mm_set1_ps(0.5f))); + + /* fpart = x - ipart */ + fpart = _mm_sub_ps(x, _mm_cvtepi32_ps(ipart)); + + /* expipart = (float) (1 << ipart) */ + expipart = _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(ipart, _mm_set1_epi32(127)), 23)); + + /* minimax polynomial fit of 2**x, in range [-0.5, 0.5[ */ +#if EXP_POLY_DEGREE == 5 + expfpart = POLY5(fpart, 9.9999994e-1f, 6.9315308e-1f, 2.4015361e-1f, 5.5826318e-2f, 8.9893397e-3f, 1.8775767e-3f); +#elif EXP_POLY_DEGREE == 4 + expfpart = POLY4(fpart, 1.0000026f, 6.9300383e-1f, 2.4144275e-1f, 5.2011464e-2f, 1.3534167e-2f); +#elif EXP_POLY_DEGREE == 3 + expfpart = POLY3(fpart, 9.9992520e-1f, 6.9583356e-1f, 2.2606716e-1f, 7.8024521e-2f); +#elif EXP_POLY_DEGREE == 2 + expfpart = POLY2(fpart, 1.0017247f, 6.5763628e-1f, 3.3718944e-1f); +#else +#error +#endif + + return _mm_mul_ps(expipart, expfpart); +} + +/** + * See http://www.devmaster.net/forums/showthread.php?p=43580 + */ +static INLINE __m128 +log2f4(__m128 x) +{ + __m128i expmask = _mm_set1_epi32(0x7f800000); + __m128i mantmask = _mm_set1_epi32(0x007fffff); + __m128 one = _mm_set1_ps(1.0f); + + __m128i i = _mm_castps_si128(x); + + /* exp = (float) exponent(x) */ + __m128 exp = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(i, expmask), 23), _mm_set1_epi32(127))); + + /* mant = (float) mantissa(x) */ + __m128 mant = _mm_or_ps(_mm_castsi128_ps(_mm_and_si128(i, mantmask)), one); + + __m128 logmant; + + /* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ + * These coefficients can be generate with + * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html + */ +#if LOG_POLY_DEGREE == 6 + logmant = POLY5(mant, 3.11578814719469302614f, -3.32419399085241980044f, 2.59883907202499966007f, -1.23152682416275988241f, 0.318212422185251071475f, -0.0344359067839062357313f); +#elif LOG_POLY_DEGREE == 5 + logmant = POLY4(mant, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f); +#elif LOG_POLY_DEGREE == 4 + logmant = POLY3(mant, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f); +#elif LOG_POLY_DEGREE == 3 + logmant = POLY2(mant, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f); +#else +#error +#endif + + /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ + logmant = _mm_mul_ps(logmant, _mm_sub_ps(mant, one)); + + return _mm_add_ps(logmant, exp); +} + +static INLINE __m128 +powf4(__m128 x, __m128 y) +{ + return exp2f4(_mm_mul_ps(log2f4(x), y)); +} + + +/** + * Low-level instruction translators. + */ + +static void +emit_abs( + struct x86_function *func, + unsigned xmm ) +{ + sse_andps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_7FFFFFFF_I, + TGSI_EXEC_TEMP_7FFFFFFF_C ) ); +} + +static void +emit_add( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + sse_addps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void PIPE_CDECL +cos4f( + float *store ) +{ + store[0] = cosf( store[0] ); + store[1] = cosf( store[1] ); + store[2] = cosf( store[2] ); + store[3] = cosf( store[3] ); +} + +static void +emit_cos( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_save, + xmm_dst, + cos4f ); +} + +static void PIPE_CDECL +#if defined(PIPE_CC_GCC) +__attribute__((force_align_arg_pointer)) +#endif +ex24f( + float *store ) +{ + _mm_store_ps(&store[0], exp2f4( _mm_load_ps(&store[0]) )); +} + +static void +emit_ex2( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_save, + xmm_dst, + ex24f ); +} + +static void +emit_f2it( + struct x86_function *func, + unsigned xmm ) +{ + sse2_cvttps2dq( + func, + make_xmm( xmm ), + make_xmm( xmm ) ); +} + +static void PIPE_CDECL +flr4f( + float *store ) +{ + store[0] = floorf( store[0] ); + store[1] = floorf( store[1] ); + store[2] = floorf( store[2] ); + store[3] = floorf( store[3] ); +} + +static void +emit_flr( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_save, + xmm_dst, + flr4f ); +} + +static void PIPE_CDECL +frc4f( + float *store ) +{ + store[0] -= floorf( store[0] ); + store[1] -= floorf( store[1] ); + store[2] -= floorf( store[2] ); + store[3] -= floorf( store[3] ); +} + +static void +emit_frc( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_save, + xmm_dst, + frc4f ); +} + +static void PIPE_CDECL +#if defined(PIPE_CC_GCC) +__attribute__((force_align_arg_pointer)) +#endif +lg24f( + float *store ) +{ + _mm_store_ps(&store[0], log2f4( _mm_load_ps(&store[0]) )); +} + +static void +emit_lg2( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_save, + xmm_dst, + lg24f ); +} + +static void +emit_MOV( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + sse_movups( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void +emit_mul (struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src) +{ + sse_mulps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void +emit_neg( + struct x86_function *func, + unsigned xmm ) +{ + sse_xorps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_80000000_I, + TGSI_EXEC_TEMP_80000000_C ) ); +} + +static void PIPE_CDECL +#if defined(PIPE_CC_GCC) +__attribute__((force_align_arg_pointer)) +#endif +pow4f( + float *store ) +{ +#if 1 + _mm_store_ps(&store[0], powf4( _mm_load_ps(&store[0]), _mm_load_ps(&store[4]) )); +#else + store[0] = powf( store[0], store[4] ); + store[1] = powf( store[1], store[5] ); + store[2] = powf( store[2], store[6] ); + store[3] = powf( store[3], store[7] ); +#endif +} + +static void +emit_pow( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst, + unsigned xmm_src ) +{ + emit_func_call_dst_src( + func, + xmm_save, + xmm_dst, + xmm_src, + pow4f ); +} + +static void +emit_rcp ( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + /* On Intel CPUs at least, this is only accurate to 12 bits -- not + * good enough. Need to either emit a proper divide or use the + * iterative technique described below in emit_rsqrt(). + */ + sse2_rcpps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} + +static void +emit_rsqrt( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ +#if HIGH_PRECISION + /* Although rsqrtps() and rcpps() are low precision on some/all SSE + * implementations, it is possible to improve its precision at + * fairly low cost, using a newton/raphson step, as below: + * + * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a) + * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)] + * + * See: http://softwarecommunity.intel.com/articles/eng/1818.htm + */ + { + struct x86_reg dst = make_xmm( xmm_dst ); + struct x86_reg src = make_xmm( xmm_src ); + struct x86_reg tmp0 = make_xmm( 2 ); + struct x86_reg tmp1 = make_xmm( 3 ); + + assert( xmm_dst != xmm_src ); + assert( xmm_dst != 2 && xmm_dst != 3 ); + assert( xmm_src != 2 && xmm_src != 3 ); + + sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) ); + sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) ); + sse_rsqrtps( func, tmp1, src ); + sse_mulps( func, src, tmp1 ); + sse_mulps( func, dst, tmp1 ); + sse_mulps( func, src, tmp1 ); + sse_subps( func, tmp0, src ); + sse_mulps( func, dst, tmp0 ); + } +#else + /* On Intel CPUs at least, this is only accurate to 12 bits -- not + * good enough. + */ + sse_rsqrtps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +#endif +} + +static void +emit_setsign( + struct x86_function *func, + unsigned xmm ) +{ + sse_orps( + func, + make_xmm( xmm ), + get_temp( + TGSI_EXEC_TEMP_80000000_I, + TGSI_EXEC_TEMP_80000000_C ) ); +} + +static void PIPE_CDECL +sin4f( + float *store ) +{ + store[0] = sinf( store[0] ); + store[1] = sinf( store[1] ); + store[2] = sinf( store[2] ); + store[3] = sinf( store[3] ); +} + +static void +emit_sin (struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst) +{ + emit_func_call_dst( + func, + xmm_save, + xmm_dst, + sin4f ); +} + +static void +emit_sub( + struct x86_function *func, + unsigned xmm_dst, + unsigned xmm_src ) +{ + sse_subps( + func, + make_xmm( xmm_dst ), + make_xmm( xmm_src ) ); +} +#endif + + +/** + * Register fetch. + */ +static void +emit_fetch(struct gen_context *gen, + unsigned vec_reg, + const struct tgsi_full_src_register *reg, + const unsigned chan_index) +{ + uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index); + + switch (swizzle) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch (reg->SrcRegister.File) { + case TGSI_FILE_INPUT: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + ppc_li(gen->f, offset_reg, offset); + ppc_lvx(gen->f, vec_reg, gen->inputs_reg, offset_reg); + ppc_release_register(gen->f, offset_reg); + } + break; + case TGSI_FILE_TEMPORARY: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + ppc_li(gen->f, offset_reg, offset); + ppc_lvx(gen->f, vec_reg, gen->temps_reg, offset_reg); + ppc_release_register(gen->f, offset_reg); + } + break; + case TGSI_FILE_IMMEDIATE: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + ppc_li(gen->f, offset_reg, offset); + ppc_lvx(gen->f, vec_reg, gen->immed_reg, offset_reg); + ppc_release_register(gen->f, offset_reg); + } + break; + case TGSI_FILE_CONSTANT: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; + ppc_li(gen->f, offset_reg, offset); + /* load vector word */ + ppc_lvewx(gen->f, vec_reg, gen->const_reg, offset_reg); + /* splat word[0] across vector */ + ppc_vspltw(gen->f, vec_reg, vec_reg, 0); + ppc_release_register(gen->f, offset_reg); + } + break; + default: + assert( 0 ); + } + break; + + case TGSI_EXTSWIZZLE_ZERO: +#if 0 + emit_tempf( + func, + xmm, + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ); +#endif + break; + + case TGSI_EXTSWIZZLE_ONE: +#if 0 + emit_tempf( + func, + xmm, + TEMP_ONE_I, + TEMP_ONE_C ); +#endif + break; + + default: + assert( 0 ); + } + +#if 0 + switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { + case TGSI_UTIL_SIGN_CLEAR: + emit_abs( func, xmm ); + break; + + case TGSI_UTIL_SIGN_SET: + emit_setsign( func, xmm ); + break; + + case TGSI_UTIL_SIGN_TOGGLE: + emit_neg( func, xmm ); + break; + + case TGSI_UTIL_SIGN_KEEP: + break; + } +#endif +} + +#define FETCH( GEN, INST, VEC_REG, SRC_REG, CHAN ) \ + emit_fetch( GEN, VEC_REG, &(INST).FullSrcRegisters[SRC_REG], CHAN ) + + + +/** + * Register store. + */ +static void +emit_store(struct gen_context *gen, + unsigned vec_reg, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + unsigned chan_index) +{ + switch (reg->DstRegister.File) { + case TGSI_FILE_OUTPUT: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; + ppc_li(gen->f, offset_reg, offset); + ppc_stvx(gen->f, vec_reg, gen->outputs_reg, offset_reg); + ppc_release_register(gen->f, offset_reg); + } + break; + case TGSI_FILE_TEMPORARY: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; + ppc_li(gen->f, offset_reg, offset); + ppc_stvx(gen->f, vec_reg, gen->temps_reg, offset_reg); + ppc_release_register(gen->f, offset_reg); + } + break; +#if 0 + case TGSI_FILE_ADDRESS: + emit_addrs( + func, + xmm, + reg->DstRegister.Index, + chan_index ); + break; +#endif + default: + assert( 0 ); + } + +#if 0 + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; + + case TGSI_SAT_ZERO_ONE: + /* assert( 0 ); */ + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; + } +#endif +} + + +#define STORE( GEN, INST, XMM, INDEX, CHAN )\ + emit_store( GEN, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) + + + +#if 000 +/** + * High-level instruction translators. + */ + +static void +emit_kil( + struct x86_function *func, + const struct tgsi_full_src_register *reg ) +{ + unsigned uniquemask; + unsigned registers[4]; + unsigned nextregister = 0; + unsigned firstchan = ~0; + unsigned chan_index; + + /* This mask stores component bits that were already tested. Note that + * we test if the value is less than zero, so 1.0 and 0.0 need not to be + * tested. */ + uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); + + FOR_EACH_CHANNEL( chan_index ) { + unsigned swizzle; + + /* unswizzle channel */ + swizzle = tgsi_util_get_full_src_register_extswizzle( + reg, + chan_index ); + + /* check if the component has not been already tested */ + if( !(uniquemask & (1 << swizzle)) ) { + uniquemask |= 1 << swizzle; + + /* allocate register */ + registers[chan_index] = nextregister; + emit_fetch( + func, + nextregister, + reg, + chan_index ); + nextregister++; + + /* mark the first channel used */ + if( firstchan == ~0 ) { + firstchan = chan_index; + } + } + } + + x86_push( + func, + x86_make_reg( file_REG32, reg_AX ) ); + x86_push( + func, + x86_make_reg( file_REG32, reg_DX ) ); + + FOR_EACH_CHANNEL( chan_index ) { + if( uniquemask & (1 << chan_index) ) { + sse_cmpps( + func, + make_xmm( registers[chan_index] ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ), + cc_LessThan ); + + if( chan_index == firstchan ) { + sse_pmovmskb( + func, + x86_make_reg( file_REG32, reg_AX ), + make_xmm( registers[chan_index] ) ); + } + else { + sse_pmovmskb( + func, + x86_make_reg( file_REG32, reg_DX ), + make_xmm( registers[chan_index] ) ); + x86_or( + func, + x86_make_reg( file_REG32, reg_AX ), + x86_make_reg( file_REG32, reg_DX ) ); + } + } + } + + x86_or( + func, + get_temp( + TGSI_EXEC_TEMP_KILMASK_I, + TGSI_EXEC_TEMP_KILMASK_C ), + x86_make_reg( file_REG32, reg_AX ) ); + + x86_pop( + func, + x86_make_reg( file_REG32, reg_DX ) ); + x86_pop( + func, + x86_make_reg( file_REG32, reg_AX ) ); +} + + +static void +emit_kilp( + struct x86_function *func ) +{ + /* XXX todo / fix me */ +} + + +static void +emit_setcc( + struct x86_function *func, + struct tgsi_full_instruction *inst, + enum sse_cc cc ) +{ + unsigned chan_index; + + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + sse_cmpps( + func, + make_xmm( 0 ), + make_xmm( 1 ), + cc ); + sse_andps( + func, + make_xmm( 0 ), + get_temp( + TEMP_ONE_I, + TEMP_ONE_C ) ); + STORE( func, *inst, 0, 0, chan_index ); + } +} + +static void +emit_cmp( + struct x86_function *func, + struct tgsi_full_instruction *inst ) +{ + unsigned chan_index; + + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + sse_cmpps( + func, + make_xmm( 0 ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ), + cc_LessThan ); + sse_andps( + func, + make_xmm( 1 ), + make_xmm( 0 ) ); + sse_andnps( + func, + make_xmm( 0 ), + make_xmm( 2 ) ); + sse_orps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } +} +#endif + + +static void +emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0 = ppc_allocate_vec_register(gen->f); + uint chan_index; + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + FETCH(gen, *inst, 0, 0, chan_index); /* v0 = srcreg[0] */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + /* turn off the most significant bit of each vector float word */ + { + int v1 = ppc_allocate_vec_register(gen->f); + ppc_vspltisw(gen->f, v1, -1); /* v1 = {-1, -1, -1, -1} */ + ppc_vslw(gen->f, v1, v1, v1); /* v1 = {1<<31, 1<<31, 1<<31, 1<<31} */ + ppc_vandc(gen->f, v0, v0, v1); /* v0 = v0 & ~v1 */ + ppc_release_vec_register(gen->f, v1); + } + break; + case TGSI_OPCODE_FLOOR: + ppc_vrfim(gen->f, v0, v0); /* v0 = floor(v0) */ + break; + case TGSI_OPCODE_FRAC: + { + int v1 = ppc_allocate_vec_register(gen->f); + ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */ + ppc_vsubfp(gen->f, v0, v0, v1); /* v0 = v0 - v1 */ + ppc_release_vec_register(gen->f, v1); + } + break; + case TGSI_OPCODE_EXPBASE2: + ppc_vexptefp(gen->f, v0, v0); /* v0 = 2^v0 */ + break; + case TGSI_OPCODE_LOGBASE2: + /* XXX this may be broken! */ + ppc_vlogefp(gen->f, v0, v0); /* v0 = log2(v0) */ + break; + case TGSI_OPCODE_MOV: + /* nothing */ + break; + default: + assert(0); + } + STORE(gen, *inst, v0, 0, chan_index); /* store v0 */ + } + ppc_release_vec_register(gen->f, v0); +} + + +static void +emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0 = ppc_allocate_vec_register(gen->f); + int v1 = ppc_allocate_vec_register(gen->f); + int v2 = ppc_allocate_vec_register(gen->f); + uint chan_index; + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ + FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + ppc_vaddfp(gen->f, v2, v0, v1); + break; + case TGSI_OPCODE_SUB: + ppc_vsubfp(gen->f, v2, v0, v1); + break; + case TGSI_OPCODE_MUL: + ppc_vxor(gen->f, v2, v2, v2); /* v2 = {0, 0, 0, 0} */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v0 */ + break; + case TGSI_OPCODE_MIN: + ppc_vminfp(gen->f, v2, v0, v1); + break; + case TGSI_OPCODE_MAX: + ppc_vmaxfp(gen->f, v2, v0, v1); + break; + default: + assert(0); + } + STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ + } + ppc_release_vec_register(gen->f, v0); + ppc_release_vec_register(gen->f, v1); + ppc_release_vec_register(gen->f, v2); +} + + +static void +emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0 = ppc_allocate_vec_register(gen->f); + int v1 = ppc_allocate_vec_register(gen->f); + int v2 = ppc_allocate_vec_register(gen->f); + uint chan_index; + + ppc_vxor(gen->f, v2, v2, v2); /* v2 = {0, 0, 0, 0} */ + + FETCH(gen, *inst, v0, 0, CHAN_X); /* v0 = src0.XXXX */ + FETCH(gen, *inst, v1, 1, CHAN_X); /* v1 = src1.XXXX */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + + FETCH(gen, *inst, v0, 0, CHAN_Y); /* v0 = src0.YYYY */ + FETCH(gen, *inst, v1, 1, CHAN_Y); /* v1 = src1.YYYY */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + + FETCH(gen, *inst, v0, 0, CHAN_Z); /* v0 = src0.ZZZZ */ + FETCH(gen, *inst, v1, 1, CHAN_Z); /* v1 = src1.ZZZZ */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + + if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) { + FETCH(gen, *inst, v0, 0, CHAN_W); /* v0 = src0.WWWW */ + FETCH(gen, *inst, v1, 1, CHAN_W); /* v1 = src1.WWWW */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + } + else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) { + FETCH(gen, *inst, v1, 1, CHAN_W); /* v1 = src1.WWWW */ + ppc_vaddfp(gen->f, v2, v2, v1); /* v2 = v2 + v1 */ + } + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ + } + ppc_release_vec_register(gen->f, v0); + ppc_release_vec_register(gen->f, v1); + ppc_release_vec_register(gen->f, v2); +} + + +static void +emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0 = ppc_allocate_vec_register(gen->f); + int v1 = ppc_allocate_vec_register(gen->f); + int v2 = ppc_allocate_vec_register(gen->f); + int v3 = ppc_allocate_vec_register(gen->f); + uint chan_index; + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ + FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ + FETCH(gen, *inst, v2, 2, chan_index); /* v2 = srcreg[2] */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_MAD: + ppc_vmaddfp(gen->f, v3, v0, v1, v2); /* v3 = v0 * v1 + v2 */ + break; + case TGSI_OPCODE_LRP: + ppc_vsubfp(gen->f, v3, v1, v2); /* v3 = v1 - v2 */ + ppc_vmaddfp(gen->f, v3, v0, v3, v2); /* v3 = v0 * v3 + v2 */ + break; + default: + assert(0); + } + STORE(gen, *inst, v3, 0, chan_index); /* store v3 */ + } + ppc_release_vec_register(gen->f, v0); + ppc_release_vec_register(gen->f, v1); + ppc_release_vec_register(gen->f, v2); + ppc_release_vec_register(gen->f, v3); +} + + +static int +emit_instruction(struct gen_context *gen, + struct tgsi_full_instruction *inst) +{ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_ABS: + case TGSI_OPCODE_FLOOR: + case TGSI_OPCODE_FRAC: + case TGSI_OPCODE_EXPBASE2: + case TGSI_OPCODE_LOGBASE2: + emit_unaryop(gen, inst); + break; + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_SUB: + case TGSI_OPCODE_MUL: + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_MAX: + emit_binop(gen, inst); + break; + case TGSI_OPCODE_MAD: + case TGSI_OPCODE_LRP: + emit_triop(gen, inst); + break; + case TGSI_OPCODE_DP3: + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + emit_dotprod(gen, inst); + break; + case TGSI_OPCODE_END: + /* normal end */ + return 1; + default: + return 0; + } + +#if 0 + unsigned chan_index; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ARL: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_f2it( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LIT: + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + emit_tempf( + func, + 0, + TEMP_ONE_I, + TEMP_ONE_C); + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) { + STORE( func, *inst, 0, 0, CHAN_X ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { + STORE( func, *inst, 0, 0, CHAN_W ); + } + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( func, *inst, 0, 0, CHAN_X ); + sse_maxps( + func, + make_xmm( 0 ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ) ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + /* XMM[1] = SrcReg[0].yyyy */ + FETCH( func, *inst, 1, 0, CHAN_Y ); + /* XMM[1] = max(XMM[1], 0) */ + sse_maxps( + func, + make_xmm( 1 ), + get_temp( + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ) ); + /* XMM[2] = SrcReg[0].wwww */ + FETCH( func, *inst, 2, 0, CHAN_W ); + /* XMM[2] = min(XMM[2], 128.0) */ + sse_minps( + func, + make_xmm( 2 ), + get_temp( + TGSI_EXEC_TEMP_128_I, + TGSI_EXEC_TEMP_128_C ) ); + /* XMM[2] = max(XMM[2], -128.0) */ + sse_maxps( + func, + make_xmm( 2 ), + get_temp( + TGSI_EXEC_TEMP_MINUS_128_I, + TGSI_EXEC_TEMP_MINUS_128_C ) ); + emit_pow( func, 3, 1, 2 ); + FETCH( func, *inst, 0, 0, CHAN_X ); + sse_xorps( + func, + make_xmm( 2 ), + make_xmm( 2 ) ); + sse_cmpps( + func, + make_xmm( 2 ), + make_xmm( 0 ), + cc_LessThanEqual ); + sse_andps( + func, + make_xmm( 2 ), + make_xmm( 1 ) ); + STORE( func, *inst, 2, 0, CHAN_Z ); + } + } + break; + + case TGSI_OPCODE_RCP: + /* TGSI_OPCODE_RECIP */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_rcp( func, 0, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_RSQ: + /* TGSI_OPCODE_RECIPSQRT */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_rsqrt( func, 1, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 1, 0, chan_index ); + } + break; + + case TGSI_OPCODE_EXP: + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( func, *inst, 0, 0, CHAN_X ); + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { + emit_MOV( func, 1, 0 ); + emit_flr( func, 2, 1 ); + /* dst.x = ex2(floor(src.x)) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) { + emit_MOV( func, 2, 1 ); + emit_ex2( func, 3, 2 ); + STORE( func, *inst, 2, 0, CHAN_X ); + } + /* dst.y = src.x - floor(src.x) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { + emit_MOV( func, 2, 0 ); + emit_sub( func, 2, 1 ); + STORE( func, *inst, 2, 0, CHAN_Y ); + } + } + /* dst.z = ex2(src.x) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { + emit_ex2( func, 3, 0 ); + STORE( func, *inst, 0, 0, CHAN_Z ); + } + } + /* dst.w = 1.0 */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) { + emit_tempf( func, 0, TEMP_ONE_I, TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_LOG: + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_abs( func, 0 ); + emit_MOV( func, 1, 0 ); + emit_lg2( func, 2, 1 ); + /* dst.z = lg2(abs(src.x)) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { + STORE( func, *inst, 1, 0, CHAN_Z ); + } + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { + emit_flr( func, 2, 1 ); + /* dst.x = floor(lg2(abs(src.x))) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) { + STORE( func, *inst, 1, 0, CHAN_X ); + } + /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { + emit_ex2( func, 2, 1 ); + emit_rcp( func, 1, 1 ); + emit_mul( func, 0, 1 ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + } + } + /* dst.w = 1.0 */ + if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) { + emit_tempf( func, 0, TEMP_ONE_I, TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MUL: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_mul( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_ADD: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_add( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP3: + /* TGSI_OPCODE_DOT3 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DP4: + /* TGSI_OPCODE_DOT4 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul(func, 1, 2 ); + emit_add(func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_W ); + FETCH( func, *inst, 2, 1, CHAN_W ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DST: + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + emit_tempf( + func, + 0, + TEMP_ONE_I, + TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + FETCH( func, *inst, 0, 0, CHAN_Y ); + FETCH( func, *inst, 1, 1, CHAN_Y ); + emit_mul( func, 0, 1 ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + FETCH( func, *inst, 0, 0, CHAN_Z ); + STORE( func, *inst, 0, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + FETCH( func, *inst, 0, 1, CHAN_W ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MIN: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + sse_minps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_MAX: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + sse_maxps( + func, + make_xmm( 0 ), + make_xmm( 1 ) ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLT: + /* TGSI_OPCODE_SETLT */ + emit_setcc( func, inst, cc_LessThan ); + break; + + case TGSI_OPCODE_SGE: + /* TGSI_OPCODE_SETGE */ + emit_setcc( func, inst, cc_NotLessThan ); + break; + + case TGSI_OPCODE_MAD: + /* TGSI_OPCODE_MADD */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + emit_mul( func, 0, 1 ); + emit_add( func, 0, 2 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SUB: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + emit_sub( func, 0, 1 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LERP: + /* TGSI_OPCODE_LRP */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + FETCH( func, *inst, 1, 1, chan_index ); + FETCH( func, *inst, 2, 2, chan_index ); + emit_sub( func, 1, 2 ); + emit_mul( func, 0, 1 ); + emit_add( func, 0, 2 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CND: + return 0; + break; + + case TGSI_OPCODE_CND0: + return 0; + break; + + case TGSI_OPCODE_DOT2ADD: + /* TGSI_OPCODE_DP2A */ + return 0; + break; + + case TGSI_OPCODE_INDEX: + return 0; + break; + + case TGSI_OPCODE_NEGATE: + return 0; + break; + + case TGSI_OPCODE_FRAC: + /* TGSI_OPCODE_FRC */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_frc( func, 0, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CLAMP: + return 0; + break; + + case TGSI_OPCODE_FLOOR: + /* TGSI_OPCODE_FLR */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_flr( func, 0, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_ROUND: + return 0; + break; + + case TGSI_OPCODE_EXPBASE2: + /* TGSI_OPCODE_EX2 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_ex2( func, 0, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_LOGBASE2: + /* TGSI_OPCODE_LG2 */ + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_lg2( func, 0, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_POWER: + /* TGSI_OPCODE_POW */ + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_pow( func, 0, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_CROSSPRODUCT: + /* TGSI_OPCODE_XPD */ + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { + FETCH( func, *inst, 1, 1, CHAN_Z ); + FETCH( func, *inst, 3, 0, CHAN_Z ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + FETCH( func, *inst, 0, 0, CHAN_Y ); + FETCH( func, *inst, 4, 1, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + emit_MOV( func, 2, 0 ); + emit_mul( func, 2, 1 ); + emit_MOV( func, 5, 3 ); + emit_mul( func, 5, 4 ); + emit_sub( func, 2, 5 ); + STORE( func, *inst, 2, 0, CHAN_X ); + } + if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || + IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { + FETCH( func, *inst, 2, 1, CHAN_X ); + FETCH( func, *inst, 5, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + emit_mul( func, 3, 2 ); + emit_mul( func, 1, 5 ); + emit_sub( func, 3, 1 ); + STORE( func, *inst, 3, 0, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + emit_mul( func, 5, 4 ); + emit_mul( func, 0, 2 ); + emit_sub( func, 5, 0 ); + STORE( func, *inst, 5, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + emit_tempf( + func, + 0, + TEMP_ONE_I, + TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_MULTIPLYMATRIX: + return 0; + break; + + case TGSI_OPCODE_ABS: + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_abs( func, 0) ; + + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_RCC: + return 0; + break; + + case TGSI_OPCODE_DPH: + FETCH( func, *inst, 0, 0, CHAN_X ); + FETCH( func, *inst, 1, 1, CHAN_X ); + emit_mul( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Y ); + FETCH( func, *inst, 2, 1, CHAN_Y ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 0, CHAN_Z ); + FETCH( func, *inst, 2, 1, CHAN_Z ); + emit_mul( func, 1, 2 ); + emit_add( func, 0, 1 ); + FETCH( func, *inst, 1, 1, CHAN_W ); + emit_add( func, 0, 1 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_COS: + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_cos( func, 0, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_DDX: + return 0; + break; + + case TGSI_OPCODE_DDY: + return 0; + break; + + case TGSI_OPCODE_KILP: + /* predicated kill */ + emit_kilp( func ); + return 0; /* XXX fix me */ + break; + + case TGSI_OPCODE_KIL: + /* conditional kill */ + emit_kil( func, &inst->FullSrcRegisters[0] ); + break; + + case TGSI_OPCODE_PK2H: + return 0; + break; + + case TGSI_OPCODE_PK2US: + return 0; + break; + + case TGSI_OPCODE_PK4B: + return 0; + break; + + case TGSI_OPCODE_PK4UB: + return 0; + break; + + case TGSI_OPCODE_RFL: + return 0; + break; + + case TGSI_OPCODE_SEQ: + return 0; + break; + + case TGSI_OPCODE_SFL: + return 0; + break; + + case TGSI_OPCODE_SGT: + return 0; + break; + + case TGSI_OPCODE_SIN: + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_sin( func, 0, 0 ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + break; + + case TGSI_OPCODE_SLE: + return 0; + break; + + case TGSI_OPCODE_SNE: + return 0; + break; + + case TGSI_OPCODE_STR: + return 0; + break; + + case TGSI_OPCODE_TEX: + if (0) { + /* Disable dummy texture code: + */ + emit_tempf( + func, + 0, + TEMP_ONE_I, + TEMP_ONE_C ); + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); + } + } + else { + return 0; + } + break; + + case TGSI_OPCODE_TXD: + return 0; + break; + + case TGSI_OPCODE_UP2H: + return 0; + break; + + case TGSI_OPCODE_UP2US: + return 0; + break; + + case TGSI_OPCODE_UP4B: + return 0; + break; + + case TGSI_OPCODE_UP4UB: + return 0; + break; + + case TGSI_OPCODE_X2D: + return 0; + break; + + case TGSI_OPCODE_ARA: + return 0; + break; + + case TGSI_OPCODE_ARR: + return 0; + break; + + case TGSI_OPCODE_BRA: + return 0; + break; + + case TGSI_OPCODE_CAL: + return 0; + break; + + case TGSI_OPCODE_RET: + emit_ret( func ); + break; + + case TGSI_OPCODE_END: + break; + + case TGSI_OPCODE_SSG: + return 0; + break; + + case TGSI_OPCODE_CMP: + emit_cmp (func, inst); + break; + + case TGSI_OPCODE_SCS: + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_cos( func, 0, 0 ); + STORE( func, *inst, 0, 0, CHAN_X ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { + FETCH( func, *inst, 0, 0, CHAN_X ); + emit_sin( func, 0, 0 ); + STORE( func, *inst, 0, 0, CHAN_Y ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { + emit_tempf( + func, + 0, + TGSI_EXEC_TEMP_00000000_I, + TGSI_EXEC_TEMP_00000000_C ); + STORE( func, *inst, 0, 0, CHAN_Z ); + } + IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { + emit_tempf( + func, + 0, + TEMP_ONE_I, + TEMP_ONE_C ); + STORE( func, *inst, 0, 0, CHAN_W ); + } + break; + + case TGSI_OPCODE_TXB: + return 0; + break; + + case TGSI_OPCODE_NRM: + return 0; + break; + + case TGSI_OPCODE_DIV: + return 0; + break; + + case TGSI_OPCODE_DP2: + return 0; + break; + + case TGSI_OPCODE_TXL: + return 0; + break; + + case TGSI_OPCODE_BRK: + return 0; + break; + + case TGSI_OPCODE_IF: + return 0; + break; + + case TGSI_OPCODE_LOOP: + return 0; + break; + + case TGSI_OPCODE_REP: + return 0; + break; + + case TGSI_OPCODE_ELSE: + return 0; + break; + + case TGSI_OPCODE_ENDIF: + return 0; + break; + + case TGSI_OPCODE_ENDLOOP: + return 0; + break; + + case TGSI_OPCODE_ENDREP: + return 0; + break; + + case TGSI_OPCODE_PUSHA: + return 0; + break; + + case TGSI_OPCODE_POPA: + return 0; + break; + + case TGSI_OPCODE_CEIL: + return 0; + break; + + case TGSI_OPCODE_I2F: + return 0; + break; + + case TGSI_OPCODE_NOT: + return 0; + break; + + case TGSI_OPCODE_TRUNC: + return 0; + break; + + case TGSI_OPCODE_SHL: + return 0; + break; + + case TGSI_OPCODE_SHR: + return 0; + break; + + case TGSI_OPCODE_AND: + return 0; + break; + + case TGSI_OPCODE_OR: + return 0; + break; + + case TGSI_OPCODE_MOD: + return 0; + break; + + case TGSI_OPCODE_XOR: + return 0; + break; + + case TGSI_OPCODE_SAD: + return 0; + break; + + case TGSI_OPCODE_TXF: + return 0; + break; + + case TGSI_OPCODE_TXQ: + return 0; + break; + + case TGSI_OPCODE_CONT: + return 0; + break; + + case TGSI_OPCODE_EMIT: + return 0; + break; + + case TGSI_OPCODE_ENDPRIM: + return 0; + break; + + default: + return 0; + } +#endif + + return 1; +} + +static void +emit_declaration( + struct ppc_function *func, + struct tgsi_full_declaration *decl ) +{ + if( decl->Declaration.File == TGSI_FILE_INPUT ) { +#if 0 + unsigned first, last, mask; + unsigned i, j; + + first = decl->DeclarationRange.First; + last = decl->DeclarationRange.Last; + mask = decl->Declaration.UsageMask; + + for( i = first; i <= last; i++ ) { + for( j = 0; j < NUM_CHANNELS; j++ ) { + if( mask & (1 << j) ) { + switch( decl->Declaration.Interpolate ) { + case TGSI_INTERPOLATE_CONSTANT: + emit_coef_a0( func, 0, i, j ); + emit_inputs( func, 0, i, j ); + break; + + case TGSI_INTERPOLATE_LINEAR: + emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); + emit_coef_dadx( func, 1, i, j ); + emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); + emit_coef_dady( func, 3, i, j ); + emit_mul( func, 0, 1 ); /* x * dadx */ + emit_coef_a0( func, 4, i, j ); + emit_mul( func, 2, 3 ); /* y * dady */ + emit_add( func, 0, 4 ); /* x * dadx + a0 */ + emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ + emit_inputs( func, 0, i, j ); + break; + + case TGSI_INTERPOLATE_PERSPECTIVE: + emit_tempf( func, 0, 0, TGSI_SWIZZLE_X ); + emit_coef_dadx( func, 1, i, j ); + emit_tempf( func, 2, 0, TGSI_SWIZZLE_Y ); + emit_coef_dady( func, 3, i, j ); + emit_mul( func, 0, 1 ); /* x * dadx */ + emit_tempf( func, 4, 0, TGSI_SWIZZLE_W ); + emit_coef_a0( func, 5, i, j ); + emit_rcp( func, 4, 4 ); /* 1.0 / w */ + emit_mul( func, 2, 3 ); /* y * dady */ + emit_add( func, 0, 5 ); /* x * dadx + a0 */ + emit_add( func, 0, 2 ); /* x * dadx + y * dady + a0 */ + emit_mul( func, 0, 4 ); /* (x * dadx + y * dady + a0) / w */ + emit_inputs( func, 0, i, j ); + break; + + default: + assert( 0 ); + break; + } + } + } + } +#endif + } +} + +#if 0 +static void aos_to_soa( struct x86_function *func, + uint arg_aos, + uint arg_soa, + uint arg_num, + uint arg_stride ) +{ + struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX ); + struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX ); + struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX ); + struct x86_reg stride = x86_make_reg( file_REG32, reg_DX ); + int inner_loop; + + + /* Save EBX */ + x86_push( func, x86_make_reg( file_REG32, reg_BX ) ); + + x86_mov( func, aos_input, x86_fn_arg( func, arg_aos ) ); + x86_mov( func, soa_input, x86_fn_arg( func, arg_soa ) ); + x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) ); + x86_mov( func, stride, x86_fn_arg( func, arg_stride ) ); + + /* do */ + inner_loop = x86_get_label( func ); + { + x86_push( func, aos_input ); + sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); + sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, stride ); + sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); + sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, stride ); + sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); + sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); + x86_add( func, aos_input, stride ); + sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); + sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); + x86_pop( func, aos_input ); + + sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); + sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); + sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 ); + sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd ); + sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 ); + sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd ); + + sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) ); + sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) ); + sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) ); + sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) ); + + /* Advance to next input */ + x86_lea( func, aos_input, x86_make_disp(aos_input, 16) ); + x86_lea( func, soa_input, x86_make_disp(soa_input, 64) ); + } + /* while --num_inputs */ + x86_dec( func, num_inputs ); + x86_jcc( func, cc_NE, inner_loop ); + + /* Restore EBX */ + x86_pop( func, aos_input ); +} +#endif + +#if 0 +static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride ) +{ + struct x86_reg soa_output; + struct x86_reg aos_output; + struct x86_reg num_outputs; + struct x86_reg temp; + int inner_loop; + + soa_output = x86_make_reg( file_REG32, reg_AX ); + aos_output = x86_make_reg( file_REG32, reg_BX ); + num_outputs = x86_make_reg( file_REG32, reg_CX ); + temp = x86_make_reg( file_REG32, reg_DX ); + + /* Save EBX */ + x86_push( func, aos_output ); + + x86_mov( func, soa_output, x86_fn_arg( func, soa ) ); + x86_mov( func, aos_output, x86_fn_arg( func, aos ) ); + x86_mov( func, num_outputs, x86_fn_arg( func, num ) ); + + /* do */ + inner_loop = x86_get_label( func ); + { + sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) ); + sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) ); + sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) ); + sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) ); + + sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); + sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); + sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) ); + sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) ); + sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) ); + sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) ); + + x86_mov( func, temp, x86_fn_arg( func, stride ) ); + x86_push( func, aos_output ); + sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); + sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); + x86_add( func, aos_output, temp ); + sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); + sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); + x86_add( func, aos_output, temp ); + sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); + sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); + x86_add( func, aos_output, temp ); + sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); + sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); + x86_pop( func, aos_output ); + + /* Advance to next output */ + x86_lea( func, aos_output, x86_make_disp(aos_output, 16) ); + x86_lea( func, soa_output, x86_make_disp(soa_output, 64) ); + } + /* while --num_outputs */ + x86_dec( func, num_outputs ); + x86_jcc( func, cc_NE, inner_loop ); + + /* Restore EBX */ + x86_pop( func, aos_output ); +} +#endif + + +static void +emit_prologue(struct ppc_function *func) +{ + /* XXX set up stack frame */ +} + + +static void +emit_epilogue(struct ppc_function *func) +{ + ppc_return(func); + /* XXX restore prev stack frame */ +} + + + +/** + * Translate a TGSI vertex/fragment shader to PPC code. + * + * \param tokens the TGSI input shader + * \param func the output PPC code/function + * \param immediates buffer to place immediates, later passed to PPC func + * \return TRUE for success, FALSE if translation failed + */ +boolean +tgsi_emit_ppc(const struct tgsi_token *tokens, + struct ppc_function *func, + float (*immediates)[4], + boolean do_swizzles ) +{ + struct tgsi_parse_context parse; + /*boolean instruction_phase = FALSE;*/ + unsigned ok = 1; + uint num_immediates = 0; + struct gen_context gen; + + util_init_math(); + + tgsi_parse_init( &parse, tokens ); + + gen.f = func; + gen.inputs_reg = 3; /* first function param */ + gen.outputs_reg = 4; /* second function param */ + gen.temps_reg = 5; /* ... */ + gen.immed_reg = 6; + gen.const_reg = 7; + + emit_prologue(func); + + /* + * Different function args for vertex/fragment shaders: + */ +#if 0 + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + /* DECLARATION phase, do not load output argument. */ + x86_mov( + func, + get_input_base(), + x86_fn_arg( func, 1 ) ); + /* skipping outputs argument here */ + x86_mov( + func, + get_const_base(), + x86_fn_arg( func, 3 ) ); + x86_mov( + func, + get_temp_base(), + x86_fn_arg( func, 4 ) ); + x86_mov( + func, + get_coef_base(), + x86_fn_arg( func, 5 ) ); + x86_mov( + func, + get_immediate_base(), + x86_fn_arg( func, 6 ) ); + } + else { + assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX); + + if (do_swizzles) + aos_to_soa( func, + 6, /* aos_input */ + 1, /* machine->input */ + 7, /* num_inputs */ + 8 ); /* input_stride */ + + x86_mov( + func, + get_input_base(), + x86_fn_arg( func, 1 ) ); + x86_mov( + func, + get_output_base(), + x86_fn_arg( func, 2 ) ); + x86_mov( + func, + get_const_base(), + x86_fn_arg( func, 3 ) ); + x86_mov( + func, + get_temp_base(), + x86_fn_arg( func, 4 ) ); + x86_mov( + func, + get_immediate_base(), + x86_fn_arg( func, 5 ) ); + } +#endif + + while (!tgsi_parse_end_of_tokens(&parse) && ok) { + tgsi_parse_token(&parse); + + switch (parse.FullToken.Token.Type) { + case TGSI_TOKEN_TYPE_DECLARATION: + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { + emit_declaration(func, &parse.FullToken.FullDeclaration ); + } + break; + + case TGSI_TOKEN_TYPE_INSTRUCTION: + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { +#if 0 + if( !instruction_phase ) { + /* INSTRUCTION phase, overwrite coeff with output. */ + instruction_phase = TRUE; + x86_mov( + func, + get_output_base(), + x86_fn_arg( func, 2 ) ); + } +#endif + } + + ok = emit_instruction(&gen, &parse.FullToken.FullInstruction); + + if (!ok) { + debug_printf("failed to translate tgsi opcode %d to PPC (%s)\n", + parse.FullToken.FullInstruction.Instruction.Opcode, + parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ? + "vertex shader" : "fragment shader"); + } + break; + + case TGSI_TOKEN_TYPE_IMMEDIATE: + /* splat each immediate component into a float[4] vector for SoA */ + { + const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; + float *imm = (float *) immediates; + uint i; + assert(size <= 4); + assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); + for (i = 0; i < size; i++) { + const float value = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; + imm[num_immediates * 4 + 0] = + imm[num_immediates * 4 + 1] = + imm[num_immediates * 4 + 2] = + imm[num_immediates * 4 + 3] = value; + num_immediates++; + } + } + break; + + default: + ok = 0; + assert( 0 ); + } + } + +#if 0 + if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) { + if (do_swizzles) + soa_to_aos( func, 9, 2, 10, 11 ); + } +#endif + + emit_epilogue(func); + + tgsi_parse_free( &parse ); + + return ok; +} + +#endif /* PIPE_ARCH_PPC */ diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.h b/src/gallium/auxiliary/tgsi/tgsi_ppc.h new file mode 100644 index 0000000000..7cd2bf9aff --- /dev/null +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.h @@ -0,0 +1,48 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef TGSI_PPC_H +#define TGSI_PPC_H + +#if defined __cplusplus +extern "C" { +#endif + +struct tgsi_token; +struct ppc_function; + +boolean +tgsi_emit_ppc(const struct tgsi_token *tokens, + struct ppc_function *function, + float (*immediates)[4], + boolean do_swizzles); + +#if defined __cplusplus +} +#endif + +#endif /* TGSI_PPC_H */ -- cgit v1.2.3 From b7da4c3dc199ee382bb9924ac86a3485deccc62d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 11:08:45 -0600 Subject: gallium: PPC vertex shader support Works, but dead code lingering, debug code present, etc. --- src/gallium/auxiliary/draw/Makefile | 1 + src/gallium/auxiliary/draw/draw_vs.c | 5 +- src/gallium/auxiliary/draw/draw_vs.h | 4 + src/gallium/auxiliary/draw/draw_vs_ppc.c | 270 +++++++++++++++++++++++++++++++ 4 files changed, 279 insertions(+), 1 deletion(-) create mode 100644 src/gallium/auxiliary/draw/draw_vs_ppc.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/Makefile b/src/gallium/auxiliary/draw/Makefile index f2e36a89e9..bdbf5a08ed 100644 --- a/src/gallium/auxiliary/draw/Makefile +++ b/src/gallium/auxiliary/draw/Makefile @@ -40,6 +40,7 @@ C_SOURCES = \ draw_vs_aos_machine.c \ draw_vs_exec.c \ draw_vs_llvm.c \ + draw_vs_ppc.c \ draw_vs_sse.c diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 34adbd49b0..7f305304ff 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -85,7 +85,10 @@ draw_create_vertex_shader(struct draw_context *draw, if (!vs) { vs = draw_create_vs_sse( draw, shader ); if (!vs) { - vs = draw_create_vs_exec( draw, shader ); + vs = draw_create_vs_ppc( draw, shader ); + if (!vs) { + vs = draw_create_vs_exec( draw, shader ); + } } } diff --git a/src/gallium/auxiliary/draw/draw_vs.h b/src/gallium/auxiliary/draw/draw_vs.h index 68c24abad3..89ae158751 100644 --- a/src/gallium/auxiliary/draw/draw_vs.h +++ b/src/gallium/auxiliary/draw/draw_vs.h @@ -157,6 +157,10 @@ struct draw_vertex_shader * draw_create_vs_sse(struct draw_context *draw, const struct pipe_shader_state *templ); +struct draw_vertex_shader * +draw_create_vs_ppc(struct draw_context *draw, + const struct pipe_shader_state *templ); + struct draw_vertex_shader * draw_create_vs_llvm(struct draw_context *draw, const struct pipe_shader_state *templ); diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c new file mode 100644 index 0000000000..a096ad49b8 --- /dev/null +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -0,0 +1,270 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + /* + * Authors: + * Keith Whitwell + * Brian Paul + */ + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "pipe/p_config.h" + +#include "draw_vs.h" + +#if defined(PIPE_ARCH_PPC) + +#include "pipe/p_shader_tokens.h" + +#include "draw_private.h" +#include "draw_context.h" + +#include "rtasm/rtasm_cpu.h" +#include "rtasm/rtasm_ppc.h" +#include "tgsi/tgsi_ppc.h" +#include "tgsi/tgsi_parse.h" + + + +typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4], + float (*outputs)[4][4], + float (*temps)[4][4], + float (*immeds)[4][4], + float (*consts)[4]); + +#if 0 + const struct tgsi_exec_vector *input, + struct tgsi_exec_vector *output, + float (*constant)[4], /* 3 */ + struct tgsi_exec_vector *temporary, /* 4 */ + float (*immediates)[4], /* 5 */ + const float (*aos_input)[4], /* 6 */ + uint num_inputs, /* 7 */ + uint input_stride, /* 8 */ + float (*aos_output)[4], /* 9 */ + uint num_outputs, /* 10 */ + uint output_stride ); /* 11 */ +#endif + +struct draw_ppc_vertex_shader { + struct draw_vertex_shader base; + struct ppc_function ppc_program; + + codegen_function func; + + struct tgsi_exec_machine *machine; +}; + + +static void +vs_ppc_prepare( struct draw_vertex_shader *base, + struct draw_context *draw ) +{ +} + + + +/* Simplified vertex shader interface for the pt paths. Given the + * complexity of code-generating all the above operations together, + * it's time to try doing all the other stuff separately. + */ +static void +vs_ppc_run_linear( struct draw_vertex_shader *base, + const float (*input)[4], + float (*output)[4], + const float (*constants)[4], + unsigned count, + unsigned input_stride, + unsigned output_stride ) +{ + struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base; + struct tgsi_exec_machine *machine = shader->machine; + unsigned int i; + +#define MAX_VERTICES 4 + + /* loop over verts */ + for (i = 0; i < count; i += MAX_VERTICES) { + const uint max_vertices = MIN2(MAX_VERTICES, count - i); + float inputs_soa[PIPE_MAX_SHADER_INPUTS][4][4] ALIGN16_ATTRIB; + float outputs_soa[PIPE_MAX_SHADER_OUTPUTS][4][4] ALIGN16_ATTRIB; + float temps_soa[TGSI_EXEC_NUM_TEMPS][4][4] ALIGN16_ATTRIB; + uint attr; + + /* convert (up to) four input verts to SoA format */ + for (attr = 0; attr < base->info.num_inputs; attr++) { + const float *vIn = (const float *) input; + uint vert; + for (vert = 0; vert < max_vertices; vert++) { +#if 0 + if (attr==0) + printf("Input v%d a%d: %f %f %f %f\n", + vert, attr, vIn[0], vIn[1], vIn[2], vIn[3]); +#endif + inputs_soa[attr][0][vert] = vIn[attr * 4 + 0]; + inputs_soa[attr][1][vert] = vIn[attr * 4 + 1]; + inputs_soa[attr][2][vert] = vIn[attr * 4 + 2]; + inputs_soa[attr][3][vert] = vIn[attr * 4 + 3]; + vIn += input_stride / 4; + } + } + + /* run compiled shader + */ +#if 0 + shader->func(machine->Inputs, + machine->Outputs, + (float (*)[4])constants, + machine->Temps, + (float (*)[4])shader->base.immediates, + input, + base->info.num_inputs, + input_stride, + output, + base->info.num_outputs, + output_stride ); +#else + shader->func(inputs_soa, outputs_soa, temps_soa, + (float (*)[4][4]) shader->base.immediates, + (float (*)[4]) constants); + + /*output[0][0] = input[0][0] * 0.5;*/ +#endif + + /* convert (up to) four output verts from SoA back to AoS format */ + for (attr = 0; attr < base->info.num_outputs; attr++) { + float *vOut = (float *) output; + uint vert; + for (vert = 0; vert < max_vertices; vert++) { + vOut[attr * 4 + 0] = outputs_soa[attr][0][vert]; + vOut[attr * 4 + 1] = outputs_soa[attr][1][vert]; + vOut[attr * 4 + 2] = outputs_soa[attr][2][vert]; + vOut[attr * 4 + 3] = outputs_soa[attr][3][vert]; +#if 0 + if (attr==0) + printf("Output v%d a%d: %f %f %f %f\n", + vert, attr, vOut[0], vOut[1], vOut[2], vOut[3]); +#endif + vOut += output_stride / 4; + } + } + + /* advance to next group of four input/output verts */ + input = (const float (*)[4])((const char *)input + input_stride * max_vertices); + output = (float (*)[4])((char *)output + output_stride * max_vertices); + } +} + + + + +static void +vs_ppc_delete( struct draw_vertex_shader *base ) +{ + struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base; + + ppc_release_func( &shader->ppc_program ); + + align_free( (void *) shader->base.immediates ); + + FREE( (void*) shader->base.state.tokens ); + FREE( shader ); +} + + +struct draw_vertex_shader * +draw_create_vs_ppc(struct draw_context *draw, + const struct pipe_shader_state *templ) +{ + struct draw_ppc_vertex_shader *vs; + + vs = CALLOC_STRUCT( draw_ppc_vertex_shader ); + if (vs == NULL) + return NULL; + + /* we make a private copy of the tokens */ + vs->base.state.tokens = tgsi_dup_tokens(templ->tokens); + if (!vs->base.state.tokens) + goto fail; + + tgsi_scan_shader(templ->tokens, &vs->base.info); + + vs->base.draw = draw; +#if 0 + if (1) + vs->base.create_varient = draw_vs_varient_aos_ppc; + else +#endif + vs->base.create_varient = draw_vs_varient_generic; + vs->base.prepare = vs_ppc_prepare; + vs->base.run_linear = vs_ppc_run_linear; + vs->base.delete = vs_ppc_delete; + + vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * 4 * + sizeof(float), 16); + + vs->machine = &draw->vs.machine; + + ppc_init_func( &vs->ppc_program, 1000 ); /* XXX fix limit */ + + if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens, + &vs->ppc_program, + (float (*)[4])vs->base.immediates, + TRUE )) + goto fail; + + vs->func = (codegen_function) ppc_get_func( &vs->ppc_program ); + if (!vs->func) { + goto fail; + } + + return &vs->base; + +fail: + debug_error("tgsi_emit_ppc() failed, falling back to interpreter\n"); + + ppc_release_func( &vs->ppc_program ); + + FREE(vs); + return NULL; +} + + + +#else /* PIPE_ARCH_PPC */ + + +struct draw_vertex_shader * +draw_create_vs_ppc( struct draw_context *draw, + const struct pipe_shader_state *templ ) +{ + return (void *) 0; +} + + +#endif /* PIPE_ARCH_PPC */ -- cgit v1.2.3 From ba4faef7c07c47ad4f71f3e6ba94cb54217c56ed Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 11:13:31 -0600 Subject: gallium: temporarily disable PPC vertex shader until more things run --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index a096ad49b8..990a659f27 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -203,6 +203,9 @@ draw_create_vs_ppc(struct draw_context *draw, { struct draw_ppc_vertex_shader *vs; + /* XXX temporary short-circuit */ + return NULL; + vs = CALLOC_STRUCT( draw_ppc_vertex_shader ); if (vs == NULL) return NULL; -- cgit v1.2.3 From ebdc399d83d6bd2f4e3594874483dbca5f9f5c0e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 13:57:56 -0600 Subject: gallium: fix-up confusing register allocation masks in rtasm_ppc.c Plus, add ppc_reserve_register() func. --- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 56 ++++++++++++++++++++------------- src/gallium/auxiliary/rtasm/rtasm_ppc.h | 1 + 2 files changed, 36 insertions(+), 21 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index aaec2d2191..2d9f4e079e 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -49,13 +49,15 @@ ppc_init_func(struct ppc_function *p, unsigned max_inst) p->store = align_malloc(max_inst * PPC_INST_SIZE, 16); p->num_inst = 0; p->max_inst = max_inst; - p->fp_used = ~0x0; - p->vec_used = ~0x0; - - /* only allow using gp registers 7..12 for now */ p->reg_used = 0x0; - for (i = 7; i < 13; i++) - p->reg_used |= (1 << i); + p->fp_used = 0x0; + p->vec_used = 0x0; + + /* only allow using gp registers 3..12 for now */ + for (i = 0; i < 3; i++) + ppc_reserve_register(p, i); + for (i = 12; i < PPC_NUM_REGS; i++) + ppc_reserve_register(p, i); } @@ -95,6 +97,18 @@ ppc_dump_func(const struct ppc_function *p) } +/** + * Mark a register as being unavailable. + */ +int +ppc_reserve_register(struct ppc_function *p, int reg) +{ + assert(reg < PPC_NUM_REGS); + p->reg_used |= (1 << reg); + return reg; +} + + /** * Allocate a general purpose register. * \return register index or -1 if none left. @@ -105,8 +119,8 @@ ppc_allocate_register(struct ppc_function *p) unsigned i; for (i = 0; i < PPC_NUM_REGS; i++) { const uint64_t mask = 1 << i; - if ((p->reg_used & mask) != 0) { - p->reg_used &= ~mask; + if ((p->reg_used & mask) == 0) { + p->reg_used |= mask; return i; } } @@ -121,8 +135,8 @@ void ppc_release_register(struct ppc_function *p, int reg) { assert(reg < PPC_NUM_REGS); - assert((p->reg_used & (1 << reg)) == 0); - p->reg_used |= (1 << reg); + assert(p->reg_used & (1 << reg)); + p->reg_used &= ~(1 << reg); } @@ -136,8 +150,8 @@ ppc_allocate_fp_register(struct ppc_function *p) unsigned i; for (i = 0; i < PPC_NUM_FP_REGS; i++) { const uint64_t mask = 1 << i; - if ((p->fp_used & mask) != 0) { - p->fp_used &= ~mask; + if ((p->fp_used & mask) == 0) { + p->fp_used |= mask; return i; } } @@ -152,8 +166,8 @@ void ppc_release_fp_register(struct ppc_function *p, int reg) { assert(reg < PPC_NUM_FP_REGS); - assert((p->fp_used & (1 << reg)) == 0); - p->fp_used |= (1 << reg); + assert(p->fp_used & (1 << reg)); + p->fp_used &= ~(1 << reg); } @@ -167,8 +181,8 @@ ppc_allocate_vec_register(struct ppc_function *p) unsigned i; for (i = 0; i < PPC_NUM_VEC_REGS; i++) { const uint64_t mask = 1 << i; - if ((p->vec_used & mask) != 0) { - p->vec_used &= ~mask; + if ((p->vec_used & mask) == 0) { + p->vec_used |= mask; return i; } } @@ -183,8 +197,8 @@ void ppc_release_vec_register(struct ppc_function *p, int reg) { assert(reg < PPC_NUM_VEC_REGS); - assert((p->vec_used & (1 << reg)) == 0); - p->vec_used |= (1 << reg); + assert(p->vec_used & (1 << reg)); + p->vec_used &= ~(1 << reg); } @@ -582,11 +596,11 @@ ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB) emit_x(p, 31, vR, vA, vB, 103); } -/** load vector element word: vR = mem_word[vA+vB] */ +/** load vector element word: vR = mem_word[ra+rb] */ void -ppc_lvewx(struct ppc_function *p, uint vR, uint vA, uint vB) +ppc_lvewx(struct ppc_function *p, uint vr, uint ra, uint rb) { - emit_x(p, 31, vR, vA, vB, 71); + emit_x(p, 31, vr, ra, rb, 71); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index 53d5746dc8..85679b4886 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -67,6 +67,7 @@ extern void ppc_release_func(struct ppc_function *p); extern void (*ppc_get_func( struct ppc_function *p ))( void ); extern void ppc_dump_func(const struct ppc_function *p); +extern int ppc_reserve_register(struct ppc_function *p, int reg); extern int ppc_allocate_register(struct ppc_function *p); extern void ppc_release_register(struct ppc_function *p, int reg); extern int ppc_allocate_fp_register(struct ppc_function *p); -- cgit v1.2.3 From da63edd720fc154820fcbf699e1056ac9357a03f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 13:59:11 -0600 Subject: gallium: fix broken TGSI_FILE_CONSTANT case, use ppc_reserver_register() --- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 112e736523..dbf215c0d5 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -1108,10 +1108,15 @@ emit_fetch(struct gen_context *gen, int offset_reg = ppc_allocate_register(gen->f); int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; ppc_li(gen->f, offset_reg, offset); - /* load vector word */ + /* Load 4-byte word into vector register. + * The vector slot depends on the effective address we load from. + * We know that our constants start at a 16-byte boundary so we + * know that 'swizzle' tells us which vector slot will have the + * loaded word. The other vector slots will be undefined. + */ ppc_lvewx(gen->f, vec_reg, gen->const_reg, offset_reg); - /* splat word[0] across vector */ - ppc_vspltw(gen->f, vec_reg, vec_reg, 0); + /* splat word[swizzle] across the vector reg */ + ppc_vspltw(gen->f, vec_reg, vec_reg, swizzle); ppc_release_register(gen->f, offset_reg); } break; @@ -2635,11 +2640,11 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, tgsi_parse_init( &parse, tokens ); gen.f = func; - gen.inputs_reg = 3; /* first function param */ - gen.outputs_reg = 4; /* second function param */ - gen.temps_reg = 5; /* ... */ - gen.immed_reg = 6; - gen.const_reg = 7; + gen.inputs_reg = ppc_reserve_register(func, 3); /* first function param */ + gen.outputs_reg = ppc_reserve_register(func, 4); /* second function param */ + gen.temps_reg = ppc_reserve_register(func, 5); /* ... */ + gen.immed_reg = ppc_reserve_register(func, 6); + gen.const_reg = ppc_reserve_register(func, 7); emit_prologue(func); -- cgit v1.2.3 From b06d0720194dfecaf45dc97cbd178411aed5205f Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 14:48:33 -0600 Subject: gallium: added ppc_vload_float(), for limited cases --- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 18 ++++++++++++++++++ src/gallium/auxiliary/rtasm/rtasm_ppc.h | 4 ++++ 2 files changed, 22 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index 2d9f4e079e..65df676eae 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -603,6 +603,24 @@ ppc_lvewx(struct ppc_function *p, uint vr, uint ra, uint rb) emit_x(p, 31, vr, ra, rb, 71); } +/** vector load float: vr = splats(imm) */ +void +ppc_vload_float(struct ppc_function *p, uint vr, float imm) +{ + if (imm == 0.0f) { + ppc_vxor(p, vr, vr, vr); + } + else if (imm == 1.0f) { + /* use 2^0=1 to get 1.0 */ + ppc_vxor(p, vr, vr, vr); /* vr = {0,0,0,0} */ + ppc_vexptefp(p, vr, vr); /* vr = 0^0 */ + } + else { + assert(0); + } +} + + /** diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index 85679b4886..9f1e3fcd84 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -158,6 +158,10 @@ ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB); extern void ppc_lvewx(struct ppc_function *p, uint vR, uint vA, uint vB); +/** vector load float: vr = splats(imm) */ +extern void +ppc_vload_float(struct ppc_function *p, uint vr, float imm); + /** -- cgit v1.2.3 From 51840065607337210fbba5ba1c01874293fbb42e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 14:48:58 -0600 Subject: gallium: TGSI->PPC inequality operators --- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 70 +++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index dbf215c0d5..9bf364b8c4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -1495,6 +1495,68 @@ emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) } +/** + * Vector comparisons, resulting in 1.0 or 0.0 values. + */ +static void +emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0 = ppc_allocate_vec_register(gen->f); + int v1 = ppc_allocate_vec_register(gen->f); + int v2 = ppc_allocate_vec_register(gen->f); + int v_one = ppc_allocate_vec_register(gen->f); + uint chan_index; + boolean complement = FALSE; + + /* v_one = splat(1.0) */ + ppc_vload_float(gen->f, v_one, 1.0f); + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ + FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_SNE: + complement = TRUE; + /* fall-through */ + case TGSI_OPCODE_SEQ: + ppc_vcmpeqfpx(gen->f, v2, v0, v1); /* v2 = v0 == v1 ? ~0 : 0 */ + break; + + case TGSI_OPCODE_SGE: + complement = TRUE; + /* fall-through */ + case TGSI_OPCODE_SLT: + ppc_vcmpgtfpx(gen->f, v2, v1, v0); /* v2 = v1 > v0 ? ~0 : 0 */ + break; + + case TGSI_OPCODE_SLE: + complement = TRUE; + /* fall-through */ + case TGSI_OPCODE_SGT: + ppc_vcmpgtfpx(gen->f, v2, v0, v1); /* v2 = v0 > v1 ? ~0 : 0 */ + break; + default: + assert(0); + } + + /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */ + + if (complement) + ppc_vandc(gen->f, v2, v_one, v2); /* v2 = v_one & ~v2 */ + else + ppc_vand(gen->f, v2, v_one, v2); /* v2 = v_one & v2 */ + + STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ + } + + ppc_release_vec_register(gen->f, v0); + ppc_release_vec_register(gen->f, v1); + ppc_release_vec_register(gen->f, v2); + ppc_release_vec_register(gen->f, v_one); +} + + static void emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst) { @@ -1588,6 +1650,14 @@ emit_instruction(struct gen_context *gen, case TGSI_OPCODE_MAX: emit_binop(gen, inst); break; + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SNE: + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SGE: + emit_inequality(gen, inst); + break; case TGSI_OPCODE_MAD: case TGSI_OPCODE_LRP: emit_triop(gen, inst); -- cgit v1.2.3 From c6ff870836e7c970f1030e9e0fbdd0cb5df40d29 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 15:21:22 -0600 Subject: cell: TGSI->PPC for RSQ, RCP and src register sign modes --- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 162 ++++++++++++++++++++++++---------- 1 file changed, 116 insertions(+), 46 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 9bf364b8c4..3637772102 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -84,11 +84,14 @@ struct gen_context { struct ppc_function *f; - int inputs_reg; /**< register pointing to input params */ - int outputs_reg; /**< register pointing to output params */ - int temps_reg; /**< register pointing to temporary "registers" */ - int immed_reg; /**< register pointing to immediates buffer */ - int const_reg; /**< register pointing to constants buffer */ + int inputs_reg; /**< GP register pointing to input params */ + int outputs_reg; /**< GP register pointing to output params */ + int temps_reg; /**< GP register pointing to temporary "registers" */ + int immed_reg; /**< GP register pointing to immediates buffer */ + int const_reg; /**< GP register pointing to constants buffer */ + + int one_vec; /**< vector register with {1.0, 1.0, 1.0, 1.0} */ + int bit31_vec; /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */ }; @@ -1059,6 +1062,35 @@ emit_sub( #endif +/** + * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}. + */ +static int +gen_one_vec(struct gen_context *gen) +{ + if (gen->one_vec < 0) { + gen->one_vec = ppc_allocate_vec_register(gen->f); + ppc_vload_float(gen->f, gen->one_vec, 1.0f); + } + return gen->one_vec; +} + +/** + * Return index of vector register containing {1<<31, 1<<31, 1<<31, 1<<31}. + */ +static int +gen_get_bit31_vec(struct gen_context *gen) +{ + if (gen->bit31_vec < 0) { + gen->bit31_vec = ppc_allocate_vec_register(gen->f); + ppc_vspltisw(gen->f, gen->bit31_vec, -1); + ppc_vslw(gen->f, gen->bit31_vec, gen->bit31_vec, gen->bit31_vec); + } + return gen->bit31_vec; +} + + + /** * Register fetch. */ @@ -1124,49 +1156,42 @@ emit_fetch(struct gen_context *gen, assert( 0 ); } break; - case TGSI_EXTSWIZZLE_ZERO: -#if 0 - emit_tempf( - func, - xmm, - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ); -#endif + ppc_vload_float(gen->f, vec_reg, 0.0f); break; - case TGSI_EXTSWIZZLE_ONE: -#if 0 - emit_tempf( - func, - xmm, - TEMP_ONE_I, - TEMP_ONE_C ); -#endif + { + int one_vec = gen_one_vec(gen); + ppc_vecmove(gen->f, vec_reg, one_vec); + } break; - default: assert( 0 ); } -#if 0 - switch( tgsi_util_get_full_src_register_sign_mode( reg, chan_index ) ) { - case TGSI_UTIL_SIGN_CLEAR: - emit_abs( func, xmm ); - break; - - case TGSI_UTIL_SIGN_SET: - emit_setsign( func, xmm ); - break; - - case TGSI_UTIL_SIGN_TOGGLE: - emit_neg( func, xmm ); - break; - - case TGSI_UTIL_SIGN_KEEP: - break; + { + uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index); + if (sign_op != TGSI_UTIL_SIGN_KEEP) { + int bit31_vec = gen_get_bit31_vec(gen); + + switch (sign_op) { + case TGSI_UTIL_SIGN_CLEAR: + /* vec = vec & ~bit31 */ + ppc_vandc(gen->f, vec_reg, vec_reg, bit31_vec); + break; + case TGSI_UTIL_SIGN_SET: + /* vec = vec | bit31 */ + ppc_vor(gen->f, vec_reg, vec_reg, bit31_vec); + break; + case TGSI_UTIL_SIGN_TOGGLE: + /* vec = vec ^ bit31 */ + ppc_vxor(gen->f, vec_reg, vec_reg, bit31_vec); + break; + default: + assert(0); + } + } } -#endif } #define FETCH( GEN, INST, VEC_REG, SRC_REG, CHAN ) \ @@ -1409,6 +1434,36 @@ emit_cmp( #endif +static void +emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0 = ppc_allocate_vec_register(gen->f); + int v1 = ppc_allocate_vec_register(gen->f); + uint chan_index; + + FETCH(gen, *inst, v0, 0, CHAN_X); + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_RSQ: + /* v1 = 1.0 / sqrt(v0) */ + ppc_vrsqrtefp(gen->f, v1, v0); + break; + case TGSI_OPCODE_RCP: + /* v1 = 1.0 / v0 */ + ppc_vrefp(gen->f, v1, v0); + break; + default: + assert(0); + } + + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE(gen, *inst, v1, 0, chan_index); + } + ppc_release_vec_register(gen->f, v0); + ppc_release_vec_register(gen->f, v1); +} + + static void emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) { @@ -1504,12 +1559,9 @@ emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst) int v0 = ppc_allocate_vec_register(gen->f); int v1 = ppc_allocate_vec_register(gen->f); int v2 = ppc_allocate_vec_register(gen->f); - int v_one = ppc_allocate_vec_register(gen->f); uint chan_index; boolean complement = FALSE; - - /* v_one = splat(1.0) */ - ppc_vload_float(gen->f, v_one, 1.0f); + int one_vec = gen_one_vec(gen); FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ @@ -1543,9 +1595,9 @@ emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst) /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */ if (complement) - ppc_vandc(gen->f, v2, v_one, v2); /* v2 = v_one & ~v2 */ + ppc_vandc(gen->f, v2, one_vec, v2); /* v2 = one_vec & ~v2 */ else - ppc_vand(gen->f, v2, v_one, v2); /* v2 = v_one & v2 */ + ppc_vand(gen->f, v2, one_vec, v2); /* v2 = one_vec & v2 */ STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ } @@ -1553,7 +1605,6 @@ emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst) ppc_release_vec_register(gen->f, v0); ppc_release_vec_register(gen->f, v1); ppc_release_vec_register(gen->f, v2); - ppc_release_vec_register(gen->f, v_one); } @@ -1630,6 +1681,14 @@ emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) } +/* +static void +emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ +} +*/ + + static int emit_instruction(struct gen_context *gen, struct tgsi_full_instruction *inst) @@ -1643,6 +1702,10 @@ emit_instruction(struct gen_context *gen, case TGSI_OPCODE_LOGBASE2: emit_unaryop(gen, inst); break; + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_RCP: + emit_scalar_unaryop(gen, inst); + break; case TGSI_OPCODE_ADD: case TGSI_OPCODE_SUB: case TGSI_OPCODE_MUL: @@ -1667,6 +1730,11 @@ emit_instruction(struct gen_context *gen, case TGSI_OPCODE_DPH: emit_dotprod(gen, inst); break; + /* + case TGSI_OPCODE_LIT: + emit_lit(gen, inst); + break; + */ case TGSI_OPCODE_END: /* normal end */ return 1; @@ -2715,6 +2783,8 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, gen.temps_reg = ppc_reserve_register(func, 5); /* ... */ gen.immed_reg = ppc_reserve_register(func, 6); gen.const_reg = ppc_reserve_register(func, 7); + gen.one_vec = -1; + gen.bit31_vec = -1; emit_prologue(func); -- cgit v1.2.3 From 7b1d08738f30d0fec2f07568b16e08c4fdddeeac Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 15:25:04 -0600 Subject: cell: turn on PPC assembly vertex transform gears runs with it now (3x faster FPS than before). --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index 990a659f27..fcc9cbfec5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -203,9 +203,6 @@ draw_create_vs_ppc(struct draw_context *draw, { struct draw_ppc_vertex_shader *vs; - /* XXX temporary short-circuit */ - return NULL; - vs = CALLOC_STRUCT( draw_ppc_vertex_shader ); if (vs == NULL) return NULL; @@ -233,7 +230,7 @@ draw_create_vs_ppc(struct draw_context *draw, vs->machine = &draw->vs.machine; - ppc_init_func( &vs->ppc_program, 1000 ); /* XXX fix limit */ + ppc_init_func( &vs->ppc_program, 2000 ); /* XXX fix limit */ if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens, &vs->ppc_program, -- cgit v1.2.3 From 519c2dbed57b3c5e1717a62df5d5f8b908a1acd6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 15:30:00 -0600 Subject: gallium: remove SSE remnants from tgsi_ppc.c --- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 2987 +++++---------------------------- 1 file changed, 417 insertions(+), 2570 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 3637772102..432ec7459b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -44,14 +44,6 @@ #include "rtasm/rtasm_ppc.h" -/* for 1/sqrt() - * - * This costs about 100fps (close to 10%) in gears: - */ -#define HIGH_PRECISION 1 - -#define FAST_MATH 1 - #define FOR_EACH_CHANNEL( CHAN )\ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) @@ -95,2452 +87,515 @@ struct gen_context }; - -#if 0000 - /** - * X86 utility functions. + * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}. */ - -static struct x86_reg -make_xmm( - unsigned xmm ) +static int +gen_one_vec(struct gen_context *gen) { - return x86_make_reg( - file_XMM, - (enum x86_reg_name) xmm ); + if (gen->one_vec < 0) { + gen->one_vec = ppc_allocate_vec_register(gen->f); + ppc_vload_float(gen->f, gen->one_vec, 1.0f); + } + return gen->one_vec; } /** - * X86 register mapping helpers. + * Return index of vector register containing {1<<31, 1<<31, 1<<31, 1<<31}. */ - -static struct x86_reg -get_const_base( void ) -{ - return x86_make_reg( - file_REG32, - reg_CX ); -} - -static struct x86_reg -get_input_base( void ) -{ - return x86_make_reg( - file_REG32, - reg_AX ); -} - -static struct x86_reg -get_output_base( void ) -{ - return x86_make_reg( - file_REG32, - reg_DX ); -} - -static struct x86_reg -get_temp_base( void ) -{ - return x86_make_reg( - file_REG32, - reg_BX ); -} - -static struct x86_reg -get_coef_base( void ) +static int +gen_get_bit31_vec(struct gen_context *gen) { - return get_output_base(); + if (gen->bit31_vec < 0) { + gen->bit31_vec = ppc_allocate_vec_register(gen->f); + ppc_vspltisw(gen->f, gen->bit31_vec, -1); + ppc_vslw(gen->f, gen->bit31_vec, gen->bit31_vec, gen->bit31_vec); + } + return gen->bit31_vec; } -static struct x86_reg -get_immediate_base( void ) -{ - return x86_make_reg( - file_REG32, - reg_DI ); -} /** - * Data access helpers. + * Register fetch. */ - - -static struct x86_reg -get_immediate( - unsigned vec, - unsigned chan ) -{ - return x86_make_disp( - get_immediate_base(), - (vec * 4 + chan) * 4 ); -} - -static struct x86_reg -get_const( - unsigned vec, - unsigned chan ) -{ - return x86_make_disp( - get_const_base(), - (vec * 4 + chan) * 4 ); -} - -static struct x86_reg -get_input( - unsigned vec, - unsigned chan ) +static void +emit_fetch(struct gen_context *gen, + unsigned vec_reg, + const struct tgsi_full_src_register *reg, + const unsigned chan_index) { - return x86_make_disp( - get_input_base(), - (vec * 4 + chan) * 16 ); -} + uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index); -static struct x86_reg -get_output( - unsigned vec, - unsigned chan ) -{ - return x86_make_disp( - get_output_base(), - (vec * 4 + chan) * 16 ); -} + switch (swizzle) { + case TGSI_EXTSWIZZLE_X: + case TGSI_EXTSWIZZLE_Y: + case TGSI_EXTSWIZZLE_Z: + case TGSI_EXTSWIZZLE_W: + switch (reg->SrcRegister.File) { + case TGSI_FILE_INPUT: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + ppc_li(gen->f, offset_reg, offset); + ppc_lvx(gen->f, vec_reg, gen->inputs_reg, offset_reg); + ppc_release_register(gen->f, offset_reg); + } + break; + case TGSI_FILE_TEMPORARY: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + ppc_li(gen->f, offset_reg, offset); + ppc_lvx(gen->f, vec_reg, gen->temps_reg, offset_reg); + ppc_release_register(gen->f, offset_reg); + } + break; + case TGSI_FILE_IMMEDIATE: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + ppc_li(gen->f, offset_reg, offset); + ppc_lvx(gen->f, vec_reg, gen->immed_reg, offset_reg); + ppc_release_register(gen->f, offset_reg); + } + break; + case TGSI_FILE_CONSTANT: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; + ppc_li(gen->f, offset_reg, offset); + /* Load 4-byte word into vector register. + * The vector slot depends on the effective address we load from. + * We know that our constants start at a 16-byte boundary so we + * know that 'swizzle' tells us which vector slot will have the + * loaded word. The other vector slots will be undefined. + */ + ppc_lvewx(gen->f, vec_reg, gen->const_reg, offset_reg); + /* splat word[swizzle] across the vector reg */ + ppc_vspltw(gen->f, vec_reg, vec_reg, swizzle); + ppc_release_register(gen->f, offset_reg); + } + break; + default: + assert( 0 ); + } + break; + case TGSI_EXTSWIZZLE_ZERO: + ppc_vload_float(gen->f, vec_reg, 0.0f); + break; + case TGSI_EXTSWIZZLE_ONE: + { + int one_vec = gen_one_vec(gen); + ppc_vecmove(gen->f, vec_reg, one_vec); + } + break; + default: + assert( 0 ); + } -static struct x86_reg -get_temp( - unsigned vec, - unsigned chan ) -{ - return x86_make_disp( - get_temp_base(), - (vec * 4 + chan) * 16 ); -} + { + uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index); + if (sign_op != TGSI_UTIL_SIGN_KEEP) { + int bit31_vec = gen_get_bit31_vec(gen); -static struct x86_reg -get_coef( - unsigned vec, - unsigned chan, - unsigned member ) -{ - return x86_make_disp( - get_coef_base(), - ((vec * 3 + member) * 4 + chan) * 4 ); + switch (sign_op) { + case TGSI_UTIL_SIGN_CLEAR: + /* vec = vec & ~bit31 */ + ppc_vandc(gen->f, vec_reg, vec_reg, bit31_vec); + break; + case TGSI_UTIL_SIGN_SET: + /* vec = vec | bit31 */ + ppc_vor(gen->f, vec_reg, vec_reg, bit31_vec); + break; + case TGSI_UTIL_SIGN_TOGGLE: + /* vec = vec ^ bit31 */ + ppc_vxor(gen->f, vec_reg, vec_reg, bit31_vec); + break; + default: + assert(0); + } + } + } } +#define FETCH( GEN, INST, VEC_REG, SRC_REG, CHAN ) \ + emit_fetch( GEN, VEC_REG, &(INST).FullSrcRegisters[SRC_REG], CHAN ) -static void -emit_ret( - struct x86_function *func ) -{ - x86_ret( func ); -} - -#endif -/** - * Data fetch helpers. - */ -#if 00 /** - * Copy a shader constant to xmm register - * \param xmm the destination xmm register - * \param vec the src const buffer index - * \param chan src channel to fetch (X, Y, Z or W) + * Register store. */ static void -emit_const( - struct x86_function *func, - uint xmm, - int vec, - uint chan, - uint indirect, - uint indirectFile, - int indirectIndex ) +emit_store(struct gen_context *gen, + unsigned vec_reg, + const struct tgsi_full_dst_register *reg, + const struct tgsi_full_instruction *inst, + unsigned chan_index) { - if (indirect) { - struct x86_reg r0 = get_input_base(); - struct x86_reg r1 = get_output_base(); - uint i; - - assert( indirectFile == TGSI_FILE_ADDRESS ); - assert( indirectIndex == 0 ); - - x86_push( func, r0 ); - x86_push( func, r1 ); - - for (i = 0; i < QUAD_SIZE; i++) { - x86_lea( func, r0, get_const( vec, chan ) ); - x86_mov( func, r1, x86_make_disp( get_temp( TEMP_ADDR, CHAN_X ), i * 4 ) ); - - /* Quick hack to multiply by 16 -- need to add SHL to rtasm. - */ - x86_add( func, r1, r1 ); - x86_add( func, r1, r1 ); - x86_add( func, r1, r1 ); - x86_add( func, r1, r1 ); - - x86_add( func, r0, r1 ); - x86_mov( func, r1, x86_deref( r0 ) ); - x86_mov( func, x86_make_disp( get_temp( TEMP_R0, CHAN_X ), i * 4 ), r1 ); + switch (reg->DstRegister.File) { + case TGSI_FILE_OUTPUT: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; + ppc_li(gen->f, offset_reg, offset); + ppc_stvx(gen->f, vec_reg, gen->outputs_reg, offset_reg); + ppc_release_register(gen->f, offset_reg); } - - x86_pop( func, r1 ); - x86_pop( func, r0 ); - - sse_movaps( + break; + case TGSI_FILE_TEMPORARY: + { + int offset_reg = ppc_allocate_register(gen->f); + int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; + ppc_li(gen->f, offset_reg, offset); + ppc_stvx(gen->f, vec_reg, gen->temps_reg, offset_reg); + ppc_release_register(gen->f, offset_reg); + } + break; +#if 0 + case TGSI_FILE_ADDRESS: + emit_addrs( func, - make_xmm( xmm ), - get_temp( TEMP_R0, CHAN_X ) ); + xmm, + reg->DstRegister.Index, + chan_index ); + break; +#endif + default: + assert( 0 ); } - else { - assert( vec >= 0 ); - sse_movss( - func, - make_xmm( xmm ), - get_const( vec, chan ) ); - sse_shufps( - func, - make_xmm( xmm ), - make_xmm( xmm ), - SHUF( 0, 0, 0, 0 ) ); +#if 0 + switch( inst->Instruction.Saturate ) { + case TGSI_SAT_NONE: + break; + + case TGSI_SAT_ZERO_ONE: + /* assert( 0 ); */ + break; + + case TGSI_SAT_MINUS_PLUS_ONE: + assert( 0 ); + break; } +#endif } -static void -emit_immediate( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movss( - func, - make_xmm( xmm ), - get_immediate( vec, chan ) ); - sse_shufps( - func, - make_xmm( xmm ), - make_xmm( xmm ), - SHUF( 0, 0, 0, 0 ) ); -} +#define STORE( GEN, INST, XMM, INDEX, CHAN )\ + emit_store( GEN, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) -/** - * Copy a shader input to xmm register - * \param xmm the destination xmm register - * \param vec the src input attrib - * \param chan src channel to fetch (X, Y, Z or W) - */ -static void -emit_inputf( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movups( - func, - make_xmm( xmm ), - get_input( vec, chan ) ); -} -/** - * Store an xmm register to a shader output - * \param xmm the source xmm register - * \param vec the dest output attrib - * \param chan src dest channel to store (X, Y, Z or W) - */ -static void -emit_output( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movups( - func, - get_output( vec, chan ), - make_xmm( xmm ) ); -} -/** - * Copy a shader temporary to xmm register - * \param xmm the destination xmm register - * \param vec the src temp register - * \param chan src channel to fetch (X, Y, Z or W) - */ static void -emit_tempf( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) +emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) { - sse_movaps( - func, - make_xmm( xmm ), - get_temp( vec, chan ) ); -} + int v0 = ppc_allocate_vec_register(gen->f); + int v1 = ppc_allocate_vec_register(gen->f); + uint chan_index; -/** - * Load an xmm register with an input attrib coefficient (a0, dadx or dady) - * \param xmm the destination xmm register - * \param vec the src input/attribute coefficient index - * \param chan src channel to fetch (X, Y, Z or W) - * \param member 0=a0, 1=dadx, 2=dady - */ -static void -emit_coef( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan, - unsigned member ) -{ - sse_movss( - func, - make_xmm( xmm ), - get_coef( vec, chan, member ) ); - sse_shufps( - func, - make_xmm( xmm ), - make_xmm( xmm ), - SHUF( 0, 0, 0, 0 ) ); + FETCH(gen, *inst, v0, 0, CHAN_X); + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_RSQ: + /* v1 = 1.0 / sqrt(v0) */ + ppc_vrsqrtefp(gen->f, v1, v0); + break; + case TGSI_OPCODE_RCP: + /* v1 = 1.0 / v0 */ + ppc_vrefp(gen->f, v1, v0); + break; + default: + assert(0); + } + + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE(gen, *inst, v1, 0, chan_index); + } + ppc_release_vec_register(gen->f, v0); + ppc_release_vec_register(gen->f, v1); } -/** - * Data store helpers. - */ static void -emit_inputs( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movups( - func, - get_input( vec, chan ), - make_xmm( xmm ) ); -} - -static void -emit_temps( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - sse_movaps( - func, - get_temp( vec, chan ), - make_xmm( xmm ) ); -} - -static void -emit_addrs( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - assert( vec == 0 ); - - emit_temps( - func, - xmm, - vec + TGSI_EXEC_TEMP_ADDR, - chan ); -} - -/** - * Coefficent fetch helpers. - */ - -static void -emit_coef_a0( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - emit_coef( - func, - xmm, - vec, - chan, - 0 ); -} - -static void -emit_coef_dadx( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - emit_coef( - func, - xmm, - vec, - chan, - 1 ); -} - -static void -emit_coef_dady( - struct x86_function *func, - unsigned xmm, - unsigned vec, - unsigned chan ) -{ - emit_coef( - func, - xmm, - vec, - chan, - 2 ); -} -#endif - - -/** - * Function call helpers. - */ - -#if 00 -/** - * NOTE: In gcc, if the destination uses the SSE intrinsics, then it must be - * defined with __attribute__((force_align_arg_pointer)), as we do not guarantee - * that the stack pointer is 16 byte aligned, as expected. - */ -static void -emit_func_call_dst( - struct x86_function *func, - unsigned xmm_save, - unsigned xmm_dst, - void (PIPE_CDECL *code)() ) +emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) { - struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); - unsigned i, n, xmm; - unsigned xmm_mask; - - /* Bitmask of the xmm registers to save */ - xmm_mask = (1 << xmm_save) - 1; - xmm_mask &= ~(1 << xmm_dst); - - sse_movaps( - func, - get_temp( TEMP_R0, 0 ), - make_xmm( xmm_dst ) ); - - x86_push( - func, - x86_make_reg( file_REG32, reg_AX) ); - x86_push( - func, - x86_make_reg( file_REG32, reg_CX) ); - x86_push( - func, - x86_make_reg( file_REG32, reg_DX) ); - - for(i = 0, n = 0; i < 8; ++i) - if(xmm_mask & (1 << i)) - ++n; - - x86_sub_imm( - func, - x86_make_reg( file_REG32, reg_SP ), - n*16); - - for(i = 0, n = 0; i < 8; ++i) - if(xmm_mask & (1 << i)) { - sse_movups( - func, - x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ), - make_xmm( xmm ) ); - ++n; - } - - x86_lea( - func, - ecx, - get_temp( TEMP_R0, 0 ) ); - - x86_push( func, ecx ); - x86_mov_reg_imm( func, ecx, (unsigned long) code ); - x86_call( func, ecx ); - x86_pop(func, ecx ); - - for(i = 0, n = 0; i < 8; ++i) - if(xmm_mask & (1 << i)) { - sse_movups( - func, - make_xmm( xmm ), - x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ) ); - ++n; + int v0 = ppc_allocate_vec_register(gen->f); + uint chan_index; + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + FETCH(gen, *inst, 0, 0, chan_index); /* v0 = srcreg[0] */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + /* turn off the most significant bit of each vector float word */ + { + int v1 = ppc_allocate_vec_register(gen->f); + ppc_vspltisw(gen->f, v1, -1); /* v1 = {-1, -1, -1, -1} */ + ppc_vslw(gen->f, v1, v1, v1); /* v1 = {1<<31, 1<<31, 1<<31, 1<<31} */ + ppc_vandc(gen->f, v0, v0, v1); /* v0 = v0 & ~v1 */ + ppc_release_vec_register(gen->f, v1); + } + break; + case TGSI_OPCODE_FLOOR: + ppc_vrfim(gen->f, v0, v0); /* v0 = floor(v0) */ + break; + case TGSI_OPCODE_FRAC: + { + int v1 = ppc_allocate_vec_register(gen->f); + ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */ + ppc_vsubfp(gen->f, v0, v0, v1); /* v0 = v0 - v1 */ + ppc_release_vec_register(gen->f, v1); + } + break; + case TGSI_OPCODE_EXPBASE2: + ppc_vexptefp(gen->f, v0, v0); /* v0 = 2^v0 */ + break; + case TGSI_OPCODE_LOGBASE2: + /* XXX this may be broken! */ + ppc_vlogefp(gen->f, v0, v0); /* v0 = log2(v0) */ + break; + case TGSI_OPCODE_MOV: + /* nothing */ + break; + default: + assert(0); } - - x86_add_imm( - func, - x86_make_reg( file_REG32, reg_SP ), - n*16); - - /* Restore GP registers in a reverse order. - */ - x86_pop( - func, - x86_make_reg( file_REG32, reg_DX) ); - x86_pop( - func, - x86_make_reg( file_REG32, reg_CX) ); - x86_pop( - func, - x86_make_reg( file_REG32, reg_AX) ); - - sse_movaps( - func, - make_xmm( xmm_dst ), - get_temp( TEMP_R0, 0 ) ); -} - -static void -emit_func_call_dst_src( - struct x86_function *func, - unsigned xmm_save, - unsigned xmm_dst, - unsigned xmm_src, - void (PIPE_CDECL *code)() ) -{ - sse_movaps( - func, - get_temp( TEMP_R0, 1 ), - make_xmm( xmm_src ) ); - - emit_func_call_dst( - func, - xmm_save, - xmm_dst, - code ); -} - -/* - * Fast SSE2 implementation of special math functions. - */ - -#define POLY0(x, c0) _mm_set1_ps(c0) -#define POLY1(x, c0, c1) _mm_add_ps(_mm_mul_ps(POLY0(x, c1), x), _mm_set1_ps(c0)) -#define POLY2(x, c0, c1, c2) _mm_add_ps(_mm_mul_ps(POLY1(x, c1, c2), x), _mm_set1_ps(c0)) -#define POLY3(x, c0, c1, c2, c3) _mm_add_ps(_mm_mul_ps(POLY2(x, c1, c2, c3), x), _mm_set1_ps(c0)) -#define POLY4(x, c0, c1, c2, c3, c4) _mm_add_ps(_mm_mul_ps(POLY3(x, c1, c2, c3, c4), x), _mm_set1_ps(c0)) -#define POLY5(x, c0, c1, c2, c3, c4, c5) _mm_add_ps(_mm_mul_ps(POLY4(x, c1, c2, c3, c4, c5), x), _mm_set1_ps(c0)) - -#define EXP_POLY_DEGREE 3 -#define LOG_POLY_DEGREE 5 - -/** - * See http://www.devmaster.net/forums/showthread.php?p=43580 - */ -static INLINE __m128 -exp2f4(__m128 x) -{ - __m128i ipart; - __m128 fpart, expipart, expfpart; - - x = _mm_min_ps(x, _mm_set1_ps( 129.00000f)); - x = _mm_max_ps(x, _mm_set1_ps(-126.99999f)); - - /* ipart = int(x - 0.5) */ - ipart = _mm_cvtps_epi32(_mm_sub_ps(x, _mm_set1_ps(0.5f))); - - /* fpart = x - ipart */ - fpart = _mm_sub_ps(x, _mm_cvtepi32_ps(ipart)); - - /* expipart = (float) (1 << ipart) */ - expipart = _mm_castsi128_ps(_mm_slli_epi32(_mm_add_epi32(ipart, _mm_set1_epi32(127)), 23)); - - /* minimax polynomial fit of 2**x, in range [-0.5, 0.5[ */ -#if EXP_POLY_DEGREE == 5 - expfpart = POLY5(fpart, 9.9999994e-1f, 6.9315308e-1f, 2.4015361e-1f, 5.5826318e-2f, 8.9893397e-3f, 1.8775767e-3f); -#elif EXP_POLY_DEGREE == 4 - expfpart = POLY4(fpart, 1.0000026f, 6.9300383e-1f, 2.4144275e-1f, 5.2011464e-2f, 1.3534167e-2f); -#elif EXP_POLY_DEGREE == 3 - expfpart = POLY3(fpart, 9.9992520e-1f, 6.9583356e-1f, 2.2606716e-1f, 7.8024521e-2f); -#elif EXP_POLY_DEGREE == 2 - expfpart = POLY2(fpart, 1.0017247f, 6.5763628e-1f, 3.3718944e-1f); -#else -#error -#endif - - return _mm_mul_ps(expipart, expfpart); -} - -/** - * See http://www.devmaster.net/forums/showthread.php?p=43580 - */ -static INLINE __m128 -log2f4(__m128 x) -{ - __m128i expmask = _mm_set1_epi32(0x7f800000); - __m128i mantmask = _mm_set1_epi32(0x007fffff); - __m128 one = _mm_set1_ps(1.0f); - - __m128i i = _mm_castps_si128(x); - - /* exp = (float) exponent(x) */ - __m128 exp = _mm_cvtepi32_ps(_mm_sub_epi32(_mm_srli_epi32(_mm_and_si128(i, expmask), 23), _mm_set1_epi32(127))); - - /* mant = (float) mantissa(x) */ - __m128 mant = _mm_or_ps(_mm_castsi128_ps(_mm_and_si128(i, mantmask)), one); - - __m128 logmant; - - /* Minimax polynomial fit of log2(x)/(x - 1), for x in range [1, 2[ - * These coefficients can be generate with - * http://www.boost.org/doc/libs/1_36_0/libs/math/doc/sf_and_dist/html/math_toolkit/toolkit/internals2/minimax.html - */ -#if LOG_POLY_DEGREE == 6 - logmant = POLY5(mant, 3.11578814719469302614f, -3.32419399085241980044f, 2.59883907202499966007f, -1.23152682416275988241f, 0.318212422185251071475f, -0.0344359067839062357313f); -#elif LOG_POLY_DEGREE == 5 - logmant = POLY4(mant, 2.8882704548164776201f, -2.52074962577807006663f, 1.48116647521213171641f, -0.465725644288844778798f, 0.0596515482674574969533f); -#elif LOG_POLY_DEGREE == 4 - logmant = POLY3(mant, 2.61761038894603480148f, -1.75647175389045657003f, 0.688243882994381274313f, -0.107254423828329604454f); -#elif LOG_POLY_DEGREE == 3 - logmant = POLY2(mant, 2.28330284476918490682f, -1.04913055217340124191f, 0.204446009836232697516f); -#else -#error -#endif - - /* This effectively increases the polynomial degree by one, but ensures that log2(1) == 0*/ - logmant = _mm_mul_ps(logmant, _mm_sub_ps(mant, one)); - - return _mm_add_ps(logmant, exp); -} - -static INLINE __m128 -powf4(__m128 x, __m128 y) -{ - return exp2f4(_mm_mul_ps(log2f4(x), y)); -} - - -/** - * Low-level instruction translators. - */ - -static void -emit_abs( - struct x86_function *func, - unsigned xmm ) -{ - sse_andps( - func, - make_xmm( xmm ), - get_temp( - TGSI_EXEC_TEMP_7FFFFFFF_I, - TGSI_EXEC_TEMP_7FFFFFFF_C ) ); -} - -static void -emit_add( - struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src ) -{ - sse_addps( - func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); -} - -static void PIPE_CDECL -cos4f( - float *store ) -{ - store[0] = cosf( store[0] ); - store[1] = cosf( store[1] ); - store[2] = cosf( store[2] ); - store[3] = cosf( store[3] ); -} - -static void -emit_cos( - struct x86_function *func, - unsigned xmm_save, - unsigned xmm_dst ) -{ - emit_func_call_dst( - func, - xmm_save, - xmm_dst, - cos4f ); -} - -static void PIPE_CDECL -#if defined(PIPE_CC_GCC) -__attribute__((force_align_arg_pointer)) -#endif -ex24f( - float *store ) -{ - _mm_store_ps(&store[0], exp2f4( _mm_load_ps(&store[0]) )); -} - -static void -emit_ex2( - struct x86_function *func, - unsigned xmm_save, - unsigned xmm_dst ) -{ - emit_func_call_dst( - func, - xmm_save, - xmm_dst, - ex24f ); -} - -static void -emit_f2it( - struct x86_function *func, - unsigned xmm ) -{ - sse2_cvttps2dq( - func, - make_xmm( xmm ), - make_xmm( xmm ) ); -} - -static void PIPE_CDECL -flr4f( - float *store ) -{ - store[0] = floorf( store[0] ); - store[1] = floorf( store[1] ); - store[2] = floorf( store[2] ); - store[3] = floorf( store[3] ); -} - -static void -emit_flr( - struct x86_function *func, - unsigned xmm_save, - unsigned xmm_dst ) -{ - emit_func_call_dst( - func, - xmm_save, - xmm_dst, - flr4f ); -} - -static void PIPE_CDECL -frc4f( - float *store ) -{ - store[0] -= floorf( store[0] ); - store[1] -= floorf( store[1] ); - store[2] -= floorf( store[2] ); - store[3] -= floorf( store[3] ); -} - -static void -emit_frc( - struct x86_function *func, - unsigned xmm_save, - unsigned xmm_dst ) -{ - emit_func_call_dst( - func, - xmm_save, - xmm_dst, - frc4f ); -} - -static void PIPE_CDECL -#if defined(PIPE_CC_GCC) -__attribute__((force_align_arg_pointer)) -#endif -lg24f( - float *store ) -{ - _mm_store_ps(&store[0], log2f4( _mm_load_ps(&store[0]) )); -} - -static void -emit_lg2( - struct x86_function *func, - unsigned xmm_save, - unsigned xmm_dst ) -{ - emit_func_call_dst( - func, - xmm_save, - xmm_dst, - lg24f ); -} - -static void -emit_MOV( - struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src ) -{ - sse_movups( - func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); -} - -static void -emit_mul (struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src) -{ - sse_mulps( - func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); -} - -static void -emit_neg( - struct x86_function *func, - unsigned xmm ) -{ - sse_xorps( - func, - make_xmm( xmm ), - get_temp( - TGSI_EXEC_TEMP_80000000_I, - TGSI_EXEC_TEMP_80000000_C ) ); -} - -static void PIPE_CDECL -#if defined(PIPE_CC_GCC) -__attribute__((force_align_arg_pointer)) -#endif -pow4f( - float *store ) -{ -#if 1 - _mm_store_ps(&store[0], powf4( _mm_load_ps(&store[0]), _mm_load_ps(&store[4]) )); -#else - store[0] = powf( store[0], store[4] ); - store[1] = powf( store[1], store[5] ); - store[2] = powf( store[2], store[6] ); - store[3] = powf( store[3], store[7] ); -#endif -} - -static void -emit_pow( - struct x86_function *func, - unsigned xmm_save, - unsigned xmm_dst, - unsigned xmm_src ) -{ - emit_func_call_dst_src( - func, - xmm_save, - xmm_dst, - xmm_src, - pow4f ); -} - -static void -emit_rcp ( - struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src ) -{ - /* On Intel CPUs at least, this is only accurate to 12 bits -- not - * good enough. Need to either emit a proper divide or use the - * iterative technique described below in emit_rsqrt(). - */ - sse2_rcpps( - func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); -} - -static void -emit_rsqrt( - struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src ) -{ -#if HIGH_PRECISION - /* Although rsqrtps() and rcpps() are low precision on some/all SSE - * implementations, it is possible to improve its precision at - * fairly low cost, using a newton/raphson step, as below: - * - * x1 = 2 * rcpps(a) - a * rcpps(a) * rcpps(a) - * x1 = 0.5 * rsqrtps(a) * [3.0 - (a * rsqrtps(a))* rsqrtps(a)] - * - * See: http://softwarecommunity.intel.com/articles/eng/1818.htm - */ - { - struct x86_reg dst = make_xmm( xmm_dst ); - struct x86_reg src = make_xmm( xmm_src ); - struct x86_reg tmp0 = make_xmm( 2 ); - struct x86_reg tmp1 = make_xmm( 3 ); - - assert( xmm_dst != xmm_src ); - assert( xmm_dst != 2 && xmm_dst != 3 ); - assert( xmm_src != 2 && xmm_src != 3 ); - - sse_movaps( func, dst, get_temp( TGSI_EXEC_TEMP_HALF_I, TGSI_EXEC_TEMP_HALF_C ) ); - sse_movaps( func, tmp0, get_temp( TGSI_EXEC_TEMP_THREE_I, TGSI_EXEC_TEMP_THREE_C ) ); - sse_rsqrtps( func, tmp1, src ); - sse_mulps( func, src, tmp1 ); - sse_mulps( func, dst, tmp1 ); - sse_mulps( func, src, tmp1 ); - sse_subps( func, tmp0, src ); - sse_mulps( func, dst, tmp0 ); + STORE(gen, *inst, v0, 0, chan_index); /* store v0 */ } -#else - /* On Intel CPUs at least, this is only accurate to 12 bits -- not - * good enough. - */ - sse_rsqrtps( - func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); -#endif -} - -static void -emit_setsign( - struct x86_function *func, - unsigned xmm ) -{ - sse_orps( - func, - make_xmm( xmm ), - get_temp( - TGSI_EXEC_TEMP_80000000_I, - TGSI_EXEC_TEMP_80000000_C ) ); -} - -static void PIPE_CDECL -sin4f( - float *store ) -{ - store[0] = sinf( store[0] ); - store[1] = sinf( store[1] ); - store[2] = sinf( store[2] ); - store[3] = sinf( store[3] ); + ppc_release_vec_register(gen->f, v0); } -static void -emit_sin (struct x86_function *func, - unsigned xmm_save, - unsigned xmm_dst) -{ - emit_func_call_dst( - func, - xmm_save, - xmm_dst, - sin4f ); -} static void -emit_sub( - struct x86_function *func, - unsigned xmm_dst, - unsigned xmm_src ) +emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) { - sse_subps( - func, - make_xmm( xmm_dst ), - make_xmm( xmm_src ) ); + int v0 = ppc_allocate_vec_register(gen->f); + int v1 = ppc_allocate_vec_register(gen->f); + int v2 = ppc_allocate_vec_register(gen->f); + uint chan_index; + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ + FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + ppc_vaddfp(gen->f, v2, v0, v1); + break; + case TGSI_OPCODE_SUB: + ppc_vsubfp(gen->f, v2, v0, v1); + break; + case TGSI_OPCODE_MUL: + ppc_vxor(gen->f, v2, v2, v2); /* v2 = {0, 0, 0, 0} */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v0 */ + break; + case TGSI_OPCODE_MIN: + ppc_vminfp(gen->f, v2, v0, v1); + break; + case TGSI_OPCODE_MAX: + ppc_vmaxfp(gen->f, v2, v0, v1); + break; + default: + assert(0); + } + STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ + } + ppc_release_vec_register(gen->f, v0); + ppc_release_vec_register(gen->f, v1); + ppc_release_vec_register(gen->f, v2); } -#endif /** - * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}. + * Vector comparisons, resulting in 1.0 or 0.0 values. */ -static int -gen_one_vec(struct gen_context *gen) -{ - if (gen->one_vec < 0) { - gen->one_vec = ppc_allocate_vec_register(gen->f); - ppc_vload_float(gen->f, gen->one_vec, 1.0f); - } - return gen->one_vec; -} - -/** - * Return index of vector register containing {1<<31, 1<<31, 1<<31, 1<<31}. - */ -static int -gen_get_bit31_vec(struct gen_context *gen) -{ - if (gen->bit31_vec < 0) { - gen->bit31_vec = ppc_allocate_vec_register(gen->f); - ppc_vspltisw(gen->f, gen->bit31_vec, -1); - ppc_vslw(gen->f, gen->bit31_vec, gen->bit31_vec, gen->bit31_vec); - } - return gen->bit31_vec; -} - - - -/** - * Register fetch. - */ -static void -emit_fetch(struct gen_context *gen, - unsigned vec_reg, - const struct tgsi_full_src_register *reg, - const unsigned chan_index) -{ - uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index); - - switch (swizzle) { - case TGSI_EXTSWIZZLE_X: - case TGSI_EXTSWIZZLE_Y: - case TGSI_EXTSWIZZLE_Z: - case TGSI_EXTSWIZZLE_W: - switch (reg->SrcRegister.File) { - case TGSI_FILE_INPUT: - { - int offset_reg = ppc_allocate_register(gen->f); - int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; - ppc_li(gen->f, offset_reg, offset); - ppc_lvx(gen->f, vec_reg, gen->inputs_reg, offset_reg); - ppc_release_register(gen->f, offset_reg); - } - break; - case TGSI_FILE_TEMPORARY: - { - int offset_reg = ppc_allocate_register(gen->f); - int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; - ppc_li(gen->f, offset_reg, offset); - ppc_lvx(gen->f, vec_reg, gen->temps_reg, offset_reg); - ppc_release_register(gen->f, offset_reg); - } - break; - case TGSI_FILE_IMMEDIATE: - { - int offset_reg = ppc_allocate_register(gen->f); - int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; - ppc_li(gen->f, offset_reg, offset); - ppc_lvx(gen->f, vec_reg, gen->immed_reg, offset_reg); - ppc_release_register(gen->f, offset_reg); - } - break; - case TGSI_FILE_CONSTANT: - { - int offset_reg = ppc_allocate_register(gen->f); - int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; - ppc_li(gen->f, offset_reg, offset); - /* Load 4-byte word into vector register. - * The vector slot depends on the effective address we load from. - * We know that our constants start at a 16-byte boundary so we - * know that 'swizzle' tells us which vector slot will have the - * loaded word. The other vector slots will be undefined. - */ - ppc_lvewx(gen->f, vec_reg, gen->const_reg, offset_reg); - /* splat word[swizzle] across the vector reg */ - ppc_vspltw(gen->f, vec_reg, vec_reg, swizzle); - ppc_release_register(gen->f, offset_reg); - } - break; - default: - assert( 0 ); - } - break; - case TGSI_EXTSWIZZLE_ZERO: - ppc_vload_float(gen->f, vec_reg, 0.0f); - break; - case TGSI_EXTSWIZZLE_ONE: - { - int one_vec = gen_one_vec(gen); - ppc_vecmove(gen->f, vec_reg, one_vec); - } - break; - default: - assert( 0 ); - } - - { - uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index); - if (sign_op != TGSI_UTIL_SIGN_KEEP) { - int bit31_vec = gen_get_bit31_vec(gen); - - switch (sign_op) { - case TGSI_UTIL_SIGN_CLEAR: - /* vec = vec & ~bit31 */ - ppc_vandc(gen->f, vec_reg, vec_reg, bit31_vec); - break; - case TGSI_UTIL_SIGN_SET: - /* vec = vec | bit31 */ - ppc_vor(gen->f, vec_reg, vec_reg, bit31_vec); - break; - case TGSI_UTIL_SIGN_TOGGLE: - /* vec = vec ^ bit31 */ - ppc_vxor(gen->f, vec_reg, vec_reg, bit31_vec); - break; - default: - assert(0); - } - } - } -} - -#define FETCH( GEN, INST, VEC_REG, SRC_REG, CHAN ) \ - emit_fetch( GEN, VEC_REG, &(INST).FullSrcRegisters[SRC_REG], CHAN ) - - - -/** - * Register store. - */ -static void -emit_store(struct gen_context *gen, - unsigned vec_reg, - const struct tgsi_full_dst_register *reg, - const struct tgsi_full_instruction *inst, - unsigned chan_index) -{ - switch (reg->DstRegister.File) { - case TGSI_FILE_OUTPUT: - { - int offset_reg = ppc_allocate_register(gen->f); - int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; - ppc_li(gen->f, offset_reg, offset); - ppc_stvx(gen->f, vec_reg, gen->outputs_reg, offset_reg); - ppc_release_register(gen->f, offset_reg); - } - break; - case TGSI_FILE_TEMPORARY: - { - int offset_reg = ppc_allocate_register(gen->f); - int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; - ppc_li(gen->f, offset_reg, offset); - ppc_stvx(gen->f, vec_reg, gen->temps_reg, offset_reg); - ppc_release_register(gen->f, offset_reg); - } - break; -#if 0 - case TGSI_FILE_ADDRESS: - emit_addrs( - func, - xmm, - reg->DstRegister.Index, - chan_index ); - break; -#endif - default: - assert( 0 ); - } - -#if 0 - switch( inst->Instruction.Saturate ) { - case TGSI_SAT_NONE: - break; - - case TGSI_SAT_ZERO_ONE: - /* assert( 0 ); */ - break; - - case TGSI_SAT_MINUS_PLUS_ONE: - assert( 0 ); - break; - } -#endif -} - - -#define STORE( GEN, INST, XMM, INDEX, CHAN )\ - emit_store( GEN, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) - - - -#if 000 -/** - * High-level instruction translators. - */ - -static void -emit_kil( - struct x86_function *func, - const struct tgsi_full_src_register *reg ) -{ - unsigned uniquemask; - unsigned registers[4]; - unsigned nextregister = 0; - unsigned firstchan = ~0; - unsigned chan_index; - - /* This mask stores component bits that were already tested. Note that - * we test if the value is less than zero, so 1.0 and 0.0 need not to be - * tested. */ - uniquemask = (1 << TGSI_EXTSWIZZLE_ZERO) | (1 << TGSI_EXTSWIZZLE_ONE); - - FOR_EACH_CHANNEL( chan_index ) { - unsigned swizzle; - - /* unswizzle channel */ - swizzle = tgsi_util_get_full_src_register_extswizzle( - reg, - chan_index ); - - /* check if the component has not been already tested */ - if( !(uniquemask & (1 << swizzle)) ) { - uniquemask |= 1 << swizzle; - - /* allocate register */ - registers[chan_index] = nextregister; - emit_fetch( - func, - nextregister, - reg, - chan_index ); - nextregister++; - - /* mark the first channel used */ - if( firstchan == ~0 ) { - firstchan = chan_index; - } - } - } - - x86_push( - func, - x86_make_reg( file_REG32, reg_AX ) ); - x86_push( - func, - x86_make_reg( file_REG32, reg_DX ) ); - - FOR_EACH_CHANNEL( chan_index ) { - if( uniquemask & (1 << chan_index) ) { - sse_cmpps( - func, - make_xmm( registers[chan_index] ), - get_temp( - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ), - cc_LessThan ); - - if( chan_index == firstchan ) { - sse_pmovmskb( - func, - x86_make_reg( file_REG32, reg_AX ), - make_xmm( registers[chan_index] ) ); - } - else { - sse_pmovmskb( - func, - x86_make_reg( file_REG32, reg_DX ), - make_xmm( registers[chan_index] ) ); - x86_or( - func, - x86_make_reg( file_REG32, reg_AX ), - x86_make_reg( file_REG32, reg_DX ) ); - } - } - } - - x86_or( - func, - get_temp( - TGSI_EXEC_TEMP_KILMASK_I, - TGSI_EXEC_TEMP_KILMASK_C ), - x86_make_reg( file_REG32, reg_AX ) ); - - x86_pop( - func, - x86_make_reg( file_REG32, reg_DX ) ); - x86_pop( - func, - x86_make_reg( file_REG32, reg_AX ) ); -} - - -static void -emit_kilp( - struct x86_function *func ) -{ - /* XXX todo / fix me */ -} - - -static void -emit_setcc( - struct x86_function *func, - struct tgsi_full_instruction *inst, - enum sse_cc cc ) -{ - unsigned chan_index; - - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - sse_cmpps( - func, - make_xmm( 0 ), - make_xmm( 1 ), - cc ); - sse_andps( - func, - make_xmm( 0 ), - get_temp( - TEMP_ONE_I, - TEMP_ONE_C ) ); - STORE( func, *inst, 0, 0, chan_index ); - } -} - -static void -emit_cmp( - struct x86_function *func, - struct tgsi_full_instruction *inst ) -{ - unsigned chan_index; - - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - FETCH( func, *inst, 2, 2, chan_index ); - sse_cmpps( - func, - make_xmm( 0 ), - get_temp( - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ), - cc_LessThan ); - sse_andps( - func, - make_xmm( 1 ), - make_xmm( 0 ) ); - sse_andnps( - func, - make_xmm( 0 ), - make_xmm( 2 ) ); - sse_orps( - func, - make_xmm( 0 ), - make_xmm( 1 ) ); - STORE( func, *inst, 0, 0, chan_index ); - } -} -#endif - - -static void -emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) +static void +emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst) { int v0 = ppc_allocate_vec_register(gen->f); - int v1 = ppc_allocate_vec_register(gen->f); - uint chan_index; - - FETCH(gen, *inst, v0, 0, CHAN_X); - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_RSQ: - /* v1 = 1.0 / sqrt(v0) */ - ppc_vrsqrtefp(gen->f, v1, v0); - break; - case TGSI_OPCODE_RCP: - /* v1 = 1.0 / v0 */ - ppc_vrefp(gen->f, v1, v0); - break; - default: - assert(0); - } - - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE(gen, *inst, v1, 0, chan_index); - } - ppc_release_vec_register(gen->f, v0); - ppc_release_vec_register(gen->f, v1); -} - - -static void -emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) -{ - int v0 = ppc_allocate_vec_register(gen->f); - uint chan_index; - FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { - FETCH(gen, *inst, 0, 0, chan_index); /* v0 = srcreg[0] */ - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - /* turn off the most significant bit of each vector float word */ - { - int v1 = ppc_allocate_vec_register(gen->f); - ppc_vspltisw(gen->f, v1, -1); /* v1 = {-1, -1, -1, -1} */ - ppc_vslw(gen->f, v1, v1, v1); /* v1 = {1<<31, 1<<31, 1<<31, 1<<31} */ - ppc_vandc(gen->f, v0, v0, v1); /* v0 = v0 & ~v1 */ - ppc_release_vec_register(gen->f, v1); - } - break; - case TGSI_OPCODE_FLOOR: - ppc_vrfim(gen->f, v0, v0); /* v0 = floor(v0) */ - break; - case TGSI_OPCODE_FRAC: - { - int v1 = ppc_allocate_vec_register(gen->f); - ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */ - ppc_vsubfp(gen->f, v0, v0, v1); /* v0 = v0 - v1 */ - ppc_release_vec_register(gen->f, v1); - } - break; - case TGSI_OPCODE_EXPBASE2: - ppc_vexptefp(gen->f, v0, v0); /* v0 = 2^v0 */ - break; - case TGSI_OPCODE_LOGBASE2: - /* XXX this may be broken! */ - ppc_vlogefp(gen->f, v0, v0); /* v0 = log2(v0) */ - break; - case TGSI_OPCODE_MOV: - /* nothing */ - break; - default: - assert(0); - } - STORE(gen, *inst, v0, 0, chan_index); /* store v0 */ - } - ppc_release_vec_register(gen->f, v0); -} - - -static void -emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) -{ - int v0 = ppc_allocate_vec_register(gen->f); - int v1 = ppc_allocate_vec_register(gen->f); - int v2 = ppc_allocate_vec_register(gen->f); - uint chan_index; - FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { - FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ - FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ADD: - ppc_vaddfp(gen->f, v2, v0, v1); - break; - case TGSI_OPCODE_SUB: - ppc_vsubfp(gen->f, v2, v0, v1); - break; - case TGSI_OPCODE_MUL: - ppc_vxor(gen->f, v2, v2, v2); /* v2 = {0, 0, 0, 0} */ - ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v0 */ - break; - case TGSI_OPCODE_MIN: - ppc_vminfp(gen->f, v2, v0, v1); - break; - case TGSI_OPCODE_MAX: - ppc_vmaxfp(gen->f, v2, v0, v1); - break; - default: - assert(0); - } - STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ - } - ppc_release_vec_register(gen->f, v0); - ppc_release_vec_register(gen->f, v1); - ppc_release_vec_register(gen->f, v2); -} - - -/** - * Vector comparisons, resulting in 1.0 or 0.0 values. - */ -static void -emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst) -{ - int v0 = ppc_allocate_vec_register(gen->f); - int v1 = ppc_allocate_vec_register(gen->f); - int v2 = ppc_allocate_vec_register(gen->f); - uint chan_index; - boolean complement = FALSE; - int one_vec = gen_one_vec(gen); - - FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { - FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ - FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_SNE: - complement = TRUE; - /* fall-through */ - case TGSI_OPCODE_SEQ: - ppc_vcmpeqfpx(gen->f, v2, v0, v1); /* v2 = v0 == v1 ? ~0 : 0 */ - break; - - case TGSI_OPCODE_SGE: - complement = TRUE; - /* fall-through */ - case TGSI_OPCODE_SLT: - ppc_vcmpgtfpx(gen->f, v2, v1, v0); /* v2 = v1 > v0 ? ~0 : 0 */ - break; - - case TGSI_OPCODE_SLE: - complement = TRUE; - /* fall-through */ - case TGSI_OPCODE_SGT: - ppc_vcmpgtfpx(gen->f, v2, v0, v1); /* v2 = v0 > v1 ? ~0 : 0 */ - break; - default: - assert(0); - } - - /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */ - - if (complement) - ppc_vandc(gen->f, v2, one_vec, v2); /* v2 = one_vec & ~v2 */ - else - ppc_vand(gen->f, v2, one_vec, v2); /* v2 = one_vec & v2 */ - - STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ - } - - ppc_release_vec_register(gen->f, v0); - ppc_release_vec_register(gen->f, v1); - ppc_release_vec_register(gen->f, v2); -} - - -static void -emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst) -{ - int v0 = ppc_allocate_vec_register(gen->f); - int v1 = ppc_allocate_vec_register(gen->f); - int v2 = ppc_allocate_vec_register(gen->f); - uint chan_index; - - ppc_vxor(gen->f, v2, v2, v2); /* v2 = {0, 0, 0, 0} */ - - FETCH(gen, *inst, v0, 0, CHAN_X); /* v0 = src0.XXXX */ - FETCH(gen, *inst, v1, 1, CHAN_X); /* v1 = src1.XXXX */ - ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ - - FETCH(gen, *inst, v0, 0, CHAN_Y); /* v0 = src0.YYYY */ - FETCH(gen, *inst, v1, 1, CHAN_Y); /* v1 = src1.YYYY */ - ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ - - FETCH(gen, *inst, v0, 0, CHAN_Z); /* v0 = src0.ZZZZ */ - FETCH(gen, *inst, v1, 1, CHAN_Z); /* v1 = src1.ZZZZ */ - ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ - - if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) { - FETCH(gen, *inst, v0, 0, CHAN_W); /* v0 = src0.WWWW */ - FETCH(gen, *inst, v1, 1, CHAN_W); /* v1 = src1.WWWW */ - ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ - } - else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) { - FETCH(gen, *inst, v1, 1, CHAN_W); /* v1 = src1.WWWW */ - ppc_vaddfp(gen->f, v2, v2, v1); /* v2 = v2 + v1 */ - } - - FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { - STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ - } - ppc_release_vec_register(gen->f, v0); - ppc_release_vec_register(gen->f, v1); - ppc_release_vec_register(gen->f, v2); -} - - -static void -emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) -{ - int v0 = ppc_allocate_vec_register(gen->f); - int v1 = ppc_allocate_vec_register(gen->f); - int v2 = ppc_allocate_vec_register(gen->f); - int v3 = ppc_allocate_vec_register(gen->f); - uint chan_index; - FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { - FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ - FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ - FETCH(gen, *inst, v2, 2, chan_index); /* v2 = srcreg[2] */ - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_MAD: - ppc_vmaddfp(gen->f, v3, v0, v1, v2); /* v3 = v0 * v1 + v2 */ - break; - case TGSI_OPCODE_LRP: - ppc_vsubfp(gen->f, v3, v1, v2); /* v3 = v1 - v2 */ - ppc_vmaddfp(gen->f, v3, v0, v3, v2); /* v3 = v0 * v3 + v2 */ - break; - default: - assert(0); - } - STORE(gen, *inst, v3, 0, chan_index); /* store v3 */ - } - ppc_release_vec_register(gen->f, v0); - ppc_release_vec_register(gen->f, v1); - ppc_release_vec_register(gen->f, v2); - ppc_release_vec_register(gen->f, v3); -} - - -/* -static void -emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) -{ -} -*/ - - -static int -emit_instruction(struct gen_context *gen, - struct tgsi_full_instruction *inst) -{ - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_ABS: - case TGSI_OPCODE_FLOOR: - case TGSI_OPCODE_FRAC: - case TGSI_OPCODE_EXPBASE2: - case TGSI_OPCODE_LOGBASE2: - emit_unaryop(gen, inst); - break; - case TGSI_OPCODE_RSQ: - case TGSI_OPCODE_RCP: - emit_scalar_unaryop(gen, inst); - break; - case TGSI_OPCODE_ADD: - case TGSI_OPCODE_SUB: - case TGSI_OPCODE_MUL: - case TGSI_OPCODE_MIN: - case TGSI_OPCODE_MAX: - emit_binop(gen, inst); - break; - case TGSI_OPCODE_SEQ: - case TGSI_OPCODE_SNE: - case TGSI_OPCODE_SLT: - case TGSI_OPCODE_SGT: - case TGSI_OPCODE_SLE: - case TGSI_OPCODE_SGE: - emit_inequality(gen, inst); - break; - case TGSI_OPCODE_MAD: - case TGSI_OPCODE_LRP: - emit_triop(gen, inst); - break; - case TGSI_OPCODE_DP3: - case TGSI_OPCODE_DP4: - case TGSI_OPCODE_DPH: - emit_dotprod(gen, inst); - break; - /* - case TGSI_OPCODE_LIT: - emit_lit(gen, inst); - break; - */ - case TGSI_OPCODE_END: - /* normal end */ - return 1; - default: - return 0; - } - -#if 0 - unsigned chan_index; - - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ARL: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - emit_f2it( func, 0 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_MOV: - case TGSI_OPCODE_SWZ: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_LIT: - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { - emit_tempf( - func, - 0, - TEMP_ONE_I, - TEMP_ONE_C); - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ) { - STORE( func, *inst, 0, 0, CHAN_X ); - } - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) ) { - STORE( func, *inst, 0, 0, CHAN_W ); - } - } - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - FETCH( func, *inst, 0, 0, CHAN_X ); - sse_maxps( - func, - make_xmm( 0 ), - get_temp( - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ) ); - STORE( func, *inst, 0, 0, CHAN_Y ); - } - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - /* XMM[1] = SrcReg[0].yyyy */ - FETCH( func, *inst, 1, 0, CHAN_Y ); - /* XMM[1] = max(XMM[1], 0) */ - sse_maxps( - func, - make_xmm( 1 ), - get_temp( - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ) ); - /* XMM[2] = SrcReg[0].wwww */ - FETCH( func, *inst, 2, 0, CHAN_W ); - /* XMM[2] = min(XMM[2], 128.0) */ - sse_minps( - func, - make_xmm( 2 ), - get_temp( - TGSI_EXEC_TEMP_128_I, - TGSI_EXEC_TEMP_128_C ) ); - /* XMM[2] = max(XMM[2], -128.0) */ - sse_maxps( - func, - make_xmm( 2 ), - get_temp( - TGSI_EXEC_TEMP_MINUS_128_I, - TGSI_EXEC_TEMP_MINUS_128_C ) ); - emit_pow( func, 3, 1, 2 ); - FETCH( func, *inst, 0, 0, CHAN_X ); - sse_xorps( - func, - make_xmm( 2 ), - make_xmm( 2 ) ); - sse_cmpps( - func, - make_xmm( 2 ), - make_xmm( 0 ), - cc_LessThanEqual ); - sse_andps( - func, - make_xmm( 2 ), - make_xmm( 1 ) ); - STORE( func, *inst, 2, 0, CHAN_Z ); - } - } - break; - - case TGSI_OPCODE_RCP: - /* TGSI_OPCODE_RECIP */ - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_rcp( func, 0, 0 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_RSQ: - /* TGSI_OPCODE_RECIPSQRT */ - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_rsqrt( func, 1, 0 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 1, 0, chan_index ); - } - break; - - case TGSI_OPCODE_EXP: - if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( func, *inst, 0, 0, CHAN_X ); - if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { - emit_MOV( func, 1, 0 ); - emit_flr( func, 2, 1 ); - /* dst.x = ex2(floor(src.x)) */ - if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) { - emit_MOV( func, 2, 1 ); - emit_ex2( func, 3, 2 ); - STORE( func, *inst, 2, 0, CHAN_X ); - } - /* dst.y = src.x - floor(src.x) */ - if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { - emit_MOV( func, 2, 0 ); - emit_sub( func, 2, 1 ); - STORE( func, *inst, 2, 0, CHAN_Y ); - } - } - /* dst.z = ex2(src.x) */ - if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { - emit_ex2( func, 3, 0 ); - STORE( func, *inst, 0, 0, CHAN_Z ); - } - } - /* dst.w = 1.0 */ - if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) { - emit_tempf( func, 0, TEMP_ONE_I, TEMP_ONE_C ); - STORE( func, *inst, 0, 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_LOG: - if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_abs( func, 0 ); - emit_MOV( func, 1, 0 ); - emit_lg2( func, 2, 1 ); - /* dst.z = lg2(abs(src.x)) */ - if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z )) { - STORE( func, *inst, 1, 0, CHAN_Z ); - } - if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { - emit_flr( func, 2, 1 ); - /* dst.x = floor(lg2(abs(src.x))) */ - if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X )) { - STORE( func, *inst, 1, 0, CHAN_X ); - } - /* dst.x = abs(src)/ex2(floor(lg2(abs(src.x)))) */ - if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y )) { - emit_ex2( func, 2, 1 ); - emit_rcp( func, 1, 1 ); - emit_mul( func, 0, 1 ); - STORE( func, *inst, 0, 0, CHAN_Y ); - } - } - } - /* dst.w = 1.0 */ - if (IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W )) { - emit_tempf( func, 0, TEMP_ONE_I, TEMP_ONE_C ); - STORE( func, *inst, 0, 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_MUL: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - emit_mul( func, 0, 1 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_ADD: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - emit_add( func, 0, 1 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_DP3: - /* TGSI_OPCODE_DOT3 */ - FETCH( func, *inst, 0, 0, CHAN_X ); - FETCH( func, *inst, 1, 1, CHAN_X ); - emit_mul( func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_Y ); - FETCH( func, *inst, 2, 1, CHAN_Y ); - emit_mul( func, 1, 2 ); - emit_add( func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_Z ); - FETCH( func, *inst, 2, 1, CHAN_Z ); - emit_mul( func, 1, 2 ); - emit_add( func, 0, 1 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_DP4: - /* TGSI_OPCODE_DOT4 */ - FETCH( func, *inst, 0, 0, CHAN_X ); - FETCH( func, *inst, 1, 1, CHAN_X ); - emit_mul( func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_Y ); - FETCH( func, *inst, 2, 1, CHAN_Y ); - emit_mul( func, 1, 2 ); - emit_add( func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_Z ); - FETCH( func, *inst, 2, 1, CHAN_Z ); - emit_mul(func, 1, 2 ); - emit_add(func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_W ); - FETCH( func, *inst, 2, 1, CHAN_W ); - emit_mul( func, 1, 2 ); - emit_add( func, 0, 1 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_DST: - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { - emit_tempf( - func, - 0, - TEMP_ONE_I, - TEMP_ONE_C ); - STORE( func, *inst, 0, 0, CHAN_X ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { - FETCH( func, *inst, 0, 0, CHAN_Y ); - FETCH( func, *inst, 1, 1, CHAN_Y ); - emit_mul( func, 0, 1 ); - STORE( func, *inst, 0, 0, CHAN_Y ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { - FETCH( func, *inst, 0, 0, CHAN_Z ); - STORE( func, *inst, 0, 0, CHAN_Z ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { - FETCH( func, *inst, 0, 1, CHAN_W ); - STORE( func, *inst, 0, 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_MIN: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - sse_minps( - func, - make_xmm( 0 ), - make_xmm( 1 ) ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_MAX: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - sse_maxps( - func, - make_xmm( 0 ), - make_xmm( 1 ) ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_SLT: - /* TGSI_OPCODE_SETLT */ - emit_setcc( func, inst, cc_LessThan ); - break; - - case TGSI_OPCODE_SGE: - /* TGSI_OPCODE_SETGE */ - emit_setcc( func, inst, cc_NotLessThan ); - break; - - case TGSI_OPCODE_MAD: - /* TGSI_OPCODE_MADD */ - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - FETCH( func, *inst, 2, 2, chan_index ); - emit_mul( func, 0, 1 ); - emit_add( func, 0, 2 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_SUB: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - emit_sub( func, 0, 1 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_LERP: - /* TGSI_OPCODE_LRP */ - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - FETCH( func, *inst, 1, 1, chan_index ); - FETCH( func, *inst, 2, 2, chan_index ); - emit_sub( func, 1, 2 ); - emit_mul( func, 0, 1 ); - emit_add( func, 0, 2 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_CND: - return 0; - break; - - case TGSI_OPCODE_CND0: - return 0; - break; - - case TGSI_OPCODE_DOT2ADD: - /* TGSI_OPCODE_DP2A */ - return 0; - break; - - case TGSI_OPCODE_INDEX: - return 0; - break; - - case TGSI_OPCODE_NEGATE: - return 0; - break; - - case TGSI_OPCODE_FRAC: - /* TGSI_OPCODE_FRC */ - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - emit_frc( func, 0, 0 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_CLAMP: - return 0; - break; - - case TGSI_OPCODE_FLOOR: - /* TGSI_OPCODE_FLR */ - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - emit_flr( func, 0, 0 ); - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_ROUND: - return 0; - break; - - case TGSI_OPCODE_EXPBASE2: - /* TGSI_OPCODE_EX2 */ - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_ex2( func, 0, 0 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_LOGBASE2: - /* TGSI_OPCODE_LG2 */ - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_lg2( func, 0, 0 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_POWER: - /* TGSI_OPCODE_POW */ - FETCH( func, *inst, 0, 0, CHAN_X ); - FETCH( func, *inst, 1, 1, CHAN_X ); - emit_pow( func, 0, 0, 1 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_CROSSPRODUCT: - /* TGSI_OPCODE_XPD */ - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) { - FETCH( func, *inst, 1, 1, CHAN_Z ); - FETCH( func, *inst, 3, 0, CHAN_Z ); - } - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - FETCH( func, *inst, 0, 0, CHAN_Y ); - FETCH( func, *inst, 4, 1, CHAN_Y ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { - emit_MOV( func, 2, 0 ); - emit_mul( func, 2, 1 ); - emit_MOV( func, 5, 3 ); - emit_mul( func, 5, 4 ); - emit_sub( func, 2, 5 ); - STORE( func, *inst, 2, 0, CHAN_X ); - } - if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) || - IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) ) { - FETCH( func, *inst, 2, 1, CHAN_X ); - FETCH( func, *inst, 5, 0, CHAN_X ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { - emit_mul( func, 3, 2 ); - emit_mul( func, 1, 5 ); - emit_sub( func, 3, 1 ); - STORE( func, *inst, 3, 0, CHAN_Y ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { - emit_mul( func, 5, 4 ); - emit_mul( func, 0, 2 ); - emit_sub( func, 5, 0 ); - STORE( func, *inst, 5, 0, CHAN_Z ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { - emit_tempf( - func, - 0, - TEMP_ONE_I, - TEMP_ONE_C ); - STORE( func, *inst, 0, 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_MULTIPLYMATRIX: - return 0; - break; - - case TGSI_OPCODE_ABS: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - emit_abs( func, 0) ; - - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_RCC: - return 0; - break; - - case TGSI_OPCODE_DPH: - FETCH( func, *inst, 0, 0, CHAN_X ); - FETCH( func, *inst, 1, 1, CHAN_X ); - emit_mul( func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_Y ); - FETCH( func, *inst, 2, 1, CHAN_Y ); - emit_mul( func, 1, 2 ); - emit_add( func, 0, 1 ); - FETCH( func, *inst, 1, 0, CHAN_Z ); - FETCH( func, *inst, 2, 1, CHAN_Z ); - emit_mul( func, 1, 2 ); - emit_add( func, 0, 1 ); - FETCH( func, *inst, 1, 1, CHAN_W ); - emit_add( func, 0, 1 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_COS: - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_cos( func, 0, 0 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_DDX: - return 0; - break; - - case TGSI_OPCODE_DDY: - return 0; - break; - - case TGSI_OPCODE_KILP: - /* predicated kill */ - emit_kilp( func ); - return 0; /* XXX fix me */ - break; - - case TGSI_OPCODE_KIL: - /* conditional kill */ - emit_kil( func, &inst->FullSrcRegisters[0] ); - break; - - case TGSI_OPCODE_PK2H: - return 0; - break; - - case TGSI_OPCODE_PK2US: - return 0; - break; - - case TGSI_OPCODE_PK4B: - return 0; - break; - - case TGSI_OPCODE_PK4UB: - return 0; - break; - - case TGSI_OPCODE_RFL: - return 0; - break; - - case TGSI_OPCODE_SEQ: - return 0; - break; - - case TGSI_OPCODE_SFL: - return 0; - break; - - case TGSI_OPCODE_SGT: - return 0; - break; - - case TGSI_OPCODE_SIN: - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_sin( func, 0, 0 ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - break; - - case TGSI_OPCODE_SLE: - return 0; - break; - - case TGSI_OPCODE_SNE: - return 0; - break; - - case TGSI_OPCODE_STR: - return 0; - break; - - case TGSI_OPCODE_TEX: - if (0) { - /* Disable dummy texture code: - */ - emit_tempf( - func, - 0, - TEMP_ONE_I, - TEMP_ONE_C ); - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE( func, *inst, 0, 0, chan_index ); - } - } - else { - return 0; - } - break; - - case TGSI_OPCODE_TXD: - return 0; - break; - - case TGSI_OPCODE_UP2H: - return 0; - break; - - case TGSI_OPCODE_UP2US: - return 0; - break; - - case TGSI_OPCODE_UP4B: - return 0; - break; - - case TGSI_OPCODE_UP4UB: - return 0; - break; - - case TGSI_OPCODE_X2D: - return 0; - break; - - case TGSI_OPCODE_ARA: - return 0; - break; - - case TGSI_OPCODE_ARR: - return 0; - break; - - case TGSI_OPCODE_BRA: - return 0; - break; - - case TGSI_OPCODE_CAL: - return 0; - break; - - case TGSI_OPCODE_RET: - emit_ret( func ); - break; - - case TGSI_OPCODE_END: - break; - - case TGSI_OPCODE_SSG: - return 0; - break; - - case TGSI_OPCODE_CMP: - emit_cmp (func, inst); - break; - - case TGSI_OPCODE_SCS: - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) { - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_cos( func, 0, 0 ); - STORE( func, *inst, 0, 0, CHAN_X ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) { - FETCH( func, *inst, 0, 0, CHAN_X ); - emit_sin( func, 0, 0 ); - STORE( func, *inst, 0, 0, CHAN_Y ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Z ) { - emit_tempf( - func, - 0, - TGSI_EXEC_TEMP_00000000_I, - TGSI_EXEC_TEMP_00000000_C ); - STORE( func, *inst, 0, 0, CHAN_Z ); - } - IF_IS_DST0_CHANNEL_ENABLED( *inst, CHAN_W ) { - emit_tempf( - func, - 0, - TEMP_ONE_I, - TEMP_ONE_C ); - STORE( func, *inst, 0, 0, CHAN_W ); - } - break; - - case TGSI_OPCODE_TXB: - return 0; - break; - - case TGSI_OPCODE_NRM: - return 0; - break; - - case TGSI_OPCODE_DIV: - return 0; - break; + int v1 = ppc_allocate_vec_register(gen->f); + int v2 = ppc_allocate_vec_register(gen->f); + uint chan_index; + boolean complement = FALSE; + int one_vec = gen_one_vec(gen); - case TGSI_OPCODE_DP2: - return 0; - break; + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ + FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ - case TGSI_OPCODE_TXL: - return 0; - break; + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_SNE: + complement = TRUE; + /* fall-through */ + case TGSI_OPCODE_SEQ: + ppc_vcmpeqfpx(gen->f, v2, v0, v1); /* v2 = v0 == v1 ? ~0 : 0 */ + break; - case TGSI_OPCODE_BRK: - return 0; - break; + case TGSI_OPCODE_SGE: + complement = TRUE; + /* fall-through */ + case TGSI_OPCODE_SLT: + ppc_vcmpgtfpx(gen->f, v2, v1, v0); /* v2 = v1 > v0 ? ~0 : 0 */ + break; - case TGSI_OPCODE_IF: - return 0; - break; + case TGSI_OPCODE_SLE: + complement = TRUE; + /* fall-through */ + case TGSI_OPCODE_SGT: + ppc_vcmpgtfpx(gen->f, v2, v0, v1); /* v2 = v0 > v1 ? ~0 : 0 */ + break; + default: + assert(0); + } - case TGSI_OPCODE_LOOP: - return 0; - break; + /* v2 is now {0,0,0,0} or {~0,~0,~0,~0} */ - case TGSI_OPCODE_REP: - return 0; - break; + if (complement) + ppc_vandc(gen->f, v2, one_vec, v2); /* v2 = one_vec & ~v2 */ + else + ppc_vand(gen->f, v2, one_vec, v2); /* v2 = one_vec & v2 */ - case TGSI_OPCODE_ELSE: - return 0; - break; + STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ + } - case TGSI_OPCODE_ENDIF: - return 0; - break; + ppc_release_vec_register(gen->f, v0); + ppc_release_vec_register(gen->f, v1); + ppc_release_vec_register(gen->f, v2); +} - case TGSI_OPCODE_ENDLOOP: - return 0; - break; - case TGSI_OPCODE_ENDREP: - return 0; - break; +static void +emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0 = ppc_allocate_vec_register(gen->f); + int v1 = ppc_allocate_vec_register(gen->f); + int v2 = ppc_allocate_vec_register(gen->f); + uint chan_index; - case TGSI_OPCODE_PUSHA: - return 0; - break; + ppc_vxor(gen->f, v2, v2, v2); /* v2 = {0, 0, 0, 0} */ - case TGSI_OPCODE_POPA: - return 0; - break; + FETCH(gen, *inst, v0, 0, CHAN_X); /* v0 = src0.XXXX */ + FETCH(gen, *inst, v1, 1, CHAN_X); /* v1 = src1.XXXX */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ - case TGSI_OPCODE_CEIL: - return 0; - break; + FETCH(gen, *inst, v0, 0, CHAN_Y); /* v0 = src0.YYYY */ + FETCH(gen, *inst, v1, 1, CHAN_Y); /* v1 = src1.YYYY */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ - case TGSI_OPCODE_I2F: - return 0; - break; + FETCH(gen, *inst, v0, 0, CHAN_Z); /* v0 = src0.ZZZZ */ + FETCH(gen, *inst, v1, 1, CHAN_Z); /* v1 = src1.ZZZZ */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ - case TGSI_OPCODE_NOT: - return 0; - break; + if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) { + FETCH(gen, *inst, v0, 0, CHAN_W); /* v0 = src0.WWWW */ + FETCH(gen, *inst, v1, 1, CHAN_W); /* v1 = src1.WWWW */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + } + else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) { + FETCH(gen, *inst, v1, 1, CHAN_W); /* v1 = src1.WWWW */ + ppc_vaddfp(gen->f, v2, v2, v1); /* v2 = v2 + v1 */ + } - case TGSI_OPCODE_TRUNC: - return 0; - break; + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ + } + ppc_release_vec_register(gen->f, v0); + ppc_release_vec_register(gen->f, v1); + ppc_release_vec_register(gen->f, v2); +} - case TGSI_OPCODE_SHL: - return 0; - break; - case TGSI_OPCODE_SHR: - return 0; - break; +static void +emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v0 = ppc_allocate_vec_register(gen->f); + int v1 = ppc_allocate_vec_register(gen->f); + int v2 = ppc_allocate_vec_register(gen->f); + int v3 = ppc_allocate_vec_register(gen->f); + uint chan_index; + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { + FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ + FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ + FETCH(gen, *inst, v2, 2, chan_index); /* v2 = srcreg[2] */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_MAD: + ppc_vmaddfp(gen->f, v3, v0, v1, v2); /* v3 = v0 * v1 + v2 */ + break; + case TGSI_OPCODE_LRP: + ppc_vsubfp(gen->f, v3, v1, v2); /* v3 = v1 - v2 */ + ppc_vmaddfp(gen->f, v3, v0, v3, v2); /* v3 = v0 * v3 + v2 */ + break; + default: + assert(0); + } + STORE(gen, *inst, v3, 0, chan_index); /* store v3 */ + } + ppc_release_vec_register(gen->f, v0); + ppc_release_vec_register(gen->f, v1); + ppc_release_vec_register(gen->f, v2); + ppc_release_vec_register(gen->f, v3); +} - case TGSI_OPCODE_AND: - return 0; - break; - case TGSI_OPCODE_OR: - return 0; - break; +/* +static void +emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ +} +*/ - case TGSI_OPCODE_MOD: - return 0; - break; - case TGSI_OPCODE_XOR: - return 0; +static int +emit_instruction(struct gen_context *gen, + struct tgsi_full_instruction *inst) +{ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_MOV: + case TGSI_OPCODE_ABS: + case TGSI_OPCODE_FLOOR: + case TGSI_OPCODE_FRAC: + case TGSI_OPCODE_EXPBASE2: + case TGSI_OPCODE_LOGBASE2: + emit_unaryop(gen, inst); break; - - case TGSI_OPCODE_SAD: - return 0; + case TGSI_OPCODE_RSQ: + case TGSI_OPCODE_RCP: + emit_scalar_unaryop(gen, inst); break; - - case TGSI_OPCODE_TXF: - return 0; + case TGSI_OPCODE_ADD: + case TGSI_OPCODE_SUB: + case TGSI_OPCODE_MUL: + case TGSI_OPCODE_MIN: + case TGSI_OPCODE_MAX: + emit_binop(gen, inst); break; - - case TGSI_OPCODE_TXQ: - return 0; + case TGSI_OPCODE_SEQ: + case TGSI_OPCODE_SNE: + case TGSI_OPCODE_SLT: + case TGSI_OPCODE_SGT: + case TGSI_OPCODE_SLE: + case TGSI_OPCODE_SGE: + emit_inequality(gen, inst); break; - - case TGSI_OPCODE_CONT: - return 0; + case TGSI_OPCODE_MAD: + case TGSI_OPCODE_LRP: + emit_triop(gen, inst); break; - - case TGSI_OPCODE_EMIT: - return 0; + case TGSI_OPCODE_DP3: + case TGSI_OPCODE_DP4: + case TGSI_OPCODE_DPH: + emit_dotprod(gen, inst); break; - - case TGSI_OPCODE_ENDPRIM: - return 0; + /* + case TGSI_OPCODE_LIT: + emit_lit(gen, inst); break; - + */ + case TGSI_OPCODE_END: + /* normal end */ + return 1; default: return 0; } -#endif + return 1; } @@ -2608,133 +663,6 @@ emit_declaration( } } -#if 0 -static void aos_to_soa( struct x86_function *func, - uint arg_aos, - uint arg_soa, - uint arg_num, - uint arg_stride ) -{ - struct x86_reg soa_input = x86_make_reg( file_REG32, reg_AX ); - struct x86_reg aos_input = x86_make_reg( file_REG32, reg_BX ); - struct x86_reg num_inputs = x86_make_reg( file_REG32, reg_CX ); - struct x86_reg stride = x86_make_reg( file_REG32, reg_DX ); - int inner_loop; - - - /* Save EBX */ - x86_push( func, x86_make_reg( file_REG32, reg_BX ) ); - - x86_mov( func, aos_input, x86_fn_arg( func, arg_aos ) ); - x86_mov( func, soa_input, x86_fn_arg( func, arg_soa ) ); - x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) ); - x86_mov( func, stride, x86_fn_arg( func, arg_stride ) ); - - /* do */ - inner_loop = x86_get_label( func ); - { - x86_push( func, aos_input ); - sse_movlps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); - sse_movlps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); - x86_add( func, aos_input, stride ); - sse_movhps( func, make_xmm( 0 ), x86_make_disp( aos_input, 0 ) ); - sse_movhps( func, make_xmm( 3 ), x86_make_disp( aos_input, 8 ) ); - x86_add( func, aos_input, stride ); - sse_movlps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); - sse_movlps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); - x86_add( func, aos_input, stride ); - sse_movhps( func, make_xmm( 1 ), x86_make_disp( aos_input, 0 ) ); - sse_movhps( func, make_xmm( 4 ), x86_make_disp( aos_input, 8 ) ); - x86_pop( func, aos_input ); - - sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); - sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); - sse_shufps( func, make_xmm( 0 ), make_xmm( 1 ), 0x88 ); - sse_shufps( func, make_xmm( 2 ), make_xmm( 1 ), 0xdd ); - sse_shufps( func, make_xmm( 3 ), make_xmm( 4 ), 0x88 ); - sse_shufps( func, make_xmm( 5 ), make_xmm( 4 ), 0xdd ); - - sse_movups( func, x86_make_disp( soa_input, 0 ), make_xmm( 0 ) ); - sse_movups( func, x86_make_disp( soa_input, 16 ), make_xmm( 2 ) ); - sse_movups( func, x86_make_disp( soa_input, 32 ), make_xmm( 3 ) ); - sse_movups( func, x86_make_disp( soa_input, 48 ), make_xmm( 5 ) ); - - /* Advance to next input */ - x86_lea( func, aos_input, x86_make_disp(aos_input, 16) ); - x86_lea( func, soa_input, x86_make_disp(soa_input, 64) ); - } - /* while --num_inputs */ - x86_dec( func, num_inputs ); - x86_jcc( func, cc_NE, inner_loop ); - - /* Restore EBX */ - x86_pop( func, aos_input ); -} -#endif - -#if 0 -static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride ) -{ - struct x86_reg soa_output; - struct x86_reg aos_output; - struct x86_reg num_outputs; - struct x86_reg temp; - int inner_loop; - - soa_output = x86_make_reg( file_REG32, reg_AX ); - aos_output = x86_make_reg( file_REG32, reg_BX ); - num_outputs = x86_make_reg( file_REG32, reg_CX ); - temp = x86_make_reg( file_REG32, reg_DX ); - - /* Save EBX */ - x86_push( func, aos_output ); - - x86_mov( func, soa_output, x86_fn_arg( func, soa ) ); - x86_mov( func, aos_output, x86_fn_arg( func, aos ) ); - x86_mov( func, num_outputs, x86_fn_arg( func, num ) ); - - /* do */ - inner_loop = x86_get_label( func ); - { - sse_movups( func, make_xmm( 0 ), x86_make_disp( soa_output, 0 ) ); - sse_movups( func, make_xmm( 1 ), x86_make_disp( soa_output, 16 ) ); - sse_movups( func, make_xmm( 3 ), x86_make_disp( soa_output, 32 ) ); - sse_movups( func, make_xmm( 4 ), x86_make_disp( soa_output, 48 ) ); - - sse_movaps( func, make_xmm( 2 ), make_xmm( 0 ) ); - sse_movaps( func, make_xmm( 5 ), make_xmm( 3 ) ); - sse_unpcklps( func, make_xmm( 0 ), make_xmm( 1 ) ); - sse_unpckhps( func, make_xmm( 2 ), make_xmm( 1 ) ); - sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) ); - sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) ); - - x86_mov( func, temp, x86_fn_arg( func, stride ) ); - x86_push( func, aos_output ); - sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); - sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); - x86_add( func, aos_output, temp ); - sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) ); - sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) ); - x86_add( func, aos_output, temp ); - sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); - sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); - x86_add( func, aos_output, temp ); - sse_movhps( func, x86_make_disp( aos_output, 0 ), make_xmm( 2 ) ); - sse_movhps( func, x86_make_disp( aos_output, 8 ), make_xmm( 5 ) ); - x86_pop( func, aos_output ); - - /* Advance to next output */ - x86_lea( func, aos_output, x86_make_disp(aos_output, 16) ); - x86_lea( func, soa_output, x86_make_disp(soa_output, 64) ); - } - /* while --num_outputs */ - x86_dec( func, num_outputs ); - x86_jcc( func, cc_NE, inner_loop ); - - /* Restore EBX */ - x86_pop( func, aos_output ); -} -#endif static void @@ -2788,67 +716,6 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, emit_prologue(func); - /* - * Different function args for vertex/fragment shaders: - */ -#if 0 - if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { - /* DECLARATION phase, do not load output argument. */ - x86_mov( - func, - get_input_base(), - x86_fn_arg( func, 1 ) ); - /* skipping outputs argument here */ - x86_mov( - func, - get_const_base(), - x86_fn_arg( func, 3 ) ); - x86_mov( - func, - get_temp_base(), - x86_fn_arg( func, 4 ) ); - x86_mov( - func, - get_coef_base(), - x86_fn_arg( func, 5 ) ); - x86_mov( - func, - get_immediate_base(), - x86_fn_arg( func, 6 ) ); - } - else { - assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX); - - if (do_swizzles) - aos_to_soa( func, - 6, /* aos_input */ - 1, /* machine->input */ - 7, /* num_inputs */ - 8 ); /* input_stride */ - - x86_mov( - func, - get_input_base(), - x86_fn_arg( func, 1 ) ); - x86_mov( - func, - get_output_base(), - x86_fn_arg( func, 2 ) ); - x86_mov( - func, - get_const_base(), - x86_fn_arg( func, 3 ) ); - x86_mov( - func, - get_temp_base(), - x86_fn_arg( func, 4 ) ); - x86_mov( - func, - get_immediate_base(), - x86_fn_arg( func, 5 ) ); - } -#endif - while (!tgsi_parse_end_of_tokens(&parse) && ok) { tgsi_parse_token(&parse); @@ -2860,19 +727,6 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, break; case TGSI_TOKEN_TYPE_INSTRUCTION: - if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) { -#if 0 - if( !instruction_phase ) { - /* INSTRUCTION phase, overwrite coeff with output. */ - instruction_phase = TRUE; - x86_mov( - func, - get_output_base(), - x86_fn_arg( func, 2 ) ); - } -#endif - } - ok = emit_instruction(&gen, &parse.FullToken.FullInstruction); if (!ok) { @@ -2909,13 +763,6 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, } } -#if 0 - if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) { - if (do_swizzles) - soa_to_aos( func, 9, 2, 10, 11 ); - } -#endif - emit_epilogue(func); tgsi_parse_free( &parse ); -- cgit v1.2.3 From 77160cd97b7f2181b7953bcc8d13e86055b819e3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 15:34:16 -0600 Subject: gallium: var renaming in tgsi_ppc.c --- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 36 +++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 432ec7459b..c1e707657b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -117,11 +117,11 @@ gen_get_bit31_vec(struct gen_context *gen) /** - * Register fetch. + * Register fetch, put result in 'dst_vec'. */ static void emit_fetch(struct gen_context *gen, - unsigned vec_reg, + unsigned dst_vec, const struct tgsi_full_src_register *reg, const unsigned chan_index) { @@ -138,7 +138,7 @@ emit_fetch(struct gen_context *gen, int offset_reg = ppc_allocate_register(gen->f); int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; ppc_li(gen->f, offset_reg, offset); - ppc_lvx(gen->f, vec_reg, gen->inputs_reg, offset_reg); + ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg); ppc_release_register(gen->f, offset_reg); } break; @@ -147,7 +147,7 @@ emit_fetch(struct gen_context *gen, int offset_reg = ppc_allocate_register(gen->f); int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; ppc_li(gen->f, offset_reg, offset); - ppc_lvx(gen->f, vec_reg, gen->temps_reg, offset_reg); + ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg); ppc_release_register(gen->f, offset_reg); } break; @@ -156,7 +156,7 @@ emit_fetch(struct gen_context *gen, int offset_reg = ppc_allocate_register(gen->f); int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; ppc_li(gen->f, offset_reg, offset); - ppc_lvx(gen->f, vec_reg, gen->immed_reg, offset_reg); + ppc_lvx(gen->f, dst_vec, gen->immed_reg, offset_reg); ppc_release_register(gen->f, offset_reg); } break; @@ -171,9 +171,9 @@ emit_fetch(struct gen_context *gen, * know that 'swizzle' tells us which vector slot will have the * loaded word. The other vector slots will be undefined. */ - ppc_lvewx(gen->f, vec_reg, gen->const_reg, offset_reg); + ppc_lvewx(gen->f, dst_vec, gen->const_reg, offset_reg); /* splat word[swizzle] across the vector reg */ - ppc_vspltw(gen->f, vec_reg, vec_reg, swizzle); + ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle); ppc_release_register(gen->f, offset_reg); } break; @@ -182,12 +182,12 @@ emit_fetch(struct gen_context *gen, } break; case TGSI_EXTSWIZZLE_ZERO: - ppc_vload_float(gen->f, vec_reg, 0.0f); + ppc_vload_float(gen->f, dst_vec, 0.0f); break; case TGSI_EXTSWIZZLE_ONE: { int one_vec = gen_one_vec(gen); - ppc_vecmove(gen->f, vec_reg, one_vec); + ppc_vecmove(gen->f, dst_vec, one_vec); } break; default: @@ -202,15 +202,15 @@ emit_fetch(struct gen_context *gen, switch (sign_op) { case TGSI_UTIL_SIGN_CLEAR: /* vec = vec & ~bit31 */ - ppc_vandc(gen->f, vec_reg, vec_reg, bit31_vec); + ppc_vandc(gen->f, dst_vec, dst_vec, bit31_vec); break; case TGSI_UTIL_SIGN_SET: /* vec = vec | bit31 */ - ppc_vor(gen->f, vec_reg, vec_reg, bit31_vec); + ppc_vor(gen->f, dst_vec, dst_vec, bit31_vec); break; case TGSI_UTIL_SIGN_TOGGLE: /* vec = vec ^ bit31 */ - ppc_vxor(gen->f, vec_reg, vec_reg, bit31_vec); + ppc_vxor(gen->f, dst_vec, dst_vec, bit31_vec); break; default: assert(0); @@ -219,17 +219,17 @@ emit_fetch(struct gen_context *gen, } } -#define FETCH( GEN, INST, VEC_REG, SRC_REG, CHAN ) \ - emit_fetch( GEN, VEC_REG, &(INST).FullSrcRegisters[SRC_REG], CHAN ) +#define FETCH( GEN, INST, DST_VEC, SRC_REG, CHAN ) \ + emit_fetch( GEN, DST_VEC, &(INST).FullSrcRegisters[SRC_REG], CHAN ) /** - * Register store. + * Register store. Store 'src_vec' at location indicated by 'reg'. */ static void emit_store(struct gen_context *gen, - unsigned vec_reg, + unsigned src_vec, const struct tgsi_full_dst_register *reg, const struct tgsi_full_instruction *inst, unsigned chan_index) @@ -240,7 +240,7 @@ emit_store(struct gen_context *gen, int offset_reg = ppc_allocate_register(gen->f); int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; ppc_li(gen->f, offset_reg, offset); - ppc_stvx(gen->f, vec_reg, gen->outputs_reg, offset_reg); + ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg); ppc_release_register(gen->f, offset_reg); } break; @@ -249,7 +249,7 @@ emit_store(struct gen_context *gen, int offset_reg = ppc_allocate_register(gen->f); int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; ppc_li(gen->f, offset_reg, offset); - ppc_stvx(gen->f, vec_reg, gen->temps_reg, offset_reg); + ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg); ppc_release_register(gen->f, offset_reg); } break; -- cgit v1.2.3 From 9e3ee82305b4602feca0253dc0e0c27f9bc9b05e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 16:57:22 -0600 Subject: gallium: PPC LIT instruction (not quite complete yet) --- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 89 +++++++++++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index c1e707657b..edd535a884 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -535,12 +535,95 @@ emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) } -/* + +/** Approximation for vr = pow(va, vb) */ +static void +ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb) +{ + /* pow(a,b) ~= exp2(log2(a) * b) */ + int t_vec = ppc_allocate_vec_register(f); + int zero_vec = ppc_allocate_vec_register(f); + + ppc_vload_float(f, zero_vec, 0.0f); + + ppc_vlogefp(f, t_vec, va); /* t = log2(va) */ + ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb */ + ppc_vexptefp(f, vr, t_vec); /* vr = 2^t */ + + ppc_release_vec_register(f, t_vec); + ppc_release_vec_register(f, zero_vec); +} + + static void emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) { + int one_vec = gen_one_vec(gen); + + /* Compute X */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { + STORE(gen, *inst, one_vec, 0, CHAN_X); + } + + /* Compute Y, Z */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) || + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + int x_vec = ppc_allocate_vec_register(gen->f); + int zero_vec = ppc_allocate_vec_register(gen->f); + + FETCH(gen, *inst, x_vec, 0, CHAN_X); /* x_vec = src[0].x */ + + ppc_vload_float(gen->f, zero_vec, 0.0f); /* zero = {0,0,0,0} */ + ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */ + + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + STORE(gen, *inst, x_vec, 0, CHAN_Y); /* store Y */ + } + + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + int y_vec = ppc_allocate_vec_register(gen->f); + int z_vec = ppc_allocate_vec_register(gen->f); + int w_vec = ppc_allocate_vec_register(gen->f); + int pow_vec = ppc_allocate_vec_register(gen->f); + int pos_vec = ppc_allocate_vec_register(gen->f); + int c128_vec = ppc_allocate_vec_register(gen->f); + + FETCH(gen, *inst, y_vec, 0, CHAN_Y); /* y_vec = src[0].y */ + ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */ + + FETCH(gen, *inst, w_vec, 0, CHAN_W); /* w_vec = src[0].w */ + + /* XXX clamp Y to [-128, 128] */ + ppc_vload_float(gen->f, c128_vec, 128.0f); + + /* if temp.x > 0 + * pow(tmp.y, tmp.w) + * else + * 0.0 + */ + + ppc_vec_pow(gen->f, pow_vec, y_vec, w_vec); /* pow = pow(y, w) */ + ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec); /* pos = x > 0 */ + ppc_vand(gen->f, z_vec, pow_vec, pos_vec); /* z = pow & pos */ + + STORE(gen, *inst, z_vec, 0, CHAN_Z); /* store Z */ + + ppc_release_vec_register(gen->f, y_vec); + ppc_release_vec_register(gen->f, z_vec); + ppc_release_vec_register(gen->f, w_vec); + ppc_release_vec_register(gen->f, pow_vec); + ppc_release_vec_register(gen->f, pos_vec); + } + + ppc_release_vec_register(gen->f, x_vec); + ppc_release_vec_register(gen->f, zero_vec); + } + + /* Compute W */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { + STORE(gen, *inst, one_vec, 0, CHAN_W); + } } -*/ static int @@ -584,11 +667,9 @@ emit_instruction(struct gen_context *gen, case TGSI_OPCODE_DPH: emit_dotprod(gen, inst); break; - /* case TGSI_OPCODE_LIT: emit_lit(gen, inst); break; - */ case TGSI_OPCODE_END: /* normal end */ return 1; -- cgit v1.2.3 From ae81aeb12868db219cbdc02437c481714cfed3f5 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 16:58:05 -0600 Subject: gallium: GALLIUM_NOPPC debug var to disable PPC codegen --- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index edd535a884..9d7de41fe7 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -776,15 +776,21 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, float (*immediates)[4], boolean do_swizzles ) { + static int use_ppc_asm = -1; struct tgsi_parse_context parse; /*boolean instruction_phase = FALSE;*/ unsigned ok = 1; uint num_immediates = 0; struct gen_context gen; - util_init_math(); + if (use_ppc_asm < 0) { + /* If GALLIUM_NOPPC is set, don't use PPC codegen */ + use_ppc_asm = !debug_get_bool_option("GALLIUM_NOPPC", FALSE); + } + if (!use_ppc_asm) + return FALSE; - tgsi_parse_init( &parse, tokens ); + util_init_math(); gen.f = func; gen.inputs_reg = ppc_reserve_register(func, 3); /* first function param */ @@ -797,6 +803,8 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, emit_prologue(func); + tgsi_parse_init( &parse, tokens ); + while (!tgsi_parse_end_of_tokens(&parse) && ok) { tgsi_parse_token(&parse); -- cgit v1.2.3 From 3026616c48487a7561d8545c08950539f0ad51d1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 17:17:11 -0600 Subject: gallium: added ppc_vzero() --- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 8 ++++++++ src/gallium/auxiliary/rtasm/rtasm_ppc.h | 5 +++++ 2 files changed, 13 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index 65df676eae..51d9b53657 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -669,6 +669,14 @@ ppc_vecmove(struct ppc_function *p, uint vD, uint vA) ppc_vor(p, vD, vA, vA); } +/** Set vector register to {0,0,0,0} */ +void +ppc_vzero(struct ppc_function *p, uint vr) +{ + ppc_vxor(p, vr, vr, vr); +} + + /** diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index 9f1e3fcd84..f194d3be13 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -193,6 +193,11 @@ ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB); extern void ppc_vecmove(struct ppc_function *p, uint vD, uint vA); +/** Set vector register to {0,0,0,0} */ +extern void +ppc_vzero(struct ppc_function *p, uint vr); + + /** ** Vector shuffle / select / splat / etc -- cgit v1.2.3 From abbbe876ac98596b143da295abf6887e0a4e50d2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 17:19:12 -0600 Subject: gallium: new PPC built-in constants array It's hard to form PPC vector immediates so load them from an array. --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 8 +++-- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 61 ++++++++++++++++++++++++++++---- src/gallium/auxiliary/tgsi/tgsi_ppc.h | 3 ++ 3 files changed, 63 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index fcc9cbfec5..8eff6d4fda 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -55,7 +55,8 @@ typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4], float (*outputs)[4][4], float (*temps)[4][4], float (*immeds)[4][4], - float (*consts)[4]); + float (*consts)[4], + const float *builtins); #if 0 const struct tgsi_exec_vector *input, @@ -151,7 +152,8 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, #else shader->func(inputs_soa, outputs_soa, temps_soa, (float (*)[4][4]) shader->base.immediates, - (float (*)[4]) constants); + (float (*)[4]) constants, + ppc_builtin_constants); /*output[0][0] = input[0][0] * 0.5;*/ #endif @@ -246,7 +248,9 @@ draw_create_vs_ppc(struct draw_context *draw, return &vs->base; fail: + /* debug_error("tgsi_emit_ppc() failed, falling back to interpreter\n"); + */ ppc_release_func( &vs->ppc_program ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 9d7de41fe7..6b05fd16cf 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -36,6 +36,7 @@ #include "pipe/p_debug.h" #include "pipe/p_shader_tokens.h" #include "util/u_math.h" +#include "util/u_memory.h" #include "util/u_sse.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" @@ -44,6 +45,14 @@ #include "rtasm/rtasm_ppc.h" +/** + * Since it's pretty much impossible to form PPC vector immediates, load + * them from memory here: + */ +const float ppc_builtin_constants[] ALIGN16_ATTRIB = { + 1.0f, -128.0f, 128.0, 0.0 +}; + #define FOR_EACH_CHANNEL( CHAN )\ for (CHAN = 0; CHAN < NUM_CHANNELS; CHAN++) @@ -81,12 +90,46 @@ struct gen_context int temps_reg; /**< GP register pointing to temporary "registers" */ int immed_reg; /**< GP register pointing to immediates buffer */ int const_reg; /**< GP register pointing to constants buffer */ + int builtins_reg; /**< GP register pointint to built-in constants */ int one_vec; /**< vector register with {1.0, 1.0, 1.0, 1.0} */ int bit31_vec; /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */ }; +/** + * Load the given vector register with {value, value, value, value}. + * The value must be in the ppu_builtin_constants[] array. + * We wouldn't need this if there was a simple way to load PPC vector + * registers with immediate values! + */ +static void +load_constant_vec(struct gen_context *gen, int dst_vec, float value) +{ + uint pos; + for (pos = 0; pos < Elements(ppc_builtin_constants); pos++) { + if (ppc_builtin_constants[pos] == value) { + int offset_reg = ppc_allocate_register(gen->f); + int offset = pos * 4; + + ppc_li(gen->f, offset_reg, offset); + /* Load 4-byte word into vector register. + * The vector slot depends on the effective address we load from. + * We know that our builtins start at a 16-byte boundary so we + * know that 'swizzle' tells us which vector slot will have the + * loaded word. The other vector slots will be undefined. + */ + ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, offset_reg); + /* splat word[pos % 4] across the vector reg */ + ppc_vspltw(gen->f, dst_vec, dst_vec, pos % 4); + ppc_release_register(gen->f, offset_reg); + return; + } + } + assert(0 && "Need to add new constant to ppc_builtin_constants array"); +} + + /** * Return index of vector register containing {1.0, 1.0, 1.0, 1.0}. */ @@ -95,7 +138,7 @@ gen_one_vec(struct gen_context *gen) { if (gen->one_vec < 0) { gen->one_vec = ppc_allocate_vec_register(gen->f); - ppc_vload_float(gen->f, gen->one_vec, 1.0f); + load_constant_vec(gen, gen->one_vec, 1.0f); } return gen->one_vec; } @@ -115,7 +158,6 @@ gen_get_bit31_vec(struct gen_context *gen) } - /** * Register fetch, put result in 'dst_vec'. */ @@ -182,7 +224,7 @@ emit_fetch(struct gen_context *gen, } break; case TGSI_EXTSWIZZLE_ZERO: - ppc_vload_float(gen->f, dst_vec, 0.0f); + ppc_vzero(gen->f, dst_vec); break; case TGSI_EXTSWIZZLE_ONE: { @@ -544,7 +586,7 @@ ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb) int t_vec = ppc_allocate_vec_register(f); int zero_vec = ppc_allocate_vec_register(f); - ppc_vload_float(f, zero_vec, 0.0f); + ppc_vzero(f, zero_vec); ppc_vlogefp(f, t_vec, va); /* t = log2(va) */ ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb */ @@ -573,7 +615,7 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) FETCH(gen, *inst, x_vec, 0, CHAN_X); /* x_vec = src[0].x */ - ppc_vload_float(gen->f, zero_vec, 0.0f); /* zero = {0,0,0,0} */ + ppc_vzero(gen->f, zero_vec); /* zero = {0,0,0,0} */ ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { @@ -586,7 +628,8 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) int w_vec = ppc_allocate_vec_register(gen->f); int pow_vec = ppc_allocate_vec_register(gen->f); int pos_vec = ppc_allocate_vec_register(gen->f); - int c128_vec = ppc_allocate_vec_register(gen->f); + int p128_vec = ppc_allocate_vec_register(gen->f); + int n128_vec = ppc_allocate_vec_register(gen->f); FETCH(gen, *inst, y_vec, 0, CHAN_Y); /* y_vec = src[0].y */ ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */ @@ -594,7 +637,8 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) FETCH(gen, *inst, w_vec, 0, CHAN_W); /* w_vec = src[0].w */ /* XXX clamp Y to [-128, 128] */ - ppc_vload_float(gen->f, c128_vec, 128.0f); + load_constant_vec(gen, p128_vec, 128.0f); + load_constant_vec(gen, n128_vec, -128.0f); /* if temp.x > 0 * pow(tmp.y, tmp.w) @@ -613,6 +657,8 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) ppc_release_vec_register(gen->f, w_vec); ppc_release_vec_register(gen->f, pow_vec); ppc_release_vec_register(gen->f, pos_vec); + ppc_release_vec_register(gen->f, p128_vec); + ppc_release_vec_register(gen->f, n128_vec); } ppc_release_vec_register(gen->f, x_vec); @@ -798,6 +844,7 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, gen.temps_reg = ppc_reserve_register(func, 5); /* ... */ gen.immed_reg = ppc_reserve_register(func, 6); gen.const_reg = ppc_reserve_register(func, 7); + gen.builtins_reg = ppc_reserve_register(func, 8); gen.one_vec = -1; gen.bit31_vec = -1; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.h b/src/gallium/auxiliary/tgsi/tgsi_ppc.h index 7cd2bf9aff..829ec075e7 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.h +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.h @@ -35,6 +35,9 @@ extern "C" { struct tgsi_token; struct ppc_function; +extern const float ppc_builtin_constants[]; + + boolean tgsi_emit_ppc(const struct tgsi_token *tokens, struct ppc_function *function, -- cgit v1.2.3 From f8ab4feb75f4a592e23859813c093dcdbd4b8988 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 17:21:43 -0600 Subject: gallium: remove ppc_vload_float(), rename ppc_vecmove() -> ppc_vmove(). --- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 19 +------------------ src/gallium/auxiliary/rtasm/rtasm_ppc.h | 6 +----- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 2 +- 3 files changed, 3 insertions(+), 24 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index 51d9b53657..7dd8263749 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -603,23 +603,6 @@ ppc_lvewx(struct ppc_function *p, uint vr, uint ra, uint rb) emit_x(p, 31, vr, ra, rb, 71); } -/** vector load float: vr = splats(imm) */ -void -ppc_vload_float(struct ppc_function *p, uint vr, float imm) -{ - if (imm == 0.0f) { - ppc_vxor(p, vr, vr, vr); - } - else if (imm == 1.0f) { - /* use 2^0=1 to get 1.0 */ - ppc_vxor(p, vr, vr, vr); /* vr = {0,0,0,0} */ - ppc_vexptefp(p, vr, vr); /* vr = 0^0 */ - } - else { - assert(0); - } -} - @@ -664,7 +647,7 @@ ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB) /** Pseudo-instruction: vector move */ void -ppc_vecmove(struct ppc_function *p, uint vD, uint vA) +ppc_vmove(struct ppc_function *p, uint vD, uint vA) { ppc_vor(p, vD, vA, vA); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index f194d3be13..f938d8d759 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -158,10 +158,6 @@ ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB); extern void ppc_lvewx(struct ppc_function *p, uint vR, uint vA, uint vB); -/** vector load float: vr = splats(imm) */ -extern void -ppc_vload_float(struct ppc_function *p, uint vr, float imm); - /** @@ -191,7 +187,7 @@ ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB); /** Pseudo-instruction: vector move */ extern void -ppc_vecmove(struct ppc_function *p, uint vD, uint vA); +ppc_vmove(struct ppc_function *p, uint vD, uint vA); /** Set vector register to {0,0,0,0} */ extern void diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 6b05fd16cf..96beec0cc6 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -229,7 +229,7 @@ emit_fetch(struct gen_context *gen, case TGSI_EXTSWIZZLE_ONE: { int one_vec = gen_one_vec(gen); - ppc_vecmove(gen->f, dst_vec, one_vec); + ppc_vmove(gen->f, dst_vec, one_vec); } break; default: -- cgit v1.2.3 From 0ac99457811eb766e9bdd3903857b5c0fdef7694 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 22 Oct 2008 17:29:37 -0600 Subject: gallium: PPC: clamp y to [-128,128] for LIT --- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 96beec0cc6..9ad7ecd7cf 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -636,16 +636,17 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) FETCH(gen, *inst, w_vec, 0, CHAN_W); /* w_vec = src[0].w */ - /* XXX clamp Y to [-128, 128] */ + /* clamp Y to [-128, 128] */ load_constant_vec(gen, p128_vec, 128.0f); load_constant_vec(gen, n128_vec, -128.0f); + ppc_vmaxfp(gen->f, y_vec, y_vec, n128_vec); /* y = max(y, -128) */ + ppc_vminfp(gen->f, y_vec, y_vec, p128_vec); /* y = min(y, 128) */ /* if temp.x > 0 - * pow(tmp.y, tmp.w) + * z = pow(tmp.y, tmp.w) * else - * 0.0 + * z = 0.0 */ - ppc_vec_pow(gen->f, pow_vec, y_vec, w_vec); /* pow = pow(y, w) */ ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec); /* pos = x > 0 */ ppc_vand(gen->f, z_vec, pow_vec, pos_vec); /* z = pow & pos */ -- cgit v1.2.3 From 6b69e3c71741d99a54c6f4dcb605a3c241239aeb Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Thu, 23 Oct 2008 10:28:48 +0200 Subject: scons: ppc support. --- SConstruct | 2 ++ common.py | 3 ++- scons/gallium.py | 1 + src/gallium/auxiliary/draw/SConscript | 1 + src/gallium/auxiliary/rtasm/SConscript | 1 + src/gallium/auxiliary/tgsi/SConscript | 1 + src/mesa/SConscript | 4 ++++ 7 files changed, 12 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/SConstruct b/SConstruct index c1dc624651..8c96817dae 100644 --- a/SConstruct +++ b/SConstruct @@ -70,12 +70,14 @@ platform = env['platform'] # derived options x86 = machine == 'x86' +ppc = machine == 'ppc' gcc = platform in ('linux', 'freebsd', 'darwin') msvc = platform in ('windows', 'winddk') Export([ 'debug', 'x86', + 'ppc', 'dri', 'llvm', 'platform', diff --git a/common.py b/common.py index dd64e0f434..cc2582f1a4 100644 --- a/common.py +++ b/common.py @@ -24,6 +24,7 @@ _machine_map = { 'i486': 'x86', 'i586': 'x86', 'i686': 'x86', + 'ppc' : 'ppc', 'x86_64': 'x86_64', } if 'PROCESSOR_ARCHITECTURE' in os.environ: @@ -56,7 +57,7 @@ def AddOptions(opts): opts.Add(BoolOption('profile', 'profile build', 'no')) #opts.Add(BoolOption('quiet', 'quiet command lines', 'no')) opts.Add(EnumOption('machine', 'use machine-specific assembly code', default_machine, - allowed_values=('generic', 'x86', 'x86_64'))) + allowed_values=('generic', 'ppc', 'x86', 'x86_64'))) opts.Add(EnumOption('platform', 'target platform', default_platform, allowed_values=('linux', 'cell', 'windows', 'winddk', 'wince'))) opts.Add(BoolOption('llvm', 'use LLVM', 'no')) diff --git a/scons/gallium.py b/scons/gallium.py index 3631607e66..2a42bdf2bb 100644 --- a/scons/gallium.py +++ b/scons/gallium.py @@ -175,6 +175,7 @@ def generate(env): machine = env['machine'] platform = env['platform'] x86 = env['machine'] == 'x86' + ppc = env['machine'] == 'ppc' gcc = env['platform'] in ('linux', 'freebsd', 'darwin') msvc = env['platform'] in ('windows', 'winddk', 'wince') diff --git a/src/gallium/auxiliary/draw/SConscript b/src/gallium/auxiliary/draw/SConscript index 544a04918b..5f05aa324a 100644 --- a/src/gallium/auxiliary/draw/SConscript +++ b/src/gallium/auxiliary/draw/SConscript @@ -38,6 +38,7 @@ draw = env.ConvenienceLibrary( 'draw_vs_aos_machine.c', 'draw_vs_exec.c', 'draw_vs_llvm.c', + 'draw_vs_ppc.c', 'draw_vs_sse.c', 'draw_vs_varient.c' ]) diff --git a/src/gallium/auxiliary/rtasm/SConscript b/src/gallium/auxiliary/rtasm/SConscript index 8ea25922aa..eb48368acc 100644 --- a/src/gallium/auxiliary/rtasm/SConscript +++ b/src/gallium/auxiliary/rtasm/SConscript @@ -6,6 +6,7 @@ rtasm = env.ConvenienceLibrary( 'rtasm_cpu.c', 'rtasm_execmem.c', 'rtasm_x86sse.c', + 'rtasm_ppc.c', 'rtasm_ppc_spe.c', ]) diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript index 45bf3f6d57..8200cce42f 100644 --- a/src/gallium/auxiliary/tgsi/SConscript +++ b/src/gallium/auxiliary/tgsi/SConscript @@ -12,6 +12,7 @@ tgsi = env.ConvenienceLibrary( 'tgsi_parse.c', 'tgsi_sanity.c', 'tgsi_scan.c', + 'tgsi_ppc.c', 'tgsi_sse2.c', 'tgsi_text.c', 'tgsi_transform.c', diff --git a/src/mesa/SConscript b/src/mesa/SConscript index af8dfcb493..89b98b37ab 100644 --- a/src/mesa/SConscript +++ b/src/mesa/SConscript @@ -283,6 +283,10 @@ if env['platform'] != 'winddk': 'x86-64/glapi_x86-64.S' ] elif gcc and env['machine'] == 'ppc': + env.Append(CPPDEFINES = [ + 'USE_PPC_ASM', + 'USE_VMX_ASM', + ]) mesa_sources += [ 'ppc/common_ppc.c', ] -- cgit v1.2.3 From 604be5561f17042f61db42b31caf4d720cf66389 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 27 Oct 2008 15:36:25 -0600 Subject: gallium: ppc: use a src register cache to avoid redundant loads --- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 300 +++++++++++++++++++++++----------- 1 file changed, 204 insertions(+), 96 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 9ad7ecd7cf..000ddba935 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -72,11 +72,14 @@ const float ppc_builtin_constants[] ALIGN16_ATTRIB = { #define CHAN_Z 2 #define CHAN_W 3 -#define TEMP_ONE_I TGSI_EXEC_TEMP_ONE_I -#define TEMP_ONE_C TGSI_EXEC_TEMP_ONE_C -#define TEMP_R0 TGSI_EXEC_TEMP_R0 -#define TEMP_ADDR TGSI_EXEC_TEMP_ADDR + +struct reg_chan_vec +{ + struct tgsi_full_src_register src; + uint chan; + uint vec; +}; /** @@ -94,6 +97,18 @@ struct gen_context int one_vec; /**< vector register with {1.0, 1.0, 1.0, 1.0} */ int bit31_vec; /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */ + + + /** + * Cache of src registers. + * This is used to avoid redundant load instructions. + */ + struct { + struct tgsi_full_src_register src; + uint chan; + uint vec; + } regs[12]; /* 3 src regs, 4 channels */ + uint num_regs; }; @@ -261,8 +276,83 @@ emit_fetch(struct gen_context *gen, } } -#define FETCH( GEN, INST, DST_VEC, SRC_REG, CHAN ) \ - emit_fetch( GEN, DST_VEC, &(INST).FullSrcRegisters[SRC_REG], CHAN ) + + +/** + * Test if two TGSI src registers refer to the same memory location. + * We use this to avoid redundant register loads. + */ +static boolean +equal_src_locs(const struct tgsi_full_src_register *a, uint chan_a, + const struct tgsi_full_src_register *b, uint chan_b) +{ + int swz_a, swz_b; + int sign_a, sign_b; + if (a->SrcRegister.File != b->SrcRegister.File) + return FALSE; + if (a->SrcRegister.Index != b->SrcRegister.Index) + return FALSE; + swz_a = tgsi_util_get_full_src_register_extswizzle(a, chan_a); + swz_b = tgsi_util_get_full_src_register_extswizzle(b, chan_b); + if (swz_a != swz_b) + return FALSE; + sign_a = tgsi_util_get_full_src_register_sign_mode(a, chan_a); + sign_b = tgsi_util_get_full_src_register_sign_mode(b, chan_b); + if (sign_a != sign_b) + return FALSE; + return TRUE; +} + + +/** + * Given a TGSI src register and channel index, return the PPC vector + * register containing the value. We use a cache to prevent re-loading + * the same register multiple times. + * \return index of PPC vector register with the desired src operand + */ +static int +get_src_vec(struct gen_context *gen, + struct tgsi_full_instruction *inst, int src_reg, uint chan) +{ + const const struct tgsi_full_src_register *src = + &inst->FullSrcRegisters[src_reg]; + int vec; + uint i; + + /* check the cache */ + for (i = 0; i < gen->num_regs; i++) { + if (equal_src_locs(&gen->regs[i].src, gen->regs[i].chan, src, chan)) { + /* cache hit */ + return gen->regs[i].vec; + } + } + + /* cache miss: allocate new vec reg and emit fetch/load code */ + vec = ppc_allocate_vec_register(gen->f); + gen->regs[gen->num_regs].src = *src; + gen->regs[gen->num_regs].chan = chan; + gen->regs[gen->num_regs].vec = vec; + gen->num_regs++; + emit_fetch(gen, vec, src, chan); + + assert(gen->num_regs <= Elements(gen->regs)); + + return vec; +} + + +/** + * Clear the src operand cache. To be called at the end of each emit function. + */ +static void +release_src_vecs(struct gen_context *gen) +{ + uint i; + for (i = 0; i < gen->num_regs; i++) { + ppc_release_vec_register(gen->f, gen->regs[i].vec); + } + gen->num_regs = 0; +} @@ -333,11 +423,10 @@ emit_store(struct gen_context *gen, static void emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) { - int v0 = ppc_allocate_vec_register(gen->f); - int v1 = ppc_allocate_vec_register(gen->f); + int v0, v1 = ppc_allocate_vec_register(gen->f); uint chan_index; - FETCH(gen, *inst, v0, 0, CHAN_X); + v0 = get_src_vec(gen, inst, 0, CHAN_X); switch (inst->Instruction.Opcode) { case TGSI_OPCODE_RSQ: @@ -355,7 +444,8 @@ emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { STORE(gen, *inst, v1, 0, chan_index); } - ppc_release_vec_register(gen->f, v0); + + release_src_vecs(gen); ppc_release_vec_register(gen->f, v1); } @@ -363,10 +453,9 @@ emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) static void emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) { - int v0 = ppc_allocate_vec_register(gen->f); uint chan_index; FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { - FETCH(gen, *inst, 0, 0, chan_index); /* v0 = srcreg[0] */ + int v0 = get_src_vec(gen, inst, 0, chan_index); /* v0 = srcreg[0] */ switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ABS: /* turn off the most significant bit of each vector float word */ @@ -404,20 +493,30 @@ emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) } STORE(gen, *inst, v0, 0, chan_index); /* store v0 */ } - ppc_release_vec_register(gen->f, v0); + + release_src_vecs(gen); } static void emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) { - int v0 = ppc_allocate_vec_register(gen->f); - int v1 = ppc_allocate_vec_register(gen->f); - int v2 = ppc_allocate_vec_register(gen->f); - uint chan_index; - FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { - FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ - FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ + int v2, zero_vec = -1; + uint chan; + + if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) { + zero_vec = ppc_allocate_vec_register(gen->f); + ppc_vzero(gen->f, zero_vec); + } + + v2 = ppc_allocate_vec_register(gen->f); + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { + /* fetch src operands */ + int v0 = get_src_vec(gen, inst, 0, chan); + int v1 = get_src_vec(gen, inst, 1, chan); + + /* emit binop */ switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ADD: ppc_vaddfp(gen->f, v2, v0, v1); @@ -426,8 +525,7 @@ emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) ppc_vsubfp(gen->f, v2, v0, v1); break; case TGSI_OPCODE_MUL: - ppc_vxor(gen->f, v2, v2, v2); /* v2 = {0, 0, 0, 0} */ - ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v0 */ + ppc_vmaddfp(gen->f, v2, v0, v1, zero_vec); break; case TGSI_OPCODE_MIN: ppc_vminfp(gen->f, v2, v0, v1); @@ -438,11 +536,54 @@ emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) default: assert(0); } - STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ + + /* store v2 */ + STORE(gen, *inst, v2, 0, chan); } - ppc_release_vec_register(gen->f, v0); - ppc_release_vec_register(gen->f, v1); + ppc_release_vec_register(gen->f, v2); + + if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) + ppc_release_vec_register(gen->f, zero_vec); + + release_src_vecs(gen); +} + + +static void +emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int v3; + uint chan; + + v3 = ppc_allocate_vec_register(gen->f); + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { + /* fetch src operands */ + int v0 = get_src_vec(gen, inst, 0, chan); + int v1 = get_src_vec(gen, inst, 1, chan); + int v2 = get_src_vec(gen, inst, 2, chan); + + /* emit ALU */ + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_MAD: + ppc_vmaddfp(gen->f, v3, v0, v1, v2); /* v3 = v0 * v1 + v2 */ + break; + case TGSI_OPCODE_LRP: + ppc_vsubfp(gen->f, v3, v1, v2); /* v3 = v1 - v2 */ + ppc_vmaddfp(gen->f, v3, v0, v3, v2); /* v3 = v0 * v3 + v2 */ + break; + default: + assert(0); + } + + /* store v3 */ + STORE(gen, *inst, v3, 0, chan); + } + + ppc_release_vec_register(gen->f, v3); + + release_src_vecs(gen); } @@ -452,16 +593,17 @@ emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) static void emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst) { - int v0 = ppc_allocate_vec_register(gen->f); - int v1 = ppc_allocate_vec_register(gen->f); - int v2 = ppc_allocate_vec_register(gen->f); - uint chan_index; - boolean complement = FALSE; + int v2; + uint chan; int one_vec = gen_one_vec(gen); - FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { - FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ - FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ + v2 = ppc_allocate_vec_register(gen->f); + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { + /* fetch src operands */ + int v0 = get_src_vec(gen, inst, 0, chan); + int v1 = get_src_vec(gen, inst, 1, chan); + boolean complement = FALSE; switch (inst->Instruction.Opcode) { case TGSI_OPCODE_SNE: @@ -495,89 +637,58 @@ emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst) else ppc_vand(gen->f, v2, one_vec, v2); /* v2 = one_vec & v2 */ - STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ + /* store v2 */ + STORE(gen, *inst, v2, 0, chan); } - ppc_release_vec_register(gen->f, v0); - ppc_release_vec_register(gen->f, v1); ppc_release_vec_register(gen->f, v2); + + release_src_vecs(gen); } static void emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst) { - int v0 = ppc_allocate_vec_register(gen->f); - int v1 = ppc_allocate_vec_register(gen->f); - int v2 = ppc_allocate_vec_register(gen->f); + int v0, v1, v2; uint chan_index; + v2 = ppc_allocate_vec_register(gen->f); + ppc_vxor(gen->f, v2, v2, v2); /* v2 = {0, 0, 0, 0} */ - FETCH(gen, *inst, v0, 0, CHAN_X); /* v0 = src0.XXXX */ - FETCH(gen, *inst, v1, 1, CHAN_X); /* v1 = src1.XXXX */ + v0 = get_src_vec(gen, inst, 0, CHAN_X); /* v0 = src0.XXXX */ + v1 = get_src_vec(gen, inst, 1, CHAN_X); /* v1 = src1.XXXX */ ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ - FETCH(gen, *inst, v0, 0, CHAN_Y); /* v0 = src0.YYYY */ - FETCH(gen, *inst, v1, 1, CHAN_Y); /* v1 = src1.YYYY */ + v0 = get_src_vec(gen, inst, 0, CHAN_Y); /* v0 = src0.YYYY */ + v1 = get_src_vec(gen, inst, 1, CHAN_Y); /* v1 = src1.YYYY */ ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ - FETCH(gen, *inst, v0, 0, CHAN_Z); /* v0 = src0.ZZZZ */ - FETCH(gen, *inst, v1, 1, CHAN_Z); /* v1 = src1.ZZZZ */ + v0 = get_src_vec(gen, inst, 0, CHAN_Z); /* v0 = src0.ZZZZ */ + v1 = get_src_vec(gen, inst, 1, CHAN_Z); /* v1 = src1.ZZZZ */ ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ if (inst->Instruction.Opcode == TGSI_OPCODE_DP4) { - FETCH(gen, *inst, v0, 0, CHAN_W); /* v0 = src0.WWWW */ - FETCH(gen, *inst, v1, 1, CHAN_W); /* v1 = src1.WWWW */ - ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ + v0 = get_src_vec(gen, inst, 0, CHAN_W); /* v0 = src0.WWWW */ + v1 = get_src_vec(gen, inst, 1, CHAN_W); /* v1 = src1.WWWW */ + ppc_vmaddfp(gen->f, v2, v0, v1, v2); /* v2 = v0 * v1 + v2 */ } else if (inst->Instruction.Opcode == TGSI_OPCODE_DPH) { - FETCH(gen, *inst, v1, 1, CHAN_W); /* v1 = src1.WWWW */ - ppc_vaddfp(gen->f, v2, v2, v1); /* v2 = v2 + v1 */ + v1 = get_src_vec(gen, inst, 1, CHAN_W); /* v1 = src1.WWWW */ + ppc_vaddfp(gen->f, v2, v2, v1); /* v2 = v2 + v1 */ } FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ } - ppc_release_vec_register(gen->f, v0); - ppc_release_vec_register(gen->f, v1); - ppc_release_vec_register(gen->f, v2); -} + release_src_vecs(gen); -static void -emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) -{ - int v0 = ppc_allocate_vec_register(gen->f); - int v1 = ppc_allocate_vec_register(gen->f); - int v2 = ppc_allocate_vec_register(gen->f); - int v3 = ppc_allocate_vec_register(gen->f); - uint chan_index; - FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { - FETCH(gen, *inst, v0, 0, chan_index); /* v0 = srcreg[0] */ - FETCH(gen, *inst, v1, 1, chan_index); /* v1 = srcreg[1] */ - FETCH(gen, *inst, v2, 2, chan_index); /* v2 = srcreg[2] */ - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_MAD: - ppc_vmaddfp(gen->f, v3, v0, v1, v2); /* v3 = v0 * v1 + v2 */ - break; - case TGSI_OPCODE_LRP: - ppc_vsubfp(gen->f, v3, v1, v2); /* v3 = v1 - v2 */ - ppc_vmaddfp(gen->f, v3, v0, v3, v2); /* v3 = v0 * v3 + v2 */ - break; - default: - assert(0); - } - STORE(gen, *inst, v3, 0, chan_index); /* store v3 */ - } - ppc_release_vec_register(gen->f, v0); - ppc_release_vec_register(gen->f, v1); ppc_release_vec_register(gen->f, v2); - ppc_release_vec_register(gen->f, v3); } - /** Approximation for vr = pow(va, vb) */ static void ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb) @@ -610,10 +721,10 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) /* Compute Y, Z */ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) || IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { - int x_vec = ppc_allocate_vec_register(gen->f); + int x_vec; int zero_vec = ppc_allocate_vec_register(gen->f); - FETCH(gen, *inst, x_vec, 0, CHAN_X); /* x_vec = src[0].x */ + x_vec = get_src_vec(gen, inst, 0, CHAN_X); /* x_vec = src[0].x */ ppc_vzero(gen->f, zero_vec); /* zero = {0,0,0,0} */ ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */ @@ -623,18 +734,16 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) } if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { - int y_vec = ppc_allocate_vec_register(gen->f); - int z_vec = ppc_allocate_vec_register(gen->f); - int w_vec = ppc_allocate_vec_register(gen->f); + int y_vec, z_vec, w_vec; int pow_vec = ppc_allocate_vec_register(gen->f); int pos_vec = ppc_allocate_vec_register(gen->f); int p128_vec = ppc_allocate_vec_register(gen->f); int n128_vec = ppc_allocate_vec_register(gen->f); - FETCH(gen, *inst, y_vec, 0, CHAN_Y); /* y_vec = src[0].y */ + y_vec = get_src_vec(gen, inst, 0, CHAN_Y); /* y_vec = src[0].y */ ppc_vmaxfp(gen->f, y_vec, y_vec, zero_vec); /* y_vec = max(y_vec, 0) */ - FETCH(gen, *inst, w_vec, 0, CHAN_W); /* w_vec = src[0].w */ + w_vec = get_src_vec(gen, inst, 0, CHAN_W); /* w_vec = src[0].w */ /* clamp Y to [-128, 128] */ load_constant_vec(gen, p128_vec, 128.0f); @@ -653,16 +762,12 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) STORE(gen, *inst, z_vec, 0, CHAN_Z); /* store Z */ - ppc_release_vec_register(gen->f, y_vec); - ppc_release_vec_register(gen->f, z_vec); - ppc_release_vec_register(gen->f, w_vec); ppc_release_vec_register(gen->f, pow_vec); ppc_release_vec_register(gen->f, pos_vec); ppc_release_vec_register(gen->f, p128_vec); ppc_release_vec_register(gen->f, n128_vec); } - ppc_release_vec_register(gen->f, x_vec); ppc_release_vec_register(gen->f, zero_vec); } @@ -670,6 +775,8 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { STORE(gen, *inst, one_vec, 0, CHAN_W); } + + release_src_vecs(gen); } @@ -723,11 +830,10 @@ emit_instruction(struct gen_context *gen, default: return 0; } - - return 1; } + static void emit_declaration( struct ppc_function *func, @@ -805,6 +911,7 @@ emit_epilogue(struct ppc_function *func) { ppc_return(func); /* XXX restore prev stack frame */ + debug_printf("PPC: Emitted %u instructions\n", func->num_inst); } @@ -839,6 +946,7 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, util_init_math(); + memset(&gen, 0, sizeof(gen)); gen.f = func; gen.inputs_reg = ppc_reserve_register(func, 3); /* first function param */ gen.outputs_reg = ppc_reserve_register(func, 4); /* second function param */ -- cgit v1.2.3 From a1754424b6597219f436091dec1de4713719c4b8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 27 Oct 2008 15:58:00 -0600 Subject: gallium: ppc: emit fewer 'li' instructions prior to vector loads/stores --- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 106 ++++++++++++++++++++++++---------- 1 file changed, 75 insertions(+), 31 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 000ddba935..06b7a41b1b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -95,6 +95,9 @@ struct gen_context int const_reg; /**< GP register pointing to constants buffer */ int builtins_reg; /**< GP register pointint to built-in constants */ + int offset_reg; /**< used to reduce redundant li instructions */ + int offset_value; + int one_vec; /**< vector register with {1.0, 1.0, 1.0, 1.0} */ int bit31_vec; /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */ @@ -112,6 +115,70 @@ struct gen_context }; +/** + * Initialize code generation context. + */ +static void +init_gen_context(struct gen_context *gen, struct ppc_function *func) +{ + memset(gen, 0, sizeof(*gen)); + gen->f = func; + gen->inputs_reg = ppc_reserve_register(func, 3); /* first function param */ + gen->outputs_reg = ppc_reserve_register(func, 4); /* second function param */ + gen->temps_reg = ppc_reserve_register(func, 5); /* ... */ + gen->immed_reg = ppc_reserve_register(func, 6); + gen->const_reg = ppc_reserve_register(func, 7); + gen->builtins_reg = ppc_reserve_register(func, 8); + gen->one_vec = -1; + gen->bit31_vec = -1; + gen->offset_reg = -1; + gen->offset_value = -9999999; +} + + +/** + * All PPC vector load/store instructions form an effective address + * by adding the contents of two registers. For example: + * lvx v2,r8,r9 # v2 = memory[r8 + r9] + * stvx v2,r8,r9 # memory[r8 + r9] = v2; + * So our lvx/stvx instructions are typically preceded by an 'li' instruction + * to load r9 (above) with an immediate (an offset). + * This code emits that 'li' instruction, but only if the offset value is + * different than the previous 'li'. + * This optimization seems to save about 10% in the instruction count. + * Note that we need to unconditionally emit an 'li' inside basic blocks + * (such as inside loops). + */ +static int +emit_li_offset(struct gen_context *gen, int offset) +{ + if (gen->offset_reg <= 0) { + /* allocate a GP register for storing load/store offset */ + gen->offset_reg = ppc_allocate_register(gen->f); + } + + /* emit new 'li' if offset is changing */ + if (gen->offset_value < 0 || gen->offset_value != offset) { + gen->offset_value = offset; + ppc_li(gen->f, gen->offset_reg, offset); + } + + return gen->offset_reg; +} + + +/** + * Forces subsequent emit_li_offset() calls to emit an 'li'. + * To be called at the top of basic blocks. + */ +static int +reset_li_offset(struct gen_context *gen) +{ + gen->offset_value = -9999999; +} + + + /** * Load the given vector register with {value, value, value, value}. * The value must be in the ppu_builtin_constants[] array. @@ -124,10 +191,9 @@ load_constant_vec(struct gen_context *gen, int dst_vec, float value) uint pos; for (pos = 0; pos < Elements(ppc_builtin_constants); pos++) { if (ppc_builtin_constants[pos] == value) { - int offset_reg = ppc_allocate_register(gen->f); int offset = pos * 4; + int offset_reg = emit_li_offset(gen, offset); - ppc_li(gen->f, offset_reg, offset); /* Load 4-byte word into vector register. * The vector slot depends on the effective address we load from. * We know that our builtins start at a 16-byte boundary so we @@ -137,7 +203,6 @@ load_constant_vec(struct gen_context *gen, int dst_vec, float value) ppc_lvewx(gen->f, dst_vec, gen->builtins_reg, offset_reg); /* splat word[pos % 4] across the vector reg */ ppc_vspltw(gen->f, dst_vec, dst_vec, pos % 4); - ppc_release_register(gen->f, offset_reg); return; } } @@ -192,36 +257,29 @@ emit_fetch(struct gen_context *gen, switch (reg->SrcRegister.File) { case TGSI_FILE_INPUT: { - int offset_reg = ppc_allocate_register(gen->f); int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; - ppc_li(gen->f, offset_reg, offset); + int offset_reg = emit_li_offset(gen, offset); ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg); - ppc_release_register(gen->f, offset_reg); } break; case TGSI_FILE_TEMPORARY: { - int offset_reg = ppc_allocate_register(gen->f); int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; - ppc_li(gen->f, offset_reg, offset); + int offset_reg = emit_li_offset(gen, offset); ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg); - ppc_release_register(gen->f, offset_reg); } break; case TGSI_FILE_IMMEDIATE: { - int offset_reg = ppc_allocate_register(gen->f); int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; - ppc_li(gen->f, offset_reg, offset); + int offset_reg = emit_li_offset(gen, offset); ppc_lvx(gen->f, dst_vec, gen->immed_reg, offset_reg); - ppc_release_register(gen->f, offset_reg); } break; case TGSI_FILE_CONSTANT: { - int offset_reg = ppc_allocate_register(gen->f); int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; - ppc_li(gen->f, offset_reg, offset); + int offset_reg = emit_li_offset(gen, offset); /* Load 4-byte word into vector register. * The vector slot depends on the effective address we load from. * We know that our constants start at a 16-byte boundary so we @@ -231,7 +289,6 @@ emit_fetch(struct gen_context *gen, ppc_lvewx(gen->f, dst_vec, gen->const_reg, offset_reg); /* splat word[swizzle] across the vector reg */ ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle); - ppc_release_register(gen->f, offset_reg); } break; default: @@ -369,20 +426,16 @@ emit_store(struct gen_context *gen, switch (reg->DstRegister.File) { case TGSI_FILE_OUTPUT: { - int offset_reg = ppc_allocate_register(gen->f); int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; - ppc_li(gen->f, offset_reg, offset); + int offset_reg = emit_li_offset(gen, offset); ppc_stvx(gen->f, src_vec, gen->outputs_reg, offset_reg); - ppc_release_register(gen->f, offset_reg); } break; case TGSI_FILE_TEMPORARY: { - int offset_reg = ppc_allocate_register(gen->f); int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; - ppc_li(gen->f, offset_reg, offset); + int offset_reg = emit_li_offset(gen, offset); ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg); - ppc_release_register(gen->f, offset_reg); } break; #if 0 @@ -946,16 +999,7 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, util_init_math(); - memset(&gen, 0, sizeof(gen)); - gen.f = func; - gen.inputs_reg = ppc_reserve_register(func, 3); /* first function param */ - gen.outputs_reg = ppc_reserve_register(func, 4); /* second function param */ - gen.temps_reg = ppc_reserve_register(func, 5); /* ... */ - gen.immed_reg = ppc_reserve_register(func, 6); - gen.const_reg = ppc_reserve_register(func, 7); - gen.builtins_reg = ppc_reserve_register(func, 8); - gen.one_vec = -1; - gen.bit31_vec = -1; + init_gen_context(&gen, func); emit_prologue(func); -- cgit v1.2.3 From c46583416a749f2e7f76a1eaadb54a8b9e76fb11 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 28 Oct 2008 13:17:48 -0600 Subject: gallium: use some PPC vec registers to store TGSI temps This could be a lot better, but already makes for better code. --- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 184 ++++++++++++++++++++++------------ 1 file changed, 122 insertions(+), 62 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 06b7a41b1b..0de9b972b4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -40,6 +40,7 @@ #include "util/u_sse.h" #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" +#include "tgsi_dump.h" #include "tgsi_exec.h" #include "tgsi_ppc.h" #include "rtasm/rtasm_ppc.h" @@ -73,6 +74,12 @@ const float ppc_builtin_constants[] ALIGN16_ATTRIB = { #define CHAN_W 3 +/** + * How many TGSI temps should be implemented with real PPC vector registers + * rather than memory. + */ +#define MAX_PPC_TEMPS 4 + struct reg_chan_vec { @@ -101,6 +108,12 @@ struct gen_context int one_vec; /**< vector register with {1.0, 1.0, 1.0, 1.0} */ int bit31_vec; /**< vector register with {1<<31, 1<<31, 1<<31, 1<<31} */ + /** + * Map TGSI temps to PPC vector temps. + * We have 32 PPC vector regs. Use 16 of them for storing 4 TGSI temps. + * XXX currently only do this for TGSI temps [0..MAX_PPC_TEMPS-1]. + */ + int temps_map[MAX_PPC_TEMPS][4]; /** * Cache of src registers. @@ -121,6 +134,8 @@ struct gen_context static void init_gen_context(struct gen_context *gen, struct ppc_function *func) { + uint i; + memset(gen, 0, sizeof(*gen)); gen->f = func; gen->inputs_reg = ppc_reserve_register(func, 3); /* first function param */ @@ -133,6 +148,12 @@ init_gen_context(struct gen_context *gen, struct ppc_function *func) gen->bit31_vec = -1; gen->offset_reg = -1; gen->offset_value = -9999999; + for (i = 0; i < MAX_PPC_TEMPS; i++) { + gen->temps_map[i][0] = ppc_allocate_vec_register(gen->f); + gen->temps_map[i][1] = ppc_allocate_vec_register(gen->f); + gen->temps_map[i][2] = ppc_allocate_vec_register(gen->f); + gen->temps_map[i][3] = ppc_allocate_vec_register(gen->f); + } } @@ -171,7 +192,7 @@ emit_li_offset(struct gen_context *gen, int offset) * Forces subsequent emit_li_offset() calls to emit an 'li'. * To be called at the top of basic blocks. */ -static int +static void reset_li_offset(struct gen_context *gen) { gen->offset_value = -9999999; @@ -239,15 +260,15 @@ gen_get_bit31_vec(struct gen_context *gen) /** - * Register fetch, put result in 'dst_vec'. + * Register fetch. Return PPC vector register with result. */ -static void +static int emit_fetch(struct gen_context *gen, - unsigned dst_vec, const struct tgsi_full_src_register *reg, const unsigned chan_index) { uint swizzle = tgsi_util_get_full_src_register_extswizzle(reg, chan_index); + int dst_vec = -1; switch (swizzle) { case TGSI_EXTSWIZZLE_X: @@ -259,13 +280,20 @@ emit_fetch(struct gen_context *gen, { int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; int offset_reg = emit_li_offset(gen, offset); + dst_vec = ppc_allocate_vec_register(gen->f); ppc_lvx(gen->f, dst_vec, gen->inputs_reg, offset_reg); } break; case TGSI_FILE_TEMPORARY: - { + if (reg->SrcRegister.Index < MAX_PPC_TEMPS) { + /* use PPC vec register */ + dst_vec = gen->temps_map[reg->SrcRegister.Index][swizzle]; + } + else { + /* use memory-based temp register "file" */ int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; int offset_reg = emit_li_offset(gen, offset); + dst_vec = ppc_allocate_vec_register(gen->f); ppc_lvx(gen->f, dst_vec, gen->temps_reg, offset_reg); } break; @@ -273,6 +301,7 @@ emit_fetch(struct gen_context *gen, { int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; int offset_reg = emit_li_offset(gen, offset); + dst_vec = ppc_allocate_vec_register(gen->f); ppc_lvx(gen->f, dst_vec, gen->immed_reg, offset_reg); } break; @@ -280,6 +309,7 @@ emit_fetch(struct gen_context *gen, { int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; int offset_reg = emit_li_offset(gen, offset); + dst_vec = ppc_allocate_vec_register(gen->f); /* Load 4-byte word into vector register. * The vector slot depends on the effective address we load from. * We know that our constants start at a 16-byte boundary so we @@ -301,6 +331,7 @@ emit_fetch(struct gen_context *gen, case TGSI_EXTSWIZZLE_ONE: { int one_vec = gen_one_vec(gen); + dst_vec = ppc_allocate_vec_register(gen->f); ppc_vmove(gen->f, dst_vec, one_vec); } break; @@ -308,6 +339,8 @@ emit_fetch(struct gen_context *gen, assert( 0 ); } + assert(dst_vec >= 0); + { uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index); if (sign_op != TGSI_UTIL_SIGN_KEEP) { @@ -331,6 +364,8 @@ emit_fetch(struct gen_context *gen, } } } + + return dst_vec; } @@ -380,20 +415,22 @@ get_src_vec(struct gen_context *gen, for (i = 0; i < gen->num_regs; i++) { if (equal_src_locs(&gen->regs[i].src, gen->regs[i].chan, src, chan)) { /* cache hit */ + assert(gen->regs[i].vec >= 0); return gen->regs[i].vec; } } /* cache miss: allocate new vec reg and emit fetch/load code */ - vec = ppc_allocate_vec_register(gen->f); + vec = emit_fetch(gen, src, chan); gen->regs[gen->num_regs].src = *src; gen->regs[gen->num_regs].chan = chan; gen->regs[gen->num_regs].vec = vec; gen->num_regs++; - emit_fetch(gen, vec, src, chan); assert(gen->num_regs <= Elements(gen->regs)); + assert(vec >= 0); + return vec; } @@ -406,23 +443,48 @@ release_src_vecs(struct gen_context *gen) { uint i; for (i = 0; i < gen->num_regs; i++) { - ppc_release_vec_register(gen->f, gen->regs[i].vec); + const const struct tgsi_full_src_register src = gen->regs[i].src; + if (!(src.SrcRegister.File == TGSI_FILE_TEMPORARY && + src.SrcRegister.Index < MAX_PPC_TEMPS)) { + ppc_release_vec_register(gen->f, gen->regs[i].vec); + } } gen->num_regs = 0; } +static int +get_dst_vec(struct gen_context *gen, + const struct tgsi_full_instruction *inst, + unsigned chan_index) +{ + const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0]; + + if (reg->DstRegister.File == TGSI_FILE_TEMPORARY && + reg->DstRegister.Index < MAX_PPC_TEMPS) { + int vec = gen->temps_map[reg->DstRegister.Index][chan_index]; + return vec; + } + else { + return ppc_allocate_vec_register(gen->f); + } +} + + /** * Register store. Store 'src_vec' at location indicated by 'reg'. + * \param free_vec Should the src_vec be released when done? */ static void emit_store(struct gen_context *gen, - unsigned src_vec, - const struct tgsi_full_dst_register *reg, + int src_vec, const struct tgsi_full_instruction *inst, - unsigned chan_index) + unsigned chan_index, + boolean free_vec) { + const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0]; + switch (reg->DstRegister.File) { case TGSI_FILE_OUTPUT: { @@ -432,7 +494,15 @@ emit_store(struct gen_context *gen, } break; case TGSI_FILE_TEMPORARY: - { + if (reg->DstRegister.Index < MAX_PPC_TEMPS) { + if (!free_vec) { + int dst_vec = gen->temps_map[reg->DstRegister.Index][chan_index]; + if (dst_vec != src_vec) + ppc_vmove(gen->f, dst_vec, src_vec); + } + free_vec = FALSE; + } + else { int offset = (reg->DstRegister.Index * 4 + chan_index) * 16; int offset_reg = emit_li_offset(gen, offset); ppc_stvx(gen->f, src_vec, gen->temps_reg, offset_reg); @@ -465,21 +535,20 @@ emit_store(struct gen_context *gen, break; } #endif -} - - -#define STORE( GEN, INST, XMM, INDEX, CHAN )\ - emit_store( GEN, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN ) + if (free_vec) + ppc_release_vec_register(gen->f, src_vec); +} static void emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) { - int v0, v1 = ppc_allocate_vec_register(gen->f); + int v0, v1; uint chan_index; v0 = get_src_vec(gen, inst, 0, CHAN_X); + v1 = ppc_allocate_vec_register(gen->f); switch (inst->Instruction.Opcode) { case TGSI_OPCODE_RSQ: @@ -495,7 +564,7 @@ emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) } FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - STORE(gen, *inst, v1, 0, chan_index); + emit_store(gen, v1, inst, chan_index, FALSE); } release_src_vecs(gen); @@ -509,42 +578,37 @@ emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) uint chan_index; FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { int v0 = get_src_vec(gen, inst, 0, chan_index); /* v0 = srcreg[0] */ + int v1 = get_dst_vec(gen, inst, chan_index); switch (inst->Instruction.Opcode) { case TGSI_OPCODE_ABS: /* turn off the most significant bit of each vector float word */ { - int v1 = ppc_allocate_vec_register(gen->f); - ppc_vspltisw(gen->f, v1, -1); /* v1 = {-1, -1, -1, -1} */ - ppc_vslw(gen->f, v1, v1, v1); /* v1 = {1<<31, 1<<31, 1<<31, 1<<31} */ - ppc_vandc(gen->f, v0, v0, v1); /* v0 = v0 & ~v1 */ - ppc_release_vec_register(gen->f, v1); + int bit31_vec = gen_get_bit31_vec(gen); + ppc_vandc(gen->f, v1, v0, bit31_vec); /* v1 = v0 & ~bit31 */ } break; case TGSI_OPCODE_FLOOR: - ppc_vrfim(gen->f, v0, v0); /* v0 = floor(v0) */ + ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */ break; case TGSI_OPCODE_FRAC: - { - int v1 = ppc_allocate_vec_register(gen->f); - ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */ - ppc_vsubfp(gen->f, v0, v0, v1); /* v0 = v0 - v1 */ - ppc_release_vec_register(gen->f, v1); - } + ppc_vrfim(gen->f, v1, v0); /* tmp = floor(v0) */ + ppc_vsubfp(gen->f, v1, v0, v1); /* v1 = v0 - v1 */ break; case TGSI_OPCODE_EXPBASE2: - ppc_vexptefp(gen->f, v0, v0); /* v0 = 2^v0 */ + ppc_vexptefp(gen->f, v1, v0); /* v1 = 2^v0 */ break; case TGSI_OPCODE_LOGBASE2: /* XXX this may be broken! */ - ppc_vlogefp(gen->f, v0, v0); /* v0 = log2(v0) */ + ppc_vlogefp(gen->f, v1, v0); /* v1 = log2(v0) */ break; case TGSI_OPCODE_MOV: - /* nothing */ + if (v0 != v1) + ppc_vmove(gen->f, v1, v0); break; default: assert(0); } - STORE(gen, *inst, v0, 0, chan_index); /* store v0 */ + emit_store(gen, v1, inst, chan_index, TRUE); /* store v0 */ } release_src_vecs(gen); @@ -554,7 +618,7 @@ emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) static void emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) { - int v2, zero_vec = -1; + int zero_vec = -1; uint chan; if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) { @@ -562,12 +626,11 @@ emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) ppc_vzero(gen->f, zero_vec); } - v2 = ppc_allocate_vec_register(gen->f); - FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { /* fetch src operands */ int v0 = get_src_vec(gen, inst, 0, chan); int v1 = get_src_vec(gen, inst, 1, chan); + int v2 = get_dst_vec(gen, inst, chan); /* emit binop */ switch (inst->Instruction.Opcode) { @@ -591,11 +654,9 @@ emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) } /* store v2 */ - STORE(gen, *inst, v2, 0, chan); + emit_store(gen, v2, inst, chan, TRUE); } - ppc_release_vec_register(gen->f, v2); - if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) ppc_release_vec_register(gen->f, zero_vec); @@ -606,16 +667,14 @@ emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) static void emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) { - int v3; uint chan; - v3 = ppc_allocate_vec_register(gen->f); - FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { /* fetch src operands */ int v0 = get_src_vec(gen, inst, 0, chan); int v1 = get_src_vec(gen, inst, 1, chan); int v2 = get_src_vec(gen, inst, 2, chan); + int v3 = get_dst_vec(gen, inst, chan); /* emit ALU */ switch (inst->Instruction.Opcode) { @@ -631,11 +690,9 @@ emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) } /* store v3 */ - STORE(gen, *inst, v3, 0, chan); + emit_store(gen, v3, inst, chan, TRUE); } - ppc_release_vec_register(gen->f, v3); - release_src_vecs(gen); } @@ -646,16 +703,14 @@ emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) static void emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst) { - int v2; uint chan; int one_vec = gen_one_vec(gen); - v2 = ppc_allocate_vec_register(gen->f); - FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { /* fetch src operands */ int v0 = get_src_vec(gen, inst, 0, chan); int v1 = get_src_vec(gen, inst, 1, chan); + int v2 = get_dst_vec(gen, inst, chan); boolean complement = FALSE; switch (inst->Instruction.Opcode) { @@ -691,11 +746,9 @@ emit_inequality(struct gen_context *gen, struct tgsi_full_instruction *inst) ppc_vand(gen->f, v2, one_vec, v2); /* v2 = one_vec & v2 */ /* store v2 */ - STORE(gen, *inst, v2, 0, chan); + emit_store(gen, v2, inst, chan, TRUE); } - ppc_release_vec_register(gen->f, v2); - release_src_vecs(gen); } @@ -733,7 +786,7 @@ emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst) } FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { - STORE(gen, *inst, v2, 0, chan_index); /* store v2 */ + emit_store(gen, v2, inst, chan_index, FALSE); /* store v2, free v2 later */ } release_src_vecs(gen); @@ -768,7 +821,7 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) /* Compute X */ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { - STORE(gen, *inst, one_vec, 0, CHAN_X); + emit_store(gen, one_vec, inst, CHAN_X, FALSE); } /* Compute Y, Z */ @@ -783,11 +836,12 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) ppc_vmaxfp(gen->f, x_vec, x_vec, zero_vec); /* x_vec = max(x_vec, 0) */ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { - STORE(gen, *inst, x_vec, 0, CHAN_Y); /* store Y */ + emit_store(gen, x_vec, inst, CHAN_Y, FALSE); } if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { - int y_vec, z_vec, w_vec; + int y_vec, w_vec; + int z_vec = ppc_allocate_vec_register(gen->f); int pow_vec = ppc_allocate_vec_register(gen->f); int pos_vec = ppc_allocate_vec_register(gen->f); int p128_vec = ppc_allocate_vec_register(gen->f); @@ -798,11 +852,11 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) w_vec = get_src_vec(gen, inst, 0, CHAN_W); /* w_vec = src[0].w */ - /* clamp Y to [-128, 128] */ + /* clamp W to [-128, 128] */ load_constant_vec(gen, p128_vec, 128.0f); load_constant_vec(gen, n128_vec, -128.0f); - ppc_vmaxfp(gen->f, y_vec, y_vec, n128_vec); /* y = max(y, -128) */ - ppc_vminfp(gen->f, y_vec, y_vec, p128_vec); /* y = min(y, 128) */ + ppc_vmaxfp(gen->f, w_vec, w_vec, n128_vec); /* w = max(w, -128) */ + ppc_vminfp(gen->f, w_vec, w_vec, p128_vec); /* w = min(w, 128) */ /* if temp.x > 0 * z = pow(tmp.y, tmp.w) @@ -813,8 +867,9 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) ppc_vcmpgtfpx(gen->f, pos_vec, x_vec, zero_vec); /* pos = x > 0 */ ppc_vand(gen->f, z_vec, pow_vec, pos_vec); /* z = pow & pos */ - STORE(gen, *inst, z_vec, 0, CHAN_Z); /* store Z */ + emit_store(gen, z_vec, inst, CHAN_Z, FALSE); + ppc_release_vec_register(gen->f, z_vec); ppc_release_vec_register(gen->f, pow_vec); ppc_release_vec_register(gen->f, pos_vec); ppc_release_vec_register(gen->f, p128_vec); @@ -826,7 +881,7 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) /* Compute W */ if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { - STORE(gen, *inst, one_vec, 0, CHAN_W); + emit_store(gen, one_vec, inst, CHAN_W, FALSE); } release_src_vecs(gen); @@ -997,6 +1052,11 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, if (!use_ppc_asm) return FALSE; + if (0) { + debug_printf("\n********* TGSI->PPC ********\n"); + tgsi_dump(tokens, 0); + } + util_init_math(); init_gen_context(&gen, func); -- cgit v1.2.3 From f4e9526addc617dc78af9b1af781ffe09ce62504 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 28 Oct 2008 18:21:03 -0600 Subject: gallium: ppc: don't replicate/smear immediate values, use vspltw instruction as with constants --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 8 ++++---- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 22 +++++++++++++--------- 2 files changed, 17 insertions(+), 13 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index 8eff6d4fda..ff40263400 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -54,7 +54,7 @@ typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4], float (*outputs)[4][4], float (*temps)[4][4], - float (*immeds)[4][4], + float (*immeds)[4], float (*consts)[4], const float *builtins); @@ -151,7 +151,7 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, output_stride ); #else shader->func(inputs_soa, outputs_soa, temps_soa, - (float (*)[4][4]) shader->base.immediates, + (float (*)[4]) shader->base.immediates, (float (*)[4]) constants, ppc_builtin_constants); @@ -227,7 +227,7 @@ draw_create_vs_ppc(struct draw_context *draw, vs->base.run_linear = vs_ppc_run_linear; vs->base.delete = vs_ppc_delete; - vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * 4 * + vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * sizeof(float), 16); vs->machine = &draw->vs.machine; @@ -236,7 +236,7 @@ draw_create_vs_ppc(struct draw_context *draw, if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens, &vs->ppc_program, - (float (*)[4])vs->base.immediates, + (float (*)[4]) vs->base.immediates, TRUE )) goto fail; diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 0de9b972b4..dd574ac02a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -299,10 +299,18 @@ emit_fetch(struct gen_context *gen, break; case TGSI_FILE_IMMEDIATE: { - int offset = (reg->SrcRegister.Index * 4 + swizzle) * 16; + int offset = (reg->SrcRegister.Index * 4 + swizzle) * 4; int offset_reg = emit_li_offset(gen, offset); dst_vec = ppc_allocate_vec_register(gen->f); - ppc_lvx(gen->f, dst_vec, gen->immed_reg, offset_reg); + /* Load 4-byte word into vector register. + * The vector slot depends on the effective address we load from. + * We know that our immediates start at a 16-byte boundary so we + * know that 'swizzle' tells us which vector slot will have the + * loaded word. The other vector slots will be undefined. + */ + ppc_lvewx(gen->f, dst_vec, gen->immed_reg, offset_reg); + /* splat word[swizzle] across the vector reg */ + ppc_vspltw(gen->f, dst_vec, dst_vec, swizzle); } break; case TGSI_FILE_CONSTANT: @@ -1095,14 +1103,10 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, assert(size <= 4); assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); for (i = 0; i < size; i++) { - const float value = - parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; - imm[num_immediates * 4 + 0] = - imm[num_immediates * 4 + 1] = - imm[num_immediates * 4 + 2] = - imm[num_immediates * 4 + 3] = value; - num_immediates++; + immediates[num_immediates][i] = + parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float; } + num_immediates++; } break; -- cgit v1.2.3 From a045b92511eb43ff89e9c0536464af7866956168 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 28 Oct 2008 18:22:14 -0600 Subject: gallium: remove old code --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 29 ----------------------------- 1 file changed, 29 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index ff40263400..19f6c4ee5b 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -58,19 +58,6 @@ typedef void (PIPE_CDECL *codegen_function) (float (*inputs)[4][4], float (*consts)[4], const float *builtins); -#if 0 - const struct tgsi_exec_vector *input, - struct tgsi_exec_vector *output, - float (*constant)[4], /* 3 */ - struct tgsi_exec_vector *temporary, /* 4 */ - float (*immediates)[4], /* 5 */ - const float (*aos_input)[4], /* 6 */ - uint num_inputs, /* 7 */ - uint input_stride, /* 8 */ - float (*aos_output)[4], /* 9 */ - uint num_outputs, /* 10 */ - uint output_stride ); /* 11 */ -#endif struct draw_ppc_vertex_shader { struct draw_vertex_shader base; @@ -137,27 +124,11 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, /* run compiled shader */ -#if 0 - shader->func(machine->Inputs, - machine->Outputs, - (float (*)[4])constants, - machine->Temps, - (float (*)[4])shader->base.immediates, - input, - base->info.num_inputs, - input_stride, - output, - base->info.num_outputs, - output_stride ); -#else shader->func(inputs_soa, outputs_soa, temps_soa, (float (*)[4]) shader->base.immediates, (float (*)[4]) constants, ppc_builtin_constants); - /*output[0][0] = input[0][0] * 0.5;*/ -#endif - /* convert (up to) four output verts from SoA back to AoS format */ for (attr = 0; attr < base->info.num_outputs; attr++) { float *vOut = (float *) output; -- cgit v1.2.3 From 5db0372b3cffec9b5c28699a580da77dcfbd938d Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 28 Oct 2008 18:57:54 -0600 Subject: gallium: ppc: implement TGSI_OPCODE_LOG/EXP --- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 111 +++++++++++++++++++++++++++++++++- 1 file changed, 110 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index dd574ac02a..e64fb5ac91 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -896,6 +896,110 @@ emit_lit(struct gen_context *gen, struct tgsi_full_instruction *inst) } +static void +emit_exp(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + const int one_vec = gen_one_vec(gen); + int src_vec; + + /* get src arg */ + src_vec = get_src_vec(gen, inst, 0, CHAN_X); + + /* Compute X = 2^floor(src) */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { + int dst_vec = get_dst_vec(gen, inst, CHAN_X); + int tmp_vec = ppc_allocate_vec_register(gen->f); + ppc_vrfim(gen->f, tmp_vec, src_vec); /* tmp = floor(src); */ + ppc_vexptefp(gen->f, dst_vec, tmp_vec); /* dst = 2 ^ tmp */ + emit_store(gen, dst_vec, inst, CHAN_X, TRUE); + ppc_release_vec_register(gen->f, tmp_vec); + } + + /* Compute Y = src - floor(src) */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + int dst_vec = get_dst_vec(gen, inst, CHAN_Y); + int tmp_vec = ppc_allocate_vec_register(gen->f); + ppc_vrfim(gen->f, tmp_vec, src_vec); /* tmp = floor(src); */ + ppc_vsubfp(gen->f, dst_vec, src_vec, tmp_vec); /* dst = src - tmp */ + emit_store(gen, dst_vec, inst, CHAN_Y, TRUE); + ppc_release_vec_register(gen->f, tmp_vec); + } + + /* Compute Z = RoughApprox2ToX(src) */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + int dst_vec = get_dst_vec(gen, inst, CHAN_Z); + ppc_vexptefp(gen->f, dst_vec, src_vec); /* dst = 2 ^ src */ + emit_store(gen, dst_vec, inst, CHAN_Z, TRUE); + } + + /* Compute W = 1.0 */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { + emit_store(gen, one_vec, inst, CHAN_W, FALSE); + } + + release_src_vecs(gen); +} + + +static void +emit_log(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + const int bit31_vec = gen_get_bit31_vec(gen); + const int one_vec = gen_one_vec(gen); + int src_vec, abs_vec; + + /* get src arg */ + src_vec = get_src_vec(gen, inst, 0, CHAN_X); + + /* compute abs(src) */ + abs_vec = ppc_allocate_vec_register(gen->f); + ppc_vandc(gen->f, abs_vec, src_vec, bit31_vec); /* abs = src & ~bit31 */ + + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) && + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + + /* compute tmp = floor(log2(abs)) */ + int tmp_vec = ppc_allocate_vec_register(gen->f); + ppc_vlogefp(gen->f, tmp_vec, abs_vec); /* tmp = log2(abs) */ + ppc_vrfim(gen->f, tmp_vec, tmp_vec); /* tmp = floor(tmp); */ + + /* Compute X = tmp */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X)) { + emit_store(gen, tmp_vec, inst, CHAN_X, FALSE); + } + + /* Compute Y = abs / 2^tmp */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + const int zero_vec = ppc_allocate_vec_register(gen->f); + ppc_vzero(gen->f, zero_vec); + ppc_vexptefp(gen->f, tmp_vec, tmp_vec); /* tmp = 2 ^ tmp */ + ppc_vrefp(gen->f, tmp_vec, tmp_vec); /* tmp = 1 / tmp */ + /* tmp = abs * tmp + zero */ + ppc_vmaddfp(gen->f, tmp_vec, abs_vec, tmp_vec, zero_vec); + emit_store(gen, tmp_vec, inst, CHAN_Y, FALSE); + ppc_release_vec_register(gen->f, zero_vec); + } + + ppc_release_vec_register(gen->f, tmp_vec); + } + + /* Compute Z = RoughApproxLog2(abs) */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + int dst_vec = get_dst_vec(gen, inst, CHAN_Z); + ppc_vlogefp(gen->f, dst_vec, abs_vec); /* dst = log2(abs) */ + emit_store(gen, dst_vec, inst, CHAN_Z, TRUE); + } + + /* Compute W = 1.0 */ + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_W)) { + emit_store(gen, one_vec, inst, CHAN_W, FALSE); + } + + ppc_release_vec_register(gen->f, abs_vec); + release_src_vecs(gen); +} + + static int emit_instruction(struct gen_context *gen, struct tgsi_full_instruction *inst) @@ -940,6 +1044,12 @@ emit_instruction(struct gen_context *gen, case TGSI_OPCODE_LIT: emit_lit(gen, inst); break; + case TGSI_OPCODE_LOG: + emit_log(gen, inst); + break; + case TGSI_OPCODE_EXP: + emit_exp(gen, inst); + break; case TGSI_OPCODE_END: /* normal end */ return 1; @@ -1098,7 +1208,6 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, /* splat each immediate component into a float[4] vector for SoA */ { const uint size = parse.FullToken.FullImmediate.Immediate.Size - 1; - float *imm = (float *) immediates; uint i; assert(size <= 4); assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES); -- cgit v1.2.3 From 7640264064c2cbc9922f7f3df51f7caa7b449e8e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 29 Oct 2008 11:03:51 -0600 Subject: gallium: added ppc_vnmsubfp() --- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 7 +++++++ src/gallium/auxiliary/rtasm/rtasm_ppc.h | 6 +++++- 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index 7dd8263749..a90b5587b0 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -505,6 +505,13 @@ ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) emit_va(p, 46, vD, vA, vC, vB); /* note arg order */ } +/** vector float negative mult subtract: vD = vA - vB * vC */ +void +ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) +{ + emit_va(p, 47, vD, vB, vA, vC); /* note arg order */ +} + /** vector float compare greater than */ void ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB) diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index f938d8d759..561e139bce 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -97,10 +97,14 @@ ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB); extern void ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB); -/** vector float mult add */ +/** vector float mult add: vD = vA * vB + vC */ extern void ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC); +/** vector float negative mult subtract: vD = vA - vB * vC */ +extern void +ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC); + /** vector float compare greater than */ extern void ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB); -- cgit v1.2.3 From 75b92764a7820558fb2b6cd27a2ab0487ef2f9ba Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 29 Oct 2008 11:04:05 -0600 Subject: gallium: clean-ups --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index 19f6c4ee5b..d720c7bbd5 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -64,8 +64,6 @@ struct draw_ppc_vertex_shader { struct ppc_function ppc_program; codegen_function func; - - struct tgsi_exec_machine *machine; }; @@ -73,11 +71,12 @@ static void vs_ppc_prepare( struct draw_vertex_shader *base, struct draw_context *draw ) { + /* nothing */ } - -/* Simplified vertex shader interface for the pt paths. Given the +/** + * Simplified vertex shader interface for the pt paths. Given the * complexity of code-generating all the above operations together, * it's time to try doing all the other stuff separately. */ @@ -91,7 +90,6 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, unsigned output_stride ) { struct draw_ppc_vertex_shader *shader = (struct draw_ppc_vertex_shader *)base; - struct tgsi_exec_machine *machine = shader->machine; unsigned int i; #define MAX_VERTICES 4 @@ -154,8 +152,6 @@ vs_ppc_run_linear( struct draw_vertex_shader *base, } - - static void vs_ppc_delete( struct draw_vertex_shader *base ) { @@ -172,7 +168,7 @@ vs_ppc_delete( struct draw_vertex_shader *base ) struct draw_vertex_shader * draw_create_vs_ppc(struct draw_context *draw, - const struct pipe_shader_state *templ) + const struct pipe_shader_state *templ) { struct draw_ppc_vertex_shader *vs; @@ -201,8 +197,6 @@ draw_create_vs_ppc(struct draw_context *draw, vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * sizeof(float), 16); - vs->machine = &draw->vs.machine; - ppc_init_func( &vs->ppc_program, 2000 ); /* XXX fix limit */ if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens, -- cgit v1.2.3 From 4e1c33700d8885c91d8a1db4cbaefa1ff9f1b5fc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 29 Oct 2008 11:05:34 -0600 Subject: gallium: added PPC support for SWZ, XPD, POW That's the last of the ARB_v_p opcodes, except for ARL. --- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 86 +++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index e64fb5ac91..5d13070922 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -610,6 +610,7 @@ emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) ppc_vlogefp(gen->f, v1, v0); /* v1 = log2(v0) */ break; case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: if (v0 != v1) ppc_vmove(gen->f, v1, v0); break; @@ -1000,12 +1001,91 @@ emit_log(struct gen_context *gen, struct tgsi_full_instruction *inst) } +static void +emit_pow(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int s0_vec = get_src_vec(gen, inst, 0, CHAN_X); + int s1_vec = get_src_vec(gen, inst, 1, CHAN_X); + int pow_vec = ppc_allocate_vec_register(gen->f); + int chan; + + ppc_vec_pow(gen->f, pow_vec, s0_vec, s1_vec); + + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { + emit_store(gen, pow_vec, inst, chan, FALSE); + } + + ppc_release_vec_register(gen->f, pow_vec); + + release_src_vecs(gen); +} + + +static void +emit_xpd(struct gen_context *gen, struct tgsi_full_instruction *inst) +{ + int x0_vec, y0_vec, z0_vec; + int x1_vec, y1_vec, z1_vec; + int zero_vec, tmp_vec; + int tmp2_vec; + + zero_vec = ppc_allocate_vec_register(gen->f); + ppc_vzero(gen->f, zero_vec); + + tmp_vec = ppc_allocate_vec_register(gen->f); + tmp2_vec = ppc_allocate_vec_register(gen->f); + + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) || + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + x0_vec = get_src_vec(gen, inst, 0, CHAN_X); + x1_vec = get_src_vec(gen, inst, 1, CHAN_X); + } + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) || + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z)) { + y0_vec = get_src_vec(gen, inst, 0, CHAN_Y); + y1_vec = get_src_vec(gen, inst, 1, CHAN_Y); + } + if (IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) || + IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y)) { + z0_vec = get_src_vec(gen, inst, 0, CHAN_Z); + z1_vec = get_src_vec(gen, inst, 1, CHAN_Z); + } + + IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_X) { + /* tmp = y0 * z1 */ + ppc_vmaddfp(gen->f, tmp_vec, y0_vec, z1_vec, zero_vec); + /* tmp = tmp - z0 * y1*/ + ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, z0_vec, y1_vec); + emit_store(gen, tmp_vec, inst, CHAN_X, FALSE); + } + IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Y) { + /* tmp = z0 * x1 */ + ppc_vmaddfp(gen->f, tmp_vec, z0_vec, x1_vec, zero_vec); + /* tmp = tmp - x0 * z1 */ + ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, x0_vec, z1_vec); + emit_store(gen, tmp_vec, inst, CHAN_Y, FALSE); + } + IF_IS_DST0_CHANNEL_ENABLED(*inst, CHAN_Z) { + /* tmp = x0 * y1 */ + ppc_vmaddfp(gen->f, tmp_vec, x0_vec, y1_vec, zero_vec); + /* tmp = tmp - y0 * x1 */ + ppc_vnmsubfp(gen->f, tmp_vec, tmp_vec, y0_vec, x1_vec); + emit_store(gen, tmp_vec, inst, CHAN_Z, FALSE); + } + /* W is undefined */ + + ppc_release_vec_register(gen->f, tmp_vec); + ppc_release_vec_register(gen->f, zero_vec); + release_src_vecs(gen); +} + static int emit_instruction(struct gen_context *gen, struct tgsi_full_instruction *inst) { switch (inst->Instruction.Opcode) { case TGSI_OPCODE_MOV: + case TGSI_OPCODE_SWZ: case TGSI_OPCODE_ABS: case TGSI_OPCODE_FLOOR: case TGSI_OPCODE_FRAC: @@ -1050,6 +1130,12 @@ emit_instruction(struct gen_context *gen, case TGSI_OPCODE_EXP: emit_exp(gen, inst); break; + case TGSI_OPCODE_POW: + emit_pow(gen, inst); + break; + case TGSI_OPCODE_XPD: + emit_xpd(gen, inst); + break; case TGSI_OPCODE_END: /* normal end */ return 1; -- cgit v1.2.3 From 09570d2e737a4c9f3f24edd78af3b897ee261733 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 29 Oct 2008 14:08:13 -0600 Subject: gallium: test for PIPE_OS_LINUX instead of __linux__ --- src/gallium/auxiliary/rtasm/rtasm_execmem.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c index 19087589a8..864bd4d3fe 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -38,12 +38,13 @@ #include "rtasm_execmem.h" -#if defined(__linux__) +#if defined(PIPE_OS_LINUX) + /* * Allocate a large block of memory which can hold code then dole it out * in pieces by means of the generic memory manager code. -*/ + */ #include #include @@ -113,7 +114,7 @@ rtasm_exec_free(void *addr) } -#else +#else /* PIPE_OS_LINUX */ /* * Just use regular memory. @@ -133,4 +134,4 @@ rtasm_exec_free(void *addr) } -#endif +#endif /* PIPE_OS_LINUX */ -- cgit v1.2.3 From 3ad56968f09397a8dd417eae025b9506efaf8414 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 29 Oct 2008 14:19:12 -0600 Subject: gallium: prefix memory manager functions with u_ to differentiate from functions in mesa/main/mm.c --- src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 10 +++++----- src/gallium/auxiliary/rtasm/rtasm_execmem.c | 8 ++++---- src/gallium/auxiliary/util/u_mm.c | 12 ++++++------ src/gallium/auxiliary/util/u_mm.h | 12 ++++++------ 4 files changed, 21 insertions(+), 21 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index fe80ca30ee..6e10cf1806 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -100,7 +100,7 @@ mm_buffer_destroy(struct pb_buffer *buf) assert(buf->base.refcount == 0); pipe_mutex_lock(mm->mutex); - mmFreeMem(mm_buf->block); + u_mmFreeMem(mm_buf->block); FREE(buf); pipe_mutex_unlock(mm->mutex); } @@ -175,14 +175,14 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr, mm_buf->mgr = mm; - mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0); + mm_buf->block = u_mmAllocMem(mm->heap, size, mm->align2, 0); if(!mm_buf->block) { debug_printf("warning: heap full\n"); #if 0 mmDumpMemInfo(mm->heap); #endif - mm_buf->block = mmAllocMem(mm->heap, size, mm->align2, 0); + mm_buf->block = u_mmAllocMem(mm->heap, size, mm->align2, 0); if(!mm_buf->block) { FREE(mm_buf); pipe_mutex_unlock(mm->mutex); @@ -213,7 +213,7 @@ mm_bufmgr_destroy(struct pb_manager *mgr) pipe_mutex_lock(mm->mutex); - mmDestroy(mm->heap); + u_mmDestroy(mm->heap); pb_unmap(mm->buffer); pb_reference(&mm->buffer, NULL); @@ -254,7 +254,7 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, if(!mm->map) goto failure; - mm->heap = mmInit(0, size); + mm->heap = u_mmInit(0, size); if (!mm->heap) goto failure; diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c index 864bd4d3fe..df353633e8 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -63,7 +63,7 @@ static void init_heap(void) { if (!exec_heap) - exec_heap = mmInit( 0, EXEC_HEAP_SIZE ); + exec_heap = u_mmInit( 0, EXEC_HEAP_SIZE ); if (!exec_mem) exec_mem = (unsigned char *) mmap(0, EXEC_HEAP_SIZE, @@ -84,7 +84,7 @@ rtasm_exec_malloc(size_t size) if (exec_heap) { size = (size + 31) & ~31; - block = mmAllocMem( exec_heap, size, 32, 0 ); + block = u_mmAllocMem( exec_heap, size, 32, 0 ); } if (block) @@ -104,10 +104,10 @@ rtasm_exec_free(void *addr) pipe_mutex_lock(exec_mutex); if (exec_heap) { - struct mem_block *block = mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem); + struct mem_block *block = u_mmFindBlock(exec_heap, (unsigned char *)addr - exec_mem); if (block) - mmFreeMem(block); + u_mmFreeMem(block); } pipe_mutex_unlock(exec_mutex); diff --git a/src/gallium/auxiliary/util/u_mm.c b/src/gallium/auxiliary/util/u_mm.c index 0f51dd5977..592ace00fc 100644 --- a/src/gallium/auxiliary/util/u_mm.c +++ b/src/gallium/auxiliary/util/u_mm.c @@ -31,7 +31,7 @@ void -mmDumpMemInfo(const struct mem_block *heap) +u_mmDumpMemInfo(const struct mem_block *heap) { debug_printf("Memory heap %p:\n", (void *)heap); if (heap == 0) { @@ -58,7 +58,7 @@ mmDumpMemInfo(const struct mem_block *heap) } struct mem_block * -mmInit(int ofs, int size) +u_mmInit(int ofs, int size) { struct mem_block *heap, *block; @@ -165,7 +165,7 @@ SliceBlock(struct mem_block *p, struct mem_block * -mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) +u_mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) { struct mem_block *p; const int mask = (1 << align2)-1; @@ -198,7 +198,7 @@ mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) struct mem_block * -mmFindBlock(struct mem_block *heap, int start) +u_mmFindBlock(struct mem_block *heap, int start) { struct mem_block *p; @@ -237,7 +237,7 @@ Join2Blocks(struct mem_block *p) } int -mmFreeMem(struct mem_block *b) +u_mmFreeMem(struct mem_block *b) { if (!b) return 0; @@ -266,7 +266,7 @@ mmFreeMem(struct mem_block *b) void -mmDestroy(struct mem_block *heap) +u_mmDestroy(struct mem_block *heap) { struct mem_block *p; diff --git a/src/gallium/auxiliary/util/u_mm.h b/src/gallium/auxiliary/util/u_mm.h index b226b101cb..ce20e48763 100644 --- a/src/gallium/auxiliary/util/u_mm.h +++ b/src/gallium/auxiliary/util/u_mm.h @@ -49,7 +49,7 @@ struct mem_block { * input: total size in bytes * return: a heap pointer if OK, NULL if error */ -extern struct mem_block *mmInit(int ofs, int size); +extern struct mem_block *u_mmInit(int ofs, int size); /** * Allocate 'size' bytes with 2^align2 bytes alignment, @@ -61,7 +61,7 @@ extern struct mem_block *mmInit(int ofs, int size); * startSearch = linear offset from start of heap to begin search * return: pointer to the allocated block, 0 if error */ -extern struct mem_block *mmAllocMem(struct mem_block *heap, int size, int align2, +extern struct mem_block *u_mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch); /** @@ -69,23 +69,23 @@ extern struct mem_block *mmAllocMem(struct mem_block *heap, int size, int align2 * input: pointer to a block * return: 0 if OK, -1 if error */ -extern int mmFreeMem(struct mem_block *b); +extern int u_mmFreeMem(struct mem_block *b); /** * Free block starts at offset * input: pointer to a heap, start offset * return: pointer to a block */ -extern struct mem_block *mmFindBlock(struct mem_block *heap, int start); +extern struct mem_block *u_mmFindBlock(struct mem_block *heap, int start); /** * destroy MM */ -extern void mmDestroy(struct mem_block *mmInit); +extern void u_mmDestroy(struct mem_block *mmInit); /** * For debuging purpose. */ -extern void mmDumpMemInfo(const struct mem_block *mmInit); +extern void u_mmDumpMemInfo(const struct mem_block *mmInit); #endif -- cgit v1.2.3 From 8828d52348d81e1b9ec985200a430554873b5f4e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 29 Oct 2008 14:28:57 -0600 Subject: gallium: fix alignment parameter passed to u_mmAllocMem() Was 32, now 5. The param is expressed as a power of two exponent. The net effect is that the alignment was a no-op on X86 but on PPC we always got the same memory address everytime rtasm_exec_malloc() was called. --- src/gallium/auxiliary/rtasm/rtasm_execmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c index df353633e8..be7433baf8 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -83,8 +83,8 @@ rtasm_exec_malloc(size_t size) init_heap(); if (exec_heap) { - size = (size + 31) & ~31; - block = u_mmAllocMem( exec_heap, size, 32, 0 ); + size = (size + 31) & ~31; /* next multiple of 32 bytes */ + block = u_mmAllocMem( exec_heap, size, 5, 0 ); /* 5 -> 32-byte alignment */ } if (block) -- cgit v1.2.3 From 8160cb4935151a12588acbe546f00ce8d77bda91 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 29 Oct 2008 14:55:02 -0600 Subject: gallium: fix alignment parameter passed to u_mmAllocMem() Was 32, now 5. The param is expressed as a power of two exponent. The net effect is that the alignment was a no-op on X86 but on PPC we always got the same memory address everytime rtasm_exec_malloc() was called. --- src/gallium/auxiliary/rtasm/rtasm_execmem.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c index 19087589a8..bb3b1a4c25 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -82,8 +82,8 @@ rtasm_exec_malloc(size_t size) init_heap(); if (exec_heap) { - size = (size + 31) & ~31; - block = mmAllocMem( exec_heap, size, 32, 0 ); + size = (size + 31) & ~31; /* next multiple of 32 bytes */ + block = u_mmAllocMem( exec_heap, size, 5, 0 ); /* 5 -> 32-byte alignment */ } if (block) -- cgit v1.2.3 From a5d920297a2affe34c535d30a2c49588f92f69ad Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 29 Oct 2008 16:26:10 -0600 Subject: gallium: use execmem for PPC code, grow instruction buffer as needed --- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 70 +++++++++++++++++++++++---------- src/gallium/auxiliary/rtasm/rtasm_ppc.h | 1 + src/gallium/auxiliary/tgsi/tgsi_ppc.c | 8 ++++ 3 files changed, 58 insertions(+), 21 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index a90b5587b0..e73ed71a0b 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -38,6 +38,7 @@ #include #include "util/u_memory.h" #include "pipe/p_debug.h" +#include "rtasm_execmem.h" #include "rtasm_ppc.h" @@ -46,9 +47,9 @@ ppc_init_func(struct ppc_function *p, unsigned max_inst) { uint i; - p->store = align_malloc(max_inst * PPC_INST_SIZE, 16); p->num_inst = 0; - p->max_inst = max_inst; + p->max_inst = 100; /* first guess at buffer size */ + p->store = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE); p->reg_used = 0x0; p->fp_used = 0x0; p->vec_used = 0x0; @@ -66,12 +67,19 @@ ppc_release_func(struct ppc_function *p) { assert(p->num_inst <= p->max_inst); if (p->store != NULL) { - align_free(p->store); + rtasm_exec_free(p->store); } p->store = NULL; } +uint +ppc_num_instructions(const struct ppc_function *p) +{ + return p->num_inst; +} + + void (*ppc_get_func(struct ppc_function *p))(void) { #if 0 @@ -202,6 +210,35 @@ ppc_release_vec_register(struct ppc_function *p, int reg) } +/** + * Append instruction to instruction buffer. Grow buffer if out of room. + */ +static void +emit_instruction(struct ppc_function *p, uint32_t inst_bits) +{ + if (!p->store) + return; /* out of memory, drop the instruction */ + + if (p->num_inst == p->max_inst) { + /* allocate larger buffer */ + uint32_t *newbuf; + p->max_inst *= 2; /* 2x larger */ + newbuf = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE); + if (newbuf) { + memcpy(newbuf, p->store, p->num_inst * PPC_INST_SIZE); + } + rtasm_exec_free(p->store); + p->store = newbuf; + if (!p->store) { + /* out of memory */ + p->num_inst = 0; + return; + } + } + + p->store[p->num_inst++] = inst_bits; +} + union vx_inst { uint32_t bits; @@ -223,8 +260,7 @@ emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) inst.inst.vA = vA; inst.inst.vB = vB; inst.inst.op2 = op2; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); }; @@ -250,8 +286,7 @@ emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) inst.inst.vB = vB; inst.inst.rC = 0; inst.inst.op2 = op2; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); }; @@ -277,8 +312,7 @@ emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) inst.inst.vB = vB; inst.inst.vC = vC; inst.inst.op2 = op2; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); }; @@ -300,8 +334,7 @@ emit_i(struct ppc_function *p, uint op, uint li, uint aa, uint lk) inst.inst.li = li; inst.inst.aa = aa; inst.inst.lk = lk; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); } @@ -330,8 +363,7 @@ emit_xl(struct ppc_function *p, uint op, uint bo, uint bi, uint bh, inst.inst.bh = bh; inst.inst.op2 = op2; inst.inst.lk = lk; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); } static INLINE void @@ -373,8 +405,7 @@ emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2) inst.inst.rb = rb; inst.inst.op2 = op2; inst.inst.unused = 0x0; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); } @@ -398,8 +429,7 @@ emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si) inst.inst.rt = rt; inst.inst.ra = ra; inst.inst.si = (unsigned) (si & 0xffff); - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); }; @@ -428,8 +458,7 @@ emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2, inst.inst.unused = 0x0; inst.inst.op2 = op2; inst.inst.rc = rc; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); }; @@ -458,8 +487,7 @@ emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe, inst.inst.oe = oe; inst.inst.op2 = op2; inst.inst.rc = rc; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index 561e139bce..d0477dec94 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -64,6 +64,7 @@ struct ppc_function extern void ppc_init_func(struct ppc_function *p, unsigned max_inst); extern void ppc_release_func(struct ppc_function *p); +extern uint ppc_num_instructions(const struct ppc_function *p); extern void (*ppc_get_func( struct ppc_function *p ))( void ); extern void ppc_dump_func(const struct ppc_function *p); diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 5d13070922..a92b1902e3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -1315,6 +1315,14 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, tgsi_parse_free( &parse ); + if (ppc_num_instructions(func) == 0) { + /* ran out of memory for instructions */ + ok = FALSE; + } + + if (!ok) + debug_printf("TGSI->PPC translation failed\n"); + return ok; } -- cgit v1.2.3 From 725ba94ce5701aa8690c7ab2ea792dda86cbbe7a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 29 Oct 2008 16:35:59 -0600 Subject: gallium: no longer pass max_inst to ppc_init_func() --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 2 +- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 2 +- src/gallium/auxiliary/rtasm/rtasm_ppc.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index d720c7bbd5..8b75136144 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -197,7 +197,7 @@ draw_create_vs_ppc(struct draw_context *draw, vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 * sizeof(float), 16); - ppc_init_func( &vs->ppc_program, 2000 ); /* XXX fix limit */ + ppc_init_func( &vs->ppc_program ); if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens, &vs->ppc_program, diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index e73ed71a0b..6d11263be8 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -43,7 +43,7 @@ void -ppc_init_func(struct ppc_function *p, unsigned max_inst) +ppc_init_func(struct ppc_function *p) { uint i; diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index d0477dec94..afb4704c39 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -62,7 +62,7 @@ struct ppc_function -extern void ppc_init_func(struct ppc_function *p, unsigned max_inst); +extern void ppc_init_func(struct ppc_function *p); extern void ppc_release_func(struct ppc_function *p); extern uint ppc_num_instructions(const struct ppc_function *p); extern void (*ppc_get_func( struct ppc_function *p ))( void ); -- cgit v1.2.3 From f952aac1da432336f330122cacc30a87f52b4101 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 29 Oct 2008 16:56:28 -0600 Subject: gallium: grow SPE instruction buffer as needed --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 57 +++++++++++++++++++++-------- 1 file changed, 41 insertions(+), 16 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index dea1aed032..f8568f690b 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -185,6 +185,34 @@ reg_name(int reg) } +static void +emit_instruction(struct spe_function *p, uint32_t inst_bits) +{ + if (!p->store) + return; /* out of memory, drop the instruction */ + + if (p->num_inst == p->max_inst) { + /* allocate larger buffer */ + uint32_t *newbuf; + p->max_inst *= 2; /* 2x larger */ + newbuf = align_malloc(p->max_inst * SPE_INST_SIZE, 16); + if (newbuf) { + memcpy(newbuf, p->store, p->num_inst * SPE_INST_SIZE); + } + align_free(p->store); + p->store = newbuf; + if (!p->store) { + /* out of memory */ + p->num_inst = 0; + return; + } + } + + p->store[p->num_inst++] = inst_bits; +} + + + static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, unsigned rA, unsigned rB, const char *name) { @@ -193,8 +221,7 @@ static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rB = rB; inst.inst.rA = rA; inst.inst.rT = rT; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); if (p->print) { indent(p); printf("%s\t%s, %s, %s\n", @@ -212,8 +239,7 @@ static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, inst.inst.rB = rB; inst.inst.rA = rA; inst.inst.rC = rC; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); if (p->print) { indent(p); printf("%s\t%s, %s, %s, %s\n", rem_prefix(name), reg_name(rT), @@ -230,8 +256,7 @@ static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT, inst.inst.i7 = imm; inst.inst.rA = rA; inst.inst.rT = rT; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); if (p->print) { indent(p); printf("%s\t%s, %s, 0x%x\n", @@ -249,8 +274,7 @@ static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT, inst.inst.i8 = imm; inst.inst.rA = rA; inst.inst.rT = rT; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); if (p->print) { indent(p); printf("%s\t%s, %s, 0x%x\n", @@ -268,8 +292,7 @@ static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, inst.inst.i10 = imm; inst.inst.rA = rA; inst.inst.rT = rT; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); if (p->print) { indent(p); printf("%s\t%s, %s, 0x%x\n", @@ -295,8 +318,7 @@ static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, inst.inst.op = op; inst.inst.i16 = imm; inst.inst.rT = rT; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); if (p->print) { indent(p); printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm); @@ -311,8 +333,7 @@ static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, inst.inst.op = op; inst.inst.i18 = imm; inst.inst.rT = rT; - p->store[p->num_inst++] = inst.bits; - assert(p->num_inst <= p->max_inst); + emit_instruction(p, inst.bits); if (p->print) { indent(p); printf("%s\t%s, 0x%x\n", rem_prefix(name), reg_name(rT), imm); @@ -394,15 +415,19 @@ void _name (struct spe_function *p, int imm) \ /** * Initialize an spe_function. - * \param code_size size of instruction buffer to allocate, in bytes. + * \param code_size initial size of instruction buffer to allocate, in bytes. + * If zero, use a default. */ void spe_init_func(struct spe_function *p, unsigned code_size) { unsigned int i; - p->store = align_malloc(code_size, 16); + if (!code_size) + code_size = 64; + p->num_inst = 0; p->max_inst = code_size / SPE_INST_SIZE; + p->store = align_malloc(code_size, 16); p->set_count = 0; memset(p->regs, 0, SPE_NUM_REGS * sizeof(p->regs[0])); -- cgit v1.2.3 From 7d7f0f170692962cf57d6893428f3a18f590c060 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 29 Oct 2008 17:02:30 -0600 Subject: gallium: fix copy&paste bug --- src/gallium/auxiliary/rtasm/rtasm_execmem.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_execmem.c b/src/gallium/auxiliary/rtasm/rtasm_execmem.c index bb3b1a4c25..f16191cb61 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_execmem.c +++ b/src/gallium/auxiliary/rtasm/rtasm_execmem.c @@ -83,7 +83,7 @@ rtasm_exec_malloc(size_t size) if (exec_heap) { size = (size + 31) & ~31; /* next multiple of 32 bytes */ - block = u_mmAllocMem( exec_heap, size, 5, 0 ); /* 5 -> 32-byte alignment */ + block = mmAllocMem( exec_heap, size, 5, 0 ); /* 5 -> 32-byte alignment */ } if (block) -- cgit v1.2.3 From 766cb95a4564c48f35b5180155ab40320a68e371 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 29 Oct 2008 17:02:56 -0600 Subject: gallium: new sanity assertions in mmAllocMem() --- src/gallium/auxiliary/util/u_mm.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_mm.c b/src/gallium/auxiliary/util/u_mm.c index 0f51dd5977..01dd67c810 100644 --- a/src/gallium/auxiliary/util/u_mm.c +++ b/src/gallium/auxiliary/util/u_mm.c @@ -172,6 +172,10 @@ mmAllocMem(struct mem_block *heap, int size, int align2, int startSearch) int startofs = 0; int endofs; + assert(size >= 0); + assert(align2 >= 0); + assert(align2 <= 12); /* sanity check, 2^12 (4KB) enough? */ + if (!heap || align2 < 0 || size <= 0) return NULL; -- cgit v1.2.3 From b81a7dc2d8ba09b48d5022cf9ff65f2fad890e11 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin Date: Thu, 30 Oct 2008 23:52:59 +0100 Subject: gallivm: replace the temp parameters of the JIT function with alloca'ed temps. This avoids useless writes of temporary results. --- src/gallium/auxiliary/gallivm/gallivm_cpu.cpp | 6 ++-- src/gallium/auxiliary/gallivm/storagesoa.cpp | 44 +++++++++++++++++++-------- src/gallium/auxiliary/gallivm/storagesoa.h | 10 +++--- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 11 ++----- 4 files changed, 41 insertions(+), 30 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp index 3a2f2878a3..93a9748bdb 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp @@ -179,8 +179,7 @@ struct gallivm_cpu_engine * gallivm_global_cpu_engine() typedef void (*vertex_shader_runner)(void *ainputs, void *dests, - float (*aconsts)[4], - void *temps); + float (*aconsts)[4]); #define MAX_TGSI_VERTICES 4 /*! @@ -223,8 +222,7 @@ int gallivm_cpu_vs_exec(struct gallivm_prog *prog, /* run shader */ runner(machine->Inputs, machine->Outputs, - (float (*)[4]) constants, - machine->Temps); + (float (*)[4]) constants); /* Unswizzle all output results */ diff --git a/src/gallium/auxiliary/gallivm/storagesoa.cpp b/src/gallium/auxiliary/gallivm/storagesoa.cpp index 4fc075cf6d..e1e5cabcf5 100644 --- a/src/gallium/auxiliary/gallivm/storagesoa.cpp +++ b/src/gallium/auxiliary/gallivm/storagesoa.cpp @@ -48,13 +48,11 @@ using namespace llvm; StorageSoa::StorageSoa(llvm::BasicBlock *block, llvm::Value *input, llvm::Value *output, - llvm::Value *consts, - llvm::Value *temps) + llvm::Value *consts) : m_block(block), m_input(input), m_output(output), m_consts(consts), - m_temps(temps), m_immediates(0), m_idx(0) { @@ -169,7 +167,7 @@ std::vector StorageSoa::constElement(llvm::IRBuilder<>* m_builder, { llvm::Value* res; std::vector res2(4); - llvm::Value *xChannel, *yChannel, *zChannel, *wChannel; + llvm::Value *xChannel; xChannel = elementPointer(m_consts, idx, 0); @@ -195,14 +193,15 @@ std::vector StorageSoa::outputElement(llvm::Value *idx) return res; } -std::vector StorageSoa::tempElement(llvm::Value *idx) +std::vector StorageSoa::tempElement(llvm::IRBuilder<>* m_builder, int idx) { std::vector res(4); + llvm::Value *temp = m_temps[idx]; - res[0] = element(m_temps, idx, 0); - res[1] = element(m_temps, idx, 1); - res[2] = element(m_temps, idx, 2); - res[3] = element(m_temps, idx, 3); + res[0] = element(temp, constantInt(0), 0); + res[1] = element(temp, constantInt(0), 1); + res[2] = element(temp, constantInt(0), 2); + res[3] = element(temp, constantInt(0), 3); return res; } @@ -326,7 +325,7 @@ std::vector StorageSoa::load(enum tgsi_file_type type, int idx, in val = outputElement(realIndex); break; case TGSI_FILE_TEMPORARY: - val = tempElement(realIndex); + val = tempElement(m_builder, idx); break; case TGSI_FILE_CONSTANT: val = constElement(m_builder, realIndex); @@ -355,19 +354,39 @@ std::vector StorageSoa::load(enum tgsi_file_type type, int idx, in return res; } +llvm::Value * StorageSoa::allocaTemp(llvm::IRBuilder<>* m_builder) +{ + VectorType *vector = VectorType::get(Type::FloatTy, 4); + ArrayType *vecArray = ArrayType::get(vector, 4); + AllocaInst *alloca = new AllocaInst(vecArray, "temp", + m_builder->GetInsertBlock()); + + return alloca; +} + + void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector &val, - int mask) + int mask, llvm::IRBuilder<>* m_builder) { llvm::Value *out = 0; + llvm::Value *realIndex = 0; switch(type) { case TGSI_FILE_OUTPUT: out = m_output; + realIndex = constantInt(idx); break; case TGSI_FILE_TEMPORARY: - out = m_temps; + // if that temp doesn't already exist, alloca it + if (m_temps.find(idx) == m_temps.end()) + m_temps[idx] = allocaTemp(m_builder); + + out = m_temps[idx]; + + realIndex = constantInt(0); break; case TGSI_FILE_INPUT: out = m_input; + realIndex = constantInt(idx); break; case TGSI_FILE_ADDRESS: { llvm::Value *addr = m_addresses[idx]; @@ -385,7 +404,6 @@ void StorageSoa::store(enum tgsi_file_type type, int idx, const std::vector load(enum tgsi_file_type type, int idx, int swizzle, llvm::IRBuilder<>* m_builder, llvm::Value *indIdx =0); void store(enum tgsi_file_type type, int idx, const std::vector &val, - int mask); + int mask, llvm::IRBuilder<>* m_builder); void addImmediate(float *vec); void declareImmediates(); @@ -84,7 +83,7 @@ private: llvm::Value* unpackConstElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx, int cc); std::vector constElement(llvm::IRBuilder<>* m_builder, llvm::Value *indIdx); std::vector outputElement(llvm::Value *indIdx); - std::vector tempElement(llvm::Value *indIdx); + std::vector tempElement(llvm::IRBuilder<>* m_builder, int idx); std::vector immediateElement(llvm::Value *indIdx); private: llvm::BasicBlock *m_block; @@ -92,12 +91,13 @@ private: llvm::Value *m_input; llvm::Value *m_output; llvm::Value *m_consts; - llvm::Value *m_temps; + std::map m_temps; llvm::GlobalVariable *m_immediates; std::map m_addresses; std::vector > m_immediatesToFlush; + llvm::Value * allocaTemp(llvm::IRBuilder<>* m_builder); mutable std::map m_constInts; mutable char m_name[32]; diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index 1191a6cae9..c11b88af9e 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -53,7 +53,6 @@ static inline FunctionType *vertexShaderFunctionType() // [4 x <4 x float>] inputs, // [4 x <4 x float>] output, // [4 x [1 x float]] consts, - // [4 x <4 x float>] temps std::vector funcArgs; VectorType *vectorType = VectorType::get(Type::FloatTy, 4); @@ -67,7 +66,6 @@ static inline FunctionType *vertexShaderFunctionType() funcArgs.push_back(vectorArrayPtr);//inputs funcArgs.push_back(vectorArrayPtr);//output funcArgs.push_back(constsArrayPtr);//consts - funcArgs.push_back(vectorArrayPtr);//temps FunctionType *functionType = FunctionType::get( /*Result=*/Type::VoidTy, @@ -246,7 +244,6 @@ translate_instruction(llvm::Module *module, val = storage->constElement(src->SrcRegister.Index, indIdx); } else if (src->SrcRegister.File == TGSI_FILE_INPUT) { val = storage->inputElement(src->SrcRegister.Index, indIdx); - // FIXME we should not be generating elements for temporaries, this creates useless memory writes } else if (src->SrcRegister.File == TGSI_FILE_TEMPORARY) { val = storage->tempElement(src->SrcRegister.Index); } else if (src->SrcRegister.File == TGSI_FILE_OUTPUT) { @@ -677,7 +674,6 @@ translate_instruction(llvm::Module *module, if (dst->DstRegister.File == TGSI_FILE_OUTPUT) { storage->setOutputElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); - // FIXME we should not be generating elements for temporaries, this creates useless memory writes } else if (dst->DstRegister.File == TGSI_FILE_TEMPORARY) { storage->setTempElement(dst->DstRegister.Index, out, dst->DstRegister.WriteMask); } else if (dst->DstRegister.File == TGSI_FILE_ADDRESS) { @@ -1027,7 +1023,8 @@ translate_instructionir(llvm::Module *module, for (int i = 0; i < inst->Instruction.NumDstRegs; ++i) { struct tgsi_full_dst_register *dst = &inst->FullDstRegisters[i]; storage->store((enum tgsi_file_type)dst->DstRegister.File, - dst->DstRegister.Index, out, dst->DstRegister.WriteMask); + dst->DstRegister.Index, out, dst->DstRegister.WriteMask, + instr->getIRBuilder() ); } } @@ -1122,8 +1119,6 @@ llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, output->setName("outputs"); Value *consts = args++; consts->setName("consts"); - Value *temps = args++; - temps->setName("temps"); BasicBlock *label_entry = BasicBlock::Create("entry", shader, 0); @@ -1132,7 +1127,7 @@ llvm::Module * tgsi_to_llvmir(struct gallivm_ir *ir, fi = tgsi_default_full_instruction(); fd = tgsi_default_full_declaration(); - StorageSoa storage(label_entry, input, output, consts, temps); + StorageSoa storage(label_entry, input, output, consts); InstructionsSoa instr(mod, shader, label_entry, &storage); while(!tgsi_parse_end_of_tokens(&parse)) { -- cgit v1.2.3 From 82e1026c30dd416231df66daf9b2f28bfc1f1cd6 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 18 Oct 2008 13:31:00 +0900 Subject: gallium: Fix msvc warning. --- src/gallium/auxiliary/util/u_tile.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 853c503f4f..00c12badf2 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -504,7 +504,7 @@ a8_put_tile_rgba(ubyte *dst, for (j = 0; j < w; j++, pRow += 4) { unsigned a; a = float_to_ubyte(pRow[3]); - *dst++ = a; + *dst++ = (ubyte) a; } p += src_stride; } -- cgit v1.2.3 From 28a2edb7389107cd46eb382a44d339dd7972310a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 28 Oct 2008 16:11:09 +0900 Subject: pipebuffer: Ensure refcounts of live buffer objects are never zero. --- src/gallium/auxiliary/pipebuffer/pb_buffer.h | 15 ++++++++++++--- src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c | 3 +-- 2 files changed, 13 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index 8505d333bd..19db8a6a91 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -177,12 +177,16 @@ pb_get_base_buffer( struct pb_buffer *buf, } +/** + * Don't call this directly. Use pb_reference instead. + */ static INLINE void pb_destroy(struct pb_buffer *buf) { assert(buf); if(!buf) return; + assert(buf->base.refcount == 0); buf->vtbl->destroy(buf); } @@ -193,11 +197,16 @@ static INLINE void pb_reference(struct pb_buffer **dst, struct pb_buffer *src) { - if (src) + if (src) { + assert(src->base.refcount); src->base.refcount++; + } - if (*dst && --(*dst)->base.refcount == 0) - pb_destroy( *dst ); + if (*dst) { + assert((*dst)->base.refcount); + if(--(*dst)->base.refcount == 0) + pb_destroy( *dst ); + } *dst = src; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c index 633ee70a75..e2594ea236 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c @@ -86,8 +86,7 @@ fenced_bufmgr_create_buffer(struct pb_manager *mgr, fenced_buf = fenced_buffer_create(fenced_mgr->fenced_list, buf); if(!fenced_buf) { - assert(buf->base.refcount == 1); - pb_destroy(buf); + pb_reference(&buf, NULL); } return fenced_buf; -- cgit v1.2.3 From 95d108416c27f45f4de1178abbe6797cd128ef6a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 31 Oct 2008 19:50:43 +0900 Subject: gallium: Fix typo. --- src/gallium/auxiliary/util/u_time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c index bf7d1d1c8d..57b80e5604 100644 --- a/src/gallium/auxiliary/util/u_time.c +++ b/src/gallium/auxiliary/util/u_time.c @@ -200,7 +200,7 @@ util_time_timeout(const struct util_time *start, } -#if defined(PIPE_SUBSYSYEM_WINDOWS_DISPLAY) +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) void util_time_sleep(unsigned usecs) { LONGLONG start, curr, end; -- cgit v1.2.3 From bdf24007cae9ce485ef123e935eb87c7cba4e0e5 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 3 Nov 2008 20:50:14 +0900 Subject: gallium: WinCE portability fixes. --- src/gallium/auxiliary/util/p_debug.c | 39 +++++++++++++++++++++++++++++++++++- src/gallium/auxiliary/util/u_math.h | 2 +- 2 files changed, 39 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 3ed8bdfdf3..a1a51d7ef2 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -36,6 +36,13 @@ #include #include +#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) + +#include +#include +#include +#include + #elif defined(PIPE_SUBSYSTEM_WINDOWS_USER) #ifndef WIN32_LEAN_AND_MEAN @@ -98,7 +105,35 @@ void _debug_vprintf(const char *format, va_list ap) OutputDebugStringA(buf); buf[0] = '\0'; } -#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) || defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) +#elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) + wchar_t *wide_format; + long wide_str_len; + char buf[512]; + int ret; +#if (_WIN32_WCE < 600) + ret = vsprintf(buf, format, ap); + if(ret < 0){ + sprintf(buf, "Cant handle debug print!"); + ret = 25; + } +#else + ret = vsprintf_s(buf, 512, format, ap); + if(ret < 0){ + sprintf_s(buf, 512, "Cant handle debug print!"); + ret = 25; + } +#endif + buf[ret] = '\0'; + /* Format is ascii - needs to be converted to wchar_t for printing */ + wide_str_len = MultiByteToWideChar(CP_ACP, 0, (const char *) buf, -1, NULL, 0); + wide_format = (wchar_t *) malloc((wide_str_len+1) * sizeof(wchar_t)); + if (wide_format) { + MultiByteToWideChar(CP_ACP, 0, (const char *) buf, -1, + wide_format, wide_str_len); + NKDbgPrintfW(wide_format, wide_format); + free(wide_format); + } +#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) /* TODO */ #else /* !PIPE_SUBSYSTEM_WINDOWS */ vfprintf(stderr, format, ap); @@ -637,6 +672,7 @@ void debug_dump_surface_bmp(const char *filename, struct pipe_surface *surface) { +#ifndef PIPE_SUBSYSTEM_WINDOWS_MINIPORT struct util_stream *stream; unsigned surface_usage; struct bmp_file_header bmfh; @@ -703,6 +739,7 @@ error2: FREE(rgba); error1: ; +#endif } #endif diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index be7303e550..d2eaa2e7f7 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -68,7 +68,7 @@ __inline double ceil(double val) return ceil_val; } -#ifndef PIPE_SUBSYSTEM_WINDOWS_CE +#ifndef PIPE_SUBSYSTEM_WINDOWS_CE_OGL __inline double floor(double val) { double floor_val; -- cgit v1.2.3 From 7b42a5d634d32c3f15f3a3535b2b9328dfca49bf Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 25 Oct 2008 03:35:01 +0900 Subject: gallium: Read from PIPE_FORMAT_Z32_FLOAT. Mainly for debugging purposes for now. --- src/gallium/auxiliary/util/u_tile.c | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index 00c12badf2..fa683b6774 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -769,6 +769,32 @@ z24s8_get_tile_rgba(const unsigned *src, } +/*** PIPE_FORMAT_Z32_FLOAT ***/ + +/** + * Return each Z value as four floats in [0,1]. + */ +static void +z32f_get_tile_rgba(const float *src, + unsigned w, unsigned h, + float *p, + unsigned dst_stride) +{ + unsigned i, j; + + for (i = 0; i < h; i++) { + float *pRow = p; + for (j = 0; j < w; j++, pRow += 4) { + pRow[0] = + pRow[1] = + pRow[2] = + pRow[3] = *src++; + } + p += dst_stride; + } +} + + /*** PIPE_FORMAT_YCBCR / PIPE_FORMAT_YCBCR_REV ***/ /** @@ -913,6 +939,9 @@ pipe_tile_raw_to_rgba(enum pipe_format format, case PIPE_FORMAT_Z24S8_UNORM: z24s8_get_tile_rgba((unsigned *) src, w, h, dst, dst_stride); break; + case PIPE_FORMAT_Z32_FLOAT: + z32f_get_tile_rgba((float *) src, w, h, dst, dst_stride); + break; case PIPE_FORMAT_YCBCR: ycbcr_get_tile_rgba((ushort *) src, w, h, dst, dst_stride, FALSE); break; -- cgit v1.2.3 From 95438727ddc4012d6e2db843d7173607b2a23b56 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 26 Aug 2008 17:40:24 +0200 Subject: gallium: Silence compiler warnings on Windows. --- src/gallium/auxiliary/util/u_tile.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c index fa683b6774..32f6b072a0 100644 --- a/src/gallium/auxiliary/util/u_tile.c +++ b/src/gallium/auxiliary/util/u_tile.c @@ -460,7 +460,7 @@ l8_put_tile_rgba(ubyte *dst, for (j = 0; j < w; j++, pRow += 4) { unsigned r; r = float_to_ubyte(pRow[0]); - *dst++ = r; + *dst++ = (ubyte) r; } p += src_stride; } @@ -634,7 +634,7 @@ i8_put_tile_rgba(ubyte *dst, for (j = 0; j < w; j++, pRow += 4) { unsigned r; r = float_to_ubyte(pRow[0]); - *dst++ = r; + *dst++ = (ubyte) r; } p += src_stride; } -- cgit v1.2.3 From 502974b345dae8a3ca641083b4df5183b04ca825 Mon Sep 17 00:00:00 2001 From: michal Date: Wed, 5 Nov 2008 11:48:56 +0100 Subject: tgsi: Implement OPCODE_TRUNC. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 4681b29f52..c115956c5d 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -657,6 +657,17 @@ emit_f2it( make_xmm( xmm ) ); } +static void +emit_i2f( + struct x86_function *func, + unsigned xmm ) +{ + sse2_cvtdq2ps( + func, + make_xmm( xmm ), + make_xmm( xmm ) ); +} + static void PIPE_CDECL flr4f( float *store ) @@ -1967,7 +1978,12 @@ emit_instruction( break; case TGSI_OPCODE_TRUNC: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_f2it( func, 0 ); + emit_i2f( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } break; case TGSI_OPCODE_SHL: -- cgit v1.2.3 From 5a0299875c7a4a9a0cb2cf55777c92c1b17d528b Mon Sep 17 00:00:00 2001 From: michal Date: Wed, 5 Nov 2008 11:58:11 +0100 Subject: draw: Implement TGSI_OPCODE_TRUNC. --- src/gallium/auxiliary/draw/draw_vs_aos.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 87232865e2..a6880685db 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1632,6 +1632,17 @@ static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } +static boolean emit_TRUNC( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg tmp0 = aos_get_xmm_reg(cp); + + sse2_cvttps2dq(cp->func, tmp0, arg0); + sse2_cvtdq2ps(cp->func, tmp0, tmp0); + + store_dest(cp, &op->FullDstRegisters[0], tmp0); + return TRUE; +} static boolean emit_XPD( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { @@ -1770,6 +1781,9 @@ emit_instruction( struct aos_compilation *cp, case TGSI_OPCODE_SIN: return emit_SIN(cp, inst); + case TGSI_OPCODE_TRUNC: + return emit_TRUNC(cp, inst); + case TGSI_OPCODE_END: return TRUE; -- cgit v1.2.3 From de2ace201fe26d36a2a75211a7d8447940a47fbe Mon Sep 17 00:00:00 2001 From: michal Date: Wed, 5 Nov 2008 11:48:56 +0100 Subject: tgsi: Implement OPCODE_TRUNC. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index f79170b9d6..47e52c8424 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -784,6 +784,17 @@ emit_f2it( make_xmm( xmm ) ); } +static void +emit_i2f( + struct x86_function *func, + unsigned xmm ) +{ + sse2_cvtdq2ps( + func, + make_xmm( xmm ), + make_xmm( xmm ) ); +} + static void PIPE_CDECL flr4f( float *store ) @@ -2104,7 +2115,12 @@ emit_instruction( break; case TGSI_OPCODE_TRUNC: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_f2it( func, 0 ); + emit_i2f( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } break; case TGSI_OPCODE_SHL: -- cgit v1.2.3 From 7115b79b77e541f3eb81db00f6f0c34a0f224feb Mon Sep 17 00:00:00 2001 From: michal Date: Wed, 5 Nov 2008 11:58:11 +0100 Subject: draw: Implement TGSI_OPCODE_TRUNC. --- src/gallium/auxiliary/draw/draw_vs_aos.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 87232865e2..a6880685db 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -1632,6 +1632,17 @@ static boolean emit_SUB( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } +static boolean emit_TRUNC( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) +{ + struct x86_reg arg0 = fetch_src(cp, &op->FullSrcRegisters[0]); + struct x86_reg tmp0 = aos_get_xmm_reg(cp); + + sse2_cvttps2dq(cp->func, tmp0, arg0); + sse2_cvtdq2ps(cp->func, tmp0, tmp0); + + store_dest(cp, &op->FullDstRegisters[0], tmp0); + return TRUE; +} static boolean emit_XPD( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { @@ -1770,6 +1781,9 @@ emit_instruction( struct aos_compilation *cp, case TGSI_OPCODE_SIN: return emit_SIN(cp, inst); + case TGSI_OPCODE_TRUNC: + return emit_TRUNC(cp, inst); + case TGSI_OPCODE_END: return TRUE; -- cgit v1.2.3 From fc3b361191c35d2b0b072c08e39b1e5b26d7e2a6 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 5 Nov 2008 08:57:11 -0700 Subject: gallium: disable some debug output --- src/gallium/auxiliary/draw/draw_vs_aos.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index a6880685db..6141ba9cbf 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -2190,7 +2190,8 @@ static struct draw_vs_varient *varient_aos_sse( struct draw_vertex_shader *vs, if (!vaos->buffer) goto fail; - debug_printf("nr_vb: %d const: %x\n", vaos->nr_vb, vaos->base.key.const_vbuffers); + if (0) + debug_printf("nr_vb: %d const: %x\n", vaos->nr_vb, vaos->base.key.const_vbuffers); #if 0 tgsi_dump(vs->state.tokens, 0); -- cgit v1.2.3 From 05a17f83b0a6549fde41540f9075505e81ab08d3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 5 Nov 2008 08:58:40 -0700 Subject: gallium: added some debug code (disabled) --- src/gallium/auxiliary/draw/draw_pt.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 87ec6ae20c..b98c0a0ecf 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -33,6 +33,8 @@ #include "draw/draw_context.h" #include "draw/draw_private.h" #include "draw/draw_pt.h" +#include "draw/draw_vs.h" +#include "tgsi/tgsi_dump.h" static unsigned trim( unsigned count, unsigned first, unsigned incr ) { @@ -195,6 +197,28 @@ draw_arrays(struct draw_context *draw, unsigned prim, draw->reduced_prim = reduced_prim; } +#if 0 + { + int i; + debug_printf("draw_arrays(prim=%u start=%u count=%u):\n", + prim, start, count); + tgsi_dump(draw->vs.vertex_shader->state.tokens, 0); + debug_printf("Elements:\n"); + for (i = 0; i < draw->pt.nr_vertex_elements; i++) { + debug_printf(" format=%s comps=%u\n", + pf_name(draw->pt.vertex_element[i].src_format), + draw->pt.vertex_element[i].nr_components); + } + debug_printf("Buffers:\n"); + for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { + debug_printf(" pitch=%u offset=%u ptr=%p\n", + draw->pt.vertex_buffer[i].pitch, + draw->pt.vertex_buffer[i].buffer_offset, + draw->pt.user.vbuffer[i]); + } + } +#endif + /* drawing done here: */ draw_pt_arrays(draw, prim, start, count); } -- cgit v1.2.3 From a137f03c56688c190f3542fb6b7c9a4ff4c80cff Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 5 Nov 2008 13:55:56 -0700 Subject: gallium: added some sanity check assertions for constant buffer indexing --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index df002939c6..ea5a44fb8a 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -958,6 +958,10 @@ fetch_src_file_channel( switch( file ) { case TGSI_FILE_CONSTANT: assert(mach->Consts); + assert(index->i[0] >= 0); + assert(index->i[1] >= 0); + assert(index->i[2] >= 0); + assert(index->i[3] >= 0); chan->f[0] = mach->Consts[index->i[0]][swizzle]; chan->f[1] = mach->Consts[index->i[1]][swizzle]; chan->f[2] = mach->Consts[index->i[2]][swizzle]; -- cgit v1.2.3 From 03c0ce4c61fd970509d605fe78166e828fc1df57 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 5 Nov 2008 13:56:20 -0700 Subject: gallium: added tgsi_set_exec_mask() --- src/gallium/auxiliary/tgsi/tgsi_exec.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index c4e649e69c..fc40a25e09 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -165,6 +165,10 @@ struct tgsi_exec_labels #define TGSI_EXEC_TEMP_HALF_I (TGSI_EXEC_NUM_TEMPS + 3) #define TGSI_EXEC_TEMP_HALF_C 1 +/* execution mask, each value is either 0 or ~0 */ +#define TGSI_EXEC_MASK_I (TGSI_EXEC_NUM_TEMPS + 3) +#define TGSI_EXEC_MASK_C 2 + #define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4) #define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 5) @@ -265,6 +269,27 @@ void tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach); +static INLINE void +tgsi_set_kill_mask(struct tgsi_exec_machine *mach, unsigned mask) +{ + mach->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] = + mask; +} + + +/** Set execution mask values prior to executing the shader */ +static INLINE void +tgsi_set_exec_mask(struct tgsi_exec_machine *mach, + boolean ch0, boolean ch1, boolean ch2, boolean ch3) +{ + int *mask = mach->Temps[TGSI_EXEC_MASK_I].xyzw[TGSI_EXEC_MASK_C].i; + mask[0] = ch0 ? ~0 : 0; + mask[1] = ch1 ? ~0 : 0; + mask[2] = ch2 ? ~0 : 0; + mask[3] = ch3 ? ~0 : 0; +} + + #if defined __cplusplus } /* extern "C" */ #endif -- cgit v1.2.3 From f0debbb0bb951bfc6dc0ae467564b3b1230324cf Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 5 Nov 2008 14:02:07 -0700 Subject: gallium: call tgsi_set_exec_mask() and use exec mask in SSE ARL code This prevents vertex shaders from referencing invalid memory locations when the shader is operating on less than four vertices or fragments. --- src/gallium/auxiliary/draw/draw_vs_exec.c | 6 ++++++ src/gallium/auxiliary/draw/draw_vs_sse.c | 14 +++++++++++++ src/gallium/auxiliary/tgsi/tgsi_sse2.c | 35 ++++++++++++++++++++++++++++--- src/gallium/drivers/softpipe/sp_fs_sse.c | 3 ++- 4 files changed, 54 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 44563803f9..82d27d4493 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -120,6 +120,12 @@ vs_exec_run_linear( struct draw_vertex_shader *shader, input = (const float (*)[4])((const char *)input + input_stride); } + tgsi_set_exec_mask(machine, + 1, + max_vertices > 1, + max_vertices > 2, + max_vertices > 3); + /* run interpreter */ tgsi_exec_machine_run( machine ); diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 0efabd9de8..77ba5152f9 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -99,9 +99,23 @@ vs_sse_run_linear( struct draw_vertex_shader *base, struct tgsi_exec_machine *machine = shader->machine; unsigned int i; + /* By default, execute all channels. XXX move this inside the loop + * below when we support shader conditionals/loops. + */ + tgsi_set_exec_mask(machine, 1, 1, 1, 1); + for (i = 0; i < count; i += MAX_TGSI_VERTICES) { unsigned int max_vertices = MIN2(MAX_TGSI_VERTICES, count - i); + if (max_vertices < 4) { + /* disable the unused execution channels */ + tgsi_set_exec_mask(machine, + 1, + max_vertices > 1, + max_vertices > 2, + 0); + } + /* run compiled shader */ shader->func(machine->Inputs, diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index c115956c5d..4d59106dbf 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -69,6 +69,9 @@ #define TEMP_R0 TGSI_EXEC_TEMP_R0 #define TEMP_ADDR TGSI_EXEC_TEMP_ADDR +#define TEMP_EXEC_MASK_I TGSI_EXEC_MASK_I +#define TEMP_EXEC_MASK_C TGSI_EXEC_MASK_C + /** * X86 utility functions. @@ -230,6 +233,9 @@ emit_const( int indirectIndex ) { if (indirect) { + /* 'vec' is the offset from the address register's value. + * We're loading CONST[ADDR+vec] into an xmm register. + */ struct x86_reg r0 = get_input_base(); struct x86_reg r1 = get_output_base(); uint i; @@ -240,18 +246,40 @@ emit_const( x86_push( func, r0 ); x86_push( func, r1 ); + /* + * Loop over the four pixels or vertices in the quad. + * Get the value of the address (offset) register for pixel/vertex[i], + * add it to the src offset and index into the constant buffer. + * Note that we're working on SOA data. + * If any of the pixel/vertex execution channels are unused their + * values will be garbage. It's very important that we don't use + * those garbage values as indexes into the constant buffer since + * that'll cause segfaults. + * The solution is to bitwise-AND the offset with the execution mask + * register whose values are either 0 or ~0. + * The caller must setup the execution mask register to indicate + * which channels are valid/alive before running the shader. + * The execution mask will also figure into loops and conditionals + * someday. + */ for (i = 0; i < QUAD_SIZE; i++) { - x86_lea( func, r0, get_const( vec, chan ) ); + /* r1 = address register[i] */ x86_mov( func, r1, x86_make_disp( get_temp( TEMP_ADDR, CHAN_X ), i * 4 ) ); + /* r0 = execution mask[i] */ + x86_mov( func, r0, x86_make_disp( get_temp( TEMP_EXEC_MASK_I, TEMP_EXEC_MASK_C ), i * 4 ) ); + /* r1 = r1 & r0 */ + x86_and( func, r1, r0 ); + /* r0 = 'vec', the offset */ + x86_lea( func, r0, get_const( vec, chan ) ); - /* Quick hack to multiply by 16 -- need to add SHL to rtasm. + /* Quick hack to multiply r1 by 16 -- need to add SHL to rtasm. */ x86_add( func, r1, r1 ); x86_add( func, r1, r1 ); x86_add( func, r1, r1 ); x86_add( func, r1, r1 ); - x86_add( func, r0, r1 ); + x86_add( func, r0, r1 ); /* r0 = r0 + r1 */ x86_mov( func, r1, x86_deref( r0 ) ); x86_mov( func, x86_make_disp( get_temp( TEMP_R0, CHAN_X ), i * 4 ), r1 ); } @@ -265,6 +293,7 @@ emit_const( get_temp( TEMP_R0, CHAN_X ) ); } else { + /* 'vec' is the index into the src register file, such as TEMP[vec] */ assert( vec >= 0 ); sse_movss( diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 496ed43df2..50eb2c07bc 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -92,7 +92,8 @@ fs_sse_run( const struct sp_fragment_shader *base, machine->Temps); /* init kill mask */ - machine->Temps[TGSI_EXEC_TEMP_KILMASK_I].xyzw[TGSI_EXEC_TEMP_KILMASK_C].u[0] = 0x0; + tgsi_set_kill_mask(machine, 0x0); + tgsi_set_exec_mask(machine, 1, 1, 1, 1); shader->func( machine->Inputs, machine->Outputs, -- cgit v1.2.3 From cbce12b5404846520bb776f73885f0ea99a13124 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 5 Nov 2008 17:14:00 -0700 Subject: gallium: s/mmDestroy/u_mmDestroy/ --- src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index 6e10cf1806..a976d3041a 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -262,7 +262,7 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer, failure: if(mm->heap) - mmDestroy(mm->heap); + u_mmDestroy(mm->heap); if(mm->map) pb_unmap(mm->buffer); if(mm) -- cgit v1.2.3 From 639a2b0ec853eda49e3e7150b2ed7f8f40d101af Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 5 Nov 2008 19:26:20 -0700 Subject: gallium: don't range check tgsi register index for indirect accesses Fixes progs/vp/arl.txt test. --- src/gallium/auxiliary/tgsi/tgsi_sanity.c | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c index 11659247c0..bc7b941b78 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c @@ -153,17 +153,21 @@ check_register_usage( if (!check_file_name( ctx, file )) return FALSE; - if (index < 0 || index > MAX_REGISTERS) { - report_error( ctx, "%s[%i]: Invalid index %s", file_names[file], index, name ); - return FALSE; - } - if (indirect_access) { + /* Note that 'index' is an offset relative to the value of the + * address register. No range checking done here. + */ if (!is_any_register_declared( ctx, file )) report_error( ctx, "%s: Undeclared %s register", file_names[file], name ); ctx->regs_ind_used[file] = TRUE; } else { + if (index < 0 || index > MAX_REGISTERS) { + report_error( ctx, "%s[%i]: Invalid index %s", + file_names[file], index, name ); + return FALSE; + } + if (!is_register_declared( ctx, file, index )) report_error( ctx, "%s[%d]: Undeclared %s register", file_names[file], index, name ); ctx->regs_used[file][index / BITS_IN_REG_FLAG] |= (1 << (index % BITS_IN_REG_FLAG)); -- cgit v1.2.3 From 5b2b064a5c1328449e3eb8179afc2ba366f18ae6 Mon Sep 17 00:00:00 2001 From: Brian Date: Wed, 5 Nov 2008 20:04:49 -0700 Subject: gallium: check execution mask in indirect register loads Zero-out the index for disabled execution channels to avoid using potential garbage values (thus avoiding bad array indexing). --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index ea5a44fb8a..53e92b96ae 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1045,12 +1045,16 @@ fetch_source( if (reg->SrcRegister.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; + const uint execmask = mach->ExecMask; + uint i; + /* which address register (always zero now) */ index2.i[0] = index2.i[1] = index2.i[2] = index2.i[3] = reg->SrcRegisterInd.Index; + /* get current value of address register[swizzle] */ swizzle = tgsi_util_get_src_register_swizzle( ®->SrcRegisterInd, CHAN_X ); fetch_src_file_channel( mach, @@ -1059,10 +1063,19 @@ fetch_source( &index2, &indir_index ); + /* add value of address register to the offset */ index.i[0] += indir_index.i[0]; index.i[1] += indir_index.i[1]; index.i[2] += indir_index.i[2]; index.i[3] += indir_index.i[3]; + + /* for disabled execution channels, zero-out the index to + * avoid using a potential garbage value. + */ + for (i = 0; i < QUAD_SIZE; i++) { + if ((execmask & (1 << i)) == 0) + index.i[i] = 0; + } } if( reg->SrcRegister.Dimension ) { @@ -1091,6 +1104,8 @@ fetch_source( if (reg->SrcRegisterDim.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; + const uint execmask = mach->ExecMask; + uint i; index2.i[0] = index2.i[1] = @@ -1109,6 +1124,14 @@ fetch_source( index.i[1] += indir_index.i[1]; index.i[2] += indir_index.i[2]; index.i[3] += indir_index.i[3]; + + /* for disabled execution channels, zero-out the index to + * avoid using a potential garbage value. + */ + for (i = 0; i < QUAD_SIZE; i++) { + if ((execmask & (1 << i)) == 0) + index.i[i] = 0; + } } } -- cgit v1.2.3 From d177c9ddda2c452cf7d6696d89cf4458ef986f98 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 6 Nov 2008 16:07:28 -0500 Subject: gallium: actually flip the coordinates --- src/gallium/auxiliary/util/u_rect.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c index f5619ef791..30f32413d7 100644 --- a/src/gallium/auxiliary/util/u_rect.c +++ b/src/gallium/auxiliary/util/u_rect.c @@ -222,7 +222,8 @@ util_surface_copy(struct pipe_context *pipe, w, h, src_map, do_flip ? -(int) src->stride : src->stride, - src_x, src_y); + src_x, + do_flip ? w - src_y : src_y); } pipe->screen->surface_unmap(pipe->screen, src); -- cgit v1.2.3 From 6c3e7365d5245cfad597cd69e2f8f689e62546b9 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 6 Nov 2008 14:57:20 -0700 Subject: gallium: debug code to print vertex array data (disabled) --- src/gallium/auxiliary/draw/draw_pt.c | 89 ++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index b98c0a0ecf..3c175f31d8 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -178,6 +178,92 @@ void draw_pt_destroy( struct draw_context *draw ) } +/** + * Debug- print the first 'count' vertices. + */ +static void +draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) +{ + uint i; + + debug_printf("Draw arrays(prim = %u, start = %u, count = %u)\n", + prim, start, count); + + for (i = 0; i < count; i++) { + uint ii, j; + + if (draw->pt.user.elts) { + /* indexed arrays */ + switch (draw->pt.user.eltSize) { + case 1: + { + const ubyte *elem = (const ubyte *) draw->pt.user.elts; + ii = elem[start + i]; + } + break; + case 2: + { + const ushort *elem = (const ushort *) draw->pt.user.elts; + ii = elem[start + i]; + } + break; + case 4: + { + const uint *elem = (const uint *) draw->pt.user.elts; + ii = elem[start + i]; + } + break; + default: + assert(0); + } + debug_printf("Element[%u + %u] -> Vertex %u:\n", start, i, ii); + } + else { + /* non-indexed arrays */ + ii = start + i; + debug_printf("Vertex %u:\n", ii); + } + + for (j = 0; j < draw->pt.nr_vertex_elements; j++) { + uint buf = draw->pt.vertex_element[j].vertex_buffer_index; + ubyte *ptr = (ubyte *) draw->pt.user.vbuffer[buf]; + ptr += draw->pt.vertex_buffer[buf].pitch * ii; + ptr += draw->pt.vertex_element[j].src_offset; + + debug_printf(" Attr %u: ", j); + switch (draw->pt.vertex_element[j].src_format) { + case PIPE_FORMAT_R32_FLOAT: + { + float *v = (float *) ptr; + debug_printf("%f @ %p\n", v[0], (void *) v); + } + break; + case PIPE_FORMAT_R32G32_FLOAT: + { + float *v = (float *) ptr; + debug_printf("%f %f @ %p\n", v[0], v[1], (void *) v); + } + break; + case PIPE_FORMAT_R32G32B32_FLOAT: + { + float *v = (float *) ptr; + debug_printf("%f %f %f @ %p\n", v[0], v[1], v[2], (void *) v); + } + break; + case PIPE_FORMAT_R32G32B32A32_FLOAT: + { + float *v = (float *) ptr; + debug_printf("%f %f %f %f @ %p\n", v[0], v[1], v[2], v[3], + (void *) v); + } + break; + default: + debug_printf("other format (fix me)\n"); + ; + } + } + } +} /** @@ -197,6 +283,9 @@ draw_arrays(struct draw_context *draw, unsigned prim, draw->reduced_prim = reduced_prim; } + if (0) + draw_print_arrays(draw, prim, start, MIN2(count, 20)); + #if 0 { int i; -- cgit v1.2.3 From bb8a9ce705f309a3b38d10c61c3865db79a0f71c Mon Sep 17 00:00:00 2001 From: Brian Date: Thu, 6 Nov 2008 19:24:47 -0700 Subject: gallium: implement TGSI_OPCODE_NRM/NRM4 in tgsi_exec.c --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 59 +++++++++++++++++++++++++++++++++- 1 file changed, 58 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 53e92b96ae..41dffc3dba 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2462,7 +2462,64 @@ exec_instruction( break; case TGSI_OPCODE_NRM: - assert (0); + /* 3-component vector normalize */ + { + union tgsi_exec_channel tmp, dot; + + /* tmp = dp3(src0, src0): */ + FETCH( &r[0], 0, CHAN_X ); + micro_mul( &tmp, &r[0], &r[0] ); + + FETCH( &r[1], 0, CHAN_Y ); + micro_mul( &dot, &r[1], &r[1] ); + micro_add( &tmp, &tmp, &dot ); + + FETCH( &r[2], 0, CHAN_Z ); + micro_mul( &dot, &r[2], &r[2] ); + micro_add( &tmp, &tmp, &dot ); + + /* tmp = 1 / tmp */ + micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); + + /* note: w channel is undefined */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + /* chan = chan * tmp */ + micro_mul( &r[chan_index], &tmp, &r[chan_index] ); + STORE( &r[chan_index], 0, chan_index ); + } + } + break; + + case TGSI_OPCODE_NRM4: + /* 4-component vector normalize */ + { + union tgsi_exec_channel tmp, dot; + + /* tmp = dp4(src0, src0): */ + FETCH( &r[0], 0, CHAN_X ); + micro_mul( &tmp, &r[0], &r[0] ); + + FETCH( &r[1], 0, CHAN_Y ); + micro_mul( &dot, &r[1], &r[1] ); + micro_add( &tmp, &tmp, &dot ); + + FETCH( &r[2], 0, CHAN_Z ); + micro_mul( &dot, &r[2], &r[2] ); + micro_add( &tmp, &tmp, &dot ); + + FETCH( &r[3], 0, CHAN_W ); + micro_mul( &dot, &r[3], &r[3] ); + micro_add( &tmp, &tmp, &dot ); + + /* tmp = 1 / tmp */ + micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + /* chan = chan * tmp */ + micro_mul( &r[chan_index], &tmp, &r[chan_index] ); + STORE( &r[chan_index], 0, chan_index ); + } + } break; case TGSI_OPCODE_DIV: -- cgit v1.2.3 From cf9836cf09790de70732963ea571b83719c0c03c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 7 Nov 2008 13:02:43 -0700 Subject: gallium: implement TGSI_OPCODE_DP2A, add sqrt to NRM3/NRM4 --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 8d135f8777..1da04ab7e0 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2034,7 +2034,21 @@ exec_instruction( case TGSI_OPCODE_DOT2ADD: /* TGSI_OPCODE_DP2A */ - assert (0); + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH( &r[2], 2, CHAN_X ); + micro_add( &r[0], &r[0], &r[2] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } break; case TGSI_OPCODE_INDEX: @@ -2479,7 +2493,8 @@ exec_instruction( micro_mul( &dot, &r[2], &r[2] ); micro_add( &tmp, &tmp, &dot ); - /* tmp = 1 / tmp */ + /* tmp = 1 / sqrt(tmp) */ + micro_sqrt( &tmp, &tmp ); micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); /* note: w channel is undefined */ @@ -2512,7 +2527,8 @@ exec_instruction( micro_mul( &dot, &r[3], &r[3] ); micro_add( &tmp, &tmp, &dot ); - /* tmp = 1 / tmp */ + /* tmp = 1 / sqrt(tmp) */ + micro_sqrt( &tmp, &tmp ); micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { -- cgit v1.2.3 From a52a6d7bcdaa47604151b9af07ebcd394316e784 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 7 Nov 2008 13:03:07 -0700 Subject: gallium: added SSE for DP2, DP2A --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 24 ++++++++++++++++++++++-- 1 file changed, 22 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 3df0c5db3f..b8e4ab6d83 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1742,7 +1742,18 @@ emit_instruction( case TGSI_OPCODE_DOT2ADD: /* TGSI_OPCODE_DP2A */ - return 0; + FETCH( func, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */ + FETCH( func, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */ + emit_mul( func, 0, 1 ); /* xmm0 = xmm0 * xmm1 */ + FETCH( func, *inst, 1, 0, CHAN_Y ); /* xmm1 = src[0].y */ + FETCH( func, *inst, 2, 1, CHAN_Y ); /* xmm2 = src[1].y */ + emit_mul( func, 1, 2 ); /* xmm1 = xmm1 * xmm2 */ + emit_add( func, 0, 1 ); /* xmm0 = xmm0 + xmm1 */ + FETCH( func, *inst, 1, 2, CHAN_X ); /* xmm1 = src[2].x */ + emit_add( func, 0, 1 ); /* xmm0 = xmm0 + xmm1 */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); /* dest[ch] = xmm0 */ + } break; case TGSI_OPCODE_INDEX: @@ -2084,7 +2095,16 @@ emit_instruction( break; case TGSI_OPCODE_DP2: - return 0; + FETCH( func, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */ + FETCH( func, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */ + emit_mul( func, 0, 1 ); /* xmm0 = xmm0 * xmm1 */ + FETCH( func, *inst, 1, 0, CHAN_Y ); /* xmm1 = src[0].y */ + FETCH( func, *inst, 2, 1, CHAN_Y ); /* xmm2 = src[1].y */ + emit_mul( func, 1, 2 ); /* xmm1 = xmm1 * xmm2 */ + emit_add( func, 0, 1 ); /* xmm0 = xmm0 + xmm1 */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( func, *inst, 0, 0, chan_index ); /* dest[ch] = xmm0 */ + } break; case TGSI_OPCODE_TXL: -- cgit v1.2.3 From a58dbf34ca88656739a8f8e5f4259e760365c9d0 Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 8 Nov 2008 10:29:23 -0700 Subject: gallium: implement SSE codegen for TGSI_OPCODE_NRM/NRM4 --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index b8e4ab6d83..3ce2c1c27b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2087,7 +2087,39 @@ emit_instruction( break; case TGSI_OPCODE_NRM: - return 0; + /* fall-through */ + case TGSI_OPCODE_NRM4: + /* 3 or 4-component normalization */ + { + uint dims = (inst->Instruction.Opcode == TGSI_OPCODE_NRM) ? 3 : 4; + /* note: cannot use xmm regs 2/3 here (see emit_rsqrt() above) */ + FETCH( func, *inst, 4, 0, CHAN_X ); /* xmm4 = src[0].x */ + FETCH( func, *inst, 5, 0, CHAN_Y ); /* xmm5 = src[0].y */ + FETCH( func, *inst, 6, 0, CHAN_Z ); /* xmm6 = src[0].z */ + if (dims == 4) { + FETCH( func, *inst, 7, 0, CHAN_W ); /* xmm7 = src[0].w */ + } + emit_MOV( func, 0, 4 ); /* xmm0 = xmm3 */ + emit_mul( func, 0, 4 ); /* xmm0 *= xmm3 */ + emit_MOV( func, 1, 5 ); /* xmm1 = xmm4 */ + emit_mul( func, 1, 5 ); /* xmm1 *= xmm4 */ + emit_add( func, 0, 1 ); /* xmm0 += xmm1 */ + emit_MOV( func, 1, 6 ); /* xmm1 = xmm5 */ + emit_mul( func, 1, 6 ); /* xmm1 *= xmm5 */ + emit_add( func, 0, 1 ); /* xmm0 += xmm1 */ + if (dims == 4) { + emit_MOV( func, 1, 7 ); /* xmm1 = xmm7 */ + emit_mul( func, 1, 7 ); /* xmm1 *= xmm7 */ + emit_add( func, 0, 0 ); /* xmm0 += xmm1 */ + } + emit_rsqrt( func, 1, 0 ); /* xmm1 = 1/sqrt(xmm0) */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + if (chan_index < dims) { + emit_mul( func, 4+chan_index, 1); /* xmm[4+ch] *= xmm1 */ + STORE( func, *inst, 4+chan_index, 0, chan_index ); + } + } + } break; case TGSI_OPCODE_DIV: -- cgit v1.2.3 From 399da3a337932c6074a69ac73e711138271308eb Mon Sep 17 00:00:00 2001 From: Brian Date: Sun, 9 Nov 2008 09:36:22 -0700 Subject: gallium: use PIPE_ARCH_SSE to protect use of SSE instrinsics only This allows us to use SSE codegen with debug builds again. When PIPE_ARCH_SSE is set (w/ gcc -msse -msse2) we will also use the gcc SSE intrinsic functions. --- src/gallium/auxiliary/draw/draw_vs_sse.c | 2 +- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 42 +++++++++++++++++++++++++------- src/gallium/drivers/softpipe/sp_fs_sse.c | 2 +- 3 files changed, 35 insertions(+), 11 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c index 0e2036f12a..77ba5152f9 100644 --- a/src/gallium/auxiliary/draw/draw_vs_sse.c +++ b/src/gallium/auxiliary/draw/draw_vs_sse.c @@ -37,7 +37,7 @@ #include "draw_vs.h" -#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE) +#if defined(PIPE_ARCH_X86) #include "pipe/p_shader_tokens.h" diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 3ce2c1c27b..f93db18114 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -27,12 +27,14 @@ #include "pipe/p_config.h" -#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE) +#if defined(PIPE_ARCH_X86) #include "pipe/p_debug.h" #include "pipe/p_shader_tokens.h" #include "util/u_math.h" +#if defined(PIPE_ARCH_SSE) #include "util/u_sse.h" +#endif #include "tgsi/tgsi_parse.h" #include "tgsi/tgsi_util.h" #include "tgsi_exec.h" @@ -627,6 +629,9 @@ emit_func_call_dst_src( code ); } + +#if defined(PIPE_ARCH_SSE) + /* * Fast SSE2 implementation of special math functions. */ @@ -678,6 +683,7 @@ exp2f4(__m128 x) return _mm_mul_ps(expipart, expfpart); } + /** * See http://www.devmaster.net/forums/showthread.php?p=43580 */ @@ -720,12 +726,16 @@ log2f4(__m128 x) return _mm_add_ps(logmant, exp); } + static INLINE __m128 powf4(__m128 x, __m128 y) { return exp2f4(_mm_mul_ps(log2f4(x), y)); } +#endif /* PIPE_ARCH_SSE */ + + /** * Low-level instruction translators. @@ -780,13 +790,20 @@ emit_cos( } static void PIPE_CDECL -#if defined(PIPE_CC_GCC) +#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE) __attribute__((force_align_arg_pointer)) #endif ex24f( float *store ) { +#if defined(PIPE_ARCH_SSE) _mm_store_ps(&store[0], exp2f4( _mm_load_ps(&store[0]) )); +#else + store[0] = util_fast_exp2( store[0] ); + store[1] = util_fast_exp2( store[1] ); + store[2] = util_fast_exp2( store[2] ); + store[3] = util_fast_exp2( store[3] ); +#endif } static void @@ -871,13 +888,20 @@ emit_frc( } static void PIPE_CDECL -#if defined(PIPE_CC_GCC) +#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE) __attribute__((force_align_arg_pointer)) #endif lg24f( float *store ) { +#if defined(PIPE_ARCH_SSE) _mm_store_ps(&store[0], log2f4( _mm_load_ps(&store[0]) )); +#else + store[0] = util_fast_log2( store[0] ); + store[1] = util_fast_log2( store[1] ); + store[2] = util_fast_log2( store[2] ); + store[3] = util_fast_log2( store[3] ); +#endif } static void @@ -930,19 +954,19 @@ emit_neg( } static void PIPE_CDECL -#if defined(PIPE_CC_GCC) +#if defined(PIPE_CC_GCC) && defined(PIPE_ARCH_SSE) __attribute__((force_align_arg_pointer)) #endif pow4f( float *store ) { -#if 1 +#if defined(PIPE_ARCH_SSE) _mm_store_ps(&store[0], powf4( _mm_load_ps(&store[0]), _mm_load_ps(&store[4]) )); #else - store[0] = powf( store[0], store[4] ); - store[1] = powf( store[1], store[5] ); - store[2] = powf( store[2], store[6] ); - store[3] = powf( store[3], store[7] ); + store[0] = util_fast_pow( store[0], store[4] ); + store[1] = util_fast_pow( store[1], store[5] ); + store[2] = util_fast_pow( store[2], store[6] ); + store[3] = util_fast_pow( store[3], store[7] ); #endif } diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 8aa597f633..31908a517b 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -40,7 +40,7 @@ #include "tgsi/tgsi_sse2.h" -#if defined(PIPE_ARCH_X86) && defined(PIPE_ARCH_SSE) +#if defined(PIPE_ARCH_X86) #include "rtasm/rtasm_x86sse.h" -- cgit v1.2.3 From 7e8315701945ec807b2b5a9d01250b5ab74ae183 Mon Sep 17 00:00:00 2001 From: Brian Date: Sat, 8 Nov 2008 20:43:38 -0700 Subject: gallium: _debug_vprintf() should be silent if DEBUG is not defined --- src/gallium/auxiliary/util/p_debug.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index b6cff281e6..6ff3e6e0a6 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -101,8 +101,10 @@ void _debug_vprintf(const char *format, va_list ap) #elif defined(PIPE_SUBSYSTEM_WINDOWS_CE) || defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) /* TODO */ #else /* !PIPE_SUBSYSTEM_WINDOWS */ +#ifdef DEBUG vfprintf(stderr, format, ap); #endif +#endif } -- cgit v1.2.3 From 325cbeb29a63e3d71da00baeab864970fe3aa595 Mon Sep 17 00:00:00 2001 From: Brian Date: Sun, 9 Nov 2008 10:15:32 -0700 Subject: util: Fix util_fast_pow/exp2/log2. - Use a lookup table for log2. - Compute (float) (1 << ipart) by tweaking with the exponent directly to avoid integer overflow and float conversion. - Also table negative exponents to avoid float division and branching. - Implement util_fast_exp as function of util_fast_exp2. -------- Cherry-picked from gallium-0.2: 8415d06d90a197e16554dab98d160334fd9f9f93 This fixes some pow() glitches seen in fslight.c, spectex.c, etc. Conflicts: src/gallium/auxiliary/util/u_math.h --- src/gallium/auxiliary/util/u_math.c | 21 +++++-- src/gallium/auxiliary/util/u_math.h | 112 +++++++++++++++--------------------- 2 files changed, 64 insertions(+), 69 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_math.c b/src/gallium/auxiliary/util/u_math.c index 0729114d6a..5b3cab4642 100644 --- a/src/gallium/auxiliary/util/u_math.c +++ b/src/gallium/auxiliary/util/u_math.c @@ -30,7 +30,7 @@ #include "util/u_math.h" - +/** 2^x, for x in [-1.0, 1.0[ */ float pow2_table[POW2_TABLE_SIZE]; @@ -38,9 +38,21 @@ static void init_pow2_table(void) { int i; - for (i = 0; i < POW2_TABLE_SIZE; i++) { - pow2_table[i] = (float) pow(2.0, i / POW2_TABLE_SCALE); - } + for (i = 0; i < POW2_TABLE_SIZE; i++) + pow2_table[i] = (float) pow(2.0, (i - POW2_TABLE_OFFSET) / POW2_TABLE_SCALE); +} + + +/** log2(x), for x in [1.0, 2.0[ */ +float log2_table[LOG2_TABLE_SIZE]; + + +static void +init_log2_table(void) +{ + unsigned i; + for (i = 0; i < LOG2_TABLE_SIZE; i++) + log2_table[i] = (float) log2(1.0 + i * (1.0 / LOG2_TABLE_SIZE)); } @@ -53,6 +65,7 @@ util_init_math(void) static boolean initialized = FALSE; if (!initialized) { init_pow2_table(); + init_log2_table(); initialized = TRUE; } } diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 196aeb28fa..be7303e550 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -174,8 +174,10 @@ static INLINE float logf( float f ) -#define POW2_TABLE_SIZE 256 -#define POW2_TABLE_SCALE ((float) (POW2_TABLE_SIZE-1)) +#define POW2_TABLE_SIZE_LOG2 9 +#define POW2_TABLE_SIZE (1 << POW2_TABLE_SIZE_LOG2) +#define POW2_TABLE_OFFSET (POW2_TABLE_SIZE/2) +#define POW2_TABLE_SCALE ((float)(POW2_TABLE_SIZE/2)) extern float pow2_table[POW2_TABLE_SIZE]; @@ -186,98 +188,78 @@ util_init_math(void); union fi { float f; - int i; - unsigned ui; + int32_t i; + uint32_t ui; }; /** - * Fast approximation to exp(x). - * Compute with base 2 exponents: exp(x) = exp2(log2(e) * x) - * Note: log2(e) is a constant, k = 1.44269 - * So, exp(x) = exp2(k * x); + * Fast version of 2^x * Identity: exp2(a + b) = exp2(a) * exp2(b) - * Let ipart = int(k*x) - * Let fpart = k*x - ipart; - * So, exp2(k*x) = exp2(ipart) * exp2(fpart) + * Let ipart = int(x) + * Let fpart = x - ipart; + * So, exp2(x) = exp2(ipart) * exp2(fpart) * Compute exp2(ipart) with i << ipart * Compute exp2(fpart) with lookup table. */ static INLINE float -util_fast_exp(float x) +util_fast_exp2(float x) { - if (x >= 0.0f) { - float k = 1.44269f; /* = log2(e) */ - float kx = k * x; - int ipart = (int) kx; - float fpart = kx - (float) ipart; - float y = (float) (1 << ipart) - * pow2_table[(int) (fpart * POW2_TABLE_SCALE)]; - return y; - } - else { - /* exp(-x) = 1.0 / exp(x) */ - float k = -1.44269f; - float kx = k * x; - int ipart = (int) kx; - float fpart = kx - (float) ipart; - float y = (float) (1 << ipart) - * pow2_table[(int) (fpart * POW2_TABLE_SCALE)]; - return 1.0f / y; - } + int32_t ipart; + float fpart, mpart; + union fi epart; + + if(x > 129.00000f) + return 3.402823466e+38f; + + if(x < -126.99999f) + return 0.0f; + + ipart = (int32_t) x; + fpart = x - (float) ipart; + + /* same as + * epart.f = (float) (1 << ipart) + * but faster and without integer overflow for ipart > 31 */ + epart.i = (ipart + 127 ) << 23; + + mpart = pow2_table[POW2_TABLE_OFFSET + (int)(fpart * POW2_TABLE_SCALE)]; + + return epart.f * mpart; } /** - * Fast version of 2^x - * XXX the above function could be implemented in terms of this one. + * Fast approximation to exp(x). */ static INLINE float -util_fast_exp2(float x) +util_fast_exp(float x) { - if (x >= 0.0f) { - int ipart = (int) x; - float fpart = x - (float) ipart; - float y = (float) (1 << ipart) - * pow2_table[(int) (fpart * POW2_TABLE_SCALE)]; - return y; - } - else { - /* exp(-x) = 1.0 / exp(x) */ - int ipart = (int) -x; - float fpart = -x - (float) ipart; - float y = (float) (1 << ipart) - * pow2_table[(int) (fpart * POW2_TABLE_SCALE)]; - return 1.0f / y; - } + const float k = 1.44269f; /* = log2(e) */ + return util_fast_exp2(k * x); } -/** - * Based on code from http://www.flipcode.com/totd/ - */ +#define LOG2_TABLE_SIZE_LOG2 8 +#define LOG2_TABLE_SIZE (1 << LOG2_TABLE_SIZE_LOG2) +extern float log2_table[LOG2_TABLE_SIZE]; + + static INLINE float -util_fast_log2(float val) +util_fast_log2(float x) { union fi num; - int log_2; - num.f = val; - log_2 = ((num.i >> 23) & 255) - 128; - num.i &= ~(255 << 23); - num.i += 127 << 23; - num.f = ((-1.0f/3) * num.f + 2) * num.f - 2.0f/3; - return num.f + log_2; + float epart, mpart; + num.f = x; + epart = (float)(((num.i & 0x7f800000) >> 23) - 127); + mpart = log2_table[(num.i & 0x007fffff) >> (23 - LOG2_TABLE_SIZE_LOG2)]; + return epart + mpart; } static INLINE float util_fast_pow(float x, float y) { - /* XXX these tests may need adjustment */ - if (y >= 3.0f && (-0.02f <= x && x <= 0.02f)) - return 0.0f; - if (y >= 50.0f && (-0.9f <= x && x <= 0.9f)) - return 0.0f; return util_fast_exp2(util_fast_log2(x) * y); } -- cgit v1.2.3 From 5668e7fa80d71bec38c61ea29e6a2a9996e0a73c Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 6 Nov 2008 16:07:28 -0500 Subject: gallium: actually flip the coordinates --- src/gallium/auxiliary/util/u_rect.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c index f5619ef791..30f32413d7 100644 --- a/src/gallium/auxiliary/util/u_rect.c +++ b/src/gallium/auxiliary/util/u_rect.c @@ -222,7 +222,8 @@ util_surface_copy(struct pipe_context *pipe, w, h, src_map, do_flip ? -(int) src->stride : src->stride, - src_x, src_y); + src_x, + do_flip ? w - src_y : src_y); } pipe->screen->surface_unmap(pipe->screen, src); -- cgit v1.2.3 From 2276dcf05f7e0ae13ba434615cf7f34dc06b2afe Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 10 Nov 2008 08:24:45 -0700 Subject: gallium: fix typos in comments --- src/gallium/auxiliary/util/u_math.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_math.c b/src/gallium/auxiliary/util/u_math.c index 5b3cab4642..9c5f616ceb 100644 --- a/src/gallium/auxiliary/util/u_math.c +++ b/src/gallium/auxiliary/util/u_math.c @@ -30,7 +30,7 @@ #include "util/u_math.h" -/** 2^x, for x in [-1.0, 1.0[ */ +/** 2^x, for x in [-1.0, 1.0] */ float pow2_table[POW2_TABLE_SIZE]; @@ -43,7 +43,7 @@ init_pow2_table(void) } -/** log2(x), for x in [1.0, 2.0[ */ +/** log2(x), for x in [1.0, 2.0] */ float log2_table[LOG2_TABLE_SIZE]; -- cgit v1.2.3 From ff42991c720bc1cfbf72194447fde0bebbd65b85 Mon Sep 17 00:00:00 2001 From: Brian Date: Mon, 10 Nov 2008 20:22:36 -0700 Subject: gallium: fix comment again. A half-closed interval was intended. Never saw the [a,b[ notation before. --- src/gallium/auxiliary/util/u_math.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_math.c b/src/gallium/auxiliary/util/u_math.c index 9c5f616ceb..d1571cd1fc 100644 --- a/src/gallium/auxiliary/util/u_math.c +++ b/src/gallium/auxiliary/util/u_math.c @@ -30,7 +30,7 @@ #include "util/u_math.h" -/** 2^x, for x in [-1.0, 1.0] */ +/** 2^x, for x in [-1.0, 1.0) */ float pow2_table[POW2_TABLE_SIZE]; @@ -43,7 +43,7 @@ init_pow2_table(void) } -/** log2(x), for x in [1.0, 2.0] */ +/** log2(x), for x in [1.0, 2.0) */ float log2_table[LOG2_TABLE_SIZE]; -- cgit v1.2.3 From 90027f85786406133a5180998a75fb612b6a221e Mon Sep 17 00:00:00 2001 From: Robert Ellison Date: Tue, 11 Nov 2008 13:57:10 -0700 Subject: CELL: two-sided stencil fixes With these changes, the tests/stencil_twoside test now works. - Eliminate blending from the stencil_twoside test, as it produces an unneeded dependency on having blending working - The spe_splat() function will now work if the register being splatted and the destination register are the same - Separate fragment code generated for front-facing and back-facing fragments. Often these are the same; if two-sided stenciling is on, they can be different. This is easier and faster than generating code that does both tests and merges the results. - Fixed a cut/paste bug where if the back Z-pass stencil operation were different from all the other operations, the back Z-fail results were incorrect. --- progs/tests/stencil_twoside.c | 2 - src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 7 +- src/gallium/drivers/cell/common.h | 6 +- src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 239 ++++++--------------- src/gallium/drivers/cell/ppu/cell_gen_fragment.h | 2 +- src/gallium/drivers/cell/ppu/cell_state_emit.c | 19 +- src/gallium/drivers/cell/spu/spu_command.c | 6 +- src/gallium/drivers/cell/spu/spu_main.c | 6 +- src/gallium/drivers/cell/spu/spu_main.h | 10 +- src/gallium/drivers/cell/spu/spu_per_fragment_op.c | 3 +- src/gallium/drivers/cell/spu/spu_per_fragment_op.h | 3 +- src/gallium/drivers/cell/spu/spu_tri.c | 20 +- 12 files changed, 115 insertions(+), 208 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/progs/tests/stencil_twoside.c b/progs/tests/stencil_twoside.c index be9d9a776a..8826c46fc2 100644 --- a/progs/tests/stencil_twoside.c +++ b/progs/tests/stencil_twoside.c @@ -115,7 +115,6 @@ static void Display( void ) glVertex2f(-1, 1); glEnd(); - if (use20syntax) { stencil_func_separate(GL_FRONT, GL_ALWAYS, 0, ~0); stencil_func_separate(GL_BACK, GL_ALWAYS, 0, ~0); @@ -279,7 +278,6 @@ static void Init( void ) stencil_op_separate = glutGetProcAddress( "glStencilOpSeparate" ); printf("\nAll 5 squares should be the same color.\n"); - glEnable( GL_BLEND ); } diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index f8568f690b..1bd9f1c8dd 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -958,9 +958,12 @@ spe_compare_greater_uint(struct spe_function *p, unsigned rT, unsigned rA, unsig void spe_splat(struct spe_function *p, unsigned rT, unsigned rA) { + /* Use a temporary, just in case rT == rA */ + unsigned int tmp_reg = spe_allocate_available_register(p); /* Duplicate bytes 0, 1, 2, and 3 across the whole register */ - spe_ila(p, rT, 0x00010203); - spe_shufb(p, rT, rA, rA, rT); + spe_ila(p, tmp_reg, 0x00010203); + spe_shufb(p, rT, rA, rA, tmp_reg); + spe_release_register(p, tmp_reg); } diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index 87488ea2d7..a670ed3c6e 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -130,6 +130,9 @@ #define CELL_FENCE_EMITTED 1 #define CELL_FENCE_SIGNALLED 2 +#define CELL_FACING_FRONT 0 +#define CELL_FACING_BACK 1 + struct cell_fence { /** There's a 16-byte status qword per SPU */ @@ -160,7 +163,8 @@ struct cell_command_fragment_ops struct pipe_depth_stencil_alpha_state dsa; struct pipe_blend_state blend; struct pipe_blend_color blend_color; - unsigned code[SPU_MAX_FRAGMENT_OPS_INSTS]; + unsigned code_front[SPU_MAX_FRAGMENT_OPS_INSTS]; + unsigned code_back[SPU_MAX_FRAGMENT_OPS_INSTS]; }; diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index d9c3ff3f4d..6e425eafaa 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -1412,144 +1412,72 @@ gen_stencil_values(struct spe_function *f, unsigned int stencil_op, * and released by the corresponding spe_release_register_set() call. */ static void -gen_get_stencil_values(struct spe_function *f, const struct pipe_depth_stencil_alpha_state *dsa, +gen_get_stencil_values(struct spe_function *f, const struct pipe_stencil_state *stencil, + const unsigned int depth_enabled, unsigned int fbS_reg, unsigned int *fail_reg, unsigned int *zfail_reg, - unsigned int *zpass_reg, unsigned int *back_fail_reg, - unsigned int *back_zfail_reg, unsigned int *back_zpass_reg) + unsigned int *zpass_reg) { - unsigned zfail_op, back_zfail_op; + unsigned zfail_op; /* Stenciling had better be enabled here */ - ASSERT(dsa->stencil[0].enabled); + ASSERT(stencil->enabled); /* If the depth test is not enabled, it is treated as though it always - * passes. In particular, that means that the "zfail_op" (and the backfacing - * counterpart, if active) are not considered - a failing stencil test will - * trigger the "fail_op", and a passing stencil test will trigger the - * "zpass_op". + * passes, which means that the zfail_op is not considered - a + * failing stencil test triggers the fail_op, and a passing one + * triggers the zpass_op * - * By overriding the operations in this case to be PIPE_STENCIL_OP_KEEP, - * we keep them from being calculated. + * As an optimization, override calculation of the zfail_op values + * if they aren't going to be used. By setting the value of + * the operation to PIPE_STENCIL_OP_KEEP, its value will be assumed + * to match the incoming stencil values, and no calculation will + * be done. */ - if (dsa->depth.enabled) { - zfail_op = dsa->stencil[0].zfail_op; - back_zfail_op = dsa->stencil[1].zfail_op; + if (depth_enabled) { + zfail_op = stencil->zfail_op; } else { zfail_op = PIPE_STENCIL_OP_KEEP; - back_zfail_op = PIPE_STENCIL_OP_KEEP; } /* One-sided or front-facing stencil */ - if (dsa->stencil[0].fail_op == PIPE_STENCIL_OP_KEEP) { + if (stencil->fail_op == PIPE_STENCIL_OP_KEEP) { *fail_reg = fbS_reg; } else { *fail_reg = spe_allocate_available_register(f); - gen_stencil_values(f, dsa->stencil[0].fail_op, dsa->stencil[0].ref_value, + gen_stencil_values(f, stencil->fail_op, stencil->ref_value, 0xff, fbS_reg, *fail_reg); } + /* Check the possibly overridden value, not the structure value */ if (zfail_op == PIPE_STENCIL_OP_KEEP) { *zfail_reg = fbS_reg; } - else if (zfail_op == dsa->stencil[0].fail_op) { + else if (zfail_op == stencil->fail_op) { *zfail_reg = *fail_reg; } else { *zfail_reg = spe_allocate_available_register(f); - gen_stencil_values(f, dsa->stencil[0].zfail_op, dsa->stencil[0].ref_value, + gen_stencil_values(f, stencil->zfail_op, stencil->ref_value, 0xff, fbS_reg, *zfail_reg); } - if (dsa->stencil[0].zpass_op == PIPE_STENCIL_OP_KEEP) { + if (stencil->zpass_op == PIPE_STENCIL_OP_KEEP) { *zpass_reg = fbS_reg; } - else if (dsa->stencil[0].zpass_op == dsa->stencil[0].fail_op) { + else if (stencil->zpass_op == stencil->fail_op) { *zpass_reg = *fail_reg; } - else if (dsa->stencil[0].zpass_op == zfail_op) { + else if (stencil->zpass_op == zfail_op) { *zpass_reg = *zfail_reg; } else { *zpass_reg = spe_allocate_available_register(f); - gen_stencil_values(f, dsa->stencil[0].zpass_op, dsa->stencil[0].ref_value, + gen_stencil_values(f, stencil->zpass_op, stencil->ref_value, 0xff, fbS_reg, *zpass_reg); } - - /* If two-sided stencil is enabled, we have more work to do. */ - if (!dsa->stencil[1].enabled) { - /* This just flags that the registers need not be deallocated later */ - *back_fail_reg = fbS_reg; - *back_zfail_reg = fbS_reg; - *back_zpass_reg = fbS_reg; - } - else { - /* Same calculations as above, but for the back stencil */ - if (dsa->stencil[1].fail_op == PIPE_STENCIL_OP_KEEP) { - *back_fail_reg = fbS_reg; - } - else if (dsa->stencil[1].fail_op == dsa->stencil[0].fail_op) { - *back_fail_reg = *fail_reg; - } - else if (dsa->stencil[1].fail_op == zfail_op) { - *back_fail_reg = *zfail_reg; - } - else if (dsa->stencil[1].fail_op == dsa->stencil[0].zpass_op) { - *back_fail_reg = *zpass_reg; - } - else { - *back_fail_reg = spe_allocate_available_register(f); - gen_stencil_values(f, dsa->stencil[1].fail_op, dsa->stencil[1].ref_value, - 0xff, fbS_reg, *back_fail_reg); - } - - if (back_zfail_op == PIPE_STENCIL_OP_KEEP) { - *back_zfail_reg = fbS_reg; - } - else if (back_zfail_op == dsa->stencil[0].fail_op) { - *back_zfail_reg = *fail_reg; - } - else if (back_zfail_op == zfail_op) { - *back_zfail_reg = *zfail_reg; - } - else if (back_zfail_op == dsa->stencil[0].zpass_op) { - *back_zfail_reg = *zpass_reg; - } - else if (back_zfail_op == dsa->stencil[1].fail_op) { - *back_zfail_reg = *back_fail_reg; - } - else { - *back_zfail_reg = spe_allocate_available_register(f); - gen_stencil_values(f, dsa->stencil[1].zfail_op, dsa->stencil[1].ref_value, - 0xff, fbS_reg, *back_zfail_reg); - } - - if (dsa->stencil[1].zpass_op == PIPE_STENCIL_OP_KEEP) { - *back_zpass_reg = fbS_reg; - } - else if (dsa->stencil[1].zpass_op == dsa->stencil[0].fail_op) { - *back_zpass_reg = *fail_reg; - } - else if (dsa->stencil[1].zpass_op == zfail_op) { - *back_zpass_reg = *zfail_reg; - } - else if (dsa->stencil[1].zpass_op == dsa->stencil[0].zpass_op) { - *back_zpass_reg = *zpass_reg; - } - else if (dsa->stencil[1].zpass_op == dsa->stencil[1].fail_op) { - *back_zpass_reg = *back_fail_reg; - } - else if (dsa->stencil[1].zpass_op == back_zfail_op) { - *back_zpass_reg = *back_zfail_reg; - } - else { - *back_zfail_reg = spe_allocate_available_register(f); - gen_stencil_values(f, dsa->stencil[1].zpass_op, dsa->stencil[1].ref_value, - 0xff, fbS_reg, *back_zpass_reg); - } - } /* End of calculations for back-facing stencil */ } /* Note that fbZ_reg may *not* be set on entry, if in fact @@ -1559,7 +1487,7 @@ gen_get_stencil_values(struct spe_function *f, const struct pipe_depth_stencil_a static boolean gen_stencil_depth_test(struct spe_function *f, const struct pipe_depth_stencil_alpha_state *dsa, - const int const facing_reg, + const uint facing, const int mask_reg, const int fragZ_reg, const int fbZ_reg, const int fbS_reg) { @@ -1571,6 +1499,8 @@ gen_stencil_depth_test(struct spe_function *f, boolean need_to_calculate_stencil_values; boolean need_to_writemask_stencil_values; + struct pipe_stencil_state *stencil; + /* Registers. We may or may not actually allocate these, depending * on whether the state values indicate that we need them. */ @@ -1598,6 +1528,20 @@ gen_stencil_depth_test(struct spe_function *f, spe_comment(f, 0, "Allocating stencil register set"); spe_allocate_register_set(f); + /* The facing we're given is the fragment facing; it doesn't + * exactly match the stencil facing. If stencil is enabled, + * but two-sided stencil is *not* enabled, we use the same + * stencil settings for both front- and back-facing fragments. + * We only use the "back-facing" stencil for backfacing fragments + * if two-sided stenciling is enabled. + */ + if (facing == CELL_FACING_BACK && dsa->stencil[1].enabled) { + stencil = &dsa->stencil[1]; + } + else { + stencil = &dsa->stencil[0]; + } + /* Calculate the writemask. If the writemask is trivial (either * all 0s, meaning that we don't need to calculate any stencil values * because they're not going to change the stencil anyway, or all 1s, @@ -1608,24 +1552,20 @@ gen_stencil_depth_test(struct spe_function *f, * Note that if the backface stencil is *not* enabled, the backface * stencil will have the same values as the frontface stencil. */ - if (dsa->stencil[0].fail_op == PIPE_STENCIL_OP_KEEP && - dsa->stencil[0].zfail_op == PIPE_STENCIL_OP_KEEP && - dsa->stencil[0].zpass_op == PIPE_STENCIL_OP_KEEP && - dsa->stencil[1].fail_op == PIPE_STENCIL_OP_KEEP && - dsa->stencil[1].zfail_op == PIPE_STENCIL_OP_KEEP && - dsa->stencil[1].zpass_op == PIPE_STENCIL_OP_KEEP) { - /* No changes to any stencil values */ + if (stencil->fail_op == PIPE_STENCIL_OP_KEEP && + stencil->zfail_op == PIPE_STENCIL_OP_KEEP && + stencil->zpass_op == PIPE_STENCIL_OP_KEEP) { need_to_calculate_stencil_values = false; need_to_writemask_stencil_values = false; } - else if (dsa->stencil[0].write_mask == 0x0 && dsa->stencil[1].write_mask == 0x0) { + else if (stencil->write_mask == 0x0) { /* All changes are writemasked out, so no need to calculate * what those changes might be, and no need to write anything back. */ need_to_calculate_stencil_values = false; need_to_writemask_stencil_values = false; } - else if (dsa->stencil[0].write_mask == 0xff && dsa->stencil[1].write_mask == 0xff) { + else if (stencil->write_mask == 0xff) { /* Still trivial, but a little less so. We need to write the stencil * values, but we don't need to mask them. */ @@ -1645,14 +1585,7 @@ gen_stencil_depth_test(struct spe_function *f, */ spe_comment(f, 0, "Computing stencil writemask"); stencil_writemask_reg = spe_allocate_available_register(f); - spe_load_uint(f, stencil_writemask_reg, dsa->stencil[0].write_mask); - if (dsa->stencil[1].enabled && dsa->stencil[0].write_mask != dsa->stencil[1].write_mask) { - unsigned int back_write_mask_reg = spe_allocate_available_register(f); - spe_comment(f, 0, "Resolving two-sided stencil writemask"); - spe_load_uint(f, back_write_mask_reg, dsa->stencil[1].write_mask); - spe_selb(f, stencil_writemask_reg, stencil_writemask_reg, back_write_mask_reg, facing_reg); - spe_release_register(f, back_write_mask_reg); - } + spe_load_uint(f, stencil_writemask_reg, dsa->stencil[facing].write_mask); } /* At least one-sided stenciling must be on. Generate code that @@ -1666,19 +1599,7 @@ gen_stencil_depth_test(struct spe_function *f, */ spe_comment(f, 0, "Running basic stencil test"); stencil_pass_reg = spe_allocate_available_register(f); - gen_stencil_test(f, &dsa->stencil[0], 0xff, mask_reg, fbS_reg, stencil_pass_reg); - - /* If two-sided stenciling is on, generate code to run the stencil - * test on the backfacing stencil as well, and combine the two results - * into the one correct result based on facing. - */ - if (dsa->stencil[1].enabled) { - unsigned int temp_reg = spe_allocate_available_register(f); - spe_comment(f, 0, "Running backface stencil test"); - gen_stencil_test(f, &dsa->stencil[1], 0xff, mask_reg, fbS_reg, temp_reg); - spe_selb(f, stencil_pass_reg, stencil_pass_reg, temp_reg, facing_reg); - spe_release_register(f, temp_reg); - } + gen_stencil_test(f, stencil, 0xff, mask_reg, fbS_reg, stencil_pass_reg); /* Generate code that, given the mask of valid fragments and the * mask of valid fragments that passed the stencil test, computes @@ -1698,9 +1619,6 @@ gen_stencil_depth_test(struct spe_function *f, /* We may not need to calculate stencil values, if the writemask is off */ if (need_to_calculate_stencil_values) { - unsigned int back_stencil_fail_values, back_stencil_pass_depth_fail_values, back_stencil_pass_depth_pass_values; - unsigned int front_stencil_fail_values, front_stencil_pass_depth_fail_values, front_stencil_pass_depth_pass_values; - /* Generate code that calculates exactly which stencil values we need, * without calculating the same value twice (say, if two different * stencil ops have the same value). This code will work for one-sided @@ -1715,51 +1633,11 @@ gen_stencil_depth_test(struct spe_function *f, * This function will allocate a variant number of registers that * will be released as part of the register set. */ - spe_comment(f, 0, "Computing stencil values"); - gen_get_stencil_values(f, dsa, fbS_reg, - &front_stencil_fail_values, &front_stencil_pass_depth_fail_values, - &front_stencil_pass_depth_pass_values, &back_stencil_fail_values, - &back_stencil_pass_depth_fail_values, &back_stencil_pass_depth_pass_values); - - /* Tricky, tricky, tricky - the things we do to create optimal - * code... - * - * The various stencil values registers may overlap with each other - * and with fbS_reg arbitrarily (as any particular operation is - * only calculated once and stored in one register, no matter - * how many times it is used). So we can't change the values - * within those registers directly - if we change a value in a - * register that's being referenced by two different calculations, - * we've just unwittingly changed the second value as well... - * - * Avoid this by allocating new registers to hold the results - * (there may be 2, if the depth test is off, or 3, if it is on). - * These will be released as part of the register set. - */ - if (!dsa->stencil[1].enabled) { - /* The easy case: if two-sided stenciling is *not* enabled, we - * just use the front-sided values. - */ - stencil_fail_values = front_stencil_fail_values; - stencil_pass_depth_fail_values = front_stencil_pass_depth_fail_values; - stencil_pass_depth_pass_values = front_stencil_pass_depth_pass_values; - } - else { /* two-sided stencil enabled */ - spe_comment(f, 0, "Resolving backface stencil values"); - /* Allocate new registers for the needed merged values */ - stencil_fail_values = spe_allocate_available_register(f); - spe_selb(f, stencil_fail_values, front_stencil_fail_values, back_stencil_fail_values, facing_reg); - if (dsa->depth.enabled) { - stencil_pass_depth_fail_values = spe_allocate_available_register(f); - spe_selb(f, stencil_pass_depth_fail_values, front_stencil_pass_depth_fail_values, back_stencil_pass_depth_fail_values, facing_reg); - } - else { - stencil_pass_depth_fail_values = fbS_reg; - } - stencil_pass_depth_pass_values = spe_allocate_available_register(f); - spe_selb(f, stencil_pass_depth_pass_values, front_stencil_pass_depth_pass_values, back_stencil_pass_depth_pass_values, facing_reg); - } - } + spe_comment(f, 0, facing == CELL_FACING_FRONT ? "Computing front-facing stencil values" : "Computing back-facing stencil values"); + gen_get_stencil_values(f, stencil, dsa->depth.enabled, fbS_reg, + &stencil_fail_values, &stencil_pass_depth_fail_values, + &stencil_pass_depth_pass_values); + } /* We now have all the stencil values we need. We also need * the results of the depth test to figure out which @@ -1896,10 +1774,12 @@ gen_stencil_depth_test(struct spe_function *f, * should be much faster. * * \param cell the rendering context (in) + * \param facing whether the generated code is for front-facing or + * back-facing fragments * \param f the generated function (out) */ void -cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) +cell_gen_fragment_function(struct cell_context *cell, uint facing, struct spe_function *f) { const struct pipe_depth_stencil_alpha_state *dsa = cell->depth_stencil; const struct pipe_blend_state *blend = cell->blend; @@ -1917,7 +1797,8 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) const int fragB_reg = 10; /* vector float */ const int fragA_reg = 11; /* vector float */ const int mask_reg = 12; /* vector uint */ - const int facing_reg = 13; /* uint */ + + ASSERT(facing == CELL_FACING_FRONT || facing == CELL_FACING_BACK); /* offset of quad from start of tile * XXX assuming 4-byte pixels for color AND Z/stencil!!!! @@ -1945,7 +1826,6 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) spe_allocate_register(f, fragB_reg); spe_allocate_register(f, fragA_reg); spe_allocate_register(f, mask_reg); - spe_allocate_register(f, facing_reg); quad_offset_reg = spe_allocate_available_register(f); fbRGBA_reg = spe_allocate_available_register(f); @@ -1969,6 +1849,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) spe_release_register(f, y2_reg); } + /* Generate the alpha test, if needed. */ if (dsa->alpha.enabled) { gen_alpha_test(dsa, f, mask_reg, fragA_reg); } @@ -2095,7 +1976,7 @@ cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f) * gen_stencil_depth_test() function must ignore the * fbZ_reg register if depth is not enabled. */ - write_depth_stencil = gen_stencil_depth_test(f, dsa, facing_reg, mask_reg, fragZ_reg, fbZ_reg, fbS_reg); + write_depth_stencil = gen_stencil_depth_test(f, dsa, facing, mask_reg, fragZ_reg, fbZ_reg, fbS_reg); } else if (dsa->depth.enabled) { int zmask_reg = spe_allocate_available_register(f); diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h index b59de198dc..2fabfdfb08 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.h +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.h @@ -31,7 +31,7 @@ extern void -cell_gen_fragment_function(struct cell_context *cell, struct spe_function *f); +cell_gen_fragment_function(struct cell_context *cell, uint facing, struct spe_function *f); #endif /* CELL_GEN_FRAGMENT_H */ diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index dd2d7f7d1e..031b27f11f 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -75,23 +75,29 @@ lookup_fragment_ops(struct cell_context *cell) * If not found, create/save new fragment ops command. */ if (!ops) { - struct spe_function spe_code; + struct spe_function spe_code_front, spe_code_back; if (0) debug_printf("**** Create New Fragment Ops\n"); /* Prepare the buffer that will hold the generated code. */ - spe_init_func(&spe_code, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); + spe_init_func(&spe_code_front, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); + spe_init_func(&spe_code_back, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); - /* generate new code */ - cell_gen_fragment_function(cell, &spe_code); + /* generate new code. Always generate new code for both front-facing + * and back-facing fragments, even if it's the same code in both + * cases. + */ + cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front); + cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back); /* alloc new fragment ops command */ ops = CALLOC_STRUCT(cell_command_fragment_ops); /* populate the new cell_command_fragment_ops object */ ops->opcode = CELL_CMD_STATE_FRAGMENT_OPS; - memcpy(ops->code, spe_code.store, spe_code_size(&spe_code)); + memcpy(ops->code_front, spe_code_front.store, spe_code_size(&spe_code_front)); + memcpy(ops->code_back, spe_code_back.store, spe_code_size(&spe_code_back)); ops->dsa = *cell->depth_stencil; ops->blend = *cell->blend; @@ -99,7 +105,8 @@ lookup_fragment_ops(struct cell_context *cell) util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL); /* release rtasm buffer */ - spe_release_func(&spe_code); + spe_release_func(&spe_code_front); + spe_release_func(&spe_code_back); } else { if (0) diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index d726622d94..d5faf4e3aa 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -214,7 +214,8 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n"); /* Copy SPU code from batch buffer to spu buffer */ - memcpy(spu.fragment_ops_code, fops->code, SPU_MAX_FRAGMENT_OPS_INSTS * 4); + memcpy(spu.fragment_ops_code_front, fops->code_front, SPU_MAX_FRAGMENT_OPS_INSTS * 4); + memcpy(spu.fragment_ops_code_back, fops->code_back, SPU_MAX_FRAGMENT_OPS_INSTS * 4); /* Copy state info (for fallback case only) */ memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); @@ -234,7 +235,8 @@ cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) * raw state records that the fallback code requires. */ if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) == 0) { - spu.fragment_ops = (spu_fragment_ops_func) spu.fragment_ops_code; + spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) spu.fragment_ops_code_front; + spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) spu.fragment_ops_code_back; } else { /* otherwise, the default fallback code remains in place */ diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index c8bb251905..7033f6037d 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -63,7 +63,8 @@ one_time_init(void) * This will normally be overriden by a code-gen'd function * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set. */ - spu.fragment_ops = spu_fallback_fragment_ops; + spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; + spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; } @@ -90,7 +91,8 @@ main(main_param_t speid, main_param_t argp) ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); ASSERT(sizeof(struct cell_command_render) % 8 == 0); - ASSERT(((unsigned long) &spu.fragment_ops_code) % 8 == 0); + ASSERT(((unsigned long) &spu.fragment_ops_code_front) % 8 == 0); + ASSERT(((unsigned long) &spu.fragment_ops_code_back) % 8 == 0); ASSERT(((unsigned long) &spu.fragment_program_code) % 8 == 0); one_time_init(); diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 692790c9f3..24cf7d77ce 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -85,8 +85,7 @@ typedef void (*spu_fragment_ops_func)(uint x, uint y, vector float fragGreen, vector float fragBlue, vector float fragAlpha, - vector unsigned int mask, - uint facing); + vector unsigned int mask); /** Function for running fragment program */ typedef vector unsigned int (*spu_fragment_program_func)(vector float *inputs, @@ -170,9 +169,10 @@ struct spu_global ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; /** Current fragment ops machine code, at 8-byte boundary */ - uint fragment_ops_code[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN8_ATTRIB; - /** Current fragment ops function */ - spu_fragment_ops_func fragment_ops; + uint fragment_ops_code_front[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN8_ATTRIB; + uint fragment_ops_code_back[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN8_ATTRIB; + /** Current fragment ops functions, 0 = frontfacing, 1 = backfacing */ + spu_fragment_ops_func fragment_ops[2]; /** Current fragment program machine code, at 8-byte boundary */ uint fragment_program_code[SPU_MAX_FRAGMENT_PROGRAM_INSTS] ALIGN8_ATTRIB; diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c index f8ffc70492..683664e8a4 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.c +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.c @@ -75,8 +75,7 @@ spu_fallback_fragment_ops(uint x, uint y, vector float fragG, vector float fragB, vector float fragA, - vector unsigned int mask, - uint facing) + vector unsigned int mask) { vector float frag_aos[4]; unsigned int fbc0, fbc1, fbc2, fbc3 ; /* framebuffer/tile colors */ diff --git a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h index a61689c83a..f817abf046 100644 --- a/src/gallium/drivers/cell/spu/spu_per_fragment_op.h +++ b/src/gallium/drivers/cell/spu/spu_per_fragment_op.h @@ -38,8 +38,7 @@ spu_fallback_fragment_ops(uint x, uint y, vector float fragGreen, vector float fragBlue, vector float fragAlpha, - vector unsigned int mask, - uint facing); + vector unsigned int mask); #endif /* SPU_PER_FRAGMENT_OP */ diff --git a/src/gallium/drivers/cell/spu/spu_tri.c b/src/gallium/drivers/cell/spu/spu_tri.c index 5f908159bb..22e51a86ae 100644 --- a/src/gallium/drivers/cell/spu/spu_tri.c +++ b/src/gallium/drivers/cell/spu/spu_tri.c @@ -275,15 +275,20 @@ emit_quad( int x, int y, mask_t mask) /* Execute per-fragment/quad operations, including: * alpha test, z test, stencil test, blend and framebuffer writing. + * Note that there are two different fragment operations functions + * that can be called, one for front-facing fragments, and one + * for back-facing fragments. (Often the two are the same; + * but in some cases, like two-sided stenciling, they can be + * very different.) So choose the correct function depending + * on the calculated facing. */ - spu.fragment_ops(ix, iy, &spu.ctile, &spu.ztile, + spu.fragment_ops[setup.facing](ix, iy, &spu.ctile, &spu.ztile, fragZ, outputs[0*4+0], outputs[0*4+1], outputs[0*4+2], outputs[0*4+3], - mask, - setup.facing); + mask); } } } @@ -519,7 +524,14 @@ setup_sort_vertices(const struct vertex_header *v0, setup.oneOverArea = 1.0f / area; - /* The product of area * sign indicates front/back orientation (0/1) */ + /* The product of area * sign indicates front/back orientation (0/1). + * Just in case someone gets the bright idea of switching the front + * and back constants without noticing that we're assuming their + * values in this operation, also assert that the values are + * what we think they are. + */ + ASSERT(CELL_FACING_FRONT == 0); + ASSERT(CELL_FACING_BACK == 1); setup.facing = (area * sign > 0.0f) ^ (spu.rasterizer.front_winding == PIPE_WINDING_CW); -- cgit v1.2.3 From a983f2a6ac04edc2b3407b44c2a1b5bc970c4ce3 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 12 Nov 2008 17:03:58 +0100 Subject: draw: Add missing include. --- src/gallium/auxiliary/draw/draw_pt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 3c175f31d8..18f24e5980 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -35,6 +35,7 @@ #include "draw/draw_pt.h" #include "draw/draw_vs.h" #include "tgsi/tgsi_dump.h" +#include "util/u_math.h" static unsigned trim( unsigned count, unsigned first, unsigned incr ) { -- cgit v1.2.3 From f447eea4de9cab5de295c717d35824cf92b9f322 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 12 Nov 2008 17:14:07 +0100 Subject: util: Add log2() definition for MSC. --- src/gallium/auxiliary/util/u_math.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index be7303e550..c7bbebc428 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -161,6 +161,11 @@ static INLINE float logf( float f ) return (float) log( (double) f ); } +static INLINE double log2( double x ) +{ + return log( x ) / log( 2.0 ); +} + #else /* Work-around an extra semi-colon in VS 2005 logf definition */ #ifdef logf -- cgit v1.2.3 From 0ee92d6ed9c6aae47d990c9ac004034ded5003f1 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 12 Nov 2008 17:03:58 +0100 Subject: draw: Add missing include. --- src/gallium/auxiliary/draw/draw_pt.c | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 3c175f31d8..18f24e5980 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -35,6 +35,7 @@ #include "draw/draw_pt.h" #include "draw/draw_vs.h" #include "tgsi/tgsi_dump.h" +#include "util/u_math.h" static unsigned trim( unsigned count, unsigned first, unsigned incr ) { -- cgit v1.2.3 From 8fee30064e35488bccf8e6e7478d56ca783ebac1 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 12 Nov 2008 18:13:58 +0100 Subject: rtasm: Compile only for GALLIUM_CELL. --- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index 6d11263be8..5e7bc02ed3 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -42,6 +42,8 @@ #include "rtasm_ppc.h" +#ifdef GALLIUM_CELL + void ppc_init_func(struct ppc_function *p) { @@ -957,3 +959,5 @@ ppc_return(struct ppc_function *p) { ppc_bclr(p, BRANCH_COND_ALWAYS, BRANCH_HINT_SUB_RETURN, 0); } + +#endif /* GALLIUM_CELL */ -- cgit v1.2.3 From 1bfe7c36bac4b8e5ddfcce537603aa8a5f35529d Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 12 Nov 2008 18:19:20 +0100 Subject: tgsi: Fix a bug with saving/restoring xmm registers upon func call. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index f93db18114..8dfd2ced08 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -527,7 +527,7 @@ emit_func_call_dst( void (PIPE_CDECL *code)() ) { struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX ); - unsigned i, n, xmm; + unsigned i, n; unsigned xmm_mask; /* Bitmask of the xmm registers to save */ @@ -563,7 +563,7 @@ emit_func_call_dst( sse_movups( func, x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ), - make_xmm( xmm ) ); + make_xmm( i ) ); ++n; } @@ -581,7 +581,7 @@ emit_func_call_dst( if(xmm_mask & (1 << i)) { sse_movups( func, - make_xmm( xmm ), + make_xmm( i ), x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ) ); ++n; } -- cgit v1.2.3 From 50357ad35181b7b170abe8413d4ef772978aa5f5 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 12 Nov 2008 17:14:07 +0100 Subject: util: Add log2() definition for MSC. --- src/gallium/auxiliary/util/u_math.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index d2eaa2e7f7..62272110ce 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -161,6 +161,11 @@ static INLINE float logf( float f ) return (float) log( (double) f ); } +static INLINE double log2( double x ) +{ + return log( x ) / log( 2.0 ); +} + #else /* Work-around an extra semi-colon in VS 2005 logf definition */ #ifdef logf -- cgit v1.2.3 From 87f77105ce7207d601ee95bc29ca8c0ea1731d78 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 12 Nov 2008 18:44:20 +0100 Subject: rtasm: Use INLINE keyword. Compile for all platforms, not only GALLIUM_CELL. --- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index 5e7bc02ed3..b65bfa7bbd 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -42,8 +42,6 @@ #include "rtasm_ppc.h" -#ifdef GALLIUM_CELL - void ppc_init_func(struct ppc_function *p) { @@ -253,7 +251,7 @@ union vx_inst { } inst; }; -static inline void +static INLINE void emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) { union vx_inst inst; @@ -278,7 +276,7 @@ union vxr_inst { } inst; }; -static inline void +static INLINE void emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) { union vxr_inst inst; @@ -304,7 +302,7 @@ union va_inst { } inst; }; -static inline void +static INLINE void emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) { union va_inst inst; @@ -421,7 +419,7 @@ union d_inst { } inst; }; -static inline void +static INLINE void emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si) { union d_inst inst; @@ -448,7 +446,7 @@ union a_inst { } inst; }; -static inline void +static INLINE void emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2, uint rc) { @@ -959,5 +957,3 @@ ppc_return(struct ppc_function *p) { ppc_bclr(p, BRANCH_COND_ALWAYS, BRANCH_HINT_SUB_RETURN, 0); } - -#endif /* GALLIUM_CELL */ -- cgit v1.2.3 From c5ba8ba9182a6946ee489241738457b1370b3c77 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 12 Nov 2008 19:01:46 +0100 Subject: util: Optimise log2(). --- src/gallium/auxiliary/util/u_math.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index 62272110ce..fdaec8df82 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -163,7 +163,8 @@ static INLINE float logf( float f ) static INLINE double log2( double x ) { - return log( x ) / log( 2.0 ); + const double invln2 = 1.442695041; + return log( x ) * invln2; } #else -- cgit v1.2.3 From 0d8637451b7bf1aac164dba6d269d1a665160ea3 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 12 Nov 2008 19:02:41 +0100 Subject: util: Optimise log2(). --- src/gallium/auxiliary/util/u_math.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index c7bbebc428..aee69ab7ba 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -163,7 +163,8 @@ static INLINE float logf( float f ) static INLINE double log2( double x ) { - return log( x ) / log( 2.0 ); + const double invln2 = 1.442695041; + return log( x ) * invln2; } #else -- cgit v1.2.3 From 7f15e34cfadbeb460d22f9549511694c2bd27495 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 12 Nov 2008 11:01:40 -0700 Subject: cell: fix typo in EMIT_ macro --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index d6a3c02f20..4cde080a2c 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -100,7 +100,7 @@ extern void spe_comment(struct spe_function *p, int rel_indent, const char *s); #endif /* RTASM_PPC_SPE_H */ #ifndef EMIT_ -#define EMIT_(name, _op) \ +#define EMIT_(_name, _op) \ extern void _name (struct spe_function *p, unsigned rT) #define EMIT_R(_name, _op) \ extern void _name (struct spe_function *p, unsigned rT, unsigned rA) -- cgit v1.2.3 From 1cd15f03706f921f3a9995a4ee860b91496f4bd2 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 12 Nov 2008 11:05:34 -0700 Subject: cell: move semicolons to silence warnings w/ other compilers --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 378 ++++++++++++++-------------- 1 file changed, 189 insertions(+), 189 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 4cde080a2c..f1500cef29 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -101,198 +101,198 @@ extern void spe_comment(struct spe_function *p, int rel_indent, const char *s); #ifndef EMIT_ #define EMIT_(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT) + extern void _name (struct spe_function *p, unsigned rT); #define EMIT_R(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA) + extern void _name (struct spe_function *p, unsigned rT, unsigned rA); #define EMIT_RR(_name, _op) \ extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - unsigned rB) + unsigned rB); #define EMIT_RRR(_name, _op) \ extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - unsigned rB, unsigned rC) + unsigned rB, unsigned rC); #define EMIT_RI7(_name, _op) \ extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm) + int imm); #define EMIT_RI8(_name, _op, bias) \ extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm) + int imm); #define EMIT_RI10(_name, _op) \ extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm) + int imm); #define EMIT_RI10s(_name, _op) \ extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm) + int imm); #define EMIT_RI16(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, int imm) + extern void _name (struct spe_function *p, unsigned rT, int imm); #define EMIT_RI18(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, int imm) + extern void _name (struct spe_function *p, unsigned rT, int imm); #define EMIT_I16(_name, _op) \ - extern void _name (struct spe_function *p, int imm) + extern void _name (struct spe_function *p, int imm); #define UNDEF_EMIT_MACROS #endif /* EMIT_ */ /* Memory load / store instructions */ -EMIT_RR (spe_lqx, 0x1c4); -EMIT_RI16(spe_lqa, 0x061); -EMIT_RI16(spe_lqr, 0x067); -EMIT_RR (spe_stqx, 0x144); -EMIT_RI16(spe_stqa, 0x041); -EMIT_RI16(spe_stqr, 0x047); -EMIT_RI7 (spe_cbd, 0x1f4); -EMIT_RR (spe_cbx, 0x1d4); -EMIT_RI7 (spe_chd, 0x1f5); -EMIT_RI7 (spe_chx, 0x1d5); -EMIT_RI7 (spe_cwd, 0x1f6); -EMIT_RI7 (spe_cwx, 0x1d6); -EMIT_RI7 (spe_cdd, 0x1f7); -EMIT_RI7 (spe_cdx, 0x1d7); +EMIT_RR (spe_lqx, 0x1c4) +EMIT_RI16(spe_lqa, 0x061) +EMIT_RI16(spe_lqr, 0x067) +EMIT_RR (spe_stqx, 0x144) +EMIT_RI16(spe_stqa, 0x041) +EMIT_RI16(spe_stqr, 0x047) +EMIT_RI7 (spe_cbd, 0x1f4) +EMIT_RR (spe_cbx, 0x1d4) +EMIT_RI7 (spe_chd, 0x1f5) +EMIT_RI7 (spe_chx, 0x1d5) +EMIT_RI7 (spe_cwd, 0x1f6) +EMIT_RI7 (spe_cwx, 0x1d6) +EMIT_RI7 (spe_cdd, 0x1f7) +EMIT_RI7 (spe_cdx, 0x1d7) /* Constant formation instructions */ -EMIT_RI16(spe_ilh, 0x083); -EMIT_RI16(spe_ilhu, 0x082); -EMIT_RI16(spe_il, 0x081); -EMIT_RI18(spe_ila, 0x021); -EMIT_RI16(spe_iohl, 0x0c1); -EMIT_RI16(spe_fsmbi, 0x065); +EMIT_RI16(spe_ilh, 0x083) +EMIT_RI16(spe_ilhu, 0x082) +EMIT_RI16(spe_il, 0x081) +EMIT_RI18(spe_ila, 0x021) +EMIT_RI16(spe_iohl, 0x0c1) +EMIT_RI16(spe_fsmbi, 0x065) /* Integer and logical instructions */ -EMIT_RR (spe_ah, 0x0c8); -EMIT_RI10(spe_ahi, 0x01d); -EMIT_RR (spe_a, 0x0c0); -EMIT_RI10s(spe_ai, 0x01c); -EMIT_RR (spe_sfh, 0x048); -EMIT_RI10(spe_sfhi, 0x00d); -EMIT_RR (spe_sf, 0x040); -EMIT_RI10(spe_sfi, 0x00c); -EMIT_RR (spe_addx, 0x340); -EMIT_RR (spe_cg, 0x0c2); -EMIT_RR (spe_cgx, 0x342); -EMIT_RR (spe_sfx, 0x341); -EMIT_RR (spe_bg, 0x042); -EMIT_RR (spe_bgx, 0x343); -EMIT_RR (spe_mpy, 0x3c4); -EMIT_RR (spe_mpyu, 0x3cc); -EMIT_RI10(spe_mpyi, 0x074); -EMIT_RI10(spe_mpyui, 0x075); -EMIT_RRR (spe_mpya, 0x00c); -EMIT_RR (spe_mpyh, 0x3c5); -EMIT_RR (spe_mpys, 0x3c7); -EMIT_RR (spe_mpyhh, 0x3c6); -EMIT_RR (spe_mpyhha, 0x346); -EMIT_RR (spe_mpyhhu, 0x3ce); -EMIT_RR (spe_mpyhhau, 0x34e); -EMIT_R (spe_clz, 0x2a5); -EMIT_R (spe_cntb, 0x2b4); -EMIT_R (spe_fsmb, 0x1b6); -EMIT_R (spe_fsmh, 0x1b5); -EMIT_R (spe_fsm, 0x1b4); -EMIT_R (spe_gbb, 0x1b2); -EMIT_R (spe_gbh, 0x1b1); -EMIT_R (spe_gb, 0x1b0); -EMIT_RR (spe_avgb, 0x0d3); -EMIT_RR (spe_absdb, 0x053); -EMIT_RR (spe_sumb, 0x253); -EMIT_R (spe_xsbh, 0x2b6); -EMIT_R (spe_xshw, 0x2ae); -EMIT_R (spe_xswd, 0x2a6); -EMIT_RR (spe_and, 0x0c1); -EMIT_RR (spe_andc, 0x2c1); -EMIT_RI10s(spe_andbi, 0x016); -EMIT_RI10s(spe_andhi, 0x015); -EMIT_RI10s(spe_andi, 0x014); -EMIT_RR (spe_or, 0x041); -EMIT_RR (spe_orc, 0x2c9); -EMIT_RI10s(spe_orbi, 0x006); -EMIT_RI10s(spe_orhi, 0x005); -EMIT_RI10s(spe_ori, 0x004); -EMIT_R (spe_orx, 0x1f0); -EMIT_RR (spe_xor, 0x241); -EMIT_RI10s(spe_xorbi, 0x026); -EMIT_RI10s(spe_xorhi, 0x025); -EMIT_RI10s(spe_xori, 0x024); -EMIT_RR (spe_nand, 0x0c9); -EMIT_RR (spe_nor, 0x049); -EMIT_RR (spe_eqv, 0x249); -EMIT_RRR (spe_selb, 0x008); -EMIT_RRR (spe_shufb, 0x00b); +EMIT_RR (spe_ah, 0x0c8) +EMIT_RI10(spe_ahi, 0x01d) +EMIT_RR (spe_a, 0x0c0) +EMIT_RI10s(spe_ai, 0x01c) +EMIT_RR (spe_sfh, 0x048) +EMIT_RI10(spe_sfhi, 0x00d) +EMIT_RR (spe_sf, 0x040) +EMIT_RI10(spe_sfi, 0x00c) +EMIT_RR (spe_addx, 0x340) +EMIT_RR (spe_cg, 0x0c2) +EMIT_RR (spe_cgx, 0x342) +EMIT_RR (spe_sfx, 0x341) +EMIT_RR (spe_bg, 0x042) +EMIT_RR (spe_bgx, 0x343) +EMIT_RR (spe_mpy, 0x3c4) +EMIT_RR (spe_mpyu, 0x3cc) +EMIT_RI10(spe_mpyi, 0x074) +EMIT_RI10(spe_mpyui, 0x075) +EMIT_RRR (spe_mpya, 0x00c) +EMIT_RR (spe_mpyh, 0x3c5) +EMIT_RR (spe_mpys, 0x3c7) +EMIT_RR (spe_mpyhh, 0x3c6) +EMIT_RR (spe_mpyhha, 0x346) +EMIT_RR (spe_mpyhhu, 0x3ce) +EMIT_RR (spe_mpyhhau, 0x34e) +EMIT_R (spe_clz, 0x2a5) +EMIT_R (spe_cntb, 0x2b4) +EMIT_R (spe_fsmb, 0x1b6) +EMIT_R (spe_fsmh, 0x1b5) +EMIT_R (spe_fsm, 0x1b4) +EMIT_R (spe_gbb, 0x1b2) +EMIT_R (spe_gbh, 0x1b1) +EMIT_R (spe_gb, 0x1b0) +EMIT_RR (spe_avgb, 0x0d3) +EMIT_RR (spe_absdb, 0x053) +EMIT_RR (spe_sumb, 0x253) +EMIT_R (spe_xsbh, 0x2b6) +EMIT_R (spe_xshw, 0x2ae) +EMIT_R (spe_xswd, 0x2a6) +EMIT_RR (spe_and, 0x0c1) +EMIT_RR (spe_andc, 0x2c1) +EMIT_RI10s(spe_andbi, 0x016) +EMIT_RI10s(spe_andhi, 0x015) +EMIT_RI10s(spe_andi, 0x014) +EMIT_RR (spe_or, 0x041) +EMIT_RR (spe_orc, 0x2c9) +EMIT_RI10s(spe_orbi, 0x006) +EMIT_RI10s(spe_orhi, 0x005) +EMIT_RI10s(spe_ori, 0x004) +EMIT_R (spe_orx, 0x1f0) +EMIT_RR (spe_xor, 0x241) +EMIT_RI10s(spe_xorbi, 0x026) +EMIT_RI10s(spe_xorhi, 0x025) +EMIT_RI10s(spe_xori, 0x024) +EMIT_RR (spe_nand, 0x0c9) +EMIT_RR (spe_nor, 0x049) +EMIT_RR (spe_eqv, 0x249) +EMIT_RRR (spe_selb, 0x008) +EMIT_RRR (spe_shufb, 0x00b) /* Shift and rotate instructions */ -EMIT_RR (spe_shlh, 0x05f); -EMIT_RI7 (spe_shlhi, 0x07f); -EMIT_RR (spe_shl, 0x05b); -EMIT_RI7 (spe_shli, 0x07b); -EMIT_RR (spe_shlqbi, 0x1db); -EMIT_RI7 (spe_shlqbii, 0x1fb); -EMIT_RR (spe_shlqby, 0x1df); -EMIT_RI7 (spe_shlqbyi, 0x1ff); -EMIT_RR (spe_shlqbybi, 0x1cf); -EMIT_RR (spe_roth, 0x05c); -EMIT_RI7 (spe_rothi, 0x07c); -EMIT_RR (spe_rot, 0x058); -EMIT_RI7 (spe_roti, 0x078); -EMIT_RR (spe_rotqby, 0x1dc); -EMIT_RI7 (spe_rotqbyi, 0x1fc); -EMIT_RR (spe_rotqbybi, 0x1cc); -EMIT_RR (spe_rotqbi, 0x1d8); -EMIT_RI7 (spe_rotqbii, 0x1f8); -EMIT_RR (spe_rothm, 0x05d); -EMIT_RI7 (spe_rothmi, 0x07d); -EMIT_RR (spe_rotm, 0x059); -EMIT_RI7 (spe_rotmi, 0x079); -EMIT_RR (spe_rotqmby, 0x1dd); -EMIT_RI7 (spe_rotqmbyi, 0x1fd); -EMIT_RR (spe_rotqmbybi, 0x1cd); -EMIT_RR (spe_rotqmbi, 0x1c9); -EMIT_RI7 (spe_rotqmbii, 0x1f9); -EMIT_RR (spe_rotmah, 0x05e); -EMIT_RI7 (spe_rotmahi, 0x07e); -EMIT_RR (spe_rotma, 0x05a); -EMIT_RI7 (spe_rotmai, 0x07a); +EMIT_RR (spe_shlh, 0x05f) +EMIT_RI7 (spe_shlhi, 0x07f) +EMIT_RR (spe_shl, 0x05b) +EMIT_RI7 (spe_shli, 0x07b) +EMIT_RR (spe_shlqbi, 0x1db) +EMIT_RI7 (spe_shlqbii, 0x1fb) +EMIT_RR (spe_shlqby, 0x1df) +EMIT_RI7 (spe_shlqbyi, 0x1ff) +EMIT_RR (spe_shlqbybi, 0x1cf) +EMIT_RR (spe_roth, 0x05c) +EMIT_RI7 (spe_rothi, 0x07c) +EMIT_RR (spe_rot, 0x058) +EMIT_RI7 (spe_roti, 0x078) +EMIT_RR (spe_rotqby, 0x1dc) +EMIT_RI7 (spe_rotqbyi, 0x1fc) +EMIT_RR (spe_rotqbybi, 0x1cc) +EMIT_RR (spe_rotqbi, 0x1d8) +EMIT_RI7 (spe_rotqbii, 0x1f8) +EMIT_RR (spe_rothm, 0x05d) +EMIT_RI7 (spe_rothmi, 0x07d) +EMIT_RR (spe_rotm, 0x059) +EMIT_RI7 (spe_rotmi, 0x079) +EMIT_RR (spe_rotqmby, 0x1dd) +EMIT_RI7 (spe_rotqmbyi, 0x1fd) +EMIT_RR (spe_rotqmbybi, 0x1cd) +EMIT_RR (spe_rotqmbi, 0x1c9) +EMIT_RI7 (spe_rotqmbii, 0x1f9) +EMIT_RR (spe_rotmah, 0x05e) +EMIT_RI7 (spe_rotmahi, 0x07e) +EMIT_RR (spe_rotma, 0x05a) +EMIT_RI7 (spe_rotmai, 0x07a) /* Compare, branch, and halt instructions */ -EMIT_RR (spe_heq, 0x3d8); -EMIT_RI10(spe_heqi, 0x07f); -EMIT_RR (spe_hgt, 0x258); -EMIT_RI10(spe_hgti, 0x04f); -EMIT_RR (spe_hlgt, 0x2d8); -EMIT_RI10(spe_hlgti, 0x05f); -EMIT_RR (spe_ceqb, 0x3d0); -EMIT_RI10(spe_ceqbi, 0x07e); -EMIT_RR (spe_ceqh, 0x3c8); -EMIT_RI10(spe_ceqhi, 0x07d); -EMIT_RR (spe_ceq, 0x3c0); -EMIT_RI10(spe_ceqi, 0x07c); -EMIT_RR (spe_cgtb, 0x250); -EMIT_RI10(spe_cgtbi, 0x04e); -EMIT_RR (spe_cgth, 0x248); -EMIT_RI10(spe_cgthi, 0x04d); -EMIT_RR (spe_cgt, 0x240); -EMIT_RI10(spe_cgti, 0x04c); -EMIT_RR (spe_clgtb, 0x2d0); -EMIT_RI10(spe_clgtbi, 0x05e); -EMIT_RR (spe_clgth, 0x2c8); -EMIT_RI10(spe_clgthi, 0x05d); -EMIT_RR (spe_clgt, 0x2c0); -EMIT_RI10(spe_clgti, 0x05c); -EMIT_I16 (spe_br, 0x064); -EMIT_I16 (spe_bra, 0x060); -EMIT_RI16(spe_brsl, 0x066); -EMIT_RI16(spe_brasl, 0x062); -EMIT_RI16(spe_brnz, 0x042); -EMIT_RI16(spe_brz, 0x040); -EMIT_RI16(spe_brhnz, 0x046); -EMIT_RI16(spe_brhz, 0x044); +EMIT_RR (spe_heq, 0x3d8) +EMIT_RI10(spe_heqi, 0x07f) +EMIT_RR (spe_hgt, 0x258) +EMIT_RI10(spe_hgti, 0x04f) +EMIT_RR (spe_hlgt, 0x2d8) +EMIT_RI10(spe_hlgti, 0x05f) +EMIT_RR (spe_ceqb, 0x3d0) +EMIT_RI10(spe_ceqbi, 0x07e) +EMIT_RR (spe_ceqh, 0x3c8) +EMIT_RI10(spe_ceqhi, 0x07d) +EMIT_RR (spe_ceq, 0x3c0) +EMIT_RI10(spe_ceqi, 0x07c) +EMIT_RR (spe_cgtb, 0x250) +EMIT_RI10(spe_cgtbi, 0x04e) +EMIT_RR (spe_cgth, 0x248) +EMIT_RI10(spe_cgthi, 0x04d) +EMIT_RR (spe_cgt, 0x240) +EMIT_RI10(spe_cgti, 0x04c) +EMIT_RR (spe_clgtb, 0x2d0) +EMIT_RI10(spe_clgtbi, 0x05e) +EMIT_RR (spe_clgth, 0x2c8) +EMIT_RI10(spe_clgthi, 0x05d) +EMIT_RR (spe_clgt, 0x2c0) +EMIT_RI10(spe_clgti, 0x05c) +EMIT_I16 (spe_br, 0x064) +EMIT_I16 (spe_bra, 0x060) +EMIT_RI16(spe_brsl, 0x066) +EMIT_RI16(spe_brasl, 0x062) +EMIT_RI16(spe_brnz, 0x042) +EMIT_RI16(spe_brz, 0x040) +EMIT_RI16(spe_brhnz, 0x046) +EMIT_RI16(spe_brhz, 0x044) extern void spe_lqd(struct spe_function *p, unsigned rT, unsigned rA, int offset); @@ -375,46 +375,46 @@ spe_float_max(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB); /* Floating-point instructions */ -EMIT_RR (spe_fa, 0x2c4); -EMIT_RR (spe_dfa, 0x2cc); -EMIT_RR (spe_fs, 0x2c5); -EMIT_RR (spe_dfs, 0x2cd); -EMIT_RR (spe_fm, 0x2c6); -EMIT_RR (spe_dfm, 0x2ce); -EMIT_RRR (spe_fma, 0x00e); -EMIT_RR (spe_dfma, 0x35c); -EMIT_RRR (spe_fnms, 0x00d); -EMIT_RR (spe_dfnms, 0x35e); -EMIT_RRR (spe_fms, 0x00f); -EMIT_RR (spe_dfms, 0x35d); -EMIT_RR (spe_dfnma, 0x35f); -EMIT_R (spe_frest, 0x1b8); -EMIT_R (spe_frsqest, 0x1b9); -EMIT_RR (spe_fi, 0x3d4); -EMIT_RI8 (spe_csflt, 0x1da, 155); -EMIT_RI8 (spe_cflts, 0x1d8, 173); -EMIT_RI8 (spe_cuflt, 0x1db, 155); -EMIT_RI8 (spe_cfltu, 0x1d9, 173); -EMIT_R (spe_frds, 0x3b9); -EMIT_R (spe_fesd, 0x3b8); -EMIT_RR (spe_dfceq, 0x3c3); -EMIT_RR (spe_dfcmeq, 0x3cb); -EMIT_RR (spe_dfcgt, 0x2c3); -EMIT_RR (spe_dfcmgt, 0x2cb); -EMIT_RI7 (spe_dftsv, 0x3bf); -EMIT_RR (spe_fceq, 0x3c2); -EMIT_RR (spe_fcmeq, 0x3ca); -EMIT_RR (spe_fcgt, 0x2c2); -EMIT_RR (spe_fcmgt, 0x2ca); -EMIT_R (spe_fscrwr, 0x3ba); -EMIT_ (spe_fscrrd, 0x398); +EMIT_RR (spe_fa, 0x2c4) +EMIT_RR (spe_dfa, 0x2cc) +EMIT_RR (spe_fs, 0x2c5) +EMIT_RR (spe_dfs, 0x2cd) +EMIT_RR (spe_fm, 0x2c6) +EMIT_RR (spe_dfm, 0x2ce) +EMIT_RRR (spe_fma, 0x00e) +EMIT_RR (spe_dfma, 0x35c) +EMIT_RRR (spe_fnms, 0x00d) +EMIT_RR (spe_dfnms, 0x35e) +EMIT_RRR (spe_fms, 0x00f) +EMIT_RR (spe_dfms, 0x35d) +EMIT_RR (spe_dfnma, 0x35f) +EMIT_R (spe_frest, 0x1b8) +EMIT_R (spe_frsqest, 0x1b9) +EMIT_RR (spe_fi, 0x3d4) +EMIT_RI8 (spe_csflt, 0x1da, 155) +EMIT_RI8 (spe_cflts, 0x1d8, 173) +EMIT_RI8 (spe_cuflt, 0x1db, 155) +EMIT_RI8 (spe_cfltu, 0x1d9, 173) +EMIT_R (spe_frds, 0x3b9) +EMIT_R (spe_fesd, 0x3b8) +EMIT_RR (spe_dfceq, 0x3c3) +EMIT_RR (spe_dfcmeq, 0x3cb) +EMIT_RR (spe_dfcgt, 0x2c3) +EMIT_RR (spe_dfcmgt, 0x2cb) +EMIT_RI7 (spe_dftsv, 0x3bf) +EMIT_RR (spe_fceq, 0x3c2) +EMIT_RR (spe_fcmeq, 0x3ca) +EMIT_RR (spe_fcgt, 0x2c2) +EMIT_RR (spe_fcmgt, 0x2ca) +EMIT_R (spe_fscrwr, 0x3ba) +EMIT_ (spe_fscrrd, 0x398) /* Channel instructions */ -EMIT_R (spe_rdch, 0x00d); -EMIT_R (spe_rdchcnt, 0x00f); -EMIT_R (spe_wrch, 0x10d); +EMIT_R (spe_rdch, 0x00d) +EMIT_R (spe_rdchcnt, 0x00f) +EMIT_R (spe_wrch, 0x10d) #ifdef UNDEF_EMIT_MACROS -- cgit v1.2.3 From b44ec717c831bb2e3363ee79ae1faca7e0665bea Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 12 Nov 2008 11:09:12 -0700 Subject: gallium: add missing prototypes --- src/gallium/auxiliary/rtasm/rtasm_ppc.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index afb4704c39..08212a2a25 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -244,6 +244,9 @@ ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb); extern void ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm); +extern void +ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm); + extern void ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb); @@ -310,6 +313,9 @@ ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset); extern void ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb); +extern void +ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset); + /** -- cgit v1.2.3 From 26c8593093bd9e42d06a54ed8cfdedce2fb44332 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 12 Nov 2008 23:23:49 +0100 Subject: tgsi: More comments on source register indirect and 2D indexing. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 65 ++++++++++++++++++++++++++++------ src/gallium/auxiliary/tgsi/tgsi_exec.h | 10 ++++++ 2 files changed, 65 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 1da04ab7e0..4a217454dd 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1037,11 +1037,28 @@ fetch_source( union tgsi_exec_channel index; uint swizzle; + /* We start with a direct index into a register file. + * + * file[1], + * where: + * file = SrcRegister.File + * [1] = SrcRegister.Index + */ index.i[0] = index.i[1] = index.i[2] = index.i[3] = reg->SrcRegister.Index; + /* There is an extra source register that indirectly subscripts + * a register file. The direct index now becomes an offset + * that is being added to the indirect register. + * + * file[ind[2].x+1], + * where: + * ind = SrcRegisterInd.File + * [2] = SrcRegisterInd.Index + * .x = SrcRegisterInd.SwizzleX + */ if (reg->SrcRegister.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; @@ -1078,19 +1095,31 @@ fetch_source( } } - if( reg->SrcRegister.Dimension ) { - switch( reg->SrcRegister.File ) { + /* There is an extra source register that is a second + * subscript to a register file. Effectively it means that + * the register file is actually a 2D array of registers. + * + * file[1][3] == file[1*sizeof(file[1])+3], + * where: + * [3] = SrcRegisterDim.Index + */ + if (reg->SrcRegister.Dimension) { + /* The size of the first-order array depends on the register file type. + * We need to multiply the index to the first array to get an effective, + * "flat" index that points to the beginning of the second-order array. + */ + switch (reg->SrcRegister.File) { case TGSI_FILE_INPUT: - index.i[0] *= 17; - index.i[1] *= 17; - index.i[2] *= 17; - index.i[3] *= 17; + index.i[0] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; + index.i[1] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; + index.i[2] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; + index.i[3] *= TGSI_EXEC_MAX_INPUT_ATTRIBS; break; case TGSI_FILE_CONSTANT: - index.i[0] *= 4096; - index.i[1] *= 4096; - index.i[2] *= 4096; - index.i[3] *= 4096; + index.i[0] *= TGSI_EXEC_MAX_CONST_BUFFER; + index.i[1] *= TGSI_EXEC_MAX_CONST_BUFFER; + index.i[2] *= TGSI_EXEC_MAX_CONST_BUFFER; + index.i[3] *= TGSI_EXEC_MAX_CONST_BUFFER; break; default: assert( 0 ); @@ -1101,6 +1130,17 @@ fetch_source( index.i[2] += reg->SrcRegisterDim.Index; index.i[3] += reg->SrcRegisterDim.Index; + /* Again, the second subscript index can be addressed indirectly + * identically to the first one. + * Nothing stops us from indirectly addressing the indirect register, + * but there is no need for that, so we won't exercise it. + * + * file[1][ind[4].y+3], + * where: + * ind = SrcRegisterDimInd.File + * [4] = SrcRegisterDimInd.Index + * .y = SrcRegisterDimInd.SwizzleX + */ if (reg->SrcRegisterDim.Indirect) { union tgsi_exec_channel index2; union tgsi_exec_channel indir_index; @@ -1133,6 +1173,11 @@ fetch_source( index.i[i] = 0; } } + + /* If by any chance there was a need for a 3D array of register + * files, we would have to check whether SrcRegisterDim is followed + * by a dimension register and continue the saga. + */ } swizzle = tgsi_util_get_full_src_register_extswizzle( reg, chan_index ); diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index fc40a25e09..ac4b239910 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -178,6 +178,16 @@ struct tgsi_exec_labels #define TGSI_EXEC_MAX_LOOP_NESTING 20 #define TGSI_EXEC_MAX_CALL_NESTING 20 +/* The maximum number of input attributes per vertex. For 2D + * input register files, this is the stride between two 1D + * arrays. + */ +#define TGSI_EXEC_MAX_INPUT_ATTRIBS 17 + +/* The maximum number of constant vectors per constant buffer. + */ +#define TGSI_EXEC_MAX_CONST_BUFFER 4096 + /** * Run-time virtual machine state for executing TGSI shader. */ -- cgit v1.2.3 From 2c29a6896a4a026ed3568db9caf90f422b711d8b Mon Sep 17 00:00:00 2001 From: Robert Ellison Date: Thu, 13 Nov 2008 11:22:12 -0700 Subject: CELL: fix stencil twiddling, stencil invert Many stencil tests were failing because of a failure to read the stencil buffer, due to "twiddling" (or "untwiddling") "an unsupported texture format". This is fixed for the case of a stencil/Z S824Z format (which twiddles just like the 32-bit color formats). tests/stencilwrap.c was failing on the GL_INVERT test, because the emitted code for "spe_xori" turned out not to be an actual "xori" instruction, but rather a "stqd" instruction, because of a typo in the rtasm code. This is now fixed, and tests/stencil_wrap now works. --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 6 +++--- src/gallium/drivers/cell/ppu/cell_texture.c | 6 ++++-- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index f1500cef29..7c211ffc51 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -214,9 +214,9 @@ EMIT_RI10s(spe_orhi, 0x005) EMIT_RI10s(spe_ori, 0x004) EMIT_R (spe_orx, 0x1f0) EMIT_RR (spe_xor, 0x241) -EMIT_RI10s(spe_xorbi, 0x026) -EMIT_RI10s(spe_xorhi, 0x025) -EMIT_RI10s(spe_xori, 0x024) +EMIT_RI10s(spe_xorbi, 0x046) +EMIT_RI10s(spe_xorhi, 0x045) +EMIT_RI10s(spe_xori, 0x044) EMIT_RR (spe_nand, 0x0c9) EMIT_RR (spe_nor, 0x049) EMIT_RR (spe_eqv, 0x249) diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index ae88d06912..47cd9605c8 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -314,6 +314,7 @@ cell_twiddle_texture(struct pipe_screen *screen, switch (ct->base.format) { case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: { int numFaces = ct->base.target == PIPE_TEXTURE_CUBE ? 6 : 1; int offset = bufWidth * bufHeight * 4 * surface->face; @@ -337,7 +338,7 @@ cell_twiddle_texture(struct pipe_screen *screen, } break; default: - printf("Cell: twiddle unsupported texture format\n"); + printf("Cell: twiddle unsupported texture format 0x%x\n", ct->base.format); ; } @@ -363,6 +364,7 @@ cell_untwiddle_texture(struct pipe_screen *screen, switch (ct->base.format) { case PIPE_FORMAT_A8R8G8B8_UNORM: case PIPE_FORMAT_B8G8R8A8_UNORM: + case PIPE_FORMAT_S8Z24_UNORM: { int numFaces = ct->base.target == PIPE_TEXTURE_CUBE ? 6 : 1; int offset = surface->stride * texHeight * 4 * surface->face; @@ -382,7 +384,7 @@ cell_untwiddle_texture(struct pipe_screen *screen, default: { ct->untiled_data[level] = NULL; - printf("Cell: untwiddle unsupported texture format\n"); + printf("Cell: untwiddle unsupported texture format 0x%x\n", ct->base.format); } } -- cgit v1.2.3 From 0557fa72c0e39a3cb4c241690b495ca142c06616 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 14 Nov 2008 17:59:29 +0000 Subject: translate: pull in prefetch and other optimizations from draw_vs_aos.c --- src/gallium/auxiliary/translate/translate.h | 4 +- src/gallium/auxiliary/translate/translate_sse.c | 312 +++++++++++++++--------- 2 files changed, 204 insertions(+), 112 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/translate/translate.h b/src/gallium/auxiliary/translate/translate.h index 650cd81fa6..34526eb061 100644 --- a/src/gallium/auxiliary/translate/translate.h +++ b/src/gallium/auxiliary/translate/translate.h @@ -48,8 +48,8 @@ struct translate_element { enum pipe_format input_format; enum pipe_format output_format; - unsigned input_buffer; - unsigned input_offset; /* can't really reduce the size of these */ + unsigned input_buffer:8; + unsigned input_offset:24; unsigned output_offset; }; diff --git a/src/gallium/auxiliary/translate/translate_sse.c b/src/gallium/auxiliary/translate/translate_sse.c index 7955186e16..b62db8d8f3 100644 --- a/src/gallium/auxiliary/translate/translate_sse.c +++ b/src/gallium/auxiliary/translate/translate_sse.c @@ -29,7 +29,7 @@ #include "pipe/p_config.h" #include "pipe/p_compiler.h" #include "util/u_memory.h" -#include "util/u_simple_list.h" +#include "util/u_math.h" #include "translate.h" @@ -56,6 +56,11 @@ typedef void (PIPE_CDECL *run_elts_func)( struct translate *translate, unsigned count, void *output_buffer ); +struct translate_buffer { + const void *base_ptr; + unsigned stride; + void *ptr; /* updated per vertex */ +}; struct translate_sse { @@ -73,14 +78,20 @@ struct translate_sse { float float_255[4]; float inv_255[4]; - struct { - char *input_ptr; - unsigned input_stride; - } attrib[PIPE_MAX_ATTRIBS]; + struct translate_buffer buffer[PIPE_MAX_ATTRIBS]; + unsigned nr_buffers; run_func gen_run; run_elts_func gen_run_elts; + /* these are actually known values, but putting them in a struct + * like this is helpful to keep them in sync across the file. + */ + struct x86_reg tmp_EAX; + struct x86_reg idx_EBX; /* either start+i or &elt[i] */ + struct x86_reg outbuf_ECX; + struct x86_reg machine_EDX; + struct x86_reg count_ESI; /* decrements to zero */ }; static int get_offset( const void *a, const void *b ) @@ -95,10 +106,6 @@ static struct x86_reg get_identity( struct translate_sse *p ) struct x86_reg reg = x86_make_reg(file_XMM, 6); if (!p->loaded_identity) { - /* Nasty: - */ - struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI); - p->loaded_identity = TRUE; p->identity[0] = 0; p->identity[1] = 0; @@ -106,7 +113,7 @@ static struct x86_reg get_identity( struct translate_sse *p ) p->identity[3] = 1; sse_movups(p->func, reg, - x86_make_disp(translateESI, + x86_make_disp(p->machine_EDX, get_offset(p, &p->identity[0]))); } @@ -115,11 +122,9 @@ static struct x86_reg get_identity( struct translate_sse *p ) static struct x86_reg get_255( struct translate_sse *p ) { - struct x86_reg reg = x86_make_reg(file_XMM, 6); + struct x86_reg reg = x86_make_reg(file_XMM, 7); if (!p->loaded_255) { - struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI); - p->loaded_255 = TRUE; p->float_255[0] = p->float_255[1] = @@ -127,12 +132,11 @@ static struct x86_reg get_255( struct translate_sse *p ) p->float_255[3] = 255.0f; sse_movups(p->func, reg, - x86_make_disp(translateESI, + x86_make_disp(p->machine_EDX, get_offset(p, &p->float_255[0]))); } return reg; - return x86_make_reg(file_XMM, 7); } static struct x86_reg get_inv_255( struct translate_sse *p ) @@ -140,8 +144,6 @@ static struct x86_reg get_inv_255( struct translate_sse *p ) struct x86_reg reg = x86_make_reg(file_XMM, 5); if (!p->loaded_inv_255) { - struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI); - p->loaded_inv_255 = TRUE; p->inv_255[0] = p->inv_255[1] = @@ -149,7 +151,7 @@ static struct x86_reg get_inv_255( struct translate_sse *p ) p->inv_255[3] = 1.0f / 255.0f; sse_movups(p->func, reg, - x86_make_disp(translateESI, + x86_make_disp(p->machine_EDX, get_offset(p, &p->inv_255[0]))); } @@ -283,28 +285,6 @@ static void emit_store_R8G8B8A8_UNORM( struct translate_sse *p, -static void get_src_ptr( struct translate_sse *p, - struct x86_reg srcEAX, - struct x86_reg translateREG, - struct x86_reg eltREG, - unsigned a ) -{ - struct x86_reg input_ptr = - x86_make_disp(translateREG, - get_offset(p, &p->attrib[a].input_ptr)); - - struct x86_reg input_stride = - x86_make_disp(translateREG, - get_offset(p, &p->attrib[a].input_stride)); - - /* Calculate pointer to current attrib: - */ - x86_mov(p->func, srcEAX, input_stride); - x86_imul(p->func, srcEAX, eltREG); - x86_add(p->func, srcEAX, input_ptr); -} - - /* Extended swizzles? Maybe later. */ static void emit_swizzle( struct translate_sse *p, @@ -374,12 +354,126 @@ static boolean translate_attr( struct translate_sse *p, return TRUE; } -/* Build run( struct translate *translate, + +static boolean init_inputs( struct translate_sse *p, + boolean linear ) +{ + unsigned i; + if (linear) { + for (i = 0; i < p->nr_buffers; i++) { + struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, + get_offset(p, &p->buffer[i].stride)); + struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, + get_offset(p, &p->buffer[i].ptr)); + struct x86_reg buf_base_ptr = x86_make_disp(p->machine_EDX, + get_offset(p, &p->buffer[i].base_ptr)); + struct x86_reg elt = p->idx_EBX; + struct x86_reg tmp = p->tmp_EAX; + + + /* Calculate pointer to first attrib: + */ + x86_mov(p->func, tmp, buf_stride); + x86_imul(p->func, tmp, elt); + x86_add(p->func, tmp, buf_base_ptr); + + + /* In the linear case, keep the buffer pointer instead of the + * index number. + */ + if (p->nr_buffers == 1) + x86_mov( p->func, elt, tmp ); + else + x86_mov( p->func, buf_ptr, tmp ); + } + } + + return TRUE; +} + + +static struct x86_reg get_buffer_ptr( struct translate_sse *p, + boolean linear, + unsigned buf_idx, + struct x86_reg elt ) +{ + if (linear && p->nr_buffers == 1) { + return p->idx_EBX; + } + else if (linear) { + struct x86_reg ptr = p->tmp_EAX; + struct x86_reg buf_ptr = + x86_make_disp(p->machine_EDX, + get_offset(p, &p->buffer[buf_idx].ptr)); + + x86_mov(p->func, ptr, buf_ptr); + return ptr; + } + else { + struct x86_reg ptr = p->tmp_EAX; + + struct x86_reg buf_stride = + x86_make_disp(p->machine_EDX, + get_offset(p, &p->buffer[buf_idx].stride)); + + struct x86_reg buf_base_ptr = + x86_make_disp(p->machine_EDX, + get_offset(p, &p->buffer[buf_idx].base_ptr)); + + + + /* Calculate pointer to current attrib: + */ + x86_mov(p->func, ptr, buf_stride); + x86_imul(p->func, ptr, elt); + x86_add(p->func, ptr, buf_base_ptr); + return ptr; + } +} + + + +static boolean incr_inputs( struct translate_sse *p, + boolean linear ) +{ + if (linear && p->nr_buffers == 1) { + struct x86_reg stride = x86_make_disp(p->machine_EDX, + get_offset(p, &p->buffer[0].stride)); + + x86_add(p->func, p->idx_EBX, stride); + sse_prefetchnta(p->func, x86_make_disp(p->idx_EBX, 192)); + } + else if (linear) { + unsigned i; + + /* Is this worthwhile?? + */ + for (i = 0; i < p->nr_buffers; i++) { + struct x86_reg buf_ptr = x86_make_disp(p->machine_EDX, + get_offset(p, &p->buffer[i].ptr)); + struct x86_reg buf_stride = x86_make_disp(p->machine_EDX, + get_offset(p, &p->buffer[i].stride)); + + x86_mov(p->func, p->tmp_EAX, buf_ptr); + x86_add(p->func, p->tmp_EAX, buf_stride); + if (i == 0) sse_prefetchnta(p->func, x86_make_disp(p->tmp_EAX, 192)); + x86_mov(p->func, buf_ptr, p->tmp_EAX); + } + } + else { + x86_lea(p->func, p->idx_EBX, x86_make_disp(p->idx_EBX, 4)); + } + + return TRUE; +} + + +/* Build run( struct translate *machine, * unsigned start, * unsigned count, * void *output_buffer ) * or - * run_elts( struct translate *translate, + * run_elts( struct translate *machine, * unsigned *elts, * unsigned count, * void *output_buffer ) @@ -394,14 +488,15 @@ static boolean build_vertex_emit( struct translate_sse *p, struct x86_function *func, boolean linear ) { - struct x86_reg vertexECX = x86_make_reg(file_REG32, reg_AX); - struct x86_reg idxEBX = x86_make_reg(file_REG32, reg_BX); - struct x86_reg srcEAX = x86_make_reg(file_REG32, reg_CX); - struct x86_reg countEBP = x86_make_reg(file_REG32, reg_BP); - struct x86_reg translateESI = x86_make_reg(file_REG32, reg_SI); int fixup, label; unsigned j; + p->tmp_EAX = x86_make_reg(file_REG32, reg_AX); + p->idx_EBX = x86_make_reg(file_REG32, reg_BX); + p->outbuf_ECX = x86_make_reg(file_REG32, reg_CX); + p->machine_EDX = x86_make_reg(file_REG32, reg_DX); + p->count_ESI = x86_make_reg(file_REG32, reg_SI); + p->func = func; p->loaded_inv_255 = FALSE; p->loaded_255 = FALSE; @@ -411,74 +506,65 @@ static boolean build_vertex_emit( struct translate_sse *p, /* Push a few regs? */ - x86_push(p->func, countEBP); - x86_push(p->func, translateESI); - x86_push(p->func, idxEBX); - - /* Get vertex count, compare to zero - */ - x86_xor(p->func, idxEBX, idxEBX); - x86_mov(p->func, countEBP, x86_fn_arg(p->func, 3)); - x86_cmp(p->func, countEBP, idxEBX); - fixup = x86_jcc_forward(p->func, cc_E); - - /* If linear, idx is the current element, otherwise it is a pointer - * to the current element. - */ - x86_mov(p->func, idxEBX, x86_fn_arg(p->func, 2)); + x86_push(p->func, p->idx_EBX); + x86_push(p->func, p->count_ESI); - /* Initialize destination register. + /* Load arguments into regs: */ - x86_mov(p->func, vertexECX, x86_fn_arg(p->func, 4)); + x86_mov(p->func, p->machine_EDX, x86_fn_arg(p->func, 1)); + x86_mov(p->func, p->idx_EBX, x86_fn_arg(p->func, 2)); + x86_mov(p->func, p->count_ESI, x86_fn_arg(p->func, 3)); + x86_mov(p->func, p->outbuf_ECX, x86_fn_arg(p->func, 4)); - /* Move argument 1 (translate_sse pointer) into a reg: + /* Get vertex count, compare to zero */ - x86_mov(p->func, translateESI, x86_fn_arg(p->func, 1)); + x86_xor(p->func, p->tmp_EAX, p->tmp_EAX); + x86_cmp(p->func, p->count_ESI, p->tmp_EAX); + fixup = x86_jcc_forward(p->func, cc_E); - /* always load, needed or not: */ + init_inputs(p, linear); - /* Note address for loop jump */ + /* Note address for loop jump + */ label = x86_get_label(p->func); - - - for (j = 0; j < p->translate.key.nr_elements; j++) { - const struct translate_element *a = &p->translate.key.element[j]; - - struct x86_reg destEAX = x86_make_disp(vertexECX, - a->output_offset); - - /* Figure out source pointer address: - */ - if (linear) { - get_src_ptr(p, srcEAX, translateESI, idxEBX, j); + { + struct x86_reg elt = linear ? p->idx_EBX : x86_deref(p->idx_EBX); + int last_vb = -1; + struct x86_reg vb; + + for (j = 0; j < p->translate.key.nr_elements; j++) { + const struct translate_element *a = &p->translate.key.element[j]; + + /* Figure out source pointer address: + */ + if (a->input_buffer != last_vb) { + last_vb = a->input_buffer; + vb = get_buffer_ptr(p, linear, a->input_buffer, elt); + } + + if (!translate_attr( p, a, + x86_make_disp(vb, a->input_offset), + x86_make_disp(p->outbuf_ECX, a->output_offset))) + return FALSE; } - else { - get_src_ptr(p, srcEAX, translateESI, x86_deref(idxEBX), j); - } - - if (!translate_attr( p, a, x86_deref(srcEAX), destEAX )) - return FALSE; - } - - /* Next vertex: - */ - x86_lea(p->func, vertexECX, x86_make_disp(vertexECX, p->translate.key.output_stride)); - /* Incr index - */ - if (linear) { - x86_inc(p->func, idxEBX); - } - else { - x86_lea(p->func, idxEBX, x86_make_disp(idxEBX, 4)); + /* Next output vertex: + */ + x86_lea(p->func, + p->outbuf_ECX, + x86_make_disp(p->outbuf_ECX, + p->translate.key.output_stride)); + + /* Incr index + */ + incr_inputs( p, linear ); } /* decr count, loop if not zero */ - x86_dec(p->func, countEBP); - x86_test(p->func, countEBP, countEBP); + x86_dec(p->func, p->count_ESI); x86_jcc(p->func, cc_NZ, label); /* Exit mmx state? @@ -493,9 +579,8 @@ static boolean build_vertex_emit( struct translate_sse *p, /* Pop regs and return */ - x86_pop(p->func, idxEBX); - x86_pop(p->func, translateESI); - x86_pop(p->func, countEBP); + x86_pop(p->func, p->count_ESI); + x86_pop(p->func, p->idx_EBX); x86_ret(p->func); return TRUE; @@ -513,15 +598,16 @@ static void translate_sse_set_buffer( struct translate *translate, unsigned stride ) { struct translate_sse *p = (struct translate_sse *)translate; - unsigned i; - for (i = 0; i < p->translate.key.nr_elements; i++) { - if (p->translate.key.element[i].input_buffer == buf) { - p->attrib[i].input_ptr = ((char *)ptr + - p->translate.key.element[i].input_offset); - p->attrib[i].input_stride = stride; - } + if (buf < p->nr_buffers) { + p->buffer[buf].base_ptr = (char *)ptr; + p->buffer[buf].stride = stride; } + + if (0) debug_printf("%s %d/%d: %p %d\n", + __FUNCTION__, buf, + p->nr_buffers, + ptr, stride); } @@ -565,6 +651,7 @@ static void PIPE_CDECL translate_sse_run( struct translate *translate, struct translate *translate_sse2_create( const struct translate_key *key ) { struct translate_sse *p = NULL; + unsigned i; if (!rtasm_cpu_has_sse() || !rtasm_cpu_has_sse2()) goto fail; @@ -579,6 +666,11 @@ struct translate *translate_sse2_create( const struct translate_key *key ) p->translate.run_elts = translate_sse_run_elts; p->translate.run = translate_sse_run; + for (i = 0; i < key->nr_elements; i++) + p->nr_buffers = MAX2( p->nr_buffers, key->element[i].input_buffer + 1 ); + + if (0) debug_printf("nr_buffers: %d\n", p->nr_buffers); + if (!build_vertex_emit(p, &p->linear_func, TRUE)) goto fail; -- cgit v1.2.3 From 6afab9001e5ebe5a970810b0e12dbfac0d9abe14 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 13 Nov 2008 08:58:47 -0700 Subject: util: Use OpenGL rasterization rules in blits and mipmap generation. --- src/gallium/auxiliary/util/u_blit.c | 1 + src/gallium/auxiliary/util/u_gen_mipmap.c | 1 + 2 files changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 9adf72944e..d28201ac8d 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -104,6 +104,7 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) ctx->rasterizer.cull_mode = PIPE_WINDING_NONE; ctx->rasterizer.bypass_clipping = 1; /*ctx->rasterizer.bypass_vs = 1;*/ + ctx->rasterizer.gl_rasterization_rules = 1; /* samplers */ memset(&ctx->sampler, 0, sizeof(ctx->sampler)); diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index b19a649bbc..9d305ad763 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -725,6 +725,7 @@ util_create_gen_mipmap(struct pipe_context *pipe, ctx->rasterizer.cull_mode = PIPE_WINDING_NONE; ctx->rasterizer.bypass_clipping = 1; /*ctx->rasterizer.bypass_vs = 1;*/ + ctx->rasterizer.gl_rasterization_rules = 1; /* sampler state */ memset(&ctx->sampler, 0, sizeof(ctx->sampler)); -- cgit v1.2.3 From 7e584a70c492698be18bf4d6372b50d1a1c38385 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 14 Nov 2008 12:55:05 -0700 Subject: gallium: increase table size for fast log/pow functions The various conformance tests pass now. --- src/gallium/auxiliary/util/u_math.c | 2 +- src/gallium/auxiliary/util/u_math.h | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_math.c b/src/gallium/auxiliary/util/u_math.c index d1571cd1fc..2811475fa0 100644 --- a/src/gallium/auxiliary/util/u_math.c +++ b/src/gallium/auxiliary/util/u_math.c @@ -52,7 +52,7 @@ init_log2_table(void) { unsigned i; for (i = 0; i < LOG2_TABLE_SIZE; i++) - log2_table[i] = (float) log2(1.0 + i * (1.0 / LOG2_TABLE_SIZE)); + log2_table[i] = (float) log2(1.0 + i * (1.0 / LOG2_TABLE_SCALE)); } diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index aee69ab7ba..ac11d7001b 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -246,8 +246,9 @@ util_fast_exp(float x) } -#define LOG2_TABLE_SIZE_LOG2 8 -#define LOG2_TABLE_SIZE (1 << LOG2_TABLE_SIZE_LOG2) +#define LOG2_TABLE_SIZE_LOG2 16 +#define LOG2_TABLE_SCALE (1 << LOG2_TABLE_SIZE_LOG2) +#define LOG2_TABLE_SIZE (LOG2_TABLE_SCALE + 1) extern float log2_table[LOG2_TABLE_SIZE]; @@ -258,7 +259,8 @@ util_fast_log2(float x) float epart, mpart; num.f = x; epart = (float)(((num.i & 0x7f800000) >> 23) - 127); - mpart = log2_table[(num.i & 0x007fffff) >> (23 - LOG2_TABLE_SIZE_LOG2)]; + /* mpart = log2_table[mantissa*LOG2_TABLE_SCALE + 0.5] */ + mpart = log2_table[((num.i & 0x007fffff) + (1 << (22 - LOG2_TABLE_SIZE_LOG2))) >> (23 - LOG2_TABLE_SIZE_LOG2)]; return epart + mpart; } -- cgit v1.2.3 From 7b0e0e1a0d571291851e8a7b2e64c8425055cd69 Mon Sep 17 00:00:00 2001 From: Stephane Marchesin Date: Sun, 16 Nov 2008 20:32:05 +0100 Subject: gallivm: fix some small stuff. --- src/gallium/auxiliary/gallivm/gallivm_p.h | 4 ++-- src/gallium/auxiliary/gallivm/storage.cpp | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/gallivm_p.h b/src/gallium/auxiliary/gallivm/gallivm_p.h index ebf3e11cd5..d2c5852bdf 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_p.h +++ b/src/gallium/auxiliary/gallivm/gallivm_p.h @@ -101,10 +101,10 @@ static INLINE int gallivm_w_swizzle(int swizzle) return w; } -#endif /* MESA_LLVM */ - #if defined __cplusplus } #endif +#endif /* MESA_LLVM */ + #endif diff --git a/src/gallium/auxiliary/gallivm/storage.cpp b/src/gallium/auxiliary/gallivm/storage.cpp index 6f373f6dd5..73df24c976 100644 --- a/src/gallium/auxiliary/gallivm/storage.cpp +++ b/src/gallium/auxiliary/gallivm/storage.cpp @@ -323,7 +323,7 @@ llvm::Value * Storage::elemIdx(llvm::Value *ptr, int idx, if (indIdx) { getElem = GetElementPtrInst::Create(ptr, - BinaryOperator::create(Instruction::Add, + BinaryOperator::Create(Instruction::Add, indIdx, constantInt(idx), name("add"), -- cgit v1.2.3 From c13cf0d69094bd586df16bb5cf1aa306f8923307 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 11 Nov 2008 23:27:27 +0900 Subject: gallium: Make handle_table reentrant. Ensure that the object has consistent state also when calling the destroy callback. Namely, ensure the object passed to the callback is removed from the table prior to calling the destroy callback to avoid a infinite loop or double free. --- src/gallium/auxiliary/util/u_handle_table.c | 34 ++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c index 2c40011923..2d15932ce3 100644 --- a/src/gallium/auxiliary/util/u_handle_table.c +++ b/src/gallium/auxiliary/util/u_handle_table.c @@ -124,6 +124,28 @@ handle_table_resize(struct handle_table *ht, } +static INLINE void +handle_table_clear(struct handle_table *ht, + unsigned index) +{ + void *object; + + /* The order here is important so that the object being destroyed is not + * present in the table when seen by the destroy callback, because the + * destroy callback may directly or indirectly call the other functions in + * this module. + */ + + object = ht->objects[index]; + if(object) { + ht->objects[index] = NULL; + + if(ht->destroy) + ht->destroy(object); + } +} + + unsigned handle_table_add(struct handle_table *ht, void *object) @@ -184,9 +206,8 @@ handle_table_set(struct handle_table *ht, if(!handle_table_resize(ht, index)) return 0; - if(ht->objects[index] && ht->destroy) - ht->destroy(ht->objects[index]); - + handle_table_clear(ht, index); + ht->objects[index] = object; return handle; @@ -227,10 +248,8 @@ handle_table_remove(struct handle_table *ht, if(!object) return; - if(ht->destroy) - ht->destroy(object); + handle_table_clear(ht, index); - ht->objects[index] = NULL; if(index < ht->filled) ht->filled = index; } @@ -266,8 +285,7 @@ handle_table_destroy(struct handle_table *ht) if(ht->destroy) for(index = 0; index < ht->size; ++index) - if(ht->objects[index]) - ht->destroy(ht->objects[index]); + handle_table_clear(ht, index); FREE(ht->objects); FREE(ht); -- cgit v1.2.3 From 1e35d92953207dd5e40be4954ccc9015913f7f06 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 13 Nov 2008 20:34:10 +0900 Subject: gallium: State when there are no memory leaks detected. --- src/gallium/auxiliary/util/p_debug_mem.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/p_debug_mem.c index 9511479cbb..250fd60f63 100644 --- a/src/gallium/auxiliary/util/p_debug_mem.c +++ b/src/gallium/auxiliary/util/p_debug_mem.c @@ -265,6 +265,9 @@ debug_memory_end(unsigned long start_no) size_t total_size = 0; struct list_head *entry; + if(start_no == last_no) + return; + entry = list.prev; for (; entry != &list; entry = entry->prev) { struct debug_memory_header *hdr; @@ -302,4 +305,7 @@ debug_memory_end(unsigned long start_no) debug_printf("Total of %u KB of system memory apparently leaked\n", (total_size + 1023)/1024); } + else { + debug_printf("No memory leaks detected.\n"); + } } -- cgit v1.2.3 From ee172bf067d9a66faa9d57980970326b680df839 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 11 Nov 2008 23:27:27 +0900 Subject: gallium: Make handle_table reentrant. Ensure that the object has consistent state also when calling the destroy callback. Namely, ensure the object passed to the callback is removed from the table prior to calling the destroy callback to avoid a infinite loop or double free. --- src/gallium/auxiliary/util/u_handle_table.c | 34 ++++++++++++++++++++++------- 1 file changed, 26 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c index 2c40011923..2d15932ce3 100644 --- a/src/gallium/auxiliary/util/u_handle_table.c +++ b/src/gallium/auxiliary/util/u_handle_table.c @@ -124,6 +124,28 @@ handle_table_resize(struct handle_table *ht, } +static INLINE void +handle_table_clear(struct handle_table *ht, + unsigned index) +{ + void *object; + + /* The order here is important so that the object being destroyed is not + * present in the table when seen by the destroy callback, because the + * destroy callback may directly or indirectly call the other functions in + * this module. + */ + + object = ht->objects[index]; + if(object) { + ht->objects[index] = NULL; + + if(ht->destroy) + ht->destroy(object); + } +} + + unsigned handle_table_add(struct handle_table *ht, void *object) @@ -184,9 +206,8 @@ handle_table_set(struct handle_table *ht, if(!handle_table_resize(ht, index)) return 0; - if(ht->objects[index] && ht->destroy) - ht->destroy(ht->objects[index]); - + handle_table_clear(ht, index); + ht->objects[index] = object; return handle; @@ -227,10 +248,8 @@ handle_table_remove(struct handle_table *ht, if(!object) return; - if(ht->destroy) - ht->destroy(object); + handle_table_clear(ht, index); - ht->objects[index] = NULL; if(index < ht->filled) ht->filled = index; } @@ -266,8 +285,7 @@ handle_table_destroy(struct handle_table *ht) if(ht->destroy) for(index = 0; index < ht->size; ++index) - if(ht->objects[index]) - ht->destroy(ht->objects[index]); + handle_table_clear(ht, index); FREE(ht->objects); FREE(ht); -- cgit v1.2.3 From e45773b3de1bbd7db717336a1b5c964b6cdb718e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 13 Nov 2008 20:34:10 +0900 Subject: gallium: State when there are no memory leaks detected. --- src/gallium/auxiliary/util/p_debug_mem.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug_mem.c b/src/gallium/auxiliary/util/p_debug_mem.c index 9511479cbb..250fd60f63 100644 --- a/src/gallium/auxiliary/util/p_debug_mem.c +++ b/src/gallium/auxiliary/util/p_debug_mem.c @@ -265,6 +265,9 @@ debug_memory_end(unsigned long start_no) size_t total_size = 0; struct list_head *entry; + if(start_no == last_no) + return; + entry = list.prev; for (; entry != &list; entry = entry->prev) { struct debug_memory_header *hdr; @@ -302,4 +305,7 @@ debug_memory_end(unsigned long start_no) debug_printf("Total of %u KB of system memory apparently leaked\n", (total_size + 1023)/1024); } + else { + debug_printf("No memory leaks detected.\n"); + } } -- cgit v1.2.3 From 228afbc8e012769983c5504d60c0772c84359bb1 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 17 Nov 2008 16:40:21 +0900 Subject: gallium: Use costum log2 for all windows builds. --- src/gallium/auxiliary/util/u_math.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index ac11d7001b..1ae3234423 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -119,6 +119,7 @@ __inline double __cdecl atan2(double val) #if defined(_MSC_VER) + #if _MSC_VER < 1400 && !defined(__cplusplus) || defined(PIPE_SUBSYSTEM_WINDOWS_CE) static INLINE float cosf( float f ) @@ -161,12 +162,6 @@ static INLINE float logf( float f ) return (float) log( (double) f ); } -static INLINE double log2( double x ) -{ - const double invln2 = 1.442695041; - return log( x ) * invln2; -} - #else /* Work-around an extra semi-colon in VS 2005 logf definition */ #ifdef logf @@ -174,6 +169,13 @@ static INLINE double log2( double x ) #define logf(x) ((float)log((double)(x))) #endif /* logf */ #endif + +static INLINE double log2( double x ) +{ + const double invln2 = 1.442695041; + return log( x ) * invln2; +} + #endif /* _MSC_VER */ -- cgit v1.2.3 From 957f7d7d94e8d092ba98433e61b21ac704453519 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 14 Nov 2008 13:26:01 +0100 Subject: tgsi: Keep address register as a floating point. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 4a217454dd..177cf206b3 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -466,17 +466,6 @@ micro_exp2( #endif } -static void -micro_f2it( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->i[0] = (int) src->f[0]; - dst->i[1] = (int) src->f[1]; - dst->i[2] = (int) src->f[2]; - dst->i[3] = (int) src->f[3]; -} - static void micro_f2ut( union tgsi_exec_channel *dst, @@ -1081,10 +1070,10 @@ fetch_source( &indir_index ); /* add value of address register to the offset */ - index.i[0] += indir_index.i[0]; - index.i[1] += indir_index.i[1]; - index.i[2] += indir_index.i[2]; - index.i[3] += indir_index.i[3]; + index.i[0] += (int) indir_index.f[0]; + index.i[1] += (int) indir_index.f[1]; + index.i[2] += (int) indir_index.f[2]; + index.i[3] += (int) indir_index.f[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. @@ -1160,10 +1149,10 @@ fetch_source( &index2, &indir_index ); - index.i[0] += indir_index.i[0]; - index.i[1] += indir_index.i[1]; - index.i[2] += indir_index.i[2]; - index.i[3] += indir_index.i[3]; + index.i[0] += (int) indir_index.f[0]; + index.i[1] += (int) indir_index.f[1]; + index.i[2] += (int) indir_index.f[2]; + index.i[3] += (int) indir_index.f[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. @@ -1789,7 +1778,7 @@ exec_instruction( case TGSI_OPCODE_ARL: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_f2it( &r[0], &r[0] ); + micro_trunc( &r[0], &r[0] ); STORE( &r[0], 0, chan_index ); } break; -- cgit v1.2.3 From d86ffcffb365d1f9fc383e450c8e08bf86169726 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 14 Nov 2008 13:31:06 +0100 Subject: tgsi: Return 0.0 for negative constant register indices. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 177cf206b3..96567772b7 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -947,14 +947,22 @@ fetch_src_file_channel( switch( file ) { case TGSI_FILE_CONSTANT: assert(mach->Consts); - assert(index->i[0] >= 0); - assert(index->i[1] >= 0); - assert(index->i[2] >= 0); - assert(index->i[3] >= 0); - chan->f[0] = mach->Consts[index->i[0]][swizzle]; - chan->f[1] = mach->Consts[index->i[1]][swizzle]; - chan->f[2] = mach->Consts[index->i[2]][swizzle]; - chan->f[3] = mach->Consts[index->i[3]][swizzle]; + if (index->i[0] < 0) + chan->f[0] = 0.0f; + else + chan->f[0] = mach->Consts[index->i[0]][swizzle]; + if (index->i[1] < 0) + chan->f[1] = 0.0f; + else + chan->f[1] = mach->Consts[index->i[1]][swizzle]; + if (index->i[2] < 0) + chan->f[2] = 0.0f; + else + chan->f[2] = mach->Consts[index->i[2]][swizzle]; + if (index->i[3] < 0) + chan->f[3] = 0.0f; + else + chan->f[3] = mach->Consts[index->i[3]][swizzle]; break; case TGSI_FILE_INPUT: -- cgit v1.2.3 From 11fc390f6478526d4f0bdb4b7e628284da31b3b9 Mon Sep 17 00:00:00 2001 From: Robert Ellison Date: Fri, 21 Nov 2008 11:42:14 -0700 Subject: CELL: use variant-length fragment ops programs This is a set of changes that optimizes the memory use of fragment operation programs (by using and transmitting only as much memory as is needed for the fragment ops programs, instead of maximal sizes), as well as eliminate the dependency on hard-coded maximal program sizes. State that is not dependent on fragment facing (i.e. that isn't using two-sided stenciling) will only save and transmit a single fragment operation program, instead of two identical programs. - Added the ability to emit a LNOP (No Operation (Load)) instruction. This is used to pad the generated fragment operations programs to a multiple of 8 bytes, which is necessary for proper operation of the dual instruction pipeline, and also required for proper SPU-side decoding. - Added the ability to allocate and manage a variant-length struct cell_command_fragment_ops. This structure now puts the generated function field at the end, where it can be as large as necessary. - On the PPU side, we now combine the generated front-facing and back-facing code into a single variant-length buffer (and only use one if the two sets of code are identical) for transmission to the SPU. - On the SPU side, we pull the correct sizes out of the buffer, allocate a new code buffer if the one we have isn't large enough, and save the code to that buffer. The buffer is deallocated when the SPU exits. - Commented out the emit_fetch() static function, which was not being used. --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 7 +- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 11 ++- src/gallium/auxiliary/util/u_memory.h | 2 + src/gallium/drivers/cell/common.h | 31 +++++-- src/gallium/drivers/cell/ppu/cell_gen_fragment.c | 7 +- src/gallium/drivers/cell/ppu/cell_state_emit.c | 77 ++++++++++++++-- src/gallium/drivers/cell/ppu/cell_vertex_fetch.c | 3 + src/gallium/drivers/cell/spu/spu_command.c | 111 +++++++++++++++++------ src/gallium/drivers/cell/spu/spu_command.h | 32 ++++++- src/gallium/drivers/cell/spu/spu_main.c | 15 +-- src/gallium/drivers/cell/spu/spu_main.h | 4 +- 11 files changed, 232 insertions(+), 68 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 1bd9f1c8dd..b9a75ae559 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -341,7 +341,11 @@ static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, } - +#define EMIT(_name, _op) \ +void _name (struct spe_function *p) \ +{ \ + emit_RR(p, _op, 0, 0, 0, __FUNCTION__); \ +} #define EMIT_(_name, _op) \ void _name (struct spe_function *p, unsigned rT) \ @@ -713,7 +717,6 @@ hbrr; #if 0 stop; EMIT_RR (spe_stopd, 0x140); -EMIT_ (spe_lnop, 0x001); EMIT_ (spe_nop, 0x201); sync; EMIT_ (spe_dsync, 0x003); diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index 7c211ffc51..f9ad2acacd 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -99,7 +99,9 @@ extern void spe_comment(struct spe_function *p, int rel_indent, const char *s); #endif /* RTASM_PPC_SPE_H */ -#ifndef EMIT_ +#ifndef EMIT +#define EMIT(_name, _op) \ + extern void _name (struct spe_function *p); #define EMIT_(_name, _op) \ extern void _name (struct spe_function *p, unsigned rT); #define EMIT_R(_name, _op) \ @@ -129,7 +131,7 @@ extern void spe_comment(struct spe_function *p, int rel_indent, const char *s); #define EMIT_I16(_name, _op) \ extern void _name (struct spe_function *p, int imm); #define UNDEF_EMIT_MACROS -#endif /* EMIT_ */ +#endif /* EMIT */ /* Memory load / store instructions @@ -294,6 +296,10 @@ EMIT_RI16(spe_brz, 0x040) EMIT_RI16(spe_brhnz, 0x046) EMIT_RI16(spe_brhz, 0x044) +/* Control instructions + */ +EMIT (spe_lnop, 0x001) + extern void spe_lqd(struct spe_function *p, unsigned rT, unsigned rA, int offset); @@ -418,6 +424,7 @@ EMIT_R (spe_wrch, 0x10d) #ifdef UNDEF_EMIT_MACROS +#undef EMIT #undef EMIT_ #undef EMIT_R #undef EMIT_RR diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h index 857102719d..1a6b596421 100644 --- a/src/gallium/auxiliary/util/u_memory.h +++ b/src/gallium/auxiliary/util/u_memory.h @@ -151,6 +151,8 @@ REALLOC( void *old_ptr, unsigned old_size, unsigned new_size ) #define CALLOC_STRUCT(T) (struct T *) CALLOC(1, sizeof(struct T)) +#define CALLOC_VARIANT_LENGTH_STRUCT(T,more_size) ((struct T *) CALLOC(1, sizeof(struct T) + more_size)) + /** * Return memory on given byte alignment diff --git a/src/gallium/drivers/cell/common.h b/src/gallium/drivers/cell/common.h index a670ed3c6e..98554d7f52 100644 --- a/src/gallium/drivers/cell/common.h +++ b/src/gallium/drivers/cell/common.h @@ -121,11 +121,6 @@ #define CELL_DEBUG_CMD (1 << 5) #define CELL_DEBUG_CACHE (1 << 6) -/** Max instructions for doing per-fragment operations */ -#define SPU_MAX_FRAGMENT_OPS_INSTS 128 - - - #define CELL_FENCE_IDLE 0 #define CELL_FENCE_EMITTED 1 #define CELL_FENCE_SIGNALLED 2 @@ -153,18 +148,36 @@ struct cell_command_fence /** * Command to specify per-fragment operations state and generated code. - * Note that the dsa, blend, blend_color fields are really only needed + * Note that this is a variant-length structure, allocated with as + * much memory as needed to hold the generated code; the "code" + * field *must* be the last field in the structure. Also, the entire + * length of the structure (including the variant code field) must be + * a multiple of 8 bytes; we require that this structure itself be + * a multiple of 8 bytes, and that the generated code also be a multiple + * of 8 bytes. + * + * Also note that the dsa, blend, blend_color fields are really only needed * for the fallback/C per-pixel code. They're not used when we generate - * dynamic SPU fragment code (which is the normal case). + * dynamic SPU fragment code (which is the normal case), and will eventually + * be removed from this structure. */ struct cell_command_fragment_ops { uint64_t opcode; /**< CELL_CMD_STATE_FRAGMENT_OPS */ + + /* Fields for the fallback case */ struct pipe_depth_stencil_alpha_state dsa; struct pipe_blend_state blend; struct pipe_blend_color blend_color; - unsigned code_front[SPU_MAX_FRAGMENT_OPS_INSTS]; - unsigned code_back[SPU_MAX_FRAGMENT_OPS_INSTS]; + + /* Fields for the generated SPU code */ + unsigned total_code_size; + unsigned front_code_index; + unsigned back_code_index; + /* this field has variant length, and must be the last field in + * the structure + */ + unsigned code[0]; }; diff --git a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c index 82336d6635..2c64eb1bcc 100644 --- a/src/gallium/drivers/cell/ppu/cell_gen_fragment.c +++ b/src/gallium/drivers/cell/ppu/cell_gen_fragment.c @@ -1776,7 +1776,10 @@ gen_stencil_depth_test(struct spe_function *f, * \param cell the rendering context (in) * \param facing whether the generated code is for front-facing or * back-facing fragments - * \param f the generated function (out) + * \param f the generated function (in/out); on input, the function + * must already have been initialized. On exit, whatever + * instructions within the generated function have had + * the fragment ops appended. */ void cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct spe_function *f) @@ -1808,8 +1811,6 @@ cell_gen_fragment_function(struct cell_context *cell, const uint facing, struct int fbRGBA_reg; /**< framebuffer's RGBA colors for quad */ int fbZS_reg; /**< framebuffer's combined z/stencil values for quad */ - spe_init_func(f, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); - if (cell->debug_flags & CELL_DEBUG_ASM) { spe_print_code(f, true); spe_indent(f, 8); diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c index 031b27f11f..0a0af81f53 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_emit.c +++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c @@ -76,30 +76,86 @@ lookup_fragment_ops(struct cell_context *cell) */ if (!ops) { struct spe_function spe_code_front, spe_code_back; + unsigned int facing_dependent, total_code_size; if (0) debug_printf("**** Create New Fragment Ops\n"); - /* Prepare the buffer that will hold the generated code. */ - spe_init_func(&spe_code_front, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); - spe_init_func(&spe_code_back, SPU_MAX_FRAGMENT_OPS_INSTS * SPE_INST_SIZE); + /* Prepare the buffer that will hold the generated code. The + * "0" passed in for the size means that the SPE code will + * use a default size. + */ + spe_init_func(&spe_code_front, 0); + spe_init_func(&spe_code_back, 0); - /* generate new code. Always generate new code for both front-facing + /* Generate new code. Always generate new code for both front-facing * and back-facing fragments, even if it's the same code in both * cases. */ cell_gen_fragment_function(cell, CELL_FACING_FRONT, &spe_code_front); cell_gen_fragment_function(cell, CELL_FACING_BACK, &spe_code_back); - /* alloc new fragment ops command */ - ops = CALLOC_STRUCT(cell_command_fragment_ops); + /* Make sure the code is a multiple of 8 bytes long; this is + * required to ensure that the dual pipe instruction alignment + * is correct. It's also important for the SPU unpacking, + * which assumes 8-byte boundaries. + */ + unsigned int front_code_size = spe_code_size(&spe_code_front); + while (front_code_size % 8 != 0) { + spe_lnop(&spe_code_front); + front_code_size = spe_code_size(&spe_code_front); + } + unsigned int back_code_size = spe_code_size(&spe_code_back); + while (back_code_size % 8 != 0) { + spe_lnop(&spe_code_back); + back_code_size = spe_code_size(&spe_code_back); + } + /* Determine whether the code we generated is facing-dependent, by + * determining whether the generated code is different for the front- + * and back-facing fragments. + */ + if (front_code_size == back_code_size && memcmp(spe_code_front.store, spe_code_back.store, front_code_size) == 0) { + /* Code is identical; only need one copy. */ + facing_dependent = 0; + total_code_size = front_code_size; + } + else { + /* Code is different for front-facing and back-facing fragments. + * Need to send both copies. + */ + facing_dependent = 1; + total_code_size = front_code_size + back_code_size; + } + + /* alloc new fragment ops command. Note that this structure + * has variant length based on the total code size required. + */ + ops = CALLOC_VARIANT_LENGTH_STRUCT(cell_command_fragment_ops, total_code_size); /* populate the new cell_command_fragment_ops object */ ops->opcode = CELL_CMD_STATE_FRAGMENT_OPS; - memcpy(ops->code_front, spe_code_front.store, spe_code_size(&spe_code_front)); - memcpy(ops->code_back, spe_code_back.store, spe_code_size(&spe_code_back)); + ops->total_code_size = total_code_size; + ops->front_code_index = 0; + memcpy(ops->code, spe_code_front.store, front_code_size); + if (facing_dependent) { + /* We have separate front- and back-facing code. Append the + * back-facing code to the buffer. Be careful because the code + * size is in bytes, but the buffer is of unsigned elements. + */ + ops->back_code_index = front_code_size / sizeof(spe_code_front.store[0]); + memcpy(ops->code + ops->back_code_index, spe_code_back.store, back_code_size); + } + else { + /* Use the same code for front- and back-facing fragments */ + ops->back_code_index = ops->front_code_index; + } + + /* Set the fields for the fallback case. Note that these fields + * (and the whole fallback case) will eventually go away. + */ ops->dsa = *cell->depth_stencil; ops->blend = *cell->blend; + ops->blend_color = cell->blend_color; /* insert cell_command_fragment_ops object into keymap/cache */ util_keymap_insert(cell->fragment_ops_cache, &key, ops, NULL); @@ -200,9 +256,10 @@ cell_emit_state(struct cell_context *cell) CELL_NEW_DEPTH_STENCIL | CELL_NEW_BLEND)) { struct cell_command_fragment_ops *fops, *fops_cmd; - fops_cmd = cell_batch_alloc(cell, sizeof(*fops_cmd)); + /* Note that cell_command_fragment_ops is a variant-sized record */ fops = lookup_fragment_ops(cell); - memcpy(fops_cmd, fops, sizeof(*fops)); + fops_cmd = cell_batch_alloc(cell, sizeof(*fops_cmd) + fops->total_code_size); + memcpy(fops_cmd, fops, sizeof(*fops) + fops->total_code_size); } if (cell->dirty & CELL_NEW_SAMPLER) { diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c index 18969005b0..9cba537d9e 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_fetch.c @@ -145,6 +145,8 @@ emit_matrix_transpose(struct spe_function *p, } +#if 0 +/* This appears to not be used currently */ static void emit_fetch(struct spe_function *p, unsigned in_ptr, unsigned *offset, @@ -256,6 +258,7 @@ emit_fetch(struct spe_function *p, spe_release_register(p, float_one); } } +#endif void cell_update_vertex_fetch(struct draw_context *draw) diff --git a/src/gallium/drivers/cell/spu/spu_command.c b/src/gallium/drivers/cell/spu/spu_command.c index d5faf4e3aa..8500d19754 100644 --- a/src/gallium/drivers/cell/spu/spu_command.c +++ b/src/gallium/drivers/cell/spu/spu_command.c @@ -210,45 +210,72 @@ cmd_release_verts(const struct cell_command_release_verts *release) static void cmd_state_fragment_ops(const struct cell_command_fragment_ops *fops) { - static int warned = 0; - D_PRINTF(CELL_DEBUG_CMD, "CMD_STATE_FRAGMENT_OPS\n"); - /* Copy SPU code from batch buffer to spu buffer */ - memcpy(spu.fragment_ops_code_front, fops->code_front, SPU_MAX_FRAGMENT_OPS_INSTS * 4); - memcpy(spu.fragment_ops_code_back, fops->code_back, SPU_MAX_FRAGMENT_OPS_INSTS * 4); - /* Copy state info (for fallback case only) */ + + /* Copy state info (for fallback case only - this will eventually + * go away when the fallback case goes away) + */ memcpy(&spu.depth_stencil_alpha, &fops->dsa, sizeof(fops->dsa)); memcpy(&spu.blend, &fops->blend, sizeof(fops->blend)); memcpy(&spu.blend_color, &fops->blend_color, sizeof(fops->blend_color)); - /* Parity twist! For now, always use the fallback code by default, - * only switching to codegen when specifically requested. This - * allows us to develop freely without risking taking down the - * branch. - * - * Later, the parity of this check will be reversed, so that - * codegen is *always* used, unless we specifically indicate that - * we don't want it. - * - * Eventually, the option will be removed completely, because in - * final code we'll always use codegen and won't even provide the - * raw state records that the fallback code requires. + /* Make sure the SPU knows which buffers it's expected to read when + * it's told to pull tiles. */ - if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) == 0) { - spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) spu.fragment_ops_code_front; - spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) spu.fragment_ops_code_back; - } - else { - /* otherwise, the default fallback code remains in place */ + spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled); + + /* If we're forcing the fallback code to be used (for debug purposes), + * install that. Otherwise install the incoming SPU code. + */ + if ((spu.init.debug_flags & CELL_DEBUG_FRAGMENT_OP_FALLBACK) != 0) { + static unsigned int warned = 0; if (!warned) { fprintf(stderr, "Cell Warning: using fallback per-fragment code\n"); warned = 1; } + /* The following two lines aren't really necessary if you + * know the debug flags won't change during a run, and if you + * know that the function pointers are initialized correctly. + * We set them here to allow a person to change the debug + * flags during a run (from inside a debugger). + */ + spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; + spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; + return; } - spu.read_depth_stencil = (spu.depth_stencil_alpha.depth.enabled || spu.depth_stencil_alpha.stencil[0].enabled); -} + /* Make sure the SPU code buffer is large enough to hold the incoming code. + * Note that we *don't* use align_malloc() and align_free(), because + * those utility functions are *not* available in SPU code. + * */ + if (spu.fragment_ops_code_size < fops->total_code_size) { + if (spu.fragment_ops_code != NULL) { + free(spu.fragment_ops_code); + } + spu.fragment_ops_code_size = fops->total_code_size; + spu.fragment_ops_code = malloc(fops->total_code_size); + if (spu.fragment_ops_code == NULL) { + /* Whoops. */ + fprintf(stderr, "CELL Warning: failed to allocate fragment ops code (%d bytes) - using fallback\n", fops->total_code_size); + spu.fragment_ops_code = NULL; + spu.fragment_ops_code_size = 0; + spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; + spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; + return; + } + } + /* Copy the SPU code from the command buffer to the spu buffer */ + memcpy(spu.fragment_ops_code, fops->code, fops->total_code_size); + + /* Set the pointers for the front-facing and back-facing fragments + * to the specified offsets within the code. Note that if the + * front-facing and back-facing code are the same, they'll have + * the same offset. + */ + spu.fragment_ops[CELL_FACING_FRONT] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->front_code_index]; + spu.fragment_ops[CELL_FACING_BACK] = (spu_fragment_ops_func) &spu.fragment_ops_code[fops->back_code_index]; +} static void cmd_state_fragment_program(const struct cell_command_fragment_program *fp) @@ -588,7 +615,8 @@ cmd_batch(uint opcode) struct cell_command_fragment_ops *fops = (struct cell_command_fragment_ops *) &buffer[pos]; cmd_state_fragment_ops(fops); - pos += sizeof(*fops) / 8; + /* This is a variant-sized command */ + pos += (sizeof(*fops) + fops->total_code_size)/ 8; } break; case CELL_CMD_STATE_FRAGMENT_PROGRAM: @@ -756,3 +784,32 @@ command_loop(void) if (spu.init.debug_flags & CELL_DEBUG_CACHE) spu_dcache_report(); } + +/* Initialize this module; we manage the fragment ops buffer here. */ +void +spu_command_init(void) +{ + /* Install default/fallback fragment processing function. + * This will normally be overriden by a code-gen'd function + * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set. + */ + spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; + spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; + + /* Set up the basic empty buffer for code-gen'ed fragment ops */ + spu.fragment_ops_code = NULL; + spu.fragment_ops_code_size = 0; +} + +void +spu_command_close(void) +{ + /* Deallocate the code-gen buffer for fragment ops, and reset the + * fragment ops functions to their initial setting (just to leave + * things in a good state). + */ + if (spu.fragment_ops_code != NULL) { + free(spu.fragment_ops_code); + } + spu_command_init(); +} diff --git a/src/gallium/drivers/cell/spu/spu_command.h b/src/gallium/drivers/cell/spu/spu_command.h index 853e9aa549..83dcdade28 100644 --- a/src/gallium/drivers/cell/spu/spu_command.h +++ b/src/gallium/drivers/cell/spu/spu_command.h @@ -1,7 +1,35 @@ - - +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ extern void command_loop(void); +extern void +spu_command_init(void); +extern void +spu_command_close(void); diff --git a/src/gallium/drivers/cell/spu/spu_main.c b/src/gallium/drivers/cell/spu/spu_main.c index 7033f6037d..97c86d194d 100644 --- a/src/gallium/drivers/cell/spu/spu_main.c +++ b/src/gallium/drivers/cell/spu/spu_main.c @@ -58,17 +58,8 @@ one_time_init(void) memset(spu.ctile_status, TILE_STATUS_DEFINED, sizeof(spu.ctile_status)); memset(spu.ztile_status, TILE_STATUS_DEFINED, sizeof(spu.ztile_status)); invalidate_tex_cache(); - - /* Install default/fallback fragment processing function. - * This will normally be overriden by a code-gen'd function - * unless CELL_FORCE_FRAGMENT_OPS_FALLBACK is set. - */ - spu.fragment_ops[CELL_FACING_FRONT] = spu_fallback_fragment_ops; - spu.fragment_ops[CELL_FACING_BACK] = spu_fallback_fragment_ops; } - - /* In some versions of the SDK the SPE main takes 'unsigned long' as a * parameter. In others it takes 'unsigned long long'. Use a define to * select between the two. @@ -91,11 +82,11 @@ main(main_param_t speid, main_param_t argp) ASSERT(sizeof(tile_t) == TILE_SIZE * TILE_SIZE * 4); ASSERT(sizeof(struct cell_command_render) % 8 == 0); - ASSERT(((unsigned long) &spu.fragment_ops_code_front) % 8 == 0); - ASSERT(((unsigned long) &spu.fragment_ops_code_back) % 8 == 0); + ASSERT(sizeof(struct cell_command_fragment_ops) % 8 == 0); ASSERT(((unsigned long) &spu.fragment_program_code) % 8 == 0); one_time_init(); + spu_command_init(); D_PRINTF(CELL_DEBUG_CMD, "main() speid=%lu\n", (unsigned long) speid); D_PRINTF(CELL_DEBUG_FRAGMENT_OP_FALLBACK, "using fragment op fallback\n"); @@ -120,5 +111,7 @@ main(main_param_t speid, main_param_t argp) command_loop(); + spu_command_close(); + return 0; } diff --git a/src/gallium/drivers/cell/spu/spu_main.h b/src/gallium/drivers/cell/spu/spu_main.h index 24cf7d77ce..33767e7c51 100644 --- a/src/gallium/drivers/cell/spu/spu_main.h +++ b/src/gallium/drivers/cell/spu/spu_main.h @@ -169,8 +169,8 @@ struct spu_global ubyte ztile_status[CELL_MAX_HEIGHT/TILE_SIZE][CELL_MAX_WIDTH/TILE_SIZE] ALIGN16_ATTRIB; /** Current fragment ops machine code, at 8-byte boundary */ - uint fragment_ops_code_front[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN8_ATTRIB; - uint fragment_ops_code_back[SPU_MAX_FRAGMENT_OPS_INSTS] ALIGN8_ATTRIB; + uint *fragment_ops_code; + uint fragment_ops_code_size; /** Current fragment ops functions, 0 = frontfacing, 1 = backfacing */ spu_fragment_ops_func fragment_ops[2]; -- cgit v1.2.3 From 0b9e96fae9493d5d58f046e01c983a3c4267090e Mon Sep 17 00:00:00 2001 From: Brian Date: Sun, 23 Nov 2008 19:15:15 -0700 Subject: softpipe: remove old/unneeded dependencies between TGSI exec and softpipe Use tgsi_sampler struct as a base class. Softpipe subclasses it and adds the fields it needs. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 8 +- src/gallium/auxiliary/tgsi/tgsi_exec.h | 11 +- src/gallium/drivers/softpipe/sp_fs_exec.c | 2 +- src/gallium/drivers/softpipe/sp_fs_sse.c | 2 +- src/gallium/drivers/softpipe/sp_quad_fs.c | 23 +-- src/gallium/drivers/softpipe/sp_tex_sample.c | 262 +++++++++++++++------------ src/gallium/drivers/softpipe/sp_tex_sample.h | 50 ++++- src/gallium/drivers/softpipe/sp_tile_cache.c | 4 +- src/gallium/drivers/softpipe/sp_tile_cache.h | 2 +- 9 files changed, 216 insertions(+), 148 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 96567772b7..0fdfb91d39 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -133,7 +133,7 @@ tgsi_exec_machine_bind_shader( struct tgsi_exec_machine *mach, const struct tgsi_token *tokens, uint numSamplers, - struct tgsi_sampler *samplers) + struct tgsi_sampler **samplers) { uint k; struct tgsi_parse_context parse; @@ -1581,7 +1581,7 @@ exec_tex(struct tgsi_exec_machine *mach, else lodBias = 0.0; - fetch_texel(&mach->Samplers[unit], + fetch_texel(mach->Samplers[unit], &r[0], NULL, NULL, lodBias, /* S, T, P, BIAS */ &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */ break; @@ -1607,7 +1607,7 @@ exec_tex(struct tgsi_exec_machine *mach, else lodBias = 0.0; - fetch_texel(&mach->Samplers[unit], + fetch_texel(mach->Samplers[unit], &r[0], &r[1], &r[2], lodBias, /* inputs */ &r[0], &r[1], &r[2], &r[3]); /* outputs */ break; @@ -1633,7 +1633,7 @@ exec_tex(struct tgsi_exec_machine *mach, else lodBias = 0.0; - fetch_texel(&mach->Samplers[unit], + fetch_texel(mach->Samplers[unit], &r[0], &r[1], &r[2], lodBias, &r[0], &r[1], &r[2], &r[3]); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h index ac4b239910..4ffd4efbff 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.h +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h @@ -68,17 +68,12 @@ struct tgsi_interp_coef float dady[NUM_CHANNELS]; }; - -struct softpipe_tile_cache; /**< Opaque to TGSI */ - /** * Information for sampling textures, which must be implemented * by code outside the TGSI executor. */ struct tgsi_sampler { - const struct pipe_sampler_state *state; - struct pipe_texture *texture; /** Get samples for four fragments in a quad */ void (*get_samples)(struct tgsi_sampler *sampler, const float s[QUAD_SIZE], @@ -86,8 +81,6 @@ struct tgsi_sampler const float p[QUAD_SIZE], float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE]); - void *pipe; /*XXX temporary*/ - struct softpipe_tile_cache *cache; }; /** @@ -205,7 +198,7 @@ struct tgsi_exec_machine struct tgsi_exec_vector *Temps; struct tgsi_exec_vector *Addrs; - struct tgsi_sampler *Samplers; + struct tgsi_sampler **Samplers; float Imms[TGSI_EXEC_NUM_IMMEDIATES][4]; unsigned ImmLimit; @@ -268,7 +261,7 @@ tgsi_exec_machine_bind_shader( struct tgsi_exec_machine *mach, const struct tgsi_token *tokens, uint numSamplers, - struct tgsi_sampler *samplers); + struct tgsi_sampler **samplers); uint tgsi_exec_machine_run( diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c index f472dd0ed2..453b0373f0 100644 --- a/src/gallium/drivers/softpipe/sp_fs_exec.c +++ b/src/gallium/drivers/softpipe/sp_fs_exec.c @@ -92,7 +92,7 @@ sp_setup_pos_vector(const struct tgsi_interp_coef *coef, static void exec_prepare( const struct sp_fragment_shader *base, struct tgsi_exec_machine *machine, - struct tgsi_sampler *samplers ) + struct tgsi_sampler **samplers ) { /* * Bind tokens/shader to the interpreter's machine state. diff --git a/src/gallium/drivers/softpipe/sp_fs_sse.c b/src/gallium/drivers/softpipe/sp_fs_sse.c index 31908a517b..9a273c8764 100644 --- a/src/gallium/drivers/softpipe/sp_fs_sse.c +++ b/src/gallium/drivers/softpipe/sp_fs_sse.c @@ -69,7 +69,7 @@ struct sp_sse_fragment_shader { static void fs_sse_prepare( const struct sp_fragment_shader *base, struct tgsi_exec_machine *machine, - struct tgsi_sampler *samplers ) + struct tgsi_sampler **samplers ) { } diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 1f0cb3e035..730fa0cf49 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -50,8 +50,9 @@ struct quad_shade_stage { - struct quad_stage stage; - struct tgsi_sampler samplers[PIPE_MAX_SAMPLERS]; + struct quad_stage stage; /**< base class */ + struct sp_shader_sampler samplers[PIPE_MAX_SAMPLERS]; + struct sp_shader_sampler *samplers_list[PIPE_MAX_SAMPLERS]; struct tgsi_exec_machine machine; struct tgsi_exec_vector *inputs, *outputs; }; @@ -147,18 +148,10 @@ static void shade_begin(struct quad_stage *qs) { struct quad_shade_stage *qss = quad_shade_stage(qs); struct softpipe_context *softpipe = qs->softpipe; - unsigned i; - unsigned num = MAX2(softpipe->num_textures, softpipe->num_samplers); - - /* set TGSI sampler state that varies */ - for (i = 0; i < num; i++) { - qss->samplers[i].state = softpipe->sampler[i]; - qss->samplers[i].texture = softpipe->texture[i]; - } softpipe->fs->prepare( softpipe->fs, &qss->machine, - qss->samplers ); + qss->samplers_list ); qs->next->begin(qs->next); } @@ -191,12 +184,14 @@ struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ) qss->stage.run = shade_quad; qss->stage.destroy = shade_destroy; - /* set TGSI sampler state that's constant */ + /* setup TGSI sampler state */ for (i = 0; i < PIPE_MAX_SAMPLERS; i++) { assert(softpipe->tex_cache[i]); - qss->samplers[i].get_samples = sp_get_samples; - qss->samplers[i].pipe = &softpipe->pipe; + qss->samplers[i].base.get_samples = sp_get_samples; + qss->samplers[i].unit = i; + qss->samplers[i].sp = softpipe; qss->samplers[i].cache = softpipe->tex_cache[i]; + qss->samplers_list[i] = &qss->samplers[i]; } tgsi_exec_machine_init( &qss->machine ); diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index 49250ec084..b66caf9507 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -35,6 +35,7 @@ #include "sp_context.h" #include "sp_headers.h" #include "sp_surface.h" +#include "sp_texture.h" #include "sp_tex_sample.h" #include "sp_tile_cache.h" #include "pipe/p_context.h" @@ -463,7 +464,8 @@ choose_cube_face(float rx, float ry, float rz, float *newS, float *newT) * This is only done for fragment shaders, not vertex shaders. */ static float -compute_lambda(struct tgsi_sampler *sampler, +compute_lambda(const struct pipe_texture *tex, + const struct pipe_sampler_state *sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], @@ -471,7 +473,7 @@ compute_lambda(struct tgsi_sampler *sampler, { float rho, lambda; - assert(sampler->state->normalized_coords); + assert(sampler->normalized_coords); assert(s); { @@ -479,7 +481,7 @@ compute_lambda(struct tgsi_sampler *sampler, float dsdy = s[QUAD_TOP_LEFT] - s[QUAD_BOTTOM_LEFT]; dsdx = fabsf(dsdx); dsdy = fabsf(dsdy); - rho = MAX2(dsdx, dsdy) * sampler->texture->width[0]; + rho = MAX2(dsdx, dsdy) * tex->width[0]; } if (t) { float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]; @@ -487,7 +489,7 @@ compute_lambda(struct tgsi_sampler *sampler, float max; dtdx = fabsf(dtdx); dtdy = fabsf(dtdy); - max = MAX2(dtdx, dtdy) * sampler->texture->height[0]; + max = MAX2(dtdx, dtdy) * tex->height[0]; rho = MAX2(rho, max); } if (p) { @@ -496,13 +498,13 @@ compute_lambda(struct tgsi_sampler *sampler, float max; dpdx = fabsf(dpdx); dpdy = fabsf(dpdy); - max = MAX2(dpdx, dpdy) * sampler->texture->depth[0]; + max = MAX2(dpdx, dpdy) * tex->depth[0]; rho = MAX2(rho, max); } lambda = util_fast_log2(rho); - lambda += lodbias + sampler->state->lod_bias; - lambda = CLAMP(lambda, sampler->state->min_lod, sampler->state->max_lod); + lambda += lodbias + sampler->lod_bias; + lambda = CLAMP(lambda, sampler->min_lod, sampler->max_lod); return lambda; } @@ -516,7 +518,8 @@ compute_lambda(struct tgsi_sampler *sampler, * 4. Return image filter to use within mipmap images */ static void -choose_mipmap_levels(struct tgsi_sampler *sampler, +choose_mipmap_levels(const struct pipe_texture *texture, + const struct pipe_sampler_state *sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], @@ -524,25 +527,26 @@ choose_mipmap_levels(struct tgsi_sampler *sampler, unsigned *level0, unsigned *level1, float *levelBlend, unsigned *imgFilter) { - if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { + + if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { /* no mipmap selection needed */ - *level0 = *level1 = CLAMP((int) sampler->state->min_lod, - 0, (int) sampler->texture->last_level); + *level0 = *level1 = CLAMP((int) sampler->min_lod, + 0, (int) texture->last_level); - if (sampler->state->min_img_filter != sampler->state->mag_img_filter) { + if (sampler->min_img_filter != sampler->mag_img_filter) { /* non-mipmapped texture, but still need to determine if doing * minification or magnification. */ - float lambda = compute_lambda(sampler, s, t, p, lodbias); + float lambda = compute_lambda(texture, sampler, s, t, p, lodbias); if (lambda <= 0.0) { - *imgFilter = sampler->state->mag_img_filter; + *imgFilter = sampler->mag_img_filter; } else { - *imgFilter = sampler->state->min_img_filter; + *imgFilter = sampler->min_img_filter; } } else { - *imgFilter = sampler->state->mag_img_filter; + *imgFilter = sampler->mag_img_filter; } } else { @@ -550,32 +554,32 @@ choose_mipmap_levels(struct tgsi_sampler *sampler, if (1) /* fragment shader */ - lambda = compute_lambda(sampler, s, t, p, lodbias); + lambda = compute_lambda(texture, sampler, s, t, p, lodbias); else /* vertex shader */ lambda = lodbias; /* not really a bias, but absolute LOD */ if (lambda <= 0.0) { /* XXX threshold depends on the filter */ /* magnifying */ - *imgFilter = sampler->state->mag_img_filter; + *imgFilter = sampler->mag_img_filter; *level0 = *level1 = 0; } else { /* minifying */ - *imgFilter = sampler->state->min_img_filter; + *imgFilter = sampler->min_img_filter; /* choose mipmap level(s) and compute the blend factor between them */ - if (sampler->state->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { + if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NEAREST) { /* Nearest mipmap level */ const int lvl = (int) (lambda + 0.5); *level0 = - *level1 = CLAMP(lvl, 0, (int) sampler->texture->last_level); + *level1 = CLAMP(lvl, 0, (int) texture->last_level); } else { /* Linear interpolation between mipmap levels */ const int lvl = (int) lambda; - *level0 = CLAMP(lvl, 0, (int) sampler->texture->last_level); - *level1 = CLAMP(lvl + 1, 0, (int) sampler->texture->last_level); + *level0 = CLAMP(lvl, 0, (int) texture->last_level); + *level1 = CLAMP(lvl + 1, 0, (int) texture->last_level); *levelBlend = FRAC(lambda); /* blending weight between levels */ } } @@ -585,6 +589,7 @@ choose_mipmap_levels(struct tgsi_sampler *sampler, /** * Get a texel from a texture, using the texture tile cache. + * Called by the TGSI interpreter. * * \param face the cube face in 0..5 * \param level the mipmap level @@ -598,23 +603,29 @@ choose_mipmap_levels(struct tgsi_sampler *sampler, * sp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... */ static void -get_texel(struct tgsi_sampler *sampler, +get_texel(struct tgsi_sampler *tgsi_sampler, unsigned face, unsigned level, int x, int y, int z, float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) { - if (x < 0 || x >= (int) sampler->texture->width[level] || - y < 0 || y >= (int) sampler->texture->height[level] || - z < 0 || z >= (int) sampler->texture->depth[level]) { - rgba[0][j] = sampler->state->border_color[0]; - rgba[1][j] = sampler->state->border_color[1]; - rgba[2][j] = sampler->state->border_color[2]; - rgba[3][j] = sampler->state->border_color[3]; + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct softpipe_context *sp = samp->sp; + const uint unit = samp->unit; + const struct pipe_texture *texture = sp->texture[unit]; + const struct pipe_sampler_state *sampler = sp->sampler[unit]; + + if (x < 0 || x >= (int) texture->width[level] || + y < 0 || y >= (int) texture->height[level] || + z < 0 || z >= (int) texture->depth[level]) { + rgba[0][j] = sampler->border_color[0]; + rgba[1][j] = sampler->border_color[1]; + rgba[2][j] = sampler->border_color[2]; + rgba[3][j] = sampler->border_color[3]; } else { const int tx = x % TILE_SIZE; const int ty = y % TILE_SIZE; const struct softpipe_cached_tile *tile - = sp_get_cached_tile_tex(sampler->pipe, sampler->cache, + = sp_get_cached_tile_tex(samp->sp, samp->cache, x, y, z, face, level); rgba[0][j] = tile->data.color[ty][tx][0]; rgba[1][j] = tile->data.color[ty][tx][1]; @@ -624,7 +635,7 @@ get_texel(struct tgsi_sampler *sampler, { debug_printf("Get texel %f %f %f %f from %s\n", rgba[0][j], rgba[1][j], rgba[2][j], rgba[3][j], - pf_name(sampler->texture->format)); + pf_name(texture->format)); } } } @@ -682,7 +693,7 @@ shadow_compare(uint compare_func, * Could probably extend for 3D... */ static void -sp_get_samples_2d_common(struct tgsi_sampler *sampler, +sp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], @@ -690,28 +701,33 @@ sp_get_samples_2d_common(struct tgsi_sampler *sampler, float rgba[NUM_CHANNELS][QUAD_SIZE], const unsigned faces[4]) { - const uint compare_func = sampler->state->compare_func; + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct softpipe_context *sp = samp->sp; + const uint unit = samp->unit; + const struct pipe_texture *texture = sp->texture[unit]; + const struct pipe_sampler_state *sampler = sp->sampler[unit]; + const uint compare_func = sampler->compare_func; unsigned level0, level1, j, imgFilter; int width, height; float levelBlend; - choose_mipmap_levels(sampler, s, t, p, lodbias, + choose_mipmap_levels(texture, sampler, s, t, p, lodbias, &level0, &level1, &levelBlend, &imgFilter); - assert(sampler->state->normalized_coords); + assert(sampler->normalized_coords); - width = sampler->texture->width[level0]; - height = sampler->texture->height[level0]; + width = texture->width[level0]; + height = texture->height[level0]; assert(width > 0); switch (imgFilter) { case PIPE_TEX_FILTER_NEAREST: for (j = 0; j < QUAD_SIZE; j++) { - int x = nearest_texcoord(sampler->state->wrap_s, s[j], width); - int y = nearest_texcoord(sampler->state->wrap_t, t[j], height); - get_texel(sampler, faces[j], level0, x, y, 0, rgba, j); - if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + int x = nearest_texcoord(sampler->wrap_s, s[j], width); + int y = nearest_texcoord(sampler->wrap_t, t[j], height); + get_texel(tgsi_sampler, faces[j], level0, x, y, 0, rgba, j); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { shadow_compare(compare_func, rgba, p, j); } @@ -721,8 +737,8 @@ sp_get_samples_2d_common(struct tgsi_sampler *sampler, unsigned c; x = x / 2; y = y / 2; - get_texel(sampler, faces[j], level1, x, y, 0, rgba2, j); - if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ + get_texel(tgsi_sampler, faces[j], level1, x, y, 0, rgba2, j); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ shadow_compare(compare_func, rgba2, p, j); } @@ -737,13 +753,13 @@ sp_get_samples_2d_common(struct tgsi_sampler *sampler, for (j = 0; j < QUAD_SIZE; j++) { float tx[4][4], a, b; int x0, y0, x1, y1, c; - linear_texcoord(sampler->state->wrap_s, s[j], width, &x0, &x1, &a); - linear_texcoord(sampler->state->wrap_t, t[j], height, &y0, &y1, &b); - get_texel(sampler, faces[j], level0, x0, y0, 0, tx, 0); - get_texel(sampler, faces[j], level0, x1, y0, 0, tx, 1); - get_texel(sampler, faces[j], level0, x0, y1, 0, tx, 2); - get_texel(sampler, faces[j], level0, x1, y1, 0, tx, 3); - if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + linear_texcoord(sampler->wrap_s, s[j], width, &x0, &x1, &a); + linear_texcoord(sampler->wrap_t, t[j], height, &y0, &y1, &b); + get_texel(tgsi_sampler, faces[j], level0, x0, y0, 0, tx, 0); + get_texel(tgsi_sampler, faces[j], level0, x1, y0, 0, tx, 1); + get_texel(tgsi_sampler, faces[j], level0, x0, y1, 0, tx, 2); + get_texel(tgsi_sampler, faces[j], level0, x1, y1, 0, tx, 3); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { shadow_compare(compare_func, tx, p, 0); shadow_compare(compare_func, tx, p, 1); shadow_compare(compare_func, tx, p, 2); @@ -761,11 +777,11 @@ sp_get_samples_2d_common(struct tgsi_sampler *sampler, y0 = y0 / 2; x1 = x1 / 2; y1 = y1 / 2; - get_texel(sampler, faces[j], level1, x0, y0, 0, tx, 0); - get_texel(sampler, faces[j], level1, x1, y0, 0, tx, 1); - get_texel(sampler, faces[j], level1, x0, y1, 0, tx, 2); - get_texel(sampler, faces[j], level1, x1, y1, 0, tx, 3); - if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ + get_texel(tgsi_sampler, faces[j], level1, x0, y0, 0, tx, 0); + get_texel(tgsi_sampler, faces[j], level1, x1, y0, 0, tx, 1); + get_texel(tgsi_sampler, faces[j], level1, x0, y1, 0, tx, 2); + get_texel(tgsi_sampler, faces[j], level1, x1, y1, 0, tx, 3); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE){ shadow_compare(compare_func, tx, p, 0); shadow_compare(compare_func, tx, p, 1); shadow_compare(compare_func, tx, p, 2); @@ -817,27 +833,32 @@ sp_get_samples_2d(struct tgsi_sampler *sampler, static void -sp_get_samples_3d(struct tgsi_sampler *sampler, +sp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE]) { + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct softpipe_context *sp = samp->sp; + const uint unit = samp->unit; + const struct pipe_texture *texture = sp->texture[unit]; + const struct pipe_sampler_state *sampler = sp->sampler[unit]; /* get/map pipe_surfaces corresponding to 3D tex slices */ unsigned level0, level1, j, imgFilter; int width, height, depth; float levelBlend; const uint face = 0; - choose_mipmap_levels(sampler, s, t, p, lodbias, + choose_mipmap_levels(texture, sampler, s, t, p, lodbias, &level0, &level1, &levelBlend, &imgFilter); - assert(sampler->state->normalized_coords); + assert(sampler->normalized_coords); - width = sampler->texture->width[level0]; - height = sampler->texture->height[level0]; - depth = sampler->texture->depth[level0]; + width = texture->width[level0]; + height = texture->height[level0]; + depth = texture->depth[level0]; assert(width > 0); assert(height > 0); @@ -846,10 +867,10 @@ sp_get_samples_3d(struct tgsi_sampler *sampler, switch (imgFilter) { case PIPE_TEX_FILTER_NEAREST: for (j = 0; j < QUAD_SIZE; j++) { - int x = nearest_texcoord(sampler->state->wrap_s, s[j], width); - int y = nearest_texcoord(sampler->state->wrap_t, t[j], height); - int z = nearest_texcoord(sampler->state->wrap_r, p[j], depth); - get_texel(sampler, face, level0, x, y, z, rgba, j); + int x = nearest_texcoord(sampler->wrap_s, s[j], width); + int y = nearest_texcoord(sampler->wrap_t, t[j], height); + int z = nearest_texcoord(sampler->wrap_r, p[j], depth); + get_texel(tgsi_sampler, face, level0, x, y, z, rgba, j); if (level0 != level1) { /* get texels from second mipmap level and blend */ @@ -858,7 +879,7 @@ sp_get_samples_3d(struct tgsi_sampler *sampler, x /= 2; y /= 2; z /= 2; - get_texel(sampler, face, level1, x, y, z, rgba2, j); + get_texel(tgsi_sampler, face, level1, x, y, z, rgba2, j); for (c = 0; c < NUM_CHANNELS; c++) { rgba[c][j] = LERP(levelBlend, rgba2[c][j], rgba[c][j]); } @@ -871,17 +892,17 @@ sp_get_samples_3d(struct tgsi_sampler *sampler, float texel0[4][4], texel1[4][4]; float xw, yw, zw; /* interpolation weights */ int x0, x1, y0, y1, z0, z1, c; - linear_texcoord(sampler->state->wrap_s, s[j], width, &x0, &x1, &xw); - linear_texcoord(sampler->state->wrap_t, t[j], height, &y0, &y1, &yw); - linear_texcoord(sampler->state->wrap_r, p[j], depth, &z0, &z1, &zw); - get_texel(sampler, face, level0, x0, y0, z0, texel0, 0); - get_texel(sampler, face, level0, x1, y0, z0, texel0, 1); - get_texel(sampler, face, level0, x0, y1, z0, texel0, 2); - get_texel(sampler, face, level0, x1, y1, z0, texel0, 3); - get_texel(sampler, face, level0, x0, y0, z1, texel1, 0); - get_texel(sampler, face, level0, x1, y0, z1, texel1, 1); - get_texel(sampler, face, level0, x0, y1, z1, texel1, 2); - get_texel(sampler, face, level0, x1, y1, z1, texel1, 3); + linear_texcoord(sampler->wrap_s, s[j], width, &x0, &x1, &xw); + linear_texcoord(sampler->wrap_t, t[j], height, &y0, &y1, &yw); + linear_texcoord(sampler->wrap_r, p[j], depth, &z0, &z1, &zw); + get_texel(tgsi_sampler, face, level0, x0, y0, z0, texel0, 0); + get_texel(tgsi_sampler, face, level0, x1, y0, z0, texel0, 1); + get_texel(tgsi_sampler, face, level0, x0, y1, z0, texel0, 2); + get_texel(tgsi_sampler, face, level0, x1, y1, z0, texel0, 3); + get_texel(tgsi_sampler, face, level0, x0, y0, z1, texel1, 0); + get_texel(tgsi_sampler, face, level0, x1, y0, z1, texel1, 1); + get_texel(tgsi_sampler, face, level0, x0, y1, z1, texel1, 2); + get_texel(tgsi_sampler, face, level0, x1, y1, z1, texel1, 3); /* 3D lerp */ for (c = 0; c < 4; c++) { @@ -904,14 +925,14 @@ sp_get_samples_3d(struct tgsi_sampler *sampler, x1 /= 2; y1 /= 2; z1 /= 2; - get_texel(sampler, face, level1, x0, y0, z0, texel0, 0); - get_texel(sampler, face, level1, x1, y0, z0, texel0, 1); - get_texel(sampler, face, level1, x0, y1, z0, texel0, 2); - get_texel(sampler, face, level1, x1, y1, z0, texel0, 3); - get_texel(sampler, face, level1, x0, y0, z1, texel1, 0); - get_texel(sampler, face, level1, x1, y0, z1, texel1, 1); - get_texel(sampler, face, level1, x0, y1, z1, texel1, 2); - get_texel(sampler, face, level1, x1, y1, z1, texel1, 3); + get_texel(tgsi_sampler, face, level1, x0, y0, z0, texel0, 0); + get_texel(tgsi_sampler, face, level1, x1, y0, z0, texel0, 1); + get_texel(tgsi_sampler, face, level1, x0, y1, z0, texel0, 2); + get_texel(tgsi_sampler, face, level1, x1, y1, z0, texel0, 3); + get_texel(tgsi_sampler, face, level1, x0, y0, z1, texel1, 0); + get_texel(tgsi_sampler, face, level1, x1, y0, z1, texel1, 1); + get_texel(tgsi_sampler, face, level1, x0, y1, z1, texel1, 2); + get_texel(tgsi_sampler, face, level1, x1, y1, z1, texel1, 3); /* 3D lerp */ for (c = 0; c < 4; c++) { @@ -956,38 +977,43 @@ sp_get_samples_cube(struct tgsi_sampler *sampler, static void -sp_get_samples_rect(struct tgsi_sampler *sampler, +sp_get_samples_rect(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE]) { + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct softpipe_context *sp = samp->sp; + const uint unit = samp->unit; + const struct pipe_texture *texture = sp->texture[unit]; + const struct pipe_sampler_state *sampler = sp->sampler[unit]; //sp_get_samples_2d_common(sampler, s, t, p, lodbias, rgba, faces); static const uint face = 0; - const uint compare_func = sampler->state->compare_func; + const uint compare_func = sampler->compare_func; unsigned level0, level1, j, imgFilter; int width, height; float levelBlend; - choose_mipmap_levels(sampler, s, t, p, lodbias, + choose_mipmap_levels(texture, sampler, s, t, p, lodbias, &level0, &level1, &levelBlend, &imgFilter); /* texture RECTS cannot be mipmapped */ assert(level0 == level1); - width = sampler->texture->width[level0]; - height = sampler->texture->height[level0]; + width = texture->width[level0]; + height = texture->height[level0]; assert(width > 0); switch (imgFilter) { case PIPE_TEX_FILTER_NEAREST: for (j = 0; j < QUAD_SIZE; j++) { - int x = nearest_texcoord_unnorm(sampler->state->wrap_s, s[j], width); - int y = nearest_texcoord_unnorm(sampler->state->wrap_t, t[j], height); - get_texel(sampler, face, level0, x, y, 0, rgba, j); - if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + int x = nearest_texcoord_unnorm(sampler->wrap_s, s[j], width); + int y = nearest_texcoord_unnorm(sampler->wrap_t, t[j], height); + get_texel(tgsi_sampler, face, level0, x, y, 0, rgba, j); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { shadow_compare(compare_func, rgba, p, j); } } @@ -997,13 +1023,13 @@ sp_get_samples_rect(struct tgsi_sampler *sampler, for (j = 0; j < QUAD_SIZE; j++) { float tx[4][4], a, b; int x0, y0, x1, y1, c; - linear_texcoord_unnorm(sampler->state->wrap_s, s[j], width, &x0, &x1, &a); - linear_texcoord_unnorm(sampler->state->wrap_t, t[j], height, &y0, &y1, &b); - get_texel(sampler, face, level0, x0, y0, 0, tx, 0); - get_texel(sampler, face, level0, x1, y0, 0, tx, 1); - get_texel(sampler, face, level0, x0, y1, 0, tx, 2); - get_texel(sampler, face, level0, x1, y1, 0, tx, 3); - if (sampler->state->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { + linear_texcoord_unnorm(sampler->wrap_s, s[j], width, &x0, &x1, &a); + linear_texcoord_unnorm(sampler->wrap_t, t[j], height, &y0, &y1, &b); + get_texel(tgsi_sampler, face, level0, x0, y0, 0, tx, 0); + get_texel(tgsi_sampler, face, level0, x1, y0, 0, tx, 1); + get_texel(tgsi_sampler, face, level0, x0, y1, 0, tx, 2); + get_texel(tgsi_sampler, face, level0, x1, y1, 0, tx, 3); + if (sampler->compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE) { shadow_compare(compare_func, tx, p, 0); shadow_compare(compare_func, tx, p, 1); shadow_compare(compare_func, tx, p, 2); @@ -1036,34 +1062,40 @@ sp_get_samples_rect(struct tgsi_sampler *sampler, * a new tgsi_sampler object for each state combo it finds.... */ void -sp_get_samples(struct tgsi_sampler *sampler, +sp_get_samples(struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], float lodbias, float rgba[NUM_CHANNELS][QUAD_SIZE]) { - if (!sampler->texture) + const struct sp_shader_sampler *samp = sp_shader_sampler(tgsi_sampler); + const struct softpipe_context *sp = samp->sp; + const uint unit = samp->unit; + const struct pipe_texture *texture = sp->texture[unit]; + const struct pipe_sampler_state *sampler = sp->sampler[unit]; + + if (!texture) return; - switch (sampler->texture->target) { + switch (texture->target) { case PIPE_TEXTURE_1D: - assert(sampler->state->normalized_coords); - sp_get_samples_1d(sampler, s, t, p, lodbias, rgba); + assert(sampler->normalized_coords); + sp_get_samples_1d(tgsi_sampler, s, t, p, lodbias, rgba); break; case PIPE_TEXTURE_2D: - if (sampler->state->normalized_coords) - sp_get_samples_2d(sampler, s, t, p, lodbias, rgba); + if (sampler->normalized_coords) + sp_get_samples_2d(tgsi_sampler, s, t, p, lodbias, rgba); else - sp_get_samples_rect(sampler, s, t, p, lodbias, rgba); + sp_get_samples_rect(tgsi_sampler, s, t, p, lodbias, rgba); break; case PIPE_TEXTURE_3D: - assert(sampler->state->normalized_coords); - sp_get_samples_3d(sampler, s, t, p, lodbias, rgba); + assert(sampler->normalized_coords); + sp_get_samples_3d(tgsi_sampler, s, t, p, lodbias, rgba); break; case PIPE_TEXTURE_CUBE: - assert(sampler->state->normalized_coords); - sp_get_samples_cube(sampler, s, t, p, lodbias, rgba); + assert(sampler->normalized_coords); + sp_get_samples_cube(tgsi_sampler, s, t, p, lodbias, rgba); break; default: assert(0); diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index 404bfd0c36..783169c939 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -1,8 +1,56 @@ +/************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + #ifndef SP_TEX_SAMPLE_H #define SP_TEX_SAMPLE_H -struct tgsi_sampler; +#include "tgsi/tgsi_exec.h" + + +/** + * Subclass of tgsi_sampler + */ +struct sp_shader_sampler +{ + struct tgsi_sampler base; /**< base class */ + + uint unit; + struct softpipe_context *sp; + struct softpipe_tile_cache *cache; +}; + + + +static INLINE struct sp_shader_sampler * +sp_shader_sampler(struct tgsi_sampler *sampler) +{ + return (struct sp_shader_sampler *) sampler; +} extern void diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index b50c984513..78b0efa46d 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -494,11 +494,11 @@ tex_cache_pos(int x, int y, int z, int face, int level) * Tiles are read-only and indexed with more params. */ const struct softpipe_cached_tile * -sp_get_cached_tile_tex(struct pipe_context *pipe, +sp_get_cached_tile_tex(struct softpipe_context *sp, struct softpipe_tile_cache *tc, int x, int y, int z, int face, int level) { - struct pipe_screen *screen = pipe->screen; + struct pipe_screen *screen = sp->pipe.screen; /* tile pos in framebuffer: */ const int tile_x = x & ~(TILE_SIZE - 1); const int tile_y = y & ~(TILE_SIZE - 1); diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.h b/src/gallium/drivers/softpipe/sp_tile_cache.h index bc96c941f6..a66bb50bcc 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.h +++ b/src/gallium/drivers/softpipe/sp_tile_cache.h @@ -96,7 +96,7 @@ sp_get_cached_tile(struct softpipe_context *softpipe, struct softpipe_tile_cache *tc, int x, int y); extern const struct softpipe_cached_tile * -sp_get_cached_tile_tex(struct pipe_context *pipe, +sp_get_cached_tile_tex(struct softpipe_context *softpipe, struct softpipe_tile_cache *tc, int x, int y, int z, int face, int level); -- cgit v1.2.3 From a6d866f72c88d48d2bcfb3e3c882fdb639b5a8ce Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 24 Nov 2008 13:59:06 +0900 Subject: pipebuffer: Implement proper buffer validation. --- src/gallium/auxiliary/pipebuffer/pb_buffer.h | 36 ++++- .../auxiliary/pipebuffer/pb_buffer_fenced.c | 158 +++++++++++++-------- .../auxiliary/pipebuffer/pb_buffer_fenced.h | 21 +-- .../auxiliary/pipebuffer/pb_buffer_malloc.c | 20 +++ src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 23 ++- src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c | 26 +++- .../auxiliary/pipebuffer/pb_bufmgr_fenced.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 25 +++- src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c | 25 +++- src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c | 21 +++ src/gallium/auxiliary/pipebuffer/pb_validate.c | 84 ++++++++--- src/gallium/auxiliary/pipebuffer/pb_validate.h | 10 +- src/gallium/auxiliary/pipebuffer/pb_winsys.c | 22 ++- 14 files changed, 367 insertions(+), 108 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index 8505d333bd..c9570d7be2 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -37,7 +37,7 @@ * There is no obligation of a winsys driver to use this library. And a pipe * driver should be completly agnostic about it. * - * \author Jos� Fonseca + * \author Jose Fonseca */ #ifndef PB_BUFFER_H_ @@ -46,6 +46,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" +#include "pipe/p_error.h" #include "pipe/p_state.h" #include "pipe/p_inlines.h" @@ -56,6 +57,8 @@ extern "C" { struct pb_vtbl; +struct pb_validate; + /** * Buffer description. @@ -104,6 +107,13 @@ struct pb_vtbl void (*unmap)( struct pb_buffer *buf ); + enum pipe_error (*validate)( struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags ); + + void (*fence)( struct pb_buffer *buf, + struct pipe_fence_handle *fence ); + /** * Get the base buffer and the offset. * @@ -118,6 +128,7 @@ struct pb_vtbl void (*get_base_buffer)( struct pb_buffer *buf, struct pb_buffer **base_buf, unsigned *offset ); + }; @@ -173,10 +184,33 @@ pb_get_base_buffer( struct pb_buffer *buf, offset = 0; return; } + assert(buf->vtbl->get_base_buffer); buf->vtbl->get_base_buffer(buf, base_buf, offset); } +static INLINE enum pipe_error +pb_validate(struct pb_buffer *buf, struct pb_validate *vl, unsigned flags) +{ + assert(buf); + if(!buf) + return PIPE_ERROR; + assert(buf->vtbl->validate); + return buf->vtbl->validate(buf, vl, flags); +} + + +static INLINE void +pb_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence) +{ + assert(buf); + if(!buf) + return; + assert(buf->vtbl->fence); + buf->vtbl->fence(buf, fence); +} + + static INLINE void pb_destroy(struct pb_buffer *buf) { diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 17b2781052..17300cb553 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -29,7 +29,7 @@ * \file * Implementation of fenced buffers. * - * \author José Fonseca + * \author Jose Fonseca * \author Thomas Hellström */ @@ -59,13 +59,6 @@ */ #define SUPER(__derived) (&(__derived)->base) -#define PIPE_BUFFER_USAGE_CPU_READ_WRITE \ - ( PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE ) -#define PIPE_BUFFER_USAGE_GPU_READ_WRITE \ - ( PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE ) -#define PIPE_BUFFER_USAGE_WRITE \ - ( PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_GPU_WRITE ) - struct fenced_buffer_list { @@ -97,6 +90,8 @@ struct fenced_buffer unsigned flags; unsigned mapcount; + struct pb_validate *vl; + unsigned validation_flags; struct pipe_fence_handle *fence; struct list_head head; @@ -108,7 +103,6 @@ static INLINE struct fenced_buffer * fenced_buffer(struct pb_buffer *buf) { assert(buf); - assert(buf->vtbl == &fenced_buffer_vtbl); return (struct fenced_buffer *)buf; } @@ -274,6 +268,7 @@ fenced_buffer_map(struct pb_buffer *buf, struct fenced_buffer *fenced_buf = fenced_buffer(buf); void *map; + assert(flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE); assert(!(flags & ~PIPE_BUFFER_USAGE_CPU_READ_WRITE)); flags &= PIPE_BUFFER_USAGE_CPU_READ_WRITE; @@ -315,6 +310,101 @@ fenced_buffer_unmap(struct pb_buffer *buf) } +static enum pipe_error +fenced_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + enum pipe_error ret; + + if(!vl) { + /* invalidate */ + fenced_buf->vl = NULL; + fenced_buf->validation_flags = 0; + return PIPE_OK; + } + + assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); + assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE)); + flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE; + + /* Buffer cannot be validated in two different lists */ + if(fenced_buf->vl && fenced_buf->vl != vl) + return PIPE_ERROR_RETRY; + + /* Do not validate if buffer is still mapped */ + if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { + /* TODO: wait for the thread that mapped the buffer to unmap it */ + return PIPE_ERROR_RETRY; + } + + /* Allow concurrent GPU reads, but serialize GPU writes */ + if(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE) { + if((fenced_buf->flags | flags) & PIPE_BUFFER_USAGE_GPU_WRITE) { + _fenced_buffer_finish(fenced_buf); + } + } + + if(fenced_buf->vl == vl && + (fenced_buf->validation_flags & flags) == flags) { + /* Nothing to do -- buffer already validated */ + return PIPE_OK; + } + + /* Final sanity checking */ + assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE)); + assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE)); + assert(!fenced_buf->mapcount); + + ret = pb_validate(fenced_buf->buffer, vl, flags); + if (ret != PIPE_OK) + return ret; + + fenced_buf->vl = vl; + fenced_buf->validation_flags |= flags; + + return PIPE_OK; +} + + +static void +fenced_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + struct fenced_buffer *fenced_buf; + struct fenced_buffer_list *fenced_list; + struct pipe_winsys *winsys; + + fenced_buf = fenced_buffer(buf); + fenced_list = fenced_buf->list; + winsys = fenced_list->winsys; + + if(fence == fenced_buf->fence) { + /* Nothing to do */ + return; + } + + assert(fenced_buf->vl); + assert(fenced_buf->validation_flags); + + pipe_mutex_lock(fenced_list->mutex); + if (fenced_buf->fence) + _fenced_buffer_remove(fenced_list, fenced_buf); + if (fence) { + winsys->fence_reference(winsys, &fenced_buf->fence, fence); + fenced_buf->flags |= fenced_buf->validation_flags; + _fenced_buffer_add(fenced_buf); + } + pipe_mutex_unlock(fenced_list->mutex); + + pb_fence(fenced_buf->buffer, fence); + + fenced_buf->vl = NULL; + fenced_buf->validation_flags = 0; +} + + static void fenced_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -325,11 +415,13 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf, } -const struct pb_vtbl +static const struct pb_vtbl fenced_buffer_vtbl = { fenced_buffer_destroy, fenced_buffer_map, fenced_buffer_unmap, + fenced_buffer_validate, + fenced_buffer_fence, fenced_buffer_get_base_buffer }; @@ -362,52 +454,6 @@ fenced_buffer_create(struct fenced_buffer_list *fenced_list, } -void -buffer_fence(struct pb_buffer *buf, - struct pipe_fence_handle *fence) -{ - struct fenced_buffer *fenced_buf; - struct fenced_buffer_list *fenced_list; - struct pipe_winsys *winsys; - /* FIXME: receive this as a parameter */ - unsigned flags = fence ? PIPE_BUFFER_USAGE_GPU_READ_WRITE : 0; - - /* This is a public function, so be extra cautious with the buffer passed, - * as happens frequently to receive null buffers, or pointer to buffers - * other than fenced buffers. */ - assert(buf); - if(!buf) - return; - assert(buf->vtbl == &fenced_buffer_vtbl); - if(buf->vtbl != &fenced_buffer_vtbl) - return; - - fenced_buf = fenced_buffer(buf); - fenced_list = fenced_buf->list; - winsys = fenced_list->winsys; - - if(!fence || fence == fenced_buf->fence) { - /* Handle the same fence case specially, not only because it is a fast - * path, but mostly to avoid serializing two writes with the same fence, - * as that would bring the hardware down to synchronous operation without - * any benefit. - */ - fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE; - return; - } - - pipe_mutex_lock(fenced_list->mutex); - if (fenced_buf->fence) - _fenced_buffer_remove(fenced_list, fenced_buf); - if (fence) { - winsys->fence_reference(winsys, &fenced_buf->fence, fence); - fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE; - _fenced_buffer_add(fenced_buf); - } - pipe_mutex_unlock(fenced_list->mutex); -} - - struct fenced_buffer_list * fenced_buffer_list_create(struct pipe_winsys *winsys) { diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h index 50d5891bdb..b15c676194 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h @@ -44,7 +44,7 @@ * Between the handle's destruction, and the fence signalling, the buffer is * stored in a fenced buffer list. * - * \author José Fonseca + * \author Jose Fonseca */ #ifndef PB_BUFFER_FENCED_H_ @@ -70,14 +70,6 @@ struct pipe_fence_handle; struct fenced_buffer_list; -/** - * The fenced buffer's virtual function table. - * - * NOTE: Made public for debugging purposes. - */ -extern const struct pb_vtbl fenced_buffer_vtbl; - - /** * Create a fenced buffer list. * @@ -108,17 +100,6 @@ fenced_buffer_create(struct fenced_buffer_list *fenced, struct pb_buffer *buffer); -/** - * Set a buffer's fence. - * - * NOTE: Although it takes a generic pb_buffer argument, it will fail - * on everything but buffers returned by fenced_buffer_create. - */ -void -buffer_fence(struct pb_buffer *buf, - struct pipe_fence_handle *fence); - - #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c index 1bf22a2ec0..53f497cfb0 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c @@ -81,6 +81,24 @@ malloc_buffer_unmap(struct pb_buffer *buf) } +static enum pipe_error +malloc_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + assert(0); + return PIPE_ERROR; +} + + +static void +malloc_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + assert(0); +} + + static void malloc_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -96,6 +114,8 @@ malloc_buffer_vtbl = { malloc_buffer_destroy, malloc_buffer_map, malloc_buffer_unmap, + malloc_buffer_validate, + malloc_buffer_fence, malloc_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index cafbee045a..8fe2704708 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -43,7 +43,7 @@ * - the fenced buffer manager, which will delay buffer destruction until the * the moment the card finishing processing it. * - * \author José Fonseca + * \author Jose Fonseca */ #ifndef PB_BUFMGR_H_ diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 8f118874ec..f57a7bffd7 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -29,7 +29,7 @@ * \file * Buffer cache. * - * \author José Fonseca + * \author Jose Fonseca * \author Thomas Hellström */ @@ -183,6 +183,25 @@ pb_cache_buffer_unmap(struct pb_buffer *_buf) } +static enum pipe_error +pb_cache_buffer_validate(struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pb_cache_buffer *buf = pb_cache_buffer(_buf); + return pb_validate(buf->buffer, vl, flags); +} + + +static void +pb_cache_buffer_fence(struct pb_buffer *_buf, + struct pipe_fence_handle *fence) +{ + struct pb_cache_buffer *buf = pb_cache_buffer(_buf); + pb_fence(buf->buffer, fence); +} + + static void pb_cache_buffer_get_base_buffer(struct pb_buffer *_buf, struct pb_buffer **base_buf, @@ -198,6 +217,8 @@ pb_cache_buffer_vtbl = { pb_cache_buffer_destroy, pb_cache_buffer_map, pb_cache_buffer_unmap, + pb_cache_buffer_validate, + pb_cache_buffer_fence, pb_cache_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index 1675e6e182..62639fe1c8 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -29,7 +29,7 @@ * \file * Debug buffer manager to detect buffer under- and overflows. * - * \author José Fonseca + * \author Jose Fonseca */ @@ -255,11 +255,35 @@ pb_debug_buffer_get_base_buffer(struct pb_buffer *_buf, } +static enum pipe_error +pb_debug_buffer_validate(struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pb_debug_buffer *buf = pb_debug_buffer(_buf); + + pb_debug_buffer_check(buf); + + return pb_validate(buf->buffer, vl, flags); +} + + +static void +pb_debug_buffer_fence(struct pb_buffer *_buf, + struct pipe_fence_handle *fence) +{ + struct pb_debug_buffer *buf = pb_debug_buffer(_buf); + pb_fence(buf->buffer, fence); +} + + const struct pb_vtbl pb_debug_buffer_vtbl = { pb_debug_buffer_destroy, pb_debug_buffer_map, pb_debug_buffer_unmap, + pb_debug_buffer_validate, + pb_debug_buffer_fence, pb_debug_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c index 633ee70a75..f8fd2cd531 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c @@ -30,7 +30,7 @@ * \file * A buffer manager that wraps buffers in fenced buffers. * - * \author José Fonseca + * \author Jose Fonseca */ diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index fe80ca30ee..607f10bc73 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -29,7 +29,7 @@ * \file * Buffer manager using the old texture memory manager. * - * \author José Fonseca + * \author Jose Fonseca */ @@ -124,6 +124,27 @@ mm_buffer_unmap(struct pb_buffer *buf) } +static enum pipe_error +mm_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + struct mm_buffer *mm_buf = mm_buffer(buf); + struct mm_pb_manager *mm = mm_buf->mgr; + return pb_validate(mm->buffer, vl, flags); +} + + +static void +mm_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + struct mm_buffer *mm_buf = mm_buffer(buf); + struct mm_pb_manager *mm = mm_buf->mgr; + pb_fence(mm->buffer, fence); +} + + static void mm_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -141,6 +162,8 @@ mm_buffer_vtbl = { mm_buffer_destroy, mm_buffer_map, mm_buffer_unmap, + mm_buffer_validate, + mm_buffer_fence, mm_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c index 61ac291ed7..a6ff37653e 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -30,7 +30,7 @@ * \file * Batch buffer pool management. * - * \author José Fonseca + * \author Jose Fonseca * \author Thomas Hellström */ @@ -138,6 +138,27 @@ pool_buffer_unmap(struct pb_buffer *buf) } +static enum pipe_error +pool_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pool_buffer *pool_buf = pool_buffer(buf); + struct pool_pb_manager *pool = pool_buf->mgr; + return pb_validate(pool->buffer, vl, flags); +} + + +static void +pool_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + struct pool_buffer *pool_buf = pool_buffer(buf); + struct pool_pb_manager *pool = pool_buf->mgr; + pb_fence(pool->buffer, fence); +} + + static void pool_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -155,6 +176,8 @@ pool_buffer_vtbl = { pool_buffer_destroy, pool_buffer_map, pool_buffer_unmap, + pool_buffer_validate, + pool_buffer_fence, pool_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index 2a80154920..9b9fedccb4 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -248,6 +248,25 @@ pb_slab_buffer_unmap(struct pb_buffer *_buf) } +static enum pipe_error +pb_slab_buffer_validate(struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + return pb_validate(buf->slab->bo, vl, flags); +} + + +static void +pb_slab_buffer_fence(struct pb_buffer *_buf, + struct pipe_fence_handle *fence) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + pb_fence(buf->slab->bo, fence); +} + + static void pb_slab_buffer_get_base_buffer(struct pb_buffer *_buf, struct pb_buffer **base_buf, @@ -264,6 +283,8 @@ pb_slab_buffer_vtbl = { pb_slab_buffer_destroy, pb_slab_buffer_map, pb_slab_buffer_unmap, + pb_slab_buffer_validate, + pb_slab_buffer_fence, pb_slab_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c index 1e54fc39d4..726ae1688e 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.c +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c @@ -46,9 +46,16 @@ #define PB_VALIDATE_INITIAL_SIZE 1 /* 512 */ +struct pb_validate_entry +{ + struct pb_buffer *buf; + unsigned flags; +}; + + struct pb_validate { - struct pb_buffer **buffers; + struct pb_validate_entry *entries; unsigned used; unsigned size; }; @@ -56,54 +63,87 @@ struct pb_validate enum pipe_error pb_validate_add_buffer(struct pb_validate *vl, - struct pb_buffer *buf) + struct pb_buffer *buf, + unsigned flags) { assert(buf); if(!buf) return PIPE_ERROR; + assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); + assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE)); + flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE; + /* We only need to store one reference for each buffer, so avoid storing - * consecutive references for the same buffer. It might not be the more - * common pasttern, but it is easy to implement. + * consecutive references for the same buffer. It might not be the most + * common pattern, but it is easy to implement. */ - if(vl->used && vl->buffers[vl->used - 1] == buf) { + if(vl->used && vl->entries[vl->used - 1].buf == buf) { + vl->entries[vl->used - 1].flags |= flags; return PIPE_OK; } /* Grow the table */ if(vl->used == vl->size) { unsigned new_size; - struct pb_buffer **new_buffers; + struct pb_validate_entry *new_entries; new_size = vl->size * 2; if(!new_size) return PIPE_ERROR_OUT_OF_MEMORY; - new_buffers = (struct pb_buffer **)REALLOC(vl->buffers, - vl->size*sizeof(struct pb_buffer *), - new_size*sizeof(struct pb_buffer *)); - if(!new_buffers) + new_entries = (struct pb_validate_entry *)REALLOC(vl->entries, + vl->size*sizeof(struct pb_validate_entry), + new_size*sizeof(struct pb_validate_entry)); + if(!new_entries) return PIPE_ERROR_OUT_OF_MEMORY; - memset(new_buffers + vl->size, 0, (new_size - vl->size)*sizeof(struct pb_buffer *)); + memset(new_entries + vl->size, 0, (new_size - vl->size)*sizeof(struct pb_validate_entry)); vl->size = new_size; - vl->buffers = new_buffers; + vl->entries = new_entries; } - assert(!vl->buffers[vl->used]); - pb_reference(&vl->buffers[vl->used], buf); + assert(!vl->entries[vl->used].buf); + pb_reference(&vl->entries[vl->used].buf, buf); + vl->entries[vl->used].flags = flags; ++vl->used; return PIPE_OK; } +enum pipe_error +pb_validate_foreach(struct pb_validate *vl, + enum pipe_error (*callback)(struct pb_buffer *buf, void *data), + void *data) +{ + unsigned i; + for(i = 0; i < vl->used; ++i) { + enum pipe_error ret; + ret = callback(vl->entries[i].buf, data); + if(ret != PIPE_OK) + return ret; + } + return PIPE_OK; +} + + enum pipe_error pb_validate_validate(struct pb_validate *vl) { - /* FIXME: go through each buffer, ensure its not mapped, its address is - * available -- requires a new pb_buffer interface */ + unsigned i; + + for(i = 0; i < vl->used; ++i) { + enum pipe_error ret; + ret = pb_validate(vl->entries[i].buf, vl, vl->entries[i].flags); + if(ret != PIPE_OK) { + while(i--) + pb_validate(vl->entries[i].buf, NULL, 0); + return ret; + } + } + return PIPE_OK; } @@ -114,8 +154,8 @@ pb_validate_fence(struct pb_validate *vl, { unsigned i; for(i = 0; i < vl->used; ++i) { - buffer_fence(vl->buffers[i], fence); - pb_reference(&vl->buffers[i], NULL); + pb_fence(vl->entries[i].buf, fence); + pb_reference(&vl->entries[i].buf, NULL); } vl->used = 0; } @@ -126,8 +166,8 @@ pb_validate_destroy(struct pb_validate *vl) { unsigned i; for(i = 0; i < vl->used; ++i) - pb_reference(&vl->buffers[i], NULL); - FREE(vl->buffers); + pb_reference(&vl->entries[i].buf, NULL); + FREE(vl->entries); FREE(vl); } @@ -142,8 +182,8 @@ pb_validate_create() return NULL; vl->size = PB_VALIDATE_INITIAL_SIZE; - vl->buffers = (struct pb_buffer **)CALLOC(vl->size, sizeof(struct pb_buffer *)); - if(!vl->buffers) { + vl->entries = (struct pb_validate_entry *)CALLOC(vl->size, sizeof(struct pb_buffer *)); + if(!vl->entries) { FREE(vl); return NULL; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.h b/src/gallium/auxiliary/pipebuffer/pb_validate.h index 3db1d5330b..dfb84df1ce 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.h +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.h @@ -58,7 +58,13 @@ struct pb_validate; enum pipe_error pb_validate_add_buffer(struct pb_validate *vl, - struct pb_buffer *buf); + struct pb_buffer *buf, + unsigned flags); + +enum pipe_error +pb_validate_foreach(struct pb_validate *vl, + enum pipe_error (*callback)(struct pb_buffer *buf, void *data), + void *data); /** * Validate all buffers for hardware access. @@ -71,7 +77,7 @@ pb_validate_validate(struct pb_validate *vl); /** * Fence all buffers and clear the list. * - * Should be called right before issuing commands to the hardware. + * Should be called right after issuing commands to the hardware. */ void pb_validate_fence(struct pb_validate *vl, diff --git a/src/gallium/auxiliary/pipebuffer/pb_winsys.c b/src/gallium/auxiliary/pipebuffer/pb_winsys.c index 28d137dbc4..45a883e532 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_winsys.c +++ b/src/gallium/auxiliary/pipebuffer/pb_winsys.c @@ -30,7 +30,7 @@ * Implementation of client buffer (also designated as "user buffers"), which * are just state-tracker owned data masqueraded as buffers. * - * \author José Fonseca + * \author Jose Fonseca */ @@ -91,6 +91,24 @@ pb_user_buffer_unmap(struct pb_buffer *buf) } +static enum pipe_error +pb_user_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + assert(0); + return PIPE_ERROR; +} + + +static void +pb_user_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + assert(0); +} + + static void pb_user_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -106,6 +124,8 @@ pb_user_buffer_vtbl = { pb_user_buffer_destroy, pb_user_buffer_map, pb_user_buffer_unmap, + pb_user_buffer_validate, + pb_user_buffer_fence, pb_user_buffer_get_base_buffer }; -- cgit v1.2.3 From 17849eafaacfbb2124d86f561a91b707317d3b31 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 24 Nov 2008 23:17:49 +0900 Subject: pipebuffer: Ondemand buffer manager. A variation of malloc buffers which get transferred to real graphics memory when there is an attempt to validate them. --- src/gallium/auxiliary/pipebuffer/Makefile | 1 + src/gallium/auxiliary/pipebuffer/SConscript | 1 + src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 14 + .../auxiliary/pipebuffer/pb_bufmgr_ondemand.c | 303 +++++++++++++++++++++ 4 files changed, 319 insertions(+) create mode 100644 src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile index f9b39d9ce0..4bcf08f8a5 100644 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -11,6 +11,7 @@ C_SOURCES = \ pb_bufmgr_debug.c \ pb_bufmgr_fenced.c \ pb_bufmgr_mm.c \ + pb_bufmgr_ondemand.c \ pb_bufmgr_pool.c \ pb_bufmgr_slab.c \ pb_validate.c \ diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript index 56a40dda0d..4acf721808 100644 --- a/src/gallium/auxiliary/pipebuffer/SConscript +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -10,6 +10,7 @@ pipebuffer = env.ConvenienceLibrary( 'pb_bufmgr_debug.c', 'pb_bufmgr_fenced.c', 'pb_bufmgr_mm.c', + 'pb_bufmgr_ondemand.c', 'pb_bufmgr_pool.c', 'pb_bufmgr_slab.c', 'pb_validate.c', diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 8fe2704708..0a8264a924 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -182,6 +182,20 @@ pb_alt_manager_create(struct pb_manager *provider1, struct pb_manager *provider2); +/** + * Ondemand buffer manager. + * + * Buffers are created in malloc'ed memory (fast and cached), and the constents + * is transfered to a buffer from the provider (typically in slow uncached + * memory) when there is an attempt to validate the buffer. + * + * Ideal for situations where one does not know before hand whether a given + * buffer will effectively be used by the hardware or not. + */ +struct pb_manager * +pb_ondemand_manager_create(struct pb_manager *provider); + + /** * Debug buffer manager to detect buffer under- and overflows. * diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c new file mode 100644 index 0000000000..ba02a84e62 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c @@ -0,0 +1,303 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * A variation of malloc buffers which get transferred to real graphics memory + * when there is an attempt to validate them. + * + * @author Jose Fonseca + */ + + +#include "pipe/p_debug.h" +#include "util/u_memory.h" +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +struct pb_ondemand_manager; + + +struct pb_ondemand_buffer +{ + struct pb_buffer base; + + struct pb_ondemand_manager *mgr; + + /** Regular malloc'ed memory */ + void *data; + unsigned mapcount; + + /** Real buffer */ + struct pb_buffer *buffer; + size_t size; + struct pb_desc desc; +}; + + +struct pb_ondemand_manager +{ + struct pb_manager base; + + struct pb_manager *provider; +}; + + +extern const struct pb_vtbl pb_ondemand_buffer_vtbl; + +static INLINE struct pb_ondemand_buffer * +pb_ondemand_buffer(struct pb_buffer *buf) +{ + assert(buf); + assert(buf->vtbl == &pb_ondemand_buffer_vtbl); + return (struct pb_ondemand_buffer *)buf; +} + +static INLINE struct pb_ondemand_manager * +pb_ondemand_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pb_ondemand_manager *)mgr; +} + + +static void +pb_ondemand_buffer_destroy(struct pb_buffer *_buf) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + pb_reference(&buf->buffer, NULL); + + align_free(buf->data); + + FREE(buf); +} + + +static void * +pb_ondemand_buffer_map(struct pb_buffer *_buf, + unsigned flags) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + if(buf->buffer) { + assert(!buf->data); + return pb_map(buf->buffer, flags); + } + else { + assert(buf->data); + ++buf->mapcount; + return buf->data; + } +} + + +static void +pb_ondemand_buffer_unmap(struct pb_buffer *_buf) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + if(buf->buffer) { + assert(!buf->data); + pb_unmap(buf->buffer); + } + else { + assert(buf->data); + assert(buf->mapcount); + if(buf->mapcount) + --buf->mapcount; + } +} + + +static enum pipe_error +pb_ondemand_buffer_instantiate(struct pb_ondemand_buffer *buf) +{ + if(!buf->buffer) { + struct pb_manager *provider = buf->mgr->provider; + uint8_t *map; + + assert(!buf->mapcount); + + buf->buffer = provider->create_buffer(provider, buf->size, &buf->desc); + if(!buf->buffer) + return PIPE_ERROR_OUT_OF_MEMORY; + + map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_READ); + if(!map) { + pb_reference(&buf->buffer, NULL); + return PIPE_ERROR; + } + + memcpy(map, buf->data, buf->size); + + pb_unmap(buf->buffer); + + if(!buf->mapcount) { + FREE(buf->data); + buf->data = NULL; + } + } + + return PIPE_OK; +} + +static enum pipe_error +pb_ondemand_buffer_validate(struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + enum pipe_error ret; + + assert(!buf->mapcount); + if(buf->mapcount) + return PIPE_ERROR; + + ret = pb_ondemand_buffer_instantiate(buf); + if(ret != PIPE_OK) + return ret; + + return pb_validate(buf->buffer, vl, flags); +} + + +static void +pb_ondemand_buffer_fence(struct pb_buffer *_buf, + struct pipe_fence_handle *fence) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + assert(buf->buffer); + if(!buf->buffer) + return; + + pb_fence(buf->buffer, fence); +} + + +static void +pb_ondemand_buffer_get_base_buffer(struct pb_buffer *_buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + if(pb_ondemand_buffer_instantiate(buf) != PIPE_OK) { + assert(0); + *base_buf = &buf->base; + *offset = 0; + return; + } + + pb_get_base_buffer(buf->buffer, base_buf, offset); +} + + +const struct pb_vtbl +pb_ondemand_buffer_vtbl = { + pb_ondemand_buffer_destroy, + pb_ondemand_buffer_map, + pb_ondemand_buffer_unmap, + pb_ondemand_buffer_validate, + pb_ondemand_buffer_fence, + pb_ondemand_buffer_get_base_buffer +}; + + +static struct pb_buffer * +pb_ondemand_manager_create_buffer(struct pb_manager *_mgr, + size_t size, + const struct pb_desc *desc) +{ + struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr); + struct pb_ondemand_buffer *buf; + + buf = CALLOC_STRUCT(pb_ondemand_buffer); + if(!buf) + return NULL; + + buf->base.base.refcount = 1; + buf->base.base.alignment = desc->alignment; + buf->base.base.usage = desc->usage; + buf->base.base.size = size; + buf->base.vtbl = &pb_ondemand_buffer_vtbl; + + buf->mgr = mgr; + + buf->data = align_malloc(size, desc->alignment < sizeof(void*) ? sizeof(void*) : desc->alignment); + if(!buf->data) { + FREE(buf); + return NULL; + } + + buf->size = size; + buf->desc = *desc; + + return &buf->base; +} + + +static void +pb_ondemand_manager_flush(struct pb_manager *_mgr) +{ + struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr); + + mgr->provider->flush(mgr->provider); +} + + +static void +pb_ondemand_manager_destroy(struct pb_manager *_mgr) +{ + struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr); + + FREE(mgr); +} + + +struct pb_manager * +pb_ondemand_manager_create(struct pb_manager *provider) +{ + struct pb_ondemand_manager *mgr; + + if(!provider) + return NULL; + + mgr = CALLOC_STRUCT(pb_ondemand_manager); + if(!mgr) + return NULL; + + mgr->base.destroy = pb_ondemand_manager_destroy; + mgr->base.create_buffer = pb_ondemand_manager_create_buffer; + mgr->base.flush = pb_ondemand_manager_flush; + + mgr->provider = provider; + + return &mgr->base; +} -- cgit v1.2.3 From f0e3366b0860047632bec59c8ee815670cfb2d25 Mon Sep 17 00:00:00 2001 From: Jakob Bornecrantz Date: Mon, 24 Nov 2008 20:01:48 +0100 Subject: util: Add generic tile and detile functions --- src/gallium/auxiliary/util/Makefile | 1 + src/gallium/auxiliary/util/u_linear.c | 69 +++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_linear.h | 60 ++++++++++++++++++++++++++++++ 3 files changed, 130 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_linear.c create mode 100644 src/gallium/auxiliary/util/u_linear.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index b3d1045a8f..f611f82773 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -11,6 +11,7 @@ C_SOURCES = \ u_handle_table.c \ u_hash_table.c \ u_keymap.c \ + u_linear.c \ u_math.c \ u_mm.c \ u_rect.c \ diff --git a/src/gallium/auxiliary/util/u_linear.c b/src/gallium/auxiliary/util/u_linear.c new file mode 100644 index 0000000000..a76704ffc7 --- /dev/null +++ b/src/gallium/auxiliary/util/u_linear.c @@ -0,0 +1,69 @@ + +#include "pipe/p_debug.h" +#include "u_linear.h" + +void +pipe_linear_to_tile(size_t src_stride, void *src_ptr, + struct pipe_tile_info *t, void *dst_ptr) +{ + int x, y, z; + char *ptr; + size_t bytes = t->cols * t->block.size; + + + assert(pipe_linear_check_tile(t)); + + /* lets write lineary to the tiled buffer */ + for (y = 0; y < t->tiles_y; y++) { + for (x = 0; x < t->tiles_x; x++) { + /* this inner loop could be replace with SSE magic */ + ptr = (char*)src_ptr + src_stride * t->rows * y + bytes * x; + for (z = 0; z < t->rows; z++) { + memcpy(dst_ptr, ptr, bytes); + dst_ptr += bytes; + ptr += src_stride; + } + } + } +} + +void pipe_linear_from_tile(struct pipe_tile_info *t, void *src_ptr, + size_t dst_stride, void *dst_ptr) +{ + int x, y, z; + char *ptr; + size_t bytes = t->cols * t->block.size; + + /* lets read lineary from the tiled buffer */ + for (y = 0; y < t->tiles_y; y++) { + for (x = 0; x < t->tiles_x; x++) { + /* this inner loop could be replace with SSE magic */ + ptr = (char*)dst_ptr + dst_stride * t->rows * y + bytes * x; + for (z = 0; z < t->rows; z++) { + memcpy(ptr, src_ptr, bytes); + src_ptr += bytes; + ptr += dst_stride; + } + } + } +} + +void +pipe_linear_fill_info(struct pipe_tile_info *t, + struct pipe_format_block *block, + unsigned tile_width, unsigned tile_height, + unsigned tiles_x, unsigned tiles_y) +{ + t->block = *block; + + t->tile.width = tile_width; + t->tile.height = tile_height; + t->cols = t->tile.width / t->block.width; + t->rows = t->tile.height / t->block.height; + t->tile.size = t->cols * t->rows * t->block.size; + + t->tiles_x = tiles_x; + t->tiles_y = tiles_y; + t->stride = t->cols * t->tiles_x * t->block.size; + t->size = t->tiles_x * t->tiles_y * t->tile.size; +} diff --git a/src/gallium/auxiliary/util/u_linear.h b/src/gallium/auxiliary/util/u_linear.h new file mode 100644 index 0000000000..e337cfd770 --- /dev/null +++ b/src/gallium/auxiliary/util/u_linear.h @@ -0,0 +1,60 @@ + +#ifndef U_LINEAR_H +#define U_LINEAR_H + +#include "pipe/p_format.h" +struct pipe_tile_info +{ + unsigned size; + unsigned stride; + + /* The number of tiles */ + unsigned tiles_x; + unsigned tiles_y; + + /* size of each tile expressed in blocks */ + unsigned cols; + unsigned rows; + + /* Describe the tile in pixels */ + struct pipe_format_block tile; + + /* Describe each block within the tile */ + struct pipe_format_block block; +}; + +void pipe_linear_to_tile(size_t src_stride, void *src_ptr, + struct pipe_tile_info *t, void *dst_ptr); + +void pipe_linear_from_tile(struct pipe_tile_info *t, void *src_ptr, + size_t dst_stride, void *dst_ptr); + +/** + * Convenience function to fillout a pipe_tile_info struct. + * @t info to fill out. + * @block block info about pixel layout + * @tile_width the width of the tile in pixels + * @tile_height the height of the tile in pixels + * @tiles_x number of tiles in x axis + * @tiles_y number of tiles in y axis + */ +void pipe_linear_fill_info(struct pipe_tile_info *t, + struct pipe_format_block *block, + unsigned tile_width, unsigned tile_height, + unsigned tiles_x, unsigned tiles_y); + +static INLINE boolean pipe_linear_check_tile(struct pipe_tile_info *t) +{ + if (t->tile.size != t->block.size * t->cols * t->rows) + return FALSE; + + if (t->stride != t->block.size * t->cols * t->tiles_x) + return FALSE; + + if (t->size < t->stride * t->rows * t->tiles_y) + return FALSE; + + return TRUE; +} + +#endif /* U_LINEAR_H */ -- cgit v1.2.3 From 434e255eae90b0f3d836d452b7d3b0c5aadf78b8 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 24 Nov 2008 10:02:44 -0700 Subject: tgsi: add tgsi_declaration fields for centroid sampling, invariant optimization --- src/gallium/auxiliary/tgsi/tgsi_build.c | 2 ++ src/gallium/auxiliary/tgsi/tgsi_dump.c | 8 ++++++++ src/gallium/include/pipe/p_shader_tokens.h | 4 +++- 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 38fcaf8829..eee2db7771 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -119,6 +119,8 @@ tgsi_default_declaration( void ) declaration.UsageMask = TGSI_WRITEMASK_XYZW; declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT; declaration.Semantic = 0; + declaration.Centroid = 0; + declaration.Invariant = 0; declaration.Padding = 0; declaration.Extended = 0; diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 3177f54952..c2a0ac5aff 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -246,6 +246,14 @@ iter_declaration( TXT( ", " ); ENM( decl->Declaration.Interpolate, interpolate_names ); + if (decl->Declaration.Centroid) { + TXT( ", CENTROID" ); + } + + if (decl->Declaration.Invariant) { + TXT( ", INVARIANT" ); + } + EOL(); return TRUE; diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index 78c20de3e2..1292367c66 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -112,7 +112,9 @@ struct tgsi_declaration unsigned UsageMask : 4; /* bitmask of TGSI_WRITEMASK_x flags */ unsigned Interpolate : 4; /* TGSI_INTERPOLATE_ */ unsigned Semantic : 1; /* BOOL, any semantic info? */ - unsigned Padding : 6; + unsigned Centroid : 1; /* centroid sampling */ + unsigned Invariant : 1; /* invariant optimization */ + unsigned Padding : 4; unsigned Extended : 1; /* BOOL */ }; -- cgit v1.2.3 From 55839ae064d64b7fcc180fcddb364bf31ab760dc Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 25 Nov 2008 14:01:40 +0900 Subject: pipebuffer: Fix buffer overflow. --- src/gallium/auxiliary/pipebuffer/pb_validate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c index 726ae1688e..94532bb4ce 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.c +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c @@ -182,7 +182,7 @@ pb_validate_create() return NULL; vl->size = PB_VALIDATE_INITIAL_SIZE; - vl->entries = (struct pb_validate_entry *)CALLOC(vl->size, sizeof(struct pb_buffer *)); + vl->entries = (struct pb_validate_entry *)CALLOC(vl->size, sizeof(struct pb_validate_entry)); if(!vl->entries) { FREE(vl); return NULL; -- cgit v1.2.3 From 4de360e67d83cd6503fb8ad053bb8afe507db5fa Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 25 Nov 2008 09:02:27 -0700 Subject: gallium: added centroid/invarient fields to declarations --- src/gallium/auxiliary/tgsi/tgsi_build.c | 6 ++++++ src/gallium/auxiliary/tgsi/tgsi_build.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index eee2db7771..fd02c2c87c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -133,6 +133,8 @@ tgsi_build_declaration( unsigned usage_mask, unsigned interpolate, unsigned semantic, + unsigned centroid, + unsigned invariant, struct tgsi_header *header ) { struct tgsi_declaration declaration; @@ -145,6 +147,8 @@ tgsi_build_declaration( declaration.UsageMask = usage_mask; declaration.Interpolate = interpolate; declaration.Semantic = semantic; + declaration.Centroid = centroid; + declaration.Invariant = invariant; header_bodysize_grow( header ); @@ -196,6 +200,8 @@ tgsi_build_full_declaration( full_decl->Declaration.UsageMask, full_decl->Declaration.Interpolate, full_decl->Declaration.Semantic, + full_decl->Declaration.Centroid, + full_decl->Declaration.Invariant, header ); if (maxsize <= size) diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index 7d6234746a..0fd6fabd83 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -71,6 +71,8 @@ tgsi_build_declaration( unsigned usage_mask, unsigned interpolate, unsigned semantic, + unsigned centroid, + unsigned invariant, struct tgsi_header *header ); struct tgsi_full_declaration -- cgit v1.2.3 From 18a1389077c72717dfbe6ae10793f3329d13b848 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 26 Nov 2008 12:56:23 +0100 Subject: tgsi: Implement OPCODE_ROUND for SSE2 backend. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 8dfd2ced08..8574d79da1 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1001,6 +1001,29 @@ emit_rcp ( make_xmm( xmm_src ) ); } +static void PIPE_CDECL +rnd4f( + float *store ) +{ + store[0] = floorf( store[0] + 0.5f ); + store[1] = floorf( store[1] + 0.5f ); + store[2] = floorf( store[2] + 0.5f ); + store[3] = floorf( store[3] + 0.5f ); +} + +static void +emit_rnd( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_save, + xmm_dst, + rnd4f ); +} + static void emit_rsqrt( struct x86_function *func, @@ -1811,7 +1834,11 @@ emit_instruction( break; case TGSI_OPCODE_ROUND: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_rnd( func, 0, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } break; case TGSI_OPCODE_EXPBASE2: -- cgit v1.2.3 From adf14090fb6e43a25eabb13796d5a9385d8511ea Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 26 Nov 2008 13:17:25 +0100 Subject: tgsi: Implement OPCODE_ARR. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 5 +---- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 7 ++++++- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 0fdfb91d39..1981d8b68f 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2124,6 +2124,7 @@ exec_instruction( break; case TGSI_OPCODE_ROUND: + case TGSI_OPCODE_ARR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); micro_rnd( &r[0], &r[0] ); @@ -2424,10 +2425,6 @@ exec_instruction( assert (0); break; - case TGSI_OPCODE_ARR: - assert (0); - break; - case TGSI_OPCODE_BRA: assert (0); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 8574d79da1..d2d431980b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -2078,7 +2078,12 @@ emit_instruction( break; case TGSI_OPCODE_ARR: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_rnd( func, 0, 0 ); + emit_f2it( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } break; case TGSI_OPCODE_BRA: -- cgit v1.2.3 From 685fd2c035e284db2447ede0f6da278adaa70a0d Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 26 Nov 2008 12:56:23 +0100 Subject: tgsi: Implement OPCODE_ROUND for SSE2 backend. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 4d59106dbf..151d2b1c37 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -842,6 +842,29 @@ emit_rcp ( make_xmm( xmm_src ) ); } +static void PIPE_CDECL +rnd4f( + float *store ) +{ + store[0] = floorf( store[0] + 0.5f ); + store[1] = floorf( store[1] + 0.5f ); + store[2] = floorf( store[2] + 0.5f ); + store[3] = floorf( store[3] + 0.5f ); +} + +static void +emit_rnd( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_save, + xmm_dst, + rnd4f ); +} + static void emit_rsqrt( struct x86_function *func, @@ -1639,7 +1662,11 @@ emit_instruction( break; case TGSI_OPCODE_ROUND: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_rnd( func, 0, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } break; case TGSI_OPCODE_EXPBASE2: -- cgit v1.2.3 From eee3d216049f21507a3ff6908f1d506c683efad0 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 26 Nov 2008 13:17:25 +0100 Subject: tgsi: Implement OPCODE_ARR. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 5 +---- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 7 ++++++- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 41dffc3dba..7114119fe2 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2067,6 +2067,7 @@ exec_instruction( break; case TGSI_OPCODE_ROUND: + case TGSI_OPCODE_ARR: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); micro_rnd( &r[0], &r[0] ); @@ -2367,10 +2368,6 @@ exec_instruction( assert (0); break; - case TGSI_OPCODE_ARR: - assert (0); - break; - case TGSI_OPCODE_BRA: assert (0); break; diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 151d2b1c37..752c7c3c31 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1906,7 +1906,12 @@ emit_instruction( break; case TGSI_OPCODE_ARR: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_rnd( func, 0, 0 ); + emit_f2it( func, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } break; case TGSI_OPCODE_BRA: -- cgit v1.2.3 From 823aac36d5580ea46f76ccec3fd31c91f168274e Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 26 Nov 2008 13:54:28 +0100 Subject: tgsi: Implement OPCODE_SSG/SGN. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 18 +++++++++++++++++- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 30 +++++++++++++++++++++++++++++- 2 files changed, 46 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 1981d8b68f..9fd0497043 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -828,6 +828,17 @@ micro_rnd( dst->f[3] = floorf( src->f[3] + 0.5f ); } +static void +micro_sgn( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; + dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; + dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; + dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; +} + static void micro_shl( union tgsi_exec_channel *dst, @@ -2480,7 +2491,12 @@ exec_instruction( break; case TGSI_OPCODE_SSG: - assert (0); + /* TGSI_OPCODE_SGN */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_sgn( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } break; case TGSI_OPCODE_CMP: diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index d2d431980b..cac44af7f4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1083,6 +1083,29 @@ emit_setsign( TGSI_EXEC_TEMP_80000000_C ) ); } +static void PIPE_CDECL +sgn4f( + float *store ) +{ + store[0] = store[0] < 0.0f ? -1.0f : store[0] > 0.0f ? 1.0f : 0.0f; + store[1] = store[1] < 0.0f ? -1.0f : store[1] > 0.0f ? 1.0f : 0.0f; + store[2] = store[2] < 0.0f ? -1.0f : store[2] > 0.0f ? 1.0f : 0.0f; + store[3] = store[3] < 0.0f ? -1.0f : store[3] > 0.0f ? 1.0f : 0.0f; +} + +static void +emit_sgn( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_save, + xmm_dst, + sgn4f ); +} + static void PIPE_CDECL sin4f( float *store ) @@ -2102,7 +2125,12 @@ emit_instruction( break; case TGSI_OPCODE_SSG: - return 0; + /* TGSI_OPCODE_SGN */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_sgn( func, 0, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } break; case TGSI_OPCODE_CMP: -- cgit v1.2.3 From 6e96bd70e56f6ba4ff444c584376475a136bca26 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 26 Nov 2008 07:38:31 -0700 Subject: Revert "tgsi: Implement OPCODE_ROUND for SSE2 backend." This reverts commit 685fd2c035e284db2447ede0f6da278adaa70a0d. Does not compile since emit_rnd() is trying to pass 4 params to emit_func_call_dst() which takes 3 params. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 29 +---------------------------- 1 file changed, 1 insertion(+), 28 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 752c7c3c31..4bda756dbc 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -842,29 +842,6 @@ emit_rcp ( make_xmm( xmm_src ) ); } -static void PIPE_CDECL -rnd4f( - float *store ) -{ - store[0] = floorf( store[0] + 0.5f ); - store[1] = floorf( store[1] + 0.5f ); - store[2] = floorf( store[2] + 0.5f ); - store[3] = floorf( store[3] + 0.5f ); -} - -static void -emit_rnd( - struct x86_function *func, - unsigned xmm_save, - unsigned xmm_dst ) -{ - emit_func_call_dst( - func, - xmm_save, - xmm_dst, - rnd4f ); -} - static void emit_rsqrt( struct x86_function *func, @@ -1662,11 +1639,7 @@ emit_instruction( break; case TGSI_OPCODE_ROUND: - FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { - FETCH( func, *inst, 0, 0, chan_index ); - emit_rnd( func, 0, 0 ); - STORE( func, *inst, 0, 0, chan_index ); - } + return 0; break; case TGSI_OPCODE_EXPBASE2: -- cgit v1.2.3 From 1250526e3012f6958679c5dcdcb990387b53479b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 26 Nov 2008 07:41:19 -0700 Subject: gallium: disable TGSI_OPCODE_ARR case until emit_rnd() is redone. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 4bda756dbc..29442b4ec4 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1878,6 +1878,7 @@ emit_instruction( return 0; break; +#if 0 case TGSI_OPCODE_ARR: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); @@ -1886,7 +1887,7 @@ emit_instruction( STORE( func, *inst, 0, 0, chan_index ); } break; - +#endif case TGSI_OPCODE_BRA: return 0; break; -- cgit v1.2.3 From 1347439a87a26f261ab07c914ea4e69965703ee2 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 26 Nov 2008 13:54:28 +0100 Subject: tgsi: Implement OPCODE_SSG/SGN. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 18 +++++++++++++++++- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 30 +++++++++++++++++++++++++++++- 2 files changed, 46 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 7114119fe2..65a0f39fdb 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -839,6 +839,17 @@ micro_rnd( dst->f[3] = floorf( src->f[3] + 0.5f ); } +static void +micro_sgn( + union tgsi_exec_channel *dst, + const union tgsi_exec_channel *src ) +{ + dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f; + dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f; + dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f; + dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f; +} + static void micro_shl( union tgsi_exec_channel *dst, @@ -2423,7 +2434,12 @@ exec_instruction( break; case TGSI_OPCODE_SSG: - assert (0); + /* TGSI_OPCODE_SGN */ + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( &r[0], 0, chan_index ); + micro_sgn( &r[0], &r[0] ); + STORE( &r[0], 0, chan_index ); + } break; case TGSI_OPCODE_CMP: diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 29442b4ec4..499d865542 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -901,6 +901,29 @@ emit_setsign( TGSI_EXEC_TEMP_80000000_C ) ); } +static void PIPE_CDECL +sgn4f( + float *store ) +{ + store[0] = store[0] < 0.0f ? -1.0f : store[0] > 0.0f ? 1.0f : 0.0f; + store[1] = store[1] < 0.0f ? -1.0f : store[1] > 0.0f ? 1.0f : 0.0f; + store[2] = store[2] < 0.0f ? -1.0f : store[2] > 0.0f ? 1.0f : 0.0f; + store[3] = store[3] < 0.0f ? -1.0f : store[3] > 0.0f ? 1.0f : 0.0f; +} + +static void +emit_sgn( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_save, + xmm_dst, + sgn4f ); +} + static void PIPE_CDECL sin4f( float *store ) @@ -1904,7 +1927,12 @@ emit_instruction( break; case TGSI_OPCODE_SSG: - return 0; + /* TGSI_OPCODE_SGN */ + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_sgn( func, 0, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } break; case TGSI_OPCODE_CMP: -- cgit v1.2.3 From 972922b1bf28346568bedfadc2198ed93230f5d7 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 26 Nov 2008 12:56:23 +0100 Subject: tgsi: Implement OPCODE_ROUND for SSE2 backend. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 499d865542..31480c40da 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -842,6 +842,29 @@ emit_rcp ( make_xmm( xmm_src ) ); } +static void PIPE_CDECL +rnd4f( + float *store ) +{ + store[0] = floorf( store[0] + 0.5f ); + store[1] = floorf( store[1] + 0.5f ); + store[2] = floorf( store[2] + 0.5f ); + store[3] = floorf( store[3] + 0.5f ); +} + +static void +emit_rnd( + struct x86_function *func, + unsigned xmm_save, + unsigned xmm_dst ) +{ + emit_func_call_dst( + func, + xmm_save, + xmm_dst, + rnd4f ); +} + static void emit_rsqrt( struct x86_function *func, @@ -1662,7 +1685,11 @@ emit_instruction( break; case TGSI_OPCODE_ROUND: - return 0; + FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { + FETCH( func, *inst, 0, 0, chan_index ); + emit_rnd( func, 0, 0 ); + STORE( func, *inst, 0, 0, chan_index ); + } break; case TGSI_OPCODE_EXPBASE2: -- cgit v1.2.3 From 527e76a7ec7f330bd321fe9632a0fadedbab1d41 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 26 Nov 2008 17:20:07 +0100 Subject: tgsi: Fix build. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 31480c40da..68f5510351 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -855,12 +855,10 @@ rnd4f( static void emit_rnd( struct x86_function *func, - unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, - xmm_save, xmm_dst, rnd4f ); } @@ -937,12 +935,10 @@ sgn4f( static void emit_sgn( struct x86_function *func, - unsigned xmm_save, unsigned xmm_dst ) { emit_func_call_dst( func, - xmm_save, xmm_dst, sgn4f ); } @@ -1687,7 +1683,7 @@ emit_instruction( case TGSI_OPCODE_ROUND: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); - emit_rnd( func, 0, 0 ); + emit_rnd( func, 0 ); STORE( func, *inst, 0, 0, chan_index ); } break; @@ -1957,7 +1953,7 @@ emit_instruction( /* TGSI_OPCODE_SGN */ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); - emit_sgn( func, 0, 0 ); + emit_sgn( func, 0 ); STORE( func, *inst, 0, 0, chan_index ); } break; -- cgit v1.2.3 From 158a5f75d8436facfe8163845a942979899213fe Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Wed, 26 Nov 2008 22:29:49 +0100 Subject: tgsi: Reenable OPCODE_ARR. --- src/gallium/auxiliary/tgsi/tgsi_sse2.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c index 68f5510351..ff869c8312 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c +++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c @@ -1924,16 +1924,15 @@ emit_instruction( return 0; break; -#if 0 case TGSI_OPCODE_ARR: FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( func, *inst, 0, 0, chan_index ); - emit_rnd( func, 0, 0 ); + emit_rnd( func, 0 ); emit_f2it( func, 0 ); STORE( func, *inst, 0, 0, chan_index ); } break; -#endif + case TGSI_OPCODE_BRA: return 0; break; -- cgit v1.2.3 From dd55083ac1c13723dba6be71f161e2ca7cac7c66 Mon Sep 17 00:00:00 2001 From: Brian Date: Fri, 28 Nov 2008 10:28:44 -0700 Subject: gallium: minor texture-related clean-ups, comments, etc --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 4 ++-- src/gallium/drivers/softpipe/sp_tex_sample.c | 28 ++++++++++++++-------------- src/gallium/drivers/softpipe/sp_tex_sample.h | 6 +++--- 3 files changed, 19 insertions(+), 19 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 9fd0497043..a2d2cfd1fc 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1535,7 +1535,7 @@ exec_kilp(struct tgsi_exec_machine *mach, /* - * Fetch a texel using STR texture coordinates. + * Fetch a four texture samples using STR texture coordinates. */ static void fetch_texel( struct tgsi_sampler *sampler, @@ -1569,7 +1569,7 @@ exec_tex(struct tgsi_exec_machine *mach, boolean projected) { const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index; - union tgsi_exec_channel r[8]; + union tgsi_exec_channel r[4]; uint chan_index; float lodBias; diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c index b66caf9507..181247739b 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.c +++ b/src/gallium/drivers/softpipe/sp_tex_sample.c @@ -516,6 +516,10 @@ compute_lambda(const struct pipe_texture *tex, * 2. Determine if we're minifying or magnifying * 3. If minifying, choose mipmap levels * 4. Return image filter to use within mipmap images + * \param level0 Returns first mipmap level to sample from + * \param level1 Returns second mipmap level to sample from + * \param levelBlend Returns blend factor between levels, in [0,1] + * \param imgFilter Returns either the min or mag filter, depending on lambda */ static void choose_mipmap_levels(const struct pipe_texture *texture, @@ -527,7 +531,6 @@ choose_mipmap_levels(const struct pipe_texture *texture, unsigned *level0, unsigned *level1, float *levelBlend, unsigned *imgFilter) { - if (sampler->min_mip_filter == PIPE_TEX_MIPFILTER_NONE) { /* no mipmap selection needed */ *level0 = *level1 = CLAMP((int) sampler->min_lod, @@ -589,7 +592,6 @@ choose_mipmap_levels(const struct pipe_texture *texture, /** * Get a texel from a texture, using the texture tile cache. - * Called by the TGSI interpreter. * * \param face the cube face in 0..5 * \param level the mipmap level @@ -603,7 +605,7 @@ choose_mipmap_levels(const struct pipe_texture *texture, * sp_get_cached_tile_tex() function. Also, get 4 texels instead of 1... */ static void -get_texel(struct tgsi_sampler *tgsi_sampler, +get_texel(const struct tgsi_sampler *tgsi_sampler, unsigned face, unsigned level, int x, int y, int z, float rgba[NUM_CHANNELS][QUAD_SIZE], unsigned j) { @@ -693,7 +695,7 @@ shadow_compare(uint compare_func, * Could probably extend for 3D... */ static void -sp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, +sp_get_samples_2d_common(const struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], @@ -805,8 +807,8 @@ sp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler, } -static void -sp_get_samples_1d(struct tgsi_sampler *sampler, +static INLINE void +sp_get_samples_1d(const struct tgsi_sampler *sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], @@ -819,8 +821,8 @@ sp_get_samples_1d(struct tgsi_sampler *sampler, } -static void -sp_get_samples_2d(struct tgsi_sampler *sampler, +static INLINE void +sp_get_samples_2d(const struct tgsi_sampler *sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], @@ -832,8 +834,8 @@ sp_get_samples_2d(struct tgsi_sampler *sampler, } -static void -sp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, +static INLINE void +sp_get_samples_3d(const struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], @@ -960,7 +962,7 @@ sp_get_samples_3d(struct tgsi_sampler *tgsi_sampler, static void -sp_get_samples_cube(struct tgsi_sampler *sampler, +sp_get_samples_cube(const struct tgsi_sampler *sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], @@ -977,7 +979,7 @@ sp_get_samples_cube(struct tgsi_sampler *sampler, static void -sp_get_samples_rect(struct tgsi_sampler *tgsi_sampler, +sp_get_samples_rect(const struct tgsi_sampler *tgsi_sampler, const float s[QUAD_SIZE], const float t[QUAD_SIZE], const float p[QUAD_SIZE], @@ -1047,8 +1049,6 @@ sp_get_samples_rect(struct tgsi_sampler *tgsi_sampler, } - - /** * Called via tgsi_sampler::get_samples() * Use the sampler's state setting to get a filtered RGBA value diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.h b/src/gallium/drivers/softpipe/sp_tex_sample.h index 783169c939..f0a8a78778 100644 --- a/src/gallium/drivers/softpipe/sp_tex_sample.h +++ b/src/gallium/drivers/softpipe/sp_tex_sample.h @@ -46,10 +46,10 @@ struct sp_shader_sampler -static INLINE struct sp_shader_sampler * -sp_shader_sampler(struct tgsi_sampler *sampler) +static INLINE const struct sp_shader_sampler * +sp_shader_sampler(const struct tgsi_sampler *sampler) { - return (struct sp_shader_sampler *) sampler; + return (const struct sp_shader_sampler *) sampler; } -- cgit v1.2.3 From f2bccfd3c806a879abf0c40858806ec3825d0628 Mon Sep 17 00:00:00 2001 From: Brian Date: Tue, 2 Dec 2008 22:38:46 -0700 Subject: gallium: added draw_texture_samplers() to support texture fetches from vertex shaders This may only be practical for the softpipe driver at this time. --- src/gallium/auxiliary/draw/draw_context.c | 15 +++++++++++++++ src/gallium/auxiliary/draw/draw_context.h | 8 +++++++- src/gallium/auxiliary/draw/draw_private.h | 3 +++ src/gallium/auxiliary/draw/draw_vs_exec.c | 4 ++-- 4 files changed, 27 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 41a4cba1dd..b2a34811c2 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -331,6 +331,21 @@ draw_num_vs_outputs(const struct draw_context *draw) } +/** + * Provide TGSI sampler objects for vertex shaders that use texture fetches. + * This might only be used by software drivers for the time being. + */ +void +draw_texture_samplers(struct draw_context *draw, + uint num_samplers, + struct tgsi_sampler **samplers) +{ + draw->vs.num_samplers = num_samplers; + draw->vs.samplers = samplers; +} + + + void draw_set_render( struct draw_context *draw, struct vbuf_render *render ) diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 3eeb453531..8f5cecf438 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -45,7 +45,7 @@ struct pipe_context; struct draw_context; struct draw_stage; struct draw_vertex_shader; - +struct tgsi_sampler; struct draw_context *draw_create( void ); @@ -91,6 +91,12 @@ uint draw_num_vs_outputs(const struct draw_context *draw); +void +draw_texture_samplers(struct draw_context *draw, + uint num_samplers, + struct tgsi_sampler **samplers); + + /* * Vertex shader functions diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 5d531146c5..6097fff2c6 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -185,6 +185,9 @@ struct draw_context /** TGSI program interpreter runtime state */ struct tgsi_exec_machine machine; + uint num_samplers; + struct tgsi_sampler **samplers; + /* This (and the tgsi_exec_machine struct) probably need to be moved somewhere private. */ struct gallivm_cpu_engine *engine; diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c index 80c3606657..b3200df811 100644 --- a/src/gallium/auxiliary/draw/draw_vs_exec.c +++ b/src/gallium/auxiliary/draw/draw_vs_exec.c @@ -68,8 +68,8 @@ vs_exec_prepare( struct draw_vertex_shader *shader, if (evs->machine->Tokens != shader->state.tokens) { tgsi_exec_machine_bind_shader(evs->machine, shader->state.tokens, - PIPE_MAX_SAMPLERS, - NULL /*samplers*/ ); + draw->vs.num_samplers, + draw->vs.samplers); } } -- cgit v1.2.3 From 2ce2a40a732923461142d371548ba3243791422e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 9 Dec 2008 19:35:52 +0900 Subject: gallium: Abort by default on windows user space. --- src/gallium/auxiliary/util/p_debug.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 6ff3e6e0a6..125f3daf00 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -329,7 +329,7 @@ void _debug_assert_fail(const char *expr, const char *function) { _debug_printf("%s:%u:%s: Assertion `%s' failed.\n", file, line, function, expr); -#if defined(PIPE_OS_WINDOWS) +#if defined(PIPE_OS_WINDOWS) && !defined(PIPE_SUBSYSTEM_WINDOWS_USER) if (debug_get_bool_option("GALLIUM_ABORT_ON_ASSERT", FALSE)) #else if (debug_get_bool_option("GALLIUM_ABORT_ON_ASSERT", TRUE)) -- cgit v1.2.3 From e3f5370d637f367dbfe7d21f726e84185ad1e07d Mon Sep 17 00:00:00 2001 From: Alan Hourihane Date: Wed, 10 Dec 2008 11:30:46 +0000 Subject: gallium: temporary check for > 65535 vertices --- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 2 ++ src/gallium/auxiliary/draw/draw_pt_emit.c | 3 +++ src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 3 +++ src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c | 3 +++ 4 files changed, 11 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index 9825e116c3..fb0d895084 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -399,6 +399,8 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf ) * and it will flush itself if necessary to do so. If this does * fail, we are basically without usable hardware. */ + assert(vbuf->max_vertices < 65536); + vbuf->vertices = (uint *) vbuf->render->allocate_vertices(vbuf->render, (ushort) vbuf->vertex_size, (ushort) vbuf->max_vertices); diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index d520b05869..9c73d9c735 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -165,6 +165,9 @@ void draw_pt_emit( struct pt_emit *emit, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + if (count > 65535) /* FIXME */ + return FALSE; + /* XXX: and work out some way to coordinate the render primitive * between vbuf.c and here... */ diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 3966ad48ba..8ab08959e3 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -338,6 +338,9 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + if (count > 65535) /* FIXME */ + return FALSE; + hw_verts = draw->render->allocate_vertices( draw->render, (ushort)feme->translate->key.output_stride, (ushort)count ); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index f7e6a1a8ee..77e630c099 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -234,6 +234,9 @@ static void fse_run_linear( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + if (count > 65535) /* FIXME */ + return FALSE; + hw_verts = draw->render->allocate_vertices( draw->render, (ushort)fse->key.output_stride, (ushort)count ); -- cgit v1.2.3 From a8e7852b05f95cc695f3a05692a6ccd36298faf7 Mon Sep 17 00:00:00 2001 From: Alan Hourihane Date: Wed, 10 Dec 2008 12:02:24 +0000 Subject: gallium: more vertex count checks --- src/gallium/auxiliary/draw/draw_pt_emit.c | 11 +++++++++-- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 10 ++++++++++ src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c | 14 ++++++++++++-- 3 files changed, 31 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index 9c73d9c735..c4fc93ffba 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -165,8 +165,10 @@ void draw_pt_emit( struct pt_emit *emit, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - if (count > 65535) /* FIXME */ - return FALSE; + if (vertex_count > 65535) { /* FIXME */ + assert(0); + return; + } /* XXX: and work out some way to coordinate the render primitive * between vbuf.c and here... @@ -229,6 +231,11 @@ void draw_pt_emit_linear(struct pt_emit *emit, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + if (count > 65535) { /* FIXME */ + assert(0); + return; + } + /* XXX: and work out some way to coordinate the render primitive * between vbuf.c and here... */ diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 8ab08959e3..5727b91c48 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -229,6 +229,11 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + if (fetch_count > 65535) { /* FIXME */ + assert(0); + return; + } + hw_verts = draw->render->allocate_vertices( draw->render, (ushort)feme->translate->key.output_stride, (ushort)fetch_count ); @@ -283,6 +288,11 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + if (count > 65535) { /* FIXME */ + assert(0); + return; + } + hw_verts = draw->render->allocate_vertices( draw->render, (ushort)feme->translate->key.output_stride, (ushort)count ); diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 77e630c099..69580f1472 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -234,8 +234,10 @@ static void fse_run_linear( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - if (count > 65535) /* FIXME */ - return FALSE; + if (count > 65535) { /* FIXME */ + assert(0); + return; + } hw_verts = draw->render->allocate_vertices( draw->render, (ushort)fse->key.output_stride, @@ -296,6 +298,11 @@ fse_run(struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + if (fetch_count > 65535) { /* FIXME */ + assert(0); + return; + } + hw_verts = draw->render->allocate_vertices( draw->render, (ushort)fse->key.output_stride, (ushort)fetch_count ); @@ -350,6 +357,9 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); + if (count > 65535) /* FIXME */ + return FALSE; + hw_verts = draw->render->allocate_vertices( draw->render, (ushort)fse->key.output_stride, (ushort)count ); -- cgit v1.2.3 From 7519107a9787970f9b3b8ec317a2b4526e217290 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 9 Dec 2008 16:54:16 +0000 Subject: draw: add const qualifiers --- src/gallium/auxiliary/draw/draw_context.c | 4 ++-- src/gallium/auxiliary/draw/draw_context.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 41a4cba1dd..74deb44bd2 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -362,7 +362,7 @@ draw_set_mapped_element_buffer_range( struct draw_context *draw, unsigned eltSize, unsigned min_index, unsigned max_index, - void *elements ) + const void *elements ) { draw->pt.user.elts = elements; draw->pt.user.eltSize = eltSize; @@ -374,7 +374,7 @@ draw_set_mapped_element_buffer_range( struct draw_context *draw, void draw_set_mapped_element_buffer( struct draw_context *draw, unsigned eltSize, - void *elements ) + const void *elements ) { draw->pt.user.elts = elements; draw->pt.user.eltSize = eltSize; diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 3eeb453531..1d40c6c3be 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -123,11 +123,11 @@ draw_set_mapped_element_buffer_range( struct draw_context *draw, unsigned eltSize, unsigned min_index, unsigned max_index, - void *elements ); + const void *elements ); void draw_set_mapped_element_buffer( struct draw_context *draw, unsigned eltSize, - void *elements ); + const void *elements ); void draw_set_mapped_vertex_buffer(struct draw_context *draw, unsigned attr, const void *buffer); -- cgit v1.2.3 From 50beb86ce399534b049322f1074365ed03395ab2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 9 Dec 2008 16:57:53 +0000 Subject: util: new funcs for triming/validating primitives --- src/gallium/auxiliary/util/u_prim.h | 122 ++++++++++++++++++++++++++++++++++++ 1 file changed, 122 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_prim.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h new file mode 100644 index 0000000000..e45e84ded2 --- /dev/null +++ b/src/gallium/auxiliary/util/u_prim.h @@ -0,0 +1,122 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + + +#ifndef U_BLIT_H +#define U_BLIT_H + + +#ifdef __cplusplus +extern "C" { +#endif + +#include "pipe/p_defines.h" + +static INLINE boolean u_validate_pipe_prim( unsigned pipe_prim, unsigned nr ) +{ + boolean ok = TRUE; + + switch (pipe_prim) { + case PIPE_PRIM_POINTS: + ok = (nr >= 1); + break; + case PIPE_PRIM_LINES: + ok = (nr >= 2); + break; + case PIPE_PRIM_LINE_STRIP: + case PIPE_PRIM_LINE_LOOP: + ok = (nr >= 2); + break; + case PIPE_PRIM_TRIANGLES: + ok = (nr >= 3); + break; + case PIPE_PRIM_TRIANGLE_STRIP: + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + ok = (nr >= 3); + break; + case PIPE_PRIM_QUADS: + ok = (nr >= 4); + break; + case PIPE_PRIM_QUAD_STRIP: + ok = (nr >= 4); + break; + default: + ok = 0; + break; + } + + return ok; +} + + +static INLINE boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr ) +{ + boolean ok = TRUE; + + switch (pipe_prim) { + case PIPE_PRIM_POINTS: + ok = (*nr >= 1); + break; + case PIPE_PRIM_LINES: + ok = (*nr >= 2); + *nr -= (*nr % 2); + break; + case PIPE_PRIM_LINE_STRIP: + case PIPE_PRIM_LINE_LOOP: + ok = (*nr >= 2); + break; + case PIPE_PRIM_TRIANGLES: + ok = (*nr >= 3); + *nr -= (*nr % 3); + break; + case PIPE_PRIM_TRIANGLE_STRIP: + case PIPE_PRIM_TRIANGLE_FAN: + case PIPE_PRIM_POLYGON: + ok = (*nr >= 3); + break; + case PIPE_PRIM_QUADS: + ok = (*nr >= 4); + *nr -= (*nr % 4); + break; + case PIPE_PRIM_QUAD_STRIP: + ok = (*nr >= 4); + *nr -= (*nr % 2); + break; + default: + ok = 0; + break; + } + + if (!ok) + *nr = 0; + + return ok; +} + + +#endif -- cgit v1.2.3 From b716de47798defa7d22b0f15b201af6fba27f0b9 Mon Sep 17 00:00:00 2001 From: Alan Hourihane Date: Wed, 10 Dec 2008 20:21:19 +0000 Subject: gallium: change 65535 to UNDEFINED_VERTEX_ID --- src/gallium/auxiliary/draw/draw_pipe_vbuf.c | 2 +- src/gallium/auxiliary/draw/draw_pt_emit.c | 4 ++-- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 6 +++--- src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c | 6 +++--- 4 files changed, 9 insertions(+), 9 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c index fb0d895084..5ead25efff 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c +++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c @@ -399,7 +399,7 @@ vbuf_alloc_vertices( struct vbuf_stage *vbuf ) * and it will flush itself if necessary to do so. If this does * fail, we are basically without usable hardware. */ - assert(vbuf->max_vertices < 65536); + assert(vbuf->max_vertices < UNDEFINED_VERTEX_ID); vbuf->vertices = (uint *) vbuf->render->allocate_vertices(vbuf->render, (ushort) vbuf->vertex_size, diff --git a/src/gallium/auxiliary/draw/draw_pt_emit.c b/src/gallium/auxiliary/draw/draw_pt_emit.c index c4fc93ffba..232dfdaed2 100644 --- a/src/gallium/auxiliary/draw/draw_pt_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_emit.c @@ -165,7 +165,7 @@ void draw_pt_emit( struct pt_emit *emit, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - if (vertex_count > 65535) { /* FIXME */ + if (vertex_count >= UNDEFINED_VERTEX_ID) { assert(0); return; } @@ -231,7 +231,7 @@ void draw_pt_emit_linear(struct pt_emit *emit, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - if (count > 65535) { /* FIXME */ + if (count >= UNDEFINED_VERTEX_ID) { assert(0); return; } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 5727b91c48..39159b747d 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -229,7 +229,7 @@ static void fetch_emit_run( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - if (fetch_count > 65535) { /* FIXME */ + if (fetch_count >= UNDEFINED_VERTEX_ID) { assert(0); return; } @@ -288,7 +288,7 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - if (count > 65535) { /* FIXME */ + if (count >= UNDEFINED_VERTEX_ID) assert(0); return; } @@ -348,7 +348,7 @@ static boolean fetch_emit_run_linear_elts( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - if (count > 65535) /* FIXME */ + if (count >= UNDEFINED_VERTEX_ID) return FALSE; hw_verts = draw->render->allocate_vertices( draw->render, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 69580f1472..1649cdc6cd 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -234,7 +234,7 @@ static void fse_run_linear( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - if (count > 65535) { /* FIXME */ + if (count >= UNDEFINED_VERTEX_ID) { assert(0); return; } @@ -298,7 +298,7 @@ fse_run(struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - if (fetch_count > 65535) { /* FIXME */ + if (fetch_count >= UNDEFINED_VERTEX_ID) { assert(0); return; } @@ -357,7 +357,7 @@ static boolean fse_run_linear_elts( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - if (count > 65535) /* FIXME */ + if (count >= UNDEFINED_VERTEX_ID) return FALSE; hw_verts = draw->render->allocate_vertices( draw->render, -- cgit v1.2.3 From d0bc5293d6e1e9c34fa822b7c2928932ed22462c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 10 Dec 2008 18:02:27 -0700 Subject: gallium: added draw_set_mrd() function to fix polygon offset The Minimum Resolvable Depth factor depends on the driver and can't just be computed from the number of Z buffer bits. Glean's polygon offset test now passes with softpipe. Still need to determine the MRD factor for other gallium drivers, if they use the draw module's polygon offset stage... --- src/gallium/auxiliary/draw/draw_context.c | 12 ++++++++++++ src/gallium/auxiliary/draw/draw_context.h | 1 + src/gallium/auxiliary/draw/draw_pipe_offset.c | 3 +-- src/gallium/auxiliary/draw/draw_private.h | 2 ++ src/gallium/drivers/softpipe/sp_state_surface.c | 20 ++++++++++++++++++++ 5 files changed, 36 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_context.c b/src/gallium/auxiliary/draw/draw_context.c index 74deb44bd2..fab8fc95fc 100644 --- a/src/gallium/auxiliary/draw/draw_context.c +++ b/src/gallium/auxiliary/draw/draw_context.c @@ -103,6 +103,18 @@ void draw_flush( struct draw_context *draw ) } +/** + * Specify the Minimum Resolvable Depth factor for polygon offset. + * This factor potentially depends on the number of Z buffer bits, + * the rasterization algorithm and the arithmetic performed on Z + * values between vertex shading and rasterization. It will vary + * from one driver to another. + */ +void draw_set_mrd(struct draw_context *draw, double mrd) +{ + draw->mrd = mrd; +} + /** * Register new primitive rasterization/rendering state. diff --git a/src/gallium/auxiliary/draw/draw_context.h b/src/gallium/auxiliary/draw/draw_context.h index 1d40c6c3be..a29bb01d81 100644 --- a/src/gallium/auxiliary/draw/draw_context.h +++ b/src/gallium/auxiliary/draw/draw_context.h @@ -72,6 +72,7 @@ void draw_enable_line_stipple(struct draw_context *draw, boolean enable); void draw_enable_point_sprites(struct draw_context *draw, boolean enable); +void draw_set_mrd(struct draw_context *draw, double mrd); boolean draw_install_aaline_stage(struct draw_context *draw, struct pipe_context *pipe); diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index 1fea5e6dcb..8ddc4ee617 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -122,9 +122,8 @@ static void offset_first_tri( struct draw_stage *stage, struct prim_header *header ) { struct offset_stage *offset = offset_stage(stage); - float mrd = 1.0f / 65535.0f; /* XXX this depends on depthbuffer bits! */ - offset->units = stage->draw->rasterizer->offset_units * mrd; + offset->units = stage->draw->rasterizer->offset_units * stage->draw->mrd; offset->scale = stage->draw->rasterizer->offset_scale; stage->tri = offset_tri; diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h index 37c4c87f87..a16b45d340 100644 --- a/src/gallium/auxiliary/draw/draw_private.h +++ b/src/gallium/auxiliary/draw/draw_private.h @@ -172,6 +172,8 @@ struct draw_context boolean force_passthrough; /**< never clip or shade */ + double mrd; /**< minimum resolvable depth value, for polygon offset */ + /* pipe state that we need: */ const struct pipe_rasterizer_state *rasterizer; struct pipe_viewport_state viewport; diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index ba8c9eece7..8877b18af9 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -101,6 +101,26 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, } #endif + /* Tell draw module how deep the Z/depth buffer is */ + { + int depth_bits; + double mrd; + if (sp->framebuffer.zsbuf) { + depth_bits = pf_get_component_bits(sp->framebuffer.zsbuf->format, + PIPE_FORMAT_COMP_Z); + } + else { + depth_bits = 0; + } + if (depth_bits > 16) { + mrd = 0.0000001; + } + else { + mrd = 0.00002; + } + draw_set_mrd(sp->draw, mrd); + } + sp->framebuffer.width = fb->width; sp->framebuffer.height = fb->height; -- cgit v1.2.3 From ce3436795c0ddc110d83a5658e5ff10c202a4490 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 10 Dec 2008 18:21:40 -0700 Subject: gallium: added missing brace to fix broken build --- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 39159b747d..0227652632 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -288,7 +288,7 @@ static void fetch_emit_run_linear( struct draw_pt_middle_end *middle, */ draw_do_flush( draw, DRAW_FLUSH_BACKEND ); - if (count >= UNDEFINED_VERTEX_ID) + if (count >= UNDEFINED_VERTEX_ID) { assert(0); return; } -- cgit v1.2.3 From 401a18a0c64bf8995c2c888b155a711b6187eba5 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Thu, 11 Dec 2008 13:54:05 +0100 Subject: draw: Silencium compiler warnings on Windows. --- src/gallium/auxiliary/draw/draw_pipe_offset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_offset.c b/src/gallium/auxiliary/draw/draw_pipe_offset.c index 8ddc4ee617..62c31532d0 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_offset.c +++ b/src/gallium/auxiliary/draw/draw_pipe_offset.c @@ -123,7 +123,7 @@ static void offset_first_tri( struct draw_stage *stage, { struct offset_stage *offset = offset_stage(stage); - offset->units = stage->draw->rasterizer->offset_units * stage->draw->mrd; + offset->units = (float) (stage->draw->rasterizer->offset_units * stage->draw->mrd); offset->scale = stage->draw->rasterizer->offset_scale; stage->tri = offset_tri; -- cgit v1.2.3 From 9106a18f46cd83180b17f4b30f54bd2d5b437db1 Mon Sep 17 00:00:00 2001 From: Alan Hourihane Date: Thu, 11 Dec 2008 15:10:55 +0000 Subject: gallium: catch vertex overflow higher up --- src/gallium/auxiliary/draw/draw_pt_vcache.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c index 80d7200ca6..5d268a2226 100644 --- a/src/gallium/auxiliary/draw/draw_pt_vcache.c +++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c @@ -324,7 +324,7 @@ vcache_check_run( struct draw_pt_front_end *frontend, unsigned fetch_count = max_index + 1 - min_index; const ushort *transformed_elts; ushort *storage = NULL; - boolean ok; + boolean ok = FALSE; if (0) debug_printf("fetch_count %d fetch_max %d draw_count %d\n", fetch_count, @@ -413,11 +413,12 @@ vcache_check_run( struct draw_pt_front_end *frontend, transformed_elts = storage; } - ok = vcache->middle->run_linear_elts( vcache->middle, - min_index, /* start */ - fetch_count, - transformed_elts, - draw_count ); + if (fetch_count < UNDEFINED_VERTEX_ID) + ok = vcache->middle->run_linear_elts( vcache->middle, + min_index, /* start */ + fetch_count, + transformed_elts, + draw_count ); FREE(storage); -- cgit v1.2.3 From fd2492d24447e461f36982da268caf0317885967 Mon Sep 17 00:00:00 2001 From: Roland Scheidegger Date: Fri, 12 Dec 2008 05:09:56 +0100 Subject: gallium: fixes for srgb, new srgb formats add some more srgb texture formats, including compressed ones various fixes relating to srgb formats issues: the util code for generating mipmaps will not handle srgb formats correctly (would need to use a linear->srgb conversion shader) --- src/gallium/auxiliary/util/p_debug.c | 11 +++- src/gallium/include/pipe/p_format.h | 35 ++++++++--- src/gallium/state_trackers/python/p_format.i | 12 +++- src/mesa/state_tracker/st_extensions.c | 4 ++ src/mesa/state_tracker/st_format.c | 89 +++++++++++++++++++++++++--- 5 files changed, 132 insertions(+), 19 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index 210332f87a..acdfa211c8 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright (c) 2008 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -533,16 +534,24 @@ static const struct debug_named_value pipe_format_names[] = { DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_SSCALED), DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_SSCALED), DEBUG_NAMED_VALUE(PIPE_FORMAT_L8_SRGB), - DEBUG_NAMED_VALUE(PIPE_FORMAT_A8_L8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A8L8_SRGB), DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8_SRGB), DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8A8_SRGB), DEBUG_NAMED_VALUE(PIPE_FORMAT_R8G8B8X8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_A8R8G8B8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_X8R8G8B8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_B8G8R8A8_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_B8G8R8X8_SRGB), DEBUG_NAMED_VALUE(PIPE_FORMAT_X8UB8UG8SR8S_NORM), DEBUG_NAMED_VALUE(PIPE_FORMAT_B6UG5SR5S_NORM), DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_RGB), DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_RGBA), DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT3_RGBA), DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT5_RGBA), + DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_SRGB), + DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT1_SRGBA), + DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT3_SRGBA), + DEBUG_NAMED_VALUE(PIPE_FORMAT_DXT5_SRGBA), #endif DEBUG_NAMED_VALUE_END }; diff --git a/src/gallium/include/pipe/p_format.h b/src/gallium/include/pipe/p_format.h index 97a4c8c510..6bd55d7735 100644 --- a/src/gallium/include/pipe/p_format.h +++ b/src/gallium/include/pipe/p_format.h @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright (c) 2008 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -245,13 +246,14 @@ static INLINE uint pf_rev(pipe_format_ycbcr_t f) /** * Compresssed format layouts (this will probably change) */ -#define _PIPE_FORMAT_DXT( LEVEL, RSIZE, GSIZE, BSIZE, ASIZE ) \ +#define _PIPE_FORMAT_DXT( LEVEL, RSIZE, GSIZE, BSIZE, ASIZE, TYPE ) \ ((PIPE_FORMAT_LAYOUT_DXT << 0) | \ ((LEVEL) << 2) | \ ((RSIZE) << 5) | \ ((GSIZE) << 8) | \ ((BSIZE) << 11) | \ - ((ASIZE) << 14) ) + ((ASIZE) << 14) | \ + ((TYPE) << 29)) @@ -360,20 +362,30 @@ enum pipe_format { PIPE_FORMAT_R32G32B32A32_FIXED = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 4, 4, 4, 4, PIPE_FORMAT_TYPE_FIXED ), /* sRGB formats */ PIPE_FORMAT_L8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRR1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SRGB ), - PIPE_FORMAT_A8_L8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRRG, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_A8L8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RRRG, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), PIPE_FORMAT_R8G8B8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 0, PIPE_FORMAT_TYPE_SRGB ), PIPE_FORMAT_R8G8B8A8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGBA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), PIPE_FORMAT_R8G8B8X8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_RGB1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_A8R8G8B8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_ARGB, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_X8R8G8B8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_1RGB, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_B8G8R8A8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_BGRA, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_B8G8R8X8_SRGB = _PIPE_FORMAT_RGBAZS_8 ( _PIPE_FORMAT_BGR1, 1, 1, 1, 1, PIPE_FORMAT_TYPE_SRGB ), /* mixed formats */ PIPE_FORMAT_X8UB8UG8SR8S_NORM = _PIPE_FORMAT_MIXED( _PIPE_FORMAT_1BGR, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1 ), PIPE_FORMAT_B6UG5SR5S_NORM = _PIPE_FORMAT_MIXED( _PIPE_FORMAT_BGR1, 6, 5, 5, 0, 0, 1, 1, 0, 1, 0 ), /* compressed formats */ - PIPE_FORMAT_DXT1_RGB = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 0 ), - PIPE_FORMAT_DXT1_RGBA = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 8 ), - PIPE_FORMAT_DXT3_RGBA = _PIPE_FORMAT_DXT( 3, 8, 8, 8, 8 ), - PIPE_FORMAT_DXT5_RGBA = _PIPE_FORMAT_DXT( 5, 8, 8, 8, 8 ) + PIPE_FORMAT_DXT1_RGB = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 0, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_DXT1_RGBA = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 8, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_DXT3_RGBA = _PIPE_FORMAT_DXT( 3, 8, 8, 8, 8, PIPE_FORMAT_TYPE_UNORM ), + PIPE_FORMAT_DXT5_RGBA = _PIPE_FORMAT_DXT( 5, 8, 8, 8, 8, PIPE_FORMAT_TYPE_UNORM ), + + /* sRGB, compressed */ + PIPE_FORMAT_DXT1_SRGB = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 0, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_DXT1_SRGBA = _PIPE_FORMAT_DXT( 1, 8, 8, 8, 8, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_DXT3_SRGBA = _PIPE_FORMAT_DXT( 3, 8, 8, 8, 8, PIPE_FORMAT_TYPE_SRGB ), + PIPE_FORMAT_DXT5_SRGBA = _PIPE_FORMAT_DXT( 5, 8, 8, 8, 8, PIPE_FORMAT_TYPE_SRGB ) }; /** @@ -477,12 +489,16 @@ pf_get_block(enum pipe_format format, struct pipe_format_block *block) switch(format) { case PIPE_FORMAT_DXT1_RGBA: case PIPE_FORMAT_DXT1_RGB: + case PIPE_FORMAT_DXT1_SRGBA: + case PIPE_FORMAT_DXT1_SRGB: block->size = 8; block->width = 4; block->height = 4; break; case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT3_SRGBA: + case PIPE_FORMAT_DXT5_SRGBA: block->size = 16; block->width = 4; block->height = 4; @@ -540,7 +556,7 @@ pf_has_alpha( enum pipe_format format ) /* FIXME: pf_get_component_bits( PIPE_FORMAT_A8L8_UNORM, PIPE_FORMAT_COMP_A ) should not return 0 right? */ if(format == PIPE_FORMAT_A8_UNORM || format == PIPE_FORMAT_A8L8_UNORM || - format == PIPE_FORMAT_A8_L8_SRGB) + format == PIPE_FORMAT_A8L8_SRGB) return TRUE; return pf_get_component_bits( format, PIPE_FORMAT_COMP_A ) ? TRUE : FALSE; case PIPE_FORMAT_LAYOUT_YCBCR: @@ -550,6 +566,9 @@ pf_has_alpha( enum pipe_format format ) case PIPE_FORMAT_DXT1_RGBA: case PIPE_FORMAT_DXT3_RGBA: case PIPE_FORMAT_DXT5_RGBA: + case PIPE_FORMAT_DXT1_SRGBA: + case PIPE_FORMAT_DXT3_SRGBA: + case PIPE_FORMAT_DXT5_SRGBA: return TRUE; default: return FALSE; diff --git a/src/gallium/state_trackers/python/p_format.i b/src/gallium/state_trackers/python/p_format.i index 51ad4bebcd..26fb12b387 100644 --- a/src/gallium/state_trackers/python/p_format.i +++ b/src/gallium/state_trackers/python/p_format.i @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright (c) 2008 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -128,10 +129,14 @@ enum pipe_format { PIPE_FORMAT_R32G32B32A32_FIXED, PIPE_FORMAT_L8_SRGB, - PIPE_FORMAT_A8_L8_SRGB, + PIPE_FORMAT_A8L8_SRGB, PIPE_FORMAT_R8G8B8_SRGB, PIPE_FORMAT_R8G8B8A8_SRGB, PIPE_FORMAT_R8G8B8X8_SRGB, + PIPE_FORMAT_A8R8G8B8_SRGB, + PIPE_FORMAT_X8R8G8B8_SRGB, + PIPE_FORMAT_B8G8R8A8_SRGB, + PIPE_FORMAT_B8G8R8X8_SRGB, PIPE_FORMAT_X8UB8UG8SR8S_NORM, PIPE_FORMAT_B6UG5SR5S_NORM, @@ -140,6 +145,11 @@ enum pipe_format { PIPE_FORMAT_DXT1_RGBA, PIPE_FORMAT_DXT3_RGBA, PIPE_FORMAT_DXT5_RGBA, + + PIPE_FORMAT_DXT1_SRGB, + PIPE_FORMAT_DXT1_SRGBA, + PIPE_FORMAT_DXT3_SRGBA, + PIPE_FORMAT_DXT5_SRGBA, }; diff --git a/src/mesa/state_tracker/st_extensions.c b/src/mesa/state_tracker/st_extensions.c index d96899b611..5ff0c61147 100644 --- a/src/mesa/state_tracker/st_extensions.c +++ b/src/mesa/state_tracker/st_extensions.c @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright (c) 2008 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -242,6 +243,9 @@ void st_init_extensions(struct st_context *st) } if (screen->is_format_supported(screen, PIPE_FORMAT_R8G8B8A8_SRGB, + PIPE_TEXTURE_2D, + PIPE_TEXTURE_USAGE_SAMPLER, 0) || + screen->is_format_supported(screen, PIPE_FORMAT_A8R8G8B8_SRGB, PIPE_TEXTURE_2D, PIPE_TEXTURE_USAGE_SAMPLER, 0)) { ctx->Extensions.EXT_texture_sRGB = GL_TRUE; diff --git a/src/mesa/state_tracker/st_format.c b/src/mesa/state_tracker/st_format.c index a9387c05df..9e2d60c926 100644 --- a/src/mesa/state_tracker/st_format.c +++ b/src/mesa/state_tracker/st_format.c @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * Copyright (c) 2008 VMware, Inc. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -263,6 +264,28 @@ st_mesa_format_to_pipe_format(GLuint mesaFormat) return PIPE_FORMAT_DXT3_RGBA; case MESA_FORMAT_RGBA_DXT5: return PIPE_FORMAT_DXT5_RGBA; +#if FEATURE_EXT_texture_sRGB + case MESA_FORMAT_SRGB_DXT1: + return PIPE_FORMAT_DXT1_SRGB; + case MESA_FORMAT_SRGBA_DXT1: + return PIPE_FORMAT_DXT1_SRGBA; + case MESA_FORMAT_SRGBA_DXT3: + return PIPE_FORMAT_DXT3_SRGBA; + case MESA_FORMAT_SRGBA_DXT5: + return PIPE_FORMAT_DXT5_SRGBA; +#endif +#endif +#if FEATURE_EXT_texture_sRGB + case MESA_FORMAT_SLA8: + return PIPE_FORMAT_A8L8_SRGB; + case MESA_FORMAT_SL8: + return PIPE_FORMAT_L8_SRGB; + case MESA_FORMAT_SRGB8: + return PIPE_FORMAT_R8G8B8_SRGB; + case MESA_FORMAT_SRGBA8: + return PIPE_FORMAT_R8G8B8A8_SRGB; + case MESA_FORMAT_SARGB8: + return PIPE_FORMAT_A8R8G8B8_SRGB; #endif default: assert(0); @@ -294,6 +317,28 @@ default_rgba_format(struct pipe_screen *screen, return PIPE_FORMAT_NONE; } +/** + * Find an sRGBA format supported by the context/winsys. + */ +static enum pipe_format +default_srgba_format(struct pipe_screen *screen, + enum pipe_texture_target target, + unsigned tex_usage, + unsigned geom_flags) +{ + static const enum pipe_format colorFormats[] = { + PIPE_FORMAT_A8R8G8B8_SRGB, + PIPE_FORMAT_B8G8R8A8_SRGB, + PIPE_FORMAT_R8G8B8A8_SRGB, + }; + uint i; + for (i = 0; i < Elements(colorFormats); i++) { + if (screen->is_format_supported( screen, colorFormats[i], target, tex_usage, geom_flags )) { + return colorFormats[i]; + } + } + return PIPE_FORMAT_NONE; +} /** * Search list of formats for first RGBA format with >8 bits/channel. @@ -515,28 +560,32 @@ st_choose_format(struct pipe_context *pipe, GLint internalFormat, case GL_SRGB_EXT: case GL_SRGB8_EXT: case GL_COMPRESSED_SRGB_EXT: - case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: + case GL_COMPRESSED_SRGB_ALPHA_EXT: case GL_SRGB_ALPHA_EXT: case GL_SRGB8_ALPHA8_EXT: - case GL_COMPRESSED_SRGB_ALPHA_EXT: + return default_srgba_format( screen, target, tex_usage, geom_flags ); + case GL_COMPRESSED_SRGB_S3TC_DXT1_EXT: + return PIPE_FORMAT_DXT1_SRGB; case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT1_EXT: + return PIPE_FORMAT_DXT1_SRGBA; case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT3_EXT: + return PIPE_FORMAT_DXT3_SRGBA; case GL_COMPRESSED_SRGB_ALPHA_S3TC_DXT5_EXT: - return default_rgba_format( screen, target, tex_usage, geom_flags ); + return PIPE_FORMAT_DXT5_SRGBA; case GL_SLUMINANCE_ALPHA_EXT: case GL_SLUMINANCE8_ALPHA8_EXT: case GL_COMPRESSED_SLUMINANCE_EXT: case GL_COMPRESSED_SLUMINANCE_ALPHA_EXT: - if (screen->is_format_supported( screen, PIPE_FORMAT_A8L8_UNORM, target, tex_usage, geom_flags )) - return PIPE_FORMAT_A8L8_UNORM; - return default_rgba_format( screen, target, tex_usage, geom_flags ); + if (screen->is_format_supported( screen, PIPE_FORMAT_A8L8_SRGB, target, tex_usage, geom_flags )) + return PIPE_FORMAT_A8L8_SRGB; + return default_srgba_format( screen, target, tex_usage, geom_flags ); case GL_SLUMINANCE_EXT: case GL_SLUMINANCE8_EXT: - if (screen->is_format_supported( screen, PIPE_FORMAT_L8_UNORM, target, tex_usage, geom_flags )) - return PIPE_FORMAT_L8_UNORM; - return default_rgba_format( screen, target, tex_usage, geom_flags ); + if (screen->is_format_supported( screen, PIPE_FORMAT_L8_SRGB, target, tex_usage, geom_flags )) + return PIPE_FORMAT_L8_SRGB; + return default_srgba_format( screen, target, tex_usage, geom_flags ); default: return PIPE_FORMAT_NONE; @@ -617,6 +666,28 @@ translate_gallium_format_to_mesa_format(enum pipe_format format) return &_mesa_texformat_rgba_dxt3; case PIPE_FORMAT_DXT5_RGBA: return &_mesa_texformat_rgba_dxt5; +#if FEATURE_EXT_texture_sRGB + case PIPE_FORMAT_DXT1_SRGB: + return &_mesa_texformat_srgb_dxt1; + case PIPE_FORMAT_DXT1_SRGBA: + return &_mesa_texformat_srgba_dxt1; + case PIPE_FORMAT_DXT3_SRGBA: + return &_mesa_texformat_srgba_dxt3; + case PIPE_FORMAT_DXT5_SRGBA: + return &_mesa_texformat_srgba_dxt5; +#endif +#endif +#if FEATURE_EXT_texture_sRGB + case PIPE_FORMAT_A8L8_SRGB: + return &_mesa_texformat_sla8; + case PIPE_FORMAT_L8_SRGB: + return &_mesa_texformat_sl8; + case PIPE_FORMAT_R8G8B8_SRGB: + return &_mesa_texformat_srgb8; + case PIPE_FORMAT_R8G8B8A8_SRGB: + return &_mesa_texformat_srgba8; + case PIPE_FORMAT_A8R8G8B8_SRGB: + return &_mesa_texformat_sargb8; #endif /* XXX add additional cases */ default: -- cgit v1.2.3 From 71051f1f40206dd9d86d64cfdc20cc744a15f12c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 12 Dec 2008 12:05:21 +0000 Subject: Revert "pipebuffer: Fix buffer overflow." This reverts commit 55839ae064d64b7fcc180fcddb364bf31ab760dc. --- src/gallium/auxiliary/pipebuffer/pb_validate.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c index 94532bb4ce..726ae1688e 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.c +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c @@ -182,7 +182,7 @@ pb_validate_create() return NULL; vl->size = PB_VALIDATE_INITIAL_SIZE; - vl->entries = (struct pb_validate_entry *)CALLOC(vl->size, sizeof(struct pb_validate_entry)); + vl->entries = (struct pb_validate_entry *)CALLOC(vl->size, sizeof(struct pb_buffer *)); if(!vl->entries) { FREE(vl); return NULL; -- cgit v1.2.3 From 9b3bce6bed36a37293cd67ed4e9a05dd6e1c9d80 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 12 Dec 2008 12:05:30 +0000 Subject: Revert "pipebuffer: Ondemand buffer manager." This reverts commit 17849eafaacfbb2124d86f561a91b707317d3b31. --- src/gallium/auxiliary/pipebuffer/Makefile | 1 - src/gallium/auxiliary/pipebuffer/SConscript | 1 - src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 14 - .../auxiliary/pipebuffer/pb_bufmgr_ondemand.c | 303 --------------------- 4 files changed, 319 deletions(-) delete mode 100644 src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile index 4bcf08f8a5..f9b39d9ce0 100644 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -11,7 +11,6 @@ C_SOURCES = \ pb_bufmgr_debug.c \ pb_bufmgr_fenced.c \ pb_bufmgr_mm.c \ - pb_bufmgr_ondemand.c \ pb_bufmgr_pool.c \ pb_bufmgr_slab.c \ pb_validate.c \ diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript index 4acf721808..56a40dda0d 100644 --- a/src/gallium/auxiliary/pipebuffer/SConscript +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -10,7 +10,6 @@ pipebuffer = env.ConvenienceLibrary( 'pb_bufmgr_debug.c', 'pb_bufmgr_fenced.c', 'pb_bufmgr_mm.c', - 'pb_bufmgr_ondemand.c', 'pb_bufmgr_pool.c', 'pb_bufmgr_slab.c', 'pb_validate.c', diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 0a8264a924..8fe2704708 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -182,20 +182,6 @@ pb_alt_manager_create(struct pb_manager *provider1, struct pb_manager *provider2); -/** - * Ondemand buffer manager. - * - * Buffers are created in malloc'ed memory (fast and cached), and the constents - * is transfered to a buffer from the provider (typically in slow uncached - * memory) when there is an attempt to validate the buffer. - * - * Ideal for situations where one does not know before hand whether a given - * buffer will effectively be used by the hardware or not. - */ -struct pb_manager * -pb_ondemand_manager_create(struct pb_manager *provider); - - /** * Debug buffer manager to detect buffer under- and overflows. * diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c deleted file mode 100644 index ba02a84e62..0000000000 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c +++ /dev/null @@ -1,303 +0,0 @@ -/************************************************************************** - * - * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * A variation of malloc buffers which get transferred to real graphics memory - * when there is an attempt to validate them. - * - * @author Jose Fonseca - */ - - -#include "pipe/p_debug.h" -#include "util/u_memory.h" -#include "pb_buffer.h" -#include "pb_bufmgr.h" - - -struct pb_ondemand_manager; - - -struct pb_ondemand_buffer -{ - struct pb_buffer base; - - struct pb_ondemand_manager *mgr; - - /** Regular malloc'ed memory */ - void *data; - unsigned mapcount; - - /** Real buffer */ - struct pb_buffer *buffer; - size_t size; - struct pb_desc desc; -}; - - -struct pb_ondemand_manager -{ - struct pb_manager base; - - struct pb_manager *provider; -}; - - -extern const struct pb_vtbl pb_ondemand_buffer_vtbl; - -static INLINE struct pb_ondemand_buffer * -pb_ondemand_buffer(struct pb_buffer *buf) -{ - assert(buf); - assert(buf->vtbl == &pb_ondemand_buffer_vtbl); - return (struct pb_ondemand_buffer *)buf; -} - -static INLINE struct pb_ondemand_manager * -pb_ondemand_manager(struct pb_manager *mgr) -{ - assert(mgr); - return (struct pb_ondemand_manager *)mgr; -} - - -static void -pb_ondemand_buffer_destroy(struct pb_buffer *_buf) -{ - struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); - - pb_reference(&buf->buffer, NULL); - - align_free(buf->data); - - FREE(buf); -} - - -static void * -pb_ondemand_buffer_map(struct pb_buffer *_buf, - unsigned flags) -{ - struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); - - if(buf->buffer) { - assert(!buf->data); - return pb_map(buf->buffer, flags); - } - else { - assert(buf->data); - ++buf->mapcount; - return buf->data; - } -} - - -static void -pb_ondemand_buffer_unmap(struct pb_buffer *_buf) -{ - struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); - - if(buf->buffer) { - assert(!buf->data); - pb_unmap(buf->buffer); - } - else { - assert(buf->data); - assert(buf->mapcount); - if(buf->mapcount) - --buf->mapcount; - } -} - - -static enum pipe_error -pb_ondemand_buffer_instantiate(struct pb_ondemand_buffer *buf) -{ - if(!buf->buffer) { - struct pb_manager *provider = buf->mgr->provider; - uint8_t *map; - - assert(!buf->mapcount); - - buf->buffer = provider->create_buffer(provider, buf->size, &buf->desc); - if(!buf->buffer) - return PIPE_ERROR_OUT_OF_MEMORY; - - map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_READ); - if(!map) { - pb_reference(&buf->buffer, NULL); - return PIPE_ERROR; - } - - memcpy(map, buf->data, buf->size); - - pb_unmap(buf->buffer); - - if(!buf->mapcount) { - FREE(buf->data); - buf->data = NULL; - } - } - - return PIPE_OK; -} - -static enum pipe_error -pb_ondemand_buffer_validate(struct pb_buffer *_buf, - struct pb_validate *vl, - unsigned flags) -{ - struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); - enum pipe_error ret; - - assert(!buf->mapcount); - if(buf->mapcount) - return PIPE_ERROR; - - ret = pb_ondemand_buffer_instantiate(buf); - if(ret != PIPE_OK) - return ret; - - return pb_validate(buf->buffer, vl, flags); -} - - -static void -pb_ondemand_buffer_fence(struct pb_buffer *_buf, - struct pipe_fence_handle *fence) -{ - struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); - - assert(buf->buffer); - if(!buf->buffer) - return; - - pb_fence(buf->buffer, fence); -} - - -static void -pb_ondemand_buffer_get_base_buffer(struct pb_buffer *_buf, - struct pb_buffer **base_buf, - unsigned *offset) -{ - struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); - - if(pb_ondemand_buffer_instantiate(buf) != PIPE_OK) { - assert(0); - *base_buf = &buf->base; - *offset = 0; - return; - } - - pb_get_base_buffer(buf->buffer, base_buf, offset); -} - - -const struct pb_vtbl -pb_ondemand_buffer_vtbl = { - pb_ondemand_buffer_destroy, - pb_ondemand_buffer_map, - pb_ondemand_buffer_unmap, - pb_ondemand_buffer_validate, - pb_ondemand_buffer_fence, - pb_ondemand_buffer_get_base_buffer -}; - - -static struct pb_buffer * -pb_ondemand_manager_create_buffer(struct pb_manager *_mgr, - size_t size, - const struct pb_desc *desc) -{ - struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr); - struct pb_ondemand_buffer *buf; - - buf = CALLOC_STRUCT(pb_ondemand_buffer); - if(!buf) - return NULL; - - buf->base.base.refcount = 1; - buf->base.base.alignment = desc->alignment; - buf->base.base.usage = desc->usage; - buf->base.base.size = size; - buf->base.vtbl = &pb_ondemand_buffer_vtbl; - - buf->mgr = mgr; - - buf->data = align_malloc(size, desc->alignment < sizeof(void*) ? sizeof(void*) : desc->alignment); - if(!buf->data) { - FREE(buf); - return NULL; - } - - buf->size = size; - buf->desc = *desc; - - return &buf->base; -} - - -static void -pb_ondemand_manager_flush(struct pb_manager *_mgr) -{ - struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr); - - mgr->provider->flush(mgr->provider); -} - - -static void -pb_ondemand_manager_destroy(struct pb_manager *_mgr) -{ - struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr); - - FREE(mgr); -} - - -struct pb_manager * -pb_ondemand_manager_create(struct pb_manager *provider) -{ - struct pb_ondemand_manager *mgr; - - if(!provider) - return NULL; - - mgr = CALLOC_STRUCT(pb_ondemand_manager); - if(!mgr) - return NULL; - - mgr->base.destroy = pb_ondemand_manager_destroy; - mgr->base.create_buffer = pb_ondemand_manager_create_buffer; - mgr->base.flush = pb_ondemand_manager_flush; - - mgr->provider = provider; - - return &mgr->base; -} -- cgit v1.2.3 From aef455c4a7bbd7df97a6444ae332cb5fb976e627 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 12 Dec 2008 12:05:39 +0000 Subject: Revert "pipebuffer: Implement proper buffer validation." This reverts commit a6d866f72c88d48d2bcfb3e3c882fdb639b5a8ce. --- src/gallium/auxiliary/pipebuffer/pb_buffer.h | 36 +---- .../auxiliary/pipebuffer/pb_buffer_fenced.c | 158 ++++++++------------- .../auxiliary/pipebuffer/pb_buffer_fenced.h | 21 ++- .../auxiliary/pipebuffer/pb_buffer_malloc.c | 20 --- src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 23 +-- src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c | 26 +--- .../auxiliary/pipebuffer/pb_bufmgr_fenced.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 25 +--- src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c | 25 +--- src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c | 21 --- src/gallium/auxiliary/pipebuffer/pb_validate.c | 84 +++-------- src/gallium/auxiliary/pipebuffer/pb_validate.h | 10 +- src/gallium/auxiliary/pipebuffer/pb_winsys.c | 22 +-- 14 files changed, 108 insertions(+), 367 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index c9570d7be2..8505d333bd 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -37,7 +37,7 @@ * There is no obligation of a winsys driver to use this library. And a pipe * driver should be completly agnostic about it. * - * \author Jose Fonseca + * \author Jos� Fonseca */ #ifndef PB_BUFFER_H_ @@ -46,7 +46,6 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" -#include "pipe/p_error.h" #include "pipe/p_state.h" #include "pipe/p_inlines.h" @@ -57,8 +56,6 @@ extern "C" { struct pb_vtbl; -struct pb_validate; - /** * Buffer description. @@ -107,13 +104,6 @@ struct pb_vtbl void (*unmap)( struct pb_buffer *buf ); - enum pipe_error (*validate)( struct pb_buffer *buf, - struct pb_validate *vl, - unsigned flags ); - - void (*fence)( struct pb_buffer *buf, - struct pipe_fence_handle *fence ); - /** * Get the base buffer and the offset. * @@ -128,7 +118,6 @@ struct pb_vtbl void (*get_base_buffer)( struct pb_buffer *buf, struct pb_buffer **base_buf, unsigned *offset ); - }; @@ -184,33 +173,10 @@ pb_get_base_buffer( struct pb_buffer *buf, offset = 0; return; } - assert(buf->vtbl->get_base_buffer); buf->vtbl->get_base_buffer(buf, base_buf, offset); } -static INLINE enum pipe_error -pb_validate(struct pb_buffer *buf, struct pb_validate *vl, unsigned flags) -{ - assert(buf); - if(!buf) - return PIPE_ERROR; - assert(buf->vtbl->validate); - return buf->vtbl->validate(buf, vl, flags); -} - - -static INLINE void -pb_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence) -{ - assert(buf); - if(!buf) - return; - assert(buf->vtbl->fence); - buf->vtbl->fence(buf, fence); -} - - static INLINE void pb_destroy(struct pb_buffer *buf) { diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 17300cb553..17b2781052 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -29,7 +29,7 @@ * \file * Implementation of fenced buffers. * - * \author Jose Fonseca + * \author José Fonseca * \author Thomas Hellström */ @@ -59,6 +59,13 @@ */ #define SUPER(__derived) (&(__derived)->base) +#define PIPE_BUFFER_USAGE_CPU_READ_WRITE \ + ( PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE ) +#define PIPE_BUFFER_USAGE_GPU_READ_WRITE \ + ( PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE ) +#define PIPE_BUFFER_USAGE_WRITE \ + ( PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_GPU_WRITE ) + struct fenced_buffer_list { @@ -90,8 +97,6 @@ struct fenced_buffer unsigned flags; unsigned mapcount; - struct pb_validate *vl; - unsigned validation_flags; struct pipe_fence_handle *fence; struct list_head head; @@ -103,6 +108,7 @@ static INLINE struct fenced_buffer * fenced_buffer(struct pb_buffer *buf) { assert(buf); + assert(buf->vtbl == &fenced_buffer_vtbl); return (struct fenced_buffer *)buf; } @@ -268,7 +274,6 @@ fenced_buffer_map(struct pb_buffer *buf, struct fenced_buffer *fenced_buf = fenced_buffer(buf); void *map; - assert(flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE); assert(!(flags & ~PIPE_BUFFER_USAGE_CPU_READ_WRITE)); flags &= PIPE_BUFFER_USAGE_CPU_READ_WRITE; @@ -310,101 +315,6 @@ fenced_buffer_unmap(struct pb_buffer *buf) } -static enum pipe_error -fenced_buffer_validate(struct pb_buffer *buf, - struct pb_validate *vl, - unsigned flags) -{ - struct fenced_buffer *fenced_buf = fenced_buffer(buf); - enum pipe_error ret; - - if(!vl) { - /* invalidate */ - fenced_buf->vl = NULL; - fenced_buf->validation_flags = 0; - return PIPE_OK; - } - - assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); - assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE)); - flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE; - - /* Buffer cannot be validated in two different lists */ - if(fenced_buf->vl && fenced_buf->vl != vl) - return PIPE_ERROR_RETRY; - - /* Do not validate if buffer is still mapped */ - if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { - /* TODO: wait for the thread that mapped the buffer to unmap it */ - return PIPE_ERROR_RETRY; - } - - /* Allow concurrent GPU reads, but serialize GPU writes */ - if(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE) { - if((fenced_buf->flags | flags) & PIPE_BUFFER_USAGE_GPU_WRITE) { - _fenced_buffer_finish(fenced_buf); - } - } - - if(fenced_buf->vl == vl && - (fenced_buf->validation_flags & flags) == flags) { - /* Nothing to do -- buffer already validated */ - return PIPE_OK; - } - - /* Final sanity checking */ - assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE)); - assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE)); - assert(!fenced_buf->mapcount); - - ret = pb_validate(fenced_buf->buffer, vl, flags); - if (ret != PIPE_OK) - return ret; - - fenced_buf->vl = vl; - fenced_buf->validation_flags |= flags; - - return PIPE_OK; -} - - -static void -fenced_buffer_fence(struct pb_buffer *buf, - struct pipe_fence_handle *fence) -{ - struct fenced_buffer *fenced_buf; - struct fenced_buffer_list *fenced_list; - struct pipe_winsys *winsys; - - fenced_buf = fenced_buffer(buf); - fenced_list = fenced_buf->list; - winsys = fenced_list->winsys; - - if(fence == fenced_buf->fence) { - /* Nothing to do */ - return; - } - - assert(fenced_buf->vl); - assert(fenced_buf->validation_flags); - - pipe_mutex_lock(fenced_list->mutex); - if (fenced_buf->fence) - _fenced_buffer_remove(fenced_list, fenced_buf); - if (fence) { - winsys->fence_reference(winsys, &fenced_buf->fence, fence); - fenced_buf->flags |= fenced_buf->validation_flags; - _fenced_buffer_add(fenced_buf); - } - pipe_mutex_unlock(fenced_list->mutex); - - pb_fence(fenced_buf->buffer, fence); - - fenced_buf->vl = NULL; - fenced_buf->validation_flags = 0; -} - - static void fenced_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -415,13 +325,11 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf, } -static const struct pb_vtbl +const struct pb_vtbl fenced_buffer_vtbl = { fenced_buffer_destroy, fenced_buffer_map, fenced_buffer_unmap, - fenced_buffer_validate, - fenced_buffer_fence, fenced_buffer_get_base_buffer }; @@ -454,6 +362,52 @@ fenced_buffer_create(struct fenced_buffer_list *fenced_list, } +void +buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + struct fenced_buffer *fenced_buf; + struct fenced_buffer_list *fenced_list; + struct pipe_winsys *winsys; + /* FIXME: receive this as a parameter */ + unsigned flags = fence ? PIPE_BUFFER_USAGE_GPU_READ_WRITE : 0; + + /* This is a public function, so be extra cautious with the buffer passed, + * as happens frequently to receive null buffers, or pointer to buffers + * other than fenced buffers. */ + assert(buf); + if(!buf) + return; + assert(buf->vtbl == &fenced_buffer_vtbl); + if(buf->vtbl != &fenced_buffer_vtbl) + return; + + fenced_buf = fenced_buffer(buf); + fenced_list = fenced_buf->list; + winsys = fenced_list->winsys; + + if(!fence || fence == fenced_buf->fence) { + /* Handle the same fence case specially, not only because it is a fast + * path, but mostly to avoid serializing two writes with the same fence, + * as that would bring the hardware down to synchronous operation without + * any benefit. + */ + fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE; + return; + } + + pipe_mutex_lock(fenced_list->mutex); + if (fenced_buf->fence) + _fenced_buffer_remove(fenced_list, fenced_buf); + if (fence) { + winsys->fence_reference(winsys, &fenced_buf->fence, fence); + fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE; + _fenced_buffer_add(fenced_buf); + } + pipe_mutex_unlock(fenced_list->mutex); +} + + struct fenced_buffer_list * fenced_buffer_list_create(struct pipe_winsys *winsys) { diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h index b15c676194..50d5891bdb 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h @@ -44,7 +44,7 @@ * Between the handle's destruction, and the fence signalling, the buffer is * stored in a fenced buffer list. * - * \author Jose Fonseca + * \author José Fonseca */ #ifndef PB_BUFFER_FENCED_H_ @@ -70,6 +70,14 @@ struct pipe_fence_handle; struct fenced_buffer_list; +/** + * The fenced buffer's virtual function table. + * + * NOTE: Made public for debugging purposes. + */ +extern const struct pb_vtbl fenced_buffer_vtbl; + + /** * Create a fenced buffer list. * @@ -100,6 +108,17 @@ fenced_buffer_create(struct fenced_buffer_list *fenced, struct pb_buffer *buffer); +/** + * Set a buffer's fence. + * + * NOTE: Although it takes a generic pb_buffer argument, it will fail + * on everything but buffers returned by fenced_buffer_create. + */ +void +buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence); + + #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c index 53f497cfb0..1bf22a2ec0 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c @@ -81,24 +81,6 @@ malloc_buffer_unmap(struct pb_buffer *buf) } -static enum pipe_error -malloc_buffer_validate(struct pb_buffer *buf, - struct pb_validate *vl, - unsigned flags) -{ - assert(0); - return PIPE_ERROR; -} - - -static void -malloc_buffer_fence(struct pb_buffer *buf, - struct pipe_fence_handle *fence) -{ - assert(0); -} - - static void malloc_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -114,8 +96,6 @@ malloc_buffer_vtbl = { malloc_buffer_destroy, malloc_buffer_map, malloc_buffer_unmap, - malloc_buffer_validate, - malloc_buffer_fence, malloc_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 8fe2704708..cafbee045a 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -43,7 +43,7 @@ * - the fenced buffer manager, which will delay buffer destruction until the * the moment the card finishing processing it. * - * \author Jose Fonseca + * \author José Fonseca */ #ifndef PB_BUFMGR_H_ diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index f57a7bffd7..8f118874ec 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -29,7 +29,7 @@ * \file * Buffer cache. * - * \author Jose Fonseca + * \author José Fonseca * \author Thomas Hellström */ @@ -183,25 +183,6 @@ pb_cache_buffer_unmap(struct pb_buffer *_buf) } -static enum pipe_error -pb_cache_buffer_validate(struct pb_buffer *_buf, - struct pb_validate *vl, - unsigned flags) -{ - struct pb_cache_buffer *buf = pb_cache_buffer(_buf); - return pb_validate(buf->buffer, vl, flags); -} - - -static void -pb_cache_buffer_fence(struct pb_buffer *_buf, - struct pipe_fence_handle *fence) -{ - struct pb_cache_buffer *buf = pb_cache_buffer(_buf); - pb_fence(buf->buffer, fence); -} - - static void pb_cache_buffer_get_base_buffer(struct pb_buffer *_buf, struct pb_buffer **base_buf, @@ -217,8 +198,6 @@ pb_cache_buffer_vtbl = { pb_cache_buffer_destroy, pb_cache_buffer_map, pb_cache_buffer_unmap, - pb_cache_buffer_validate, - pb_cache_buffer_fence, pb_cache_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index 62639fe1c8..1675e6e182 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -29,7 +29,7 @@ * \file * Debug buffer manager to detect buffer under- and overflows. * - * \author Jose Fonseca + * \author José Fonseca */ @@ -255,35 +255,11 @@ pb_debug_buffer_get_base_buffer(struct pb_buffer *_buf, } -static enum pipe_error -pb_debug_buffer_validate(struct pb_buffer *_buf, - struct pb_validate *vl, - unsigned flags) -{ - struct pb_debug_buffer *buf = pb_debug_buffer(_buf); - - pb_debug_buffer_check(buf); - - return pb_validate(buf->buffer, vl, flags); -} - - -static void -pb_debug_buffer_fence(struct pb_buffer *_buf, - struct pipe_fence_handle *fence) -{ - struct pb_debug_buffer *buf = pb_debug_buffer(_buf); - pb_fence(buf->buffer, fence); -} - - const struct pb_vtbl pb_debug_buffer_vtbl = { pb_debug_buffer_destroy, pb_debug_buffer_map, pb_debug_buffer_unmap, - pb_debug_buffer_validate, - pb_debug_buffer_fence, pb_debug_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c index f8fd2cd531..633ee70a75 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c @@ -30,7 +30,7 @@ * \file * A buffer manager that wraps buffers in fenced buffers. * - * \author Jose Fonseca + * \author José Fonseca */ diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index 607f10bc73..fe80ca30ee 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -29,7 +29,7 @@ * \file * Buffer manager using the old texture memory manager. * - * \author Jose Fonseca + * \author José Fonseca */ @@ -124,27 +124,6 @@ mm_buffer_unmap(struct pb_buffer *buf) } -static enum pipe_error -mm_buffer_validate(struct pb_buffer *buf, - struct pb_validate *vl, - unsigned flags) -{ - struct mm_buffer *mm_buf = mm_buffer(buf); - struct mm_pb_manager *mm = mm_buf->mgr; - return pb_validate(mm->buffer, vl, flags); -} - - -static void -mm_buffer_fence(struct pb_buffer *buf, - struct pipe_fence_handle *fence) -{ - struct mm_buffer *mm_buf = mm_buffer(buf); - struct mm_pb_manager *mm = mm_buf->mgr; - pb_fence(mm->buffer, fence); -} - - static void mm_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -162,8 +141,6 @@ mm_buffer_vtbl = { mm_buffer_destroy, mm_buffer_map, mm_buffer_unmap, - mm_buffer_validate, - mm_buffer_fence, mm_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c index a6ff37653e..61ac291ed7 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -30,7 +30,7 @@ * \file * Batch buffer pool management. * - * \author Jose Fonseca + * \author José Fonseca * \author Thomas Hellström */ @@ -138,27 +138,6 @@ pool_buffer_unmap(struct pb_buffer *buf) } -static enum pipe_error -pool_buffer_validate(struct pb_buffer *buf, - struct pb_validate *vl, - unsigned flags) -{ - struct pool_buffer *pool_buf = pool_buffer(buf); - struct pool_pb_manager *pool = pool_buf->mgr; - return pb_validate(pool->buffer, vl, flags); -} - - -static void -pool_buffer_fence(struct pb_buffer *buf, - struct pipe_fence_handle *fence) -{ - struct pool_buffer *pool_buf = pool_buffer(buf); - struct pool_pb_manager *pool = pool_buf->mgr; - pb_fence(pool->buffer, fence); -} - - static void pool_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -176,8 +155,6 @@ pool_buffer_vtbl = { pool_buffer_destroy, pool_buffer_map, pool_buffer_unmap, - pool_buffer_validate, - pool_buffer_fence, pool_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index 9b9fedccb4..2a80154920 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -248,25 +248,6 @@ pb_slab_buffer_unmap(struct pb_buffer *_buf) } -static enum pipe_error -pb_slab_buffer_validate(struct pb_buffer *_buf, - struct pb_validate *vl, - unsigned flags) -{ - struct pb_slab_buffer *buf = pb_slab_buffer(_buf); - return pb_validate(buf->slab->bo, vl, flags); -} - - -static void -pb_slab_buffer_fence(struct pb_buffer *_buf, - struct pipe_fence_handle *fence) -{ - struct pb_slab_buffer *buf = pb_slab_buffer(_buf); - pb_fence(buf->slab->bo, fence); -} - - static void pb_slab_buffer_get_base_buffer(struct pb_buffer *_buf, struct pb_buffer **base_buf, @@ -283,8 +264,6 @@ pb_slab_buffer_vtbl = { pb_slab_buffer_destroy, pb_slab_buffer_map, pb_slab_buffer_unmap, - pb_slab_buffer_validate, - pb_slab_buffer_fence, pb_slab_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c index 726ae1688e..1e54fc39d4 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.c +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c @@ -46,16 +46,9 @@ #define PB_VALIDATE_INITIAL_SIZE 1 /* 512 */ -struct pb_validate_entry -{ - struct pb_buffer *buf; - unsigned flags; -}; - - struct pb_validate { - struct pb_validate_entry *entries; + struct pb_buffer **buffers; unsigned used; unsigned size; }; @@ -63,87 +56,54 @@ struct pb_validate enum pipe_error pb_validate_add_buffer(struct pb_validate *vl, - struct pb_buffer *buf, - unsigned flags) + struct pb_buffer *buf) { assert(buf); if(!buf) return PIPE_ERROR; - assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); - assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE)); - flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE; - /* We only need to store one reference for each buffer, so avoid storing - * consecutive references for the same buffer. It might not be the most - * common pattern, but it is easy to implement. + * consecutive references for the same buffer. It might not be the more + * common pasttern, but it is easy to implement. */ - if(vl->used && vl->entries[vl->used - 1].buf == buf) { - vl->entries[vl->used - 1].flags |= flags; + if(vl->used && vl->buffers[vl->used - 1] == buf) { return PIPE_OK; } /* Grow the table */ if(vl->used == vl->size) { unsigned new_size; - struct pb_validate_entry *new_entries; + struct pb_buffer **new_buffers; new_size = vl->size * 2; if(!new_size) return PIPE_ERROR_OUT_OF_MEMORY; - new_entries = (struct pb_validate_entry *)REALLOC(vl->entries, - vl->size*sizeof(struct pb_validate_entry), - new_size*sizeof(struct pb_validate_entry)); - if(!new_entries) + new_buffers = (struct pb_buffer **)REALLOC(vl->buffers, + vl->size*sizeof(struct pb_buffer *), + new_size*sizeof(struct pb_buffer *)); + if(!new_buffers) return PIPE_ERROR_OUT_OF_MEMORY; - memset(new_entries + vl->size, 0, (new_size - vl->size)*sizeof(struct pb_validate_entry)); + memset(new_buffers + vl->size, 0, (new_size - vl->size)*sizeof(struct pb_buffer *)); vl->size = new_size; - vl->entries = new_entries; + vl->buffers = new_buffers; } - assert(!vl->entries[vl->used].buf); - pb_reference(&vl->entries[vl->used].buf, buf); - vl->entries[vl->used].flags = flags; + assert(!vl->buffers[vl->used]); + pb_reference(&vl->buffers[vl->used], buf); ++vl->used; return PIPE_OK; } -enum pipe_error -pb_validate_foreach(struct pb_validate *vl, - enum pipe_error (*callback)(struct pb_buffer *buf, void *data), - void *data) -{ - unsigned i; - for(i = 0; i < vl->used; ++i) { - enum pipe_error ret; - ret = callback(vl->entries[i].buf, data); - if(ret != PIPE_OK) - return ret; - } - return PIPE_OK; -} - - enum pipe_error pb_validate_validate(struct pb_validate *vl) { - unsigned i; - - for(i = 0; i < vl->used; ++i) { - enum pipe_error ret; - ret = pb_validate(vl->entries[i].buf, vl, vl->entries[i].flags); - if(ret != PIPE_OK) { - while(i--) - pb_validate(vl->entries[i].buf, NULL, 0); - return ret; - } - } - + /* FIXME: go through each buffer, ensure its not mapped, its address is + * available -- requires a new pb_buffer interface */ return PIPE_OK; } @@ -154,8 +114,8 @@ pb_validate_fence(struct pb_validate *vl, { unsigned i; for(i = 0; i < vl->used; ++i) { - pb_fence(vl->entries[i].buf, fence); - pb_reference(&vl->entries[i].buf, NULL); + buffer_fence(vl->buffers[i], fence); + pb_reference(&vl->buffers[i], NULL); } vl->used = 0; } @@ -166,8 +126,8 @@ pb_validate_destroy(struct pb_validate *vl) { unsigned i; for(i = 0; i < vl->used; ++i) - pb_reference(&vl->entries[i].buf, NULL); - FREE(vl->entries); + pb_reference(&vl->buffers[i], NULL); + FREE(vl->buffers); FREE(vl); } @@ -182,8 +142,8 @@ pb_validate_create() return NULL; vl->size = PB_VALIDATE_INITIAL_SIZE; - vl->entries = (struct pb_validate_entry *)CALLOC(vl->size, sizeof(struct pb_buffer *)); - if(!vl->entries) { + vl->buffers = (struct pb_buffer **)CALLOC(vl->size, sizeof(struct pb_buffer *)); + if(!vl->buffers) { FREE(vl); return NULL; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.h b/src/gallium/auxiliary/pipebuffer/pb_validate.h index dfb84df1ce..3db1d5330b 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.h +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.h @@ -58,13 +58,7 @@ struct pb_validate; enum pipe_error pb_validate_add_buffer(struct pb_validate *vl, - struct pb_buffer *buf, - unsigned flags); - -enum pipe_error -pb_validate_foreach(struct pb_validate *vl, - enum pipe_error (*callback)(struct pb_buffer *buf, void *data), - void *data); + struct pb_buffer *buf); /** * Validate all buffers for hardware access. @@ -77,7 +71,7 @@ pb_validate_validate(struct pb_validate *vl); /** * Fence all buffers and clear the list. * - * Should be called right after issuing commands to the hardware. + * Should be called right before issuing commands to the hardware. */ void pb_validate_fence(struct pb_validate *vl, diff --git a/src/gallium/auxiliary/pipebuffer/pb_winsys.c b/src/gallium/auxiliary/pipebuffer/pb_winsys.c index 45a883e532..28d137dbc4 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_winsys.c +++ b/src/gallium/auxiliary/pipebuffer/pb_winsys.c @@ -30,7 +30,7 @@ * Implementation of client buffer (also designated as "user buffers"), which * are just state-tracker owned data masqueraded as buffers. * - * \author Jose Fonseca + * \author José Fonseca */ @@ -91,24 +91,6 @@ pb_user_buffer_unmap(struct pb_buffer *buf) } -static enum pipe_error -pb_user_buffer_validate(struct pb_buffer *buf, - struct pb_validate *vl, - unsigned flags) -{ - assert(0); - return PIPE_ERROR; -} - - -static void -pb_user_buffer_fence(struct pb_buffer *buf, - struct pipe_fence_handle *fence) -{ - assert(0); -} - - static void pb_user_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -124,8 +106,6 @@ pb_user_buffer_vtbl = { pb_user_buffer_destroy, pb_user_buffer_map, pb_user_buffer_unmap, - pb_user_buffer_validate, - pb_user_buffer_fence, pb_user_buffer_get_base_buffer }; -- cgit v1.2.3 From d2c2e9316d043ab584794a3524f22776deb4c777 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 12 Dec 2008 16:46:34 +0000 Subject: gallium: avoid mapping same vertex buffer in subsequent frames Quite a few util modules were maintaining a single vertex buffer over multiple frames, and potentially reusing it in subsequent frames. Unfortunately that would force us into syncrhonous rendering as the buffer manager would be forced to wait for the previous rendering to complete prior to allowing the map. This resolves that issue, but requires the state tracker to issue a few new flush() calls at the end of each frame. --- src/gallium/auxiliary/util/u_blit.c | 88 ++++++++++++++++++++++--------- src/gallium/auxiliary/util/u_blit.h | 4 ++ src/gallium/auxiliary/util/u_draw_quad.c | 5 +- src/gallium/auxiliary/util/u_draw_quad.h | 2 +- src/gallium/auxiliary/util/u_gen_mipmap.c | 51 +++++++++++++++--- src/gallium/auxiliary/util/u_gen_mipmap.h | 5 ++ src/mesa/state_tracker/st_cb_accum.c | 3 +- src/mesa/state_tracker/st_cb_bitmap.c | 60 +++++++++++++++------ src/mesa/state_tracker/st_cb_bitmap.h | 6 +++ src/mesa/state_tracker/st_cb_clear.c | 31 +++++++++-- src/mesa/state_tracker/st_cb_clear.h | 3 ++ src/mesa/state_tracker/st_cb_drawpixels.c | 2 +- src/mesa/state_tracker/st_cb_fbo.c | 2 +- src/mesa/state_tracker/st_cb_flush.c | 11 +++- src/mesa/state_tracker/st_cb_readpixels.c | 4 +- src/mesa/state_tracker/st_context.h | 2 + 16 files changed, 219 insertions(+), 60 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index d28201ac8d..2cef3338b5 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -66,6 +66,8 @@ struct blit_state void *fs; struct pipe_buffer *vbuf; /**< quad vertices */ + unsigned vbuf_slot; + float vertices[4][2][4]; /**< vertex/texcoords for quad */ }; @@ -138,17 +140,7 @@ util_create_blit(struct pipe_context *pipe, struct cso_context *cso) /* fragment shader */ ctx->fs = util_make_fragment_tex_shader(pipe, &ctx->frag_shader); - - ctx->vbuf = pipe_buffer_create(pipe->screen, - 32, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(ctx->vertices)); - if (!ctx->vbuf) { - FREE(ctx); - ctx->pipe->delete_fs_state(ctx->pipe, ctx->fs); - ctx->pipe->delete_vs_state(ctx->pipe, ctx->vs); - return NULL; - } + ctx->vbuf = NULL; /* init vertex data that doesn't change */ for (i = 0; i < 4; i++) { @@ -181,15 +173,35 @@ util_destroy_blit(struct blit_state *ctx) } +static unsigned get_next_slot( struct blit_state *ctx ) +{ + const unsigned max_slots = 4096 / sizeof ctx->vertices; + + if (ctx->vbuf_slot >= max_slots) + util_blit_flush( ctx ); + + if (!ctx->vbuf) { + ctx->vbuf = pipe_buffer_create(ctx->pipe->screen, + 32, + PIPE_BUFFER_USAGE_VERTEX, + max_slots * sizeof ctx->vertices); + } + + return ctx->vbuf_slot++ * sizeof ctx->vertices; +} + + + /** * Setup vertex data for the textured quad we'll draw. * Note: y=0=top */ -static void +static unsigned setup_vertex_data(struct blit_state *ctx, float x0, float y0, float x1, float y1, float z) { void *buf; + unsigned offset; ctx->vertices[0][0][0] = x0; ctx->vertices[0][0][1] = y0; @@ -215,12 +227,16 @@ setup_vertex_data(struct blit_state *ctx, ctx->vertices[3][1][0] = 0.0f; ctx->vertices[3][1][1] = 1.0f; + offset = get_next_slot( ctx ); + buf = pipe_buffer_map(ctx->pipe->screen, ctx->vbuf, PIPE_BUFFER_USAGE_CPU_WRITE); - memcpy(buf, ctx->vertices, sizeof(ctx->vertices)); + memcpy((char *)buf + offset, ctx->vertices, sizeof(ctx->vertices)); pipe_buffer_unmap(ctx->pipe->screen, ctx->vbuf); + + return offset; } @@ -228,13 +244,14 @@ setup_vertex_data(struct blit_state *ctx, * Setup vertex data for the textured quad we'll draw. * Note: y=0=top */ -static void +static unsigned setup_vertex_data_tex(struct blit_state *ctx, float x0, float y0, float x1, float y1, float s0, float t0, float s1, float t1, float z) { void *buf; + unsigned offset; ctx->vertices[0][0][0] = x0; ctx->vertices[0][0][1] = y0; @@ -260,12 +277,16 @@ setup_vertex_data_tex(struct blit_state *ctx, ctx->vertices[3][1][0] = s0; ctx->vertices[3][1][1] = t1; + offset = get_next_slot( ctx ); + buf = pipe_buffer_map(ctx->pipe->screen, ctx->vbuf, PIPE_BUFFER_USAGE_CPU_WRITE); - memcpy(buf, ctx->vertices, sizeof(ctx->vertices)); + memcpy((char *)buf + offset, ctx->vertices, sizeof(ctx->vertices)); pipe_buffer_unmap(ctx->pipe->screen, ctx->vbuf); + + return offset; } /** * Copy pixel block from src surface to dst surface. @@ -291,6 +312,7 @@ util_blit_pixels(struct blit_state *ctx, const int srcH = abs(srcY1 - srcY0); const int srcLeft = MIN2(srcX0, srcX1); const int srcTop = MIN2(srcY0, srcY1); + unsigned offset; assert(filter == PIPE_TEX_MIPFILTER_NEAREST || filter == PIPE_TEX_MIPFILTER_LINEAR); @@ -398,11 +420,11 @@ util_blit_pixels(struct blit_state *ctx, cso_set_framebuffer(ctx->cso, &fb); /* draw quad */ - setup_vertex_data(ctx, - (float) dstX0, (float) dstY0, - (float) dstX1, (float) dstY1, z); + offset = setup_vertex_data(ctx, + (float) dstX0, (float) dstY0, + (float) dstX1, (float) dstY1, z); - util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, + util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, offset, PIPE_PRIM_TRIANGLE_FAN, 4, /* verts */ 2); /* attribs/vert */ @@ -421,6 +443,18 @@ util_blit_pixels(struct blit_state *ctx, screen->texture_release(screen, &tex); } + +/* Release vertex buffer at end of frame to avoid synchronous + * rendering. + */ +void util_blit_flush( struct blit_state *ctx ) +{ + pipe_buffer_reference(ctx->pipe->screen, &ctx->vbuf, NULL); + ctx->vbuf_slot = 0; +} + + + /** * Copy pixel block from src texture to dst surface. * Overlapping regions are acceptable. @@ -442,6 +476,7 @@ util_blit_pixels_tex(struct blit_state *ctx, struct pipe_screen *screen = pipe->screen; struct pipe_framebuffer_state fb; float s0, t0, s1, t1; + unsigned offset; assert(filter == PIPE_TEX_MIPFILTER_NEAREST || filter == PIPE_TEX_MIPFILTER_LINEAR); @@ -496,13 +531,14 @@ util_blit_pixels_tex(struct blit_state *ctx, cso_set_framebuffer(ctx->cso, &fb); /* draw quad */ - setup_vertex_data_tex(ctx, - (float) dstX0, (float) dstY0, - (float) dstX1, (float) dstY1, - s0, t0, s1, t1, - z); - - util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, + offset = setup_vertex_data_tex(ctx, + (float) dstX0, (float) dstY0, + (float) dstX1, (float) dstY1, + s0, t0, s1, t1, + z); + + util_draw_vertex_buffer(ctx->pipe, + ctx->vbuf, offset, PIPE_PRIM_TRIANGLE_FAN, 4, /* verts */ 2); /* attribs/vert */ diff --git a/src/gallium/auxiliary/util/u_blit.h b/src/gallium/auxiliary/util/u_blit.h index 308075698f..c35beceda8 100644 --- a/src/gallium/auxiliary/util/u_blit.h +++ b/src/gallium/auxiliary/util/u_blit.h @@ -70,6 +70,10 @@ util_blit_pixels_tex(struct blit_state *ctx, int dstX1, int dstY1, float z, uint filter); +/* Call at end of frame to avoid synchronous rendering. + */ +extern void +util_blit_flush( struct blit_state *ctx ); #ifdef __cplusplus } diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index 8ecae71b64..d7bb74b87b 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -40,6 +40,7 @@ void util_draw_vertex_buffer(struct pipe_context *pipe, struct pipe_buffer *vbuf, + uint offset, uint prim_type, uint num_verts, uint num_attribs) @@ -53,7 +54,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe, /* tell pipe about the vertex buffer */ vbuffer.buffer = vbuf; vbuffer.pitch = num_attribs * 4 * sizeof(float); /* vertex size */ - vbuffer.buffer_offset = 0; + vbuffer.buffer_offset = offset; pipe->set_vertex_buffers(pipe, 1, &vbuffer); /* tell pipe about the vertex attributes */ @@ -124,7 +125,7 @@ util_draw_texquad(struct pipe_context *pipe, v[29] = 1.0; pipe_buffer_unmap(pipe->screen, vbuf); - util_draw_vertex_buffer(pipe, vbuf, PIPE_PRIM_TRIANGLE_FAN, 4, 2); + util_draw_vertex_buffer(pipe, vbuf, 0, PIPE_PRIM_TRIANGLE_FAN, 4, 2); } pipe_buffer_reference(pipe->screen, &vbuf, NULL); diff --git a/src/gallium/auxiliary/util/u_draw_quad.h b/src/gallium/auxiliary/util/u_draw_quad.h index ec4862ead3..00d3f5b715 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.h +++ b/src/gallium/auxiliary/util/u_draw_quad.h @@ -37,7 +37,7 @@ struct pipe_buffer; extern void util_draw_vertex_buffer(struct pipe_context *pipe, - struct pipe_buffer *vbuf, + struct pipe_buffer *vbuf, uint offset, uint num_attribs, uint num_verts, uint prim_type); diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 9d305ad763..5f395ec6e9 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -69,6 +69,8 @@ struct gen_mipmap_state void *fs; struct pipe_buffer *vbuf; /**< quad vertices */ + unsigned vbuf_slot; + float vertices[4][2][4]; /**< vertex/texcoords for quad */ }; @@ -779,10 +781,28 @@ util_create_gen_mipmap(struct pipe_context *pipe, } -static void +static unsigned get_next_slot( struct gen_mipmap_state *ctx ) +{ + const unsigned max_slots = 4096 / sizeof ctx->vertices; + + if (ctx->vbuf_slot >= max_slots) + util_gen_mipmap_flush( ctx ); + + if (!ctx->vbuf) { + ctx->vbuf = pipe_buffer_create(ctx->pipe->screen, + 32, + PIPE_BUFFER_USAGE_VERTEX, + max_slots * sizeof ctx->vertices); + } + + return ctx->vbuf_slot++ * sizeof ctx->vertices; +} + +static unsigned set_vertex_data(struct gen_mipmap_state *ctx, float width, float height) { void *buf; + unsigned offset; ctx->vertices[0][0][0] = 0.0f; /*x*/ ctx->vertices[0][0][1] = 0.0f; /*y*/ @@ -804,12 +824,16 @@ set_vertex_data(struct gen_mipmap_state *ctx, float width, float height) ctx->vertices[3][1][0] = 0.0f; ctx->vertices[3][1][1] = 1.0f; + offset = get_next_slot( ctx ); + buf = pipe_buffer_map(ctx->pipe->screen, ctx->vbuf, PIPE_BUFFER_USAGE_CPU_WRITE); - memcpy(buf, ctx->vertices, sizeof(ctx->vertices)); + memcpy((char *)buf + offset, ctx->vertices, sizeof(ctx->vertices)); pipe_buffer_unmap(ctx->pipe->screen, ctx->vbuf); + + return offset; } @@ -834,6 +858,17 @@ util_destroy_gen_mipmap(struct gen_mipmap_state *ctx) } + +/* Release vertex buffer at end of frame to avoid synchronous + * rendering. + */ +void util_gen_mipmap_flush( struct gen_mipmap_state *ctx ) +{ + pipe_buffer_reference(ctx->pipe->screen, &ctx->vbuf, NULL); + ctx->vbuf_slot = 0; +} + + /** * Generate mipmap images. It's assumed all needed texture memory is * already allocated. @@ -855,6 +890,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, struct pipe_framebuffer_state fb; uint dstLevel; uint zslice = 0; + uint offset; /* check if we can render in the texture's format */ if (!screen->is_format_supported(screen, pt->format, PIPE_TEXTURE_2D, @@ -925,10 +961,13 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, cso_set_sampler_textures(ctx->cso, 1, &pt); /* quad coords in window coords (bypassing clipping, viewport mapping) */ - set_vertex_data(ctx, - (float) pt->width[dstLevel], - (float) pt->height[dstLevel]); - util_draw_vertex_buffer(ctx->pipe, ctx->vbuf, + offset = set_vertex_data(ctx, + (float) pt->width[dstLevel], + (float) pt->height[dstLevel]); + + util_draw_vertex_buffer(ctx->pipe, + ctx->vbuf, + offset, PIPE_PRIM_TRIANGLE_FAN, 4, /* verts */ 2); /* attribs/vert */ diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.h b/src/gallium/auxiliary/util/u_gen_mipmap.h index 3277024f07..54608f9466 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.h +++ b/src/gallium/auxiliary/util/u_gen_mipmap.h @@ -50,6 +50,11 @@ util_create_gen_mipmap(struct pipe_context *pipe, struct cso_context *cso); extern void util_destroy_gen_mipmap(struct gen_mipmap_state *ctx); +/* Release vertex buffer at end of frame to avoid synchronous + * rendering. + */ +extern void +util_gen_mipmap_flush( struct gen_mipmap_state *ctx ); extern void diff --git a/src/mesa/state_tracker/st_cb_accum.c b/src/mesa/state_tracker/st_cb_accum.c index cf3a99e7e9..a4e72b48ed 100644 --- a/src/mesa/state_tracker/st_cb_accum.c +++ b/src/mesa/state_tracker/st_cb_accum.c @@ -38,6 +38,7 @@ #include "st_cb_accum.h" #include "st_cb_fbo.h" #include "st_draw.h" +#include "st_public.h" #include "st_format.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" @@ -324,7 +325,7 @@ st_Accum(GLcontext *ctx, GLenum op, GLfloat value) const GLint height = ctx->DrawBuffer->_Ymax - ypos; /* make sure color bufs aren't cached */ - pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + st_flush( st, PIPE_FLUSH_RENDER_CACHE, NULL ); switch (op) { case GL_ADD: diff --git a/src/mesa/state_tracker/st_cb_bitmap.c b/src/mesa/state_tracker/st_cb_bitmap.c index 73645201cc..bc05ca6a2f 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.c +++ b/src/mesa/state_tracker/st_cb_bitmap.c @@ -349,8 +349,7 @@ make_bitmap_texture(GLcontext *ctx, GLsizei width, GLsizei height, return pt; } - -static void +static GLuint setup_bitmap_vertex_data(struct st_context *st, int x, int y, int width, int height, float z, const float color[4]) @@ -369,12 +368,18 @@ setup_bitmap_vertex_data(struct st_context *st, const GLfloat clip_y0 = (GLfloat)(y0 / fb_height * 2.0 - 1.0); const GLfloat clip_x1 = (GLfloat)(x1 / fb_width * 2.0 - 1.0); const GLfloat clip_y1 = (GLfloat)(y1 / fb_height * 2.0 - 1.0); + const GLuint max_slots = 4096 / sizeof(st->bitmap.vertices); GLuint i; - void *buf; + + if (st->bitmap.vbuf_slot >= max_slots) { + pipe_buffer_reference(pipe->screen, &st->bitmap.vbuf, NULL); + st->bitmap.vbuf_slot = 0; + } if (!st->bitmap.vbuf) { - st->bitmap.vbuf = pipe_buffer_create(pipe->screen, 32, PIPE_BUFFER_USAGE_VERTEX, - sizeof(st->bitmap.vertices)); + st->bitmap.vbuf = pipe_buffer_create(pipe->screen, 32, + PIPE_BUFFER_USAGE_VERTEX, + max_slots * sizeof(st->bitmap.vertices)); } /* Positions are in clip coords since we need to do clipping in case @@ -413,9 +418,19 @@ setup_bitmap_vertex_data(struct st_context *st, } /* put vertex data into vbuf */ - buf = pipe_buffer_map(pipe->screen, st->bitmap.vbuf, PIPE_BUFFER_USAGE_CPU_WRITE); - memcpy(buf, st->bitmap.vertices, sizeof(st->bitmap.vertices)); - pipe_buffer_unmap(pipe->screen, st->bitmap.vbuf); + { + char *buf = pipe_buffer_map(pipe->screen, + st->bitmap.vbuf, + PIPE_BUFFER_USAGE_CPU_WRITE); + + memcpy(buf + st->bitmap.vbuf_slot * sizeof st->bitmap.vertices, + st->bitmap.vertices, + sizeof st->bitmap.vertices); + + pipe_buffer_unmap(pipe->screen, st->bitmap.vbuf); + } + + return st->bitmap.vbuf_slot++ * sizeof st->bitmap.vertices; } @@ -434,6 +449,7 @@ draw_bitmap_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, struct cso_context *cso = ctx->st->cso_context; struct st_fragment_program *stfp; GLuint maxSize; + GLuint offset; stfp = combined_bitmap_fragment_program(ctx); @@ -518,11 +534,11 @@ draw_bitmap_quad(GLcontext *ctx, GLint x, GLint y, GLfloat z, } /* draw textured quad */ - setup_bitmap_vertex_data(st, x, y, width, height, - ctx->Current.RasterPos[2], - color); + offset = setup_bitmap_vertex_data(st, x, y, width, height, + ctx->Current.RasterPos[2], + color); - util_draw_vertex_buffer(pipe, st->bitmap.vbuf, + util_draw_vertex_buffer(pipe, st->bitmap.vbuf, offset, PIPE_PRIM_TRIANGLE_FAN, 4, /* verts */ 3); /* attribs/vert */ @@ -592,12 +608,12 @@ st_flush_bitmap_cache(struct st_context *st) struct pipe_screen *screen = pipe->screen; assert(cache->xmin <= cache->xmax); - /* - printf("flush size %d x %d at %d, %d\n", + +/* printf("flush size %d x %d at %d, %d\n", cache->xmax - cache->xmin, cache->ymax - cache->ymin, cache->xpos, cache->ypos); - */ +*/ /* The texture surface has been mapped until now. * So unmap and release the texture surface before drawing. @@ -623,6 +639,20 @@ st_flush_bitmap_cache(struct st_context *st) } } +/* Flush bitmap cache and release vertex buffer. + */ +void +st_flush_bitmap( struct st_context *st ) +{ + st_flush_bitmap_cache(st); + + /* Release vertex buffer to avoid synchronous rendering if we were + * to map it in the next frame. + */ + pipe_buffer_reference(st->pipe->screen, &st->bitmap.vbuf, NULL); + st->bitmap.vbuf_slot = 0; +} + /** * Try to accumulate this glBitmap call in the bitmap cache. diff --git a/src/mesa/state_tracker/st_cb_bitmap.h b/src/mesa/state_tracker/st_cb_bitmap.h index aae11d34c9..81cf61981d 100644 --- a/src/mesa/state_tracker/st_cb_bitmap.h +++ b/src/mesa/state_tracker/st_cb_bitmap.h @@ -42,5 +42,11 @@ st_destroy_bitmap(struct st_context *st); extern void st_flush_bitmap_cache(struct st_context *st); +/* Flush bitmap cache and release vertex buffer. Needed at end of + * frame to avoid synchronous rendering. + */ +extern void +st_flush_bitmap(struct st_context *st); + #endif /* ST_CB_BITMAP_H */ diff --git a/src/mesa/state_tracker/st_cb_clear.c b/src/mesa/state_tracker/st_cb_clear.c index bc3055c3fd..b6cea16163 100644 --- a/src/mesa/state_tracker/st_cb_clear.c +++ b/src/mesa/state_tracker/st_cb_clear.c @@ -148,12 +148,18 @@ draw_quad(GLcontext *ctx, { struct st_context *st = ctx->st; struct pipe_context *pipe = st->pipe; + const GLuint max_slots = 1024 / sizeof(st->clear.vertices); GLuint i; void *buf; + if (st->clear.vbuf_slot >= max_slots) { + pipe_buffer_reference(pipe->screen, &st->clear.vbuf, NULL); + st->clear.vbuf_slot = 0; + } + if (!st->clear.vbuf) { st->clear.vbuf = pipe_buffer_create(pipe->screen, 32, PIPE_BUFFER_USAGE_VERTEX, - sizeof(st->clear.vertices)); + max_slots * sizeof(st->clear.vertices)); } /* positions */ @@ -181,14 +187,23 @@ draw_quad(GLcontext *ctx, /* put vertex data into vbuf */ buf = pipe_buffer_map(pipe->screen, st->clear.vbuf, PIPE_BUFFER_USAGE_CPU_WRITE); - memcpy(buf, st->clear.vertices, sizeof(st->clear.vertices)); + + memcpy((char *)buf + st->clear.vbuf_slot * sizeof(st->clear.vertices), + st->clear.vertices, + sizeof(st->clear.vertices)); + pipe_buffer_unmap(pipe->screen, st->clear.vbuf); /* draw */ - util_draw_vertex_buffer(pipe, st->clear.vbuf, + util_draw_vertex_buffer(pipe, + st->clear.vbuf, + st->clear.vbuf_slot * sizeof(st->clear.vertices), PIPE_PRIM_TRIANGLE_FAN, 4, /* verts */ 2); /* attribs/vert */ + + /* Increment slot */ + st->clear.vbuf_slot++; } @@ -508,6 +523,16 @@ clear_depth_stencil_buffer(GLcontext *ctx, struct gl_renderbuffer *rb) } +void st_flush_clear( struct st_context *st ) +{ + /* Release vertex buffer to avoid synchronous rendering if we were + * to map it in the next frame. + */ + pipe_buffer_reference(st->pipe->screen, &st->clear.vbuf, NULL); + st->clear.vbuf_slot = 0; +} + + /** * Called via ctx->Driver.Clear() diff --git a/src/mesa/state_tracker/st_cb_clear.h b/src/mesa/state_tracker/st_cb_clear.h index f49387747d..bc035ac25c 100644 --- a/src/mesa/state_tracker/st_cb_clear.h +++ b/src/mesa/state_tracker/st_cb_clear.h @@ -37,6 +37,9 @@ st_init_clear(struct st_context *st); extern void st_destroy_clear(struct st_context *st); +extern void +st_flush_clear(struct st_context *st); + extern void st_init_clear_functions(struct dd_function_table *functions); diff --git a/src/mesa/state_tracker/st_cb_drawpixels.c b/src/mesa/state_tracker/st_cb_drawpixels.c index 5b24b9f068..32bf21411d 100644 --- a/src/mesa/state_tracker/st_cb_drawpixels.c +++ b/src/mesa/state_tracker/st_cb_drawpixels.c @@ -494,7 +494,7 @@ draw_quad(GLcontext *ctx, GLfloat x0, GLfloat y0, GLfloat z, memcpy(map, verts, sizeof(verts)); pipe_buffer_unmap(pipe->screen, buf); - util_draw_vertex_buffer(pipe, buf, + util_draw_vertex_buffer(pipe, buf, 0, PIPE_PRIM_QUADS, 4, /* verts */ 3); /* attribs/vert */ diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 00076f61e0..eece7dee11 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -426,7 +426,7 @@ st_finish_render_texture(GLcontext *ctx, if (!strb) return; - ctx->st->pipe->flush(ctx->st->pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + st_flush( ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL ); if (strb->surface) screen->tex_surface_release( screen, &strb->surface ); diff --git a/src/mesa/state_tracker/st_cb_flush.c b/src/mesa/state_tracker/st_cb_flush.c index cc40467941..072f2e92ad 100644 --- a/src/mesa/state_tracker/st_cb_flush.c +++ b/src/mesa/state_tracker/st_cb_flush.c @@ -37,11 +37,14 @@ #include "st_context.h" #include "st_cb_bitmap.h" #include "st_cb_flush.h" +#include "st_cb_clear.h" #include "st_cb_fbo.h" #include "st_public.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_winsys.h" +#include "util/u_gen_mipmap.h" +#include "util/u_blit.h" static INLINE GLboolean @@ -78,7 +81,13 @@ void st_flush( struct st_context *st, uint pipeFlushFlags, { FLUSH_VERTICES(st->ctx, 0); - st_flush_bitmap_cache(st); + /* Release any vertex buffers that might potentially be accessed in + * successive frames: + */ + st_flush_bitmap(st); + st_flush_clear(st); + util_blit_flush(st->blit); + util_gen_mipmap_flush(st->gen_mipmap); st->pipe->flush( st->pipe, pipeFlushFlags, fence ); } diff --git a/src/mesa/state_tracker/st_cb_readpixels.c b/src/mesa/state_tracker/st_cb_readpixels.c index c801532788..646eaff190 100644 --- a/src/mesa/state_tracker/st_cb_readpixels.c +++ b/src/mesa/state_tracker/st_cb_readpixels.c @@ -317,10 +317,8 @@ st_readpixels(GLcontext *ctx, GLint x, GLint y, GLsizei width, GLsizei height, if (!dest) return; - st_flush_bitmap_cache(ctx->st); - /* make sure rendering has completed */ - pipe->flush(pipe, PIPE_FLUSH_RENDER_CACHE, NULL); + st_flush(ctx->st, PIPE_FLUSH_RENDER_CACHE, NULL); if (format == GL_STENCIL_INDEX) { st_read_stencil_pixels(ctx, x, y, width, height, type, pack, dest); diff --git a/src/mesa/state_tracker/st_context.h b/src/mesa/state_tracker/st_context.h index 5bcb87ce20..695ac4a96f 100644 --- a/src/mesa/state_tracker/st_context.h +++ b/src/mesa/state_tracker/st_context.h @@ -154,6 +154,7 @@ struct st_context void *vs; float vertices[4][3][4]; /**< vertex pos + color + texcoord */ struct pipe_buffer *vbuf; + unsigned vbuf_slot; /* next free slot in vbuf */ struct bitmap_cache *cache; } bitmap; @@ -173,6 +174,7 @@ struct st_context void *fs; float vertices[4][2][4]; /**< vertex pos + color */ struct pipe_buffer *vbuf; + unsigned vbuf_slot; } clear; void *passthrough_fs; /**< simple pass-through frag shader */ -- cgit v1.2.3 From d7e9b0e33a72d282e9326434daf01c61244d9ef1 Mon Sep 17 00:00:00 2001 From: Alan Hourihane Date: Tue, 16 Dec 2008 09:42:52 +0000 Subject: gallium: fix some asserts --- src/gallium/auxiliary/draw/draw_pt_varray.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt_varray.c b/src/gallium/auxiliary/draw/draw_pt_varray.c index c15afe65f1..d0e16c9bc3 100644 --- a/src/gallium/auxiliary/draw/draw_pt_varray.c +++ b/src/gallium/auxiliary/draw/draw_pt_varray.c @@ -67,7 +67,7 @@ static void varray_line_loop_segment(struct varray_frontend *varray, unsigned segment_count, boolean end ) { - assert(segment_count+1 < varray->fetch_max); + assert(segment_count < varray->fetch_max); if (segment_count >= 1) { unsigned nr = 0, i; @@ -77,7 +77,7 @@ static void varray_line_loop_segment(struct varray_frontend *varray, if (end) varray->fetch_elts[nr++] = start; - assert(nr < FETCH_MAX); + assert(nr <= FETCH_MAX); varray->middle->run(varray->middle, varray->fetch_elts, @@ -94,7 +94,7 @@ static void varray_fan_segment(struct varray_frontend *varray, unsigned segment_start, unsigned segment_count ) { - assert(segment_count+1 < varray->fetch_max); + assert(segment_count < varray->fetch_max); if (segment_count >= 2) { unsigned nr = 0, i; @@ -104,7 +104,7 @@ static void varray_fan_segment(struct varray_frontend *varray, for (i = 0 ; i < segment_count; i++) varray->fetch_elts[nr++] = start + segment_start + i; - assert(nr < FETCH_MAX); + assert(nr <= FETCH_MAX); varray->middle->run(varray->middle, varray->fetch_elts, -- cgit v1.2.3 From 3616fb08da8ef392db1d5ccab55b8eb9f6a6f32b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 16 Dec 2008 15:39:14 -0700 Subject: tgsi: use flr(), not trunc() for ARL --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index a2d2cfd1fc..989b6eec27 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -1797,7 +1797,7 @@ exec_instruction( case TGSI_OPCODE_ARL: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_trunc( &r[0], &r[0] ); + micro_flr( &r[0], &r[0] ); STORE( &r[0], 0, chan_index ); } break; -- cgit v1.2.3 From d1c8af7c0a18340fdde45ade6f612939a3c8e62a Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 17 Dec 2008 18:52:10 -0700 Subject: gallium: fix memory corruption in u_gen_mipmap.c Remove the old/initial vbuf allocation in util_create_gen_mipmap(). We were allocating a small vbuf at this point so get_next_slot() didn't have as large of buffer as it expected. So all but the first set_vertex_data() was writing out of bounds. Also added some comments. --- src/gallium/auxiliary/util/u_gen_mipmap.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 5f395ec6e9..5afc52ba35 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -760,15 +760,6 @@ util_create_gen_mipmap(struct pipe_context *pipe, /* fragment shader */ ctx->fs = util_make_fragment_tex_shader(pipe, &ctx->frag_shader); - ctx->vbuf = pipe_buffer_create(pipe->screen, - 32, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(ctx->vertices)); - if (!ctx->vbuf) { - FREE(ctx); - return NULL; - } - /* vertex data that doesn't change */ for (i = 0; i < 4; i++) { ctx->vertices[i][0][2] = 0.0f; /* z */ @@ -777,11 +768,18 @@ util_create_gen_mipmap(struct pipe_context *pipe, ctx->vertices[i][1][3] = 1.0f; /* q */ } + /* Note: the actual vertex buffer is allocated as needed below */ + return ctx; } -static unsigned get_next_slot( struct gen_mipmap_state *ctx ) +/** + * Get next "slot" of vertex space in the vertex buffer. + * We're allocating one large vertex buffer and using it piece by piece. + */ +static unsigned +get_next_slot(struct gen_mipmap_state *ctx) { const unsigned max_slots = 4096 / sizeof ctx->vertices; @@ -798,6 +796,7 @@ static unsigned get_next_slot( struct gen_mipmap_state *ctx ) return ctx->vbuf_slot++ * sizeof ctx->vertices; } + static unsigned set_vertex_data(struct gen_mipmap_state *ctx, float width, float height) { -- cgit v1.2.3 From b890fffbf4ad435ef533988d288e98b0bfe4d17b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Wed, 17 Dec 2008 18:52:10 -0700 Subject: gallium: fix memory corruption in u_gen_mipmap.c Remove the old/initial vbuf allocation in util_create_gen_mipmap(). We were allocating a small vbuf at this point so get_next_slot() didn't have as large of buffer as it expected. So all but the first set_vertex_data() was writing out of bounds. Also added some comments. --- src/gallium/auxiliary/util/u_gen_mipmap.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 5f395ec6e9..5afc52ba35 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -760,15 +760,6 @@ util_create_gen_mipmap(struct pipe_context *pipe, /* fragment shader */ ctx->fs = util_make_fragment_tex_shader(pipe, &ctx->frag_shader); - ctx->vbuf = pipe_buffer_create(pipe->screen, - 32, - PIPE_BUFFER_USAGE_VERTEX, - sizeof(ctx->vertices)); - if (!ctx->vbuf) { - FREE(ctx); - return NULL; - } - /* vertex data that doesn't change */ for (i = 0; i < 4; i++) { ctx->vertices[i][0][2] = 0.0f; /* z */ @@ -777,11 +768,18 @@ util_create_gen_mipmap(struct pipe_context *pipe, ctx->vertices[i][1][3] = 1.0f; /* q */ } + /* Note: the actual vertex buffer is allocated as needed below */ + return ctx; } -static unsigned get_next_slot( struct gen_mipmap_state *ctx ) +/** + * Get next "slot" of vertex space in the vertex buffer. + * We're allocating one large vertex buffer and using it piece by piece. + */ +static unsigned +get_next_slot(struct gen_mipmap_state *ctx) { const unsigned max_slots = 4096 / sizeof ctx->vertices; @@ -798,6 +796,7 @@ static unsigned get_next_slot( struct gen_mipmap_state *ctx ) return ctx->vbuf_slot++ * sizeof ctx->vertices; } + static unsigned set_vertex_data(struct gen_mipmap_state *ctx, float width, float height) { -- cgit v1.2.3 From 92dc8ffa719276fe536abf4dcf7874cf12588f60 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 18 Dec 2008 14:53:24 +0000 Subject: gallium: Enable memory debugging on all windows platforms. --- src/gallium/auxiliary/util/u_memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h index 857102719d..bcae10832a 100644 --- a/src/gallium/auxiliary/util/u_memory.h +++ b/src/gallium/auxiliary/util/u_memory.h @@ -52,7 +52,7 @@ extern "C" { #endif -#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && defined(DEBUG) +#if defined(PIPE_SUBSYSTEM_WINDOWS) && defined(DEBUG) /* memory debugging */ -- cgit v1.2.3 From 5ccef84c018c5e99bf4b8cffb5ba1bb66bf40cc4 Mon Sep 17 00:00:00 2001 From: Robert Ellison Date: Thu, 18 Dec 2008 09:46:53 -0700 Subject: Gallium: fix for conform test The following construction in util_surface_copy() in gallium/auxiliary/util/u_rect.c, introduced in commit d177c9ddda2c452cf7d6696d89cf4458ef986f98, incorrectly inverts the Y coordinate in the last parameter to pipe_copy_rect(). /* If do_flip, invert src_y position and pass negative src stride */ pipe_copy_rect(dst_map, &dst->block, dst->stride, dst_x, dst_y, w, h, src_map, do_flip ? -(int) src->stride : src->stride, src_x, do_flip ? w - src_y : src_y); The intention is to start at the last Y coordinate line and move backwards, in the case of a flip; in that case, the correct calculation is "src_y + h - 1", not "w - src_y". This fixes a Gallium assertion failure in the conformance tests: u_rect.c:65:pipe_copy_rect: Assertion `src_y >= 0' failed. debug_get_bool_option: GALLIUM_ABORT_ON_ASSERT = TRUE Trace/breakpoint trap --- src/gallium/auxiliary/util/u_rect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c index 30f32413d7..fe81a685be 100644 --- a/src/gallium/auxiliary/util/u_rect.c +++ b/src/gallium/auxiliary/util/u_rect.c @@ -223,7 +223,7 @@ util_surface_copy(struct pipe_context *pipe, src_map, do_flip ? -(int) src->stride : src->stride, src_x, - do_flip ? w - src_y : src_y); + do_flip ? src_y + h - 1 : src_y); } pipe->screen->surface_unmap(pipe->screen, src); -- cgit v1.2.3 From 36c7bb697d47560e2bf4798db11afd7f1751abef Mon Sep 17 00:00:00 2001 From: Robert Ellison Date: Thu, 18 Dec 2008 09:46:53 -0700 Subject: Gallium: fix for conform test The following construction in util_surface_copy() in gallium/auxiliary/util/u_rect.c, introduced in commit d177c9ddda2c452cf7d6696d89cf4458ef986f98, incorrectly inverts the Y coordinate in the last parameter to pipe_copy_rect(). /* If do_flip, invert src_y position and pass negative src stride */ pipe_copy_rect(dst_map, &dst->block, dst->stride, dst_x, dst_y, w, h, src_map, do_flip ? -(int) src->stride : src->stride, src_x, do_flip ? w - src_y : src_y); The intention is to start at the last Y coordinate line and move backwards, in the case of a flip; in that case, the correct calculation is "src_y + h - 1", not "w - src_y". This fixes a Gallium assertion failure in the conformance tests: u_rect.c:65:pipe_copy_rect: Assertion `src_y >= 0' failed. debug_get_bool_option: GALLIUM_ABORT_ON_ASSERT = TRUE Trace/breakpoint trap --- src/gallium/auxiliary/util/u_rect.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c index 30f32413d7..fe81a685be 100644 --- a/src/gallium/auxiliary/util/u_rect.c +++ b/src/gallium/auxiliary/util/u_rect.c @@ -223,7 +223,7 @@ util_surface_copy(struct pipe_context *pipe, src_map, do_flip ? -(int) src->stride : src->stride, src_x, - do_flip ? w - src_y : src_y); + do_flip ? src_y + h - 1 : src_y); } pipe->screen->surface_unmap(pipe->screen, src); -- cgit v1.2.3 From db99ca3bc999137e6d523aa24e13cc5cfbb2b52c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 18 Dec 2008 18:06:38 -0700 Subject: tgsi: scan for additional info: uses_fogcoord, uses_frontfacing --- src/gallium/auxiliary/tgsi/tgsi_scan.c | 49 ++++++++++++++++++++++++---------- src/gallium/auxiliary/tgsi/tgsi_scan.h | 3 ++- 2 files changed, 37 insertions(+), 15 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index be4870a498..cfc7ea8e89 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -2,6 +2,7 @@ * * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2008 VMware, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -79,38 +80,61 @@ tgsi_scan_shader(const struct tgsi_token *tokens, switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_INSTRUCTION: { - struct tgsi_full_instruction *fullinst + const struct tgsi_full_instruction *fullinst = &parse.FullToken.FullInstruction; assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST); info->opcode_count[fullinst->Instruction.Opcode]++; + + /* special case: scan fragment shaders for use of the fog + * input/attribute. The X component is fog, the Y component + * is the front/back-face flag. + */ + if (procType == TGSI_PROCESSOR_FRAGMENT) { + uint i; + for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *src = + &fullinst->FullSrcRegisters[i]; + if (src->SrcRegister.File == TGSI_FILE_INPUT) { + const int ind = src->SrcRegister.Index; + if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FOG) { + if (src->SrcRegister.SwizzleX == TGSI_SWIZZLE_X) { + info->uses_fogcoord = TRUE; + } + else if (src->SrcRegister.SwizzleX == TGSI_SWIZZLE_Y) { + info->uses_frontfacing = TRUE; + } + } + } + } + } } break; case TGSI_TOKEN_TYPE_DECLARATION: { - struct tgsi_full_declaration *fulldecl + const struct tgsi_full_declaration *fulldecl = &parse.FullToken.FullDeclaration; uint file = fulldecl->Declaration.File; - uint i; - for (i = fulldecl->DeclarationRange.First; - i <= fulldecl->DeclarationRange.Last; - i++) { + uint reg; + for (reg = fulldecl->DeclarationRange.First; + reg <= fulldecl->DeclarationRange.Last; + reg++) { /* only first 32 regs will appear in this bitfield */ - info->file_mask[file] |= (1 << i); + info->file_mask[file] |= (1 << reg); info->file_count[file]++; info->file_max[file] = MAX2(info->file_max[file], (int)i); if (file == TGSI_FILE_INPUT) { - info->input_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName; - info->input_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex; + info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName; + info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex; info->num_inputs++; } if (file == TGSI_FILE_OUTPUT) { - info->output_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName; - info->output_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex; + info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName; + info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex; info->num_outputs++; } @@ -133,9 +157,6 @@ tgsi_scan_shader(const struct tgsi_token *tokens, } } - assert( info->file_max[TGSI_FILE_INPUT] + 1 == info->num_inputs ); - assert( info->file_max[TGSI_FILE_OUTPUT] + 1 == info->num_outputs ); - info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] || info->opcode_count[TGSI_OPCODE_KILP]); diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index 5cb6efb343..2c1a75bc81 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -41,7 +41,6 @@ struct tgsi_shader_info { uint num_tokens; - /* XXX eventually remove the corresponding fields from pipe_shader_state: */ ubyte num_inputs; ubyte num_outputs; ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */ @@ -59,6 +58,8 @@ struct tgsi_shader_info boolean writes_z; /**< does fragment shader write Z value? */ boolean uses_kill; /**< KIL or KILP instruction used? */ + boolean uses_fogcoord; /**< fragment shader uses fog coord? */ + boolean uses_frontfacing; /**< fragment shader uses front/back-face flag? */ }; -- cgit v1.2.3 From 59a168d5c9b5f478e4e8bedcd8522e359e98987e Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Thu, 18 Dec 2008 18:06:38 -0700 Subject: tgsi: scan for additional info: uses_fogcoord, uses_frontfacing --- src/gallium/auxiliary/tgsi/tgsi_scan.c | 49 ++++++++++++++++++++++++---------- src/gallium/auxiliary/tgsi/tgsi_scan.h | 3 ++- 2 files changed, 37 insertions(+), 15 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index be4870a498..cfc7ea8e89 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -2,6 +2,7 @@ * * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2008 VMware, Inc. All rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -79,38 +80,61 @@ tgsi_scan_shader(const struct tgsi_token *tokens, switch( parse.FullToken.Token.Type ) { case TGSI_TOKEN_TYPE_INSTRUCTION: { - struct tgsi_full_instruction *fullinst + const struct tgsi_full_instruction *fullinst = &parse.FullToken.FullInstruction; assert(fullinst->Instruction.Opcode < TGSI_OPCODE_LAST); info->opcode_count[fullinst->Instruction.Opcode]++; + + /* special case: scan fragment shaders for use of the fog + * input/attribute. The X component is fog, the Y component + * is the front/back-face flag. + */ + if (procType == TGSI_PROCESSOR_FRAGMENT) { + uint i; + for (i = 0; i < fullinst->Instruction.NumSrcRegs; i++) { + const struct tgsi_full_src_register *src = + &fullinst->FullSrcRegisters[i]; + if (src->SrcRegister.File == TGSI_FILE_INPUT) { + const int ind = src->SrcRegister.Index; + if (info->input_semantic_name[ind] == TGSI_SEMANTIC_FOG) { + if (src->SrcRegister.SwizzleX == TGSI_SWIZZLE_X) { + info->uses_fogcoord = TRUE; + } + else if (src->SrcRegister.SwizzleX == TGSI_SWIZZLE_Y) { + info->uses_frontfacing = TRUE; + } + } + } + } + } } break; case TGSI_TOKEN_TYPE_DECLARATION: { - struct tgsi_full_declaration *fulldecl + const struct tgsi_full_declaration *fulldecl = &parse.FullToken.FullDeclaration; uint file = fulldecl->Declaration.File; - uint i; - for (i = fulldecl->DeclarationRange.First; - i <= fulldecl->DeclarationRange.Last; - i++) { + uint reg; + for (reg = fulldecl->DeclarationRange.First; + reg <= fulldecl->DeclarationRange.Last; + reg++) { /* only first 32 regs will appear in this bitfield */ - info->file_mask[file] |= (1 << i); + info->file_mask[file] |= (1 << reg); info->file_count[file]++; info->file_max[file] = MAX2(info->file_max[file], (int)i); if (file == TGSI_FILE_INPUT) { - info->input_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName; - info->input_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex; + info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName; + info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex; info->num_inputs++; } if (file == TGSI_FILE_OUTPUT) { - info->output_semantic_name[i] = (ubyte)fulldecl->Semantic.SemanticName; - info->output_semantic_index[i] = (ubyte)fulldecl->Semantic.SemanticIndex; + info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName; + info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex; info->num_outputs++; } @@ -133,9 +157,6 @@ tgsi_scan_shader(const struct tgsi_token *tokens, } } - assert( info->file_max[TGSI_FILE_INPUT] + 1 == info->num_inputs ); - assert( info->file_max[TGSI_FILE_OUTPUT] + 1 == info->num_outputs ); - info->uses_kill = (info->opcode_count[TGSI_OPCODE_KIL] || info->opcode_count[TGSI_OPCODE_KILP]); diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.h b/src/gallium/auxiliary/tgsi/tgsi_scan.h index 5cb6efb343..2c1a75bc81 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.h +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.h @@ -41,7 +41,6 @@ struct tgsi_shader_info { uint num_tokens; - /* XXX eventually remove the corresponding fields from pipe_shader_state: */ ubyte num_inputs; ubyte num_outputs; ubyte input_semantic_name[PIPE_MAX_SHADER_INPUTS]; /**< TGSI_SEMANTIC_x */ @@ -59,6 +58,8 @@ struct tgsi_shader_info boolean writes_z; /**< does fragment shader write Z value? */ boolean uses_kill; /**< KIL or KILP instruction used? */ + boolean uses_fogcoord; /**< fragment shader uses fog coord? */ + boolean uses_frontfacing; /**< fragment shader uses front/back-face flag? */ }; -- cgit v1.2.3 From 42f7fd7d8189ceeb6d1baef5e23959c95f917ddc Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 19 Dec 2008 07:32:52 -0700 Subject: gallium: replace #elif with #else --- src/gallium/auxiliary/util/u_time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c index 57b80e5604..dde2c74fa8 100644 --- a/src/gallium/auxiliary/util/u_time.c +++ b/src/gallium/auxiliary/util/u_time.c @@ -111,7 +111,7 @@ util_time_add(const struct util_time *t1, #elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) /* 1 tick = 100 nano seconds. */ t2->counter = t1->counter + usecs * 10; -#elif +#else LARGE_INTEGER temp; LONGLONG freq; freq = temp.QuadPart; -- cgit v1.2.3 From 030a7a320cb2c49ff60f3948bd9c4976ca0b0b17 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 19 Dec 2008 07:33:17 -0700 Subject: gallium: replace #elif with #else --- src/gallium/auxiliary/util/u_time.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c index bf7d1d1c8d..f84514165a 100644 --- a/src/gallium/auxiliary/util/u_time.c +++ b/src/gallium/auxiliary/util/u_time.c @@ -111,7 +111,7 @@ util_time_add(const struct util_time *t1, #elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT) /* 1 tick = 100 nano seconds. */ t2->counter = t1->counter + usecs * 10; -#elif +#else LARGE_INTEGER temp; LONGLONG freq; freq = temp.QuadPart; -- cgit v1.2.3 From b901e1f212c11afda05f2628a522d86802f87c52 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 19 Dec 2008 20:06:11 +0000 Subject: gallium: Simple and efficient cache. Fixed size hash table. Collisions are handled by simply destroying the previous entry. It hasn't received much testing yet. --- src/gallium/auxiliary/util/Makefile | 2 + src/gallium/auxiliary/util/SConscript | 2 + src/gallium/auxiliary/util/u_cache.c | 209 ++++++++++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_cache.h | 87 ++++++++++++++ src/gallium/auxiliary/util/u_hash.c | 121 ++++++++++++++++++++ src/gallium/auxiliary/util/u_hash.h | 55 +++++++++ 6 files changed, 476 insertions(+) create mode 100644 src/gallium/auxiliary/util/u_cache.c create mode 100644 src/gallium/auxiliary/util/u_cache.h create mode 100644 src/gallium/auxiliary/util/u_hash.c create mode 100644 src/gallium/auxiliary/util/u_hash.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index d3951e4e7d..5c227c1eb5 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -6,10 +6,12 @@ LIBNAME = util C_SOURCES = \ p_debug.c \ u_blit.c \ + u_cache.c \ u_draw_quad.c \ u_gen_mipmap.c \ u_handle_table.c \ u_hash_table.c \ + u_hash.c \ u_math.c \ u_mm.c \ u_rect.c \ diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index e65c17b1cc..1ef06631bf 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -7,9 +7,11 @@ util = env.ConvenienceLibrary( 'p_debug_mem.c', 'p_debug_prof.c', 'u_blit.c', + 'u_cache.c', 'u_draw_quad.c', 'u_gen_mipmap.c', 'u_handle_table.c', + 'u_hash.c', 'u_hash_table.c', 'u_math.c', 'u_mm.c', diff --git a/src/gallium/auxiliary/util/u_cache.c b/src/gallium/auxiliary/util/u_cache.c new file mode 100644 index 0000000000..69a52687fb --- /dev/null +++ b/src/gallium/auxiliary/util/u_cache.c @@ -0,0 +1,209 @@ +/************************************************************************** + * + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Simple cache implementation. + * + * We simply have fixed size array, and destroy previous values on collision. + * + * @author Jose Fonseca + */ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" + +#include "util/u_math.h" +#include "util/u_memory.h" +#include "util/u_cache.h" + + +struct util_cache_entry +{ + void *key; + void *value; + +#ifdef DEBUG + unsigned count; +#endif +}; + + +struct util_cache +{ + /** Hash function */ + uint32_t (*hash)(void *key); + + /** Compare two keys */ + int (*compare)(void *key1, void *key2); + + /** Destroy a (key, value) pair */ + void (*destroy)(void *key, void *value); + + uint32_t size; + + struct util_cache_entry *entries; + +#ifdef DEBUG + unsigned count; +#endif +}; + + +struct util_cache * +util_cache_create(uint32_t (*hash)(void *key), + int (*compare)(void *key1, void *key2), + void (*destroy)(void *key, void *value), + uint32_t size) +{ + struct util_cache *cache; + + cache = CALLOC_STRUCT(util_cache); + if(!cache) + return NULL; + + cache->hash = hash; + cache->compare = compare; + cache->destroy = destroy; + cache->size = size; + + cache->entries = CALLOC(size, sizeof(struct util_cache_entry)); + if(!cache->entries) { + FREE(cache); + return NULL; + } + + return cache; +} + + +static INLINE struct util_cache_entry * +util_cache_entry_get(struct util_cache *cache, + void *key) +{ + uint32_t hash; + + hash = cache->hash(key); + + return &cache->entries[hash % cache->size]; +} + +static INLINE void +util_cache_entry_destroy(struct util_cache *cache, + struct util_cache_entry *entry) +{ + void *key = entry->key; + void *value = entry->value; + + entry->key = NULL; + entry->value = NULL; + + if(key || value) + if(cache->destroy) + cache->destroy(key, value); +} + + +void +util_cache_set(struct util_cache *cache, + void *key, + void *value) +{ + struct util_cache_entry *entry; + + assert(cache); + + entry = util_cache_entry_get(cache, key); + util_cache_entry_destroy(cache, entry); + +#ifdef DEBUG + ++entry->count; + ++cache->count; +#endif + + entry->key = key; + entry->value = value; +} + + +void * +util_cache_get(struct util_cache *cache, + void *key) +{ + struct util_cache_entry *entry; + + assert(cache); + + entry = util_cache_entry_get(cache, key); + if(!entry->key && !entry->value) + return NULL; + + if(cache->compare(key, entry->key) != 0) + return NULL; + + return entry->value; +} + + +void +util_cache_clear(struct util_cache *cache) +{ + uint32_t i; + + assert(cache); + + for(i = 0; i < cache->size; ++i) + util_cache_entry_destroy(cache, &cache->entries[i]); +} + + +void +util_cache_destroy(struct util_cache *cache) +{ + assert(cache); + +#ifdef DEBUG + if(cache->count >= 20*cache->size) { + /* Normal approximation of the Poisson distribution */ + double mean = (double)cache->count/(double)cache->size; + double stddev = sqrt(mean); + unsigned i; + for(i = 0; i < cache->size; ++i) { + double z = fabs(cache->count - mean)/stddev; + /* This assert should not fail 99.9999998027% of the times, unless + * the hash function is a poor one */ + assert(z <= 6.0); + } + } +#endif + + util_cache_clear(cache); + + FREE(cache->entries); + FREE(cache); +} diff --git a/src/gallium/auxiliary/util/u_cache.h b/src/gallium/auxiliary/util/u_cache.h new file mode 100644 index 0000000000..835e083734 --- /dev/null +++ b/src/gallium/auxiliary/util/u_cache.h @@ -0,0 +1,87 @@ +/************************************************************************** + * + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Simple cache. + * + * @author Jose Fonseca + */ + +#ifndef U_CACHE_H_ +#define U_CACHE_H_ + + +#include "pipe/p_compiler.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** + * Least Recently Used (LRU) cache. + */ +struct util_cache; + + +/** + * Create a cache. + * + * @param hash hash function + * @param compare should return 0 for two equal keys + * @param destroy destruction callback (optional) + * @param size maximum number of entries + */ +struct util_cache * +util_cache_create(uint32_t (*hash)(void *key), + int (*compare)(void *key1, void *key2), + void (*destroy)(void *key, void *value), + uint32_t size); + +void +util_cache_set(struct util_cache *cache, + void *key, + void *value); + +void * +util_cache_get(struct util_cache *cache, + void *key); + +void +util_cache_clear(struct util_cache *cache); + +void +util_cache_destroy(struct util_cache *cache); + + +#ifdef __cplusplus +} +#endif + +#endif /* U_CACHE_H_ */ diff --git a/src/gallium/auxiliary/util/u_hash.c b/src/gallium/auxiliary/util/u_hash.c new file mode 100644 index 0000000000..508a31f2cb --- /dev/null +++ b/src/gallium/auxiliary/util/u_hash.c @@ -0,0 +1,121 @@ +/************************************************************************** + * + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Hash functions implementation. + * + * @author Jose Fonseca + */ + + +#include "u_hash.h" + + +static uint32_t +util_crc32_table[256] = { + 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, + 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, + 0x0edb8832, 0x79dcb8a4, 0xe0d5e91e, 0x97d2d988, + 0x09b64c2b, 0x7eb17cbd, 0xe7b82d07, 0x90bf1d91, + 0x1db71064, 0x6ab020f2, 0xf3b97148, 0x84be41de, + 0x1adad47d, 0x6ddde4eb, 0xf4d4b551, 0x83d385c7, + 0x136c9856, 0x646ba8c0, 0xfd62f97a, 0x8a65c9ec, + 0x14015c4f, 0x63066cd9, 0xfa0f3d63, 0x8d080df5, + 0x3b6e20c8, 0x4c69105e, 0xd56041e4, 0xa2677172, + 0x3c03e4d1, 0x4b04d447, 0xd20d85fd, 0xa50ab56b, + 0x35b5a8fa, 0x42b2986c, 0xdbbbc9d6, 0xacbcf940, + 0x32d86ce3, 0x45df5c75, 0xdcd60dcf, 0xabd13d59, + 0x26d930ac, 0x51de003a, 0xc8d75180, 0xbfd06116, + 0x21b4f4b5, 0x56b3c423, 0xcfba9599, 0xb8bda50f, + 0x2802b89e, 0x5f058808, 0xc60cd9b2, 0xb10be924, + 0x2f6f7c87, 0x58684c11, 0xc1611dab, 0xb6662d3d, + 0x76dc4190, 0x01db7106, 0x98d220bc, 0xefd5102a, + 0x71b18589, 0x06b6b51f, 0x9fbfe4a5, 0xe8b8d433, + 0x7807c9a2, 0x0f00f934, 0x9609a88e, 0xe10e9818, + 0x7f6a0dbb, 0x086d3d2d, 0x91646c97, 0xe6635c01, + 0x6b6b51f4, 0x1c6c6162, 0x856530d8, 0xf262004e, + 0x6c0695ed, 0x1b01a57b, 0x8208f4c1, 0xf50fc457, + 0x65b0d9c6, 0x12b7e950, 0x8bbeb8ea, 0xfcb9887c, + 0x62dd1ddf, 0x15da2d49, 0x8cd37cf3, 0xfbd44c65, + 0x4db26158, 0x3ab551ce, 0xa3bc0074, 0xd4bb30e2, + 0x4adfa541, 0x3dd895d7, 0xa4d1c46d, 0xd3d6f4fb, + 0x4369e96a, 0x346ed9fc, 0xad678846, 0xda60b8d0, + 0x44042d73, 0x33031de5, 0xaa0a4c5f, 0xdd0d7cc9, + 0x5005713c, 0x270241aa, 0xbe0b1010, 0xc90c2086, + 0x5768b525, 0x206f85b3, 0xb966d409, 0xce61e49f, + 0x5edef90e, 0x29d9c998, 0xb0d09822, 0xc7d7a8b4, + 0x59b33d17, 0x2eb40d81, 0xb7bd5c3b, 0xc0ba6cad, + 0xedb88320, 0x9abfb3b6, 0x03b6e20c, 0x74b1d29a, + 0xead54739, 0x9dd277af, 0x04db2615, 0x73dc1683, + 0xe3630b12, 0x94643b84, 0x0d6d6a3e, 0x7a6a5aa8, + 0xe40ecf0b, 0x9309ff9d, 0x0a00ae27, 0x7d079eb1, + 0xf00f9344, 0x8708a3d2, 0x1e01f268, 0x6906c2fe, + 0xf762575d, 0x806567cb, 0x196c3671, 0x6e6b06e7, + 0xfed41b76, 0x89d32be0, 0x10da7a5a, 0x67dd4acc, + 0xf9b9df6f, 0x8ebeeff9, 0x17b7be43, 0x60b08ed5, + 0xd6d6a3e8, 0xa1d1937e, 0x38d8c2c4, 0x4fdff252, + 0xd1bb67f1, 0xa6bc5767, 0x3fb506dd, 0x48b2364b, + 0xd80d2bda, 0xaf0a1b4c, 0x36034af6, 0x41047a60, + 0xdf60efc3, 0xa867df55, 0x316e8eef, 0x4669be79, + 0xcb61b38c, 0xbc66831a, 0x256fd2a0, 0x5268e236, + 0xcc0c7795, 0xbb0b4703, 0x220216b9, 0x5505262f, + 0xc5ba3bbe, 0xb2bd0b28, 0x2bb45a92, 0x5cb36a04, + 0xc2d7ffa7, 0xb5d0cf31, 0x2cd99e8b, 0x5bdeae1d, + 0x9b64c2b0, 0xec63f226, 0x756aa39c, 0x026d930a, + 0x9c0906a9, 0xeb0e363f, 0x72076785, 0x05005713, + 0x95bf4a82, 0xe2b87a14, 0x7bb12bae, 0x0cb61b38, + 0x92d28e9b, 0xe5d5be0d, 0x7cdcefb7, 0x0bdbdf21, + 0x86d3d2d4, 0xf1d4e242, 0x68ddb3f8, 0x1fda836e, + 0x81be16cd, 0xf6b9265b, 0x6fb077e1, 0x18b74777, + 0x88085ae6, 0xff0f6a70, 0x66063bca, 0x11010b5c, + 0x8f659eff, 0xf862ae69, 0x616bffd3, 0x166ccf45, + 0xa00ae278, 0xd70dd2ee, 0x4e048354, 0x3903b3c2, + 0xa7672661, 0xd06016f7, 0x4969474d, 0x3e6e77db, + 0xaed16a4a, 0xd9d65adc, 0x40df0b66, 0x37d83bf0, + 0xa9bcae53, 0xdebb9ec5, 0x47b2cf7f, 0x30b5ffe9, + 0xbdbdf21c, 0xcabac28a, 0x53b39330, 0x24b4a3a6, + 0xbad03605, 0xcdd70693, 0x54de5729, 0x23d967bf, + 0xb3667a2e, 0xc4614ab8, 0x5d681b02, 0x2a6f2b94, + 0xb40bbe37, 0xc30c8ea1, 0x5a05df1b, 0x2d02ef8d +}; + + +/** + * @sa http://www.w3.org/TR/PNG/#D-CRCAppendix + */ +uint32_t +util_hash_crc32(void *data, size_t size) +{ + uint8_t *p = (uint8_t *)data; + uint32_t crc = 0xffffffff; + + while (size--) + crc = util_crc32_table[(crc ^ *p++) & 0xff] ^ (crc >> 8); + + return crc; +} diff --git a/src/gallium/auxiliary/util/u_hash.h b/src/gallium/auxiliary/util/u_hash.h new file mode 100644 index 0000000000..76c3513e4a --- /dev/null +++ b/src/gallium/auxiliary/util/u_hash.h @@ -0,0 +1,55 @@ +/************************************************************************** + * + * Copyright 2008 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Hash functions. + * + * @author Jose Fonseca + */ + +#ifndef U_HASH_H_ +#define U_HASH_H_ + + +#include "pipe/p_compiler.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +uint32_t +util_hash_crc32(void *data, size_t size); + + +#ifdef __cplusplus +} +#endif + +#endif /* U_HASH_H_ */ -- cgit v1.2.3 From e8d80609883db4c827f8c25e816e588b025843c0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 19 Dec 2008 16:14:48 -0700 Subject: gallium: Fix typeo in mipmap filter for GL_UNSIGNED_SHORT_1_5_5_5_REV This is copied from Ian's commit a330933bb75c38148668637cd22b90d75d39506f --- src/gallium/auxiliary/util/u_gen_mipmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 5afc52ba35..b0de5968e9 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -420,7 +420,7 @@ do_row(enum dtype datatype, uint comps, int srcWidth, const int rowAr0 = rowA[j] & 0x1f; const int rowAr1 = rowA[k] & 0x1f; const int rowBr0 = rowB[j] & 0x1f; - const int rowBr1 = rowB[k] & 0xf; + const int rowBr1 = rowB[k] & 0x1f; const int rowAg0 = (rowA[j] >> 5) & 0x1f; const int rowAg1 = (rowA[k] >> 5) & 0x1f; const int rowBg0 = (rowB[j] >> 5) & 0x1f; -- cgit v1.2.3 From 9c8db8685432fedd068157795422764ce96b89a0 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 19 Dec 2008 16:37:06 -0700 Subject: gallium: begin adapting Ian's 3D mipmap gen code to gallium utility lib Unfinished, a big no-op for now. --- src/gallium/auxiliary/util/u_gen_mipmap.c | 582 +++++++++++++++++++++++++++++- 1 file changed, 576 insertions(+), 6 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index b0de5968e9..30f161fe3b 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -2,6 +2,7 @@ * * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. * All Rights Reserved. + * Copyright 2008 VMware, Inc. All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the @@ -93,13 +94,82 @@ enum dtype typedef ushort half_float; -#if 0 -extern half_float -float_to_half(float f); +static half_float +float_to_half(float f) +{ + /* XXX fix this */ + return 0; +} + +static float +half_to_float(half_float h) +{ + /* XXX fix this */ + return 0.0f; +} + -extern float -half_to_float(half_float h); -#endif + + +/** + * \name Support macros for do_row and do_row_3d + * + * The macro madness is here for two reasons. First, it compacts the code + * slightly. Second, it makes it much easier to adjust the specifics of the + * filter to tune the rounding characteristics. + */ +/*@{*/ +#define DECLARE_ROW_POINTERS(t, e) \ + const t(*rowA)[e] = (const t(*)[e]) srcRowA; \ + const t(*rowB)[e] = (const t(*)[e]) srcRowB; \ + const t(*rowC)[e] = (const t(*)[e]) srcRowC; \ + const t(*rowD)[e] = (const t(*)[e]) srcRowD; \ + t(*dst)[e] = (t(*)[e]) dstRow + +#define DECLARE_ROW_POINTERS0(t) \ + const t *rowA = (const t *) srcRowA; \ + const t *rowB = (const t *) srcRowB; \ + const t *rowC = (const t *) srcRowC; \ + const t *rowD = (const t *) srcRowD; \ + t *dst = (t *) dstRow + +#define FILTER_SUM_3D(Aj, Ak, Bj, Bk, Cj, Ck, Dj, Dk) \ + ((unsigned) Aj + (unsigned) Ak \ + + (unsigned) Bj + (unsigned) Bk \ + + (unsigned) Cj + (unsigned) Ck \ + + (unsigned) Dj + (unsigned) Dk \ + + 4) >> 3 + +#define FILTER_3D(e) \ + do { \ + dst[i][e] = FILTER_SUM_3D(rowA[j][e], rowA[k][e], \ + rowB[j][e], rowB[k][e], \ + rowC[j][e], rowC[k][e], \ + rowD[j][e], rowD[k][e]); \ + } while(0) + +#define FILTER_F_3D(e) \ + do { \ + dst[i][e] = (rowA[j][e] + rowA[k][e] \ + + rowB[j][e] + rowB[k][e] \ + + rowC[j][e] + rowC[k][e] \ + + rowD[j][e] + rowD[k][e]) * 0.125F; \ + } while(0) + +#define FILTER_HF_3D(e) \ + do { \ + const float aj = half_to_float(rowA[j][e]); \ + const float ak = half_to_float(rowA[k][e]); \ + const float bj = half_to_float(rowB[j][e]); \ + const float bk = half_to_float(rowB[k][e]); \ + const float cj = half_to_float(rowC[j][e]); \ + const float ck = half_to_float(rowC[k][e]); \ + const float dj = half_to_float(rowD[j][e]); \ + const float dk = half_to_float(rowD[k][e]); \ + dst[i][e] = float_to_half((aj + ak + bj + bk + cj + ck + dj + dk) \ + * 0.125F); \ + } while(0) +/*@}*/ /** @@ -471,6 +541,385 @@ do_row(enum dtype datatype, uint comps, int srcWidth, } +/** + * Average together four rows of a source image to produce a single new + * row in the dest image. It's legal for the two source rows to point + * to the same data. The source width must be equal to either the + * dest width or two times the dest width. + * + * \param datatype GL pixel type \c GL_UNSIGNED_BYTE, \c GL_UNSIGNED_SHORT, + * \c GL_FLOAT, etc. + * \param comps number of components per pixel (1..4) + * \param srcWidth Width of a row in the source data + * \param srcRowA Pointer to one of the rows of source data + * \param srcRowB Pointer to one of the rows of source data + * \param srcRowC Pointer to one of the rows of source data + * \param srcRowD Pointer to one of the rows of source data + * \param dstWidth Width of a row in the destination data + * \param srcRowA Pointer to the row of destination data + */ +static void +do_row_3D(enum dtype datatype, uint comps, int srcWidth, + const void *srcRowA, const void *srcRowB, + const void *srcRowC, const void *srcRowD, + int dstWidth, void *dstRow) +{ + const uint k0 = (srcWidth == dstWidth) ? 0 : 1; + const uint colStride = (srcWidth == dstWidth) ? 1 : 2; + uint i, j, k; + + assert(comps >= 1); + assert(comps <= 4); + + if ((datatype == UBYTE) && (comps == 4)) { + DECLARE_ROW_POINTERS(ubyte, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + FILTER_3D(1); + FILTER_3D(2); + FILTER_3D(3); + } + } + else if ((datatype == UBYTE) && (comps == 3)) { + DECLARE_ROW_POINTERS(ubyte, 3); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + FILTER_3D(1); + FILTER_3D(2); + } + } + else if ((datatype == UBYTE) && (comps == 2)) { + DECLARE_ROW_POINTERS(ubyte, 2); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + FILTER_3D(1); + } + } + else if ((datatype == UBYTE) && (comps == 1)) { + DECLARE_ROW_POINTERS(ubyte, 1); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + } + } + else if ((datatype == USHORT) && (comps == 4)) { + DECLARE_ROW_POINTERS(ushort, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + FILTER_3D(1); + FILTER_3D(2); + FILTER_3D(3); + } + } + else if ((datatype == USHORT) && (comps == 3)) { + DECLARE_ROW_POINTERS(ushort, 3); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + FILTER_3D(1); + FILTER_3D(2); + } + } + else if ((datatype == USHORT) && (comps == 2)) { + DECLARE_ROW_POINTERS(ushort, 2); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + FILTER_3D(1); + } + } + else if ((datatype == USHORT) && (comps == 1)) { + DECLARE_ROW_POINTERS(ushort, 1); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_3D(0); + } + } + else if ((datatype == FLOAT) && (comps == 4)) { + DECLARE_ROW_POINTERS(float, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_F_3D(0); + FILTER_F_3D(1); + FILTER_F_3D(2); + FILTER_F_3D(3); + } + } + else if ((datatype == FLOAT) && (comps == 3)) { + DECLARE_ROW_POINTERS(float, 3); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_F_3D(0); + FILTER_F_3D(1); + FILTER_F_3D(2); + } + } + else if ((datatype == FLOAT) && (comps == 2)) { + DECLARE_ROW_POINTERS(float, 2); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_F_3D(0); + FILTER_F_3D(1); + } + } + else if ((datatype == FLOAT) && (comps == 1)) { + DECLARE_ROW_POINTERS(float, 1); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_F_3D(0); + } + } + else if ((datatype == HALF_FLOAT) && (comps == 4)) { + DECLARE_ROW_POINTERS(half_float, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_HF_3D(0); + FILTER_HF_3D(1); + FILTER_HF_3D(2); + FILTER_HF_3D(3); + } + } + else if ((datatype == HALF_FLOAT) && (comps == 3)) { + DECLARE_ROW_POINTERS(half_float, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_HF_3D(0); + FILTER_HF_3D(1); + FILTER_HF_3D(2); + } + } + else if ((datatype == HALF_FLOAT) && (comps == 2)) { + DECLARE_ROW_POINTERS(half_float, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_HF_3D(0); + FILTER_HF_3D(1); + } + } + else if ((datatype == HALF_FLOAT) && (comps == 1)) { + DECLARE_ROW_POINTERS(half_float, 4); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + FILTER_HF_3D(0); + } + } + else if ((datatype == UINT) && (comps == 1)) { + const uint *rowA = (const uint *) srcRowA; + const uint *rowB = (const uint *) srcRowB; + const uint *rowC = (const uint *) srcRowC; + const uint *rowD = (const uint *) srcRowD; + float *dst = (float *) dstRow; + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const uint64_t tmp = (((uint64_t) rowA[j] + (uint64_t) rowA[k]) + + ((uint64_t) rowB[j] + (uint64_t) rowB[k]) + + ((uint64_t) rowC[j] + (uint64_t) rowC[k]) + + ((uint64_t) rowD[j] + (uint64_t) rowD[k])); + dst[i] = (float)((double) tmp * 0.125); + } + } + else if ((datatype == USHORT_5_6_5) && (comps == 3)) { + DECLARE_ROW_POINTERS0(ushort); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const int rowAr0 = rowA[j] & 0x1f; + const int rowAr1 = rowA[k] & 0x1f; + const int rowBr0 = rowB[j] & 0x1f; + const int rowBr1 = rowB[k] & 0x1f; + const int rowCr0 = rowC[j] & 0x1f; + const int rowCr1 = rowC[k] & 0x1f; + const int rowDr0 = rowD[j] & 0x1f; + const int rowDr1 = rowD[k] & 0x1f; + const int rowAg0 = (rowA[j] >> 5) & 0x3f; + const int rowAg1 = (rowA[k] >> 5) & 0x3f; + const int rowBg0 = (rowB[j] >> 5) & 0x3f; + const int rowBg1 = (rowB[k] >> 5) & 0x3f; + const int rowCg0 = (rowC[j] >> 5) & 0x3f; + const int rowCg1 = (rowC[k] >> 5) & 0x3f; + const int rowDg0 = (rowD[j] >> 5) & 0x3f; + const int rowDg1 = (rowD[k] >> 5) & 0x3f; + const int rowAb0 = (rowA[j] >> 11) & 0x1f; + const int rowAb1 = (rowA[k] >> 11) & 0x1f; + const int rowBb0 = (rowB[j] >> 11) & 0x1f; + const int rowBb1 = (rowB[k] >> 11) & 0x1f; + const int rowCb0 = (rowC[j] >> 11) & 0x1f; + const int rowCb1 = (rowC[k] >> 11) & 0x1f; + const int rowDb0 = (rowD[j] >> 11) & 0x1f; + const int rowDb1 = (rowD[k] >> 11) & 0x1f; + const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1, + rowCr0, rowCr1, rowDr0, rowDr1); + const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1, + rowCg0, rowCg1, rowDg0, rowDg1); + const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1, + rowCb0, rowCb1, rowDb0, rowDb1); + dst[i] = (b << 11) | (g << 5) | r; + } + } + else if ((datatype == USHORT_4_4_4_4) && (comps == 4)) { + DECLARE_ROW_POINTERS0(ushort); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const int rowAr0 = rowA[j] & 0xf; + const int rowAr1 = rowA[k] & 0xf; + const int rowBr0 = rowB[j] & 0xf; + const int rowBr1 = rowB[k] & 0xf; + const int rowCr0 = rowC[j] & 0xf; + const int rowCr1 = rowC[k] & 0xf; + const int rowDr0 = rowD[j] & 0xf; + const int rowDr1 = rowD[k] & 0xf; + const int rowAg0 = (rowA[j] >> 4) & 0xf; + const int rowAg1 = (rowA[k] >> 4) & 0xf; + const int rowBg0 = (rowB[j] >> 4) & 0xf; + const int rowBg1 = (rowB[k] >> 4) & 0xf; + const int rowCg0 = (rowC[j] >> 4) & 0xf; + const int rowCg1 = (rowC[k] >> 4) & 0xf; + const int rowDg0 = (rowD[j] >> 4) & 0xf; + const int rowDg1 = (rowD[k] >> 4) & 0xf; + const int rowAb0 = (rowA[j] >> 8) & 0xf; + const int rowAb1 = (rowA[k] >> 8) & 0xf; + const int rowBb0 = (rowB[j] >> 8) & 0xf; + const int rowBb1 = (rowB[k] >> 8) & 0xf; + const int rowCb0 = (rowC[j] >> 8) & 0xf; + const int rowCb1 = (rowC[k] >> 8) & 0xf; + const int rowDb0 = (rowD[j] >> 8) & 0xf; + const int rowDb1 = (rowD[k] >> 8) & 0xf; + const int rowAa0 = (rowA[j] >> 12) & 0xf; + const int rowAa1 = (rowA[k] >> 12) & 0xf; + const int rowBa0 = (rowB[j] >> 12) & 0xf; + const int rowBa1 = (rowB[k] >> 12) & 0xf; + const int rowCa0 = (rowC[j] >> 12) & 0xf; + const int rowCa1 = (rowC[k] >> 12) & 0xf; + const int rowDa0 = (rowD[j] >> 12) & 0xf; + const int rowDa1 = (rowD[k] >> 12) & 0xf; + const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1, + rowCr0, rowCr1, rowDr0, rowDr1); + const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1, + rowCg0, rowCg1, rowDg0, rowDg1); + const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1, + rowCb0, rowCb1, rowDb0, rowDb1); + const int a = FILTER_SUM_3D(rowAa0, rowAa1, rowBa0, rowBa1, + rowCa0, rowCa1, rowDa0, rowDa1); + + dst[i] = (a << 12) | (b << 8) | (g << 4) | r; + } + } + else if ((datatype == USHORT_1_5_5_5_REV) && (comps == 4)) { + DECLARE_ROW_POINTERS0(ushort); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const int rowAr0 = rowA[j] & 0x1f; + const int rowAr1 = rowA[k] & 0x1f; + const int rowBr0 = rowB[j] & 0x1f; + const int rowBr1 = rowB[k] & 0x1f; + const int rowCr0 = rowC[j] & 0x1f; + const int rowCr1 = rowC[k] & 0x1f; + const int rowDr0 = rowD[j] & 0x1f; + const int rowDr1 = rowD[k] & 0x1f; + const int rowAg0 = (rowA[j] >> 5) & 0x1f; + const int rowAg1 = (rowA[k] >> 5) & 0x1f; + const int rowBg0 = (rowB[j] >> 5) & 0x1f; + const int rowBg1 = (rowB[k] >> 5) & 0x1f; + const int rowCg0 = (rowC[j] >> 5) & 0x1f; + const int rowCg1 = (rowC[k] >> 5) & 0x1f; + const int rowDg0 = (rowD[j] >> 5) & 0x1f; + const int rowDg1 = (rowD[k] >> 5) & 0x1f; + const int rowAb0 = (rowA[j] >> 10) & 0x1f; + const int rowAb1 = (rowA[k] >> 10) & 0x1f; + const int rowBb0 = (rowB[j] >> 10) & 0x1f; + const int rowBb1 = (rowB[k] >> 10) & 0x1f; + const int rowCb0 = (rowC[j] >> 10) & 0x1f; + const int rowCb1 = (rowC[k] >> 10) & 0x1f; + const int rowDb0 = (rowD[j] >> 10) & 0x1f; + const int rowDb1 = (rowD[k] >> 10) & 0x1f; + const int rowAa0 = (rowA[j] >> 15) & 0x1; + const int rowAa1 = (rowA[k] >> 15) & 0x1; + const int rowBa0 = (rowB[j] >> 15) & 0x1; + const int rowBa1 = (rowB[k] >> 15) & 0x1; + const int rowCa0 = (rowC[j] >> 15) & 0x1; + const int rowCa1 = (rowC[k] >> 15) & 0x1; + const int rowDa0 = (rowD[j] >> 15) & 0x1; + const int rowDa1 = (rowD[k] >> 15) & 0x1; + const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1, + rowCr0, rowCr1, rowDr0, rowDr1); + const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1, + rowCg0, rowCg1, rowDg0, rowDg1); + const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1, + rowCb0, rowCb1, rowDb0, rowDb1); + const int a = FILTER_SUM_3D(rowAa0, rowAa1, rowBa0, rowBa1, + rowCa0, rowCa1, rowDa0, rowDa1); + + dst[i] = (a << 15) | (b << 10) | (g << 5) | r; + } + } + else if ((datatype == UBYTE_3_3_2) && (comps == 3)) { + DECLARE_ROW_POINTERS0(ushort); + + for (i = j = 0, k = k0; i < (uint) dstWidth; + i++, j += colStride, k += colStride) { + const int rowAr0 = rowA[j] & 0x3; + const int rowAr1 = rowA[k] & 0x3; + const int rowBr0 = rowB[j] & 0x3; + const int rowBr1 = rowB[k] & 0x3; + const int rowCr0 = rowC[j] & 0x3; + const int rowCr1 = rowC[k] & 0x3; + const int rowDr0 = rowD[j] & 0x3; + const int rowDr1 = rowD[k] & 0x3; + const int rowAg0 = (rowA[j] >> 2) & 0x7; + const int rowAg1 = (rowA[k] >> 2) & 0x7; + const int rowBg0 = (rowB[j] >> 2) & 0x7; + const int rowBg1 = (rowB[k] >> 2) & 0x7; + const int rowCg0 = (rowC[j] >> 2) & 0x7; + const int rowCg1 = (rowC[k] >> 2) & 0x7; + const int rowDg0 = (rowD[j] >> 2) & 0x7; + const int rowDg1 = (rowD[k] >> 2) & 0x7; + const int rowAb0 = (rowA[j] >> 5) & 0x7; + const int rowAb1 = (rowA[k] >> 5) & 0x7; + const int rowBb0 = (rowB[j] >> 5) & 0x7; + const int rowBb1 = (rowB[k] >> 5) & 0x7; + const int rowCb0 = (rowC[j] >> 5) & 0x7; + const int rowCb1 = (rowC[k] >> 5) & 0x7; + const int rowDb0 = (rowD[j] >> 5) & 0x7; + const int rowDb1 = (rowD[k] >> 5) & 0x7; + const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1, + rowCr0, rowCr1, rowDr0, rowDr1); + const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1, + rowCg0, rowCg1, rowDg0, rowDg1); + const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1, + rowCb0, rowCb1, rowDb0, rowDb1); + dst[i] = (b << 5) | (g << 2) | r; + } + } + else { + debug_printf("bad format in do_row_3D()"); + } +} + + + static void format_to_type_comps(enum pipe_format pformat, enum dtype *datatype, uint *comps) @@ -575,6 +1024,87 @@ reduce_2d(enum pipe_format pformat, } +static void +reduce_3d(enum pipe_format pformat, + int srcWidth, int srcHeight, int srcDepth, + int srcRowStride, const ubyte *srcPtr, + int dstWidth, int dstHeight, int dstDepth, + int dstRowStride, ubyte *dstPtr) +{ + const int bpt = pf_get_size(pformat); + const int border = 0; + int img, row; + int bytesPerSrcImage, bytesPerDstImage; + int bytesPerSrcRow, bytesPerDstRow; + int srcImageOffset, srcRowOffset; + enum dtype datatype; + uint comps; + + format_to_type_comps(pformat, &datatype, &comps); + + bytesPerSrcImage = srcWidth * srcHeight * bpt; + bytesPerDstImage = dstWidth * dstHeight * bpt; + + bytesPerSrcRow = srcWidth * bpt; + bytesPerDstRow = dstWidth * bpt; + + /* Offset between adjacent src images to be averaged together */ + srcImageOffset = (srcDepth == dstDepth) ? 0 : bytesPerSrcImage; + + /* Offset between adjacent src rows to be averaged together */ + srcRowOffset = (srcHeight == dstHeight) ? 0 : srcWidth * bpt; + + /* + * Need to average together up to 8 src pixels for each dest pixel. + * Break that down into 3 operations: + * 1. take two rows from source image and average them together. + * 2. take two rows from next source image and average them together. + * 3. take the two averaged rows and average them for the final dst row. + */ + + /* + _mesa_printf("mip3d %d x %d x %d -> %d x %d x %d\n", + srcWidth, srcHeight, srcDepth, dstWidth, dstHeight, dstDepth); + */ + + for (img = 0; img < dstDepth; img++) { + /* first source image pointer, skipping border */ + const ubyte *imgSrcA = srcPtr + + (bytesPerSrcImage + bytesPerSrcRow + border) * bpt * border + + img * (bytesPerSrcImage + srcImageOffset); + /* second source image pointer, skipping border */ + const ubyte *imgSrcB = imgSrcA + srcImageOffset; + /* address of the dest image, skipping border */ + ubyte *imgDst = dstPtr + + (bytesPerDstImage + bytesPerDstRow + border) * bpt * border + + img * bytesPerDstImage; + + /* setup the four source row pointers and the dest row pointer */ + const ubyte *srcImgARowA = imgSrcA; + const ubyte *srcImgARowB = imgSrcA + srcRowOffset; + const ubyte *srcImgBRowA = imgSrcB; + const ubyte *srcImgBRowB = imgSrcB + srcRowOffset; + ubyte *dstImgRow = imgDst; + + for (row = 0; row < dstHeight; row++) { + do_row_3D(datatype, comps, srcWidth, + srcImgARowA, srcImgARowB, + srcImgBRowA, srcImgBRowB, + dstWidth, dstImgRow); + + /* advance to next rows */ + srcImgARowA += bytesPerSrcRow + srcRowOffset; + srcImgARowB += bytesPerSrcRow + srcRowOffset; + srcImgBRowA += bytesPerSrcRow + srcRowOffset; + srcImgBRowB += bytesPerSrcRow + srcRowOffset; + dstImgRow += bytesPerDstRow; + } + } +} + + + + static void make_1d_mipmap(struct gen_mipmap_state *ctx, struct pipe_texture *pt, @@ -666,6 +1196,46 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, struct pipe_texture *pt, uint face, uint baseLevel, uint lastLevel) { + struct pipe_context *pipe = ctx->pipe; + struct pipe_screen *screen = pipe->screen; + uint dstLevel, zslice; + + assert(pt->block.width == 1); + assert(pt->block.height == 1); + + for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) { + const uint srcLevel = dstLevel - 1; + struct pipe_surface *srcSurf, *dstSurf; + ubyte *srcMap, *dstMap; + + srcSurf = screen->get_tex_surface(screen, pt, face, srcLevel, zslice, + PIPE_BUFFER_USAGE_CPU_READ); + dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, + PIPE_BUFFER_USAGE_CPU_WRITE); + + srcMap = ((ubyte *) pipe_buffer_map(screen, srcSurf->buffer, + PIPE_BUFFER_USAGE_CPU_READ) + + srcSurf->offset); + dstMap = ((ubyte *) pipe_buffer_map(screen, dstSurf->buffer, + PIPE_BUFFER_USAGE_CPU_WRITE) + + dstSurf->offset); + +#if 0 + reduce_3d(pt->format, + srcSurf->width, srcSurf->height, + srcSurf->stride, srcMap, + dstSurf->width, dstSurf->height, + dstSurf->stride, dstMap); +#else + (void) reduce_3d; +#endif + + pipe_buffer_unmap(screen, srcSurf->buffer); + pipe_buffer_unmap(screen, dstSurf->buffer); + + pipe_surface_reference(&srcSurf, NULL); + pipe_surface_reference(&dstSurf, NULL); + } } -- cgit v1.2.3 From 9127a03bcbef27ed3cfc36d370969b430870fa0e Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sat, 20 Dec 2008 12:59:51 +0000 Subject: gallium: Fix typo in define name. --- src/gallium/auxiliary/util/u_memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h index bcae10832a..79e34e185f 100644 --- a/src/gallium/auxiliary/util/u_memory.h +++ b/src/gallium/auxiliary/util/u_memory.h @@ -52,7 +52,7 @@ extern "C" { #endif -#if defined(PIPE_SUBSYSTEM_WINDOWS) && defined(DEBUG) +#if defined(PIPE_OS_WINDOWS) && defined(DEBUG) /* memory debugging */ -- cgit v1.2.3 From ae7e75d6108e8621878083b35a13edc1aca893df Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 22 Dec 2008 16:55:27 +0000 Subject: gallium: const correctness. --- src/gallium/auxiliary/util/u_cache.c | 16 ++++++++-------- src/gallium/auxiliary/util/u_cache.h | 14 +++++++------- src/gallium/auxiliary/util/u_hash.c | 4 ++-- src/gallium/auxiliary/util/u_hash.h | 2 +- 4 files changed, 18 insertions(+), 18 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_cache.c b/src/gallium/auxiliary/util/u_cache.c index 69a52687fb..0a1a64259f 100644 --- a/src/gallium/auxiliary/util/u_cache.c +++ b/src/gallium/auxiliary/util/u_cache.c @@ -57,10 +57,10 @@ struct util_cache_entry struct util_cache { /** Hash function */ - uint32_t (*hash)(void *key); + uint32_t (*hash)(const void *key); /** Compare two keys */ - int (*compare)(void *key1, void *key2); + int (*compare)(const void *key1, const void *key2); /** Destroy a (key, value) pair */ void (*destroy)(void *key, void *value); @@ -76,10 +76,10 @@ struct util_cache struct util_cache * -util_cache_create(uint32_t (*hash)(void *key), - int (*compare)(void *key1, void *key2), - void (*destroy)(void *key, void *value), - uint32_t size) +util_cache_create(uint32_t (*hash)(const void *key), + int (*compare)(const void *key1, const void *key2), + void (*destroy)(void *key, void *value), + uint32_t size) { struct util_cache *cache; @@ -104,7 +104,7 @@ util_cache_create(uint32_t (*hash)(void *key), static INLINE struct util_cache_entry * util_cache_entry_get(struct util_cache *cache, - void *key) + const void *key) { uint32_t hash; @@ -153,7 +153,7 @@ util_cache_set(struct util_cache *cache, void * util_cache_get(struct util_cache *cache, - void *key) + const void *key) { struct util_cache_entry *entry; diff --git a/src/gallium/auxiliary/util/u_cache.h b/src/gallium/auxiliary/util/u_cache.h index 835e083734..8a612c6585 100644 --- a/src/gallium/auxiliary/util/u_cache.h +++ b/src/gallium/auxiliary/util/u_cache.h @@ -59,19 +59,19 @@ struct util_cache; * @param size maximum number of entries */ struct util_cache * -util_cache_create(uint32_t (*hash)(void *key), - int (*compare)(void *key1, void *key2), - void (*destroy)(void *key, void *value), - uint32_t size); +util_cache_create(uint32_t (*hash)(const void *key), + int (*compare)(const void *key1, const void *key2), + void (*destroy)(void *key, void *value), + uint32_t size); void util_cache_set(struct util_cache *cache, - void *key, - void *value); + void *key, + void *value); void * util_cache_get(struct util_cache *cache, - void *key); + const void *key); void util_cache_clear(struct util_cache *cache); diff --git a/src/gallium/auxiliary/util/u_hash.c b/src/gallium/auxiliary/util/u_hash.c index 508a31f2cb..b67653ec70 100644 --- a/src/gallium/auxiliary/util/u_hash.c +++ b/src/gallium/auxiliary/util/u_hash.c @@ -36,7 +36,7 @@ #include "u_hash.h" -static uint32_t +static const uint32_t util_crc32_table[256] = { 0x00000000, 0x77073096, 0xee0e612c, 0x990951ba, 0x076dc419, 0x706af48f, 0xe963a535, 0x9e6495a3, @@ -109,7 +109,7 @@ util_crc32_table[256] = { * @sa http://www.w3.org/TR/PNG/#D-CRCAppendix */ uint32_t -util_hash_crc32(void *data, size_t size) +util_hash_crc32(const void *data, size_t size) { uint8_t *p = (uint8_t *)data; uint32_t crc = 0xffffffff; diff --git a/src/gallium/auxiliary/util/u_hash.h b/src/gallium/auxiliary/util/u_hash.h index 76c3513e4a..8d92b07c85 100644 --- a/src/gallium/auxiliary/util/u_hash.h +++ b/src/gallium/auxiliary/util/u_hash.h @@ -45,7 +45,7 @@ extern "C" { uint32_t -util_hash_crc32(void *data, size_t size); +util_hash_crc32(const void *data, size_t size); #ifdef __cplusplus -- cgit v1.2.3 From b8e68f2e55ed22a97b7f976fe9556b2abcc49ea9 Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 14 Nov 2008 13:26:01 +0100 Subject: tgsi: Keep address register as a floating point. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 29 +++++++++-------------------- 1 file changed, 9 insertions(+), 20 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 65a0f39fdb..0756d7db17 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -466,17 +466,6 @@ micro_exp2( #endif } -static void -micro_f2it( - union tgsi_exec_channel *dst, - const union tgsi_exec_channel *src ) -{ - dst->i[0] = (int) src->f[0]; - dst->i[1] = (int) src->f[1]; - dst->i[2] = (int) src->f[2]; - dst->i[3] = (int) src->f[3]; -} - static void micro_f2ut( union tgsi_exec_channel *dst, @@ -1075,10 +1064,10 @@ fetch_source( &indir_index ); /* add value of address register to the offset */ - index.i[0] += indir_index.i[0]; - index.i[1] += indir_index.i[1]; - index.i[2] += indir_index.i[2]; - index.i[3] += indir_index.i[3]; + index.i[0] += (int) indir_index.f[0]; + index.i[1] += (int) indir_index.f[1]; + index.i[2] += (int) indir_index.f[2]; + index.i[3] += (int) indir_index.f[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. @@ -1131,10 +1120,10 @@ fetch_source( &index2, &indir_index ); - index.i[0] += indir_index.i[0]; - index.i[1] += indir_index.i[1]; - index.i[2] += indir_index.i[2]; - index.i[3] += indir_index.i[3]; + index.i[0] += (int) indir_index.f[0]; + index.i[1] += (int) indir_index.f[1]; + index.i[2] += (int) indir_index.f[2]; + index.i[3] += (int) indir_index.f[3]; /* for disabled execution channels, zero-out the index to * avoid using a potential garbage value. @@ -1754,7 +1743,7 @@ exec_instruction( case TGSI_OPCODE_ARL: FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { FETCH( &r[0], 0, chan_index ); - micro_f2it( &r[0], &r[0] ); + micro_trunc( &r[0], &r[0] ); STORE( &r[0], 0, chan_index ); } break; -- cgit v1.2.3 From 4b3c74b4d6786475bc45f883612e76069e722cbd Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Fri, 14 Nov 2008 13:31:06 +0100 Subject: tgsi: Return 0.0 for negative constant register indices. --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 0756d7db17..d55c337207 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -958,14 +958,22 @@ fetch_src_file_channel( switch( file ) { case TGSI_FILE_CONSTANT: assert(mach->Consts); - assert(index->i[0] >= 0); - assert(index->i[1] >= 0); - assert(index->i[2] >= 0); - assert(index->i[3] >= 0); - chan->f[0] = mach->Consts[index->i[0]][swizzle]; - chan->f[1] = mach->Consts[index->i[1]][swizzle]; - chan->f[2] = mach->Consts[index->i[2]][swizzle]; - chan->f[3] = mach->Consts[index->i[3]][swizzle]; + if (index->i[0] < 0) + chan->f[0] = 0.0f; + else + chan->f[0] = mach->Consts[index->i[0]][swizzle]; + if (index->i[1] < 0) + chan->f[1] = 0.0f; + else + chan->f[1] = mach->Consts[index->i[1]][swizzle]; + if (index->i[2] < 0) + chan->f[2] = 0.0f; + else + chan->f[2] = mach->Consts[index->i[2]][swizzle]; + if (index->i[3] < 0) + chan->f[3] = 0.0f; + else + chan->f[3] = mach->Consts[index->i[3]][swizzle]; break; case TGSI_FILE_INPUT: -- cgit v1.2.3 From ed7ba03256fc4503d5d7483d032014ac9e8242fe Mon Sep 17 00:00:00 2001 From: Michal Krol Date: Tue, 23 Dec 2008 15:13:59 +0100 Subject: tgsi: Dump indirect register swizzle. --- src/gallium/auxiliary/tgsi/tgsi_dump.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 3177f54952..485e96379c 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -180,14 +180,16 @@ _dump_register_ind( uint file, int index, uint ind_file, - int ind_index ) + int ind_index, + uint ind_swizzle ) { ENM( file, file_names ); CHR( '[' ); ENM( ind_file, file_names ); CHR( '[' ); SID( ind_index ); - CHR( ']' ); + TXT( "]." ); + ENM( ind_swizzle, swizzle_names ); if (index != 0) { if (index > 0) CHR( '+' ); @@ -377,7 +379,8 @@ iter_instruction( src->SrcRegister.File, src->SrcRegister.Index, src->SrcRegisterInd.File, - src->SrcRegisterInd.Index ); + src->SrcRegisterInd.Index, + src->SrcRegisterInd.SwizzleX ); } else { _dump_register( -- cgit v1.2.3 From f5d4274b4a8effc70c238060c3728aea629663df Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 23 Dec 2008 13:26:25 +0000 Subject: draw: allow driver-override of draw_need_pipeline() --- src/gallium/auxiliary/draw/draw_pipe_validate.c | 21 +++++++++++++++++---- src/gallium/auxiliary/draw/draw_vbuf.h | 11 +++++++++++ 2 files changed, 28 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pipe_validate.c b/src/gallium/auxiliary/draw/draw_pipe_validate.c index f34c68728e..03e842ce08 100644 --- a/src/gallium/auxiliary/draw/draw_pipe_validate.c +++ b/src/gallium/auxiliary/draw/draw_pipe_validate.c @@ -33,6 +33,7 @@ #include "draw_private.h" #include "draw_pipe.h" #include "draw_context.h" +#include "draw_vbuf.h" static boolean points( unsigned prim ) { @@ -52,16 +53,28 @@ static boolean triangles( unsigned prim ) } /** - * Check if we need any special pipeline stages, or whether - * prims/verts can go through untouched. Don't test for bypass - * clipping or vs modes, this function is just about the primitive - * pipeline stages. + * Default version of a function to check if we need any special + * pipeline stages, or whether prims/verts can go through untouched. + * Don't test for bypass clipping or vs modes, this function is just + * about the primitive pipeline stages. + * + * This can be overridden by the driver. */ boolean draw_need_pipeline(const struct draw_context *draw, const struct pipe_rasterizer_state *rasterizer, unsigned int prim ) { + /* If the driver has overridden this, use that version: + */ + if (draw->render && + draw->render->need_pipeline) + { + return draw->render->need_pipeline( draw->render, + rasterizer, + prim ); + } + /* Don't have to worry about triangles turning into lines/points * and triggering the pipeline, because we have to trigger the * pipeline *anyway* if unfilled mode is active. diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index b0aa2df309..9ac068c47b 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -54,6 +54,17 @@ struct vbuf_render { unsigned max_indices; unsigned max_vertex_buffer_bytes; + /** + * Query if the hardware driver needs assistance for a particular + * combination of rasterizer state and primitive. + * + * Currently optional. + */ + boolean (*need_pipeline)(const struct vbuf_render *render, + const struct pipe_rasterizer_state *rasterizer, + unsigned int prim ); + + /** * Get the hardware vertex format. * -- cgit v1.2.3 From fc4cea08fe8320438c72de7f4af2d7091681dca3 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 23 Dec 2008 18:16:49 +0000 Subject: tgsi: fix incomplete rename of loop counter variable --- src/gallium/auxiliary/tgsi/tgsi_scan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index cfc7ea8e89..1239f6c076 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -124,7 +124,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, /* only first 32 regs will appear in this bitfield */ info->file_mask[file] |= (1 << reg); info->file_count[file]++; - info->file_max[file] = MAX2(info->file_max[file], (int)i); + info->file_max[file] = MAX2(info->file_max[file], (int)reg); if (file == TGSI_FILE_INPUT) { info->input_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName; -- cgit v1.2.3 From 49c40b10c72e64977971ccb96abfc8767ed4c6ea Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 7 Nov 2008 13:02:43 -0700 Subject: gallium: implement TGSI_OPCODE_DP2A, add sqrt to NRM3/NRM4 --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index d55c337207..f98b66dc0b 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -2041,7 +2041,21 @@ exec_instruction( case TGSI_OPCODE_DOT2ADD: /* TGSI_OPCODE_DP2A */ - assert (0); + FETCH( &r[0], 0, CHAN_X ); + FETCH( &r[1], 1, CHAN_X ); + micro_mul( &r[0], &r[0], &r[1] ); + + FETCH( &r[1], 0, CHAN_Y ); + FETCH( &r[2], 1, CHAN_Y ); + micro_mul( &r[1], &r[1], &r[2] ); + micro_add( &r[0], &r[0], &r[1] ); + + FETCH( &r[2], 2, CHAN_X ); + micro_add( &r[0], &r[0], &r[2] ); + + FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { + STORE( &r[0], 0, chan_index ); + } break; case TGSI_OPCODE_INDEX: @@ -2488,7 +2502,8 @@ exec_instruction( micro_mul( &dot, &r[2], &r[2] ); micro_add( &tmp, &tmp, &dot ); - /* tmp = 1 / tmp */ + /* tmp = 1 / sqrt(tmp) */ + micro_sqrt( &tmp, &tmp ); micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); /* note: w channel is undefined */ @@ -2521,7 +2536,8 @@ exec_instruction( micro_mul( &dot, &r[3], &r[3] ); micro_add( &tmp, &tmp, &dot ); - /* tmp = 1 / tmp */ + /* tmp = 1 / sqrt(tmp) */ + micro_sqrt( &tmp, &tmp ); micro_div( &tmp, &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &tmp ); FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) { -- cgit v1.2.3 From 42d00790029da4bc7e77f68c8f1c22ac9c417e42 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 30 Dec 2008 17:06:51 +0000 Subject: rtasm: Remove spurious semi-colons after function bodies. --- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index b65bfa7bbd..e9015ec2eb 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -261,7 +261,7 @@ emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) inst.inst.vB = vB; inst.inst.op2 = op2; emit_instruction(p, inst.bits); -}; +} union vxr_inst { @@ -287,7 +287,7 @@ emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) inst.inst.rC = 0; inst.inst.op2 = op2; emit_instruction(p, inst.bits); -}; +} union va_inst { @@ -313,7 +313,7 @@ emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) inst.inst.vC = vC; inst.inst.op2 = op2; emit_instruction(p, inst.bits); -}; +} union i_inst { @@ -430,7 +430,7 @@ emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si) inst.inst.ra = ra; inst.inst.si = (unsigned) (si & 0xffff); emit_instruction(p, inst.bits); -}; +} union a_inst { @@ -459,7 +459,7 @@ emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2, inst.inst.op2 = op2; inst.inst.rc = rc; emit_instruction(p, inst.bits); -}; +} union xo_inst { -- cgit v1.2.3 From 0e0fb49c4515e14c54f23c1d3f8b2e981fe404a2 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 30 Dec 2008 17:15:34 +0000 Subject: gallium: Remove unused variables. --- src/gallium/auxiliary/tgsi/tgsi_dump_c.c | 1 - src/gallium/drivers/softpipe/sp_quad_fs.c | 1 - src/mesa/state_tracker/st_framebuffer.c | 1 - 3 files changed, 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c index be25cb45a0..c575b6c3e1 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c @@ -646,7 +646,6 @@ tgsi_dump_c( struct tgsi_full_declaration fd; uint ignored = flags & TGSI_DUMP_C_IGNORED; uint deflt = flags & TGSI_DUMP_C_DEFAULT; - uint instno = 0; tgsi_parse_init( &parse, tokens ); diff --git a/src/gallium/drivers/softpipe/sp_quad_fs.c b/src/gallium/drivers/softpipe/sp_quad_fs.c index 40329a9562..5dacbbe55f 100644 --- a/src/gallium/drivers/softpipe/sp_quad_fs.c +++ b/src/gallium/drivers/softpipe/sp_quad_fs.c @@ -171,7 +171,6 @@ static void shade_destroy(struct quad_stage *qs) struct quad_stage *sp_quad_shade_stage( struct softpipe_context *softpipe ) { struct quad_shade_stage *qss = CALLOC_STRUCT(quad_shade_stage); - uint i; /* allocate storage for program inputs/outputs, aligned to 16 bytes */ qss->inputs = MALLOC(PIPE_MAX_ATTRIBS * sizeof(*qss->inputs) + 16); diff --git a/src/mesa/state_tracker/st_framebuffer.c b/src/mesa/state_tracker/st_framebuffer.c index 43ac195e67..ea22a94303 100644 --- a/src/mesa/state_tracker/st_framebuffer.c +++ b/src/mesa/state_tracker/st_framebuffer.c @@ -170,7 +170,6 @@ st_set_framebuffer_surface(struct st_framebuffer *stfb, uint surfIndex, struct pipe_surface *surf) { GET_CURRENT_CONTEXT(ctx); - struct st_context *st; static const GLuint invalid_size = 9999999; struct st_renderbuffer *strb; GLuint width, height, i; -- cgit v1.2.3 From 72f993b5b11174c2917af29ef7a86e7866d681fb Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 30 Dec 2008 17:21:15 +0000 Subject: draw: Do not specify types in bitfields. As advised by gcc -pedantic. --- src/gallium/auxiliary/draw/draw_vertex.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vertex.h b/src/gallium/auxiliary/draw/draw_vertex.h index a943607d7e..c143cf2372 100644 --- a/src/gallium/auxiliary/draw/draw_vertex.h +++ b/src/gallium/auxiliary/draw/draw_vertex.h @@ -81,9 +81,9 @@ struct vertex_info * memcmp() comparisons. */ struct { - ubyte interp_mode:4; /**< INTERP_x */ - ubyte emit:4; /**< EMIT_x */ - ubyte src_index; /**< map to post-xform attribs */ + unsigned interp_mode:4; /**< INTERP_x */ + unsigned emit:4; /**< EMIT_x */ + unsigned src_index:8; /**< map to post-xform attribs */ } attrib[PIPE_MAX_SHADER_INPUTS]; }; -- cgit v1.2.3 From 369115e4c7a2985d880951fd8248deefa92025dd Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 30 Dec 2008 19:21:30 +0000 Subject: gallium: Initialize var before use. --- src/gallium/auxiliary/util/u_gen_mipmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 30f161fe3b..b5eb896b7a 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -1198,7 +1198,7 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, { struct pipe_context *pipe = ctx->pipe; struct pipe_screen *screen = pipe->screen; - uint dstLevel, zslice; + uint dstLevel, zslice = 0; assert(pt->block.width == 1); assert(pt->block.height == 1); -- cgit v1.2.3 From 2f24bc698412b4635422a52f9d7073ce7854dceb Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 30 Dec 2008 20:10:24 +0000 Subject: draw: Avoid integer overflow converting pointers on 64bit archs. Not really an error, as we only care for the lower 4 bits. --- src/gallium/auxiliary/draw/draw_vs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c index 7f305304ff..c057cd67fd 100644 --- a/src/gallium/auxiliary/draw/draw_vs.c +++ b/src/gallium/auxiliary/draw/draw_vs.c @@ -50,7 +50,7 @@ void draw_vs_set_constants( struct draw_context *draw, const float (*constants)[4], unsigned size ) { - if (((unsigned)constants) & 0xf) { + if (((uintptr_t)constants) & 0xf) { if (size > draw->vs.const_storage_size) { if (draw->vs.aligned_constant_storage) align_free((void *)draw->vs.aligned_constant_storage); -- cgit v1.2.3 From e96985b02699e56753d36d33d68dae49a5478921 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 31 Dec 2008 13:29:09 +0000 Subject: util: List new file in SConscript. --- src/gallium/auxiliary/util/SConscript | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index f5bd308083..5df932c0f7 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -23,6 +23,7 @@ util = env.ConvenienceLibrary( 'u_stream_wd.c', 'u_tile.c', 'u_time.c', + 'u_timed_winsys.c', ]) auxiliaries.insert(0, util) -- cgit v1.2.3 From 62bf6cf6c7b560f59ec72ad138e40383ee47de12 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 19 Dec 2008 16:14:48 -0700 Subject: gallium: Fix typeo in mipmap filter for GL_UNSIGNED_SHORT_1_5_5_5_REV This is copied from Ian's commit a330933bb75c38148668637cd22b90d75d39506f --- src/gallium/auxiliary/util/u_gen_mipmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 5afc52ba35..b0de5968e9 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -420,7 +420,7 @@ do_row(enum dtype datatype, uint comps, int srcWidth, const int rowAr0 = rowA[j] & 0x1f; const int rowAr1 = rowA[k] & 0x1f; const int rowBr0 = rowB[j] & 0x1f; - const int rowBr1 = rowB[k] & 0xf; + const int rowBr1 = rowB[k] & 0x1f; const int rowAg0 = (rowA[j] >> 5) & 0x1f; const int rowAg1 = (rowA[k] >> 5) & 0x1f; const int rowBg0 = (rowB[j] >> 5) & 0x1f; -- cgit v1.2.3 From dc48ae97dcd84acf691b33b0ea2e76c5fdfe18e1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 24 Nov 2008 10:02:44 -0700 Subject: tgsi: add tgsi_declaration fields for centroid sampling, invariant optimization (cherry picked from commit 434e255eae90b0f3d836d452b7d3b0c5aadf78b8) --- src/gallium/auxiliary/tgsi/tgsi_build.c | 2 ++ src/gallium/auxiliary/tgsi/tgsi_dump.c | 8 ++++++++ src/gallium/include/pipe/p_shader_tokens.h | 4 +++- 3 files changed, 13 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 74614d3688..1219c7da18 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -119,6 +119,8 @@ tgsi_default_declaration( void ) declaration.UsageMask = TGSI_WRITEMASK_XYZW; declaration.Interpolate = TGSI_INTERPOLATE_CONSTANT; declaration.Semantic = 0; + declaration.Centroid = 0; + declaration.Invariant = 0; declaration.Padding = 0; declaration.Extended = 0; diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c index 485e96379c..2ed8c2bf07 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_dump.c +++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c @@ -248,6 +248,14 @@ iter_declaration( TXT( ", " ); ENM( decl->Declaration.Interpolate, interpolate_names ); + if (decl->Declaration.Centroid) { + TXT( ", CENTROID" ); + } + + if (decl->Declaration.Invariant) { + TXT( ", INVARIANT" ); + } + EOL(); return TRUE; diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h index a562e3a6b1..d591f046fb 100644 --- a/src/gallium/include/pipe/p_shader_tokens.h +++ b/src/gallium/include/pipe/p_shader_tokens.h @@ -85,7 +85,9 @@ struct tgsi_declaration unsigned UsageMask : 4; /* bitmask of TGSI_WRITEMASK_x flags */ unsigned Interpolate : 4; /* TGSI_INTERPOLATE_ */ unsigned Semantic : 1; /* BOOL, any semantic info? */ - unsigned Padding : 6; + unsigned Centroid : 1; /* centroid sampling */ + unsigned Invariant : 1; /* invariant optimization */ + unsigned Padding : 4; unsigned Extended : 1; /* BOOL */ }; -- cgit v1.2.3 From b8cf2f00760b4d2299f0880b7e6896bb2d71407b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Tue, 25 Nov 2008 09:02:27 -0700 Subject: gallium: added centroid/invarient fields to declarations (cherry picked from commit 4de360e67d83cd6503fb8ad053bb8afe507db5fa) --- src/gallium/auxiliary/tgsi/tgsi_build.c | 6 ++++++ src/gallium/auxiliary/tgsi/tgsi_build.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c index 1219c7da18..ed8fc5ac25 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.c +++ b/src/gallium/auxiliary/tgsi/tgsi_build.c @@ -133,6 +133,8 @@ tgsi_build_declaration( unsigned usage_mask, unsigned interpolate, unsigned semantic, + unsigned centroid, + unsigned invariant, struct tgsi_header *header ) { struct tgsi_declaration declaration; @@ -145,6 +147,8 @@ tgsi_build_declaration( declaration.UsageMask = usage_mask; declaration.Interpolate = interpolate; declaration.Semantic = semantic; + declaration.Centroid = centroid; + declaration.Invariant = invariant; header_bodysize_grow( header ); @@ -196,6 +200,8 @@ tgsi_build_full_declaration( full_decl->Declaration.UsageMask, full_decl->Declaration.Interpolate, full_decl->Declaration.Semantic, + full_decl->Declaration.Centroid, + full_decl->Declaration.Invariant, header ); if (maxsize <= size) diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h index 7d6234746a..0fd6fabd83 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_build.h +++ b/src/gallium/auxiliary/tgsi/tgsi_build.h @@ -71,6 +71,8 @@ tgsi_build_declaration( unsigned usage_mask, unsigned interpolate, unsigned semantic, + unsigned centroid, + unsigned invariant, struct tgsi_header *header ); struct tgsi_full_declaration -- cgit v1.2.3 From 395edbc5151b2ce9dd77a22d104ce886e9326354 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Thu, 8 Jan 2009 12:31:36 +0000 Subject: draw: Predeclare struct. --- src/gallium/auxiliary/draw/draw_vbuf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index 9ac068c47b..7e1df88f0b 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -30,7 +30,7 @@ * Vertex buffer drawing stage. * * \author Keith Whitwell - * \author José Fonseca + * \author Jose Fonseca */ #ifndef DRAW_VBUF_H_ @@ -38,6 +38,7 @@ +struct pipe_rasterizer_state; struct draw_context; struct vertex_info; -- cgit v1.2.3 From 7844b4e730604e613a88f536c4aeee5c02d300fd Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 9 Jan 2009 19:08:56 +0000 Subject: draw: Add missing include. --- src/gallium/auxiliary/draw/draw_vbuf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vbuf.h b/src/gallium/auxiliary/draw/draw_vbuf.h index 7e1df88f0b..a1c4c14445 100644 --- a/src/gallium/auxiliary/draw/draw_vbuf.h +++ b/src/gallium/auxiliary/draw/draw_vbuf.h @@ -37,6 +37,8 @@ #define DRAW_VBUF_H_ +#include "pipe/p_compiler.h" + struct pipe_rasterizer_state; struct draw_context; -- cgit v1.2.3 From 2b26a92cd34f8d83cc0ae621d1cfeb3955de57fa Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 9 Jan 2009 20:57:14 -0700 Subject: gallium: s/false/FALSE/ --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index b9a75ae559..071bc2015c 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -443,7 +443,7 @@ void spe_init_func(struct spe_function *p, unsigned code_size) p->regs[i] = 1; } - p->print = false; + p->print = FALSE; p->indent = 0; } -- cgit v1.2.3 From 7acaeb87750226e7407908bc2dfa9989049202fa Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 9 Jan 2009 21:42:17 -0700 Subject: gallium: added comment/annotation support to PPC rtasm --- src/gallium/auxiliary/rtasm/rtasm_ppc.c | 242 ++++++++++++++++++++++++-------- src/gallium/auxiliary/rtasm/rtasm_ppc.h | 7 + 2 files changed, 187 insertions(+), 62 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c index e9015ec2eb..1bb9026205 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -47,6 +48,8 @@ ppc_init_func(struct ppc_function *p) { uint i; + memset(p, 0, sizeof(*p)); + p->num_inst = 0; p->max_inst = 100; /* first guess at buffer size */ p->store = rtasm_exec_malloc(p->max_inst * PPC_INST_SIZE); @@ -54,6 +57,9 @@ ppc_init_func(struct ppc_function *p) p->fp_used = 0x0; p->vec_used = 0x0; + p->print = FALSE; + p->indent = 0; + /* only allow using gp registers 3..12 for now */ for (i = 0; i < 3; i++) ppc_reserve_register(p, i); @@ -105,6 +111,42 @@ ppc_dump_func(const struct ppc_function *p) } +void +ppc_print_code(struct ppc_function *p, boolean enable) +{ + p->print = enable; +} + + +void +ppc_indent(struct ppc_function *p, int spaces) +{ + p->indent += spaces; +} + + +static void +indent(const struct ppc_function *p) +{ + int i; + for (i = 0; i < p->indent; i++) { + putchar(' '); + } +} + + +void +ppc_comment(struct ppc_function *p, int rel_indent, const char *s) +{ + if (p->print) { + p->indent += rel_indent; + indent(p); + p->indent -= rel_indent; + printf("# %s\n", s); + } +} + + /** * Mark a register as being unavailable. */ @@ -132,6 +174,7 @@ ppc_allocate_register(struct ppc_function *p) return i; } } + printf("OUT OF PPC registers!\n"); return -1; } @@ -163,6 +206,7 @@ ppc_allocate_fp_register(struct ppc_function *p) return i; } } + printf("OUT OF PPC FP registers!\n"); return -1; } @@ -194,6 +238,7 @@ ppc_allocate_vec_register(struct ppc_function *p) return i; } } + printf("OUT OF PPC VEC registers!\n"); return -1; } @@ -252,7 +297,8 @@ union vx_inst { }; static INLINE void -emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, + const char *format, boolean transpose) { union vx_inst inst; inst.inst.op = 4; @@ -261,6 +307,13 @@ emit_vx(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) inst.inst.vB = vB; inst.inst.op2 = op2; emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + if (transpose) + printf(format, vD, vB, vA); + else + printf(format, vD, vA, vB); + } } @@ -277,7 +330,8 @@ union vxr_inst { }; static INLINE void -emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) +emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, + const char *format) { union vxr_inst inst; inst.inst.op = 4; @@ -287,6 +341,10 @@ emit_vxr(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB) inst.inst.rC = 0; inst.inst.op2 = op2; emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf(format, vD, vA, vB); + } } @@ -303,7 +361,8 @@ union va_inst { }; static INLINE void -emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) +emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC, + const char *format) { union va_inst inst; inst.inst.op = 4; @@ -313,6 +372,10 @@ emit_va(struct ppc_function *p, uint op2, uint vD, uint vA, uint vB, uint vC) inst.inst.vC = vC; inst.inst.op2 = op2; emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf(format, vD, vA, vB, vC); + } } @@ -396,7 +459,8 @@ union x_inst { }; static INLINE void -emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2) +emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2, + const char *format) { union x_inst inst; inst.inst.op = op; @@ -406,6 +470,10 @@ emit_x(struct ppc_function *p, uint op, uint vrs, uint ra, uint rb, uint op2) inst.inst.op2 = op2; inst.inst.unused = 0x0; emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf(format, vrs, ra, rb); + } } @@ -420,7 +488,8 @@ union d_inst { }; static INLINE void -emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si) +emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si, + const char *format, boolean transpose) { union d_inst inst; assert(si >= -32768); @@ -430,6 +499,13 @@ emit_d(struct ppc_function *p, uint op, uint rt, uint ra, int si) inst.inst.ra = ra; inst.inst.si = (unsigned) (si & 0xffff); emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + if (transpose) + printf(format, rt, si, ra); + else + printf(format, rt, ra, si); + } } @@ -448,7 +524,7 @@ union a_inst { static INLINE void emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2, - uint rc) + uint rc, const char *format) { union a_inst inst; inst.inst.op = op; @@ -459,6 +535,10 @@ emit_a(struct ppc_function *p, uint op, uint frt, uint fra, uint frb, uint op2, inst.inst.op2 = op2; inst.inst.rc = rc; emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf(format, frt, fra, frb); + } } @@ -477,7 +557,7 @@ union xo_inst { static INLINE void emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe, - uint op2, uint rc) + uint op2, uint rc, const char *format) { union xo_inst inst; inst.inst.op = op; @@ -488,6 +568,10 @@ emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe, inst.inst.op2 = op2; inst.inst.rc = rc; emit_instruction(p, inst.bits); + if (p->print) { + indent(p); + printf(format, rt, ra, rb); + } } @@ -502,140 +586,142 @@ emit_xo(struct ppc_function *p, uint op, uint rt, uint ra, uint rb, uint oe, void ppc_vaddfp(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 10, vD, vA, vB); + emit_vx(p, 10, vD, vA, vB, "vaddfp\t%u, v%u, v%u\n", FALSE); } /** vector float substract */ void ppc_vsubfp(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 74, vD, vA, vB); + emit_vx(p, 74, vD, vA, vB, "vsubfp\tv%u, v%u, v%u\n", FALSE); } /** vector float min */ void ppc_vminfp(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1098, vD, vA, vB); + emit_vx(p, 1098, vD, vA, vB, "vminfp\tv%u, v%u, v%u\n", FALSE); } /** vector float max */ void ppc_vmaxfp(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1034, vD, vA, vB); + emit_vx(p, 1034, vD, vA, vB, "vmaxfp\tv%u, v%u, v%u\n", FALSE); } /** vector float mult add: vD = vA * vB + vC */ void ppc_vmaddfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) { - emit_va(p, 46, vD, vA, vC, vB); /* note arg order */ + /* note arg order */ + emit_va(p, 46, vD, vA, vC, vB, "vmaddfp\tv%u, v%u, v%u, v%u\n"); } /** vector float negative mult subtract: vD = vA - vB * vC */ void ppc_vnmsubfp(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) { - emit_va(p, 47, vD, vB, vA, vC); /* note arg order */ + /* note arg order */ + emit_va(p, 47, vD, vB, vA, vC, "vnmsubfp\tv%u, v%u, v%u, v%u\n"); } /** vector float compare greater than */ void ppc_vcmpgtfpx(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vxr(p, 710, vD, vA, vB); + emit_vxr(p, 710, vD, vA, vB, "vcmpgtfpx\tv%u, v%u, v%u"); } /** vector float compare greater than or equal to */ void ppc_vcmpgefpx(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vxr(p, 454, vD, vA, vB); + emit_vxr(p, 454, vD, vA, vB, "vcmpgefpx\tv%u, v%u, v%u"); } /** vector float compare equal */ void ppc_vcmpeqfpx(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vxr(p, 198, vD, vA, vB); + emit_vxr(p, 198, vD, vA, vB, "vcmpeqfpx\tv%u, v%u, v%u"); } /** vector float 2^x */ void ppc_vexptefp(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 394, vD, 0, vB); + emit_vx(p, 394, vD, 0, vB, "vexptefp\tv%u, 0%u, v%u\n", FALSE); } /** vector float log2(x) */ void ppc_vlogefp(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 458, vD, 0, vB); + emit_vx(p, 458, vD, 0, vB, "vlogefp\tv%u, 0%u, v%u\n", FALSE); } /** vector float reciprocol */ void ppc_vrefp(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 266, vD, 0, vB); + emit_vx(p, 266, vD, 0, vB, "vrefp\tv%u, 0%u, v%u\n", FALSE); } /** vector float reciprocol sqrt estimate */ void ppc_vrsqrtefp(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 330, vD, 0, vB); + emit_vx(p, 330, vD, 0, vB, "vrsqrtefp\tv%u, 0%u, v%u\n", FALSE); } /** vector float round to negative infinity */ void ppc_vrfim(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 714, vD, 0, vB); + emit_vx(p, 714, vD, 0, vB, "vrfim\tv%u, 0%u, v%u\n", FALSE); } /** vector float round to positive infinity */ void ppc_vrfip(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 650, vD, 0, vB); + emit_vx(p, 650, vD, 0, vB, "vrfip\tv%u, 0%u, v%u\n", FALSE); } /** vector float round to nearest int */ void ppc_vrfin(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 522, vD, 0, vB); + emit_vx(p, 522, vD, 0, vB, "vrfin\tv%u, 0%u, v%u\n", FALSE); } /** vector float round to int toward zero */ void ppc_vrfiz(struct ppc_function *p, uint vD, uint vB) { - emit_vx(p, 586, vD, 0, vB); + emit_vx(p, 586, vD, 0, vB, "vrfiz\tv%u, 0%u, v%u\n", FALSE); } -/** vector store: store vR at mem[vA+vB] */ +/** vector store: store vR at mem[rA+rB] */ void -ppc_stvx(struct ppc_function *p, uint vR, uint vA, uint vB) +ppc_stvx(struct ppc_function *p, uint vR, uint rA, uint rB) { - emit_x(p, 31, vR, vA, vB, 231); + emit_x(p, 31, vR, rA, rB, 231, "stvx\tv%u, r%u, r%u\n"); } -/** vector load: vR = mem[vA+vB] */ +/** vector load: vR = mem[rA+rB] */ void -ppc_lvx(struct ppc_function *p, uint vR, uint vA, uint vB) +ppc_lvx(struct ppc_function *p, uint vR, uint rA, uint rB) { - emit_x(p, 31, vR, vA, vB, 103); + emit_x(p, 31, vR, rA, rB, 103, "lvx\tv%u, r%u, r%u\n"); } /** load vector element word: vR = mem_word[ra+rb] */ void -ppc_lvewx(struct ppc_function *p, uint vr, uint ra, uint rb) +ppc_lvewx(struct ppc_function *p, uint vR, uint rA, uint rB) { - emit_x(p, 31, vr, ra, rb, 71); + emit_x(p, 31, vR, rA, rB, 71, "lvewx\tv%u, r%u, r%u\n"); } @@ -649,49 +735,63 @@ ppc_lvewx(struct ppc_function *p, uint vr, uint ra, uint rb) void ppc_vand(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1028, vD, vA, vB); + emit_vx(p, 1028, vD, vA, vB, "vand\tv%u, v%u, v%u\n", FALSE); } /** vector and complement */ void ppc_vandc(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1092, vD, vA, vB); + emit_vx(p, 1092, vD, vA, vB, "vandc\tv%u, v%u, v%u\n", FALSE); } /** vector or */ void ppc_vor(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1156, vD, vA, vB); + emit_vx(p, 1156, vD, vA, vB, "vor\tv%u, v%u, v%u\n", FALSE); } /** vector nor */ void ppc_vnor(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1284, vD, vA, vB); + emit_vx(p, 1284, vD, vA, vB, "vnor\tv%u, v%u, v%u\n", FALSE); } /** vector xor */ void ppc_vxor(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 1220, vD, vA, vB); + emit_vx(p, 1220, vD, vA, vB, "vxor\tv%u, v%u, v%u\n", FALSE); } /** Pseudo-instruction: vector move */ void ppc_vmove(struct ppc_function *p, uint vD, uint vA) { + boolean print = p->print; + p->print = FALSE; ppc_vor(p, vD, vA, vA); + if (print) { + indent(p); + printf("vor\tv%u, v%u, v%u \t# v%u = v%u\n", vD, vA, vA, vD, vA); + } + p->print = print; } /** Set vector register to {0,0,0,0} */ void ppc_vzero(struct ppc_function *p, uint vr) { + boolean print = p->print; + p->print = FALSE; ppc_vxor(p, vr, vr, vr); + if (print) { + indent(p); + printf("vxor\tv%u, v%u, v%u \t# v%u = {0,0,0,0}\n", vr, vr, vr, vr); + } + p->print = print; } @@ -705,35 +805,35 @@ ppc_vzero(struct ppc_function *p, uint vr) void ppc_vperm(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) { - emit_va(p, 43, vD, vA, vB, vC); + emit_va(p, 43, vD, vA, vB, vC, "vperm\tr%u, r%u, r%u, r%u"); } /** vector select */ void ppc_vsel(struct ppc_function *p, uint vD, uint vA, uint vB, uint vC) { - emit_va(p, 42, vD, vA, vB, vC); + emit_va(p, 42, vD, vA, vB, vC, "vsel\tr%u, r%u, r%u, r%u"); } /** vector splat byte */ void ppc_vspltb(struct ppc_function *p, uint vD, uint vB, uint imm) { - emit_vx(p, 42, vD, imm, vB); + emit_vx(p, 42, vD, imm, vB, "vspltb\tv%u, v%u, %u\n", TRUE); } /** vector splat half word */ void ppc_vsplthw(struct ppc_function *p, uint vD, uint vB, uint imm) { - emit_vx(p, 588, vD, imm, vB); + emit_vx(p, 588, vD, imm, vB, "vsplthw\tv%u, v%u, %u\n", TRUE); } /** vector splat word */ void ppc_vspltw(struct ppc_function *p, uint vD, uint vB, uint imm) { - emit_vx(p, 652, vD, imm, vB); + emit_vx(p, 652, vD, imm, vB, "vspltw\tv%u, v%u, %u\n", TRUE); } /** vector splat signed immediate word */ @@ -742,14 +842,14 @@ ppc_vspltisw(struct ppc_function *p, uint vD, int imm) { assert(imm >= -16); assert(imm < 15); - emit_vx(p, 908, vD, imm, 0); + emit_vx(p, 908, vD, imm, 0, "vspltisw\tv%u, %d, %u\n", FALSE); } /** vector shift left word: vD[word] = vA[word] << (vB[word] & 0x1f) */ void ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB) { - emit_vx(p, 388, vD, vA, vB); + emit_vx(p, 388, vD, vA, vB, "vslw\tv%u, v%u, v%u\n", FALSE); } @@ -763,63 +863,66 @@ ppc_vslw(struct ppc_function *p, uint vD, uint vA, uint vB) void ppc_addi(struct ppc_function *p, uint rt, uint ra, int imm) { - emit_d(p, 14, rt, ra, imm); + emit_d(p, 14, rt, ra, imm, "addi\tr%u, r%u, %d\n", FALSE); } /** rt = ra + (imm << 16) */ void ppc_addis(struct ppc_function *p, uint rt, uint ra, int imm) { - emit_d(p, 15, rt, ra, imm); + emit_d(p, 15, rt, ra, imm, "addis\tr%u, r%u, %d\n", FALSE); } /** rt = ra + rb */ void ppc_add(struct ppc_function *p, uint rt, uint ra, uint rb) { - emit_xo(p, 31, rt, ra, rb, 0, 266, 0); + emit_xo(p, 31, rt, ra, rb, 0, 266, 0, "add\tr%u, r%u, r%u\n"); } /** rt = ra AND ra */ void ppc_and(struct ppc_function *p, uint rt, uint ra, uint rb) { - emit_x(p, 31, ra, rt, rb, 28); /* note argument order */ + emit_x(p, 31, ra, rt, rb, 28, "and\tr%u, r%u, r%u\n"); /* note argument order */ } /** rt = ra AND imm */ void ppc_andi(struct ppc_function *p, uint rt, uint ra, int imm) { - emit_d(p, 28, ra, rt, imm); /* note argument order */ + /* note argument order */ + emit_d(p, 28, ra, rt, imm, "andi\tr%u, r%u, %d\n", FALSE); } /** rt = ra OR ra */ void ppc_or(struct ppc_function *p, uint rt, uint ra, uint rb) { - emit_x(p, 31, ra, rt, rb, 444); /* note argument order */ + emit_x(p, 31, ra, rt, rb, 444, "or\tr%u, r%u, r%u\n"); /* note argument order */ } /** rt = ra OR imm */ void ppc_ori(struct ppc_function *p, uint rt, uint ra, int imm) { - emit_d(p, 24, ra, rt, imm); /* note argument order */ + /* note argument order */ + emit_d(p, 24, ra, rt, imm, "ori\tr%u, r%u, %d\n", FALSE); } /** rt = ra XOR ra */ void ppc_xor(struct ppc_function *p, uint rt, uint ra, uint rb) { - emit_x(p, 31, ra, rt, rb, 316); /* note argument order */ + emit_x(p, 31, ra, rt, rb, 316, "xor\tr%u, r%u, r%u\n"); /* note argument order */ } /** rt = ra XOR imm */ void ppc_xori(struct ppc_function *p, uint rt, uint ra, int imm) { - emit_d(p, 26, ra, rt, imm); /* note argument order */ + /* note argument order */ + emit_d(p, 26, ra, rt, imm, "xori\tr%u, r%u, %d\n", FALSE); } /** pseudo instruction: move: rt = ra */ @@ -833,7 +936,14 @@ ppc_mr(struct ppc_function *p, uint rt, uint ra) void ppc_li(struct ppc_function *p, uint rt, int imm) { + boolean print = p->print; + p->print = FALSE; ppc_addi(p, rt, 0, imm); + if (print) { + indent(p); + printf("addi\tr%u, r0, %d \t# r%u = %d\n", rt, imm, rt, imm); + } + p->print = print; } /** rt = imm << 16 */ @@ -864,21 +974,21 @@ ppc_load_int(struct ppc_function *p, uint rt, int imm) void ppc_stwu(struct ppc_function *p, uint rs, uint ra, int d) { - emit_d(p, 37, rs, ra, d); + emit_d(p, 37, rs, ra, d, "stwu\tr%u, %d(r%u)\n", TRUE); } /** store rs at memory[(ra)+d] */ void ppc_stw(struct ppc_function *p, uint rs, uint ra, int d) { - emit_d(p, 36, rs, ra, d); + emit_d(p, 36, rs, ra, d, "stw\tr%u, %d(r%u)\n", TRUE); } /** Load rt = mem[(ra)+d]; then zero set high 32 bits to zero. */ void ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d) { - emit_d(p, 32, rt, ra, d); + emit_d(p, 32, rt, ra, d, "lwz\tr%u, %d(r%u)\n", TRUE); } @@ -891,42 +1001,42 @@ ppc_lwz(struct ppc_function *p, uint rt, uint ra, int d) void ppc_fadd(struct ppc_function *p, uint frt, uint fra, uint frb) { - emit_a(p, 63, frt, fra, frb, 21, 0); + emit_a(p, 63, frt, fra, frb, 21, 0, "fadd\tf%u, f%u, f%u\n"); } /** sub: frt = fra - frb */ void ppc_fsub(struct ppc_function *p, uint frt, uint fra, uint frb) { - emit_a(p, 63, frt, fra, frb, 20, 0); + emit_a(p, 63, frt, fra, frb, 20, 0, "fsub\tf%u, f%u, f%u\n"); } /** convert to int: rt = (int) ra */ void ppc_fctiwz(struct ppc_function *p, uint rt, uint fra) { - emit_x(p, 63, rt, 0, fra, 15); + emit_x(p, 63, rt, 0, fra, 15, "fctiwz\tr%u, r%u, r%u\n"); } /** store frs at mem[(ra)+offset] */ void ppc_stfs(struct ppc_function *p, uint frs, uint ra, int offset) { - emit_d(p, 52, frs, ra, offset); + emit_d(p, 52, frs, ra, offset, "stfs\tr%u, %d(r%u)\n", TRUE); } /** store frs at mem[(ra)+(rb)] */ void ppc_stfiwx(struct ppc_function *p, uint frs, uint ra, uint rb) { - emit_x(p, 31, frs, ra, rb, 983); + emit_x(p, 31, frs, ra, rb, 983, "stfiwx\tr%u, r%u, r%u\n"); } /** load frt = mem[(ra)+offset] */ void ppc_lfs(struct ppc_function *p, uint frt, uint ra, int offset) { - emit_d(p, 48, frt, ra, offset); + emit_d(p, 48, frt, ra, offset, "stfs\tr%u, %d(r%u)\n", TRUE); } @@ -942,6 +1052,10 @@ void ppc_blr(struct ppc_function *p) { emit_i(p, 18, 0, 0, 1); + if (p->print) { + indent(p); + printf("blr\n"); + } } /** Branch Conditional to Link Register (p. 36) */ @@ -949,6 +1063,10 @@ void ppc_bclr(struct ppc_function *p, uint condOp, uint branchHint, uint condReg) { emit_xl(p, 19, condOp, condReg, branchHint, 16, 0); + if (p->print) { + indent(p); + printf("bclr\t%u %u %u\n", condOp, branchHint, condReg); + } } /** Pseudo instruction: return from subroutine */ diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.h b/src/gallium/auxiliary/rtasm/rtasm_ppc.h index 08212a2a25..93e5f5187d 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.h @@ -1,6 +1,7 @@ /************************************************************************** * * Copyright (C) 2008 Tungsten Graphics, Inc. All Rights Reserved. + * Copyright (C) 2009 VMware, Inc. All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), @@ -58,6 +59,8 @@ struct ppc_function uint32_t reg_used; /** used/free general-purpose registers bitmask */ uint32_t fp_used; /** used/free floating point registers bitmask */ uint32_t vec_used; /** used/free vector registers bitmask */ + int indent; + boolean print; }; @@ -68,6 +71,10 @@ extern uint ppc_num_instructions(const struct ppc_function *p); extern void (*ppc_get_func( struct ppc_function *p ))( void ); extern void ppc_dump_func(const struct ppc_function *p); +extern void ppc_print_code(struct ppc_function *p, boolean enable); +extern void ppc_indent(struct ppc_function *p, int spaces); +extern void ppc_comment(struct ppc_function *p, int rel_indent, const char *s); + extern int ppc_reserve_register(struct ppc_function *p, int reg); extern int ppc_allocate_register(struct ppc_function *p); extern void ppc_release_register(struct ppc_function *p, int reg); -- cgit v1.2.3 From 2ebd969f0f0d0e45e6ac462059cf322f037775f1 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 9 Jan 2009 21:42:58 -0700 Subject: gallium: code to dump/debug PPC code (disabled) --- src/gallium/auxiliary/draw/draw_vs_ppc.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c index 8b75136144..d35db57d57 100644 --- a/src/gallium/auxiliary/draw/draw_vs_ppc.c +++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c @@ -199,6 +199,11 @@ draw_create_vs_ppc(struct draw_context *draw, ppc_init_func( &vs->ppc_program ); +#if 0 + ppc_print_code(&vs->ppc_program, TRUE); + ppc_indent(&vs->ppc_program, 8); +#endif + if (!tgsi_emit_ppc( (struct tgsi_token *) vs->base.state.tokens, &vs->ppc_program, (float (*)[4]) vs->base.immediates, -- cgit v1.2.3 From 1922ea965ac5c411cf5a3ed0ac7c8dbb873dba6c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 9 Jan 2009 21:46:08 -0700 Subject: gallium: emit comments in TGSI->PPC codegen --- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 92 ++++++++++++++++++++++++++++++++++- 1 file changed, 90 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index a92b1902e3..1a26504738 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -555,6 +555,18 @@ emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) int v0, v1; uint chan_index; + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_RSQ: + ppc_comment(gen->f, -4, "RSQ:"); + break; + case TGSI_OPCODE_RCP: + ppc_comment(gen->f, -4, "LCP:"); + break; + default: + assert(0); + } + + v0 = get_src_vec(gen, inst, 0, CHAN_X); v1 = ppc_allocate_vec_register(gen->f); @@ -584,6 +596,33 @@ static void emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) { uint chan_index; + + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ABS: + ppc_comment(gen->f, -4, "ABS:"); + break; + case TGSI_OPCODE_FLOOR: + ppc_comment(gen->f, -4, "FLOOR:"); + break; + case TGSI_OPCODE_FRAC: + ppc_comment(gen->f, -4, "FRAC:"); + break; + case TGSI_OPCODE_EXPBASE2: + ppc_comment(gen->f, -4, "EXPBASE2:"); + break; + case TGSI_OPCODE_LOGBASE2: + ppc_comment(gen->f, -4, "LOGBASE2:"); + break; + case TGSI_OPCODE_MOV: + ppc_comment(gen->f, -4, "MOV:"); + break; + case TGSI_OPCODE_SWZ: + ppc_comment(gen->f, -4, "SWZ:"); + break; + default: + assert(0); + } + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { int v0 = get_src_vec(gen, inst, 0, chan_index); /* v0 = srcreg[0] */ int v1 = get_dst_vec(gen, inst, chan_index); @@ -630,6 +669,26 @@ emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) int zero_vec = -1; uint chan; + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_ADD: + ppc_comment(gen->f, -4, "ADD:"); + break; + case TGSI_OPCODE_SUB: + ppc_comment(gen->f, -4, "SUB:"); + break; + case TGSI_OPCODE_MUL: + ppc_comment(gen->f, -4, "MUL:"); + break; + case TGSI_OPCODE_MIN: + ppc_comment(gen->f, -4, "MIN:"); + break; + case TGSI_OPCODE_MAX: + ppc_comment(gen->f, -4, "MAX:"); + break; + default: + assert(0); + } + if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) { zero_vec = ppc_allocate_vec_register(gen->f); ppc_vzero(gen->f, zero_vec); @@ -678,6 +737,17 @@ emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) { uint chan; + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_MAD: + ppc_comment(gen->f, -4, "MAD:"); + break; + case TGSI_OPCODE_LRP: + ppc_comment(gen->f, -4, "LRP:"); + break; + default: + assert(0); + } + FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { /* fetch src operands */ int v0 = get_src_vec(gen, inst, 0, chan); @@ -768,9 +838,23 @@ emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst) int v0, v1, v2; uint chan_index; + switch (inst->Instruction.Opcode) { + case TGSI_OPCODE_DP3: + ppc_comment(gen->f, -4, "DP3:"); + break; + case TGSI_OPCODE_DP4: + ppc_comment(gen->f, -4, "DP4:"); + break; + case TGSI_OPCODE_DPH: + ppc_comment(gen->f, -4, "DPH:"); + break; + default: + assert(0); + } + v2 = ppc_allocate_vec_register(gen->f); - ppc_vxor(gen->f, v2, v2, v2); /* v2 = {0, 0, 0, 0} */ + ppc_vzero(gen->f, v2); /* v2 = {0, 0, 0, 0} */ v0 = get_src_vec(gen, inst, 0, CHAN_X); /* v0 = src0.XXXX */ v1 = get_src_vec(gen, inst, 1, CHAN_X); /* v1 = src1.XXXX */ @@ -812,10 +896,11 @@ ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb) int t_vec = ppc_allocate_vec_register(f); int zero_vec = ppc_allocate_vec_register(f); + ppc_comment(f, -4, "POW:"); ppc_vzero(f, zero_vec); ppc_vlogefp(f, t_vec, va); /* t = log2(va) */ - ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb */ + ppc_vmaddfp(f, t_vec, t_vec, vb, zero_vec); /* t = t * vb + zero */ ppc_vexptefp(f, vr, t_vec); /* vr = 2^t */ ppc_release_vec_register(f, t_vec); @@ -1221,9 +1306,12 @@ emit_prologue(struct ppc_function *func) static void emit_epilogue(struct ppc_function *func) { + ppc_comment(func, -4, "Epilogue:"); ppc_return(func); /* XXX restore prev stack frame */ +#if 0 debug_printf("PPC: Emitted %u instructions\n", func->num_inst); +#endif } -- cgit v1.2.3 From 0c71313970be3d097814839577cd141d46666783 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 9 Jan 2009 21:48:54 -0700 Subject: gallium: fix register clobber bug in TGSI->PPC codegen When negating a src vector that's stored in a altivec register, need to put negated value into a new register so we don't upset the original value. This solves the dark colors in the mandelbrot GLSL demo. Also, use new predicate functions to check if a TGSI temp is stored in an altivec register. --- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 51 ++++++++++++++++++++++++++++------- 1 file changed, 41 insertions(+), 10 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 1a26504738..af180adbed 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -78,7 +78,7 @@ const float ppc_builtin_constants[] ALIGN16_ATTRIB = { * How many TGSI temps should be implemented with real PPC vector registers * rather than memory. */ -#define MAX_PPC_TEMPS 4 +#define MAX_PPC_TEMPS 3 struct reg_chan_vec @@ -157,6 +157,29 @@ init_gen_context(struct gen_context *gen, struct ppc_function *func) } +/** + * Is the given TGSI register stored as a real PPC vector register? + */ +static boolean +is_ppc_vec_temporary(const struct tgsi_full_src_register *reg) +{ + return (reg->SrcRegister.File == TGSI_FILE_TEMPORARY && + reg->SrcRegister.Index < MAX_PPC_TEMPS); +} + + +/** + * Is the given TGSI register stored as a real PPC vector register? + */ +static boolean +is_ppc_vec_temporary_dst(const struct tgsi_full_dst_register *reg) +{ + return (reg->DstRegister.File == TGSI_FILE_TEMPORARY && + reg->DstRegister.Index < MAX_PPC_TEMPS); +} + + + /** * All PPC vector load/store instructions form an effective address * by adding the contents of two registers. For example: @@ -285,7 +308,7 @@ emit_fetch(struct gen_context *gen, } break; case TGSI_FILE_TEMPORARY: - if (reg->SrcRegister.Index < MAX_PPC_TEMPS) { + if (is_ppc_vec_temporary(reg)) { /* use PPC vec register */ dst_vec = gen->temps_map[reg->SrcRegister.Index][swizzle]; } @@ -353,23 +376,33 @@ emit_fetch(struct gen_context *gen, uint sign_op = tgsi_util_get_full_src_register_sign_mode(reg, chan_index); if (sign_op != TGSI_UTIL_SIGN_KEEP) { int bit31_vec = gen_get_bit31_vec(gen); + int dst_vec2; + + if (is_ppc_vec_temporary(reg)) { + /* need to use a new temp */ + dst_vec2 = ppc_allocate_vec_register(gen->f); + } + else { + dst_vec2 = dst_vec; + } switch (sign_op) { case TGSI_UTIL_SIGN_CLEAR: /* vec = vec & ~bit31 */ - ppc_vandc(gen->f, dst_vec, dst_vec, bit31_vec); + ppc_vandc(gen->f, dst_vec2, dst_vec, bit31_vec); break; case TGSI_UTIL_SIGN_SET: /* vec = vec | bit31 */ - ppc_vor(gen->f, dst_vec, dst_vec, bit31_vec); + ppc_vor(gen->f, dst_vec2, dst_vec, bit31_vec); break; case TGSI_UTIL_SIGN_TOGGLE: /* vec = vec ^ bit31 */ - ppc_vxor(gen->f, dst_vec, dst_vec, bit31_vec); + ppc_vxor(gen->f, dst_vec2, dst_vec, bit31_vec); break; default: assert(0); } + return dst_vec2; } } @@ -452,8 +485,7 @@ release_src_vecs(struct gen_context *gen) uint i; for (i = 0; i < gen->num_regs; i++) { const const struct tgsi_full_src_register src = gen->regs[i].src; - if (!(src.SrcRegister.File == TGSI_FILE_TEMPORARY && - src.SrcRegister.Index < MAX_PPC_TEMPS)) { + if (!is_ppc_vec_temporary(&src)) { ppc_release_vec_register(gen->f, gen->regs[i].vec); } } @@ -469,8 +501,7 @@ get_dst_vec(struct gen_context *gen, { const struct tgsi_full_dst_register *reg = &inst->FullDstRegisters[0]; - if (reg->DstRegister.File == TGSI_FILE_TEMPORARY && - reg->DstRegister.Index < MAX_PPC_TEMPS) { + if (is_ppc_vec_temporary_dst(reg)) { int vec = gen->temps_map[reg->DstRegister.Index][chan_index]; return vec; } @@ -502,7 +533,7 @@ emit_store(struct gen_context *gen, } break; case TGSI_FILE_TEMPORARY: - if (reg->DstRegister.Index < MAX_PPC_TEMPS) { + if (is_ppc_vec_temporary_dst(reg)) { if (!free_vec) { int dst_vec = gen->temps_map[reg->DstRegister.Index][chan_index]; if (dst_vec != src_vec) -- cgit v1.2.3 From d4394bb768f17ac6a7e99116f2bc79c40dca3c5b Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 9 Jan 2009 21:51:22 -0700 Subject: gallium: remove unused struct type --- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index af180adbed..638fe09861 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -81,14 +81,6 @@ const float ppc_builtin_constants[] ALIGN16_ATTRIB = { #define MAX_PPC_TEMPS 3 -struct reg_chan_vec -{ - struct tgsi_full_src_register src; - uint chan; - uint vec; -}; - - /** * Context/state used during code gen. */ -- cgit v1.2.3 From 2acf07983f8bb134d639c9e652e7e0e3307e20f3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Fri, 9 Jan 2009 22:03:48 -0700 Subject: gallium: use tgsi_dump_instruction() instead of ppc_comment() --- src/gallium/auxiliary/tgsi/tgsi_ppc.c | 91 +++-------------------------------- 1 file changed, 7 insertions(+), 84 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c index 638fe09861..1a4db47501 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c +++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c @@ -578,18 +578,6 @@ emit_scalar_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) int v0, v1; uint chan_index; - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_RSQ: - ppc_comment(gen->f, -4, "RSQ:"); - break; - case TGSI_OPCODE_RCP: - ppc_comment(gen->f, -4, "LCP:"); - break; - default: - assert(0); - } - - v0 = get_src_vec(gen, inst, 0, CHAN_X); v1 = ppc_allocate_vec_register(gen->f); @@ -620,32 +608,6 @@ emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst) { uint chan_index; - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ABS: - ppc_comment(gen->f, -4, "ABS:"); - break; - case TGSI_OPCODE_FLOOR: - ppc_comment(gen->f, -4, "FLOOR:"); - break; - case TGSI_OPCODE_FRAC: - ppc_comment(gen->f, -4, "FRAC:"); - break; - case TGSI_OPCODE_EXPBASE2: - ppc_comment(gen->f, -4, "EXPBASE2:"); - break; - case TGSI_OPCODE_LOGBASE2: - ppc_comment(gen->f, -4, "LOGBASE2:"); - break; - case TGSI_OPCODE_MOV: - ppc_comment(gen->f, -4, "MOV:"); - break; - case TGSI_OPCODE_SWZ: - ppc_comment(gen->f, -4, "SWZ:"); - break; - default: - assert(0); - } - FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan_index) { int v0 = get_src_vec(gen, inst, 0, chan_index); /* v0 = srcreg[0] */ int v1 = get_dst_vec(gen, inst, chan_index); @@ -692,26 +654,6 @@ emit_binop(struct gen_context *gen, struct tgsi_full_instruction *inst) int zero_vec = -1; uint chan; - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_ADD: - ppc_comment(gen->f, -4, "ADD:"); - break; - case TGSI_OPCODE_SUB: - ppc_comment(gen->f, -4, "SUB:"); - break; - case TGSI_OPCODE_MUL: - ppc_comment(gen->f, -4, "MUL:"); - break; - case TGSI_OPCODE_MIN: - ppc_comment(gen->f, -4, "MIN:"); - break; - case TGSI_OPCODE_MAX: - ppc_comment(gen->f, -4, "MAX:"); - break; - default: - assert(0); - } - if (inst->Instruction.Opcode == TGSI_OPCODE_MUL) { zero_vec = ppc_allocate_vec_register(gen->f); ppc_vzero(gen->f, zero_vec); @@ -760,17 +702,6 @@ emit_triop(struct gen_context *gen, struct tgsi_full_instruction *inst) { uint chan; - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_MAD: - ppc_comment(gen->f, -4, "MAD:"); - break; - case TGSI_OPCODE_LRP: - ppc_comment(gen->f, -4, "LRP:"); - break; - default: - assert(0); - } - FOR_EACH_DST0_ENABLED_CHANNEL(*inst, chan) { /* fetch src operands */ int v0 = get_src_vec(gen, inst, 0, chan); @@ -861,20 +792,6 @@ emit_dotprod(struct gen_context *gen, struct tgsi_full_instruction *inst) int v0, v1, v2; uint chan_index; - switch (inst->Instruction.Opcode) { - case TGSI_OPCODE_DP3: - ppc_comment(gen->f, -4, "DP3:"); - break; - case TGSI_OPCODE_DP4: - ppc_comment(gen->f, -4, "DP4:"); - break; - case TGSI_OPCODE_DPH: - ppc_comment(gen->f, -4, "DPH:"); - break; - default: - assert(0); - } - v2 = ppc_allocate_vec_register(gen->f); ppc_vzero(gen->f, v2); /* v2 = {0, 0, 0, 0} */ @@ -919,7 +836,6 @@ ppc_vec_pow(struct ppc_function *f, int vr, int va, int vb) int t_vec = ppc_allocate_vec_register(f); int zero_vec = ppc_allocate_vec_register(f); - ppc_comment(f, -4, "POW:"); ppc_vzero(f, zero_vec); ppc_vlogefp(f, t_vec, va); /* t = log2(va) */ @@ -1359,6 +1275,7 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, unsigned ok = 1; uint num_immediates = 0; struct gen_context gen; + uint ic = 0; if (use_ppc_asm < 0) { /* If GALLIUM_NOPPC is set, don't use PPC codegen */ @@ -1391,6 +1308,12 @@ tgsi_emit_ppc(const struct tgsi_token *tokens, break; case TGSI_TOKEN_TYPE_INSTRUCTION: + if (func->print) { + _debug_printf("# "); + ic++; + tgsi_dump_instruction(&parse.FullToken.FullInstruction, ic); + } + ok = emit_instruction(&gen, &parse.FullToken.FullInstruction); if (!ok) { -- cgit v1.2.3 From c4a782041b19cb4a08712384b19be25b79acba3c Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Sun, 11 Jan 2009 14:22:00 -0700 Subject: cell: datatype clean-ups in SPE rtasm --- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c | 123 ++++++++++++++-------------- src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h | 81 +++++++++--------- 2 files changed, 99 insertions(+), 105 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c index 071bc2015c..53a0e722cf 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.c @@ -213,8 +213,8 @@ emit_instruction(struct spe_function *p, uint32_t inst_bits) -static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, unsigned rB, const char *name) +static void emit_RR(struct spe_function *p, unsigned op, int rT, + int rA, int rB, const char *name) { union spe_inst_RR inst; inst.inst.op = op; @@ -230,8 +230,8 @@ static void emit_RR(struct spe_function *p, unsigned op, unsigned rT, } -static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, unsigned rB, unsigned rC, const char *name) +static void emit_RRR(struct spe_function *p, unsigned op, int rT, + int rA, int rB, int rC, const char *name) { union spe_inst_RRR inst; inst.inst.op = op; @@ -248,8 +248,8 @@ static void emit_RRR(struct spe_function *p, unsigned op, unsigned rT, } -static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm, const char *name) +static void emit_RI7(struct spe_function *p, unsigned op, int rT, + int rA, int imm, const char *name) { union spe_inst_RI7 inst; inst.inst.op = op; @@ -266,8 +266,8 @@ static void emit_RI7(struct spe_function *p, unsigned op, unsigned rT, -static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm, const char *name) +static void emit_RI8(struct spe_function *p, unsigned op, int rT, + int rA, int imm, const char *name) { union spe_inst_RI8 inst; inst.inst.op = op; @@ -284,8 +284,8 @@ static void emit_RI8(struct spe_function *p, unsigned op, unsigned rT, -static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm, const char *name) +static void emit_RI10(struct spe_function *p, unsigned op, int rT, + int rA, int imm, const char *name) { union spe_inst_RI10 inst; inst.inst.op = op; @@ -302,8 +302,8 @@ static void emit_RI10(struct spe_function *p, unsigned op, unsigned rT, /** As above, but do range checking on signed immediate value */ -static void emit_RI10s(struct spe_function *p, unsigned op, unsigned rT, - unsigned rA, int imm, const char *name) +static void emit_RI10s(struct spe_function *p, unsigned op, int rT, + int rA, int imm, const char *name) { assert(imm <= 511); assert(imm >= -512); @@ -311,7 +311,7 @@ static void emit_RI10s(struct spe_function *p, unsigned op, unsigned rT, } -static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, +static void emit_RI16(struct spe_function *p, unsigned op, int rT, int imm, const char *name) { union spe_inst_RI16 inst; @@ -326,7 +326,7 @@ static void emit_RI16(struct spe_function *p, unsigned op, unsigned rT, } -static void emit_RI18(struct spe_function *p, unsigned op, unsigned rT, +static void emit_RI18(struct spe_function *p, unsigned op, int rT, int imm, const char *name) { union spe_inst_RI18 inst; @@ -348,61 +348,61 @@ void _name (struct spe_function *p) \ } #define EMIT_(_name, _op) \ -void _name (struct spe_function *p, unsigned rT) \ +void _name (struct spe_function *p, int rT) \ { \ emit_RR(p, _op, rT, 0, 0, __FUNCTION__); \ } #define EMIT_R(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA) \ +void _name (struct spe_function *p, int rT, int rA) \ { \ emit_RR(p, _op, rT, rA, 0, __FUNCTION__); \ } #define EMIT_RR(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) \ +void _name (struct spe_function *p, int rT, int rA, int rB) \ { \ emit_RR(p, _op, rT, rA, rB, __FUNCTION__); \ } #define EMIT_RRR(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, unsigned rB, unsigned rC) \ +void _name (struct spe_function *p, int rT, int rA, int rB, int rC) \ { \ emit_RRR(p, _op, rT, rA, rB, rC, __FUNCTION__); \ } #define EMIT_RI7(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +void _name (struct spe_function *p, int rT, int rA, int imm) \ { \ emit_RI7(p, _op, rT, rA, imm, __FUNCTION__); \ } #define EMIT_RI8(_name, _op, bias) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +void _name (struct spe_function *p, int rT, int rA, int imm) \ { \ emit_RI8(p, _op, rT, rA, bias - imm, __FUNCTION__); \ } #define EMIT_RI10(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +void _name (struct spe_function *p, int rT, int rA, int imm) \ { \ emit_RI10(p, _op, rT, rA, imm, __FUNCTION__); \ } #define EMIT_RI10s(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, unsigned rA, int imm) \ +void _name (struct spe_function *p, int rT, int rA, int imm) \ { \ emit_RI10s(p, _op, rT, rA, imm, __FUNCTION__); \ } #define EMIT_RI16(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, int imm) \ +void _name (struct spe_function *p, int rT, int imm) \ { \ emit_RI16(p, _op, rT, imm, __FUNCTION__); \ } #define EMIT_RI18(_name, _op) \ -void _name (struct spe_function *p, unsigned rT, int imm) \ +void _name (struct spe_function *p, int rT, int imm) \ { \ emit_RI18(p, _op, rT, imm, __FUNCTION__); \ } @@ -424,7 +424,7 @@ void _name (struct spe_function *p, int imm) \ */ void spe_init_func(struct spe_function *p, unsigned code_size) { - unsigned int i; + uint i; if (!code_size) code_size = 64; @@ -503,6 +503,7 @@ int spe_allocate_register(struct spe_function *p, int reg) */ void spe_release_register(struct spe_function *p, int reg) { + assert(reg >= 0); assert(reg < SPE_NUM_REGS); assert(p->regs[reg] == 1); @@ -517,7 +518,7 @@ void spe_release_register(struct spe_function *p, int reg) */ void spe_allocate_register_set(struct spe_function *p) { - unsigned int i; + uint i; /* Keep track of the set count. If it ever wraps around to 0, * we're in trouble. @@ -538,7 +539,7 @@ void spe_allocate_register_set(struct spe_function *p) void spe_release_register_set(struct spe_function *p) { - unsigned int i; + uint i; /* If the set count drops below zero, we're in trouble. */ assert(p->set_count > 0); @@ -599,7 +600,7 @@ spe_comment(struct spe_function *p, int rel_indent, const char *s) * Load quad word. * NOTE: offset is in bytes and the least significant 4 bits must be zero! */ -void spe_lqd(struct spe_function *p, unsigned rT, unsigned rA, int offset) +void spe_lqd(struct spe_function *p, int rT, int rA, int offset) { const boolean pSave = p->print; @@ -624,7 +625,7 @@ void spe_lqd(struct spe_function *p, unsigned rT, unsigned rA, int offset) * Store quad word. * NOTE: offset is in bytes and the least significant 4 bits must be zero! */ -void spe_stqd(struct spe_function *p, unsigned rT, unsigned rA, int offset) +void spe_stqd(struct spe_function *p, int rT, int rA, int offset) { const boolean pSave = p->print; @@ -653,51 +654,51 @@ void spe_stqd(struct spe_function *p, unsigned rT, unsigned rA, int offset) */ /** Branch Indirect to address in rA */ -void spe_bi(struct spe_function *p, unsigned rA, int d, int e) +void spe_bi(struct spe_function *p, int rA, int d, int e) { emit_RI7(p, 0x1a8, 0, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Interupt Return */ -void spe_iret(struct spe_function *p, unsigned rA, int d, int e) +void spe_iret(struct spe_function *p, int rA, int d, int e) { emit_RI7(p, 0x1aa, 0, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect and set link on external data */ -void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, int d, +void spe_bisled(struct spe_function *p, int rT, int rA, int d, int e) { emit_RI7(p, 0x1ab, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect and set link. Save PC in rT, jump to rA. */ -void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, int d, +void spe_bisl(struct spe_function *p, int rT, int rA, int d, int e) { emit_RI7(p, 0x1a9, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect if zero word. If rT.word[0]==0, jump to rA. */ -void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +void spe_biz(struct spe_function *p, int rT, int rA, int d, int e) { emit_RI7(p, 0x128, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect if non-zero word. If rT.word[0]!=0, jump to rA. */ -void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +void spe_binz(struct spe_function *p, int rT, int rA, int d, int e) { emit_RI7(p, 0x129, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect if zero halfword. If rT.halfword[1]==0, jump to rA. */ -void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +void spe_bihz(struct spe_function *p, int rT, int rA, int d, int e) { emit_RI7(p, 0x12a, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } /** Branch indirect if non-zero halfword. If rT.halfword[1]!=0, jump to rA. */ -void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, int d, int e) +void spe_bihnz(struct spe_function *p, int rT, int rA, int d, int e) { emit_RI7(p, 0x12b, rT, rA, (d << 5) | (e << 4), __FUNCTION__); } @@ -733,7 +734,7 @@ EMIT_R (spe_mtspr, 0x10c); void -spe_load_float(struct spe_function *p, unsigned rT, float x) +spe_load_float(struct spe_function *p, int rT, float x) { if (x == 0.0f) { spe_il(p, rT, 0x0); @@ -760,7 +761,7 @@ spe_load_float(struct spe_function *p, unsigned rT, float x) void -spe_load_int(struct spe_function *p, unsigned rT, int i) +spe_load_int(struct spe_function *p, int rT, int i) { if (-32768 <= i && i <= 32767) { spe_il(p, rT, i); @@ -772,7 +773,7 @@ spe_load_int(struct spe_function *p, unsigned rT, int i) } } -void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui) +void spe_load_uint(struct spe_function *p, int rT, uint ui) { /* If the whole value is in the lower 18 bits, use ila, which * doesn't sign-extend. Otherwise, if the two halfwords of @@ -793,7 +794,7 @@ void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui) ((ui & 0x00ff0000) == 0 || (ui & 0x00ff0000) == 0x00ff0000) && ((ui & 0xff000000) == 0 || (ui & 0xff000000) == 0xff000000) ) { - unsigned int mask = 0; + uint mask = 0; /* fsmbi duplicates each bit in the given mask eight times, * using a 16-bit value to initialize a 16-byte quadword. * Each 4-bit nybble of the mask corresponds to a full word @@ -822,7 +823,7 @@ void spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui) * Changes to one should be made in the other. */ void -spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +spe_and_uint(struct spe_function *p, int rT, int rA, uint ui) { /* If we can, emit a single instruction, either And Byte Immediate * (which uses the same constant across each byte), And Halfword Immediate @@ -832,7 +833,7 @@ spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) * * Otherwise, we'll need to use a temporary register. */ - unsigned int tmp; + uint tmp; /* If the upper 23 bits are all 0s or all 1s, sign extension * will work and we can use And Word Immediate @@ -863,7 +864,7 @@ spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) } /* Otherwise, we'll have to use a temporary register. */ - unsigned int tmp_reg = spe_allocate_available_register(p); + int tmp_reg = spe_allocate_available_register(p); spe_load_uint(p, tmp_reg, ui); spe_and(p, rT, rA, tmp_reg); spe_release_register(p, tmp_reg); @@ -875,7 +876,7 @@ spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) * Changes to one should be made in the other. */ void -spe_xor_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui) { /* If we can, emit a single instruction, either Exclusive Or Byte * Immediate (which uses the same constant across each byte), Exclusive @@ -885,7 +886,7 @@ spe_xor_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) * * Otherwise, we'll need to use a temporary register. */ - unsigned int tmp; + uint tmp; /* If the upper 23 bits are all 0s or all 1s, sign extension * will work and we can use Exclusive Or Word Immediate @@ -916,14 +917,14 @@ spe_xor_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) } /* Otherwise, we'll have to use a temporary register. */ - unsigned int tmp_reg = spe_allocate_available_register(p); + int tmp_reg = spe_allocate_available_register(p); spe_load_uint(p, tmp_reg, ui); spe_xor(p, rT, rA, tmp_reg); spe_release_register(p, tmp_reg); } void -spe_compare_equal_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui) { /* If the comparison value is 9 bits or less, it fits inside a * Compare Equal Word Immediate instruction. @@ -933,7 +934,7 @@ spe_compare_equal_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigne } /* Otherwise, we're going to have to load a word first. */ else { - unsigned int tmp_reg = spe_allocate_available_register(p); + int tmp_reg = spe_allocate_available_register(p); spe_load_uint(p, tmp_reg, ui); spe_ceq(p, rT, rA, tmp_reg); spe_release_register(p, tmp_reg); @@ -941,7 +942,7 @@ spe_compare_equal_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigne } void -spe_compare_greater_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui) +spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui) { /* If the comparison value is 10 bits or less, it fits inside a * Compare Logical Greater Than Word Immediate instruction. @@ -951,7 +952,7 @@ spe_compare_greater_uint(struct spe_function *p, unsigned rT, unsigned rA, unsig } /* Otherwise, we're going to have to load a word first. */ else { - unsigned int tmp_reg = spe_allocate_available_register(p); + int tmp_reg = spe_allocate_available_register(p); spe_load_uint(p, tmp_reg, ui); spe_clgt(p, rT, rA, tmp_reg); spe_release_register(p, tmp_reg); @@ -959,10 +960,10 @@ spe_compare_greater_uint(struct spe_function *p, unsigned rT, unsigned rA, unsig } void -spe_splat(struct spe_function *p, unsigned rT, unsigned rA) +spe_splat(struct spe_function *p, int rT, int rA) { /* Use a temporary, just in case rT == rA */ - unsigned int tmp_reg = spe_allocate_available_register(p); + int tmp_reg = spe_allocate_available_register(p); /* Duplicate bytes 0, 1, 2, and 3 across the whole register */ spe_ila(p, tmp_reg, 0x00010203); spe_shufb(p, rT, rA, rA, tmp_reg); @@ -971,14 +972,14 @@ spe_splat(struct spe_function *p, unsigned rT, unsigned rA) void -spe_complement(struct spe_function *p, unsigned rT, unsigned rA) +spe_complement(struct spe_function *p, int rT, int rA) { spe_nor(p, rT, rA, rA); } void -spe_move(struct spe_function *p, unsigned rT, unsigned rA) +spe_move(struct spe_function *p, int rT, int rA) { /* Use different instructions depending on the instruction address * to take advantage of the dual pipelines. @@ -991,14 +992,14 @@ spe_move(struct spe_function *p, unsigned rT, unsigned rA) void -spe_zero(struct spe_function *p, unsigned rT) +spe_zero(struct spe_function *p, int rT) { spe_xor(p, rT, rT, rT); } void -spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word) +spe_splat_word(struct spe_function *p, int rT, int rA, int word) { assert(word >= 0); assert(word <= 3); @@ -1038,9 +1039,9 @@ spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word) * like "x = min(x, a)", we always allocate a new register to be safe. */ void -spe_float_min(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) +spe_float_min(struct spe_function *p, int rT, int rA, int rB) { - unsigned int compare_reg = spe_allocate_available_register(p); + int compare_reg = spe_allocate_available_register(p); spe_fcgt(p, compare_reg, rA, rB); spe_selb(p, rT, rA, rB, compare_reg); spe_release_register(p, compare_reg); @@ -1055,9 +1056,9 @@ spe_float_min(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) * so that the larger of the two is selected instead of the smaller. */ void -spe_float_max(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB) +spe_float_max(struct spe_function *p, int rT, int rA, int rB) { - unsigned int compare_reg = spe_allocate_available_register(p); + int compare_reg = spe_allocate_available_register(p); spe_fcgt(p, compare_reg, rA, rB); spe_selb(p, rT, rB, rA, compare_reg); spe_release_register(p, compare_reg); diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h index f9ad2acacd..65d9c77415 100644 --- a/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h +++ b/src/gallium/auxiliary/rtasm/rtasm_ppc_spe.h @@ -79,9 +79,9 @@ struct spe_function }; -extern void spe_init_func(struct spe_function *p, unsigned code_size); +extern void spe_init_func(struct spe_function *p, uint code_size); extern void spe_release_func(struct spe_function *p); -extern unsigned spe_code_size(const struct spe_function *p); +extern uint spe_code_size(const struct spe_function *p); extern int spe_allocate_available_register(struct spe_function *p); extern int spe_allocate_register(struct spe_function *p, int reg); @@ -89,8 +89,7 @@ extern void spe_release_register(struct spe_function *p, int reg); extern void spe_allocate_register_set(struct spe_function *p); extern void spe_release_register_set(struct spe_function *p); -extern unsigned -spe_get_registers_used(const struct spe_function *p, ubyte used[]); +extern uint spe_get_registers_used(const struct spe_function *p, ubyte used[]); extern void spe_print_code(struct spe_function *p, boolean enable); extern void spe_indent(struct spe_function *p, int spaces); @@ -103,31 +102,25 @@ extern void spe_comment(struct spe_function *p, int rel_indent, const char *s); #define EMIT(_name, _op) \ extern void _name (struct spe_function *p); #define EMIT_(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT); + extern void _name (struct spe_function *p, int rT); #define EMIT_R(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA); + extern void _name (struct spe_function *p, int rT, int rA); #define EMIT_RR(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - unsigned rB); + extern void _name (struct spe_function *p, int rT, int rA, int rB); #define EMIT_RRR(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - unsigned rB, unsigned rC); + extern void _name (struct spe_function *p, int rT, int rA, int rB, int rC); #define EMIT_RI7(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm); + extern void _name (struct spe_function *p, int rT, int rA, int imm); #define EMIT_RI8(_name, _op, bias) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm); + extern void _name (struct spe_function *p, int rT, int rA, int imm); #define EMIT_RI10(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm); + extern void _name (struct spe_function *p, int rT, int rA, int imm); #define EMIT_RI10s(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, unsigned rA, \ - int imm); + extern void _name (struct spe_function *p, int rT, int rA, int imm); #define EMIT_RI16(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, int imm); + extern void _name (struct spe_function *p, int rT, int imm); #define EMIT_RI18(_name, _op) \ - extern void _name (struct spe_function *p, unsigned rT, int imm); + extern void _name (struct spe_function *p, int rT, int imm); #define EMIT_I16(_name, _op) \ extern void _name (struct spe_function *p, int imm); #define UNDEF_EMIT_MACROS @@ -301,82 +294,82 @@ EMIT_RI16(spe_brhz, 0x044) EMIT (spe_lnop, 0x001) extern void -spe_lqd(struct spe_function *p, unsigned rT, unsigned rA, int offset); +spe_lqd(struct spe_function *p, int rT, int rA, int offset); extern void -spe_stqd(struct spe_function *p, unsigned rT, unsigned rA, int offset); +spe_stqd(struct spe_function *p, int rT, int rA, int offset); -extern void spe_bi(struct spe_function *p, unsigned rA, int d, int e); -extern void spe_iret(struct spe_function *p, unsigned rA, int d, int e); -extern void spe_bisled(struct spe_function *p, unsigned rT, unsigned rA, +extern void spe_bi(struct spe_function *p, int rA, int d, int e); +extern void spe_iret(struct spe_function *p, int rA, int d, int e); +extern void spe_bisled(struct spe_function *p, int rT, int rA, int d, int e); -extern void spe_bisl(struct spe_function *p, unsigned rT, unsigned rA, +extern void spe_bisl(struct spe_function *p, int rT, int rA, int d, int e); -extern void spe_biz(struct spe_function *p, unsigned rT, unsigned rA, +extern void spe_biz(struct spe_function *p, int rT, int rA, int d, int e); -extern void spe_binz(struct spe_function *p, unsigned rT, unsigned rA, +extern void spe_binz(struct spe_function *p, int rT, int rA, int d, int e); -extern void spe_bihz(struct spe_function *p, unsigned rT, unsigned rA, +extern void spe_bihz(struct spe_function *p, int rT, int rA, int d, int e); -extern void spe_bihnz(struct spe_function *p, unsigned rT, unsigned rA, +extern void spe_bihnz(struct spe_function *p, int rT, int rA, int d, int e); /** Load/splat immediate float into rT. */ extern void -spe_load_float(struct spe_function *p, unsigned rT, float x); +spe_load_float(struct spe_function *p, int rT, float x); /** Load/splat immediate int into rT. */ extern void -spe_load_int(struct spe_function *p, unsigned rT, int i); +spe_load_int(struct spe_function *p, int rT, int i); /** Load/splat immediate unsigned int into rT. */ extern void -spe_load_uint(struct spe_function *p, unsigned rT, unsigned int ui); +spe_load_uint(struct spe_function *p, int rT, uint ui); /** And immediate value into rT. */ extern void -spe_and_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui); +spe_and_uint(struct spe_function *p, int rT, int rA, uint ui); /** Xor immediate value into rT. */ extern void -spe_xor_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui); +spe_xor_uint(struct spe_function *p, int rT, int rA, uint ui); /** Compare equal with immediate value. */ extern void -spe_compare_equal_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui); +spe_compare_equal_uint(struct spe_function *p, int rT, int rA, uint ui); /** Compare greater with immediate value. */ extern void -spe_compare_greater_uint(struct spe_function *p, unsigned rT, unsigned rA, unsigned int ui); +spe_compare_greater_uint(struct spe_function *p, int rT, int rA, uint ui); /** Replicate word 0 of rA across rT. */ extern void -spe_splat(struct spe_function *p, unsigned rT, unsigned rA); +spe_splat(struct spe_function *p, int rT, int rA); /** rT = complement_all_bits(rA). */ extern void -spe_complement(struct spe_function *p, unsigned rT, unsigned rA); +spe_complement(struct spe_function *p, int rT, int rA); /** rT = rA. */ extern void -spe_move(struct spe_function *p, unsigned rT, unsigned rA); +spe_move(struct spe_function *p, int rT, int rA); /** rT = {0,0,0,0}. */ extern void -spe_zero(struct spe_function *p, unsigned rT); +spe_zero(struct spe_function *p, int rT); /** rT = splat(rA, word) */ extern void -spe_splat_word(struct spe_function *p, unsigned rT, unsigned rA, int word); +spe_splat_word(struct spe_function *p, int rT, int rA, int word); /** rT = float min(rA, rB) */ extern void -spe_float_min(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB); +spe_float_min(struct spe_function *p, int rT, int rA, int rB); /** rT = float max(rA, rB) */ extern void -spe_float_max(struct spe_function *p, unsigned rT, unsigned rA, unsigned rB); +spe_float_max(struct spe_function *p, int rT, int rA, int rB); /* Floating-point instructions -- cgit v1.2.3 From f586c31fa6259757ae92868b63783377d7943389 Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Mon, 12 Jan 2009 12:34:27 +0100 Subject: gallivm: Adapt to header file move in LLVM 2.4. --- src/gallium/auxiliary/gallivm/gallivm.cpp | 2 +- src/gallium/auxiliary/gallivm/gallivm_cpu.cpp | 2 +- src/gallium/auxiliary/gallivm/instructions.cpp | 2 +- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 2 +- src/gallium/auxiliary/gallivm/tgsitollvm.cpp | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/gallivm.cpp b/src/gallium/auxiliary/gallivm/gallivm.cpp index 29adeea47d..f4af5cc8ad 100644 --- a/src/gallium/auxiliary/gallivm/gallivm.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm.cpp @@ -53,7 +53,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp index 93a9748bdb..1bd00a0c2a 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_cpu.cpp @@ -56,7 +56,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/src/gallium/auxiliary/gallivm/instructions.cpp b/src/gallium/auxiliary/gallivm/instructions.cpp index 599975d5ad..ee8162efce 100644 --- a/src/gallium/auxiliary/gallivm/instructions.cpp +++ b/src/gallium/auxiliary/gallivm/instructions.cpp @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index d5600fd22d..ad57acbe1a 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp index c11b88af9e..6b18a68fe6 100644 --- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp +++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3 From f43e621e2207f819f756d9b9539b2a25b7b936fe Mon Sep 17 00:00:00 2001 From: Michel Dänzer Date: Mon, 12 Jan 2009 12:39:31 +0100 Subject: gallivm: Print error message from ParseBitcodeFile() in case it fails. --- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index ad57acbe1a..f93a31d54b 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -206,11 +206,12 @@ llvm::Module * InstructionsSoa::currentModule() const void InstructionsSoa::createBuiltins() { + std::string ErrMsg; MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( (const char*)&soabuiltins_data[0], (const char*)&soabuiltins_data[Elements(soabuiltins_data)]); - m_builtins = ParseBitcodeFile(buffer); - std::cout<<"Builtins created at "< Date: Mon, 12 Jan 2009 15:05:05 +0100 Subject: gallivm: Make sure the bitcode buffer is followed by a 0 byte. May fail to parse otherwise. --- src/gallium/auxiliary/gallivm/Makefile | 4 ++-- src/gallium/auxiliary/gallivm/gallivm_builtins.cpp | 2 +- src/gallium/auxiliary/gallivm/instructionssoa.cpp | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/gallivm/Makefile b/src/gallium/auxiliary/gallivm/Makefile index c3f7bfba93..5a96d94ec3 100644 --- a/src/gallium/auxiliary/gallivm/Makefile +++ b/src/gallium/auxiliary/gallivm/Makefile @@ -66,12 +66,12 @@ depend: $(C_SOURCES) $(CPP_SOURCES) $(ASM_SOURCES) $(INC_SOURCES) gallivm_builtins.cpp: llvm_builtins.c clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp1.bin - (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@ + (echo "static const unsigned char llvm_builtins_data[] = {"; od -txC temp1.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ rm temp1.bin gallivmsoabuiltins.cpp: soabuiltins.c clang --emit-llvm < $< |llvm-as|opt -std-compile-opts > temp2.bin - (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/};/") >$@ + (echo "static const unsigned char soabuiltins_data[] = {"; od -txC temp2.bin | sed -e "s/^[0-9]*//" -e s"/ \([0-9a-f][0-9a-f]\)/0x\1,/g" -e"\$$d" | sed -e"\$$s/,$$/,0x00};/") >$@ rm temp2.bin # Emacs tags diff --git a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp index fcc5c05794..634bac0150 100644 --- a/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp +++ b/src/gallium/auxiliary/gallivm/gallivm_builtins.cpp @@ -137,4 +137,4 @@ static const unsigned char llvm_builtins_data[] = { 0x58,0x85,0x05,0x14,0xbe,0x34,0x45,0xb5,0x21,0x10,0x82,0x23,0x15,0x46,0x30,0x2c, 0xc8,0x64,0x02,0x06,0xf0,0x3c,0x91,0x73,0x19,0x00,0xe1,0x4b,0x53,0x64,0x0a,0x84, 0x84,0x34,0x85,0x25,0x0c,0x92,0x20,0x59,0xc1,0x20,0x30,0x8f,0x2d,0x10,0x95,0x84, -0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; +0x34,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00}; diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp index f93a31d54b..925e948763 100644 --- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp +++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp @@ -209,7 +209,7 @@ void InstructionsSoa::createBuiltins() std::string ErrMsg; MemoryBuffer *buffer = MemoryBuffer::getMemBuffer( (const char*)&soabuiltins_data[0], - (const char*)&soabuiltins_data[Elements(soabuiltins_data)]); + (const char*)&soabuiltins_data[Elements(soabuiltins_data) - 1]); m_builtins = ParseBitcodeFile(buffer, &ErrMsg); std::cout<<"Builtins created at "< Date: Wed, 14 Jan 2009 11:39:12 +0000 Subject: gallium: Disable memory debugging for Windows OGL. Unfortunately both Mesa and Gallium use the same defines for memory allocation (MALLOC, FREE, etc), and worse, some times memory is allocated with one set and freed with the other set, causing the homegrown memory debugger to trip on itself. In the future mesa and gallium should use different names, but for now, memory debugging on Windows will have to be carried with different tools.. --- src/gallium/auxiliary/util/u_memory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h index 626b13af83..1a6b596421 100644 --- a/src/gallium/auxiliary/util/u_memory.h +++ b/src/gallium/auxiliary/util/u_memory.h @@ -52,7 +52,7 @@ extern "C" { #endif -#if defined(PIPE_OS_WINDOWS) && defined(DEBUG) +#if defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) && defined(DEBUG) /* memory debugging */ -- cgit v1.2.3 From 8f3fac6107460b6d9b011b5c76246468bb16004b Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Sat, 17 Jan 2009 18:45:20 +0000 Subject: debug: add noprefix version of debug_dump_enum --- src/gallium/auxiliary/util/p_debug.c | 26 ++++++++++++++++++++++++++ src/gallium/include/pipe/p_debug.h | 5 +++++ 2 files changed, 31 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/p_debug.c b/src/gallium/auxiliary/util/p_debug.c index acdfa211c8..f373f941dd 100644 --- a/src/gallium/auxiliary/util/p_debug.c +++ b/src/gallium/auxiliary/util/p_debug.c @@ -405,6 +405,32 @@ debug_dump_enum(const struct debug_named_value *names, } +const char * +debug_dump_enum_noprefix(const struct debug_named_value *names, + const char *prefix, + unsigned long value) +{ + static char rest[64]; + + while(names->name) { + if(names->value == value) { + const char *name = names->name; + while (*name == *prefix) { + name++; + prefix++; + } + return name; + } + ++names; + } + + + + util_snprintf(rest, sizeof(rest), "0x%08lx", value); + return rest; +} + + const char * debug_dump_flags(const struct debug_named_value *names, unsigned long value) diff --git a/src/gallium/include/pipe/p_debug.h b/src/gallium/include/pipe/p_debug.h index 3b00fb9aa8..e9c95982dd 100644 --- a/src/gallium/include/pipe/p_debug.h +++ b/src/gallium/include/pipe/p_debug.h @@ -261,6 +261,11 @@ const char * debug_dump_enum(const struct debug_named_value *names, unsigned long value); +const char * +debug_dump_enum_noprefix(const struct debug_named_value *names, + const char *prefix, + unsigned long value); + /** * Convert binary flags value to a string. -- cgit v1.2.3 From 1907135235a684872706d1a28ea8e2b4e1b6e7d3 Mon Sep 17 00:00:00 2001 From: Brian Paul Date: Mon, 12 Jan 2009 12:01:09 -0700 Subject: tgsi: change an if to an else-if, added const qual, added comments --- src/gallium/auxiliary/tgsi/tgsi_scan.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_scan.c b/src/gallium/auxiliary/tgsi/tgsi_scan.c index 1239f6c076..d02205a63e 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_scan.c +++ b/src/gallium/auxiliary/tgsi/tgsi_scan.c @@ -43,6 +43,9 @@ /** + * Scan the given TGSI shader to collect information such as number of + * registers used, special instructions used, etc. + * \return info the result of the scan */ void tgsi_scan_shader(const struct tgsi_token *tokens, @@ -115,7 +118,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, { const struct tgsi_full_declaration *fulldecl = &parse.FullToken.FullDeclaration; - uint file = fulldecl->Declaration.File; + const uint file = fulldecl->Declaration.File; uint reg; for (reg = fulldecl->DeclarationRange.First; reg <= fulldecl->DeclarationRange.Last; @@ -131,8 +134,7 @@ tgsi_scan_shader(const struct tgsi_token *tokens, info->input_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex; info->num_inputs++; } - - if (file == TGSI_FILE_OUTPUT) { + else if (file == TGSI_FILE_OUTPUT) { info->output_semantic_name[reg] = (ubyte)fulldecl->Semantic.SemanticName; info->output_semantic_index[reg] = (ubyte)fulldecl->Semantic.SemanticIndex; info->num_outputs++; -- cgit v1.2.3 From bc0e00ad42ee651701ef1e211d36ee92acf18d6f Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 19 Jan 2009 19:43:21 +0000 Subject: util: add reduced prim helper --- src/gallium/auxiliary/util/u_prim.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h index e45e84ded2..d7c3995dbf 100644 --- a/src/gallium/auxiliary/util/u_prim.h +++ b/src/gallium/auxiliary/util/u_prim.h @@ -119,4 +119,20 @@ static INLINE boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr ) } +static INLINE boolean u_reduced_prim( unsigned pipe_prim ) +{ + switch (pipe_prim) { + case PIPE_PRIM_POINTS: + return PIPE_PRIM_POINTS; + + case PIPE_PRIM_LINES: + case PIPE_PRIM_LINE_STRIP: + case PIPE_PRIM_LINE_LOOP: + return PIPE_PRIM_LINES; + + default: + return PIPE_PRIM_TRIANGLES; + } +} + #endif -- cgit v1.2.3 From 5897383344da3320d158c26adae05de35480471f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Tue, 20 Jan 2009 12:22:49 +0000 Subject: gallium: Remove the standalone surfaces. This commit is mostly just a cosmetic change that cleans-up the interfaces, replacing pipe_winsys::surface_* calls by /** * Allocate storage for a display target surface. * * Often surfaces which are meant to be blitted to the front screen (i.e., * display targets) must be allocated with special characteristics, memory * pools, or obtained directly from the windowing system. * * This callback is invoked by the pipe_screenwhen creating a texture marked * with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET flag to get the underlying * buffer storage. */ struct pipe_buffer *(*surface_buffer_create)(struct pipe_winsys *ws, unsigned width, unsigned height, enum pipe_format format, unsigned usage, unsigned *stride); Most drivers were updated but not all were tested. Use the softpipe pipe driver and the xlib winsys changes as a reference when fixing other drivers. --- src/gallium/auxiliary/util/u_timed_winsys.c | 45 ++---------- src/gallium/drivers/i915simple/i915_texture.c | 1 - src/gallium/drivers/i965simple/brw_tex_layout.c | 7 +- src/gallium/drivers/nv04/nv04_miptree.c | 6 +- src/gallium/drivers/nv10/nv10_miptree.c | 4 +- src/gallium/drivers/nv20/nv20_miptree.c | 1 - src/gallium/drivers/nv30/nv30_miptree.c | 1 - src/gallium/drivers/nv40/nv40_miptree.c | 1 - src/gallium/drivers/nv50/nv50_miptree.c | 15 ++-- src/gallium/drivers/softpipe/sp_texture.c | 43 +++--------- src/gallium/drivers/softpipe/sp_texture.h | 2 +- src/gallium/drivers/trace/tr_texture.c | 1 - src/gallium/drivers/trace/tr_winsys.c | 81 +++++----------------- src/gallium/include/pipe/p_inlines.h | 44 ++++-------- src/gallium/include/pipe/p_state.h | 4 +- src/gallium/include/pipe/p_winsys.h | 36 +++++----- .../state_trackers/python/st_softpipe_winsys.c | 70 ++++--------------- .../winsys/drm/intel/common/intel_be_device.c | 46 +++++------- src/gallium/winsys/egl_xlib/sw_winsys.c | 70 ++++--------------- .../winsys/g3dvl/nouveau/nouveau_winsys_pipe.c | 35 ++++------ src/gallium/winsys/g3dvl/xsp_winsys.c | 67 ++++-------------- src/gallium/winsys/gdi/gdi_softpipe_winsys.c | 68 ++++-------------- src/gallium/winsys/xlib/xlib_brw_screen.c | 63 ++++------------- src/gallium/winsys/xlib/xlib_cell.c | 80 +++++---------------- src/gallium/winsys/xlib/xlib_softpipe.c | 80 +++++---------------- 25 files changed, 217 insertions(+), 654 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_timed_winsys.c b/src/gallium/auxiliary/util/u_timed_winsys.c index 8beb3b4c88..dc3c9be595 100644 --- a/src/gallium/auxiliary/util/u_timed_winsys.c +++ b/src/gallium/auxiliary/util/u_timed_winsys.c @@ -205,34 +205,18 @@ timed_flush_frontbuffer( struct pipe_winsys *winsys, -static struct pipe_surface * -timed_surface_alloc(struct pipe_winsys *winsys) -{ - struct pipe_winsys *backend = timed_winsys(winsys)->backend; - uint64_t start = time_start(); - - struct pipe_surface *surf = backend->surface_alloc( backend ); - - time_finish(winsys, start, 6, __FUNCTION__); - - return surf; -} - - - -static int -timed_surface_alloc_storage(struct pipe_winsys *winsys, - struct pipe_surface *surf, +static struct pipe_buffer * +timed_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, enum pipe_format format, - unsigned flags, - unsigned tex_usage) + unsigned usage, + unsigned *stride) { struct pipe_winsys *backend = timed_winsys(winsys)->backend; uint64_t start = time_start(); - int ret = backend->surface_alloc_storage( backend, surf, width, height, - format, flags, tex_usage ); + struct pipe_buffer *ret = backend->surface_buffer_create( backend, width, height, + format, usage, stride ); time_finish(winsys, start, 7, __FUNCTION__); @@ -240,19 +224,6 @@ timed_surface_alloc_storage(struct pipe_winsys *winsys, } -static void -timed_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) -{ - struct pipe_winsys *backend = timed_winsys(winsys)->backend; - uint64_t start = time_start(); - - backend->surface_release( backend, s ); - - time_finish(winsys, start, 8, __FUNCTION__); -} - - - static const char * timed_get_name( struct pipe_winsys *winsys ) { @@ -331,9 +302,7 @@ struct pipe_winsys *u_timed_winsys_create( struct pipe_winsys *backend ) ws->base.buffer_create = timed_buffer_create; ws->base.flush_frontbuffer = timed_flush_frontbuffer; ws->base.get_name = timed_get_name; - ws->base.surface_alloc = timed_surface_alloc; - ws->base.surface_alloc_storage = timed_surface_alloc_storage; - ws->base.surface_release = timed_surface_release; + ws->base.surface_buffer_create = timed_surface_buffer_create; ws->base.fence_reference = timed_fence_reference; ws->base.fence_signalled = timed_fence_signalled; ws->base.fence_finish = timed_fence_finish; diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 2f5459af67..af823f2d3c 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -683,7 +683,6 @@ i915_get_tex_surface(struct pipe_screen *screen, ps = CALLOC_STRUCT(pipe_surface); if (ps) { ps->refcount = 1; - ps->winsys = ws; pipe_texture_reference(&ps->texture, pt); pipe_buffer_reference(screen, &ps->buffer, tex->buffer); ps->format = pt->format; diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index cc0c665e02..12e2e02cfd 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -365,10 +365,10 @@ brw_get_tex_surface_screen(struct pipe_screen *screen, assert(zslice == 0); } - ps = ws->surface_alloc(ws); + ps = CALLOC_STRUCT(pipe_surface); if (ps) { - assert(ps->format); - assert(ps->refcount); + ps->refcount = 1; + pipe_texture_reference(&ps->texture, pt); winsys_buffer_reference(ws, &ps->buffer, tex->buffer); ps->format = pt->format; ps->width = pt->width[level]; @@ -378,6 +378,7 @@ brw_get_tex_surface_screen(struct pipe_screen *screen, ps->nblocksy = pt->nblocksy[level]; ps->stride = tex->stride; ps->offset = offset; + ps->status = PIPE_SURFACE_STATUS_DEFINED; } return ps; } diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c index 0cbb91e187..094c38256b 100644 --- a/src/gallium/drivers/nv04/nv04_miptree.c +++ b/src/gallium/drivers/nv04/nv04_miptree.c @@ -96,13 +96,12 @@ nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, struct nv04_miptree *nv04mt = (struct nv04_miptree *)pt; struct pipe_surface *ps; - ps = ws->surface_alloc(ws); + ps = CALLOC_STRUCT(pipe_surface); if (!ps) return NULL; + pipe_texture_reference(&ps->texture, pt); pipe_buffer_reference(pscreen, &ps->buffer, nv04mt->buffer); ps->format = pt->format; - ps->width = pt->width[level]; - ps->height = pt->height[level]; ps->block = pt->block; ps->width = pt->width[level]; ps->height = pt->height[level]; @@ -110,7 +109,6 @@ nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ps->nblocksy = pt->nblocksy[level]; ps->stride = nv04mt->level[level].pitch; ps->refcount = 1; - ps->winsys = pscreen->winsys; if (pt->target == PIPE_TEXTURE_CUBE) { ps->offset = nv04mt->level[level].image_offset[face]; diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c index 943f9e21e9..f8c021261b 100644 --- a/src/gallium/drivers/nv10/nv10_miptree.c +++ b/src/gallium/drivers/nv10/nv10_miptree.c @@ -110,9 +110,10 @@ nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, struct nv10_miptree *nv10mt = (struct nv10_miptree *)pt; struct pipe_surface *ps; - ps = ws->surface_alloc(ws); + ps = CALLOC_STRUCT(pipe_surface); if (!ps) return NULL; + pipe_texture_reference(&ps->texture, pt); pipe_buffer_reference(screen, &ps->buffer, nv10mt->buffer); ps->format = pt->format; ps->width = pt->width[level]; @@ -122,7 +123,6 @@ nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, ps->nblocksy = pt->nblocksy[level]; ps->stride = nv10mt->level[level].pitch; ps->refcount = 1; - ps->winsys = screen->winsys; if (pt->target == PIPE_TEXTURE_CUBE) { ps->offset = nv10mt->level[level].image_offset[face]; diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c index c6106d58c4..d2038c391d 100644 --- a/src/gallium/drivers/nv20/nv20_miptree.c +++ b/src/gallium/drivers/nv20/nv20_miptree.c @@ -117,7 +117,6 @@ nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, ps->usage = flags; ps->status = PIPE_SURFACE_STATUS_DEFINED; ps->refcount = 1; - ps->winsys = screen->winsys; if (pt->target == PIPE_TEXTURE_CUBE) { ps->offset = nv20mt->level[level].image_offset[face]; diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index 37d297cc0f..54fb3585f8 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -154,7 +154,6 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ps->usage = flags; ps->status = PIPE_SURFACE_STATUS_DEFINED; ps->refcount = 1; - ps->winsys = pscreen->winsys; ps->face = face; ps->level = level; ps->zslice = zslice; diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index 00ce6be985..ba912ddcbb 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -155,7 +155,6 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ps->usage = flags; ps->status = PIPE_SURFACE_STATUS_DEFINED; ps->refcount = 1; - ps->winsys = pscreen->winsys; ps->face = face; ps->level = level; ps->zslice = zslice; diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 63a23d06b8..7770fcc3f2 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -217,7 +217,6 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, { struct nv50_miptree *mt = nv50_miptree(pt); struct nv50_miptree_level *lvl = &mt->level[level]; - struct nv50_surface *s; struct pipe_surface *ps; int img; @@ -229,13 +228,11 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, else img = 0; - s = CALLOC_STRUCT(nv50_surface); - if (!s) + ps = CALLOC_STRUCT(pipe_surface); + if (!ps) return NULL; - ps = &s->base; - - ps->refcount = 1; - ps->winsys = pscreen->winsys; + pipe_texture_reference(&ps->texture, pt); + pipe_buffer_reference(pscreen, &ps->buffer, mt->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; @@ -245,6 +242,10 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ps->stride = ps->width * ps->block.size; ps->usage = flags; ps->status = PIPE_SURFACE_STATUS_DEFINED; + ps->refcount = 1; + ps->face = face; + ps->level = level; + ps->zslice = zslice; if (flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { assert(!(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE)); diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index a64dc89f43..faf9e871f9 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -94,49 +94,23 @@ softpipe_texture_layout(struct pipe_screen *screen, return spt->buffer != NULL; } -/* Hack it up to use the old winsys->surface_alloc_storage() - * method for now: - */ static boolean softpipe_displaytarget_layout(struct pipe_screen *screen, struct softpipe_texture * spt) { struct pipe_winsys *ws = screen->winsys; - struct pipe_surface surf; - unsigned flags = (PIPE_BUFFER_USAGE_CPU_READ | - PIPE_BUFFER_USAGE_CPU_WRITE | - PIPE_BUFFER_USAGE_GPU_READ | - PIPE_BUFFER_USAGE_GPU_WRITE); - int ret; - - - memset(&surf, 0, sizeof(surf)); - - ret =ws->surface_alloc_storage( ws, - &surf, - spt->base.width[0], - spt->base.height[0], - spt->base.format, - flags, - spt->base.tex_usage); - if(ret != 0) - return FALSE; - - if (!surf.buffer) { - /* allocation failed */ - return FALSE; - } + unsigned usage = (PIPE_BUFFER_USAGE_CPU_READ_WRITE | + PIPE_BUFFER_USAGE_GPU_READ_WRITE); - /* Now extract the goodies: - */ spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]); spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); - spt->stride[0] = surf.stride; - /* Transfer the reference: - */ - spt->buffer = surf.buffer; - surf.buffer = NULL; + spt->buffer = ws->surface_buffer_create( ws, + spt->base.width[0], + spt->base.height[0], + spt->base.format, + usage, + &spt->stride[0]); return spt->buffer != NULL; } @@ -231,7 +205,6 @@ softpipe_get_tex_surface(struct pipe_screen *screen, unsigned face, unsigned level, unsigned zslice, unsigned usage) { - struct pipe_winsys *ws = screen->winsys; struct softpipe_texture *spt = softpipe_texture(pt); struct pipe_surface *ps; diff --git a/src/gallium/drivers/softpipe/sp_texture.h b/src/gallium/drivers/softpipe/sp_texture.h index bf437a7c61..c1636920cd 100644 --- a/src/gallium/drivers/softpipe/sp_texture.h +++ b/src/gallium/drivers/softpipe/sp_texture.h @@ -42,7 +42,7 @@ struct softpipe_texture struct pipe_texture base; unsigned long level_offset[PIPE_MAX_TEXTURE_LEVELS]; - unsigned long stride[PIPE_MAX_TEXTURE_LEVELS]; + unsigned stride[PIPE_MAX_TEXTURE_LEVELS]; /* The data is held here: */ diff --git a/src/gallium/drivers/trace/tr_texture.c b/src/gallium/drivers/trace/tr_texture.c index 440a78704a..1cc4f0bd43 100644 --- a/src/gallium/drivers/trace/tr_texture.c +++ b/src/gallium/drivers/trace/tr_texture.c @@ -87,7 +87,6 @@ trace_surface_create(struct trace_texture *tr_tex, memcpy(&tr_surf->base, surface, sizeof(struct pipe_surface)); - tr_surf->base.winsys = tr_tex->base.screen->winsys; tr_surf->base.texture = NULL; pipe_texture_reference(&tr_surf->base.texture, &tr_tex->base); tr_surf->surface = surface; diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c index 177835854e..c4148fe810 100644 --- a/src/gallium/drivers/trace/tr_winsys.c +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -98,86 +98,41 @@ trace_winsys_flush_frontbuffer(struct pipe_winsys *_winsys, } -static struct pipe_surface * -trace_winsys_surface_alloc(struct pipe_winsys *_winsys) -{ - struct trace_winsys *tr_ws = trace_winsys(_winsys); - struct pipe_winsys *winsys = tr_ws->winsys; - struct pipe_surface *result; - - trace_dump_call_begin("pipe_winsys", "surface_alloc"); - - trace_dump_arg(ptr, winsys); - - result = winsys->surface_alloc(winsys); - - trace_dump_ret(ptr, result); - - trace_dump_call_end(); - - assert(!result || !result->texture); - - return result; -} - - -static int -trace_winsys_surface_alloc_storage(struct pipe_winsys *_winsys, - struct pipe_surface *surface, +static struct pipe_buffer * +trace_winsys_surface_buffer_create(struct pipe_winsys *_winsys, unsigned width, unsigned height, enum pipe_format format, - unsigned flags, - unsigned tex_usage) + unsigned usage, + unsigned *pstride) { struct trace_winsys *tr_ws = trace_winsys(_winsys); struct pipe_winsys *winsys = tr_ws->winsys; - int result; + unsigned stride; + struct pipe_buffer *result; - assert(surface && !surface->texture); - - trace_dump_call_begin("pipe_winsys", "surface_alloc_storage"); + trace_dump_call_begin("pipe_winsys", "surface_buffer_create"); trace_dump_arg(ptr, winsys); - trace_dump_arg(ptr, surface); trace_dump_arg(uint, width); trace_dump_arg(uint, height); trace_dump_arg(format, format); - trace_dump_arg(uint, flags); - trace_dump_arg(uint, tex_usage); + trace_dump_arg(uint, usage); - result = winsys->surface_alloc_storage(winsys, - surface, + result = winsys->surface_buffer_create(winsys, width, height, format, - flags, - tex_usage); + usage, + pstride); - trace_dump_ret(int, result); + stride = *pstride; - trace_dump_call_end(); + trace_dump_arg(uint, stride); - return result; -} - - -static void -trace_winsys_surface_release(struct pipe_winsys *_winsys, - struct pipe_surface **psurface) -{ - struct trace_winsys *tr_ws = trace_winsys(_winsys); - struct pipe_winsys *winsys = tr_ws->winsys; - struct pipe_surface *surface = *psurface; - - assert(psurface && *psurface && !(*psurface)->texture); - - trace_dump_call_begin("pipe_winsys", "surface_release"); - - trace_dump_arg(ptr, winsys); - trace_dump_arg(ptr, surface); - - winsys->surface_release(winsys, psurface); + trace_dump_ret(ptr, result); trace_dump_call_end(); + + return result; } @@ -465,9 +420,7 @@ trace_winsys_create(struct pipe_winsys *winsys) tr_ws->base.destroy = trace_winsys_destroy; tr_ws->base.get_name = trace_winsys_get_name; tr_ws->base.flush_frontbuffer = trace_winsys_flush_frontbuffer; - tr_ws->base.surface_alloc = trace_winsys_surface_alloc; - tr_ws->base.surface_alloc_storage = trace_winsys_surface_alloc_storage; - tr_ws->base.surface_release = trace_winsys_surface_release; + tr_ws->base.surface_buffer_create = trace_winsys_surface_buffer_create; tr_ws->base.buffer_create = trace_winsys_buffer_create; tr_ws->base.user_buffer_create = trace_winsys_user_buffer_create; tr_ws->base.buffer_map = trace_winsys_buffer_map; diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h index 5e79b7f485..7378392616 100644 --- a/src/gallium/include/pipe/p_inlines.h +++ b/src/gallium/include/pipe/p_inlines.h @@ -45,30 +45,19 @@ extern "C" { static INLINE void * pipe_surface_map( struct pipe_surface *surf, unsigned flags ) { - if (surf->texture) { - struct pipe_screen *screen = surf->texture->screen; - return surf->texture->screen->surface_map( screen, surf, flags ); - } - else { - struct pipe_winsys *winsys = surf->winsys; - char *map = (char *)winsys->buffer_map( winsys, surf->buffer, flags ); - if (map == NULL) - return NULL; - return (void *)(map + surf->offset); - } + struct pipe_screen *screen; + assert(surf->texture); + screen = surf->texture->screen; + return screen->surface_map( screen, surf, flags ); } static INLINE void pipe_surface_unmap( struct pipe_surface *surf ) { - if (surf->texture) { - struct pipe_screen *screen = surf->texture->screen; - surf->texture->screen->surface_unmap( screen, surf ); - } - else { - struct pipe_winsys *winsys = surf->winsys; - winsys->buffer_unmap( winsys, surf->buffer ); - } + struct pipe_screen *screen; + assert(surf->texture); + screen = surf->texture->screen; + screen->surface_unmap( screen, surf ); } @@ -88,20 +77,11 @@ pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf) } if (*ptr) { + struct pipe_screen *screen; assert((*ptr)->refcount); - - /* There are currently two sorts of surfaces... This needs to be - * fixed so that all surfaces are views into a texture. - */ - if ((*ptr)->texture) { - struct pipe_screen *screen = (*ptr)->texture->screen; - screen->tex_surface_release( screen, ptr ); - } - else { - struct pipe_winsys *winsys = (*ptr)->winsys; - winsys->surface_release(winsys, ptr); - } - + assert((*ptr)->texture); + screen = (*ptr)->texture->screen; + screen->tex_surface_release( screen, ptr ); assert(!*ptr); } diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 317121c64a..abe7cbe9e7 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -292,9 +292,7 @@ struct pipe_surface unsigned refcount; unsigned usage; /**< PIPE_BUFFER_USAGE_* */ - struct pipe_winsys *winsys; /**< winsys which owns/created the surface */ - - struct pipe_texture *texture; /**< optional texture into which this is a view */ + struct pipe_texture *texture; /**< texture into which this is a view */ unsigned face; unsigned level; unsigned zslice; diff --git a/src/gallium/include/pipe/p_winsys.h b/src/gallium/include/pipe/p_winsys.h index 5d18291dc6..3ae83e8105 100644 --- a/src/gallium/include/pipe/p_winsys.h +++ b/src/gallium/include/pipe/p_winsys.h @@ -76,24 +76,6 @@ struct pipe_winsys void *context_private ); - /** allocate a new surface (no context dependency) */ - struct pipe_surface *(*surface_alloc)(struct pipe_winsys *ws); - - /** - * Allocate storage for a pipe_surface. - * \param flags XXX unused, remove someday - * \return 0 if succeeds. - */ - int (*surface_alloc_storage)(struct pipe_winsys *ws, - struct pipe_surface *surf, - unsigned width, unsigned height, - enum pipe_format format, - unsigned flags, - unsigned tex_usage); - - void (*surface_release)(struct pipe_winsys *ws, struct pipe_surface **s); - - /** * Buffer management. Buffer attributes are mostly fixed over its lifetime. * @@ -138,6 +120,24 @@ struct pipe_winsys void *ptr, unsigned bytes); + /** + * Allocate storage for a display target surface. + * + * Often surfaces which are meant to be blitted to the front screen (i.e., + * display targets) must be allocated with special characteristics, memory + * pools, or obtained directly from the windowing system. + * + * This callback is invoked by the pipe_screenwhen creating a texture marked + * with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET flag to get the underlying + * buffer storage. + */ + struct pipe_buffer *(*surface_buffer_create)(struct pipe_winsys *ws, + unsigned width, unsigned height, + enum pipe_format format, + unsigned usage, + unsigned *stride); + + /** * Map the entire data store of a buffer object into the client's address. * flags is bitmask of PIPE_BUFFER_USAGE_CPU_READ/WRITE flags. diff --git a/src/gallium/state_trackers/python/st_softpipe_winsys.c b/src/gallium/state_trackers/python/st_softpipe_winsys.c index f62113a469..01d88ee499 100644 --- a/src/gallium/state_trackers/python/st_softpipe_winsys.c +++ b/src/gallium/state_trackers/python/st_softpipe_winsys.c @@ -168,63 +168,25 @@ round_up(unsigned n, unsigned multiple) } -static int -st_softpipe_surface_alloc_storage(struct pipe_winsys *winsys, - struct pipe_surface *surf, +static struct pipe_buffer * +st_softpipe_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, - enum pipe_format format, - unsigned flags, - unsigned tex_usage) + enum pipe_format format, + unsigned usage, + unsigned *stride) { const unsigned alignment = 64; + struct pipe_format_block block; + unsigned nblocksx, nblocksy; - surf->width = width; - surf->height = height; - surf->format = format; - pf_get_block(format, &surf->block); - surf->nblocksx = pf_get_nblocksx(&surf->block, width); - surf->nblocksy = pf_get_nblocksy(&surf->block, height); - surf->stride = round_up(surf->nblocksx * surf->block.size, alignment); - surf->usage = flags; - - assert(!surf->buffer); - surf->buffer = winsys->buffer_create(winsys, alignment, - PIPE_BUFFER_USAGE_PIXEL, - surf->stride * surf->nblocksy); - if(!surf->buffer) - return -1; - - return 0; -} - - -static struct pipe_surface * -st_softpipe_surface_alloc(struct pipe_winsys *winsys) -{ - struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); - - assert(winsys); - - surface->refcount = 1; - surface->winsys = winsys; - - return surface; -} - + pf_get_block(format, &block); + nblocksx = pf_get_nblocksx(&block, width); + nblocksy = pf_get_nblocksy(&block, height); + *stride = round_up(nblocksx * block.size, alignment); -static void -st_softpipe_surface_release(struct pipe_winsys *winsys, - struct pipe_surface **s) -{ - struct pipe_surface *surf = *s; - assert(!surf->texture); - surf->refcount--; - if (surf->refcount == 0) { - if (surf->buffer) - winsys_buffer_reference(winsys, &surf->buffer, NULL); - free(surf); - } - *s = NULL; + return winsys->buffer_create(winsys, alignment, + usage, + *stride * nblocksy); } @@ -279,9 +241,7 @@ st_softpipe_screen_create(void) winsys->buffer_unmap = st_softpipe_buffer_unmap; winsys->buffer_destroy = st_softpipe_buffer_destroy; - winsys->surface_alloc = st_softpipe_surface_alloc; - winsys->surface_alloc_storage = st_softpipe_surface_alloc_storage; - winsys->surface_release = st_softpipe_surface_release; + winsys->surface_buffer_create = st_softpipe_surface_buffer_create; winsys->fence_reference = st_softpipe_fence_reference; winsys->fence_signalled = st_softpipe_fence_signalled; diff --git a/src/gallium/winsys/drm/intel/common/intel_be_device.c b/src/gallium/winsys/drm/intel/common/intel_be_device.c index 019ee5cbd2..14aeaf61db 100644 --- a/src/gallium/winsys/drm/intel/common/intel_be_device.c +++ b/src/gallium/winsys/drm/intel/common/intel_be_device.c @@ -157,35 +157,25 @@ err: } -/* - * Surface functions. - * - * Deprecated! - */ - -static struct pipe_surface * -intel_i915_surface_alloc(struct pipe_winsys *winsys) -{ - assert((size_t)"intel_i915_surface_alloc is deprecated" & 0); - return NULL; -} - -static int -intel_i915_surface_alloc_storage(struct pipe_winsys *winsys, - struct pipe_surface *surf, +static struct pipe_buffer * +intel_i915_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, enum pipe_format format, - unsigned flags, - unsigned tex_usage) -{ - assert((size_t)"intel_i915_surface_alloc_storage is deprecated" & 0); - return -1; -} - -static void -intel_i915_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) + unsigned usage, + unsigned *stride) { - assert((size_t)"intel_i915_surface_release is deprecated" & 0); + const unsigned alignment = 64; + struct pipe_format_block block; + unsigned nblocksx, nblocksy; + + pf_get_block(format, &block); + nblocksx = pf_get_nblocksx(&block, width); + nblocksy = pf_get_nblocksy(&block, height); + *stride = round_up(nblocksx * block.size, alignment); + + return winsys->buffer_create(winsys, alignment, + usage, + *stride * nblocksy); } @@ -238,9 +228,7 @@ intel_be_init_device(struct intel_be_device *dev, int fd, unsigned id) dev->base.buffer_map = intel_be_buffer_map; dev->base.buffer_unmap = intel_be_buffer_unmap; dev->base.buffer_destroy = intel_be_buffer_destroy; - dev->base.surface_alloc = intel_i915_surface_alloc; - dev->base.surface_alloc_storage = intel_i915_surface_alloc_storage; - dev->base.surface_release = intel_i915_surface_release; + dev->base.surface_buffer_create = intel_i915_surface_buffer_create; dev->base.fence_reference = intel_be_fence_reference; dev->base.fence_signalled = intel_be_fence_signalled; dev->base.fence_finish = intel_be_fence_finish; diff --git a/src/gallium/winsys/egl_xlib/sw_winsys.c b/src/gallium/winsys/egl_xlib/sw_winsys.c index 2fd190da52..a09ad5e8e9 100644 --- a/src/gallium/winsys/egl_xlib/sw_winsys.c +++ b/src/gallium/winsys/egl_xlib/sw_winsys.c @@ -161,65 +161,25 @@ buffer_destroy(struct pipe_winsys *pws, struct pipe_buffer *buf) } -/** - * Called via winsys->surface_alloc() to create new surfaces. - */ -static struct pipe_surface * -surface_alloc(struct pipe_winsys *ws) -{ - struct pipe_surface *surf = CALLOC_STRUCT(pipe_surface); - if (!surf) - return NULL; - - surf->refcount = 1; - surf->winsys = ws; - - return surf; -} - - -static int -surface_alloc_storage(struct pipe_winsys *winsys, - struct pipe_surface *surf, +static struct pipe_buffer * +surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, enum pipe_format format, - unsigned flags, - unsigned tex_usage) + unsigned usage, + unsigned *stride) { const unsigned alignment = 64; + struct pipe_format_block block; + unsigned nblocksx, nblocksy; - surf->width = width; - surf->height = height; - surf->format = format; - pf_get_block(surf->format, &surf->block); - surf->nblocksx = pf_get_nblocksx(&surf->block, width); - surf->nblocksy = pf_get_nblocksy(&surf->block, height); - surf->stride = round_up(surf->nblocksx * surf->block.size, alignment); - surf->usage = flags; - - assert(!surf->buffer); - surf->buffer = winsys->buffer_create(winsys, alignment, - PIPE_BUFFER_USAGE_PIXEL, - surf->stride * height); - if(!surf->buffer) - return -1; - - return 0; -} - + pf_get_block(format, &block); + nblocksx = pf_get_nblocksx(&block, width); + nblocksy = pf_get_nblocksy(&block, height); + *stride = round_up(nblocksx * block.size, alignment); -static void -surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) -{ - struct pipe_surface *surf = *s; - assert(!surf->texture); - surf->refcount--; - if (surf->refcount == 0) { - if (surf->buffer) - winsys_buffer_reference(winsys, &surf->buffer, NULL); - free(surf); - } - *s = NULL; + return winsys->buffer_create(winsys, alignment, + usage, + *stride * nblocksy); } @@ -268,9 +228,7 @@ create_sw_winsys(void) ws->Base.buffer_unmap = buffer_unmap; ws->Base.buffer_destroy = buffer_destroy; - ws->Base.surface_alloc = surface_alloc; - ws->Base.surface_alloc_storage = surface_alloc_storage; - ws->Base.surface_release = surface_release; + ws->Base.surface_buffer_create = surface_buffer_create; ws->Base.fence_reference = fence_reference; ws->Base.fence_signalled = fence_signalled; diff --git a/src/gallium/winsys/g3dvl/nouveau/nouveau_winsys_pipe.c b/src/gallium/winsys/g3dvl/nouveau/nouveau_winsys_pipe.c index 17c409e1ce..2d8463037f 100644 --- a/src/gallium/winsys/g3dvl/nouveau/nouveau_winsys_pipe.c +++ b/src/gallium/winsys/g3dvl/nouveau/nouveau_winsys_pipe.c @@ -46,34 +46,29 @@ round_up(unsigned n, unsigned multiple) return (n + multiple - 1) & ~(multiple - 1); } -static int -nouveau_surface_alloc_storage +static struct pipe_buffer * +nouveau_surface_buffer_create ( struct pipe_winsys *pws, - struct pipe_surface *surface, unsigned width, unsigned height, enum pipe_format format, - unsigned flags, - unsigned tex_usage + unsigned usage, + unsigned *stride ) { const unsigned int ALIGNMENT = 256; + struct pipe_format_block block; + unsigned nblocksx, nblocksy; - assert(pws); - assert(surface); - - surface->width = width; - surface->height = height; - surface->format = format; - pf_get_block(format, &surface->block); - surface->nblocksx = pf_get_nblocksx(&surface->block, width); - surface->nblocksy = pf_get_nblocksy(&surface->block, height); - surface->stride = round_up(surface->nblocksx * surface->block.size, ALIGNMENT); - surface->usage = flags; - surface->buffer = pws->buffer_create(pws, ALIGNMENT, PIPE_BUFFER_USAGE_PIXEL, surface->stride * surface->nblocksy); + pf_get_block(format, &block); + nblocksx = pf_get_nblocksx(&block, width); + nblocksy = pf_get_nblocksy(&block, height); + *stride = round_up(nblocksx * block.size, ALIGNMENT); - return 0; + return winsys->buffer_create(winsys, ALIGNMENT, + usage, + *stride * nblocksy); } static void @@ -269,9 +264,7 @@ nouveau_create_pipe_winsys(struct nouveau_context *nv) pws->flush_frontbuffer = nouveau_flush_frontbuffer; - pws->surface_alloc = nouveau_surface_alloc; - pws->surface_alloc_storage = nouveau_surface_alloc_storage; - pws->surface_release = nouveau_surface_release; + pws->surface_buffer_create = nouveau_surface_buffer_create; pws->buffer_create = nouveau_pipe_bo_create; pws->buffer_destroy = nouveau_pipe_bo_del; diff --git a/src/gallium/winsys/g3dvl/xsp_winsys.c b/src/gallium/winsys/g3dvl/xsp_winsys.c index 68be2c2ea3..40d683234f 100644 --- a/src/gallium/winsys/g3dvl/xsp_winsys.c +++ b/src/gallium/winsys/g3dvl/xsp_winsys.c @@ -96,73 +96,34 @@ static void xsp_buffer_destroy(struct pipe_winsys *pws, struct pipe_buffer *buff free(xsp_buf); } -static struct pipe_surface* xsp_surface_alloc(struct pipe_winsys *pws) -{ - struct pipe_surface *surface; - - assert(pws); - - surface = calloc(1, sizeof(struct pipe_surface)); - surface->refcount = 1; - surface->winsys = pws; - - return surface; -} - /* Borrowed from Mesa's xm_winsys */ static unsigned int round_up(unsigned n, unsigned multiple) { return (n + multiple - 1) & ~(multiple - 1); } -static int xsp_surface_alloc_storage +static struct pipe_buffer* xsp_surface_buffer_create ( struct pipe_winsys *pws, - struct pipe_surface *surface, unsigned width, unsigned height, enum pipe_format format, - unsigned flags, - unsigned tex_usage + unsigned usage, + unsigned *stride ) { const unsigned int ALIGNMENT = 1; + struct pipe_format_block block; + unsigned nblocksx, nblocksy; - assert(pws); - assert(surface); - - surface->width = width; - surface->height = height; - surface->format = format; - pf_get_block(format, &surface->block); - surface->nblocksx = pf_get_nblocksx(&surface->block, width); - surface->nblocksy = pf_get_nblocksy(&surface->block, height); - surface->stride = round_up(surface->nblocksx * surface->block.size, ALIGNMENT); - surface->usage = flags; - surface->buffer = pws->buffer_create(pws, ALIGNMENT, PIPE_BUFFER_USAGE_PIXEL, surface->stride * surface->nblocksy); - - return 0; -} - -static void xsp_surface_release(struct pipe_winsys *pws, struct pipe_surface **surface) -{ - struct pipe_surface *s; - - assert(pws); - assert(surface); - assert(*surface); - - s = *surface; - - s->refcount--; - - if (s->refcount == 0) - { - winsys_buffer_reference(pws, &s->buffer, NULL); - free(s); - } + pf_get_block(format, &block); + nblocksx = pf_get_nblocksx(&block, width); + nblocksy = pf_get_nblocksy(&block, height); + *stride = round_up(nblocksx * block.size, ALIGNMENT); - *surface = NULL; + return winsys->buffer_create(winsys, ALIGNMENT, + usage, + *stride * nblocksy); } static void xsp_fence_reference(struct pipe_winsys *pws, struct pipe_fence_handle **ptr, struct pipe_fence_handle *fence) @@ -273,9 +234,7 @@ struct pipe_context* create_pipe_context(Display *display, int screen) xsp_winsys->base.buffer_map = xsp_buffer_map; xsp_winsys->base.buffer_unmap = xsp_buffer_unmap; xsp_winsys->base.buffer_destroy = xsp_buffer_destroy; - xsp_winsys->base.surface_alloc = xsp_surface_alloc; - xsp_winsys->base.surface_alloc_storage = xsp_surface_alloc_storage; - xsp_winsys->base.surface_release = xsp_surface_release; + xsp_winsys->base.surface_buffer_create = xsp_surface_buffer_create; xsp_winsys->base.fence_reference = xsp_fence_reference; xsp_winsys->base.fence_signalled = xsp_fence_signalled; xsp_winsys->base.fence_finish = xsp_fence_finish; diff --git a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c index bd5aa10a20..cc12007193 100644 --- a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c +++ b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c @@ -161,63 +161,25 @@ round_up(unsigned n, unsigned multiple) } -static int -gdi_softpipe_surface_alloc_storage(struct pipe_winsys *winsys, - struct pipe_surface *surf, +static struct pipe_buffer * +gdi_softpipe_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, enum pipe_format format, - unsigned flags, - unsigned tex_usage) + unsigned usage, + unsigned *stride) { const unsigned alignment = 64; + struct pipe_format_block block; + unsigned nblocksx, nblocksy; - surf->width = width; - surf->height = height; - surf->format = format; - pf_get_block(format, &surf->block); - surf->nblocksx = pf_get_nblocksx(&surf->block, width); - surf->nblocksy = pf_get_nblocksy(&surf->block, height); - surf->stride = round_up(surf->nblocksx * surf->block.size, alignment); - surf->usage = flags; - - assert(!surf->buffer); - surf->buffer = winsys->buffer_create(winsys, alignment, - PIPE_BUFFER_USAGE_PIXEL, - surf->stride * surf->nblocksy); - if(!surf->buffer) - return -1; - - return 0; -} - - -static struct pipe_surface * -gdi_softpipe_surface_alloc(struct pipe_winsys *winsys) -{ - struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); - - assert(winsys); - - surface->refcount = 1; - surface->winsys = winsys; - - return surface; -} - + pf_get_block(format, &block); + nblocksx = pf_get_nblocksx(&block, width); + nblocksy = pf_get_nblocksy(&block, height); + *stride = round_up(nblocksx * block.size, alignment); -static void -gdi_softpipe_surface_release(struct pipe_winsys *winsys, - struct pipe_surface **s) -{ - struct pipe_surface *surf = *s; - assert(!surf->texture); - surf->refcount--; - if (surf->refcount == 0) { - if (surf->buffer) - winsys_buffer_reference(winsys, &surf->buffer, NULL); - free(surf); - } - *s = NULL; + return winsys->buffer_create(winsys, alignment, + usage, + *stride * nblocksy); } @@ -281,9 +243,7 @@ gdi_softpipe_screen_create(void) winsys->buffer_unmap = gdi_softpipe_buffer_unmap; winsys->buffer_destroy = gdi_softpipe_buffer_destroy; - winsys->surface_alloc = gdi_softpipe_surface_alloc; - winsys->surface_alloc_storage = gdi_softpipe_surface_alloc_storage; - winsys->surface_release = gdi_softpipe_surface_release; + winsys->surface_buffer_create = gdi_softpipe_surface_buffer_create; winsys->fence_reference = gdi_softpipe_fence_reference; winsys->fence_signalled = gdi_softpipe_fence_signalled; diff --git a/src/gallium/winsys/xlib/xlib_brw_screen.c b/src/gallium/winsys/xlib/xlib_brw_screen.c index 030cd66bd9..1fd7da8a2f 100644 --- a/src/gallium/winsys/xlib/xlib_brw_screen.c +++ b/src/gallium/winsys/xlib/xlib_brw_screen.c @@ -229,17 +229,6 @@ aub_flush_frontbuffer( struct pipe_winsys *winsys, aub_bo(surface->buffer)->offset ); } -static struct pipe_surface * -aub_i915_surface_alloc(struct pipe_winsys *winsys) -{ - struct pipe_surface *surf = CALLOC_STRUCT(pipe_surface); - if (surf) { - surf->refcount = 1; - surf->winsys = winsys; - } - return surf; -} - /** * Round n up to next multiple. @@ -250,50 +239,28 @@ round_up(unsigned n, unsigned multiple) return (n + multiple - 1) & ~(multiple - 1); } -static int -aub_i915_surface_alloc_storage(struct pipe_winsys *winsys, - struct pipe_surface *surf, +static struct pipe_buffer * +aub_i915_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, enum pipe_format format, - unsigned flags, - unsigned tex_usage) + unsigned usage, + unsigned *stride) { const unsigned alignment = 64; + struct pipe_format_block block; + unsigned nblocksx, nblocksy; - surf->width = width; - surf->height = height; - surf->format = format; - pf_get_block(format, &surf->block); - surf->nblocksx = pf_get_nblocksx(&surf->block, width); - surf->nblocksy = pf_get_nblocksy(&surf->block, height); - surf->stride = round_up(surf->nblocksx * surf->block.size, alignment); - surf->usage = flags; - - assert(!surf->buffer); - surf->buffer = winsys->buffer_create(winsys, alignment, - PIPE_BUFFER_USAGE_PIXEL, - surf->stride * surf->nblocksy); - if(!surf->buffer) - return -1; - - return 0; -} + pf_get_block(format, &block); + nblocksx = pf_get_nblocksx(&block, width); + nblocksy = pf_get_nblocksy(&block, height); + *stride = round_up(nblocksx * block.size, alignment); -static void -aub_i915_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) -{ - struct pipe_surface *surf = *s; - surf->refcount--; - if (surf->refcount == 0) { - if (surf->buffer) - winsys_buffer_reference(winsys, &surf->buffer, NULL); - free(surf); - } - *s = NULL; + return winsys->buffer_create(winsys, alignment, + usage, + *stride * nblocksy); } - static const char * aub_get_name( struct pipe_winsys *winsys ) { @@ -333,9 +300,7 @@ xlib_create_brw_winsys( void ) iws->winsys.get_name = aub_get_name; iws->winsys.destroy = xlib_brw_destroy_pipe_winsys_aub; - iws->winsys.surface_alloc = aub_i915_surface_alloc; - iws->winsys.surface_alloc_storage = aub_i915_surface_alloc_storage; - iws->winsys.surface_release = aub_i915_surface_release; + iws->winsys.surface_buffer_create = aub_i915_surface_buffer_create; iws->aubfile = brw_aubfile_create(); iws->size = AUB_BUF_SIZE; diff --git a/src/gallium/winsys/xlib/xlib_cell.c b/src/gallium/winsys/xlib/xlib_cell.c index 93bc8ecd81..5af9ee3bb5 100644 --- a/src/gallium/winsys/xlib/xlib_cell.c +++ b/src/gallium/winsys/xlib/xlib_cell.c @@ -284,68 +284,26 @@ round_up(unsigned n, unsigned multiple) return (n + multiple - 1) & ~(multiple - 1); } -static int -xm_surface_alloc_storage(struct pipe_winsys *winsys, - struct pipe_surface *surf, +static struct pipe_buffer * +xm_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, - enum pipe_format format, - unsigned flags, - unsigned tex_usage) + enum pipe_format format, + unsigned usage, + unsigned *stride) { const unsigned alignment = 64; - - surf->width = width; - surf->height = height; - surf->format = format; - pf_get_block(format, &surf->block); - surf->nblocksx = pf_get_nblocksx(&surf->block, width); - surf->nblocksy = pf_get_nblocksy(&surf->block, height); - surf->stride = round_up(surf->nblocksx * surf->block.size, alignment); - surf->usage = flags; - - assert(!surf->buffer); - surf->buffer = winsys->buffer_create(winsys, alignment, - PIPE_BUFFER_USAGE_PIXEL, - /* XXX a bit of a hack */ - surf->stride * round_up(surf->nblocksy, TILE_SIZE)); - - if(!surf->buffer) - return -1; - - return 0; -} - - -/** - * Called via winsys->surface_alloc() to create new surfaces. - */ -static struct pipe_surface * -xm_surface_alloc(struct pipe_winsys *ws) -{ - struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); - - assert(ws); - - surface->refcount = 1; - surface->winsys = ws; - - return surface; -} - - - -static void -xm_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) -{ - struct pipe_surface *surf = *s; - assert(!surf->texture); - surf->refcount--; - if (surf->refcount == 0) { - if (surf->buffer) - winsys_buffer_reference(winsys, &surf->buffer, NULL); - free(surf); - } - *s = NULL; + struct pipe_format_block block; + unsigned nblocksx, nblocksy; + + pf_get_block(format, &block); + nblocksx = pf_get_nblocksx(&block, width); + nblocksy = pf_get_nblocksy(&block, height); + *stride = round_up(nblocksx * block.size, alignment); + + return winsys->buffer_create(winsys, alignment, + usage, + /* XXX a bit of a hack */ + *stride * round_up(nblocksy, TILE_SIZE)); } @@ -395,9 +353,7 @@ xlib_create_cell_winsys( void ) ws->base.buffer_unmap = xm_buffer_unmap; ws->base.buffer_destroy = xm_buffer_destroy; - ws->base.surface_alloc = xm_surface_alloc; - ws->base.surface_alloc_storage = xm_surface_alloc_storage; - ws->base.surface_release = xm_surface_release; + ws->base.surface_buffer_create = xm_surface_buffer_create; ws->base.fence_reference = xm_fence_reference; ws->base.fence_signalled = xm_fence_signalled; diff --git a/src/gallium/winsys/xlib/xlib_softpipe.c b/src/gallium/winsys/xlib/xlib_softpipe.c index 0c3097c42e..c0bf37050a 100644 --- a/src/gallium/winsys/xlib/xlib_softpipe.c +++ b/src/gallium/winsys/xlib/xlib_softpipe.c @@ -344,67 +344,25 @@ xm_user_buffer_create(struct pipe_winsys *pws, void *ptr, unsigned bytes) } -static int -xm_surface_alloc_storage(struct pipe_winsys *winsys, - struct pipe_surface *surf, +static struct pipe_buffer * +xm_surface_buffer_create(struct pipe_winsys *winsys, unsigned width, unsigned height, - enum pipe_format format, - unsigned flags, - unsigned tex_usage) -{ - const int alignment = 64; - - surf->width = width; - surf->height = height; - surf->format = format; - pf_get_block(format, &surf->block); - surf->nblocksx = pf_get_nblocksx(&surf->block, width); - surf->nblocksy = pf_get_nblocksy(&surf->block, height); - surf->stride = align(surf->nblocksx * surf->block.size, alignment); - surf->usage = flags; - - assert(!surf->buffer); - surf->buffer = winsys->buffer_create(winsys, alignment, - PIPE_BUFFER_USAGE_PIXEL, - surf->stride * surf->nblocksy); - - if(!surf->buffer) - return -1; - - return 0; -} - - -/** - * Called via winsys->surface_alloc() to create new surfaces. - */ -static struct pipe_surface * -xm_surface_alloc(struct pipe_winsys *ws) + enum pipe_format format, + unsigned usage, + unsigned *stride) { - struct pipe_surface *surface = CALLOC_STRUCT(pipe_surface); - - assert(ws); - - surface->refcount = 1; - surface->winsys = ws; - - return surface; -} - - - -static void -xm_surface_release(struct pipe_winsys *winsys, struct pipe_surface **s) -{ - struct pipe_surface *surf = *s; - assert(!surf->texture); - surf->refcount--; - if (surf->refcount == 0) { - if (surf->buffer) - winsys_buffer_reference(winsys, &surf->buffer, NULL); - free(surf); - } - *s = NULL; + const unsigned alignment = 64; + struct pipe_format_block block; + unsigned nblocksx, nblocksy; + + pf_get_block(format, &block); + nblocksx = pf_get_nblocksx(&block, width); + nblocksy = pf_get_nblocksy(&block, height); + *stride = align(nblocksx * block.size, alignment); + + return winsys->buffer_create(winsys, alignment, + usage, + *stride * nblocksy); } @@ -454,9 +412,7 @@ xlib_create_softpipe_winsys( void ) ws->base.buffer_unmap = xm_buffer_unmap; ws->base.buffer_destroy = xm_buffer_destroy; - ws->base.surface_alloc = xm_surface_alloc; - ws->base.surface_alloc_storage = xm_surface_alloc_storage; - ws->base.surface_release = xm_surface_release; + ws->base.surface_buffer_create = xm_surface_buffer_create; ws->base.fence_reference = xm_fence_reference; ws->base.fence_signalled = xm_fence_signalled; -- cgit v1.2.3 From f6759724281a46723d372ba0f73cb626b7f869f0 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 26 Jan 2009 12:42:23 +0000 Subject: draw: queiten compiler warnings --- src/gallium/auxiliary/draw/draw_vs_aos.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index 6141ba9cbf..e0923a809e 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -901,6 +901,7 @@ static void PIPE_CDECL print_reg( const char *msg, debug_printf("%s: %f %f %f %f\n", msg, reg[0], reg[1], reg[2], reg[3]); } +#if 0 static void emit_print( struct aos_compilation *cp, const char *message, /* must point to a static string! */ unsigned file, @@ -952,6 +953,7 @@ static void emit_print( struct aos_compilation *cp, /* Done... */ } +#endif /** * The traditional instructions. All operate on internal registers @@ -1090,7 +1092,7 @@ static boolean emit_LG2( struct aos_compilation *cp, const struct tgsi_full_inst return TRUE; } - +#if 0 static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) { x87_fld_src(cp, &op->FullSrcRegisters[0], 0); @@ -1098,6 +1100,7 @@ static boolean emit_EX2( struct aos_compilation *cp, const struct tgsi_full_inst x87_fstp_dest4(cp, &op->FullDstRegisters[0]); return TRUE; } +#endif static boolean emit_FLR( struct aos_compilation *cp, const struct tgsi_full_instruction *op ) -- cgit v1.2.3 From 27e5097e2aa6ee2b9cc8dbc5129fa7f4c8eb283c Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 27 Jan 2009 11:15:42 +0000 Subject: draw: silence some warnings --- src/gallium/auxiliary/draw/draw_vs_aos.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c index e0923a809e..6817f29c2a 100644 --- a/src/gallium/auxiliary/draw/draw_vs_aos.c +++ b/src/gallium/auxiliary/draw/draw_vs_aos.c @@ -871,7 +871,7 @@ static void set_fpu_round_nearest( struct aos_compilation *cp ) } } - +#if 0 static void x87_emit_ex2( struct aos_compilation *cp ) { struct x86_reg st0 = x86_make_reg(file_x87, 0); @@ -894,12 +894,15 @@ static void x87_emit_ex2( struct aos_compilation *cp ) assert( stack == cp->func->x87_stack); } +#endif +#if 0 static void PIPE_CDECL print_reg( const char *msg, const float *reg ) { debug_printf("%s: %f %f %f %f\n", msg, reg[0], reg[1], reg[2], reg[3]); } +#endif #if 0 static void emit_print( struct aos_compilation *cp, -- cgit v1.2.3 From 4d710dd3cf3187e94e5765b46e4dd6899a7a41d6 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Tue, 27 Jan 2009 11:15:52 +0000 Subject: tgsi: silence some warnings --- src/gallium/auxiliary/tgsi/tgsi_exec.c | 37 ++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c index 989b6eec27..a182e679da 100644 --- a/src/gallium/auxiliary/tgsi/tgsi_exec.c +++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c @@ -320,6 +320,7 @@ micro_add( dst->f[3] = src0->f[3] + src1->f[3]; } +#if 0 static void micro_iadd( union tgsi_exec_channel *dst, @@ -331,6 +332,7 @@ micro_iadd( dst->i[2] = src0->i[2] + src1->i[2]; dst->i[3] = src0->i[3] + src1->i[3]; } +#endif static void micro_and( @@ -408,6 +410,7 @@ micro_div( } } +#if 0 static void micro_udiv( union tgsi_exec_channel *dst, @@ -419,6 +422,7 @@ micro_udiv( dst->u[2] = src0->u[2] / src1->u[2]; dst->u[3] = src0->u[3] / src1->u[3]; } +#endif static void micro_eq( @@ -434,6 +438,7 @@ micro_eq( dst->f[3] = src0->f[3] == src1->f[3] ? src2->f[3] : src3->f[3]; } +#if 0 static void micro_ieq( union tgsi_exec_channel *dst, @@ -447,6 +452,7 @@ micro_ieq( dst->i[2] = src0->i[2] == src1->i[2] ? src2->i[2] : src3->i[2]; dst->i[3] = src0->i[3] == src1->i[3] ? src2->i[3] : src3->i[3]; } +#endif static void micro_exp2( @@ -466,6 +472,7 @@ micro_exp2( #endif } +#if 0 static void micro_f2ut( union tgsi_exec_channel *dst, @@ -476,6 +483,7 @@ micro_f2ut( dst->u[2] = (uint) src->f[2]; dst->u[3] = (uint) src->f[3]; } +#endif static void micro_flr( @@ -570,6 +578,7 @@ micro_lt( dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3]; } +#if 0 static void micro_ilt( union tgsi_exec_channel *dst, @@ -583,7 +592,9 @@ micro_ilt( dst->i[2] = src0->i[2] < src1->i[2] ? src2->i[2] : src3->i[2]; dst->i[3] = src0->i[3] < src1->i[3] ? src2->i[3] : src3->i[3]; } +#endif +#if 0 static void micro_ult( union tgsi_exec_channel *dst, @@ -597,6 +608,7 @@ micro_ult( dst->u[2] = src0->u[2] < src1->u[2] ? src2->u[2] : src3->u[2]; dst->u[3] = src0->u[3] < src1->u[3] ? src2->u[3] : src3->u[3]; } +#endif static void micro_max( @@ -610,6 +622,7 @@ micro_max( dst->f[3] = src0->f[3] > src1->f[3] ? src0->f[3] : src1->f[3]; } +#if 0 static void micro_imax( union tgsi_exec_channel *dst, @@ -621,7 +634,9 @@ micro_imax( dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2]; dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3]; } +#endif +#if 0 static void micro_umax( union tgsi_exec_channel *dst, @@ -633,6 +648,7 @@ micro_umax( dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2]; dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3]; } +#endif static void micro_min( @@ -646,6 +662,7 @@ micro_min( dst->f[3] = src0->f[3] < src1->f[3] ? src0->f[3] : src1->f[3]; } +#if 0 static void micro_imin( union tgsi_exec_channel *dst, @@ -657,7 +674,9 @@ micro_imin( dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2]; dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3]; } +#endif +#if 0 static void micro_umin( union tgsi_exec_channel *dst, @@ -669,7 +688,9 @@ micro_umin( dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2]; dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3]; } +#endif +#if 0 static void micro_umod( union tgsi_exec_channel *dst, @@ -681,6 +702,7 @@ micro_umod( dst->u[2] = src0->u[2] % src1->u[2]; dst->u[3] = src0->u[3] % src1->u[3]; } +#endif static void micro_mul( @@ -694,6 +716,7 @@ micro_mul( dst->f[3] = src0->f[3] * src1->f[3]; } +#if 0 static void micro_imul( union tgsi_exec_channel *dst, @@ -705,7 +728,9 @@ micro_imul( dst->i[2] = src0->i[2] * src1->i[2]; dst->i[3] = src0->i[3] * src1->i[3]; } +#endif +#if 0 static void micro_imul64( union tgsi_exec_channel *dst0, @@ -722,7 +747,9 @@ micro_imul64( dst0->i[2] = 0; dst0->i[3] = 0; } +#endif +#if 0 static void micro_umul64( union tgsi_exec_channel *dst0, @@ -739,7 +766,10 @@ micro_umul64( dst0->u[2] = 0; dst0->u[3] = 0; } +#endif + +#if 0 static void micro_movc( union tgsi_exec_channel *dst, @@ -752,6 +782,7 @@ micro_movc( dst->u[2] = src0->u[2] ? src1->u[2] : src2->u[2]; dst->u[3] = src0->u[3] ? src1->u[3] : src2->u[3]; } +#endif static void micro_neg( @@ -764,6 +795,7 @@ micro_neg( dst->f[3] = -src->f[3]; } +#if 0 static void micro_ineg( union tgsi_exec_channel *dst, @@ -774,6 +806,7 @@ micro_ineg( dst->i[2] = -src->i[2]; dst->i[3] = -src->i[3]; } +#endif static void micro_not( @@ -874,6 +907,7 @@ micro_trunc( dst->f[3] = (float) (int) src0->f[3]; } +#if 0 static void micro_ushr( union tgsi_exec_channel *dst, @@ -885,6 +919,7 @@ micro_ushr( dst->u[2] = src0->u[2] >> src1->u[2]; dst->u[3] = src0->u[3] >> src1->u[3]; } +#endif static void micro_sin( @@ -919,6 +954,7 @@ micro_sub( dst->f[3] = src0->f[3] - src1->f[3]; } +#if 0 static void micro_u2f( union tgsi_exec_channel *dst, @@ -929,6 +965,7 @@ micro_u2f( dst->f[2] = (float) src->u[2]; dst->f[3] = (float) src->u[3]; } +#endif static void micro_xor( -- cgit v1.2.3 From 872b515e8f0bb1be5bad85fd9d01529c71f07ba2 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 26 Jan 2009 13:45:45 -0500 Subject: gallium: standardize on stride instead of pitch in the interface --- src/gallium/auxiliary/draw/draw_pt.c | 6 +++--- src/gallium/auxiliary/draw/draw_pt_fetch.c | 4 ++-- src/gallium/auxiliary/draw/draw_pt_fetch_emit.c | 2 +- src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c | 4 ++-- src/gallium/auxiliary/util/u_draw_quad.c | 2 +- src/gallium/drivers/i965simple/brw_draw_upload.c | 2 +- src/gallium/drivers/nv30/nv30_vbo.c | 4 ++-- src/gallium/drivers/nv40/nv40_vbo.c | 4 ++-- src/gallium/drivers/nv50/nv50_vbo.c | 2 +- src/gallium/drivers/trace/tr_state.c | 4 ++-- src/gallium/include/pipe/p_screen.h | 2 +- src/gallium/include/pipe/p_state.h | 2 +- src/mesa/state_tracker/st_draw.c | 6 +++--- src/mesa/state_tracker/st_draw_feedback.c | 2 +- 14 files changed, 23 insertions(+), 23 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c index 18f24e5980..4e5ffa0930 100644 --- a/src/gallium/auxiliary/draw/draw_pt.c +++ b/src/gallium/auxiliary/draw/draw_pt.c @@ -228,7 +228,7 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count) for (j = 0; j < draw->pt.nr_vertex_elements; j++) { uint buf = draw->pt.vertex_element[j].vertex_buffer_index; ubyte *ptr = (ubyte *) draw->pt.user.vbuffer[buf]; - ptr += draw->pt.vertex_buffer[buf].pitch * ii; + ptr += draw->pt.vertex_buffer[buf].stride * ii; ptr += draw->pt.vertex_element[j].src_offset; debug_printf(" Attr %u: ", j); @@ -301,8 +301,8 @@ draw_arrays(struct draw_context *draw, unsigned prim, } debug_printf("Buffers:\n"); for (i = 0; i < draw->pt.nr_vertex_buffers; i++) { - debug_printf(" pitch=%u offset=%u ptr=%p\n", - draw->pt.vertex_buffer[i].pitch, + debug_printf(" stride=%u offset=%u ptr=%p\n", + draw->pt.vertex_buffer[i].stride, draw->pt.vertex_buffer[i].buffer_offset, draw->pt.user.vbuffer[i]); } diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch.c b/src/gallium/auxiliary/draw/draw_pt_fetch.c index 6377f896fb..058caf7dcc 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch.c @@ -144,7 +144,7 @@ void draw_pt_fetch_run( struct pt_fetch *fetch, i, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].pitch ); + draw->pt.vertex_buffer[i].stride ); } translate->run_elts( translate, @@ -180,7 +180,7 @@ void draw_pt_fetch_run_linear( struct pt_fetch *fetch, i, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].pitch ); + draw->pt.vertex_buffer[i].stride ); } translate->run( translate, diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c index 0227652632..dcb7744b17 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_emit.c @@ -195,7 +195,7 @@ static void fetch_emit_prepare( struct draw_pt_middle_end *middle, i, ((char *)draw->pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].pitch ); + draw->pt.vertex_buffer[i].stride ); } *max_vertices = (draw->render->max_vertex_buffer_bytes / diff --git a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c index 1649cdc6cd..84ffe3296a 100644 --- a/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c +++ b/src/gallium/auxiliary/draw/draw_pt_fetch_shade_emit.c @@ -121,7 +121,7 @@ static void fse_prepare( struct draw_pt_middle_end *middle, } for (i = 0; i < 5 && i < nr_vbs; i++) { - if (draw->pt.vertex_buffer[i].pitch == 0) + if (draw->pt.vertex_buffer[i].stride == 0) fse->key.const_vbuffers |= (1<pt.user.vbuffer[i] + draw->pt.vertex_buffer[i].buffer_offset), - draw->pt.vertex_buffer[i].pitch ); + draw->pt.vertex_buffer[i].stride ); } *max_vertices = (draw->render->max_vertex_buffer_bytes / diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index d7bb74b87b..1af575530f 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -53,7 +53,7 @@ util_draw_vertex_buffer(struct pipe_context *pipe, /* tell pipe about the vertex buffer */ vbuffer.buffer = vbuf; - vbuffer.pitch = num_attribs * 4 * sizeof(float); /* vertex size */ + vbuffer.stride = num_attribs * 4 * sizeof(float); /* vertex size */ vbuffer.buffer_offset = offset; pipe->set_vertex_buffers(pipe, 1, &vbuffer); diff --git a/src/gallium/drivers/i965simple/brw_draw_upload.c b/src/gallium/drivers/i965simple/brw_draw_upload.c index 7c20ea52af..2d9ca3f2ea 100644 --- a/src/gallium/drivers/i965simple/brw_draw_upload.c +++ b/src/gallium/drivers/i965simple/brw_draw_upload.c @@ -223,7 +223,7 @@ boolean brw_upload_vertex_buffers( struct brw_context *brw ) break; } - vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i]->pitch; + vbp.vb[i].vb0.bits.pitch = brw->vb.vbo_array[i]->stride; vbp.vb[i].vb0.bits.pad = 0; vbp.vb[i].vb0.bits.access_type = BRW_VERTEXBUFFER_ACCESS_VERTEXDATA; vbp.vb[i].vb0.bits.vb_index = i; diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c index 556f981d4a..2d6d48ac16 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -503,7 +503,7 @@ nv30_vbo_validate(struct nv30_context *nv30) ve = &nv30->vtxelt[hw]; vb = &nv30->vtxbuf[ve->vertex_buffer_index]; - if (!vb->pitch) { + if (!vb->stride) { if (!sattr) sattr = so_new(16 * 5, 0); @@ -524,7 +524,7 @@ nv30_vbo_validate(struct nv30_context *nv30) so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset, vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, 0, NV34TCL_VTXBUF_ADDRESS_DMA1); - so_data (vtxfmt, ((vb->pitch << NV34TCL_VTXFMT_STRIDE_SHIFT) | + so_data (vtxfmt, ((vb->stride << NV34TCL_VTXFMT_STRIDE_SHIFT) | (ncomp << NV34TCL_VTXFMT_SIZE_SHIFT) | type)); } diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index 09f6e79d32..8f1834628f 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -501,7 +501,7 @@ nv40_vbo_validate(struct nv40_context *nv40) ve = &nv40->vtxelt[hw]; vb = &nv40->vtxbuf[ve->vertex_buffer_index]; - if (!vb->pitch) { + if (!vb->stride) { if (!sattr) sattr = so_new(16 * 5, 0); @@ -522,7 +522,7 @@ nv40_vbo_validate(struct nv40_context *nv40) so_reloc(vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset, vb_flags | NOUVEAU_BO_LOW | NOUVEAU_BO_OR, 0, NV40TCL_VTXBUF_ADDRESS_DMA1); - so_data (vtxfmt, ((vb->pitch << NV40TCL_VTXFMT_STRIDE_SHIFT) | + so_data (vtxfmt, ((vb->stride << NV40TCL_VTXFMT_STRIDE_SHIFT) | (ncomp << NV40TCL_VTXFMT_SIZE_SHIFT) | type)); } diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 435dc9777d..c482a4c241 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -226,7 +226,7 @@ nv50_vbo_validate(struct nv50_context *nv50) } so_method(vtxbuf, tesla, 0x900 + (i * 16), 3); - so_data (vtxbuf, 0x20000000 | vb->pitch); + so_data (vtxbuf, 0x20000000 | vb->stride); so_reloc (vtxbuf, vb->buffer, vb->buffer_offset + ve->src_offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index 986d939e0c..546231612f 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -280,7 +280,7 @@ void trace_dump_depth_stencil_alpha_state(const struct pipe_depth_stencil_alpha_ trace_dump_member(uint, &state->stencil[i], fail_op); trace_dump_member(uint, &state->stencil[i], zpass_op); trace_dump_member(uint, &state->stencil[i], zfail_op); - trace_dump_member(uint, &state->stencil[i], ref_value); + trace_dump_member(uint, &state->stencil[i], ref_value); trace_dump_member(uint, &state->stencil[i], value_mask); trace_dump_member(uint, &state->stencil[i], write_mask); trace_dump_struct_end(); @@ -435,7 +435,7 @@ void trace_dump_vertex_buffer(const struct pipe_vertex_buffer *state) trace_dump_struct_begin("pipe_vertex_buffer"); - trace_dump_member(uint, state, pitch); + trace_dump_member(uint, state, stride); trace_dump_member(uint, state, max_index); trace_dump_member(uint, state, buffer_offset); trace_dump_member(ptr, state, buffer); diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index 3bedc75294..492667c93a 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -103,7 +103,7 @@ struct pipe_screen { */ struct pipe_texture * (*texture_blanket)(struct pipe_screen *, const struct pipe_texture *templat, - const unsigned *pitch, + const unsigned *stride, struct pipe_buffer *buffer); void (*texture_release)(struct pipe_screen *, diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index abe7cbe9e7..46f62abf3f 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -337,7 +337,7 @@ struct pipe_texture */ struct pipe_vertex_buffer { - unsigned pitch; /**< stride to same attrib in next vertex, in bytes */ + unsigned stride; /**< stride to same attrib in next vertex, in bytes */ unsigned max_index; /**< number of vertices in this buffer */ unsigned buffer_offset; /**< offset to start of data in buffer, in bytes */ struct pipe_buffer *buffer; /**< the actual buffer */ diff --git a/src/mesa/state_tracker/st_draw.c b/src/mesa/state_tracker/st_draw.c index 31383b4887..630ad2bcdf 100644 --- a/src/mesa/state_tracker/st_draw.c +++ b/src/mesa/state_tracker/st_draw.c @@ -381,7 +381,7 @@ setup_interleaved_attribs(GLcontext *ctx, pipe_buffer_reference(pipe->screen, &vbuffer->buffer, stobj->buffer); vbuffer->buffer_offset = (unsigned) arrays[mesaAttr]->Ptr; } - vbuffer->pitch = stride; /* in bytes */ + vbuffer->stride = stride; /* in bytes */ vbuffer->max_index = max_index; } @@ -472,7 +472,7 @@ setup_non_interleaved_attribs(GLcontext *ctx, assert(velements[attr].src_offset <= 2048); /* 11-bit field */ /* common-case setup */ - vbuffer[attr].pitch = stride; /* in bytes */ + vbuffer[attr].stride = stride; /* in bytes */ vbuffer[attr].max_index = max_index; velements[attr].vertex_buffer_index = attr; velements[attr].nr_components = arrays[mesaAttr]->Size; @@ -569,7 +569,7 @@ st_draw_vbo(GLcontext *ctx, { GLuint i; for (i = 0; i < num_vbuffers; i++) { - printf("buffers[%d].pitch = %u\n", i, vbuffer[i].pitch); + printf("buffers[%d].stride = %u\n", i, vbuffer[i].stride); printf("buffers[%d].max_index = %u\n", i, vbuffer[i].max_index); printf("buffers[%d].buffer_offset = %u\n", i, vbuffer[i].buffer_offset); printf("buffers[%d].buffer = %p\n", i, (void*) vbuffer[i].buffer); diff --git a/src/mesa/state_tracker/st_draw_feedback.c b/src/mesa/state_tracker/st_draw_feedback.c index 834c3844c4..5c9c4506c2 100644 --- a/src/mesa/state_tracker/st_draw_feedback.c +++ b/src/mesa/state_tracker/st_draw_feedback.c @@ -171,7 +171,7 @@ st_feedback_draw_vbo(GLcontext *ctx, } /* common-case setup */ - vbuffers[attr].pitch = arrays[mesaAttr]->StrideB; /* in bytes */ + vbuffers[attr].stride = arrays[mesaAttr]->StrideB; /* in bytes */ vbuffers[attr].max_index = max_index; velements[attr].vertex_buffer_index = attr; velements[attr].nr_components = arrays[mesaAttr]->Size; -- cgit v1.2.3 From 2299f21f8da816fc4588492965e7dac422da1a96 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 26 Jan 2009 14:49:54 -0500 Subject: gallium: standardize api on the prefix "nr" --- src/gallium/auxiliary/cso_cache/cso_context.c | 2 +- src/gallium/auxiliary/util/u_blit.c | 4 ++-- src/gallium/auxiliary/util/u_gen_mipmap.c | 2 +- src/gallium/drivers/cell/ppu/cell_pipe_state.c | 2 +- src/gallium/drivers/nv30/nv30_state_emit.c | 2 +- src/gallium/drivers/nv30/nv30_state_fb.c | 4 ++-- src/gallium/drivers/nv40/nv40_state_emit.c | 2 +- src/gallium/drivers/nv40/nv40_state_fb.c | 4 ++-- src/gallium/drivers/nv50/nv50_clear.c | 4 ++-- src/gallium/drivers/nv50/nv50_state_validate.c | 4 ++-- src/gallium/drivers/softpipe/sp_clear.c | 2 +- src/gallium/drivers/softpipe/sp_context.c | 6 +++--- src/gallium/drivers/softpipe/sp_flush.c | 2 +- src/gallium/drivers/softpipe/sp_quad_blend.c | 4 ++-- src/gallium/drivers/softpipe/sp_quad_bufloop.c | 4 ++-- src/gallium/drivers/softpipe/sp_quad_colormask.c | 2 +- src/gallium/drivers/softpipe/sp_quad_coverage.c | 2 +- src/gallium/drivers/softpipe/sp_quad_output.c | 2 +- src/gallium/drivers/softpipe/sp_setup.c | 2 +- src/gallium/drivers/softpipe/sp_state_surface.c | 2 +- src/gallium/drivers/trace/tr_context.c | 4 ++-- src/gallium/drivers/trace/tr_state.c | 2 +- src/gallium/include/pipe/p_state.h | 2 +- src/gallium/state_trackers/g3dvl/vl_basic_csc.c | 2 +- src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 2 +- src/mesa/state_tracker/st_atom_framebuffer.c | 6 +++--- 26 files changed, 38 insertions(+), 38 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c index 68508f24de..a9157aad71 100644 --- a/src/gallium/auxiliary/cso_cache/cso_context.c +++ b/src/gallium/auxiliary/cso_cache/cso_context.c @@ -783,7 +783,7 @@ copy_framebuffer_state(struct pipe_framebuffer_state *dst, dst->width = src->width; dst->height = src->height; - dst->num_cbufs = src->num_cbufs; + dst->nr_cbufs = src->nr_cbufs; for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { pipe_surface_reference(&dst->cbufs[i], src->cbufs[i]); } diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index 2cef3338b5..bc88086b5e 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -415,7 +415,7 @@ util_blit_pixels(struct blit_state *ctx, memset(&fb, 0, sizeof(fb)); fb.width = dst->width; fb.height = dst->height; - fb.num_cbufs = 1; + fb.nr_cbufs = 1; fb.cbufs[0] = dst; cso_set_framebuffer(ctx->cso, &fb); @@ -526,7 +526,7 @@ util_blit_pixels_tex(struct blit_state *ctx, memset(&fb, 0, sizeof(fb)); fb.width = dst->width; fb.height = dst->height; - fb.num_cbufs = 1; + fb.nr_cbufs = 1; fb.cbufs[0] = dst; cso_set_framebuffer(ctx->cso, &fb); diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index b5eb896b7a..cb9776ed95 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -1490,7 +1490,7 @@ util_gen_mipmap(struct gen_mipmap_state *ctx, /* init framebuffer state */ memset(&fb, 0, sizeof(fb)); - fb.num_cbufs = 1; + fb.nr_cbufs = 1; /* set min/mag to same filter for faster sw speed */ ctx->sampler.mag_img_filter = filter; diff --git a/src/gallium/drivers/cell/ppu/cell_pipe_state.c b/src/gallium/drivers/cell/ppu/cell_pipe_state.c index 81efd137c7..ca358ed031 100644 --- a/src/gallium/drivers/cell/ppu/cell_pipe_state.c +++ b/src/gallium/drivers/cell/ppu/cell_pipe_state.c @@ -307,7 +307,7 @@ cell_set_framebuffer_state(struct pipe_context *pipe, */ cell->framebuffer.width = fb->width; cell->framebuffer.height = fb->height; - cell->framebuffer.num_cbufs = fb->num_cbufs; + cell->framebuffer.nr_cbufs = fb->nr_cbufs; for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) { pipe_surface_reference(&cell->framebuffer.cbufs[i], fb->cbufs[i]); } diff --git a/src/gallium/drivers/nv30/nv30_state_emit.c b/src/gallium/drivers/nv30/nv30_state_emit.c index 9480695d6e..f77b08ff69 100644 --- a/src/gallium/drivers/nv30/nv30_state_emit.c +++ b/src/gallium/drivers/nv30/nv30_state_emit.c @@ -24,7 +24,7 @@ nv30_state_do_validate(struct nv30_context *nv30, const struct pipe_framebuffer_state *fb = &nv30->framebuffer; unsigned i; - for (i = 0; i < fb->num_cbufs; i++) + for (i = 0; i < fb->nr_cbufs; i++) fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; if (fb->zsbuf) fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c index 73bdf7e56c..8536acc570 100644 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -14,7 +14,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) unsigned h = fb->height; rt_enable = 0; - for (i = 0; i < fb->num_cbufs; i++) { + for (i = 0; i < fb->nr_cbufs; i++) { if (colour_format) { assert(colour_format == fb->cbufs[i]->format); } else { @@ -34,7 +34,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) if (!(rt[0]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); - for (i = 1; i < fb->num_cbufs; i++) + for (i = 1; i < fb->nr_cbufs; i++) assert(!(rt[i]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); /* FIXME: NV34TCL_RT_FORMAT_LOG2_[WIDTH/HEIGHT] */ diff --git a/src/gallium/drivers/nv40/nv40_state_emit.c b/src/gallium/drivers/nv40/nv40_state_emit.c index 52ec4c044b..ce859def10 100644 --- a/src/gallium/drivers/nv40/nv40_state_emit.c +++ b/src/gallium/drivers/nv40/nv40_state_emit.c @@ -41,7 +41,7 @@ nv40_state_do_validate(struct nv40_context *nv40, const struct pipe_framebuffer_state *fb = &nv40->framebuffer; unsigned i; - for (i = 0; i < fb->num_cbufs; i++) + for (i = 0; i < fb->nr_cbufs; i++) fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; if (fb->zsbuf) fb->zsbuf->status = PIPE_SURFACE_STATUS_DEFINED; diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c index 28592d71c3..a2e09e18a4 100644 --- a/src/gallium/drivers/nv40/nv40_state_fb.c +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -14,7 +14,7 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) unsigned h = fb->height; rt_enable = 0; - for (i = 0; i < fb->num_cbufs; i++) { + for (i = 0; i < fb->nr_cbufs; i++) { if (colour_format) { assert(colour_format == fb->cbufs[i]->format); } else { @@ -35,7 +35,7 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) if (!(rt[0]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { assert(!(fb->width & (fb->width - 1)) && !(fb->height & (fb->height - 1))); - for (i = 1; i < fb->num_cbufs; i++) + for (i = 1; i < fb->nr_cbufs; i++) assert(!(rt[i]->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)); rt_format = NV40TCL_RT_FORMAT_TYPE_SWIZZLED | diff --git a/src/gallium/drivers/nv50/nv50_clear.c b/src/gallium/drivers/nv50/nv50_clear.c index a31a42d6b5..6380f397ea 100644 --- a/src/gallium/drivers/nv50/nv50_clear.c +++ b/src/gallium/drivers/nv50/nv50_clear.c @@ -39,10 +39,10 @@ nv50_clear(struct pipe_context *pipe, struct pipe_surface *ps, if (ps->format == PIPE_FORMAT_Z24S8_UNORM || ps->format == PIPE_FORMAT_Z16_UNORM) { - fb.num_cbufs = 0; + fb.nr_cbufs = 0; fb.zsbuf = ps; } else { - fb.num_cbufs = 1; + fb.nr_cbufs = 1; fb.cbufs[0] = ps; fb.zsbuf = NULL; } diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 198e25f448..4dc4c04493 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -31,7 +31,7 @@ nv50_state_validate_fb(struct nv50_context *nv50) struct pipe_framebuffer_state *fb = &nv50->framebuffer; unsigned i, w, h, gw = 0; - for (i = 0; i < fb->num_cbufs; i++) { + for (i = 0; i < fb->nr_cbufs; i++) { if (!gw) { w = fb->cbufs[i]->width; h = fb->cbufs[i]->height; @@ -178,7 +178,7 @@ nv50_state_validate(struct nv50_context *nv50) struct nouveau_stateobj *so; unsigned i; - for (i = 0; i < fb->num_cbufs; i++) + for (i = 0; i < fb->nr_cbufs; i++) fb->cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; if (fb->zsbuf) diff --git a/src/gallium/drivers/softpipe/sp_clear.c b/src/gallium/drivers/softpipe/sp_clear.c index dfa46c9fb7..ad108ec446 100644 --- a/src/gallium/drivers/softpipe/sp_clear.c +++ b/src/gallium/drivers/softpipe/sp_clear.c @@ -85,7 +85,7 @@ softpipe_clear(struct pipe_context *pipe, struct pipe_surface *ps, #endif } - for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) { + for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) { if (ps == sp_tile_cache_get_surface(softpipe->cbuf_cache[i])) { unsigned cv; if (ps->format != PIPE_FORMAT_A8R8G8B8_UNORM) { diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index 800f944838..d8a5631488 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -57,7 +57,7 @@ softpipe_map_surfaces(struct softpipe_context *sp) { unsigned i; - for (i = 0; i < sp->framebuffer.num_cbufs; i++) { + for (i = 0; i < sp->framebuffer.nr_cbufs; i++) { sp_tile_cache_map_surfaces(sp->cbuf_cache[i]); } @@ -73,11 +73,11 @@ softpipe_unmap_surfaces(struct softpipe_context *sp) { uint i; - for (i = 0; i < sp->framebuffer.num_cbufs; i++) + for (i = 0; i < sp->framebuffer.nr_cbufs; i++) sp_flush_tile_cache(sp, sp->cbuf_cache[i]); sp_flush_tile_cache(sp, sp->zsbuf_cache); - for (i = 0; i < sp->framebuffer.num_cbufs; i++) { + for (i = 0; i < sp->framebuffer.nr_cbufs; i++) { sp_tile_cache_unmap_surfaces(sp->cbuf_cache[i]); } sp_tile_cache_unmap_surfaces(sp->zsbuf_cache); diff --git a/src/gallium/drivers/softpipe/sp_flush.c b/src/gallium/drivers/softpipe/sp_flush.c index 401764bb43..c21faf57f3 100644 --- a/src/gallium/drivers/softpipe/sp_flush.c +++ b/src/gallium/drivers/softpipe/sp_flush.c @@ -57,7 +57,7 @@ softpipe_flush( struct pipe_context *pipe, } if (flags & PIPE_FLUSH_RENDER_CACHE) { - for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) + for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) if (softpipe->cbuf_cache[i]) sp_flush_tile_cache(softpipe, softpipe->cbuf_cache[i]); diff --git a/src/gallium/drivers/softpipe/sp_quad_blend.c b/src/gallium/drivers/softpipe/sp_quad_blend.c index 6f64c6e584..fb1d430a4f 100644 --- a/src/gallium/drivers/softpipe/sp_quad_blend.c +++ b/src/gallium/drivers/softpipe/sp_quad_blend.c @@ -105,7 +105,7 @@ logicop_quad(struct quad_stage *qs, struct quad_header *quad) uint cbuf; /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { float dest[4][QUAD_SIZE]; ubyte src[4][4], dst[4][4], res[4][4]; uint *src4 = (uint *) src; @@ -239,7 +239,7 @@ blend_quad(struct quad_stage *qs, struct quad_header *quad) } /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { float source[4][QUAD_SIZE], dest[4][QUAD_SIZE]; struct softpipe_cached_tile *tile = sp_get_cached_tile(softpipe, diff --git a/src/gallium/drivers/softpipe/sp_quad_bufloop.c b/src/gallium/drivers/softpipe/sp_quad_bufloop.c index 92e9af09c1..d7d6a6974d 100644 --- a/src/gallium/drivers/softpipe/sp_quad_bufloop.c +++ b/src/gallium/drivers/softpipe/sp_quad_bufloop.c @@ -17,7 +17,7 @@ cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad) unsigned i; assert(sizeof(quad->outputs.color) == sizeof(tmp)); - assert(softpipe->framebuffer.num_cbufs <= PIPE_MAX_COLOR_BUFS); + assert(softpipe->framebuffer.nr_cbufs <= PIPE_MAX_COLOR_BUFS); /* make copy of original colors since they can get modified * by blending and masking. @@ -28,7 +28,7 @@ cbuf_loop_quad(struct quad_stage *qs, struct quad_header *quad) */ memcpy(tmp, quad->outputs.color, sizeof(tmp)); - for (i = 0; i < softpipe->framebuffer.num_cbufs; i++) { + for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) { /* set current cbuffer */ #if 0 /* obsolete & going away */ softpipe->current_cbuf = i; diff --git a/src/gallium/drivers/softpipe/sp_quad_colormask.c b/src/gallium/drivers/softpipe/sp_quad_colormask.c index f32bdfab78..563c2fc739 100644 --- a/src/gallium/drivers/softpipe/sp_quad_colormask.c +++ b/src/gallium/drivers/softpipe/sp_quad_colormask.c @@ -51,7 +51,7 @@ colormask_quad(struct quad_stage *qs, struct quad_header *quad) uint cbuf; /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { float dest[4][QUAD_SIZE]; struct softpipe_cached_tile *tile = sp_get_cached_tile(softpipe, diff --git a/src/gallium/drivers/softpipe/sp_quad_coverage.c b/src/gallium/drivers/softpipe/sp_quad_coverage.c index ee29aa7dfe..c27fd1482d 100644 --- a/src/gallium/drivers/softpipe/sp_quad_coverage.c +++ b/src/gallium/drivers/softpipe/sp_quad_coverage.c @@ -53,7 +53,7 @@ coverage_quad(struct quad_stage *qs, struct quad_header *quad) uint cbuf; /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { float (*quadColor)[4] = quad->output.color[cbuf]; unsigned j; for (j = 0; j < QUAD_SIZE; j++) { diff --git a/src/gallium/drivers/softpipe/sp_quad_output.c b/src/gallium/drivers/softpipe/sp_quad_output.c index b7aac7f84a..a37c8b4c39 100644 --- a/src/gallium/drivers/softpipe/sp_quad_output.c +++ b/src/gallium/drivers/softpipe/sp_quad_output.c @@ -48,7 +48,7 @@ output_quad(struct quad_stage *qs, struct quad_header *quad) uint cbuf; /* loop over colorbuffer outputs */ - for (cbuf = 0; cbuf < softpipe->framebuffer.num_cbufs; cbuf++) { + for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++) { struct softpipe_cached_tile *tile = sp_get_cached_tile(softpipe, softpipe->cbuf_cache[cbuf], diff --git a/src/gallium/drivers/softpipe/sp_setup.c b/src/gallium/drivers/softpipe/sp_setup.c index 13d8017393..b1adb9cb7a 100644 --- a/src/gallium/drivers/softpipe/sp_setup.c +++ b/src/gallium/drivers/softpipe/sp_setup.c @@ -1497,7 +1497,7 @@ void setup_prepare( struct setup_context *setup ) } /* Mark surfaces as defined now */ - for (i = 0; i < sp->framebuffer.num_cbufs; i++){ + for (i = 0; i < sp->framebuffer.nr_cbufs; i++){ if (sp->framebuffer.cbufs[i]) { sp->framebuffer.cbufs[i]->status = PIPE_SURFACE_STATUS_DEFINED; } diff --git a/src/gallium/drivers/softpipe/sp_state_surface.c b/src/gallium/drivers/softpipe/sp_state_surface.c index b5376e522d..1493c65884 100644 --- a/src/gallium/drivers/softpipe/sp_state_surface.c +++ b/src/gallium/drivers/softpipe/sp_state_surface.c @@ -64,7 +64,7 @@ softpipe_set_framebuffer_state(struct pipe_context *pipe, } } - sp->framebuffer.num_cbufs = fb->num_cbufs; + sp->framebuffer.nr_cbufs = fb->nr_cbufs; /* zbuf changing? */ if (sp->framebuffer.zsbuf != fb->zsbuf) { diff --git a/src/gallium/drivers/trace/tr_context.c b/src/gallium/drivers/trace/tr_context.c index f0d51ad82e..ec8be27077 100644 --- a/src/gallium/drivers/trace/tr_context.c +++ b/src/gallium/drivers/trace/tr_context.c @@ -722,9 +722,9 @@ trace_context_set_framebuffer_state(struct pipe_context *_pipe, /* Unwrap the input state */ memcpy(&unwrapped_state, state, sizeof(unwrapped_state)); - for(i = 0; i < state->num_cbufs; ++i) + for(i = 0; i < state->nr_cbufs; ++i) unwrapped_state.cbufs[i] = trace_surface_unwrap(tr_ctx, state->cbufs[i]); - for(i = state->num_cbufs; i < PIPE_MAX_COLOR_BUFS; ++i) + for(i = state->nr_cbufs; i < PIPE_MAX_COLOR_BUFS; ++i) unwrapped_state.cbufs[i] = NULL; unwrapped_state.zsbuf = trace_surface_unwrap(tr_ctx, state->zsbuf); state = &unwrapped_state; diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index 8b147a8d37..155f1cb859 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -351,7 +351,7 @@ void trace_dump_framebuffer_state(const struct pipe_framebuffer_state *state) trace_dump_member(uint, state, width); trace_dump_member(uint, state, height); - trace_dump_member(uint, state, num_cbufs); + trace_dump_member(uint, state, nr_cbufs); trace_dump_member_array(ptr, state, cbufs); trace_dump_member(ptr, state, zsbuf); diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 0a0ca770da..1f4dc3f7dc 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -241,7 +241,7 @@ struct pipe_framebuffer_state unsigned width, height; /** multiple colorbuffers for multiple render targets */ - unsigned num_cbufs; + unsigned nr_cbufs; struct pipe_surface *cbufs[PIPE_MAX_COLOR_BUFS]; struct pipe_surface *zsbuf; /**< Z/stencil buffer */ diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c index da119ff1bd..53ef275349 100644 --- a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c +++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c @@ -657,7 +657,7 @@ static int vlInit csc->framebuffer_tex = NULL; csc->framebuffer.width = 0; csc->framebuffer.height = 0; - csc->framebuffer.num_cbufs = 1; + csc->framebuffer.nr_cbufs = 1; csc->framebuffer.cbufs[0] = NULL; csc->framebuffer.zsbuf = NULL; diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index f0f8294473..789042f6f2 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -1052,7 +1052,7 @@ static int vlInit mc->render_target.width = vlRoundUpPOT(mc->picture_width); mc->render_target.height = vlRoundUpPOT(mc->picture_height); - mc->render_target.num_cbufs = 1; + mc->render_target.nr_cbufs = 1; /* FB for MC stage is a vlSurface created by the user, set at render time */ mc->render_target.zsbuf = NULL; diff --git a/src/mesa/state_tracker/st_atom_framebuffer.c b/src/mesa/state_tracker/st_atom_framebuffer.c index 902bdf94f2..625efdd66b 100644 --- a/src/mesa/state_tracker/st_atom_framebuffer.c +++ b/src/mesa/state_tracker/st_atom_framebuffer.c @@ -108,7 +108,7 @@ update_framebuffer_state( struct st_context *st ) /* Examine Mesa's ctx->DrawBuffer->_ColorDrawBuffers state * to determine which surfaces to draw to */ - framebuffer->num_cbufs = 0; + framebuffer->nr_cbufs = 0; for (i = 0; i < fb->_NumColorDrawBuffers; i++) { strb = st_renderbuffer(fb->_ColorDrawBuffers[i]); @@ -119,8 +119,8 @@ update_framebuffer_state( struct st_context *st ) } if (strb->surface) { - framebuffer->cbufs[framebuffer->num_cbufs] = strb->surface; - framebuffer->num_cbufs++; + framebuffer->cbufs[framebuffer->nr_cbufs] = strb->surface; + framebuffer->nr_cbufs++; } } -- cgit v1.2.3 From 665d6d6c1e5ebbb925e73bd3637d228def5a977d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 28 Jan 2009 15:52:39 +0000 Subject: pipebuffer: Export the pipe buffer winsys hooks. --- src/gallium/auxiliary/pipebuffer/pb_buffer.h | 4 -- src/gallium/auxiliary/pipebuffer/pb_winsys.c | 9 ++-- src/gallium/auxiliary/pipebuffer/pb_winsys.h | 79 ++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 8 deletions(-) create mode 100644 src/gallium/auxiliary/pipebuffer/pb_winsys.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index 4f1a8a3894..a4650e469f 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -240,10 +240,6 @@ pb_malloc_buffer_create(size_t size, const struct pb_desc *desc); -void -pb_init_winsys(struct pipe_winsys *winsys); - - #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/pipebuffer/pb_winsys.c b/src/gallium/auxiliary/pipebuffer/pb_winsys.c index 28d137dbc4..e2f1008cc2 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_winsys.c +++ b/src/gallium/auxiliary/pipebuffer/pb_winsys.c @@ -38,6 +38,7 @@ #include "util/u_memory.h" #include "pb_buffer.h" +#include "pb_winsys.h" /** @@ -110,7 +111,7 @@ pb_user_buffer_vtbl = { }; -static struct pipe_buffer * +struct pipe_buffer * pb_winsys_user_buffer_create(struct pipe_winsys *winsys, void *data, unsigned bytes) @@ -132,7 +133,7 @@ pb_winsys_user_buffer_create(struct pipe_winsys *winsys, } -static void * +void * pb_winsys_buffer_map(struct pipe_winsys *winsys, struct pipe_buffer *buf, unsigned flags) @@ -142,7 +143,7 @@ pb_winsys_buffer_map(struct pipe_winsys *winsys, } -static void +void pb_winsys_buffer_unmap(struct pipe_winsys *winsys, struct pipe_buffer *buf) { @@ -151,7 +152,7 @@ pb_winsys_buffer_unmap(struct pipe_winsys *winsys, } -static void +void pb_winsys_buffer_destroy(struct pipe_winsys *winsys, struct pipe_buffer *buf) { diff --git a/src/gallium/auxiliary/pipebuffer/pb_winsys.h b/src/gallium/auxiliary/pipebuffer/pb_winsys.h new file mode 100644 index 0000000000..7cf6f8e3f0 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_winsys.h @@ -0,0 +1,79 @@ +/************************************************************************** + * + * Copyright 2009 VMWare, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * Drop-in replacements for winsys buffer callbacks. + * + * The requirement to use this functions is that all pipe_buffers must be in + * fact pb_buffers. + * + * @author Jose Fonseca + */ + +#ifndef PB_WINSYS_H_ +#define PB_WINSYS_H_ + + +#include "pipe/p_compiler.h" +#include "pipe/p_debug.h" +#include "pipe/p_state.h" +#include "pipe/p_inlines.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +struct pipe_buffer * +pb_winsys_user_buffer_create(struct pipe_winsys *winsys, + void *data, + unsigned bytes); + +void * +pb_winsys_buffer_map(struct pipe_winsys *winsys, + struct pipe_buffer *buf, + unsigned flags); + +void +pb_winsys_buffer_unmap(struct pipe_winsys *winsys, + struct pipe_buffer *buf); + +void +pb_winsys_buffer_destroy(struct pipe_winsys *winsys, + struct pipe_buffer *buf); + +void +pb_init_winsys(struct pipe_winsys *winsys); + + +#ifdef __cplusplus +} +#endif + +#endif /*PB_WINSYS_H_*/ -- cgit v1.2.3 From 815de0a5dfadbe9a2618b94c4f28a799cc501a14 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 28 Jan 2009 15:53:09 +0000 Subject: pipebuffer: Fix alignment assertion. --- src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index a976d3041a..d4434c6962 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -154,8 +154,8 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr, struct mm_buffer *mm_buf; /* We don't handle alignments larger then the one initially setup */ - assert(desc->alignment % (1 << mm->align2) == 0); - if(desc->alignment % (1 << mm->align2)) + assert(pb_check_alignment(desc->alignment, 1 << mm->align2)); + if(!pb_check_alignment(desc->alignment, 1 << mm->align2)) return NULL; pipe_mutex_lock(mm->mutex); -- cgit v1.2.3 From 1be4d4d4c6af61bda9e682e3fd347228d2589f8a Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Wed, 28 Jan 2009 15:53:21 +0000 Subject: pipebuffer: More assertions. --- src/gallium/auxiliary/pipebuffer/pb_buffer.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index a4650e469f..fb0ba15948 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -148,6 +148,7 @@ pb_map(struct pb_buffer *buf, assert(buf); if(!buf) return NULL; + assert(buf->base.refcount > 0); return buf->vtbl->map(buf, flags); } @@ -158,6 +159,7 @@ pb_unmap(struct pb_buffer *buf) assert(buf); if(!buf) return; + assert(buf->base.refcount > 0); buf->vtbl->unmap(buf); } @@ -173,7 +175,10 @@ pb_get_base_buffer( struct pb_buffer *buf, offset = 0; return; } + assert(buf->base.refcount > 0); buf->vtbl->get_base_buffer(buf, base_buf, offset); + assert(*base_buf); + assert(*offset < (*base_buf)->base.size); } -- cgit v1.2.3 From e06474dbae6979177629fb6187331291ff230c65 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 24 Nov 2008 13:59:06 +0900 Subject: pipebuffer: Implement proper buffer validation. --- src/gallium/auxiliary/pipebuffer/pb_buffer.h | 36 ++++- .../auxiliary/pipebuffer/pb_buffer_fenced.c | 150 +++++++++++++-------- .../auxiliary/pipebuffer/pb_buffer_fenced.h | 21 +-- .../auxiliary/pipebuffer/pb_buffer_malloc.c | 20 +++ src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 23 +++- src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c | 26 +++- .../auxiliary/pipebuffer/pb_bufmgr_fenced.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c | 25 +++- src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c | 25 +++- src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c | 21 +++ src/gallium/auxiliary/pipebuffer/pb_validate.c | 84 +++++++++--- src/gallium/auxiliary/pipebuffer/pb_validate.h | 10 +- src/gallium/auxiliary/pipebuffer/pb_winsys.c | 22 ++- 14 files changed, 359 insertions(+), 108 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index fb0ba15948..7cba5fa441 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -37,7 +37,7 @@ * There is no obligation of a winsys driver to use this library. And a pipe * driver should be completly agnostic about it. * - * \author Jos� Fonseca + * \author Jose Fonseca */ #ifndef PB_BUFFER_H_ @@ -46,6 +46,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" +#include "pipe/p_error.h" #include "pipe/p_state.h" #include "pipe/p_inlines.h" @@ -56,6 +57,8 @@ extern "C" { struct pb_vtbl; +struct pb_validate; + /** * Buffer description. @@ -104,6 +107,13 @@ struct pb_vtbl void (*unmap)( struct pb_buffer *buf ); + enum pipe_error (*validate)( struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags ); + + void (*fence)( struct pb_buffer *buf, + struct pipe_fence_handle *fence ); + /** * Get the base buffer and the offset. * @@ -118,6 +128,7 @@ struct pb_vtbl void (*get_base_buffer)( struct pb_buffer *buf, struct pb_buffer **base_buf, unsigned *offset ); + }; @@ -176,12 +187,35 @@ pb_get_base_buffer( struct pb_buffer *buf, return; } assert(buf->base.refcount > 0); + assert(buf->vtbl->get_base_buffer); buf->vtbl->get_base_buffer(buf, base_buf, offset); assert(*base_buf); assert(*offset < (*base_buf)->base.size); } +static INLINE enum pipe_error +pb_validate(struct pb_buffer *buf, struct pb_validate *vl, unsigned flags) +{ + assert(buf); + if(!buf) + return PIPE_ERROR; + assert(buf->vtbl->validate); + return buf->vtbl->validate(buf, vl, flags); +} + + +static INLINE void +pb_fence(struct pb_buffer *buf, struct pipe_fence_handle *fence) +{ + assert(buf); + if(!buf) + return; + assert(buf->vtbl->fence); + buf->vtbl->fence(buf, fence); +} + + static INLINE void pb_destroy(struct pb_buffer *buf) { diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index 17b2781052..aa4b096274 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -29,7 +29,7 @@ * \file * Implementation of fenced buffers. * - * \author José Fonseca + * \author Jose Fonseca * \author Thomas Hellström */ @@ -59,13 +59,6 @@ */ #define SUPER(__derived) (&(__derived)->base) -#define PIPE_BUFFER_USAGE_CPU_READ_WRITE \ - ( PIPE_BUFFER_USAGE_CPU_READ | PIPE_BUFFER_USAGE_CPU_WRITE ) -#define PIPE_BUFFER_USAGE_GPU_READ_WRITE \ - ( PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE ) -#define PIPE_BUFFER_USAGE_WRITE \ - ( PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_GPU_WRITE ) - struct fenced_buffer_list { @@ -97,6 +90,8 @@ struct fenced_buffer unsigned flags; unsigned mapcount; + struct pb_validate *vl; + unsigned validation_flags; struct pipe_fence_handle *fence; struct list_head head; @@ -108,7 +103,6 @@ static INLINE struct fenced_buffer * fenced_buffer(struct pb_buffer *buf) { assert(buf); - assert(buf->vtbl == &fenced_buffer_vtbl); return (struct fenced_buffer *)buf; } @@ -274,6 +268,7 @@ fenced_buffer_map(struct pb_buffer *buf, struct fenced_buffer *fenced_buf = fenced_buffer(buf); void *map; + assert(flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE); assert(!(flags & ~PIPE_BUFFER_USAGE_CPU_READ_WRITE)); flags &= PIPE_BUFFER_USAGE_CPU_READ_WRITE; @@ -315,6 +310,93 @@ fenced_buffer_unmap(struct pb_buffer *buf) } +static enum pipe_error +fenced_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + struct fenced_buffer *fenced_buf = fenced_buffer(buf); + enum pipe_error ret; + + if(!vl) { + /* invalidate */ + fenced_buf->vl = NULL; + fenced_buf->validation_flags = 0; + return PIPE_OK; + } + + assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); + assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE)); + flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE; + + /* Buffer cannot be validated in two different lists */ + if(fenced_buf->vl && fenced_buf->vl != vl) + return PIPE_ERROR_RETRY; + + /* Do not validate if buffer is still mapped */ + if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) { + /* TODO: wait for the thread that mapped the buffer to unmap it */ + return PIPE_ERROR_RETRY; + } + + if(fenced_buf->vl == vl && + (fenced_buf->validation_flags & flags) == flags) { + /* Nothing to do -- buffer already validated */ + return PIPE_OK; + } + + /* Final sanity checking */ + assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE)); + assert(!fenced_buf->mapcount); + + ret = pb_validate(fenced_buf->buffer, vl, flags); + if (ret != PIPE_OK) + return ret; + + fenced_buf->vl = vl; + fenced_buf->validation_flags |= flags; + + return PIPE_OK; +} + + +static void +fenced_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + struct fenced_buffer *fenced_buf; + struct fenced_buffer_list *fenced_list; + struct pipe_winsys *winsys; + + fenced_buf = fenced_buffer(buf); + fenced_list = fenced_buf->list; + winsys = fenced_list->winsys; + + if(fence == fenced_buf->fence) { + /* Nothing to do */ + return; + } + + assert(fenced_buf->vl); + assert(fenced_buf->validation_flags); + + pipe_mutex_lock(fenced_list->mutex); + if (fenced_buf->fence) + _fenced_buffer_remove(fenced_list, fenced_buf); + if (fence) { + winsys->fence_reference(winsys, &fenced_buf->fence, fence); + fenced_buf->flags |= fenced_buf->validation_flags; + _fenced_buffer_add(fenced_buf); + } + pipe_mutex_unlock(fenced_list->mutex); + + pb_fence(fenced_buf->buffer, fence); + + fenced_buf->vl = NULL; + fenced_buf->validation_flags = 0; +} + + static void fenced_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -325,11 +407,13 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf, } -const struct pb_vtbl +static const struct pb_vtbl fenced_buffer_vtbl = { fenced_buffer_destroy, fenced_buffer_map, fenced_buffer_unmap, + fenced_buffer_validate, + fenced_buffer_fence, fenced_buffer_get_base_buffer }; @@ -362,52 +446,6 @@ fenced_buffer_create(struct fenced_buffer_list *fenced_list, } -void -buffer_fence(struct pb_buffer *buf, - struct pipe_fence_handle *fence) -{ - struct fenced_buffer *fenced_buf; - struct fenced_buffer_list *fenced_list; - struct pipe_winsys *winsys; - /* FIXME: receive this as a parameter */ - unsigned flags = fence ? PIPE_BUFFER_USAGE_GPU_READ_WRITE : 0; - - /* This is a public function, so be extra cautious with the buffer passed, - * as happens frequently to receive null buffers, or pointer to buffers - * other than fenced buffers. */ - assert(buf); - if(!buf) - return; - assert(buf->vtbl == &fenced_buffer_vtbl); - if(buf->vtbl != &fenced_buffer_vtbl) - return; - - fenced_buf = fenced_buffer(buf); - fenced_list = fenced_buf->list; - winsys = fenced_list->winsys; - - if(!fence || fence == fenced_buf->fence) { - /* Handle the same fence case specially, not only because it is a fast - * path, but mostly to avoid serializing two writes with the same fence, - * as that would bring the hardware down to synchronous operation without - * any benefit. - */ - fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE; - return; - } - - pipe_mutex_lock(fenced_list->mutex); - if (fenced_buf->fence) - _fenced_buffer_remove(fenced_list, fenced_buf); - if (fence) { - winsys->fence_reference(winsys, &fenced_buf->fence, fence); - fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE; - _fenced_buffer_add(fenced_buf); - } - pipe_mutex_unlock(fenced_list->mutex); -} - - struct fenced_buffer_list * fenced_buffer_list_create(struct pipe_winsys *winsys) { diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h index 50d5891bdb..b15c676194 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.h @@ -44,7 +44,7 @@ * Between the handle's destruction, and the fence signalling, the buffer is * stored in a fenced buffer list. * - * \author José Fonseca + * \author Jose Fonseca */ #ifndef PB_BUFFER_FENCED_H_ @@ -70,14 +70,6 @@ struct pipe_fence_handle; struct fenced_buffer_list; -/** - * The fenced buffer's virtual function table. - * - * NOTE: Made public for debugging purposes. - */ -extern const struct pb_vtbl fenced_buffer_vtbl; - - /** * Create a fenced buffer list. * @@ -108,17 +100,6 @@ fenced_buffer_create(struct fenced_buffer_list *fenced, struct pb_buffer *buffer); -/** - * Set a buffer's fence. - * - * NOTE: Although it takes a generic pb_buffer argument, it will fail - * on everything but buffers returned by fenced_buffer_create. - */ -void -buffer_fence(struct pb_buffer *buf, - struct pipe_fence_handle *fence); - - #ifdef __cplusplus } #endif diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c index 1bf22a2ec0..53f497cfb0 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c @@ -81,6 +81,24 @@ malloc_buffer_unmap(struct pb_buffer *buf) } +static enum pipe_error +malloc_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + assert(0); + return PIPE_ERROR; +} + + +static void +malloc_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + assert(0); +} + + static void malloc_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -96,6 +114,8 @@ malloc_buffer_vtbl = { malloc_buffer_destroy, malloc_buffer_map, malloc_buffer_unmap, + malloc_buffer_validate, + malloc_buffer_fence, malloc_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index cafbee045a..8fe2704708 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -43,7 +43,7 @@ * - the fenced buffer manager, which will delay buffer destruction until the * the moment the card finishing processing it. * - * \author José Fonseca + * \author Jose Fonseca */ #ifndef PB_BUFMGR_H_ diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 8f118874ec..f57a7bffd7 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -29,7 +29,7 @@ * \file * Buffer cache. * - * \author José Fonseca + * \author Jose Fonseca * \author Thomas Hellström */ @@ -183,6 +183,25 @@ pb_cache_buffer_unmap(struct pb_buffer *_buf) } +static enum pipe_error +pb_cache_buffer_validate(struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pb_cache_buffer *buf = pb_cache_buffer(_buf); + return pb_validate(buf->buffer, vl, flags); +} + + +static void +pb_cache_buffer_fence(struct pb_buffer *_buf, + struct pipe_fence_handle *fence) +{ + struct pb_cache_buffer *buf = pb_cache_buffer(_buf); + pb_fence(buf->buffer, fence); +} + + static void pb_cache_buffer_get_base_buffer(struct pb_buffer *_buf, struct pb_buffer **base_buf, @@ -198,6 +217,8 @@ pb_cache_buffer_vtbl = { pb_cache_buffer_destroy, pb_cache_buffer_map, pb_cache_buffer_unmap, + pb_cache_buffer_validate, + pb_cache_buffer_fence, pb_cache_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index 1675e6e182..62639fe1c8 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -29,7 +29,7 @@ * \file * Debug buffer manager to detect buffer under- and overflows. * - * \author José Fonseca + * \author Jose Fonseca */ @@ -255,11 +255,35 @@ pb_debug_buffer_get_base_buffer(struct pb_buffer *_buf, } +static enum pipe_error +pb_debug_buffer_validate(struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pb_debug_buffer *buf = pb_debug_buffer(_buf); + + pb_debug_buffer_check(buf); + + return pb_validate(buf->buffer, vl, flags); +} + + +static void +pb_debug_buffer_fence(struct pb_buffer *_buf, + struct pipe_fence_handle *fence) +{ + struct pb_debug_buffer *buf = pb_debug_buffer(_buf); + pb_fence(buf->buffer, fence); +} + + const struct pb_vtbl pb_debug_buffer_vtbl = { pb_debug_buffer_destroy, pb_debug_buffer_map, pb_debug_buffer_unmap, + pb_debug_buffer_validate, + pb_debug_buffer_fence, pb_debug_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c index e2594ea236..513ed28ca6 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c @@ -30,7 +30,7 @@ * \file * A buffer manager that wraps buffers in fenced buffers. * - * \author José Fonseca + * \author Jose Fonseca */ diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c index d4434c6962..2f5a5d8ea0 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c @@ -29,7 +29,7 @@ * \file * Buffer manager using the old texture memory manager. * - * \author José Fonseca + * \author Jose Fonseca */ @@ -124,6 +124,27 @@ mm_buffer_unmap(struct pb_buffer *buf) } +static enum pipe_error +mm_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + struct mm_buffer *mm_buf = mm_buffer(buf); + struct mm_pb_manager *mm = mm_buf->mgr; + return pb_validate(mm->buffer, vl, flags); +} + + +static void +mm_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + struct mm_buffer *mm_buf = mm_buffer(buf); + struct mm_pb_manager *mm = mm_buf->mgr; + pb_fence(mm->buffer, fence); +} + + static void mm_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -141,6 +162,8 @@ mm_buffer_vtbl = { mm_buffer_destroy, mm_buffer_map, mm_buffer_unmap, + mm_buffer_validate, + mm_buffer_fence, mm_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c index 61ac291ed7..a6ff37653e 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c @@ -30,7 +30,7 @@ * \file * Batch buffer pool management. * - * \author José Fonseca + * \author Jose Fonseca * \author Thomas Hellström */ @@ -138,6 +138,27 @@ pool_buffer_unmap(struct pb_buffer *buf) } +static enum pipe_error +pool_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pool_buffer *pool_buf = pool_buffer(buf); + struct pool_pb_manager *pool = pool_buf->mgr; + return pb_validate(pool->buffer, vl, flags); +} + + +static void +pool_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + struct pool_buffer *pool_buf = pool_buffer(buf); + struct pool_pb_manager *pool = pool_buf->mgr; + pb_fence(pool->buffer, fence); +} + + static void pool_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -155,6 +176,8 @@ pool_buffer_vtbl = { pool_buffer_destroy, pool_buffer_map, pool_buffer_unmap, + pool_buffer_validate, + pool_buffer_fence, pool_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c index 2a80154920..9b9fedccb4 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c @@ -248,6 +248,25 @@ pb_slab_buffer_unmap(struct pb_buffer *_buf) } +static enum pipe_error +pb_slab_buffer_validate(struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + return pb_validate(buf->slab->bo, vl, flags); +} + + +static void +pb_slab_buffer_fence(struct pb_buffer *_buf, + struct pipe_fence_handle *fence) +{ + struct pb_slab_buffer *buf = pb_slab_buffer(_buf); + pb_fence(buf->slab->bo, fence); +} + + static void pb_slab_buffer_get_base_buffer(struct pb_buffer *_buf, struct pb_buffer **base_buf, @@ -264,6 +283,8 @@ pb_slab_buffer_vtbl = { pb_slab_buffer_destroy, pb_slab_buffer_map, pb_slab_buffer_unmap, + pb_slab_buffer_validate, + pb_slab_buffer_fence, pb_slab_buffer_get_base_buffer }; diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.c b/src/gallium/auxiliary/pipebuffer/pb_validate.c index 1e54fc39d4..94532bb4ce 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.c +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.c @@ -46,9 +46,16 @@ #define PB_VALIDATE_INITIAL_SIZE 1 /* 512 */ +struct pb_validate_entry +{ + struct pb_buffer *buf; + unsigned flags; +}; + + struct pb_validate { - struct pb_buffer **buffers; + struct pb_validate_entry *entries; unsigned used; unsigned size; }; @@ -56,54 +63,87 @@ struct pb_validate enum pipe_error pb_validate_add_buffer(struct pb_validate *vl, - struct pb_buffer *buf) + struct pb_buffer *buf, + unsigned flags) { assert(buf); if(!buf) return PIPE_ERROR; + assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE); + assert(!(flags & ~PIPE_BUFFER_USAGE_GPU_READ_WRITE)); + flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE; + /* We only need to store one reference for each buffer, so avoid storing - * consecutive references for the same buffer. It might not be the more - * common pasttern, but it is easy to implement. + * consecutive references for the same buffer. It might not be the most + * common pattern, but it is easy to implement. */ - if(vl->used && vl->buffers[vl->used - 1] == buf) { + if(vl->used && vl->entries[vl->used - 1].buf == buf) { + vl->entries[vl->used - 1].flags |= flags; return PIPE_OK; } /* Grow the table */ if(vl->used == vl->size) { unsigned new_size; - struct pb_buffer **new_buffers; + struct pb_validate_entry *new_entries; new_size = vl->size * 2; if(!new_size) return PIPE_ERROR_OUT_OF_MEMORY; - new_buffers = (struct pb_buffer **)REALLOC(vl->buffers, - vl->size*sizeof(struct pb_buffer *), - new_size*sizeof(struct pb_buffer *)); - if(!new_buffers) + new_entries = (struct pb_validate_entry *)REALLOC(vl->entries, + vl->size*sizeof(struct pb_validate_entry), + new_size*sizeof(struct pb_validate_entry)); + if(!new_entries) return PIPE_ERROR_OUT_OF_MEMORY; - memset(new_buffers + vl->size, 0, (new_size - vl->size)*sizeof(struct pb_buffer *)); + memset(new_entries + vl->size, 0, (new_size - vl->size)*sizeof(struct pb_validate_entry)); vl->size = new_size; - vl->buffers = new_buffers; + vl->entries = new_entries; } - assert(!vl->buffers[vl->used]); - pb_reference(&vl->buffers[vl->used], buf); + assert(!vl->entries[vl->used].buf); + pb_reference(&vl->entries[vl->used].buf, buf); + vl->entries[vl->used].flags = flags; ++vl->used; return PIPE_OK; } +enum pipe_error +pb_validate_foreach(struct pb_validate *vl, + enum pipe_error (*callback)(struct pb_buffer *buf, void *data), + void *data) +{ + unsigned i; + for(i = 0; i < vl->used; ++i) { + enum pipe_error ret; + ret = callback(vl->entries[i].buf, data); + if(ret != PIPE_OK) + return ret; + } + return PIPE_OK; +} + + enum pipe_error pb_validate_validate(struct pb_validate *vl) { - /* FIXME: go through each buffer, ensure its not mapped, its address is - * available -- requires a new pb_buffer interface */ + unsigned i; + + for(i = 0; i < vl->used; ++i) { + enum pipe_error ret; + ret = pb_validate(vl->entries[i].buf, vl, vl->entries[i].flags); + if(ret != PIPE_OK) { + while(i--) + pb_validate(vl->entries[i].buf, NULL, 0); + return ret; + } + } + return PIPE_OK; } @@ -114,8 +154,8 @@ pb_validate_fence(struct pb_validate *vl, { unsigned i; for(i = 0; i < vl->used; ++i) { - buffer_fence(vl->buffers[i], fence); - pb_reference(&vl->buffers[i], NULL); + pb_fence(vl->entries[i].buf, fence); + pb_reference(&vl->entries[i].buf, NULL); } vl->used = 0; } @@ -126,8 +166,8 @@ pb_validate_destroy(struct pb_validate *vl) { unsigned i; for(i = 0; i < vl->used; ++i) - pb_reference(&vl->buffers[i], NULL); - FREE(vl->buffers); + pb_reference(&vl->entries[i].buf, NULL); + FREE(vl->entries); FREE(vl); } @@ -142,8 +182,8 @@ pb_validate_create() return NULL; vl->size = PB_VALIDATE_INITIAL_SIZE; - vl->buffers = (struct pb_buffer **)CALLOC(vl->size, sizeof(struct pb_buffer *)); - if(!vl->buffers) { + vl->entries = (struct pb_validate_entry *)CALLOC(vl->size, sizeof(struct pb_validate_entry)); + if(!vl->entries) { FREE(vl); return NULL; } diff --git a/src/gallium/auxiliary/pipebuffer/pb_validate.h b/src/gallium/auxiliary/pipebuffer/pb_validate.h index 3db1d5330b..dfb84df1ce 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_validate.h +++ b/src/gallium/auxiliary/pipebuffer/pb_validate.h @@ -58,7 +58,13 @@ struct pb_validate; enum pipe_error pb_validate_add_buffer(struct pb_validate *vl, - struct pb_buffer *buf); + struct pb_buffer *buf, + unsigned flags); + +enum pipe_error +pb_validate_foreach(struct pb_validate *vl, + enum pipe_error (*callback)(struct pb_buffer *buf, void *data), + void *data); /** * Validate all buffers for hardware access. @@ -71,7 +77,7 @@ pb_validate_validate(struct pb_validate *vl); /** * Fence all buffers and clear the list. * - * Should be called right before issuing commands to the hardware. + * Should be called right after issuing commands to the hardware. */ void pb_validate_fence(struct pb_validate *vl, diff --git a/src/gallium/auxiliary/pipebuffer/pb_winsys.c b/src/gallium/auxiliary/pipebuffer/pb_winsys.c index e2f1008cc2..452835fdad 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_winsys.c +++ b/src/gallium/auxiliary/pipebuffer/pb_winsys.c @@ -30,7 +30,7 @@ * Implementation of client buffer (also designated as "user buffers"), which * are just state-tracker owned data masqueraded as buffers. * - * \author José Fonseca + * \author Jose Fonseca */ @@ -92,6 +92,24 @@ pb_user_buffer_unmap(struct pb_buffer *buf) } +static enum pipe_error +pb_user_buffer_validate(struct pb_buffer *buf, + struct pb_validate *vl, + unsigned flags) +{ + assert(0); + return PIPE_ERROR; +} + + +static void +pb_user_buffer_fence(struct pb_buffer *buf, + struct pipe_fence_handle *fence) +{ + assert(0); +} + + static void pb_user_buffer_get_base_buffer(struct pb_buffer *buf, struct pb_buffer **base_buf, @@ -107,6 +125,8 @@ pb_user_buffer_vtbl = { pb_user_buffer_destroy, pb_user_buffer_map, pb_user_buffer_unmap, + pb_user_buffer_validate, + pb_user_buffer_fence, pb_user_buffer_get_base_buffer }; -- cgit v1.2.3 From 444e98de31c5456008a4b3568373db02ddc5385f Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Mon, 24 Nov 2008 23:17:49 +0900 Subject: pipebuffer: Ondemand buffer manager. A variation of malloc buffers which get transferred to real graphics memory when there is an attempt to validate them. --- src/gallium/auxiliary/pipebuffer/Makefile | 1 + src/gallium/auxiliary/pipebuffer/SConscript | 1 + src/gallium/auxiliary/pipebuffer/pb_bufmgr.h | 14 + .../auxiliary/pipebuffer/pb_bufmgr_ondemand.c | 303 +++++++++++++++++++++ 4 files changed, 319 insertions(+) create mode 100644 src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile index f9b39d9ce0..4bcf08f8a5 100644 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -11,6 +11,7 @@ C_SOURCES = \ pb_bufmgr_debug.c \ pb_bufmgr_fenced.c \ pb_bufmgr_mm.c \ + pb_bufmgr_ondemand.c \ pb_bufmgr_pool.c \ pb_bufmgr_slab.c \ pb_validate.c \ diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript index 56a40dda0d..4acf721808 100644 --- a/src/gallium/auxiliary/pipebuffer/SConscript +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -10,6 +10,7 @@ pipebuffer = env.ConvenienceLibrary( 'pb_bufmgr_debug.c', 'pb_bufmgr_fenced.c', 'pb_bufmgr_mm.c', + 'pb_bufmgr_ondemand.c', 'pb_bufmgr_pool.c', 'pb_bufmgr_slab.c', 'pb_validate.c', diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h index 8fe2704708..0a8264a924 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h @@ -182,6 +182,20 @@ pb_alt_manager_create(struct pb_manager *provider1, struct pb_manager *provider2); +/** + * Ondemand buffer manager. + * + * Buffers are created in malloc'ed memory (fast and cached), and the constents + * is transfered to a buffer from the provider (typically in slow uncached + * memory) when there is an attempt to validate the buffer. + * + * Ideal for situations where one does not know before hand whether a given + * buffer will effectively be used by the hardware or not. + */ +struct pb_manager * +pb_ondemand_manager_create(struct pb_manager *provider); + + /** * Debug buffer manager to detect buffer under- and overflows. * diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c new file mode 100644 index 0000000000..ba02a84e62 --- /dev/null +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c @@ -0,0 +1,303 @@ +/************************************************************************** + * + * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * @file + * A variation of malloc buffers which get transferred to real graphics memory + * when there is an attempt to validate them. + * + * @author Jose Fonseca + */ + + +#include "pipe/p_debug.h" +#include "util/u_memory.h" +#include "pb_buffer.h" +#include "pb_bufmgr.h" + + +struct pb_ondemand_manager; + + +struct pb_ondemand_buffer +{ + struct pb_buffer base; + + struct pb_ondemand_manager *mgr; + + /** Regular malloc'ed memory */ + void *data; + unsigned mapcount; + + /** Real buffer */ + struct pb_buffer *buffer; + size_t size; + struct pb_desc desc; +}; + + +struct pb_ondemand_manager +{ + struct pb_manager base; + + struct pb_manager *provider; +}; + + +extern const struct pb_vtbl pb_ondemand_buffer_vtbl; + +static INLINE struct pb_ondemand_buffer * +pb_ondemand_buffer(struct pb_buffer *buf) +{ + assert(buf); + assert(buf->vtbl == &pb_ondemand_buffer_vtbl); + return (struct pb_ondemand_buffer *)buf; +} + +static INLINE struct pb_ondemand_manager * +pb_ondemand_manager(struct pb_manager *mgr) +{ + assert(mgr); + return (struct pb_ondemand_manager *)mgr; +} + + +static void +pb_ondemand_buffer_destroy(struct pb_buffer *_buf) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + pb_reference(&buf->buffer, NULL); + + align_free(buf->data); + + FREE(buf); +} + + +static void * +pb_ondemand_buffer_map(struct pb_buffer *_buf, + unsigned flags) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + if(buf->buffer) { + assert(!buf->data); + return pb_map(buf->buffer, flags); + } + else { + assert(buf->data); + ++buf->mapcount; + return buf->data; + } +} + + +static void +pb_ondemand_buffer_unmap(struct pb_buffer *_buf) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + if(buf->buffer) { + assert(!buf->data); + pb_unmap(buf->buffer); + } + else { + assert(buf->data); + assert(buf->mapcount); + if(buf->mapcount) + --buf->mapcount; + } +} + + +static enum pipe_error +pb_ondemand_buffer_instantiate(struct pb_ondemand_buffer *buf) +{ + if(!buf->buffer) { + struct pb_manager *provider = buf->mgr->provider; + uint8_t *map; + + assert(!buf->mapcount); + + buf->buffer = provider->create_buffer(provider, buf->size, &buf->desc); + if(!buf->buffer) + return PIPE_ERROR_OUT_OF_MEMORY; + + map = pb_map(buf->buffer, PIPE_BUFFER_USAGE_CPU_READ); + if(!map) { + pb_reference(&buf->buffer, NULL); + return PIPE_ERROR; + } + + memcpy(map, buf->data, buf->size); + + pb_unmap(buf->buffer); + + if(!buf->mapcount) { + FREE(buf->data); + buf->data = NULL; + } + } + + return PIPE_OK; +} + +static enum pipe_error +pb_ondemand_buffer_validate(struct pb_buffer *_buf, + struct pb_validate *vl, + unsigned flags) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + enum pipe_error ret; + + assert(!buf->mapcount); + if(buf->mapcount) + return PIPE_ERROR; + + ret = pb_ondemand_buffer_instantiate(buf); + if(ret != PIPE_OK) + return ret; + + return pb_validate(buf->buffer, vl, flags); +} + + +static void +pb_ondemand_buffer_fence(struct pb_buffer *_buf, + struct pipe_fence_handle *fence) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + assert(buf->buffer); + if(!buf->buffer) + return; + + pb_fence(buf->buffer, fence); +} + + +static void +pb_ondemand_buffer_get_base_buffer(struct pb_buffer *_buf, + struct pb_buffer **base_buf, + unsigned *offset) +{ + struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf); + + if(pb_ondemand_buffer_instantiate(buf) != PIPE_OK) { + assert(0); + *base_buf = &buf->base; + *offset = 0; + return; + } + + pb_get_base_buffer(buf->buffer, base_buf, offset); +} + + +const struct pb_vtbl +pb_ondemand_buffer_vtbl = { + pb_ondemand_buffer_destroy, + pb_ondemand_buffer_map, + pb_ondemand_buffer_unmap, + pb_ondemand_buffer_validate, + pb_ondemand_buffer_fence, + pb_ondemand_buffer_get_base_buffer +}; + + +static struct pb_buffer * +pb_ondemand_manager_create_buffer(struct pb_manager *_mgr, + size_t size, + const struct pb_desc *desc) +{ + struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr); + struct pb_ondemand_buffer *buf; + + buf = CALLOC_STRUCT(pb_ondemand_buffer); + if(!buf) + return NULL; + + buf->base.base.refcount = 1; + buf->base.base.alignment = desc->alignment; + buf->base.base.usage = desc->usage; + buf->base.base.size = size; + buf->base.vtbl = &pb_ondemand_buffer_vtbl; + + buf->mgr = mgr; + + buf->data = align_malloc(size, desc->alignment < sizeof(void*) ? sizeof(void*) : desc->alignment); + if(!buf->data) { + FREE(buf); + return NULL; + } + + buf->size = size; + buf->desc = *desc; + + return &buf->base; +} + + +static void +pb_ondemand_manager_flush(struct pb_manager *_mgr) +{ + struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr); + + mgr->provider->flush(mgr->provider); +} + + +static void +pb_ondemand_manager_destroy(struct pb_manager *_mgr) +{ + struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr); + + FREE(mgr); +} + + +struct pb_manager * +pb_ondemand_manager_create(struct pb_manager *provider) +{ + struct pb_ondemand_manager *mgr; + + if(!provider) + return NULL; + + mgr = CALLOC_STRUCT(pb_ondemand_manager); + if(!mgr) + return NULL; + + mgr->base.destroy = pb_ondemand_manager_destroy; + mgr->base.create_buffer = pb_ondemand_manager_create_buffer; + mgr->base.flush = pb_ondemand_manager_flush; + + mgr->provider = provider; + + return &mgr->base; +} -- cgit v1.2.3 From b3028acd98e2b7fd09344f9005c5b20bba91262c Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Thu, 29 Jan 2009 21:43:15 -0500 Subject: gallium: give the screen priority when it comes to buffer allocations allows the driver to overwrite buffer allocation, first step on the way to making winsys interface internal to the drivers. state trackers and the code above it will go through the screen --- src/gallium/auxiliary/pipebuffer/pb_winsys.c | 8 +-- src/gallium/auxiliary/util/u_timed_winsys.c | 25 ++++----- src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 6 +-- src/gallium/drivers/cell/ppu/cell_state_shader.c | 6 +-- src/gallium/drivers/cell/ppu/cell_texture.c | 18 +++---- src/gallium/drivers/i915simple/i915_state.c | 6 +-- src/gallium/drivers/i915simple/i915_texture.c | 10 ++-- src/gallium/drivers/i965simple/brw_curbe.c | 4 +- src/gallium/drivers/i965simple/brw_tex_layout.c | 14 +++-- src/gallium/drivers/nv04/nv04_miptree.c | 2 +- src/gallium/drivers/nv04/nv04_screen.c | 4 +- src/gallium/drivers/nv04/nv04_vbo.c | 8 +-- src/gallium/drivers/nv10/nv10_miptree.c | 2 +- src/gallium/drivers/nv10/nv10_prim_vbuf.c | 6 +-- src/gallium/drivers/nv10/nv10_screen.c | 4 +- src/gallium/drivers/nv10/nv10_state.c | 4 +- src/gallium/drivers/nv10/nv10_vbo.c | 8 +-- src/gallium/drivers/nv20/nv20_miptree.c | 2 +- src/gallium/drivers/nv20/nv20_prim_vbuf.c | 6 +-- src/gallium/drivers/nv20/nv20_screen.c | 4 +- src/gallium/drivers/nv20/nv20_state.c | 4 +- src/gallium/drivers/nv20/nv20_vbo.c | 8 +-- src/gallium/drivers/nv20/nv20_vertprog.c | 4 +- src/gallium/drivers/nv30/nv30_fragprog.c | 10 ++-- src/gallium/drivers/nv30/nv30_miptree.c | 4 +- src/gallium/drivers/nv30/nv30_screen.c | 4 +- src/gallium/drivers/nv30/nv30_vbo.c | 12 ++--- src/gallium/drivers/nv30/nv30_vertprog.c | 4 +- src/gallium/drivers/nv40/nv40_draw.c | 14 ++--- src/gallium/drivers/nv40/nv40_fragprog.c | 10 ++-- src/gallium/drivers/nv40/nv40_miptree.c | 2 +- src/gallium/drivers/nv40/nv40_screen.c | 4 +- src/gallium/drivers/nv40/nv40_vbo.c | 12 ++--- src/gallium/drivers/nv40/nv40_vertprog.c | 4 +- src/gallium/drivers/nv50/nv50_miptree.c | 4 +- src/gallium/drivers/nv50/nv50_program.c | 6 +-- src/gallium/drivers/nv50/nv50_query.c | 8 +-- src/gallium/drivers/nv50/nv50_screen.c | 6 +-- src/gallium/drivers/nv50/nv50_surface.c | 4 +- src/gallium/drivers/nv50/nv50_vbo.c | 2 +- src/gallium/drivers/softpipe/sp_context.c | 4 +- src/gallium/drivers/softpipe/sp_draw_arrays.c | 4 +- src/gallium/drivers/softpipe/sp_state_fs.c | 4 +- src/gallium/drivers/softpipe/sp_texture.c | 18 +++---- src/gallium/drivers/trace/tr_winsys.c | 32 +++++------ src/gallium/include/pipe/p_inlines.h | 63 +++++++++++----------- src/gallium/include/pipe/p_screen.h | 69 +++++++++++++++++++++++- src/gallium/include/pipe/p_winsys.h | 12 ++--- 48 files changed, 274 insertions(+), 205 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_winsys.c b/src/gallium/auxiliary/pipebuffer/pb_winsys.c index 452835fdad..2b0c4606cf 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_winsys.c +++ b/src/gallium/auxiliary/pipebuffer/pb_winsys.c @@ -184,8 +184,8 @@ pb_winsys_buffer_destroy(struct pipe_winsys *winsys, void pb_init_winsys(struct pipe_winsys *winsys) { - winsys->user_buffer_create = pb_winsys_user_buffer_create; - winsys->buffer_map = pb_winsys_buffer_map; - winsys->buffer_unmap = pb_winsys_buffer_unmap; - winsys->buffer_destroy = pb_winsys_buffer_destroy; + winsys->_user_buffer_create = pb_winsys_user_buffer_create; + winsys->_buffer_map = pb_winsys_buffer_map; + winsys->_buffer_unmap = pb_winsys_buffer_unmap; + winsys->_buffer_destroy = pb_winsys_buffer_destroy; } diff --git a/src/gallium/auxiliary/util/u_timed_winsys.c b/src/gallium/auxiliary/util/u_timed_winsys.c index dc3c9be595..c5797f5d63 100644 --- a/src/gallium/auxiliary/util/u_timed_winsys.c +++ b/src/gallium/auxiliary/util/u_timed_winsys.c @@ -121,7 +121,8 @@ timed_buffer_create(struct pipe_winsys *winsys, struct pipe_winsys *backend = timed_winsys(winsys)->backend; uint64_t start = time_start(); - struct pipe_buffer *buf = backend->buffer_create( backend, alignment, usage, size ); + struct pipe_buffer *buf = + backend->_buffer_create( backend, alignment, usage, size ); time_finish(winsys, start, 0, __FUNCTION__); @@ -139,7 +140,7 @@ timed_user_buffer_create(struct pipe_winsys *winsys, struct pipe_winsys *backend = timed_winsys(winsys)->backend; uint64_t start = time_start(); - struct pipe_buffer *buf = backend->user_buffer_create( backend, data, bytes ); + struct pipe_buffer *buf = backend->_user_buffer_create( backend, data, bytes ); time_finish(winsys, start, 1, __FUNCTION__); @@ -155,7 +156,7 @@ timed_buffer_map(struct pipe_winsys *winsys, struct pipe_winsys *backend = timed_winsys(winsys)->backend; uint64_t start = time_start(); - void *map = backend->buffer_map( backend, buf, flags ); + void *map = backend->_buffer_map( backend, buf, flags ); time_finish(winsys, start, 2, __FUNCTION__); @@ -170,7 +171,7 @@ timed_buffer_unmap(struct pipe_winsys *winsys, struct pipe_winsys *backend = timed_winsys(winsys)->backend; uint64_t start = time_start(); - backend->buffer_unmap( backend, buf ); + backend->_buffer_unmap( backend, buf ); time_finish(winsys, start, 3, __FUNCTION__); } @@ -183,7 +184,7 @@ timed_buffer_destroy(struct pipe_winsys *winsys, struct pipe_winsys *backend = timed_winsys(winsys)->backend; uint64_t start = time_start(); - backend->buffer_destroy( backend, buf ); + backend->_buffer_destroy( backend, buf ); time_finish(winsys, start, 4, __FUNCTION__); } @@ -215,7 +216,7 @@ timed_surface_buffer_create(struct pipe_winsys *winsys, struct pipe_winsys *backend = timed_winsys(winsys)->backend; uint64_t start = time_start(); - struct pipe_buffer *ret = backend->surface_buffer_create( backend, width, height, + struct pipe_buffer *ret = backend->_surface_buffer_create( backend, width, height, format, usage, stride ); time_finish(winsys, start, 7, __FUNCTION__); @@ -295,14 +296,14 @@ struct pipe_winsys *u_timed_winsys_create( struct pipe_winsys *backend ) { struct timed_winsys *ws = CALLOC_STRUCT(timed_winsys); - ws->base.user_buffer_create = timed_user_buffer_create; - ws->base.buffer_map = timed_buffer_map; - ws->base.buffer_unmap = timed_buffer_unmap; - ws->base.buffer_destroy = timed_buffer_destroy; - ws->base.buffer_create = timed_buffer_create; + ws->base._user_buffer_create = timed_user_buffer_create; + ws->base._buffer_map = timed_buffer_map; + ws->base._buffer_unmap = timed_buffer_unmap; + ws->base._buffer_destroy = timed_buffer_destroy; + ws->base._buffer_create = timed_buffer_create; + ws->base._surface_buffer_create = timed_surface_buffer_create; ws->base.flush_frontbuffer = timed_flush_frontbuffer; ws->base.get_name = timed_get_name; - ws->base.surface_buffer_create = timed_surface_buffer_create; ws->base.fence_reference = timed_fence_reference; ws->base.fence_signalled = timed_fence_signalled; ws->base.fence_finish = timed_fence_finish; diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index 880d535320..ff3871d933 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -52,8 +52,8 @@ cell_map_constant_buffers(struct cell_context *sp) uint i; for (i = 0; i < 2; i++) { if (sp->constants[i].size) { - sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); + sp->mapped_constants[i] = ws->_buffer_map(ws, sp->constants[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); cell_flush_buffer_range(sp, sp->mapped_constants[i], sp->constants[i].buffer->size); } @@ -71,7 +71,7 @@ cell_unmap_constant_buffers(struct cell_context *sp) uint i; for (i = 0; i < 2; i++) { if (sp->constants[i].size) - ws->buffer_unmap(ws, sp->constants[i].buffer); + ws->_buffer_unmap(ws, sp->constants[i].buffer); sp->mapped_constants[i] = NULL; } } diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index cda39f8d59..bcbd81922c 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -194,9 +194,9 @@ cell_set_constant_buffer(struct pipe_context *pipe, draw_flush(cell->draw); /* note: reference counting */ - winsys_buffer_reference(ws, - &cell->constants[shader].buffer, - buf->buffer); + pipe_buffer_reference(pipe->screen, + &cell->constants[shader].buffer, + buf->buffer); cell->constants[shader].size = buf->size; if (shader == PIPE_SHADER_VERTEX) diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index 9f83ab8fa4..f1b1a38efc 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -112,8 +112,8 @@ cell_texture_create(struct pipe_screen *screen, cell_texture_layout(ct); - ct->buffer = ws->buffer_create(ws, 32, PIPE_BUFFER_USAGE_PIXEL, - ct->buffer_size); + ct->buffer = ws->_buffer_create(ws, 32, PIPE_BUFFER_USAGE_PIXEL, + ct->buffer_size); if (!ct->buffer) { FREE(ct); @@ -154,7 +154,7 @@ cell_texture_release(struct pipe_screen *screen, */ if (ct->tiled_buffer[i]) { ct->tiled_mapped[i] = NULL; - winsys_buffer_reference(screen->winsys, &ct->tiled_buffer[i], NULL); + pipe_buffer_reference(screen, &ct->tiled_buffer[i], NULL); } } @@ -324,12 +324,12 @@ cell_twiddle_texture(struct pipe_screen *screen, /* allocate buffer for tiled data now */ struct pipe_winsys *ws = screen->winsys; uint bytes = bufWidth * bufHeight * 4 * numFaces; - ct->tiled_buffer[level] = ws->buffer_create(ws, 16, - PIPE_BUFFER_USAGE_PIXEL, - bytes); + ct->tiled_buffer[level] = ws->_buffer_create(ws, 16, + PIPE_BUFFER_USAGE_PIXEL, + bytes); /* and map it */ - ct->tiled_mapped[level] = ws->buffer_map(ws, ct->tiled_buffer[level], - PIPE_BUFFER_USAGE_GPU_READ); + ct->tiled_mapped[level] = ws->_buffer_map(ws, ct->tiled_buffer[level], + PIPE_BUFFER_USAGE_GPU_READ); } dst = (uint *) ((ubyte *) ct->tiled_mapped[level] + offset); @@ -406,7 +406,7 @@ cell_get_tex_surface(struct pipe_screen *screen, if (ps) { assert(ps->refcount); assert(ps->winsys); - winsys_buffer_reference(ws, &ps->buffer, ct->buffer); + pipe_buffer_reference(screen, &ps->buffer, ct->buffer); ps->format = pt->format; ps->block = pt->block; ps->width = pt->width[level]; diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index 19f194c027..b931556b7e 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -536,10 +536,10 @@ static void i915_set_constant_buffer(struct pipe_context *pipe, if (buf) { void *mapped; if (buf->buffer && buf->buffer->size && - (mapped = ws->buffer_map(ws, buf->buffer, - PIPE_BUFFER_USAGE_CPU_READ))) { + (mapped = ws->_buffer_map(ws, buf->buffer, + PIPE_BUFFER_USAGE_CPU_READ))) { memcpy(i915->current.constants[shader], mapped, buf->buffer->size); - ws->buffer_unmap(ws, buf->buffer); + ws->_buffer_unmap(ws, buf->buffer); i915->current.num_user_constants[shader] = buf->buffer->size / (4 * sizeof(float)); } diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 4acc4b0214..7847f2ef86 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -605,18 +605,18 @@ i915_texture_create(struct pipe_screen *screen, tex_size = tex->stride * tex->total_nblocksy; - tex->buffer = ws->buffer_create(ws, 64, - PIPE_BUFFER_USAGE_PIXEL, - tex_size); + tex->buffer = ws->_buffer_create(ws, 64, + PIPE_BUFFER_USAGE_PIXEL, + tex_size); if (!tex->buffer) goto fail; #if 0 - void *ptr = ws->buffer_map(ws, tex->buffer, + void *ptr = ws->_buffer_map(ws, tex->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); memset(ptr, 0x80, tex_size); - ws->buffer_unmap(ws, tex->buffer); + ws->_buffer_unmap(ws, tex->buffer); #endif return &tex->base; diff --git a/src/gallium/drivers/i965simple/brw_curbe.c b/src/gallium/drivers/i965simple/brw_curbe.c index 5e1cce7530..4b1f4d3121 100644 --- a/src/gallium/drivers/i965simple/brw_curbe.c +++ b/src/gallium/drivers/i965simple/brw_curbe.c @@ -256,13 +256,13 @@ static void upload_constant_buffer(struct brw_context *brw) /* FIXME: buffer size is num_consts + num_immediates */ if (brw->vs.prog_data->num_consts) { /* map the vertex constant buffer and copy to curbe: */ - void *data = ws->buffer_map(ws, cbuffer->buffer, 0); + void *data = ws->_buffer_map(ws, cbuffer->buffer, 0); /* FIXME: this is wrong. the cbuffer->buffer->size currently * represents size of consts + immediates. so if we'll * have both we'll copy over the end of the buffer * with the subsequent memcpy */ memcpy(&buf[offset], data, cbuffer->buffer->size); - ws->buffer_unmap(ws, cbuffer->buffer); + ws->_buffer_unmap(ws, cbuffer->buffer); offset += cbuffer->buffer->size; } /*immediates*/ diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 12e2e02cfd..c99eb8e75a 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -295,10 +295,10 @@ brw_texture_create_screen(struct pipe_screen *screen, tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]); if (brw_miptree_layout(tex)) - tex->buffer = ws->buffer_create(ws, 64, - PIPE_BUFFER_USAGE_PIXEL, - tex->stride * - tex->total_nblocksy); + tex->buffer = ws->_buffer_create(ws, 64, + PIPE_BUFFER_USAGE_PIXEL, + tex->stride * + tex->total_nblocksy); if (!tex->buffer) { FREE(tex); @@ -322,7 +322,6 @@ brw_texture_release_screen(struct pipe_screen *screen, __FUNCTION__, (void *) *pt, (*pt)->refcount - 1); */ if (--(*pt)->refcount <= 0) { - struct pipe_winsys *ws = screen->winsys; struct brw_texture *tex = (struct brw_texture *)*pt; uint i; @@ -330,7 +329,7 @@ brw_texture_release_screen(struct pipe_screen *screen, DBG("%s deleting %p\n", __FUNCTION__, (void *) tex); */ - winsys_buffer_reference(ws, &tex->buffer, NULL); + pipe_buffer_reference(screen, &tex->buffer, NULL); for (i = 0; i < PIPE_MAX_TEXTURE_LEVELS; i++) if (tex->image_offset[i]) @@ -347,7 +346,6 @@ brw_get_tex_surface_screen(struct pipe_screen *screen, struct pipe_texture *pt, unsigned face, unsigned level, unsigned zslice) { - struct pipe_winsys *ws = screen->winsys; struct brw_texture *tex = (struct brw_texture *)pt; struct pipe_surface *ps; unsigned offset; /* in bytes */ @@ -369,7 +367,7 @@ brw_get_tex_surface_screen(struct pipe_screen *screen, if (ps) { ps->refcount = 1; pipe_texture_reference(&ps->texture, pt); - winsys_buffer_reference(ws, &ps->buffer, tex->buffer); + pipe_buffer_reference(screen, &ps->buffer, tex->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c index 094c38256b..32800f9741 100644 --- a/src/gallium/drivers/nv04/nv04_miptree.c +++ b/src/gallium/drivers/nv04/nv04_miptree.c @@ -58,7 +58,7 @@ nv04_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) nv04_miptree_layout(mt); - mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, + mt->buffer = ws->_buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, mt->total_size); if (!mt->buffer) { FREE(mt); diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c index 65eacde6b2..2fa7d35294 100644 --- a/src/gallium/drivers/nv04/nv04_screen.c +++ b/src/gallium/drivers/nv04/nv04_screen.c @@ -117,7 +117,7 @@ nv04_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, struct pipe_winsys *ws = screen->winsys; void *map; - map = ws->buffer_map(ws, surface->buffer, flags); + map = ws->_buffer_map(ws, surface->buffer, flags); if (!map) return NULL; @@ -129,7 +129,7 @@ nv04_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { struct pipe_winsys *ws = screen->winsys; - ws->buffer_unmap(ws, surface->buffer); + ws->_buffer_unmap(ws, surface->buffer); } static void diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c index 91f919d48e..117a73a1e4 100644 --- a/src/gallium/drivers/nv04/nv04_vbo.c +++ b/src/gallium/drivers/nv04/nv04_vbo.c @@ -23,7 +23,7 @@ boolean nv04_draw_elements( struct pipe_context *pipe, for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (nv04->vertex_buffer[i].buffer) { void *buf - = pipe->winsys->buffer_map(pipe->winsys, + = pipe->winsys->_buffer_map(pipe->winsys, nv04->vertex_buffer[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(draw, i, buf); @@ -32,7 +32,7 @@ boolean nv04_draw_elements( struct pipe_context *pipe, /* Map index buffer, if present */ if (indexBuffer) { void *mapped_indexes - = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, + = pipe->winsys->_buffer_map(pipe->winsys, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); } @@ -49,12 +49,12 @@ boolean nv04_draw_elements( struct pipe_context *pipe, */ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (nv04->vertex_buffer[i].buffer) { - pipe->winsys->buffer_unmap(pipe->winsys, nv04->vertex_buffer[i].buffer); + pipe->winsys->_buffer_unmap(pipe->winsys, nv04->vertex_buffer[i].buffer); draw_set_mapped_vertex_buffer(draw, i, NULL); } } if (indexBuffer) { - pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + pipe->winsys->_buffer_unmap(pipe->winsys, indexBuffer); draw_set_mapped_element_buffer(draw, 0, NULL); } diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c index f8c021261b..384f89c391 100644 --- a/src/gallium/drivers/nv10/nv10_miptree.c +++ b/src/gallium/drivers/nv10/nv10_miptree.c @@ -65,7 +65,7 @@ nv10_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) nv10_miptree_layout(mt); - mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, + mt->buffer = ws->_buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, mt->total_size); if (!mt->buffer) { FREE(mt); diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c index e7e81d3dff..bdffaacf78 100644 --- a/src/gallium/drivers/nv10/nv10_prim_vbuf.c +++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c @@ -111,11 +111,11 @@ nv10_vbuf_render_allocate_vertices( struct vbuf_render *render, size_t size = (size_t)vertex_size * (size_t)nr_vertices; assert(!nv10_render->buffer); - nv10_render->buffer = winsys->buffer_create(winsys, 64, PIPE_BUFFER_USAGE_VERTEX, size); + nv10_render->buffer = winsys->_buffer_create(winsys, 64, PIPE_BUFFER_USAGE_VERTEX, size); nv10->dirty |= NV10_NEW_VTXARRAYS; - return winsys->buffer_map(winsys, + return winsys->_buffer_map(winsys, nv10_render->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); } @@ -187,7 +187,7 @@ nv10_vbuf_render_release_vertices( struct vbuf_render *render, struct pipe_screen *pscreen = &nv10->screen->pipe; assert(nv10_render->buffer); - winsys->buffer_unmap(winsys, nv10_render->buffer); + winsys->_buffer_unmap(winsys, nv10_render->buffer); pipe_buffer_reference(pscreen, &nv10_render->buffer, NULL); } diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index 4d9fbd4b5f..333e0b3252 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -122,7 +122,7 @@ nv10_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, struct pipe_winsys *ws = screen->winsys; void *map; - map = ws->buffer_map(ws, surface->buffer, flags); + map = ws->_buffer_map(ws, surface->buffer, flags); if (!map) return NULL; @@ -134,7 +134,7 @@ nv10_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { struct pipe_winsys *ws = screen->winsys; - ws->buffer_unmap(ws, surface->buffer); + ws->_buffer_unmap(ws, surface->buffer); } static void diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index 119af66dfd..f84d45a730 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -468,12 +468,12 @@ nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, if (buf) { void *mapped; if (buf->buffer && buf->buffer->size && - (mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) + (mapped = ws->_buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) { memcpy(nv10->constbuf[shader], mapped, buf->buffer->size); nv10->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float)); - ws->buffer_unmap(ws, buf->buffer); + ws->_buffer_unmap(ws, buf->buffer); } } } diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c index d0e788ac03..a6b80e4050 100644 --- a/src/gallium/drivers/nv10/nv10_vbo.c +++ b/src/gallium/drivers/nv10/nv10_vbo.c @@ -25,7 +25,7 @@ boolean nv10_draw_elements( struct pipe_context *pipe, for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (nv10->vtxbuf[i].buffer) { void *buf - = pipe->winsys->buffer_map(pipe->winsys, + = pipe->winsys->_buffer_map(pipe->winsys, nv10->vtxbuf[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(draw, i, buf); @@ -34,7 +34,7 @@ boolean nv10_draw_elements( struct pipe_context *pipe, /* Map index buffer, if present */ if (indexBuffer) { void *mapped_indexes - = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, + = pipe->winsys->_buffer_map(pipe->winsys, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); } @@ -55,12 +55,12 @@ boolean nv10_draw_elements( struct pipe_context *pipe, */ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (nv10->vtxbuf[i].buffer) { - pipe->winsys->buffer_unmap(pipe->winsys, nv10->vtxbuf[i].buffer); + pipe->winsys->_buffer_unmap(pipe->winsys, nv10->vtxbuf[i].buffer); draw_set_mapped_vertex_buffer(draw, i, NULL); } } if (indexBuffer) { - pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + pipe->winsys->_buffer_unmap(pipe->winsys, indexBuffer); draw_set_mapped_element_buffer(draw, 0, NULL); } diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c index d2038c391d..759f29c951 100644 --- a/src/gallium/drivers/nv20/nv20_miptree.c +++ b/src/gallium/drivers/nv20/nv20_miptree.c @@ -65,7 +65,7 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) nv20_miptree_layout(mt); - mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, + mt->buffer = ws->_buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, mt->total_size); if (!mt->buffer) { FREE(mt); diff --git a/src/gallium/drivers/nv20/nv20_prim_vbuf.c b/src/gallium/drivers/nv20/nv20_prim_vbuf.c index 74540845a8..c4841026b3 100644 --- a/src/gallium/drivers/nv20/nv20_prim_vbuf.c +++ b/src/gallium/drivers/nv20/nv20_prim_vbuf.c @@ -113,9 +113,9 @@ static void * nv20__allocate_pbuffer(struct nv20_vbuf_render *nv20_render, size_t size) { struct pipe_winsys *winsys = nv20_render->nv20->pipe.winsys; - nv20_render->pbuffer = winsys->buffer_create(winsys, 64, + nv20_render->pbuffer = winsys->_buffer_create(winsys, 64, PIPE_BUFFER_USAGE_VERTEX, size); - return winsys->buffer_map(winsys, + return winsys->_buffer_map(winsys, nv20_render->pbuffer, PIPE_BUFFER_USAGE_CPU_WRITE); } @@ -334,7 +334,7 @@ nv20_vbuf_render_release_vertices( struct vbuf_render *render, struct pipe_screen *pscreen = &nv20->screen->pipe; if (nv20_render->pbuffer) { - winsys->buffer_unmap(winsys, nv20_render->pbuffer); + winsys->_buffer_unmap(winsys, nv20_render->pbuffer); pipe_buffer_reference(pscreen, &nv20_render->pbuffer, NULL); } else if (nv20_render->mbuffer) { FREE(nv20_render->mbuffer); diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c index 2ca6e6b149..e9adf05a7d 100644 --- a/src/gallium/drivers/nv20/nv20_screen.c +++ b/src/gallium/drivers/nv20/nv20_screen.c @@ -122,7 +122,7 @@ nv20_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, struct pipe_winsys *ws = screen->winsys; void *map; - map = ws->buffer_map(ws, surface->buffer, flags); + map = ws->_buffer_map(ws, surface->buffer, flags); if (!map) return NULL; @@ -134,7 +134,7 @@ nv20_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { struct pipe_winsys *ws = screen->winsys; - ws->buffer_unmap(ws, surface->buffer); + ws->_buffer_unmap(ws, surface->buffer); } static void diff --git a/src/gallium/drivers/nv20/nv20_state.c b/src/gallium/drivers/nv20/nv20_state.c index ecec4f49a0..65060006da 100644 --- a/src/gallium/drivers/nv20/nv20_state.c +++ b/src/gallium/drivers/nv20/nv20_state.c @@ -461,12 +461,12 @@ nv20_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, if (buf) { void *mapped; if (buf->buffer && buf->buffer->size && - (mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) + (mapped = ws->_buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) { memcpy(nv20->constbuf[shader], mapped, buf->buffer->size); nv20->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float)); - ws->buffer_unmap(ws, buf->buffer); + ws->_buffer_unmap(ws, buf->buffer); } } } diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c index 4edc4efebd..d6b731790c 100644 --- a/src/gallium/drivers/nv20/nv20_vbo.c +++ b/src/gallium/drivers/nv20/nv20_vbo.c @@ -25,7 +25,7 @@ boolean nv20_draw_elements( struct pipe_context *pipe, for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (nv20->vtxbuf[i].buffer) { void *buf - = pipe->winsys->buffer_map(pipe->winsys, + = pipe->winsys->_buffer_map(pipe->winsys, nv20->vtxbuf[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(draw, i, buf); @@ -34,7 +34,7 @@ boolean nv20_draw_elements( struct pipe_context *pipe, /* Map index buffer, if present */ if (indexBuffer) { void *mapped_indexes - = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, + = pipe->winsys->_buffer_map(pipe->winsys, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); } @@ -55,12 +55,12 @@ boolean nv20_draw_elements( struct pipe_context *pipe, */ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (nv20->vtxbuf[i].buffer) { - pipe->winsys->buffer_unmap(pipe->winsys, nv20->vtxbuf[i].buffer); + pipe->winsys->_buffer_unmap(pipe->winsys, nv20->vtxbuf[i].buffer); draw_set_mapped_vertex_buffer(draw, i, NULL); } } if (indexBuffer) { - pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); + pipe->winsys->_buffer_unmap(pipe->winsys, indexBuffer); draw_set_mapped_element_buffer(draw, 0, NULL); } diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c index a885fcd7a5..c4f3d0f14f 100644 --- a/src/gallium/drivers/nv20/nv20_vertprog.c +++ b/src/gallium/drivers/nv20/nv20_vertprog.c @@ -749,7 +749,7 @@ nv20_vertprog_validate(struct nv20_context *nv20) float *map = NULL; if (constbuf) { - map = ws->buffer_map(ws, constbuf, + map = ws->_buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); } @@ -771,7 +771,7 @@ nv20_vertprog_validate(struct nv20_context *nv20) } if (constbuf) { - ws->buffer_unmap(ws, constbuf); + ws->_buffer_unmap(ws, constbuf); } } diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index 320ba3f4bf..f22a06c1a3 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -803,7 +803,7 @@ nv30_fragprog_upload(struct nv30_context *nv30, uint32_t *map; int i; - map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + map = ws->_buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); #if 0 for (i = 0; i < fp->insn_len; i++) { @@ -825,7 +825,7 @@ nv30_fragprog_upload(struct nv30_context *nv30, } } - ws->buffer_unmap(ws, fp->buffer); + ws->_buffer_unmap(ws, fp->buffer); } static boolean @@ -849,7 +849,7 @@ nv30_fragprog_validate(struct nv30_context *nv30) return FALSE; } - fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4); + fp->buffer = ws->_buffer_create(ws, 0x100, 0, fp->insn_len * 4); nv30_fragprog_upload(nv30, fp); so = so_new(8, 1); @@ -869,7 +869,7 @@ update_constants: if (fp->nr_consts) { float *map; - map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); + map = ws->_buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); for (i = 0; i < fp->nr_consts; i++) { struct nv30_fragment_program_data *fpd = &fp->consts[i]; uint32_t *p = &fp->insn[fpd->offset]; @@ -880,7 +880,7 @@ update_constants: memcpy(p, cb, 4 * sizeof(float)); new_consts = TRUE; } - ws->buffer_unmap(ws, constbuf); + ws->_buffer_unmap(ws, constbuf); if (new_consts) nv30_fragprog_upload(nv30, fp); diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index 54fb3585f8..bf6c4a1c74 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -93,7 +93,7 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) nv30_miptree_layout(mt); - mt->buffer = ws->buffer_create(ws, 256, + mt->buffer = ws->_buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL | NOUVEAU_BUFFER_USAGE_TEXTURE, mt->total_size); @@ -181,7 +181,7 @@ nv30_miptree_surface_del(struct pipe_screen *pscreen, return; pipe_texture_reference(&ps->texture, NULL); - pipe_buffer_reference(pscreen->winsys, &ps->buffer, NULL); + pipe_buffer_reference(pscreen, &ps->buffer, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 1fac6d3df8..56b20ae2fd 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -161,7 +161,7 @@ nv30_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, assert(surface_to_map); - map = ws->buffer_map(ws, surface_to_map->buffer, flags); + map = ws->_buffer_map(ws, surface_to_map->buffer, flags); if (!map) return NULL; @@ -189,7 +189,7 @@ nv30_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) assert(surface_to_unmap); - ws->buffer_unmap(ws, surface_to_unmap->buffer); + ws->_buffer_unmap(ws, surface_to_unmap->buffer); if (surface_to_unmap != surface) { struct nv30_screen *nvscreen = nv30_screen(screen); diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c index 2d6d48ac16..cf0468f879 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -116,7 +116,7 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so, if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp)) return FALSE; - map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map = ws->_buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); map += vb->buffer_offset + ve->src_offset; switch (type) { @@ -148,17 +148,17 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so, so_data (so, fui(v[0])); break; default: - ws->buffer_unmap(ws, vb->buffer); + ws->_buffer_unmap(ws, vb->buffer); return FALSE; } } break; default: - ws->buffer_unmap(ws, vb->buffer); + ws->_buffer_unmap(ws, vb->buffer); return FALSE; } - ws->buffer_unmap(ws, vb->buffer); + ws->_buffer_unmap(ws, vb->buffer); return TRUE; } @@ -371,7 +371,7 @@ nv30_draw_elements_inline(struct pipe_context *pipe, struct pipe_winsys *ws = pipe->winsys; void *map; - map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); + map = ws->_buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); if (!ib) { NOUVEAU_ERR("failed mapping ib\n"); return FALSE; @@ -392,7 +392,7 @@ nv30_draw_elements_inline(struct pipe_context *pipe, break; } - ws->buffer_unmap(ws, ib); + ws->_buffer_unmap(ws, ib); return TRUE; } diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index 72824559e8..b67dde0808 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -749,7 +749,7 @@ nv30_vertprog_validate(struct nv30_context *nv30) float *map = NULL; if (constbuf) { - map = ws->buffer_map(ws, constbuf, + map = ws->_buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); } @@ -771,7 +771,7 @@ nv30_vertprog_validate(struct nv30_context *nv30) } if (constbuf) { - ws->buffer_unmap(ws, constbuf); + ws->_buffer_unmap(ws, constbuf); } } diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index 8e56cdc2fe..3d5332a80b 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -241,13 +241,13 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, nv40_state_emit(nv40); for (i = 0; i < nv40->vtxbuf_nr; i++) { - map = ws->buffer_map(ws, nv40->vtxbuf[i].buffer, - PIPE_BUFFER_USAGE_CPU_READ); + map = ws->_buffer_map(ws, nv40->vtxbuf[i].buffer, + PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(nv40->draw, i, map); } if (idxbuf) { - map = ws->buffer_map(ws, idxbuf, PIPE_BUFFER_USAGE_CPU_READ); + map = ws->_buffer_map(ws, idxbuf, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer(nv40->draw, idxbuf_size, map); } else { draw_set_mapped_element_buffer(nv40->draw, 0, NULL); @@ -256,7 +256,7 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, if (nv40->constbuf[PIPE_SHADER_VERTEX]) { const unsigned nr = nv40->constbuf_nr[PIPE_SHADER_VERTEX]; - map = ws->buffer_map(ws, nv40->constbuf[PIPE_SHADER_VERTEX], + map = ws->_buffer_map(ws, nv40->constbuf[PIPE_SHADER_VERTEX], PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_constant_buffer(nv40->draw, map, nr); } @@ -264,13 +264,13 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, draw_arrays(nv40->draw, mode, start, count); for (i = 0; i < nv40->vtxbuf_nr; i++) - ws->buffer_unmap(ws, nv40->vtxbuf[i].buffer); + ws->_buffer_unmap(ws, nv40->vtxbuf[i].buffer); if (idxbuf) - ws->buffer_unmap(ws, idxbuf); + ws->_buffer_unmap(ws, idxbuf); if (nv40->constbuf[PIPE_SHADER_VERTEX]) - ws->buffer_unmap(ws, nv40->constbuf[PIPE_SHADER_VERTEX]); + ws->_buffer_unmap(ws, nv40->constbuf[PIPE_SHADER_VERTEX]); draw_flush(nv40->draw); pipe->flush(pipe, 0, NULL); diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 91dcbebda0..5a127d9c7b 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -886,7 +886,7 @@ nv40_fragprog_upload(struct nv40_context *nv40, uint32_t *map; int i; - map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + map = ws->_buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); #if 0 for (i = 0; i < fp->insn_len; i++) { @@ -908,7 +908,7 @@ nv40_fragprog_upload(struct nv40_context *nv40, } } - ws->buffer_unmap(ws, fp->buffer); + ws->_buffer_unmap(ws, fp->buffer); } static boolean @@ -932,7 +932,7 @@ nv40_fragprog_validate(struct nv40_context *nv40) return FALSE; } - fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4); + fp->buffer = ws->_buffer_create(ws, 0x100, 0, fp->insn_len * 4); nv40_fragprog_upload(nv40, fp); so = so_new(4, 1); @@ -948,7 +948,7 @@ update_constants: if (fp->nr_consts) { float *map; - map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); + map = ws->_buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); for (i = 0; i < fp->nr_consts; i++) { struct nv40_fragment_program_data *fpd = &fp->consts[i]; uint32_t *p = &fp->insn[fpd->offset]; @@ -959,7 +959,7 @@ update_constants: memcpy(p, cb, 4 * sizeof(float)); new_consts = TRUE; } - ws->buffer_unmap(ws, constbuf); + ws->_buffer_unmap(ws, constbuf); if (new_consts) nv40_fragprog_upload(nv40, fp); diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index ba912ddcbb..6ed0d39edf 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -97,7 +97,7 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) nv40_miptree_layout(mt); - mt->buffer = ws->buffer_create(ws, 256, buf_usage, mt->total_size); + mt->buffer = ws->_buffer_create(ws, 256, buf_usage, mt->total_size); if (!mt->buffer) { FREE(mt); return NULL; diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index ab128fecda..20662fd3ff 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -170,7 +170,7 @@ nv40_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, assert(surface_to_map); - map = ws->buffer_map(ws, surface_to_map->buffer, flags); + map = ws->_buffer_map(ws, surface_to_map->buffer, flags); if (!map) return NULL; @@ -198,7 +198,7 @@ nv40_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) assert(surface_to_unmap); - ws->buffer_unmap(ws, surface_to_unmap->buffer); + ws->_buffer_unmap(ws, surface_to_unmap->buffer); if (surface_to_unmap != surface) { struct nv40_screen *nvscreen = nv40_screen(screen); diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index 8f1834628f..f20183ddd4 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -116,7 +116,7 @@ nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so, if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) return FALSE; - map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map = ws->_buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); map += vb->buffer_offset + ve->src_offset; switch (type) { @@ -148,17 +148,17 @@ nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so, so_data (so, fui(v[0])); break; default: - ws->buffer_unmap(ws, vb->buffer); + ws->_buffer_unmap(ws, vb->buffer); return FALSE; } } break; default: - ws->buffer_unmap(ws, vb->buffer); + ws->_buffer_unmap(ws, vb->buffer); return FALSE; } - ws->buffer_unmap(ws, vb->buffer); + ws->_buffer_unmap(ws, vb->buffer); return TRUE; } @@ -370,7 +370,7 @@ nv40_draw_elements_inline(struct pipe_context *pipe, struct pipe_winsys *ws = pipe->winsys; void *map; - map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); + map = ws->_buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); if (!ib) { NOUVEAU_ERR("failed mapping ib\n"); return FALSE; @@ -391,7 +391,7 @@ nv40_draw_elements_inline(struct pipe_context *pipe, break; } - ws->buffer_unmap(ws, ib); + ws->_buffer_unmap(ws, ib); return TRUE; } diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 1392fe956f..7a82bb0f5e 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -980,7 +980,7 @@ check_gpu_resources: float *map = NULL; if (constbuf) { - map = ws->buffer_map(ws, constbuf, + map = ws->_buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); } @@ -1002,7 +1002,7 @@ check_gpu_resources: } if (constbuf) - ws->buffer_unmap(ws, constbuf); + ws->_buffer_unmap(ws, constbuf); } /* Upload vtxprog */ diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 7770fcc3f2..3965dad5ad 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -88,14 +88,14 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) size = align(size, 64); size *= align(pt->height[l], 8) * pt->block.size; - lvl->image[i] = ws->buffer_create(ws, 256, 0, size); + lvl->image[i] = ws->_buffer_create(ws, 256, 0, size); lvl->image_offset[i] = mt->total_size; mt->total_size += size; } } - mt->buffer = ws->buffer_create(ws, 256, usage, mt->total_size); + mt->buffer = ws->_buffer_create(ws, 256, usage, mt->total_size); if (!mt->buffer) { FREE(mt); return NULL; diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 7686f746eb..73867cf675 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1581,11 +1581,11 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) } if (p->param_nr) { - float *map = ws->buffer_map(ws, nv50->constbuf[p->type], + float *map = ws->_buffer_map(ws, nv50->constbuf[p->type], PIPE_BUFFER_USAGE_CPU_READ); nv50_program_upload_data(nv50, map, p->data->start, p->param_nr); - ws->buffer_unmap(ws, nv50->constbuf[p->type]); + ws->_buffer_unmap(ws, nv50->constbuf[p->type]); } if (p->immd_nr) { @@ -1606,7 +1606,7 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) boolean upload = FALSE; if (!p->buffer) { - p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4); + p->buffer = ws->_buffer_create(ws, 0x100, 0, p->exec_size * 4); upload = TRUE; } diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index b923c820eb..b0fb346ba1 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -47,7 +47,7 @@ nv50_query_create(struct pipe_context *pipe, unsigned type) assert (q->type == PIPE_QUERY_OCCLUSION_COUNTER); q->type = type; - q->buffer = ws->buffer_create(ws, 256, 0, 16); + q->buffer = ws->_buffer_create(ws, 256, 0, 16); if (!q->buffer) { FREE(q); return NULL; @@ -62,7 +62,7 @@ nv50_query_destroy(struct pipe_context *pipe, struct pipe_query *pq) struct nv50_query *q = nv50_query(pq); if (q) { - pipe_buffer_reference(pipe, &q->buffer, NULL); + pipe_buffer_reference(pipe->screen, &q->buffer, NULL); FREE(q); } } @@ -107,11 +107,11 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, */ if (!q->ready) { - uint32_t *map = ws->buffer_map(ws, q->buffer, + uint32_t *map = ws->_buffer_map(ws, q->buffer, PIPE_BUFFER_USAGE_CPU_READ); q->result = map[1]; q->ready = TRUE; - ws->buffer_unmap(ws, q->buffer); + ws->_buffer_unmap(ws, q->buffer); } *result = q->result; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index ef46233f83..3abacfc8d5 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -242,7 +242,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) so_data (so, 8); /* Shared constant buffer */ - screen->constbuf = ws->buffer_create(ws, 0, 0, 128 * 4 * 4); + screen->constbuf = ws->_buffer_create(ws, 0, 0, 128 * 4 * 4); if (nvws->res_init(&screen->vp_data_heap, 0, 128)) { NOUVEAU_ERR("Error initialising constant buffer\n"); nv50_screen_destroy(&screen->pipe); @@ -261,7 +261,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) * blocks. At some point we *may* want to go the NVIDIA way of doing * things? */ - screen->tic = ws->buffer_create(ws, 0, 0, 32 * 8 * 4); + screen->tic = ws->_buffer_create(ws, 0, 0, 32 * 8 * 4); so_method(so, screen->tesla, 0x1280, 3); so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); @@ -275,7 +275,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, 0x00000800); - screen->tsc = ws->buffer_create(ws, 0, 0, 32 * 8 * 4); + screen->tsc = ws->_buffer_create(ws, 0, 0, 32 * 8 * 4); so_method(so, screen->tesla, 0x1280, 3); so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 3f45a2fe18..743eb6e257 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -65,7 +65,7 @@ nv50_surface_map(struct pipe_screen *screen, struct pipe_surface *ps, { struct pipe_winsys *ws = screen->winsys; - return ws->buffer_map(ws, ps->buffer, flags); + return ws->_buffer_map(ws, ps->buffer, flags); } static void @@ -73,7 +73,7 @@ nv50_surface_unmap(struct pipe_screen *pscreen, struct pipe_surface *ps) { struct pipe_winsys *ws = pscreen->winsys; - ws->buffer_unmap(ws, ps->buffer); + ws->_buffer_unmap(ws, ps->buffer); } void diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index c482a4c241..86471c00e0 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -153,7 +153,7 @@ nv50_draw_elements(struct pipe_context *pipe, { struct nv50_context *nv50 = nv50_context(pipe); struct pipe_winsys *ws = pipe->winsys; - void *map = ws->buffer_map(ws, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); + void *map = ws->_buffer_map(ws, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); nv50_state_validate(nv50); diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c index d8a5631488..c2d882a819 100644 --- a/src/gallium/drivers/softpipe/sp_context.c +++ b/src/gallium/drivers/softpipe/sp_context.c @@ -87,7 +87,7 @@ softpipe_unmap_surfaces(struct softpipe_context *sp) static void softpipe_destroy( struct pipe_context *pipe ) { struct softpipe_context *softpipe = softpipe_context( pipe ); - struct pipe_winsys *ws = pipe->winsys; + struct pipe_screen *screen = pipe->screen; uint i; if (softpipe->draw) @@ -116,7 +116,7 @@ static void softpipe_destroy( struct pipe_context *pipe ) for (i = 0; i < Elements(softpipe->constants); i++) { if (softpipe->constants[i].buffer) { - winsys_buffer_reference(ws, &softpipe->constants[i].buffer, NULL); + pipe_buffer_reference(screen, &softpipe->constants[i].buffer, NULL); } } diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index ed3e8f95ae..8d58b1ed16 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -50,7 +50,7 @@ softpipe_map_constant_buffers(struct softpipe_context *sp) uint i; for (i = 0; i < PIPE_SHADER_TYPES; i++) { if (sp->constants[i].buffer && sp->constants[i].buffer->size) - sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, + sp->mapped_constants[i] = ws->_buffer_map(ws, sp->constants[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); } @@ -74,7 +74,7 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) for (i = 0; i < 2; i++) { if (sp->constants[i].buffer && sp->constants[i].buffer->size) - ws->buffer_unmap(ws, sp->constants[i].buffer); + ws->_buffer_unmap(ws, sp->constants[i].buffer); sp->mapped_constants[i] = NULL; } } diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 15815160ed..43b134354f 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -146,13 +146,13 @@ softpipe_set_constant_buffer(struct pipe_context *pipe, const struct pipe_constant_buffer *buf) { struct softpipe_context *softpipe = softpipe_context(pipe); - struct pipe_winsys *ws = pipe->winsys; + struct pipe_screen *screen = pipe->screen; assert(shader < PIPE_SHADER_TYPES); assert(index == 0); /* note: reference counting */ - winsys_buffer_reference(ws, + pipe_buffer_reference(screen, &softpipe->constants[shader].buffer, buf ? buf->buffer : NULL); diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index faf9e871f9..078925ca45 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -87,9 +87,9 @@ softpipe_texture_layout(struct pipe_screen *screen, depth = minify(depth); } - spt->buffer = ws->buffer_create(ws, 32, - PIPE_BUFFER_USAGE_PIXEL, - buffer_size); + spt->buffer = ws->_buffer_create(ws, 32, + PIPE_BUFFER_USAGE_PIXEL, + buffer_size); return spt->buffer != NULL; } @@ -105,12 +105,12 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]); spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); - spt->buffer = ws->surface_buffer_create( ws, - spt->base.width[0], - spt->base.height[0], - spt->base.format, - usage, - &spt->stride[0]); + spt->buffer = ws->_surface_buffer_create( ws, + spt->base.width[0], + spt->base.height[0], + spt->base.format, + usage, + &spt->stride[0]); return spt->buffer != NULL; } diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c index c4148fe810..9a19d4d077 100644 --- a/src/gallium/drivers/trace/tr_winsys.c +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -118,7 +118,7 @@ trace_winsys_surface_buffer_create(struct pipe_winsys *_winsys, trace_dump_arg(format, format); trace_dump_arg(uint, usage); - result = winsys->surface_buffer_create(winsys, + result = winsys->_surface_buffer_create(winsys, width, height, format, usage, @@ -153,7 +153,7 @@ trace_winsys_buffer_create(struct pipe_winsys *_winsys, trace_dump_arg(uint, usage); trace_dump_arg(uint, size); - buffer = winsys->buffer_create(winsys, alignment, usage, size); + buffer = winsys->_buffer_create(winsys, alignment, usage, size); trace_dump_ret(ptr, buffer); @@ -162,10 +162,10 @@ trace_winsys_buffer_create(struct pipe_winsys *_winsys, /* Zero the buffer to avoid dumping uninitialized memory */ if(buffer->usage & PIPE_BUFFER_USAGE_CPU_WRITE) { void *map; - map = winsys->buffer_map(winsys, buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + map = winsys->_buffer_map(winsys, buffer, PIPE_BUFFER_USAGE_CPU_WRITE); if(map) { memset(map, 0, buffer->size); - winsys->buffer_unmap(winsys, buffer); + winsys->_buffer_unmap(winsys, buffer); } } @@ -190,7 +190,7 @@ trace_winsys_user_buffer_create(struct pipe_winsys *_winsys, trace_dump_arg_end(); trace_dump_arg(uint, size); - result = winsys->user_buffer_create(winsys, data, size); + result = winsys->_user_buffer_create(winsys, data, size); trace_dump_ret(ptr, result); @@ -216,7 +216,7 @@ trace_winsys_user_buffer_update(struct pipe_winsys *_winsys, const void *map; if(buffer && buffer->usage & TRACE_BUFFER_USAGE_USER) { - map = winsys->buffer_map(winsys, buffer, PIPE_BUFFER_USAGE_CPU_READ); + map = winsys->_buffer_map(winsys, buffer, PIPE_BUFFER_USAGE_CPU_READ); if(map) { trace_dump_call_begin("pipe_winsys", "buffer_write"); @@ -234,7 +234,7 @@ trace_winsys_user_buffer_update(struct pipe_winsys *_winsys, trace_dump_call_end(); - winsys->buffer_unmap(winsys, buffer); + winsys->_buffer_unmap(winsys, buffer); } } } @@ -249,7 +249,7 @@ trace_winsys_buffer_map(struct pipe_winsys *_winsys, struct pipe_winsys *winsys = tr_ws->winsys; void *map; - map = winsys->buffer_map(winsys, buffer, usage); + map = winsys->_buffer_map(winsys, buffer, usage); if(map) { if(usage & PIPE_BUFFER_USAGE_CPU_WRITE) { assert(!hash_table_get(tr_ws->buffer_maps, buffer)); @@ -290,7 +290,7 @@ trace_winsys_buffer_unmap(struct pipe_winsys *_winsys, hash_table_remove(tr_ws->buffer_maps, buffer); } - winsys->buffer_unmap(winsys, buffer); + winsys->_buffer_unmap(winsys, buffer); } @@ -306,7 +306,7 @@ trace_winsys_buffer_destroy(struct pipe_winsys *_winsys, trace_dump_arg(ptr, winsys); trace_dump_arg(ptr, buffer); - winsys->buffer_destroy(winsys, buffer); + winsys->_buffer_destroy(winsys, buffer); trace_dump_call_end(); } @@ -420,12 +420,12 @@ trace_winsys_create(struct pipe_winsys *winsys) tr_ws->base.destroy = trace_winsys_destroy; tr_ws->base.get_name = trace_winsys_get_name; tr_ws->base.flush_frontbuffer = trace_winsys_flush_frontbuffer; - tr_ws->base.surface_buffer_create = trace_winsys_surface_buffer_create; - tr_ws->base.buffer_create = trace_winsys_buffer_create; - tr_ws->base.user_buffer_create = trace_winsys_user_buffer_create; - tr_ws->base.buffer_map = trace_winsys_buffer_map; - tr_ws->base.buffer_unmap = trace_winsys_buffer_unmap; - tr_ws->base.buffer_destroy = trace_winsys_buffer_destroy; + tr_ws->base._surface_buffer_create = trace_winsys_surface_buffer_create; + tr_ws->base._buffer_create = trace_winsys_buffer_create; + tr_ws->base._user_buffer_create = trace_winsys_user_buffer_create; + tr_ws->base._buffer_map = trace_winsys_buffer_map; + tr_ws->base._buffer_unmap = trace_winsys_buffer_unmap; + tr_ws->base._buffer_destroy = trace_winsys_buffer_destroy; tr_ws->base.fence_reference = trace_winsys_fence_reference; tr_ws->base.fence_signalled = trace_winsys_fence_signalled; tr_ws->base.fence_finish = trace_winsys_fence_finish; diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h index 7378392616..da7334bb67 100644 --- a/src/gallium/include/pipe/p_inlines.h +++ b/src/gallium/include/pipe/p_inlines.h @@ -89,29 +89,6 @@ pipe_surface_reference(struct pipe_surface **ptr, struct pipe_surface *surf) } -/* XXX: thread safety issues! - */ -static INLINE void -winsys_buffer_reference(struct pipe_winsys *winsys, - struct pipe_buffer **ptr, - struct pipe_buffer *buf) -{ - if (buf) { - assert(buf->refcount); - buf->refcount++; - } - - if (*ptr) { - assert((*ptr)->refcount); - if(--(*ptr)->refcount == 0) - winsys->buffer_destroy( winsys, *ptr ); - } - - *ptr = buf; -} - - - /** * \sa pipe_surface_reference */ @@ -159,13 +136,19 @@ static INLINE struct pipe_buffer * pipe_buffer_create( struct pipe_screen *screen, unsigned alignment, unsigned usage, unsigned size ) { - return screen->winsys->buffer_create(screen->winsys, alignment, usage, size); + if (screen->buffer_create) + return screen->buffer_create(screen, alignment, usage, size); + else + return screen->winsys->_buffer_create(screen->winsys, alignment, usage, size); } static INLINE struct pipe_buffer * pipe_user_buffer_create( struct pipe_screen *screen, void *ptr, unsigned size ) { - return screen->winsys->user_buffer_create(screen->winsys, ptr, size); + if (screen->user_buffer_create) + return screen->user_buffer_create(screen, ptr, size); + else + return screen->winsys->_user_buffer_create(screen->winsys, ptr, size); } static INLINE void * @@ -173,25 +156,45 @@ pipe_buffer_map(struct pipe_screen *screen, struct pipe_buffer *buf, unsigned usage) { - return screen->winsys->buffer_map(screen->winsys, buf, usage); + if (screen->buffer_map) + return screen->buffer_map(screen, buf, usage); + else + return screen->winsys->_buffer_map(screen->winsys, buf, usage); } static INLINE void pipe_buffer_unmap(struct pipe_screen *screen, struct pipe_buffer *buf) { - screen->winsys->buffer_unmap(screen->winsys, buf); + if (screen->buffer_unmap) + screen->buffer_unmap(screen, buf); + else + screen->winsys->_buffer_unmap(screen->winsys, buf); } -/* XXX when we're using this everywhere, get rid of - * winsys_buffer_reference() above. +/* XXX: thread safety issues! */ static INLINE void pipe_buffer_reference(struct pipe_screen *screen, struct pipe_buffer **ptr, struct pipe_buffer *buf) { - winsys_buffer_reference(screen->winsys, ptr, buf); + if (buf) { + assert(buf->refcount); + buf->refcount++; + } + + if (*ptr) { + assert((*ptr)->refcount); + if(--(*ptr)->refcount == 0) { + if (screen->buffer_destroy) + screen->buffer_destroy( screen, *ptr ); + else + screen->winsys->_buffer_destroy( screen->winsys, *ptr ); + } + } + + *ptr = buf; } diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index 492667c93a..b072484a84 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -128,7 +128,74 @@ struct pipe_screen { void (*surface_unmap)( struct pipe_screen *, struct pipe_surface *surface ); - + + + /** + * Buffer management. Buffer attributes are mostly fixed over its lifetime. + * + * + */ + struct pipe_buffer *(*buffer_create)( struct pipe_screen *screen, + unsigned alignment, + unsigned usage, + unsigned size ); + + /** + * Create a buffer that wraps user-space data. + * + * Effectively this schedules a delayed call to buffer_create + * followed by an upload of the data at *some point in the future*, + * or perhaps never. Basically the allocate/upload is delayed + * until the buffer is actually passed to hardware. + * + * The intention is to provide a quick way to turn regular data + * into a buffer, and secondly to avoid a copy operation if that + * data subsequently turns out to be only accessed by the CPU. + * + * Common example is OpenGL vertex buffers that are subsequently + * processed either by software TNL in the driver or by passing to + * hardware. + * + * XXX: What happens if the delayed call to buffer_create() fails? + * + * Note that ptr may be accessed at any time upto the time when the + * buffer is destroyed, so the data must not be freed before then. + */ + struct pipe_buffer *(*user_buffer_create)(struct pipe_screen *screen, + void *ptr, + unsigned bytes); + + /** + * Allocate storage for a display target surface. + * + * Often surfaces which are meant to be blitted to the front screen (i.e., + * display targets) must be allocated with special characteristics, memory + * pools, or obtained directly from the windowing system. + * + * This callback is invoked by the pipe_screenwhen creating a texture marked + * with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET flag to get the underlying + * buffer storage. + */ + struct pipe_buffer *(*surface_buffer_create)(struct pipe_screen *screen, + unsigned width, unsigned height, + enum pipe_format format, + unsigned usage, + unsigned *stride); + + + /** + * Map the entire data store of a buffer object into the client's address. + * flags is bitmask of PIPE_BUFFER_USAGE_CPU_READ/WRITE flags. + */ + void *(*buffer_map)( struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned usage ); + + void (*buffer_unmap)( struct pipe_screen *screen, + struct pipe_buffer *buf ); + + void (*buffer_destroy)( struct pipe_screen *screen, + struct pipe_buffer *buf ); }; diff --git a/src/gallium/include/pipe/p_winsys.h b/src/gallium/include/pipe/p_winsys.h index 3ae83e8105..bda1907cc1 100644 --- a/src/gallium/include/pipe/p_winsys.h +++ b/src/gallium/include/pipe/p_winsys.h @@ -90,7 +90,7 @@ struct pipe_winsys * alignment indicates the client's alignment requirements, eg for * SSE instructions. */ - struct pipe_buffer *(*buffer_create)( struct pipe_winsys *ws, + struct pipe_buffer *(*_buffer_create)( struct pipe_winsys *ws, unsigned alignment, unsigned usage, unsigned size ); @@ -116,7 +116,7 @@ struct pipe_winsys * Note that ptr may be accessed at any time upto the time when the * buffer is destroyed, so the data must not be freed before then. */ - struct pipe_buffer *(*user_buffer_create)(struct pipe_winsys *ws, + struct pipe_buffer *(*_user_buffer_create)(struct pipe_winsys *ws, void *ptr, unsigned bytes); @@ -131,7 +131,7 @@ struct pipe_winsys * with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET flag to get the underlying * buffer storage. */ - struct pipe_buffer *(*surface_buffer_create)(struct pipe_winsys *ws, + struct pipe_buffer *(*_surface_buffer_create)(struct pipe_winsys *ws, unsigned width, unsigned height, enum pipe_format format, unsigned usage, @@ -142,14 +142,14 @@ struct pipe_winsys * Map the entire data store of a buffer object into the client's address. * flags is bitmask of PIPE_BUFFER_USAGE_CPU_READ/WRITE flags. */ - void *(*buffer_map)( struct pipe_winsys *ws, + void *(*_buffer_map)( struct pipe_winsys *ws, struct pipe_buffer *buf, unsigned usage ); - void (*buffer_unmap)( struct pipe_winsys *ws, + void (*_buffer_unmap)( struct pipe_winsys *ws, struct pipe_buffer *buf ); - void (*buffer_destroy)( struct pipe_winsys *ws, + void (*_buffer_destroy)( struct pipe_winsys *ws, struct pipe_buffer *buf ); -- cgit v1.2.3 From 7996b3e034f92eeceed3f3e7c01eb1f829d98b18 Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 30 Jan 2009 14:06:25 +0000 Subject: pipebuffer: Consider 0 as no alignment needed. --- src/gallium/auxiliary/pipebuffer/pb_buffer.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h index 7cba5fa441..d8f1f02d68 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h @@ -255,7 +255,13 @@ pb_reference(struct pb_buffer **dst, static INLINE boolean pb_check_alignment(size_t requested, size_t provided) { - return requested <= provided && (provided % requested) == 0 ? TRUE : FALSE; + if(!requested) + return TRUE; + if(requested > provided) + return FALSE; + if(provided % requested != 0) + return FALSE; + return TRUE; } -- cgit v1.2.3 From 462f09487efac27173c231b09861b4f5316eb11d Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Fri, 30 Jan 2009 14:59:32 +0000 Subject: util: Define ffs for MinGW. --- src/gallium/auxiliary/util/u_math.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h index aa4fa17b59..ab6f39ac31 100644 --- a/src/gallium/auxiliary/util/u_math.h +++ b/src/gallium/auxiliary/util/u_math.h @@ -341,6 +341,10 @@ unsigned ffs( unsigned u ) } #endif +#ifdef __MINGW32__ +#define ffs __builtin_ffs +#endif + /** * Return float bits. -- cgit v1.2.3 From adfbba476db1fc55006efb748656ebb1a481d143 Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 30 Jan 2009 15:56:00 -0500 Subject: gallium: make p_winsys internal move it to pipe/internal/p_winsys_screen.h and start converting the state trackers to the screen usage --- .../auxiliary/pipebuffer/pb_buffer_fenced.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c | 2 +- src/gallium/auxiliary/pipebuffer/pb_winsys.c | 10 +- src/gallium/auxiliary/util/u_blit.c | 1 - src/gallium/auxiliary/util/u_draw_quad.c | 1 - src/gallium/auxiliary/util/u_gen_mipmap.c | 1 - src/gallium/auxiliary/util/u_simple_shaders.c | 2 +- src/gallium/auxiliary/util/u_timed_winsys.c | 26 +-- src/gallium/drivers/cell/ppu/cell_context.c | 2 +- src/gallium/drivers/cell/ppu/cell_draw_arrays.c | 6 +- src/gallium/drivers/cell/ppu/cell_screen.c | 2 +- src/gallium/drivers/cell/ppu/cell_state_shader.c | 2 +- src/gallium/drivers/cell/ppu/cell_texture.c | 8 +- src/gallium/drivers/cell/ppu/cell_vertex_shader.c | 2 +- src/gallium/drivers/failover/fo_context.c | 2 +- src/gallium/drivers/i915simple/i915_context.c | 2 +- src/gallium/drivers/i915simple/i915_debug.c | 2 +- src/gallium/drivers/i915simple/i915_debug.h | 2 +- src/gallium/drivers/i915simple/i915_debug_fp.c | 2 +- src/gallium/drivers/i915simple/i915_prim_vbuf.c | 2 +- src/gallium/drivers/i915simple/i915_screen.c | 2 +- src/gallium/drivers/i915simple/i915_state.c | 6 +- src/gallium/drivers/i915simple/i915_surface.c | 2 +- src/gallium/drivers/i915simple/i915_texture.c | 8 +- src/gallium/drivers/i965simple/brw_blit.c | 2 +- src/gallium/drivers/i965simple/brw_context.c | 2 +- src/gallium/drivers/i965simple/brw_curbe.c | 6 +- src/gallium/drivers/i965simple/brw_draw.c | 2 +- src/gallium/drivers/i965simple/brw_screen.c | 2 +- src/gallium/drivers/i965simple/brw_state.c | 2 +- src/gallium/drivers/i965simple/brw_state_pool.c | 2 +- src/gallium/drivers/i965simple/brw_surface.c | 2 +- src/gallium/drivers/i965simple/brw_tex_layout.c | 4 +- src/gallium/drivers/nouveau/nouveau_winsys.h | 2 +- src/gallium/drivers/nv04/nv04_context.c | 2 +- src/gallium/drivers/nv04/nv04_miptree.c | 2 +- src/gallium/drivers/nv04/nv04_prim_vbuf.c | 2 +- src/gallium/drivers/nv04/nv04_screen.c | 4 +- src/gallium/drivers/nv04/nv04_surface.c | 2 +- src/gallium/drivers/nv04/nv04_vbo.c | 8 +- src/gallium/drivers/nv10/nv10_context.c | 2 +- src/gallium/drivers/nv10/nv10_miptree.c | 2 +- src/gallium/drivers/nv10/nv10_prim_vbuf.c | 8 +- src/gallium/drivers/nv10/nv10_screen.c | 4 +- src/gallium/drivers/nv10/nv10_state.c | 4 +- src/gallium/drivers/nv10/nv10_surface.c | 2 +- src/gallium/drivers/nv10/nv10_vbo.c | 8 +- src/gallium/drivers/nv20/nv20_context.c | 2 +- src/gallium/drivers/nv20/nv20_miptree.c | 2 +- src/gallium/drivers/nv20/nv20_prim_vbuf.c | 8 +- src/gallium/drivers/nv20/nv20_screen.c | 4 +- src/gallium/drivers/nv20/nv20_state.c | 4 +- src/gallium/drivers/nv20/nv20_surface.c | 2 +- src/gallium/drivers/nv20/nv20_vbo.c | 8 +- src/gallium/drivers/nv20/nv20_vertprog.c | 4 +- src/gallium/drivers/nv30/nv30_context.c | 2 +- src/gallium/drivers/nv30/nv30_fragprog.c | 10 +- src/gallium/drivers/nv30/nv30_miptree.c | 2 +- src/gallium/drivers/nv30/nv30_screen.c | 4 +- src/gallium/drivers/nv30/nv30_surface.c | 2 +- src/gallium/drivers/nv30/nv30_vbo.c | 12 +- src/gallium/drivers/nv30/nv30_vertprog.c | 4 +- src/gallium/drivers/nv40/nv40_context.c | 2 +- src/gallium/drivers/nv40/nv40_draw.c | 12 +- src/gallium/drivers/nv40/nv40_fragprog.c | 10 +- src/gallium/drivers/nv40/nv40_miptree.c | 2 +- src/gallium/drivers/nv40/nv40_screen.c | 4 +- src/gallium/drivers/nv40/nv40_surface.c | 2 +- src/gallium/drivers/nv40/nv40_vbo.c | 12 +- src/gallium/drivers/nv40/nv40_vertprog.c | 4 +- src/gallium/drivers/nv50/nv50_context.c | 2 +- src/gallium/drivers/nv50/nv50_miptree.c | 4 +- src/gallium/drivers/nv50/nv50_program.c | 6 +- src/gallium/drivers/nv50/nv50_query.c | 6 +- src/gallium/drivers/nv50/nv50_screen.c | 6 +- src/gallium/drivers/nv50/nv50_surface.c | 6 +- src/gallium/drivers/nv50/nv50_vbo.c | 2 +- src/gallium/drivers/softpipe/sp_draw_arrays.c | 6 +- src/gallium/drivers/softpipe/sp_screen.c | 2 +- src/gallium/drivers/softpipe/sp_state_fs.c | 2 +- src/gallium/drivers/softpipe/sp_texture.c | 20 +-- src/gallium/drivers/trace/tr_winsys.c | 32 ++-- src/gallium/drivers/trace/tr_winsys.h | 2 +- .../include/pipe/internal/p_winsys_screen.h | 185 ++++++++++++++++++++ src/gallium/include/pipe/p_inlines.h | 28 +--- src/gallium/include/pipe/p_screen.h | 37 ++++ src/gallium/include/pipe/p_winsys.h | 186 --------------------- src/gallium/state_trackers/egl/egl_context.c | 1 - src/gallium/state_trackers/egl/egl_tracker.c | 2 +- src/gallium/state_trackers/g3dvl/vl_basic_csc.c | 1 - .../state_trackers/g3dvl/vl_r16snorm_mc_buf.c | 5 +- src/gallium/state_trackers/python/st_device.c | 2 +- .../state_trackers/python/st_softpipe_winsys.c | 2 +- .../winsys/drm/intel/common/intel_be_device.c | 2 +- .../winsys/drm/intel/common/intel_be_device.h | 2 +- src/gallium/winsys/drm/intel/gem/intel_be_device.c | 2 +- src/gallium/winsys/drm/intel/gem/intel_be_device.h | 2 +- .../drm/nouveau/common/nouveau_winsys_pipe.c | 2 +- .../drm/nouveau/common/nouveau_winsys_pipe.h | 2 +- .../drm/nouveau/common/nouveau_winsys_softpipe.c | 2 +- src/gallium/winsys/egl_xlib/egl_xlib.c | 2 +- src/gallium/winsys/egl_xlib/sw_winsys.c | 2 +- src/gallium/winsys/gdi/gdi_softpipe_winsys.c | 2 +- src/gallium/winsys/xlib/xlib_brw_context.c | 2 +- src/gallium/winsys/xlib/xlib_brw_screen.c | 2 +- src/gallium/winsys/xlib/xlib_cell.c | 2 +- src/gallium/winsys/xlib/xlib_softpipe.c | 2 +- src/mesa/state_tracker/st_cb_fbo.c | 2 +- src/mesa/state_tracker/st_cb_feedback.c | 1 - src/mesa/state_tracker/st_cb_flush.c | 10 +- src/mesa/state_tracker/st_cb_strings.c | 3 +- src/mesa/state_tracker/wgl/stw_device.c | 2 +- src/mesa/state_tracker/wgl/stw_wgl_swapbuffers.c | 2 +- 114 files changed, 450 insertions(+), 436 deletions(-) create mode 100644 src/gallium/include/pipe/internal/p_winsys_screen.h delete mode 100644 src/gallium/include/pipe/p_winsys.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c index aa4b096274..61afdfe82a 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c +++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c @@ -44,7 +44,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_error.h" #include "pipe/p_debug.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_thread.h" #include "util/u_memory.h" #include "util/u_double_list.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index f57a7bffd7..19baa82282 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -36,7 +36,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_thread.h" #include "util/u_memory.h" #include "util/u_double_list.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index 62639fe1c8..a741bae794 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -35,7 +35,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_winsys.c b/src/gallium/auxiliary/pipebuffer/pb_winsys.c index 2b0c4606cf..d26800be48 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_winsys.c +++ b/src/gallium/auxiliary/pipebuffer/pb_winsys.c @@ -34,7 +34,7 @@ */ -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_memory.h" #include "pb_buffer.h" @@ -184,8 +184,8 @@ pb_winsys_buffer_destroy(struct pipe_winsys *winsys, void pb_init_winsys(struct pipe_winsys *winsys) { - winsys->_user_buffer_create = pb_winsys_user_buffer_create; - winsys->_buffer_map = pb_winsys_buffer_map; - winsys->_buffer_unmap = pb_winsys_buffer_unmap; - winsys->_buffer_destroy = pb_winsys_buffer_destroy; + winsys->user_buffer_create = pb_winsys_user_buffer_create; + winsys->buffer_map = pb_winsys_buffer_map; + winsys->buffer_unmap = pb_winsys_buffer_unmap; + winsys->buffer_destroy = pb_winsys_buffer_destroy; } diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c index bc88086b5e..841e9c01e7 100644 --- a/src/gallium/auxiliary/util/u_blit.c +++ b/src/gallium/auxiliary/util/u_blit.c @@ -37,7 +37,6 @@ #include "pipe/p_debug.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" #include "pipe/p_shader_tokens.h" #include "util/u_blit.h" diff --git a/src/gallium/auxiliary/util/u_draw_quad.c b/src/gallium/auxiliary/util/u_draw_quad.c index 1af575530f..f282f3d289 100644 --- a/src/gallium/auxiliary/util/u_draw_quad.c +++ b/src/gallium/auxiliary/util/u_draw_quad.c @@ -29,7 +29,6 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" #include "util/u_draw_quad.h" diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index cb9776ed95..301a58ed7b 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -38,7 +38,6 @@ #include "pipe/p_debug.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" #include "pipe/p_shader_tokens.h" #include "util/u_memory.h" diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c index f06d13c2c4..706155e99a 100644 --- a/src/gallium/auxiliary/util/u_simple_shaders.c +++ b/src/gallium/auxiliary/util/u_simple_shaders.c @@ -37,7 +37,7 @@ #include "pipe/p_debug.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/p_screen.h" #include "pipe/p_shader_tokens.h" #include "util/u_memory.h" diff --git a/src/gallium/auxiliary/util/u_timed_winsys.c b/src/gallium/auxiliary/util/u_timed_winsys.c index c5797f5d63..f237e12d73 100644 --- a/src/gallium/auxiliary/util/u_timed_winsys.c +++ b/src/gallium/auxiliary/util/u_timed_winsys.c @@ -29,7 +29,7 @@ * Authors: Keith Whitwell */ -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "u_timed_winsys.h" #include "util/u_memory.h" #include "util/u_time.h" @@ -122,7 +122,7 @@ timed_buffer_create(struct pipe_winsys *winsys, uint64_t start = time_start(); struct pipe_buffer *buf = - backend->_buffer_create( backend, alignment, usage, size ); + backend->buffer_create( backend, alignment, usage, size ); time_finish(winsys, start, 0, __FUNCTION__); @@ -140,7 +140,7 @@ timed_user_buffer_create(struct pipe_winsys *winsys, struct pipe_winsys *backend = timed_winsys(winsys)->backend; uint64_t start = time_start(); - struct pipe_buffer *buf = backend->_user_buffer_create( backend, data, bytes ); + struct pipe_buffer *buf = backend->user_buffer_create( backend, data, bytes ); time_finish(winsys, start, 1, __FUNCTION__); @@ -156,7 +156,7 @@ timed_buffer_map(struct pipe_winsys *winsys, struct pipe_winsys *backend = timed_winsys(winsys)->backend; uint64_t start = time_start(); - void *map = backend->_buffer_map( backend, buf, flags ); + void *map = backend->buffer_map( backend, buf, flags ); time_finish(winsys, start, 2, __FUNCTION__); @@ -171,7 +171,7 @@ timed_buffer_unmap(struct pipe_winsys *winsys, struct pipe_winsys *backend = timed_winsys(winsys)->backend; uint64_t start = time_start(); - backend->_buffer_unmap( backend, buf ); + backend->buffer_unmap( backend, buf ); time_finish(winsys, start, 3, __FUNCTION__); } @@ -184,7 +184,7 @@ timed_buffer_destroy(struct pipe_winsys *winsys, struct pipe_winsys *backend = timed_winsys(winsys)->backend; uint64_t start = time_start(); - backend->_buffer_destroy( backend, buf ); + backend->buffer_destroy( backend, buf ); time_finish(winsys, start, 4, __FUNCTION__); } @@ -216,7 +216,7 @@ timed_surface_buffer_create(struct pipe_winsys *winsys, struct pipe_winsys *backend = timed_winsys(winsys)->backend; uint64_t start = time_start(); - struct pipe_buffer *ret = backend->_surface_buffer_create( backend, width, height, + struct pipe_buffer *ret = backend->surface_buffer_create( backend, width, height, format, usage, stride ); time_finish(winsys, start, 7, __FUNCTION__); @@ -296,12 +296,12 @@ struct pipe_winsys *u_timed_winsys_create( struct pipe_winsys *backend ) { struct timed_winsys *ws = CALLOC_STRUCT(timed_winsys); - ws->base._user_buffer_create = timed_user_buffer_create; - ws->base._buffer_map = timed_buffer_map; - ws->base._buffer_unmap = timed_buffer_unmap; - ws->base._buffer_destroy = timed_buffer_destroy; - ws->base._buffer_create = timed_buffer_create; - ws->base._surface_buffer_create = timed_surface_buffer_create; + ws->base.user_buffer_create = timed_user_buffer_create; + ws->base.buffer_map = timed_buffer_map; + ws->base.buffer_unmap = timed_buffer_unmap; + ws->base.buffer_destroy = timed_buffer_destroy; + ws->base.buffer_create = timed_buffer_create; + ws->base.surface_buffer_create = timed_surface_buffer_create; ws->base.flush_frontbuffer = timed_flush_frontbuffer; ws->base.get_name = timed_get_name; ws->base.fence_reference = timed_fence_reference; diff --git a/src/gallium/drivers/cell/ppu/cell_context.c b/src/gallium/drivers/cell/ppu/cell_context.c index 8f502823f9..ae82ded334 100644 --- a/src/gallium/drivers/cell/ppu/cell_context.c +++ b/src/gallium/drivers/cell/ppu/cell_context.c @@ -36,7 +36,7 @@ #include "pipe/p_defines.h" #include "pipe/p_format.h" #include "util/u_memory.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_screen.h" #include "draw/draw_context.h" diff --git a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c index ff3871d933..67949b73dd 100644 --- a/src/gallium/drivers/cell/ppu/cell_draw_arrays.c +++ b/src/gallium/drivers/cell/ppu/cell_draw_arrays.c @@ -33,7 +33,7 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "cell_context.h" @@ -52,7 +52,7 @@ cell_map_constant_buffers(struct cell_context *sp) uint i; for (i = 0; i < 2; i++) { if (sp->constants[i].size) { - sp->mapped_constants[i] = ws->_buffer_map(ws, sp->constants[i].buffer, + sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); cell_flush_buffer_range(sp, sp->mapped_constants[i], sp->constants[i].buffer->size); @@ -71,7 +71,7 @@ cell_unmap_constant_buffers(struct cell_context *sp) uint i; for (i = 0; i < 2; i++) { if (sp->constants[i].size) - ws->_buffer_unmap(ws, sp->constants[i].buffer); + ws->buffer_unmap(ws, sp->constants[i].buffer); sp->mapped_constants[i] = NULL; } } diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index 6fc2257e2a..bbe80793ca 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -27,7 +27,7 @@ #include "util/u_memory.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/cell/ppu/cell_state_shader.c b/src/gallium/drivers/cell/ppu/cell_state_shader.c index bcbd81922c..990f23e170 100644 --- a/src/gallium/drivers/cell/ppu/cell_state_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_state_shader.c @@ -28,7 +28,7 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "draw/draw_context.h" #include "tgsi/tgsi_parse.h" diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c index f1b1a38efc..4f16e2c6af 100644 --- a/src/gallium/drivers/cell/ppu/cell_texture.c +++ b/src/gallium/drivers/cell/ppu/cell_texture.c @@ -34,7 +34,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -112,7 +112,7 @@ cell_texture_create(struct pipe_screen *screen, cell_texture_layout(ct); - ct->buffer = ws->_buffer_create(ws, 32, PIPE_BUFFER_USAGE_PIXEL, + ct->buffer = ws->buffer_create(ws, 32, PIPE_BUFFER_USAGE_PIXEL, ct->buffer_size); if (!ct->buffer) { @@ -324,11 +324,11 @@ cell_twiddle_texture(struct pipe_screen *screen, /* allocate buffer for tiled data now */ struct pipe_winsys *ws = screen->winsys; uint bytes = bufWidth * bufHeight * 4 * numFaces; - ct->tiled_buffer[level] = ws->_buffer_create(ws, 16, + ct->tiled_buffer[level] = ws->buffer_create(ws, 16, PIPE_BUFFER_USAGE_PIXEL, bytes); /* and map it */ - ct->tiled_mapped[level] = ws->_buffer_map(ws, ct->tiled_buffer[level], + ct->tiled_mapped[level] = ws->buffer_map(ws, ct->tiled_buffer[level], PIPE_BUFFER_USAGE_GPU_READ); } dst = (uint *) ((ubyte *) ct->tiled_mapped[level] + offset); diff --git a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c index 2b10c116fa..403cf6d50f 100644 --- a/src/gallium/drivers/cell/ppu/cell_vertex_shader.c +++ b/src/gallium/drivers/cell/ppu/cell_vertex_shader.c @@ -31,7 +31,7 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_math.h" #include "cell_context.h" diff --git a/src/gallium/drivers/failover/fo_context.c b/src/gallium/drivers/failover/fo_context.c index 10c4ffc209..0742b27b8f 100644 --- a/src/gallium/drivers/failover/fo_context.c +++ b/src/gallium/drivers/failover/fo_context.c @@ -27,7 +27,7 @@ #include "pipe/p_defines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_memory.h" #include "pipe/p_context.h" diff --git a/src/gallium/drivers/i915simple/i915_context.c b/src/gallium/drivers/i915simple/i915_context.c index 6dd3eda85d..3e3a596884 100644 --- a/src/gallium/drivers/i915simple/i915_context.c +++ b/src/gallium/drivers/i915simple/i915_context.c @@ -34,7 +34,7 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "util/u_memory.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/i915simple/i915_debug.c b/src/gallium/drivers/i915simple/i915_debug.c index 4adf9decae..a300b61c3b 100644 --- a/src/gallium/drivers/i915simple/i915_debug.c +++ b/src/gallium/drivers/i915simple/i915_debug.c @@ -30,7 +30,7 @@ #include "i915_winsys.h" #include "i915_debug.h" #include "i915_batch.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_debug.h" diff --git a/src/gallium/drivers/i915simple/i915_debug.h b/src/gallium/drivers/i915simple/i915_debug.h index c33ee36110..16ca7277c7 100644 --- a/src/gallium/drivers/i915simple/i915_debug.h +++ b/src/gallium/drivers/i915simple/i915_debug.h @@ -72,7 +72,7 @@ void i915_print_ureg(const char *msg, unsigned ureg); #if defined(DEBUG) && defined(FILE_DEBUG_FLAG) -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" static INLINE void I915_DBG( diff --git a/src/gallium/drivers/i915simple/i915_debug_fp.c b/src/gallium/drivers/i915simple/i915_debug_fp.c index 48be3e1472..9c5b117b6d 100644 --- a/src/gallium/drivers/i915simple/i915_debug_fp.c +++ b/src/gallium/drivers/i915simple/i915_debug_fp.c @@ -28,7 +28,7 @@ #include "i915_reg.h" #include "i915_debug.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_memory.h" diff --git a/src/gallium/drivers/i915simple/i915_prim_vbuf.c b/src/gallium/drivers/i915simple/i915_prim_vbuf.c index a8e97e7c30..f49f6d6ed1 100644 --- a/src/gallium/drivers/i915simple/i915_prim_vbuf.c +++ b/src/gallium/drivers/i915simple/i915_prim_vbuf.c @@ -42,7 +42,7 @@ #include "draw/draw_vbuf.h" #include "pipe/p_debug.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_math.h" #include "util/u_memory.h" diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 1c976082df..069cc331bb 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -27,7 +27,7 @@ #include "util/u_memory.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "util/u_string.h" diff --git a/src/gallium/drivers/i915simple/i915_state.c b/src/gallium/drivers/i915simple/i915_state.c index b931556b7e..273e74002a 100644 --- a/src/gallium/drivers/i915simple/i915_state.c +++ b/src/gallium/drivers/i915simple/i915_state.c @@ -30,7 +30,7 @@ #include "draw/draw_context.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -536,10 +536,10 @@ static void i915_set_constant_buffer(struct pipe_context *pipe, if (buf) { void *mapped; if (buf->buffer && buf->buffer->size && - (mapped = ws->_buffer_map(ws, buf->buffer, + (mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) { memcpy(i915->current.constants[shader], mapped, buf->buffer->size); - ws->_buffer_unmap(ws, buf->buffer); + ws->buffer_unmap(ws, buf->buffer); i915->current.num_user_constants[shader] = buf->buffer->size / (4 * sizeof(float)); } diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index 3b3d9217a0..5ffdb76682 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -31,7 +31,7 @@ #include "pipe/p_defines.h" #include "pipe/p_inlines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_tile.h" #include "util/u_rect.h" diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 7847f2ef86..803ef3a187 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -34,7 +34,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -605,7 +605,7 @@ i915_texture_create(struct pipe_screen *screen, tex_size = tex->stride * tex->total_nblocksy; - tex->buffer = ws->_buffer_create(ws, 64, + tex->buffer = ws->buffer_create(ws, 64, PIPE_BUFFER_USAGE_PIXEL, tex_size); @@ -613,10 +613,10 @@ i915_texture_create(struct pipe_screen *screen, goto fail; #if 0 - void *ptr = ws->_buffer_map(ws, tex->buffer, + void *ptr = ws->buffer_map(ws, tex->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); memset(ptr, 0x80, tex_size); - ws->_buffer_unmap(ws, tex->buffer); + ws->buffer_unmap(ws, tex->buffer); #endif return &tex->base; diff --git a/src/gallium/drivers/i965simple/brw_blit.c b/src/gallium/drivers/i965simple/brw_blit.c index 8494f70493..4d11f8d2ab 100644 --- a/src/gallium/drivers/i965simple/brw_blit.c +++ b/src/gallium/drivers/i965simple/brw_blit.c @@ -35,7 +35,7 @@ #include "brw_reg.h" #include "pipe/p_context.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #define FILE_DEBUG_FLAG DEBUG_BLIT diff --git a/src/gallium/drivers/i965simple/brw_context.c b/src/gallium/drivers/i965simple/brw_context.c index 96920df008..c74cbf8d73 100644 --- a/src/gallium/drivers/i965simple/brw_context.c +++ b/src/gallium/drivers/i965simple/brw_context.c @@ -37,7 +37,7 @@ #include "brw_tex_layout.h" #include "brw_winsys.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_context.h" #include "util/u_memory.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/i965simple/brw_curbe.c b/src/gallium/drivers/i965simple/brw_curbe.c index 4b1f4d3121..904cde8e30 100644 --- a/src/gallium/drivers/i965simple/brw_curbe.c +++ b/src/gallium/drivers/i965simple/brw_curbe.c @@ -38,7 +38,7 @@ #include "brw_util.h" #include "brw_wm.h" #include "pipe/p_state.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -256,13 +256,13 @@ static void upload_constant_buffer(struct brw_context *brw) /* FIXME: buffer size is num_consts + num_immediates */ if (brw->vs.prog_data->num_consts) { /* map the vertex constant buffer and copy to curbe: */ - void *data = ws->_buffer_map(ws, cbuffer->buffer, 0); + void *data = ws->buffer_map(ws, cbuffer->buffer, 0); /* FIXME: this is wrong. the cbuffer->buffer->size currently * represents size of consts + immediates. so if we'll * have both we'll copy over the end of the buffer * with the subsequent memcpy */ memcpy(&buf[offset], data, cbuffer->buffer->size); - ws->_buffer_unmap(ws, cbuffer->buffer); + ws->buffer_unmap(ws, cbuffer->buffer); offset += cbuffer->buffer->size; } /*immediates*/ diff --git a/src/gallium/drivers/i965simple/brw_draw.c b/src/gallium/drivers/i965simple/brw_draw.c index 7598e3dc8a..648aaa0da5 100644 --- a/src/gallium/drivers/i965simple/brw_draw.c +++ b/src/gallium/drivers/i965simple/brw_draw.c @@ -34,7 +34,7 @@ #include "brw_state.h" #include "pipe/p_context.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" static unsigned hw_prim[PIPE_PRIM_POLYGON+1] = { _3DPRIM_POINTLIST, diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c index ab7cd624b2..036ddd8c90 100644 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -27,7 +27,7 @@ #include "util/u_memory.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_string.h" #include "brw_context.h" diff --git a/src/gallium/drivers/i965simple/brw_state.c b/src/gallium/drivers/i965simple/brw_state.c index af46cb546f..b47f5373f3 100644 --- a/src/gallium/drivers/i965simple/brw_state.c +++ b/src/gallium/drivers/i965simple/brw_state.c @@ -30,7 +30,7 @@ */ -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_memory.h" #include "pipe/p_inlines.h" #include "pipe/p_shader_tokens.h" diff --git a/src/gallium/drivers/i965simple/brw_state_pool.c b/src/gallium/drivers/i965simple/brw_state_pool.c index 007dc8f9de..e91263cb1f 100644 --- a/src/gallium/drivers/i965simple/brw_state_pool.c +++ b/src/gallium/drivers/i965simple/brw_state_pool.c @@ -42,7 +42,7 @@ * the pool. */ -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_math.h" #include "util/u_memory.h" #include "pipe/p_inlines.h" diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index b89756c47b..3159eba2fd 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -30,7 +30,7 @@ #include "brw_state.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_tile.h" #include "util/u_rect.h" diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index c99eb8e75a..6af0d5cf4b 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -37,7 +37,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_math.h" #include "util/u_memory.h" #include "brw_context.h" @@ -295,7 +295,7 @@ brw_texture_create_screen(struct pipe_screen *screen, tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]); if (brw_miptree_layout(tex)) - tex->buffer = ws->_buffer_create(ws, 64, + tex->buffer = ws->buffer_create(ws, 64, PIPE_BUFFER_USAGE_PIXEL, tex->stride * tex->total_nblocksy); diff --git a/src/gallium/drivers/nouveau/nouveau_winsys.h b/src/gallium/drivers/nouveau/nouveau_winsys.h index 5535ebb6a9..25e0b05be1 100644 --- a/src/gallium/drivers/nouveau/nouveau_winsys.h +++ b/src/gallium/drivers/nouveau/nouveau_winsys.h @@ -2,7 +2,7 @@ #define NOUVEAU_WINSYS_H #include -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_defines.h" #include "nouveau/nouveau_bo.h" diff --git a/src/gallium/drivers/nv04/nv04_context.c b/src/gallium/drivers/nv04/nv04_context.c index 9f75253363..a14273e288 100644 --- a/src/gallium/drivers/nv04/nv04_context.c +++ b/src/gallium/drivers/nv04/nv04_context.c @@ -1,6 +1,6 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "nv04_context.h" #include "nv04_screen.h" diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c index 32800f9741..094c38256b 100644 --- a/src/gallium/drivers/nv04/nv04_miptree.c +++ b/src/gallium/drivers/nv04/nv04_miptree.c @@ -58,7 +58,7 @@ nv04_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) nv04_miptree_layout(mt); - mt->buffer = ws->_buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, + mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, mt->total_size); if (!mt->buffer) { FREE(mt); diff --git a/src/gallium/drivers/nv04/nv04_prim_vbuf.c b/src/gallium/drivers/nv04/nv04_prim_vbuf.c index 19979fff79..18a8872ae3 100644 --- a/src/gallium/drivers/nv04/nv04_prim_vbuf.c +++ b/src/gallium/drivers/nv04/nv04_prim_vbuf.c @@ -1,7 +1,7 @@ #include "pipe/p_debug.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_compiler.h" #include "draw/draw_vbuf.h" diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c index 2fa7d35294..65eacde6b2 100644 --- a/src/gallium/drivers/nv04/nv04_screen.c +++ b/src/gallium/drivers/nv04/nv04_screen.c @@ -117,7 +117,7 @@ nv04_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, struct pipe_winsys *ws = screen->winsys; void *map; - map = ws->_buffer_map(ws, surface->buffer, flags); + map = ws->buffer_map(ws, surface->buffer, flags); if (!map) return NULL; @@ -129,7 +129,7 @@ nv04_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { struct pipe_winsys *ws = screen->winsys; - ws->_buffer_unmap(ws, surface->buffer); + ws->buffer_unmap(ws, surface->buffer); } static void diff --git a/src/gallium/drivers/nv04/nv04_surface.c b/src/gallium/drivers/nv04/nv04_surface.c index 9d9943ed4e..0d0983f9d4 100644 --- a/src/gallium/drivers/nv04/nv04_surface.c +++ b/src/gallium/drivers/nv04/nv04_surface.c @@ -28,7 +28,7 @@ #include "nv04_context.h" #include "pipe/p_defines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "util/u_tile.h" diff --git a/src/gallium/drivers/nv04/nv04_vbo.c b/src/gallium/drivers/nv04/nv04_vbo.c index 117a73a1e4..91f919d48e 100644 --- a/src/gallium/drivers/nv04/nv04_vbo.c +++ b/src/gallium/drivers/nv04/nv04_vbo.c @@ -23,7 +23,7 @@ boolean nv04_draw_elements( struct pipe_context *pipe, for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (nv04->vertex_buffer[i].buffer) { void *buf - = pipe->winsys->_buffer_map(pipe->winsys, + = pipe->winsys->buffer_map(pipe->winsys, nv04->vertex_buffer[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(draw, i, buf); @@ -32,7 +32,7 @@ boolean nv04_draw_elements( struct pipe_context *pipe, /* Map index buffer, if present */ if (indexBuffer) { void *mapped_indexes - = pipe->winsys->_buffer_map(pipe->winsys, indexBuffer, + = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); } @@ -49,12 +49,12 @@ boolean nv04_draw_elements( struct pipe_context *pipe, */ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (nv04->vertex_buffer[i].buffer) { - pipe->winsys->_buffer_unmap(pipe->winsys, nv04->vertex_buffer[i].buffer); + pipe->winsys->buffer_unmap(pipe->winsys, nv04->vertex_buffer[i].buffer); draw_set_mapped_vertex_buffer(draw, i, NULL); } } if (indexBuffer) { - pipe->winsys->_buffer_unmap(pipe->winsys, indexBuffer); + pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); draw_set_mapped_element_buffer(draw, 0, NULL); } diff --git a/src/gallium/drivers/nv10/nv10_context.c b/src/gallium/drivers/nv10/nv10_context.c index 4eb4ed9185..ef2c0c5d9f 100644 --- a/src/gallium/drivers/nv10/nv10_context.c +++ b/src/gallium/drivers/nv10/nv10_context.c @@ -1,6 +1,6 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "nv10_context.h" #include "nv10_screen.h" diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c index 384f89c391..f8c021261b 100644 --- a/src/gallium/drivers/nv10/nv10_miptree.c +++ b/src/gallium/drivers/nv10/nv10_miptree.c @@ -65,7 +65,7 @@ nv10_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) nv10_miptree_layout(mt); - mt->buffer = ws->_buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, + mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, mt->total_size); if (!mt->buffer) { FREE(mt); diff --git a/src/gallium/drivers/nv10/nv10_prim_vbuf.c b/src/gallium/drivers/nv10/nv10_prim_vbuf.c index bdffaacf78..7435d87315 100644 --- a/src/gallium/drivers/nv10/nv10_prim_vbuf.c +++ b/src/gallium/drivers/nv10/nv10_prim_vbuf.c @@ -40,7 +40,7 @@ #include "pipe/p_debug.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "nv10_context.h" #include "nv10_state.h" @@ -111,11 +111,11 @@ nv10_vbuf_render_allocate_vertices( struct vbuf_render *render, size_t size = (size_t)vertex_size * (size_t)nr_vertices; assert(!nv10_render->buffer); - nv10_render->buffer = winsys->_buffer_create(winsys, 64, PIPE_BUFFER_USAGE_VERTEX, size); + nv10_render->buffer = winsys->buffer_create(winsys, 64, PIPE_BUFFER_USAGE_VERTEX, size); nv10->dirty |= NV10_NEW_VTXARRAYS; - return winsys->_buffer_map(winsys, + return winsys->buffer_map(winsys, nv10_render->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); } @@ -187,7 +187,7 @@ nv10_vbuf_render_release_vertices( struct vbuf_render *render, struct pipe_screen *pscreen = &nv10->screen->pipe; assert(nv10_render->buffer); - winsys->_buffer_unmap(winsys, nv10_render->buffer); + winsys->buffer_unmap(winsys, nv10_render->buffer); pipe_buffer_reference(pscreen, &nv10_render->buffer, NULL); } diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index 333e0b3252..4d9fbd4b5f 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -122,7 +122,7 @@ nv10_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, struct pipe_winsys *ws = screen->winsys; void *map; - map = ws->_buffer_map(ws, surface->buffer, flags); + map = ws->buffer_map(ws, surface->buffer, flags); if (!map) return NULL; @@ -134,7 +134,7 @@ nv10_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { struct pipe_winsys *ws = screen->winsys; - ws->_buffer_unmap(ws, surface->buffer); + ws->buffer_unmap(ws, surface->buffer); } static void diff --git a/src/gallium/drivers/nv10/nv10_state.c b/src/gallium/drivers/nv10/nv10_state.c index f84d45a730..119af66dfd 100644 --- a/src/gallium/drivers/nv10/nv10_state.c +++ b/src/gallium/drivers/nv10/nv10_state.c @@ -468,12 +468,12 @@ nv10_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, if (buf) { void *mapped; if (buf->buffer && buf->buffer->size && - (mapped = ws->_buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) + (mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) { memcpy(nv10->constbuf[shader], mapped, buf->buffer->size); nv10->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float)); - ws->_buffer_unmap(ws, buf->buffer); + ws->buffer_unmap(ws, buf->buffer); } } } diff --git a/src/gallium/drivers/nv10/nv10_surface.c b/src/gallium/drivers/nv10/nv10_surface.c index be44c7bed5..78fd7b42da 100644 --- a/src/gallium/drivers/nv10/nv10_surface.c +++ b/src/gallium/drivers/nv10/nv10_surface.c @@ -28,7 +28,7 @@ #include "nv10_context.h" #include "pipe/p_defines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "util/u_tile.h" diff --git a/src/gallium/drivers/nv10/nv10_vbo.c b/src/gallium/drivers/nv10/nv10_vbo.c index a6b80e4050..d0e788ac03 100644 --- a/src/gallium/drivers/nv10/nv10_vbo.c +++ b/src/gallium/drivers/nv10/nv10_vbo.c @@ -25,7 +25,7 @@ boolean nv10_draw_elements( struct pipe_context *pipe, for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (nv10->vtxbuf[i].buffer) { void *buf - = pipe->winsys->_buffer_map(pipe->winsys, + = pipe->winsys->buffer_map(pipe->winsys, nv10->vtxbuf[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(draw, i, buf); @@ -34,7 +34,7 @@ boolean nv10_draw_elements( struct pipe_context *pipe, /* Map index buffer, if present */ if (indexBuffer) { void *mapped_indexes - = pipe->winsys->_buffer_map(pipe->winsys, indexBuffer, + = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); } @@ -55,12 +55,12 @@ boolean nv10_draw_elements( struct pipe_context *pipe, */ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (nv10->vtxbuf[i].buffer) { - pipe->winsys->_buffer_unmap(pipe->winsys, nv10->vtxbuf[i].buffer); + pipe->winsys->buffer_unmap(pipe->winsys, nv10->vtxbuf[i].buffer); draw_set_mapped_vertex_buffer(draw, i, NULL); } } if (indexBuffer) { - pipe->winsys->_buffer_unmap(pipe->winsys, indexBuffer); + pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); draw_set_mapped_element_buffer(draw, 0, NULL); } diff --git a/src/gallium/drivers/nv20/nv20_context.c b/src/gallium/drivers/nv20/nv20_context.c index c8fb690ee9..d3aca8d937 100644 --- a/src/gallium/drivers/nv20/nv20_context.c +++ b/src/gallium/drivers/nv20/nv20_context.c @@ -1,6 +1,6 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "nv20_context.h" #include "nv20_screen.h" diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c index 759f29c951..d2038c391d 100644 --- a/src/gallium/drivers/nv20/nv20_miptree.c +++ b/src/gallium/drivers/nv20/nv20_miptree.c @@ -65,7 +65,7 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt) nv20_miptree_layout(mt); - mt->buffer = ws->_buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, + mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL, mt->total_size); if (!mt->buffer) { FREE(mt); diff --git a/src/gallium/drivers/nv20/nv20_prim_vbuf.c b/src/gallium/drivers/nv20/nv20_prim_vbuf.c index c4841026b3..4dd7052814 100644 --- a/src/gallium/drivers/nv20/nv20_prim_vbuf.c +++ b/src/gallium/drivers/nv20/nv20_prim_vbuf.c @@ -40,7 +40,7 @@ #include "pipe/p_debug.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "nv20_context.h" #include "nv20_state.h" @@ -113,9 +113,9 @@ static void * nv20__allocate_pbuffer(struct nv20_vbuf_render *nv20_render, size_t size) { struct pipe_winsys *winsys = nv20_render->nv20->pipe.winsys; - nv20_render->pbuffer = winsys->_buffer_create(winsys, 64, + nv20_render->pbuffer = winsys->buffer_create(winsys, 64, PIPE_BUFFER_USAGE_VERTEX, size); - return winsys->_buffer_map(winsys, + return winsys->buffer_map(winsys, nv20_render->pbuffer, PIPE_BUFFER_USAGE_CPU_WRITE); } @@ -334,7 +334,7 @@ nv20_vbuf_render_release_vertices( struct vbuf_render *render, struct pipe_screen *pscreen = &nv20->screen->pipe; if (nv20_render->pbuffer) { - winsys->_buffer_unmap(winsys, nv20_render->pbuffer); + winsys->buffer_unmap(winsys, nv20_render->pbuffer); pipe_buffer_reference(pscreen, &nv20_render->pbuffer, NULL); } else if (nv20_render->mbuffer) { FREE(nv20_render->mbuffer); diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c index e9adf05a7d..2ca6e6b149 100644 --- a/src/gallium/drivers/nv20/nv20_screen.c +++ b/src/gallium/drivers/nv20/nv20_screen.c @@ -122,7 +122,7 @@ nv20_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, struct pipe_winsys *ws = screen->winsys; void *map; - map = ws->_buffer_map(ws, surface->buffer, flags); + map = ws->buffer_map(ws, surface->buffer, flags); if (!map) return NULL; @@ -134,7 +134,7 @@ nv20_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { struct pipe_winsys *ws = screen->winsys; - ws->_buffer_unmap(ws, surface->buffer); + ws->buffer_unmap(ws, surface->buffer); } static void diff --git a/src/gallium/drivers/nv20/nv20_state.c b/src/gallium/drivers/nv20/nv20_state.c index 65060006da..ecec4f49a0 100644 --- a/src/gallium/drivers/nv20/nv20_state.c +++ b/src/gallium/drivers/nv20/nv20_state.c @@ -461,12 +461,12 @@ nv20_set_constant_buffer(struct pipe_context *pipe, uint shader, uint index, if (buf) { void *mapped; if (buf->buffer && buf->buffer->size && - (mapped = ws->_buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) + (mapped = ws->buffer_map(ws, buf->buffer, PIPE_BUFFER_USAGE_CPU_READ))) { memcpy(nv20->constbuf[shader], mapped, buf->buffer->size); nv20->constbuf_nr[shader] = buf->buffer->size / (4 * sizeof(float)); - ws->_buffer_unmap(ws, buf->buffer); + ws->buffer_unmap(ws, buf->buffer); } } } diff --git a/src/gallium/drivers/nv20/nv20_surface.c b/src/gallium/drivers/nv20/nv20_surface.c index 7bc68d0ca2..9b4c028eae 100644 --- a/src/gallium/drivers/nv20/nv20_surface.c +++ b/src/gallium/drivers/nv20/nv20_surface.c @@ -28,7 +28,7 @@ #include "nv20_context.h" #include "pipe/p_defines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "util/u_tile.h" diff --git a/src/gallium/drivers/nv20/nv20_vbo.c b/src/gallium/drivers/nv20/nv20_vbo.c index d6b731790c..4edc4efebd 100644 --- a/src/gallium/drivers/nv20/nv20_vbo.c +++ b/src/gallium/drivers/nv20/nv20_vbo.c @@ -25,7 +25,7 @@ boolean nv20_draw_elements( struct pipe_context *pipe, for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (nv20->vtxbuf[i].buffer) { void *buf - = pipe->winsys->_buffer_map(pipe->winsys, + = pipe->winsys->buffer_map(pipe->winsys, nv20->vtxbuf[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(draw, i, buf); @@ -34,7 +34,7 @@ boolean nv20_draw_elements( struct pipe_context *pipe, /* Map index buffer, if present */ if (indexBuffer) { void *mapped_indexes - = pipe->winsys->_buffer_map(pipe->winsys, indexBuffer, + = pipe->winsys->buffer_map(pipe->winsys, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer(draw, indexSize, mapped_indexes); } @@ -55,12 +55,12 @@ boolean nv20_draw_elements( struct pipe_context *pipe, */ for (i = 0; i < PIPE_MAX_ATTRIBS; i++) { if (nv20->vtxbuf[i].buffer) { - pipe->winsys->_buffer_unmap(pipe->winsys, nv20->vtxbuf[i].buffer); + pipe->winsys->buffer_unmap(pipe->winsys, nv20->vtxbuf[i].buffer); draw_set_mapped_vertex_buffer(draw, i, NULL); } } if (indexBuffer) { - pipe->winsys->_buffer_unmap(pipe->winsys, indexBuffer); + pipe->winsys->buffer_unmap(pipe->winsys, indexBuffer); draw_set_mapped_element_buffer(draw, 0, NULL); } diff --git a/src/gallium/drivers/nv20/nv20_vertprog.c b/src/gallium/drivers/nv20/nv20_vertprog.c index c4f3d0f14f..a885fcd7a5 100644 --- a/src/gallium/drivers/nv20/nv20_vertprog.c +++ b/src/gallium/drivers/nv20/nv20_vertprog.c @@ -749,7 +749,7 @@ nv20_vertprog_validate(struct nv20_context *nv20) float *map = NULL; if (constbuf) { - map = ws->_buffer_map(ws, constbuf, + map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); } @@ -771,7 +771,7 @@ nv20_vertprog_validate(struct nv20_context *nv20) } if (constbuf) { - ws->_buffer_unmap(ws, constbuf); + ws->buffer_unmap(ws, constbuf); } } diff --git a/src/gallium/drivers/nv30/nv30_context.c b/src/gallium/drivers/nv30/nv30_context.c index 2bff28aca9..61654f8756 100644 --- a/src/gallium/drivers/nv30/nv30_context.c +++ b/src/gallium/drivers/nv30/nv30_context.c @@ -1,6 +1,6 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "nv30_context.h" #include "nv30_screen.h" diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c index f22a06c1a3..320ba3f4bf 100644 --- a/src/gallium/drivers/nv30/nv30_fragprog.c +++ b/src/gallium/drivers/nv30/nv30_fragprog.c @@ -803,7 +803,7 @@ nv30_fragprog_upload(struct nv30_context *nv30, uint32_t *map; int i; - map = ws->_buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); #if 0 for (i = 0; i < fp->insn_len; i++) { @@ -825,7 +825,7 @@ nv30_fragprog_upload(struct nv30_context *nv30, } } - ws->_buffer_unmap(ws, fp->buffer); + ws->buffer_unmap(ws, fp->buffer); } static boolean @@ -849,7 +849,7 @@ nv30_fragprog_validate(struct nv30_context *nv30) return FALSE; } - fp->buffer = ws->_buffer_create(ws, 0x100, 0, fp->insn_len * 4); + fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4); nv30_fragprog_upload(nv30, fp); so = so_new(8, 1); @@ -869,7 +869,7 @@ update_constants: if (fp->nr_consts) { float *map; - map = ws->_buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); + map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); for (i = 0; i < fp->nr_consts; i++) { struct nv30_fragment_program_data *fpd = &fp->consts[i]; uint32_t *p = &fp->insn[fpd->offset]; @@ -880,7 +880,7 @@ update_constants: memcpy(p, cb, 4 * sizeof(float)); new_consts = TRUE; } - ws->_buffer_unmap(ws, constbuf); + ws->buffer_unmap(ws, constbuf); if (new_consts) nv30_fragprog_upload(nv30, fp); diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index bf6c4a1c74..79baac714c 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -93,7 +93,7 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) nv30_miptree_layout(mt); - mt->buffer = ws->_buffer_create(ws, 256, + mt->buffer = ws->buffer_create(ws, 256, PIPE_BUFFER_USAGE_PIXEL | NOUVEAU_BUFFER_USAGE_TEXTURE, mt->total_size); diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 56b20ae2fd..1fac6d3df8 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -161,7 +161,7 @@ nv30_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, assert(surface_to_map); - map = ws->_buffer_map(ws, surface_to_map->buffer, flags); + map = ws->buffer_map(ws, surface_to_map->buffer, flags); if (!map) return NULL; @@ -189,7 +189,7 @@ nv30_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) assert(surface_to_unmap); - ws->_buffer_unmap(ws, surface_to_unmap->buffer); + ws->buffer_unmap(ws, surface_to_unmap->buffer); if (surface_to_unmap != surface) { struct nv30_screen *nvscreen = nv30_screen(screen); diff --git a/src/gallium/drivers/nv30/nv30_surface.c b/src/gallium/drivers/nv30/nv30_surface.c index d3376a73bf..806131dcc9 100644 --- a/src/gallium/drivers/nv30/nv30_surface.c +++ b/src/gallium/drivers/nv30/nv30_surface.c @@ -28,7 +28,7 @@ #include "nv30_context.h" #include "pipe/p_defines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "util/u_tile.h" diff --git a/src/gallium/drivers/nv30/nv30_vbo.c b/src/gallium/drivers/nv30/nv30_vbo.c index cf0468f879..2d6d48ac16 100644 --- a/src/gallium/drivers/nv30/nv30_vbo.c +++ b/src/gallium/drivers/nv30/nv30_vbo.c @@ -116,7 +116,7 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so, if (nv30_vbo_format_to_hw(ve->src_format, &type, &ncomp)) return FALSE; - map = ws->_buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); map += vb->buffer_offset + ve->src_offset; switch (type) { @@ -148,17 +148,17 @@ nv30_vbo_static_attrib(struct nv30_context *nv30, struct nouveau_stateobj *so, so_data (so, fui(v[0])); break; default: - ws->_buffer_unmap(ws, vb->buffer); + ws->buffer_unmap(ws, vb->buffer); return FALSE; } } break; default: - ws->_buffer_unmap(ws, vb->buffer); + ws->buffer_unmap(ws, vb->buffer); return FALSE; } - ws->_buffer_unmap(ws, vb->buffer); + ws->buffer_unmap(ws, vb->buffer); return TRUE; } @@ -371,7 +371,7 @@ nv30_draw_elements_inline(struct pipe_context *pipe, struct pipe_winsys *ws = pipe->winsys; void *map; - map = ws->_buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); + map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); if (!ib) { NOUVEAU_ERR("failed mapping ib\n"); return FALSE; @@ -392,7 +392,7 @@ nv30_draw_elements_inline(struct pipe_context *pipe, break; } - ws->_buffer_unmap(ws, ib); + ws->buffer_unmap(ws, ib); return TRUE; } diff --git a/src/gallium/drivers/nv30/nv30_vertprog.c b/src/gallium/drivers/nv30/nv30_vertprog.c index b67dde0808..72824559e8 100644 --- a/src/gallium/drivers/nv30/nv30_vertprog.c +++ b/src/gallium/drivers/nv30/nv30_vertprog.c @@ -749,7 +749,7 @@ nv30_vertprog_validate(struct nv30_context *nv30) float *map = NULL; if (constbuf) { - map = ws->_buffer_map(ws, constbuf, + map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); } @@ -771,7 +771,7 @@ nv30_vertprog_validate(struct nv30_context *nv30) } if (constbuf) { - ws->_buffer_unmap(ws, constbuf); + ws->buffer_unmap(ws, constbuf); } } diff --git a/src/gallium/drivers/nv40/nv40_context.c b/src/gallium/drivers/nv40/nv40_context.c index cc63dd734b..5d325f5067 100644 --- a/src/gallium/drivers/nv40/nv40_context.c +++ b/src/gallium/drivers/nv40/nv40_context.c @@ -1,6 +1,6 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "nv40_context.h" #include "nv40_screen.h" diff --git a/src/gallium/drivers/nv40/nv40_draw.c b/src/gallium/drivers/nv40/nv40_draw.c index 3d5332a80b..c83ff91d7e 100644 --- a/src/gallium/drivers/nv40/nv40_draw.c +++ b/src/gallium/drivers/nv40/nv40_draw.c @@ -241,13 +241,13 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, nv40_state_emit(nv40); for (i = 0; i < nv40->vtxbuf_nr; i++) { - map = ws->_buffer_map(ws, nv40->vtxbuf[i].buffer, + map = ws->buffer_map(ws, nv40->vtxbuf[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_vertex_buffer(nv40->draw, i, map); } if (idxbuf) { - map = ws->_buffer_map(ws, idxbuf, PIPE_BUFFER_USAGE_CPU_READ); + map = ws->buffer_map(ws, idxbuf, PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_element_buffer(nv40->draw, idxbuf_size, map); } else { draw_set_mapped_element_buffer(nv40->draw, 0, NULL); @@ -256,7 +256,7 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, if (nv40->constbuf[PIPE_SHADER_VERTEX]) { const unsigned nr = nv40->constbuf_nr[PIPE_SHADER_VERTEX]; - map = ws->_buffer_map(ws, nv40->constbuf[PIPE_SHADER_VERTEX], + map = ws->buffer_map(ws, nv40->constbuf[PIPE_SHADER_VERTEX], PIPE_BUFFER_USAGE_CPU_READ); draw_set_mapped_constant_buffer(nv40->draw, map, nr); } @@ -264,13 +264,13 @@ nv40_draw_elements_swtnl(struct pipe_context *pipe, draw_arrays(nv40->draw, mode, start, count); for (i = 0; i < nv40->vtxbuf_nr; i++) - ws->_buffer_unmap(ws, nv40->vtxbuf[i].buffer); + ws->buffer_unmap(ws, nv40->vtxbuf[i].buffer); if (idxbuf) - ws->_buffer_unmap(ws, idxbuf); + ws->buffer_unmap(ws, idxbuf); if (nv40->constbuf[PIPE_SHADER_VERTEX]) - ws->_buffer_unmap(ws, nv40->constbuf[PIPE_SHADER_VERTEX]); + ws->buffer_unmap(ws, nv40->constbuf[PIPE_SHADER_VERTEX]); draw_flush(nv40->draw); pipe->flush(pipe, 0, NULL); diff --git a/src/gallium/drivers/nv40/nv40_fragprog.c b/src/gallium/drivers/nv40/nv40_fragprog.c index 5a127d9c7b..91dcbebda0 100644 --- a/src/gallium/drivers/nv40/nv40_fragprog.c +++ b/src/gallium/drivers/nv40/nv40_fragprog.c @@ -886,7 +886,7 @@ nv40_fragprog_upload(struct nv40_context *nv40, uint32_t *map; int i; - map = ws->_buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + map = ws->buffer_map(ws, fp->buffer, PIPE_BUFFER_USAGE_CPU_WRITE); #if 0 for (i = 0; i < fp->insn_len; i++) { @@ -908,7 +908,7 @@ nv40_fragprog_upload(struct nv40_context *nv40, } } - ws->_buffer_unmap(ws, fp->buffer); + ws->buffer_unmap(ws, fp->buffer); } static boolean @@ -932,7 +932,7 @@ nv40_fragprog_validate(struct nv40_context *nv40) return FALSE; } - fp->buffer = ws->_buffer_create(ws, 0x100, 0, fp->insn_len * 4); + fp->buffer = ws->buffer_create(ws, 0x100, 0, fp->insn_len * 4); nv40_fragprog_upload(nv40, fp); so = so_new(4, 1); @@ -948,7 +948,7 @@ update_constants: if (fp->nr_consts) { float *map; - map = ws->_buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); + map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); for (i = 0; i < fp->nr_consts; i++) { struct nv40_fragment_program_data *fpd = &fp->consts[i]; uint32_t *p = &fp->insn[fpd->offset]; @@ -959,7 +959,7 @@ update_constants: memcpy(p, cb, 4 * sizeof(float)); new_consts = TRUE; } - ws->_buffer_unmap(ws, constbuf); + ws->buffer_unmap(ws, constbuf); if (new_consts) nv40_fragprog_upload(nv40, fp); diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index 6ed0d39edf..ba912ddcbb 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -97,7 +97,7 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt) nv40_miptree_layout(mt); - mt->buffer = ws->_buffer_create(ws, 256, buf_usage, mt->total_size); + mt->buffer = ws->buffer_create(ws, 256, buf_usage, mt->total_size); if (!mt->buffer) { FREE(mt); return NULL; diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index 20662fd3ff..ab128fecda 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -170,7 +170,7 @@ nv40_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, assert(surface_to_map); - map = ws->_buffer_map(ws, surface_to_map->buffer, flags); + map = ws->buffer_map(ws, surface_to_map->buffer, flags); if (!map) return NULL; @@ -198,7 +198,7 @@ nv40_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) assert(surface_to_unmap); - ws->_buffer_unmap(ws, surface_to_unmap->buffer); + ws->buffer_unmap(ws, surface_to_unmap->buffer); if (surface_to_unmap != surface) { struct nv40_screen *nvscreen = nv40_screen(screen); diff --git a/src/gallium/drivers/nv40/nv40_surface.c b/src/gallium/drivers/nv40/nv40_surface.c index 576af7c59e..aa51d04051 100644 --- a/src/gallium/drivers/nv40/nv40_surface.c +++ b/src/gallium/drivers/nv40/nv40_surface.c @@ -28,7 +28,7 @@ #include "nv40_context.h" #include "pipe/p_defines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "util/u_tile.h" diff --git a/src/gallium/drivers/nv40/nv40_vbo.c b/src/gallium/drivers/nv40/nv40_vbo.c index f20183ddd4..8f1834628f 100644 --- a/src/gallium/drivers/nv40/nv40_vbo.c +++ b/src/gallium/drivers/nv40/nv40_vbo.c @@ -116,7 +116,7 @@ nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so, if (nv40_vbo_format_to_hw(ve->src_format, &type, &ncomp)) return FALSE; - map = ws->_buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); + map = ws->buffer_map(ws, vb->buffer, PIPE_BUFFER_USAGE_CPU_READ); map += vb->buffer_offset + ve->src_offset; switch (type) { @@ -148,17 +148,17 @@ nv40_vbo_static_attrib(struct nv40_context *nv40, struct nouveau_stateobj *so, so_data (so, fui(v[0])); break; default: - ws->_buffer_unmap(ws, vb->buffer); + ws->buffer_unmap(ws, vb->buffer); return FALSE; } } break; default: - ws->_buffer_unmap(ws, vb->buffer); + ws->buffer_unmap(ws, vb->buffer); return FALSE; } - ws->_buffer_unmap(ws, vb->buffer); + ws->buffer_unmap(ws, vb->buffer); return TRUE; } @@ -370,7 +370,7 @@ nv40_draw_elements_inline(struct pipe_context *pipe, struct pipe_winsys *ws = pipe->winsys; void *map; - map = ws->_buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); + map = ws->buffer_map(ws, ib, PIPE_BUFFER_USAGE_CPU_READ); if (!ib) { NOUVEAU_ERR("failed mapping ib\n"); return FALSE; @@ -391,7 +391,7 @@ nv40_draw_elements_inline(struct pipe_context *pipe, break; } - ws->_buffer_unmap(ws, ib); + ws->buffer_unmap(ws, ib); return TRUE; } diff --git a/src/gallium/drivers/nv40/nv40_vertprog.c b/src/gallium/drivers/nv40/nv40_vertprog.c index 7a82bb0f5e..1392fe956f 100644 --- a/src/gallium/drivers/nv40/nv40_vertprog.c +++ b/src/gallium/drivers/nv40/nv40_vertprog.c @@ -980,7 +980,7 @@ check_gpu_resources: float *map = NULL; if (constbuf) { - map = ws->_buffer_map(ws, constbuf, + map = ws->buffer_map(ws, constbuf, PIPE_BUFFER_USAGE_CPU_READ); } @@ -1002,7 +1002,7 @@ check_gpu_resources: } if (constbuf) - ws->_buffer_unmap(ws, constbuf); + ws->buffer_unmap(ws, constbuf); } /* Upload vtxprog */ diff --git a/src/gallium/drivers/nv50/nv50_context.c b/src/gallium/drivers/nv50/nv50_context.c index b02c53f209..99776239d2 100644 --- a/src/gallium/drivers/nv50/nv50_context.c +++ b/src/gallium/drivers/nv50/nv50_context.c @@ -22,7 +22,7 @@ #include "draw/draw_context.h" #include "pipe/p_defines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "nv50_context.h" #include "nv50_screen.h" diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 3965dad5ad..7770fcc3f2 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -88,14 +88,14 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp) size = align(size, 64); size *= align(pt->height[l], 8) * pt->block.size; - lvl->image[i] = ws->_buffer_create(ws, 256, 0, size); + lvl->image[i] = ws->buffer_create(ws, 256, 0, size); lvl->image_offset[i] = mt->total_size; mt->total_size += size; } } - mt->buffer = ws->_buffer_create(ws, 256, usage, mt->total_size); + mt->buffer = ws->buffer_create(ws, 256, usage, mt->total_size); if (!mt->buffer) { FREE(mt); return NULL; diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 73867cf675..7686f746eb 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -1581,11 +1581,11 @@ nv50_program_validate_data(struct nv50_context *nv50, struct nv50_program *p) } if (p->param_nr) { - float *map = ws->_buffer_map(ws, nv50->constbuf[p->type], + float *map = ws->buffer_map(ws, nv50->constbuf[p->type], PIPE_BUFFER_USAGE_CPU_READ); nv50_program_upload_data(nv50, map, p->data->start, p->param_nr); - ws->_buffer_unmap(ws, nv50->constbuf[p->type]); + ws->buffer_unmap(ws, nv50->constbuf[p->type]); } if (p->immd_nr) { @@ -1606,7 +1606,7 @@ nv50_program_validate_code(struct nv50_context *nv50, struct nv50_program *p) boolean upload = FALSE; if (!p->buffer) { - p->buffer = ws->_buffer_create(ws, 0x100, 0, p->exec_size * 4); + p->buffer = ws->buffer_create(ws, 0x100, 0, p->exec_size * 4); upload = TRUE; } diff --git a/src/gallium/drivers/nv50/nv50_query.c b/src/gallium/drivers/nv50/nv50_query.c index b0fb346ba1..1b3a41340a 100644 --- a/src/gallium/drivers/nv50/nv50_query.c +++ b/src/gallium/drivers/nv50/nv50_query.c @@ -47,7 +47,7 @@ nv50_query_create(struct pipe_context *pipe, unsigned type) assert (q->type == PIPE_QUERY_OCCLUSION_COUNTER); q->type = type; - q->buffer = ws->_buffer_create(ws, 256, 0, 16); + q->buffer = ws->buffer_create(ws, 256, 0, 16); if (!q->buffer) { FREE(q); return NULL; @@ -107,11 +107,11 @@ nv50_query_result(struct pipe_context *pipe, struct pipe_query *pq, */ if (!q->ready) { - uint32_t *map = ws->_buffer_map(ws, q->buffer, + uint32_t *map = ws->buffer_map(ws, q->buffer, PIPE_BUFFER_USAGE_CPU_READ); q->result = map[1]; q->ready = TRUE; - ws->_buffer_unmap(ws, q->buffer); + ws->buffer_unmap(ws, q->buffer); } *result = q->result; diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index 3abacfc8d5..ef46233f83 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -242,7 +242,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) so_data (so, 8); /* Shared constant buffer */ - screen->constbuf = ws->_buffer_create(ws, 0, 0, 128 * 4 * 4); + screen->constbuf = ws->buffer_create(ws, 0, 0, 128 * 4 * 4); if (nvws->res_init(&screen->vp_data_heap, 0, 128)) { NOUVEAU_ERR("Error initialising constant buffer\n"); nv50_screen_destroy(&screen->pipe); @@ -261,7 +261,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) * blocks. At some point we *may* want to go the NVIDIA way of doing * things? */ - screen->tic = ws->_buffer_create(ws, 0, 0, 32 * 8 * 4); + screen->tic = ws->buffer_create(ws, 0, 0, 32 * 8 * 4); so_method(so, screen->tesla, 0x1280, 3); so_reloc (so, screen->tic, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); @@ -275,7 +275,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) NOUVEAU_BO_RD | NOUVEAU_BO_LOW, 0, 0); so_data (so, 0x00000800); - screen->tsc = ws->_buffer_create(ws, 0, 0, 32 * 8 * 4); + screen->tsc = ws->buffer_create(ws, 0, 0, 32 * 8 * 4); so_method(so, screen->tesla, 0x1280, 3); so_reloc (so, screen->tsc, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_RD | NOUVEAU_BO_HIGH, 0, 0); diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index 743eb6e257..ed6602ba36 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -22,7 +22,7 @@ #include "nv50_context.h" #include "pipe/p_defines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "util/u_tile.h" @@ -65,7 +65,7 @@ nv50_surface_map(struct pipe_screen *screen, struct pipe_surface *ps, { struct pipe_winsys *ws = screen->winsys; - return ws->_buffer_map(ws, ps->buffer, flags); + return ws->buffer_map(ws, ps->buffer, flags); } static void @@ -73,7 +73,7 @@ nv50_surface_unmap(struct pipe_screen *pscreen, struct pipe_surface *ps) { struct pipe_winsys *ws = pscreen->winsys; - ws->_buffer_unmap(ws, ps->buffer); + ws->buffer_unmap(ws, ps->buffer); } void diff --git a/src/gallium/drivers/nv50/nv50_vbo.c b/src/gallium/drivers/nv50/nv50_vbo.c index 86471c00e0..c482a4c241 100644 --- a/src/gallium/drivers/nv50/nv50_vbo.c +++ b/src/gallium/drivers/nv50/nv50_vbo.c @@ -153,7 +153,7 @@ nv50_draw_elements(struct pipe_context *pipe, { struct nv50_context *nv50 = nv50_context(pipe); struct pipe_winsys *ws = pipe->winsys; - void *map = ws->_buffer_map(ws, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); + void *map = ws->buffer_map(ws, indexBuffer, PIPE_BUFFER_USAGE_CPU_READ); nv50_state_validate(nv50); diff --git a/src/gallium/drivers/softpipe/sp_draw_arrays.c b/src/gallium/drivers/softpipe/sp_draw_arrays.c index 8d58b1ed16..ecc9d00319 100644 --- a/src/gallium/drivers/softpipe/sp_draw_arrays.c +++ b/src/gallium/drivers/softpipe/sp_draw_arrays.c @@ -33,7 +33,7 @@ #include "pipe/p_defines.h" #include "pipe/p_context.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "sp_context.h" @@ -50,7 +50,7 @@ softpipe_map_constant_buffers(struct softpipe_context *sp) uint i; for (i = 0; i < PIPE_SHADER_TYPES; i++) { if (sp->constants[i].buffer && sp->constants[i].buffer->size) - sp->mapped_constants[i] = ws->_buffer_map(ws, sp->constants[i].buffer, + sp->mapped_constants[i] = ws->buffer_map(ws, sp->constants[i].buffer, PIPE_BUFFER_USAGE_CPU_READ); } @@ -74,7 +74,7 @@ softpipe_unmap_constant_buffers(struct softpipe_context *sp) for (i = 0; i < 2; i++) { if (sp->constants[i].buffer && sp->constants[i].buffer->size) - ws->_buffer_unmap(ws, sp->constants[i].buffer); + ws->buffer_unmap(ws, sp->constants[i].buffer); sp->mapped_constants[i] = NULL; } } diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 11b08b3a82..4bd95a61e6 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -27,7 +27,7 @@ #include "util/u_memory.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c index 43b134354f..4d01a9dbe1 100644 --- a/src/gallium/drivers/softpipe/sp_state_fs.c +++ b/src/gallium/drivers/softpipe/sp_state_fs.c @@ -32,7 +32,7 @@ #include "pipe/p_defines.h" #include "util/u_memory.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_shader_tokens.h" #include "draw/draw_context.h" #include "tgsi/tgsi_dump.h" diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 078925ca45..5952378152 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -33,7 +33,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_math.h" #include "util/u_memory.h" @@ -87,9 +87,9 @@ softpipe_texture_layout(struct pipe_screen *screen, depth = minify(depth); } - spt->buffer = ws->_buffer_create(ws, 32, - PIPE_BUFFER_USAGE_PIXEL, - buffer_size); + spt->buffer = ws->buffer_create(ws, 32, + PIPE_BUFFER_USAGE_PIXEL, + buffer_size); return spt->buffer != NULL; } @@ -105,12 +105,12 @@ softpipe_displaytarget_layout(struct pipe_screen *screen, spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]); spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]); - spt->buffer = ws->_surface_buffer_create( ws, - spt->base.width[0], - spt->base.height[0], - spt->base.format, - usage, - &spt->stride[0]); + spt->buffer = ws->surface_buffer_create( ws, + spt->base.width[0], + spt->base.height[0], + spt->base.format, + usage, + &spt->stride[0]); return spt->buffer != NULL; } diff --git a/src/gallium/drivers/trace/tr_winsys.c b/src/gallium/drivers/trace/tr_winsys.c index 9a19d4d077..c4148fe810 100644 --- a/src/gallium/drivers/trace/tr_winsys.c +++ b/src/gallium/drivers/trace/tr_winsys.c @@ -118,7 +118,7 @@ trace_winsys_surface_buffer_create(struct pipe_winsys *_winsys, trace_dump_arg(format, format); trace_dump_arg(uint, usage); - result = winsys->_surface_buffer_create(winsys, + result = winsys->surface_buffer_create(winsys, width, height, format, usage, @@ -153,7 +153,7 @@ trace_winsys_buffer_create(struct pipe_winsys *_winsys, trace_dump_arg(uint, usage); trace_dump_arg(uint, size); - buffer = winsys->_buffer_create(winsys, alignment, usage, size); + buffer = winsys->buffer_create(winsys, alignment, usage, size); trace_dump_ret(ptr, buffer); @@ -162,10 +162,10 @@ trace_winsys_buffer_create(struct pipe_winsys *_winsys, /* Zero the buffer to avoid dumping uninitialized memory */ if(buffer->usage & PIPE_BUFFER_USAGE_CPU_WRITE) { void *map; - map = winsys->_buffer_map(winsys, buffer, PIPE_BUFFER_USAGE_CPU_WRITE); + map = winsys->buffer_map(winsys, buffer, PIPE_BUFFER_USAGE_CPU_WRITE); if(map) { memset(map, 0, buffer->size); - winsys->_buffer_unmap(winsys, buffer); + winsys->buffer_unmap(winsys, buffer); } } @@ -190,7 +190,7 @@ trace_winsys_user_buffer_create(struct pipe_winsys *_winsys, trace_dump_arg_end(); trace_dump_arg(uint, size); - result = winsys->_user_buffer_create(winsys, data, size); + result = winsys->user_buffer_create(winsys, data, size); trace_dump_ret(ptr, result); @@ -216,7 +216,7 @@ trace_winsys_user_buffer_update(struct pipe_winsys *_winsys, const void *map; if(buffer && buffer->usage & TRACE_BUFFER_USAGE_USER) { - map = winsys->_buffer_map(winsys, buffer, PIPE_BUFFER_USAGE_CPU_READ); + map = winsys->buffer_map(winsys, buffer, PIPE_BUFFER_USAGE_CPU_READ); if(map) { trace_dump_call_begin("pipe_winsys", "buffer_write"); @@ -234,7 +234,7 @@ trace_winsys_user_buffer_update(struct pipe_winsys *_winsys, trace_dump_call_end(); - winsys->_buffer_unmap(winsys, buffer); + winsys->buffer_unmap(winsys, buffer); } } } @@ -249,7 +249,7 @@ trace_winsys_buffer_map(struct pipe_winsys *_winsys, struct pipe_winsys *winsys = tr_ws->winsys; void *map; - map = winsys->_buffer_map(winsys, buffer, usage); + map = winsys->buffer_map(winsys, buffer, usage); if(map) { if(usage & PIPE_BUFFER_USAGE_CPU_WRITE) { assert(!hash_table_get(tr_ws->buffer_maps, buffer)); @@ -290,7 +290,7 @@ trace_winsys_buffer_unmap(struct pipe_winsys *_winsys, hash_table_remove(tr_ws->buffer_maps, buffer); } - winsys->_buffer_unmap(winsys, buffer); + winsys->buffer_unmap(winsys, buffer); } @@ -306,7 +306,7 @@ trace_winsys_buffer_destroy(struct pipe_winsys *_winsys, trace_dump_arg(ptr, winsys); trace_dump_arg(ptr, buffer); - winsys->_buffer_destroy(winsys, buffer); + winsys->buffer_destroy(winsys, buffer); trace_dump_call_end(); } @@ -420,12 +420,12 @@ trace_winsys_create(struct pipe_winsys *winsys) tr_ws->base.destroy = trace_winsys_destroy; tr_ws->base.get_name = trace_winsys_get_name; tr_ws->base.flush_frontbuffer = trace_winsys_flush_frontbuffer; - tr_ws->base._surface_buffer_create = trace_winsys_surface_buffer_create; - tr_ws->base._buffer_create = trace_winsys_buffer_create; - tr_ws->base._user_buffer_create = trace_winsys_user_buffer_create; - tr_ws->base._buffer_map = trace_winsys_buffer_map; - tr_ws->base._buffer_unmap = trace_winsys_buffer_unmap; - tr_ws->base._buffer_destroy = trace_winsys_buffer_destroy; + tr_ws->base.surface_buffer_create = trace_winsys_surface_buffer_create; + tr_ws->base.buffer_create = trace_winsys_buffer_create; + tr_ws->base.user_buffer_create = trace_winsys_user_buffer_create; + tr_ws->base.buffer_map = trace_winsys_buffer_map; + tr_ws->base.buffer_unmap = trace_winsys_buffer_unmap; + tr_ws->base.buffer_destroy = trace_winsys_buffer_destroy; tr_ws->base.fence_reference = trace_winsys_fence_reference; tr_ws->base.fence_signalled = trace_winsys_fence_signalled; tr_ws->base.fence_finish = trace_winsys_fence_finish; diff --git a/src/gallium/drivers/trace/tr_winsys.h b/src/gallium/drivers/trace/tr_winsys.h index 062ddf66a0..0fd2a40556 100644 --- a/src/gallium/drivers/trace/tr_winsys.h +++ b/src/gallium/drivers/trace/tr_winsys.h @@ -31,7 +31,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" /** diff --git a/src/gallium/include/pipe/internal/p_winsys_screen.h b/src/gallium/include/pipe/internal/p_winsys_screen.h new file mode 100644 index 0000000000..ee835578b2 --- /dev/null +++ b/src/gallium/include/pipe/internal/p_winsys_screen.h @@ -0,0 +1,185 @@ + /************************************************************************** + * + * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/** + * \file + * This is the interface that Gallium3D requires any window system + * hosting it to implement. This is the only include file in Gallium3D + * which is public. + */ + +#ifndef P_WINSYS_H +#define P_WINSYS_H + + +#include "pipe/p_format.h" + + +#ifdef __cplusplus +extern "C" { +#endif + + +/** Opaque type */ +struct pipe_fence_handle; + +struct pipe_surface; + + +/** + * Gallium3D drivers are (meant to be!) independent of both GL and the + * window system. The window system provides a buffer manager and a + * set of additional hooks for things like command buffer submission, + * etc. + * + * There clearly has to be some agreement between the window system + * driver and the hardware driver about the format of command buffers, + * etc. + */ +struct pipe_winsys +{ + void (*destroy)( struct pipe_winsys *ws ); + + /** Returns name of this winsys interface */ + const char *(*get_name)( struct pipe_winsys *ws ); + + /** + * Do any special operations to ensure frontbuffer contents are + * displayed, eg copy fake frontbuffer. + */ + void (*flush_frontbuffer)( struct pipe_winsys *ws, + struct pipe_surface *surf, + void *context_private ); + + + /** + * Buffer management. Buffer attributes are mostly fixed over its lifetime. + * + * Remember that gallium gets to choose the interface it needs, and the + * window systems must then implement that interface (rather than the + * other way around...). + * + * usage is a bitmask of PIPE_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This + * usage argument is only an optimization hint, not a guarantee, therefore + * proper behavior must be observed in all circumstances. + * + * alignment indicates the client's alignment requirements, eg for + * SSE instructions. + */ + struct pipe_buffer *(*buffer_create)( struct pipe_winsys *ws, + unsigned alignment, + unsigned usage, + unsigned size ); + + /** + * Create a buffer that wraps user-space data. + * + * Effectively this schedules a delayed call to buffer_create + * followed by an upload of the data at *some point in the future*, + * or perhaps never. Basically the allocate/upload is delayed + * until the buffer is actually passed to hardware. + * + * The intention is to provide a quick way to turn regular data + * into a buffer, and secondly to avoid a copy operation if that + * data subsequently turns out to be only accessed by the CPU. + * + * Common example is OpenGL vertex buffers that are subsequently + * processed either by software TNL in the driver or by passing to + * hardware. + * + * XXX: What happens if the delayed call to buffer_create() fails? + * + * Note that ptr may be accessed at any time upto the time when the + * buffer is destroyed, so the data must not be freed before then. + */ + struct pipe_buffer *(*user_buffer_create)(struct pipe_winsys *ws, + void *ptr, + unsigned bytes); + + /** + * Allocate storage for a display target surface. + * + * Often surfaces which are meant to be blitted to the front screen (i.e., + * display targets) must be allocated with special characteristics, memory + * pools, or obtained directly from the windowing system. + * + * This callback is invoked by the pipe_screenwhen creating a texture marked + * with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET flag to get the underlying + * buffer storage. + */ + struct pipe_buffer *(*surface_buffer_create)(struct pipe_winsys *ws, + unsigned width, unsigned height, + enum pipe_format format, + unsigned usage, + unsigned *stride); + + + /** + * Map the entire data store of a buffer object into the client's address. + * flags is bitmask of PIPE_BUFFER_USAGE_CPU_READ/WRITE flags. + */ + void *(*buffer_map)( struct pipe_winsys *ws, + struct pipe_buffer *buf, + unsigned usage ); + + void (*buffer_unmap)( struct pipe_winsys *ws, + struct pipe_buffer *buf ); + + void (*buffer_destroy)( struct pipe_winsys *ws, + struct pipe_buffer *buf ); + + + /** Set ptr = fence, with reference counting */ + void (*fence_reference)( struct pipe_winsys *ws, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence ); + + /** + * Checks whether the fence has been signalled. + * \param flags driver-specific meaning + * \return zero on success. + */ + int (*fence_signalled)( struct pipe_winsys *ws, + struct pipe_fence_handle *fence, + unsigned flag ); + + /** + * Wait for the fence to finish. + * \param flags driver-specific meaning + * \return zero on success. + */ + int (*fence_finish)( struct pipe_winsys *ws, + struct pipe_fence_handle *fence, + unsigned flag ); + +}; + +#ifdef __cplusplus +} +#endif + +#endif /* P_WINSYS_H */ diff --git a/src/gallium/include/pipe/p_inlines.h b/src/gallium/include/pipe/p_inlines.h index da7334bb67..1219c817b4 100644 --- a/src/gallium/include/pipe/p_inlines.h +++ b/src/gallium/include/pipe/p_inlines.h @@ -31,7 +31,6 @@ #include "p_context.h" #include "p_defines.h" #include "p_screen.h" -#include "p_winsys.h" #ifdef __cplusplus @@ -129,26 +128,20 @@ pipe_texture_release(struct pipe_texture **ptr) /** - * Convenience wrappers for winsys buffer functions. + * Convenience wrappers for screen buffer functions. */ static INLINE struct pipe_buffer * pipe_buffer_create( struct pipe_screen *screen, unsigned alignment, unsigned usage, unsigned size ) { - if (screen->buffer_create) - return screen->buffer_create(screen, alignment, usage, size); - else - return screen->winsys->_buffer_create(screen->winsys, alignment, usage, size); + return screen->buffer_create(screen, alignment, usage, size); } static INLINE struct pipe_buffer * pipe_user_buffer_create( struct pipe_screen *screen, void *ptr, unsigned size ) { - if (screen->user_buffer_create) - return screen->user_buffer_create(screen, ptr, size); - else - return screen->winsys->_user_buffer_create(screen->winsys, ptr, size); + return screen->user_buffer_create(screen, ptr, size); } static INLINE void * @@ -156,20 +149,14 @@ pipe_buffer_map(struct pipe_screen *screen, struct pipe_buffer *buf, unsigned usage) { - if (screen->buffer_map) - return screen->buffer_map(screen, buf, usage); - else - return screen->winsys->_buffer_map(screen->winsys, buf, usage); + return screen->buffer_map(screen, buf, usage); } static INLINE void pipe_buffer_unmap(struct pipe_screen *screen, struct pipe_buffer *buf) { - if (screen->buffer_unmap) - screen->buffer_unmap(screen, buf); - else - screen->winsys->_buffer_unmap(screen->winsys, buf); + screen->buffer_unmap(screen, buf); } /* XXX: thread safety issues! @@ -187,10 +174,7 @@ pipe_buffer_reference(struct pipe_screen *screen, if (*ptr) { assert((*ptr)->refcount); if(--(*ptr)->refcount == 0) { - if (screen->buffer_destroy) - screen->buffer_destroy( screen, *ptr ); - else - screen->winsys->_buffer_destroy( screen->winsys, *ptr ); + screen->buffer_destroy( screen, *ptr ); } } diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index b072484a84..715fa39cbe 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -48,6 +48,8 @@ extern "C" { #endif +/** Opaque type */ +struct pipe_fence_handle; /** * Gallium screen/adapter context. Basically everything @@ -196,6 +198,41 @@ struct pipe_screen { void (*buffer_destroy)( struct pipe_screen *screen, struct pipe_buffer *buf ); + + + /** + * Do any special operations to ensure frontbuffer contents are + * displayed, eg copy fake frontbuffer. + */ + void (*flush_frontbuffer)( struct pipe_screen *screen, + struct pipe_surface *surf, + void *context_private ); + + + + /** Set ptr = fence, with reference counting */ + void (*fence_reference)( struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence ); + + /** + * Checks whether the fence has been signalled. + * \param flags driver-specific meaning + * \return zero on success. + */ + int (*fence_signalled)( struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flag ); + + /** + * Wait for the fence to finish. + * \param flags driver-specific meaning + * \return zero on success. + */ + int (*fence_finish)( struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flag ); + }; diff --git a/src/gallium/include/pipe/p_winsys.h b/src/gallium/include/pipe/p_winsys.h deleted file mode 100644 index bda1907cc1..0000000000 --- a/src/gallium/include/pipe/p_winsys.h +++ /dev/null @@ -1,186 +0,0 @@ - /************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \file - * This is the interface that Gallium3D requires any window system - * hosting it to implement. This is the only include file in Gallium3D - * which is public. - */ - -#ifndef P_WINSYS_H -#define P_WINSYS_H - - -#include "p_format.h" - - -#ifdef __cplusplus -extern "C" { -#endif - - -/** Opaque type */ -struct pipe_fence_handle; - -struct pipe_surface; - - -/** - * Gallium3D drivers are (meant to be!) independent of both GL and the - * window system. The window system provides a buffer manager and a - * set of additional hooks for things like command buffer submission, - * etc. - * - * There clearly has to be some agreement between the window system - * driver and the hardware driver about the format of command buffers, - * etc. - */ -struct pipe_winsys -{ - void (*destroy)( struct pipe_winsys *ws ); - - /** Returns name of this winsys interface */ - const char *(*get_name)( struct pipe_winsys *ws ); - - /** - * Do any special operations to ensure frontbuffer contents are - * displayed, eg copy fake frontbuffer. - */ - void (*flush_frontbuffer)( struct pipe_winsys *ws, - struct pipe_surface *surf, - void *context_private ); - - - /** - * Buffer management. Buffer attributes are mostly fixed over its lifetime. - * - * Remember that gallium gets to choose the interface it needs, and the - * window systems must then implement that interface (rather than the - * other way around...). - * - * usage is a bitmask of PIPE_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This - * usage argument is only an optimization hint, not a guarantee, therefore - * proper behavior must be observed in all circumstances. - * - * alignment indicates the client's alignment requirements, eg for - * SSE instructions. - */ - struct pipe_buffer *(*_buffer_create)( struct pipe_winsys *ws, - unsigned alignment, - unsigned usage, - unsigned size ); - - /** - * Create a buffer that wraps user-space data. - * - * Effectively this schedules a delayed call to buffer_create - * followed by an upload of the data at *some point in the future*, - * or perhaps never. Basically the allocate/upload is delayed - * until the buffer is actually passed to hardware. - * - * The intention is to provide a quick way to turn regular data - * into a buffer, and secondly to avoid a copy operation if that - * data subsequently turns out to be only accessed by the CPU. - * - * Common example is OpenGL vertex buffers that are subsequently - * processed either by software TNL in the driver or by passing to - * hardware. - * - * XXX: What happens if the delayed call to buffer_create() fails? - * - * Note that ptr may be accessed at any time upto the time when the - * buffer is destroyed, so the data must not be freed before then. - */ - struct pipe_buffer *(*_user_buffer_create)(struct pipe_winsys *ws, - void *ptr, - unsigned bytes); - - /** - * Allocate storage for a display target surface. - * - * Often surfaces which are meant to be blitted to the front screen (i.e., - * display targets) must be allocated with special characteristics, memory - * pools, or obtained directly from the windowing system. - * - * This callback is invoked by the pipe_screenwhen creating a texture marked - * with the PIPE_TEXTURE_USAGE_DISPLAY_TARGET flag to get the underlying - * buffer storage. - */ - struct pipe_buffer *(*_surface_buffer_create)(struct pipe_winsys *ws, - unsigned width, unsigned height, - enum pipe_format format, - unsigned usage, - unsigned *stride); - - - /** - * Map the entire data store of a buffer object into the client's address. - * flags is bitmask of PIPE_BUFFER_USAGE_CPU_READ/WRITE flags. - */ - void *(*_buffer_map)( struct pipe_winsys *ws, - struct pipe_buffer *buf, - unsigned usage ); - - void (*_buffer_unmap)( struct pipe_winsys *ws, - struct pipe_buffer *buf ); - - void (*_buffer_destroy)( struct pipe_winsys *ws, - struct pipe_buffer *buf ); - - - /** Set ptr = fence, with reference counting */ - void (*fence_reference)( struct pipe_winsys *ws, - struct pipe_fence_handle **ptr, - struct pipe_fence_handle *fence ); - - /** - * Checks whether the fence has been signalled. - * \param flags driver-specific meaning - * \return zero on success. - */ - int (*fence_signalled)( struct pipe_winsys *ws, - struct pipe_fence_handle *fence, - unsigned flag ); - - /** - * Wait for the fence to finish. - * \param flags driver-specific meaning - * \return zero on success. - */ - int (*fence_finish)( struct pipe_winsys *ws, - struct pipe_fence_handle *fence, - unsigned flag ); - -}; - - -#ifdef __cplusplus -} -#endif - -#endif /* P_WINSYS_H */ diff --git a/src/gallium/state_trackers/egl/egl_context.c b/src/gallium/state_trackers/egl/egl_context.c index 217fe00338..8564972b91 100644 --- a/src/gallium/state_trackers/egl/egl_context.c +++ b/src/gallium/state_trackers/egl/egl_context.c @@ -10,7 +10,6 @@ #include "pipe/p_context.h" #include "pipe/p_screen.h" -#include "pipe/p_winsys.h" #include "state_tracker/st_public.h" #include "state_tracker/drm_api.h" diff --git a/src/gallium/state_trackers/egl/egl_tracker.c b/src/gallium/state_trackers/egl/egl_tracker.c index 3ca5acb68b..dec82c3a00 100644 --- a/src/gallium/state_trackers/egl/egl_tracker.c +++ b/src/gallium/state_trackers/egl/egl_tracker.c @@ -10,7 +10,7 @@ #include "state_tracker/drm_api.h" #include "pipe/p_screen.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" /** HACK */ void* driDriverAPI; diff --git a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c index 53ef275349..122c42ed0e 100644 --- a/src/gallium/state_trackers/g3dvl/vl_basic_csc.c +++ b/src/gallium/state_trackers/g3dvl/vl_basic_csc.c @@ -2,7 +2,6 @@ #include "vl_basic_csc.h" #include #include -#include #include #include #include diff --git a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c index 789042f6f2..d53482f579 100644 --- a/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c +++ b/src/gallium/state_trackers/g3dvl/vl_r16snorm_mc_buf.c @@ -2,7 +2,6 @@ #include "vl_r16snorm_mc_buf.h" #include #include -#include #include #include #include @@ -649,9 +648,9 @@ static int vlFlush pipe->set_framebuffer_state(pipe, &mc->render_target); pipe->set_viewport_state(pipe, &mc->viewport); - vs_consts = pipe->winsys->buffer_map + vs_consts = pipe_buffer_map ( - pipe->winsys, + pipe->screen, mc->vs_const_buf.buffer, PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD ); diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c index 95c1378a03..20dd8d269d 100644 --- a/src/gallium/state_trackers/python/st_device.c +++ b/src/gallium/state_trackers/python/st_device.c @@ -26,7 +26,7 @@ **************************************************************************/ -#include "pipe/p_winsys.h" +#include "pipe/p_screen.h" #include "pipe/p_context.h" #include "pipe/p_shader_tokens.h" #include "pipe/p_inlines.h" diff --git a/src/gallium/state_trackers/python/st_softpipe_winsys.c b/src/gallium/state_trackers/python/st_softpipe_winsys.c index 01d88ee499..4d798df99b 100644 --- a/src/gallium/state_trackers/python/st_softpipe_winsys.c +++ b/src/gallium/state_trackers/python/st_softpipe_winsys.c @@ -36,7 +36,7 @@ */ -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h"/* port to just p_screen */ #include "pipe/p_format.h" #include "pipe/p_context.h" #include "pipe/p_inlines.h" diff --git a/src/gallium/winsys/drm/intel/common/intel_be_device.c b/src/gallium/winsys/drm/intel/common/intel_be_device.c index 14aeaf61db..85ab1a2684 100644 --- a/src/gallium/winsys/drm/intel/common/intel_be_device.c +++ b/src/gallium/winsys/drm/intel/common/intel_be_device.c @@ -10,7 +10,7 @@ #include "ws_dri_bufpool.h" #include "ws_dri_fencemgr.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" #include "pipe/p_inlines.h" diff --git a/src/gallium/winsys/drm/intel/common/intel_be_device.h b/src/gallium/winsys/drm/intel/common/intel_be_device.h index 3f8b3f585c..534d638b6a 100644 --- a/src/gallium/winsys/drm/intel/common/intel_be_device.h +++ b/src/gallium/winsys/drm/intel/common/intel_be_device.h @@ -1,7 +1,7 @@ #ifndef INTEL_DRM_DEVICE_H #define INTEL_DRM_DEVICE_H -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_context.h" /* diff --git a/src/gallium/winsys/drm/intel/gem/intel_be_device.c b/src/gallium/winsys/drm/intel/gem/intel_be_device.c index 5406636bcb..82c1cb2f32 100644 --- a/src/gallium/winsys/drm/intel/gem/intel_be_device.c +++ b/src/gallium/winsys/drm/intel/gem/intel_be_device.c @@ -1,7 +1,7 @@ #include "intel_be_device.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_defines.h" #include "pipe/p_state.h" #include "pipe/p_inlines.h" diff --git a/src/gallium/winsys/drm/intel/gem/intel_be_device.h b/src/gallium/winsys/drm/intel/gem/intel_be_device.h index 96e94c47e7..f06890163c 100644 --- a/src/gallium/winsys/drm/intel/gem/intel_be_device.h +++ b/src/gallium/winsys/drm/intel/gem/intel_be_device.h @@ -2,7 +2,7 @@ #ifndef INTEL_DRM_DEVICE_H #define INTEL_DRM_DEVICE_H -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_context.h" #include "drm.h" diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.c b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.c index 5b3101fbba..8e889b9f36 100644 --- a/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.c +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.c @@ -1,4 +1,4 @@ -#include +#include "pipe/internal/p_winsys_screen.h" #include #include #include diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.h b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.h index 14c728690d..d97ffdf337 100644 --- a/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.h +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_pipe.h @@ -2,7 +2,7 @@ #define NOUVEAU_PIPE_WINSYS_H #include "pipe/p_context.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "nouveau_context.h" struct nouveau_pipe_buffer { diff --git a/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_softpipe.c b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_softpipe.c index 04def600f4..396e4f2a2e 100644 --- a/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_softpipe.c +++ b/src/gallium/winsys/drm/nouveau/common/nouveau_winsys_softpipe.c @@ -29,7 +29,7 @@ * Authors: Keith Whitwell */ -#include +#include "pipe/internal/p_winsys_screen.h" #include #include #include diff --git a/src/gallium/winsys/egl_xlib/egl_xlib.c b/src/gallium/winsys/egl_xlib/egl_xlib.c index 82aa60ae58..4876339107 100644 --- a/src/gallium/winsys/egl_xlib/egl_xlib.c +++ b/src/gallium/winsys/egl_xlib/egl_xlib.c @@ -38,7 +38,7 @@ #include "pipe/p_compiler.h" #include "pipe/p_format.h" #include "pipe/p_state.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "util/u_memory.h" #include "softpipe/sp_winsys.h" diff --git a/src/gallium/winsys/egl_xlib/sw_winsys.c b/src/gallium/winsys/egl_xlib/sw_winsys.c index a09ad5e8e9..739bfa1c1a 100644 --- a/src/gallium/winsys/egl_xlib/sw_winsys.c +++ b/src/gallium/winsys/egl_xlib/sw_winsys.c @@ -35,7 +35,7 @@ */ -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_state.h" #include "pipe/p_inlines.h" #include "util/u_math.h" diff --git a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c index 91e3c62d7f..738bca3eac 100644 --- a/src/gallium/winsys/gdi/gdi_softpipe_winsys.c +++ b/src/gallium/winsys/gdi/gdi_softpipe_winsys.c @@ -38,7 +38,7 @@ #include -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_format.h" #include "pipe/p_context.h" #include "pipe/p_inlines.h" diff --git a/src/gallium/winsys/xlib/xlib_brw_context.c b/src/gallium/winsys/xlib/xlib_brw_context.c index 528473925a..09599507f4 100644 --- a/src/gallium/winsys/xlib/xlib_brw_context.c +++ b/src/gallium/winsys/xlib/xlib_brw_context.c @@ -36,7 +36,7 @@ //#include "glxheader.h" //#include "xmesaP.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" diff --git a/src/gallium/winsys/xlib/xlib_brw_screen.c b/src/gallium/winsys/xlib/xlib_brw_screen.c index 1fd7da8a2f..5344c502ef 100644 --- a/src/gallium/winsys/xlib/xlib_brw_screen.c +++ b/src/gallium/winsys/xlib/xlib_brw_screen.c @@ -36,7 +36,7 @@ //#include "state_trackers/xlib/glxheader.h" //#include "state_trackers/xlib/xmesaP.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "util/u_math.h" #include "util/u_memory.h" diff --git a/src/gallium/winsys/xlib/xlib_cell.c b/src/gallium/winsys/xlib/xlib_cell.c index 5af9ee3bb5..bf69593c5c 100644 --- a/src/gallium/winsys/xlib/xlib_cell.c +++ b/src/gallium/winsys/xlib/xlib_cell.c @@ -41,7 +41,7 @@ #undef ASSERT #undef Elements -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_format.h" #include "pipe/p_context.h" #include "pipe/p_inlines.h" diff --git a/src/gallium/winsys/xlib/xlib_softpipe.c b/src/gallium/winsys/xlib/xlib_softpipe.c index c0bf37050a..01d24584e2 100644 --- a/src/gallium/winsys/xlib/xlib_softpipe.c +++ b/src/gallium/winsys/xlib/xlib_softpipe.c @@ -38,7 +38,7 @@ #undef ASSERT #undef Elements -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_format.h" #include "pipe/p_context.h" #include "pipe/p_inlines.h" diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index 0c69e16623..d18946de7d 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -42,7 +42,7 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" #include "pipe/p_inlines.h" -#include "pipe/p_winsys.h" +#include "pipe/p_screen.h" #include "st_context.h" #include "st_cb_fbo.h" #include "st_cb_texture.h" diff --git a/src/mesa/state_tracker/st_cb_feedback.c b/src/mesa/state_tracker/st_cb_feedback.c index 19021411cf..c7e8aa7cc5 100644 --- a/src/mesa/state_tracker/st_cb_feedback.c +++ b/src/mesa/state_tracker/st_cb_feedback.c @@ -52,7 +52,6 @@ #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/p_winsys.h" #include "cso_cache/cso_cache.h" #include "draw/draw_context.h" diff --git a/src/mesa/state_tracker/st_cb_flush.c b/src/mesa/state_tracker/st_cb_flush.c index 072f2e92ad..f8621ab125 100644 --- a/src/mesa/state_tracker/st_cb_flush.c +++ b/src/mesa/state_tracker/st_cb_flush.c @@ -42,7 +42,7 @@ #include "st_public.h" #include "pipe/p_context.h" #include "pipe/p_defines.h" -#include "pipe/p_winsys.h" +#include "pipe/p_screen.h" #include "util/u_gen_mipmap.h" #include "util/u_blit.h" @@ -55,7 +55,7 @@ is_front_buffer_dirty(struct st_context *st) /** - * Tell the winsys to display the front color buffer on-screen. + * Tell the screen to display the front color buffer on-screen. */ static void display_front_buffer(struct st_context *st) @@ -67,7 +67,7 @@ display_front_buffer(struct st_context *st) /* Hook for copying "fake" frontbuffer if necessary: */ - st->pipe->winsys->flush_frontbuffer( st->pipe->winsys, front_surf, + st->pipe->screen->flush_frontbuffer( st->pipe->screen, front_surf, st->pipe->priv ); /* @@ -103,8 +103,8 @@ void st_finish( struct st_context *st ) st_flush(st, PIPE_FLUSH_RENDER_CACHE | PIPE_FLUSH_FRAME, &fence); if(fence) { - st->pipe->winsys->fence_finish(st->pipe->winsys, fence, 0); - st->pipe->winsys->fence_reference(st->pipe->winsys, &fence, NULL); + st->pipe->screen->fence_finish(st->pipe->screen, fence, 0); + st->pipe->screen->fence_reference(st->pipe->screen, &fence, NULL); } } diff --git a/src/mesa/state_tracker/st_cb_strings.c b/src/mesa/state_tracker/st_cb_strings.c index 09545aa8fb..27e396ab46 100644 --- a/src/mesa/state_tracker/st_cb_strings.c +++ b/src/mesa/state_tracker/st_cb_strings.c @@ -36,7 +36,8 @@ #include "main/version.h" #include "pipe/p_context.h" #include "pipe/p_screen.h" -#include "pipe/p_winsys.h" +/* We want the name of the winsys we're running on*/ +#include "pipe/internal/p_winsys_screen.h" #include "st_context.h" #include "st_cb_strings.h" diff --git a/src/mesa/state_tracker/wgl/stw_device.c b/src/mesa/state_tracker/wgl/stw_device.c index 129b24ce77..0073fdbc23 100644 --- a/src/mesa/state_tracker/wgl/stw_device.c +++ b/src/mesa/state_tracker/wgl/stw_device.c @@ -28,7 +28,7 @@ #include #include "pipe/p_debug.h" -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_screen.h" #include "stw_device.h" diff --git a/src/mesa/state_tracker/wgl/stw_wgl_swapbuffers.c b/src/mesa/state_tracker/wgl/stw_wgl_swapbuffers.c index 002bcc64e7..ddc482752b 100644 --- a/src/mesa/state_tracker/wgl/stw_wgl_swapbuffers.c +++ b/src/mesa/state_tracker/wgl/stw_wgl_swapbuffers.c @@ -27,7 +27,7 @@ #include -#include "pipe/p_winsys.h" +#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_screen.h" #include "pipe/p_context.h" #include "state_tracker/st_context.h" -- cgit v1.2.3 From 26c9b1534388876797168cfece507fa7b9e8665a Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Fri, 30 Jan 2009 17:59:10 -0500 Subject: gallium: add a convience wrapper for simple screens forwards screen calls to the winsys --- src/gallium/auxiliary/util/Makefile | 3 +- src/gallium/auxiliary/util/u_simple_screen.c | 143 +++++++++++++++++++++++++++ src/gallium/auxiliary/util/u_simple_screen.h | 47 +++++++++ src/gallium/include/pipe/p_screen.h | 1 - 4 files changed, 192 insertions(+), 2 deletions(-) create mode 100644 src/gallium/auxiliary/util/u_simple_screen.c create mode 100644 src/gallium/auxiliary/util/u_simple_screen.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/Makefile b/src/gallium/auxiliary/util/Makefile index 6773ed73cf..44c2377721 100644 --- a/src/gallium/auxiliary/util/Makefile +++ b/src/gallium/auxiliary/util/Makefile @@ -23,7 +23,8 @@ C_SOURCES = \ u_stream_wd.c \ u_tile.c \ u_time.c \ - u_timed_winsys.c + u_timed_winsys.c \ + u_simple_screen.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/util/u_simple_screen.c b/src/gallium/auxiliary/util/u_simple_screen.c new file mode 100644 index 0000000000..089bbbc48a --- /dev/null +++ b/src/gallium/auxiliary/util/u_simple_screen.c @@ -0,0 +1,143 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#include "u_simple_screen.h" + +#include "pipe/p_screen.h" +#include "pipe/internal/p_winsys_screen.h" + + +static struct pipe_buffer * +pass_buffer_create(struct pipe_screen *screen, + unsigned alignment, + unsigned usage, + unsigned size) +{ + return screen->winsys->buffer_create(screen->winsys, + alignment, usage, size); +} + +static struct pipe_buffer * +pass_user_buffer_create(struct pipe_screen *screen, + void *ptr, + unsigned bytes) +{ + return screen->winsys->user_buffer_create(screen->winsys, + ptr, bytes); +} + +static struct pipe_buffer * +pass_surface_buffer_create(struct pipe_screen *screen, + unsigned width, unsigned height, + enum pipe_format format, + unsigned usage, + unsigned *stride) +{ + return screen->winsys->surface_buffer_create(screen->winsys, + width, height, + format, usage, stride); +} + +static void * +pass_buffer_map(struct pipe_screen *screen, + struct pipe_buffer *buf, + unsigned usage) +{ + return screen->winsys->buffer_map(screen->winsys, + buf, usage); +} + +static void +pass_buffer_unmap(struct pipe_screen *screen, + struct pipe_buffer *buf) +{ + screen->winsys->buffer_unmap(screen->winsys, buf); +} + +static void +pass_buffer_destroy(struct pipe_screen *screen, + struct pipe_buffer *buf) +{ + screen->winsys->buffer_destroy(screen->winsys, buf); +} + + +static void +pass_flush_frontbuffer(struct pipe_screen *screen, + struct pipe_surface *surf, + void *context_private) +{ + screen->winsys->flush_frontbuffer(screen->winsys, + surf, context_private); +} + +static void +pass_fence_reference(struct pipe_screen *screen, + struct pipe_fence_handle **ptr, + struct pipe_fence_handle *fence) +{ + screen->winsys->fence_reference(screen->winsys, + ptr, fence); +} + +static int +pass_fence_signalled(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flag) +{ + return screen->winsys->fence_signalled(screen->winsys, + fence, flag); +} + +static int +pass_fence_finish(struct pipe_screen *screen, + struct pipe_fence_handle *fence, + unsigned flag) +{ + return screen->winsys->fence_finish(screen->winsys, + fence, flag); +} + +void u_simple_screen_init(struct pipe_screen *screen) +{ + screen->buffer_create = pass_buffer_create; + screen->user_buffer_create = pass_user_buffer_create; + screen->surface_buffer_create = pass_surface_buffer_create; + + screen->buffer_map = pass_buffer_map; + screen->buffer_unmap = pass_buffer_unmap; + screen->buffer_destroy = pass_buffer_destroy; + screen->flush_frontbuffer = pass_flush_frontbuffer; + screen->fence_reference = pass_fence_reference; + screen->fence_signalled = pass_fence_signalled; + screen->fence_finish = pass_fence_finish; +} + +const char* u_simple_screen_winsys_name(struct pipe_screen *screen) +{ + return screen->winsys->get_name(screen->winsys); +} diff --git a/src/gallium/auxiliary/util/u_simple_screen.h b/src/gallium/auxiliary/util/u_simple_screen.h new file mode 100644 index 0000000000..c575b46c85 --- /dev/null +++ b/src/gallium/auxiliary/util/u_simple_screen.h @@ -0,0 +1,47 @@ +/************************************************************************** + * + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +#ifndef U_SIMPLE_SCREEN_H +#define U_SIMPLE_SCREEN_H + +struct pipe_screen; +struct pipe_winsys; + +/** + * The following function initializes a simple passthrough screen. + * + * All the relevant screen function pointers will forward to the + * winsys. + */ +void u_simple_screen_init(struct pipe_screen *screen); + +/** + * Returns the name of the winsys associated with this screen. + */ +const char* u_simple_screen_winsys_name(struct pipe_screen *screen); + +#endif diff --git a/src/gallium/include/pipe/p_screen.h b/src/gallium/include/pipe/p_screen.h index 715fa39cbe..0bd7d12e22 100644 --- a/src/gallium/include/pipe/p_screen.h +++ b/src/gallium/include/pipe/p_screen.h @@ -135,7 +135,6 @@ struct pipe_screen { /** * Buffer management. Buffer attributes are mostly fixed over its lifetime. * - * */ struct pipe_buffer *(*buffer_create)( struct pipe_screen *screen, unsigned alignment, -- cgit v1.2.3 From 969c728095e9a18036989f85610390c55ae61d5e Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Sat, 31 Jan 2009 15:14:38 -0500 Subject: gallium: initialize simple screen in drivers --- src/gallium/auxiliary/util/u_simple_screen.h | 2 +- src/gallium/drivers/cell/ppu/cell_screen.c | 2 ++ src/gallium/drivers/i915simple/i915_screen.c | 2 ++ src/gallium/drivers/i965simple/brw_screen.c | 2 ++ src/gallium/drivers/nv04/nv04_screen.c | 2 ++ src/gallium/drivers/nv10/nv10_screen.c | 2 ++ src/gallium/drivers/nv20/nv20_screen.c | 2 ++ src/gallium/drivers/nv30/nv30_screen.c | 2 ++ src/gallium/drivers/nv40/nv40_screen.c | 2 ++ src/gallium/drivers/nv50/nv50_screen.c | 3 +++ src/gallium/drivers/softpipe/sp_screen.c | 2 ++ 11 files changed, 22 insertions(+), 1 deletion(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_simple_screen.h b/src/gallium/auxiliary/util/u_simple_screen.h index c575b46c85..6612a8a7c0 100644 --- a/src/gallium/auxiliary/util/u_simple_screen.h +++ b/src/gallium/auxiliary/util/u_simple_screen.h @@ -34,7 +34,7 @@ struct pipe_winsys; /** * The following function initializes a simple passthrough screen. * - * All the relevant screen function pointers will forward to the + * All the relevant screen function pointers will forwarded to the * winsys. */ void u_simple_screen_init(struct pipe_screen *screen); diff --git a/src/gallium/drivers/cell/ppu/cell_screen.c b/src/gallium/drivers/cell/ppu/cell_screen.c index bbe80793ca..512d85d352 100644 --- a/src/gallium/drivers/cell/ppu/cell_screen.c +++ b/src/gallium/drivers/cell/ppu/cell_screen.c @@ -27,6 +27,7 @@ #include "util/u_memory.h" +#include "util/u_simple_screen.h" #include "pipe/internal/p_winsys_screen.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -169,6 +170,7 @@ cell_create_screen(struct pipe_winsys *winsys) screen->is_format_supported = cell_is_format_supported; cell_init_screen_texture_funcs(screen); + u_simple_screen_init(screen); return screen; } diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 069cc331bb..5bb127f3d5 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -27,6 +27,7 @@ #include "util/u_memory.h" +#include "util/u_simple_screen.h" #include "pipe/internal/p_winsys_screen.h" #include "pipe/p_inlines.h" #include "util/u_string.h" @@ -279,6 +280,7 @@ i915_create_screen(struct pipe_winsys *winsys, uint pci_id) i915screen->screen.surface_unmap = i915_surface_unmap; i915_init_screen_texture_functions(&i915screen->screen); + u_simple_screen_init(&i915screen->screen); return &i915screen->screen; } diff --git a/src/gallium/drivers/i965simple/brw_screen.c b/src/gallium/drivers/i965simple/brw_screen.c index 036ddd8c90..b22e105f10 100644 --- a/src/gallium/drivers/i965simple/brw_screen.c +++ b/src/gallium/drivers/i965simple/brw_screen.c @@ -29,6 +29,7 @@ #include "util/u_memory.h" #include "pipe/internal/p_winsys_screen.h" #include "util/u_string.h" +#include "util/u_simple_screen.h" #include "brw_context.h" #include "brw_screen.h" @@ -239,6 +240,7 @@ brw_create_screen(struct pipe_winsys *winsys, uint pci_id) brwscreen->screen.is_format_supported = brw_is_format_supported; brw_init_screen_texture_funcs(&brwscreen->screen); + u_simple_screen_init(&brwscreen->screen); return &brwscreen->screen; } diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c index 65eacde6b2..9ce3176aa1 100644 --- a/src/gallium/drivers/nv04/nv04_screen.c +++ b/src/gallium/drivers/nv04/nv04_screen.c @@ -1,4 +1,5 @@ #include "pipe/p_screen.h" +#include "util/u_simple_screen.h" #include "nv04_context.h" #include "nv04_screen.h" @@ -208,6 +209,7 @@ nv04_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) screen->pipe.surface_unmap = nv04_surface_unmap; nv04_screen_init_miptree_functions(&screen->pipe); + u_simple_screen_init(&screen->pipe); return &screen->pipe; } diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index 4d9fbd4b5f..12516fd71e 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -1,4 +1,5 @@ #include "pipe/p_screen.h" +#include "util/u_simple_screen.h" #include "nv10_context.h" #include "nv10_screen.h" @@ -204,6 +205,7 @@ nv10_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) screen->pipe.surface_unmap = nv10_surface_unmap; nv10_screen_init_miptree_functions(&screen->pipe); + u_simple_screen_init(&screen->pipe); return &screen->pipe; } diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c index 2ca6e6b149..f09b364b8d 100644 --- a/src/gallium/drivers/nv20/nv20_screen.c +++ b/src/gallium/drivers/nv20/nv20_screen.c @@ -1,4 +1,5 @@ #include "pipe/p_screen.h" +#include "util/u_simple_screen.h" #include "nv20_context.h" #include "nv20_screen.h" @@ -200,6 +201,7 @@ nv20_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) screen->pipe.surface_unmap = nv20_surface_unmap; nv20_screen_init_miptree_functions(&screen->pipe); + u_simple_screen_init(&screen->pipe); return &screen->pipe; } diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 1fac6d3df8..0f10d914ad 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -1,4 +1,5 @@ #include "pipe/p_screen.h" +#include "util/u_simple_screen.h" #include "nv30_context.h" #include "nv30_screen.h" @@ -381,6 +382,7 @@ nv30_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) screen->pipe.surface_unmap = nv30_surface_unmap; nv30_screen_init_miptree_functions(&screen->pipe); + u_simple_screen_init(&screen->pipe); return &screen->pipe; } diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index ab128fecda..46fe133d71 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -1,4 +1,5 @@ #include "pipe/p_screen.h" +#include "util/u_simple_screen.h" #include "nv40_context.h" #include "nv40_screen.h" @@ -363,6 +364,7 @@ nv40_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) screen->pipe.surface_unmap = nv40_surface_unmap; nv40_screen_init_miptree_functions(&screen->pipe); + u_simple_screen_init(&screen->pipe); return &screen->pipe; } diff --git a/src/gallium/drivers/nv50/nv50_screen.c b/src/gallium/drivers/nv50/nv50_screen.c index ef46233f83..6cddddacd5 100644 --- a/src/gallium/drivers/nv50/nv50_screen.c +++ b/src/gallium/drivers/nv50/nv50_screen.c @@ -22,6 +22,8 @@ #include "pipe/p_screen.h" +#include "util/u_simple_screen.h" + #include "nv50_context.h" #include "nv50_screen.h" @@ -323,6 +325,7 @@ nv50_screen_create(struct pipe_winsys *ws, struct nouveau_winsys *nvws) nv50_screen_init_miptree_functions(&screen->pipe); nv50_surface_init_screen_functions(&screen->pipe); + u_simple_screen_init(&screen->pipe); return &screen->pipe; } diff --git a/src/gallium/drivers/softpipe/sp_screen.c b/src/gallium/drivers/softpipe/sp_screen.c index 4bd95a61e6..7380a6ae2b 100644 --- a/src/gallium/drivers/softpipe/sp_screen.c +++ b/src/gallium/drivers/softpipe/sp_screen.c @@ -27,6 +27,7 @@ #include "util/u_memory.h" +#include "util/u_simple_screen.h" #include "pipe/internal/p_winsys_screen.h" #include "pipe/p_defines.h" #include "pipe/p_screen.h" @@ -174,6 +175,7 @@ softpipe_create_screen(struct pipe_winsys *winsys) screen->base.is_format_supported = softpipe_is_format_supported; softpipe_init_screen_texture_funcs(&screen->base); + u_simple_screen_init(&screen->base); return &screen->base; } -- cgit v1.2.3 From 4ad190c96f2ad4364537e700dcb381c9dceec35c Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 1 Feb 2009 10:27:54 +0000 Subject: pipebuffer: Drop (most of) pipe winsys stuff. --- src/gallium/auxiliary/pipebuffer/Makefile | 3 +- src/gallium/auxiliary/pipebuffer/SConscript | 1 - src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c | 1 - src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c | 1 - src/gallium/auxiliary/pipebuffer/pb_winsys.c | 191 --------------------- src/gallium/auxiliary/pipebuffer/pb_winsys.h | 79 --------- 6 files changed, 1 insertion(+), 275 deletions(-) delete mode 100644 src/gallium/auxiliary/pipebuffer/pb_winsys.c delete mode 100644 src/gallium/auxiliary/pipebuffer/pb_winsys.h (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/pipebuffer/Makefile b/src/gallium/auxiliary/pipebuffer/Makefile index 4bcf08f8a5..3b501c51df 100644 --- a/src/gallium/auxiliary/pipebuffer/Makefile +++ b/src/gallium/auxiliary/pipebuffer/Makefile @@ -14,8 +14,7 @@ C_SOURCES = \ pb_bufmgr_ondemand.c \ pb_bufmgr_pool.c \ pb_bufmgr_slab.c \ - pb_validate.c \ - pb_winsys.c + pb_validate.c include ../../Makefile.template diff --git a/src/gallium/auxiliary/pipebuffer/SConscript b/src/gallium/auxiliary/pipebuffer/SConscript index 4acf721808..8e9f06abe4 100644 --- a/src/gallium/auxiliary/pipebuffer/SConscript +++ b/src/gallium/auxiliary/pipebuffer/SConscript @@ -14,7 +14,6 @@ pipebuffer = env.ConvenienceLibrary( 'pb_bufmgr_pool.c', 'pb_bufmgr_slab.c', 'pb_validate.c', - 'pb_winsys.c', ]) auxiliaries.insert(0, pipebuffer) diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c index 19baa82282..a168853713 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c @@ -36,7 +36,6 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" -#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_thread.h" #include "util/u_memory.h" #include "util/u_double_list.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c index a741bae794..26d9c24aec 100644 --- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c +++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c @@ -35,7 +35,6 @@ #include "pipe/p_compiler.h" #include "pipe/p_debug.h" -#include "pipe/internal/p_winsys_screen.h" #include "pipe/p_thread.h" #include "util/u_math.h" #include "util/u_memory.h" diff --git a/src/gallium/auxiliary/pipebuffer/pb_winsys.c b/src/gallium/auxiliary/pipebuffer/pb_winsys.c deleted file mode 100644 index d26800be48..0000000000 --- a/src/gallium/auxiliary/pipebuffer/pb_winsys.c +++ /dev/null @@ -1,191 +0,0 @@ -/************************************************************************** - * - * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * \file - * Implementation of client buffer (also designated as "user buffers"), which - * are just state-tracker owned data masqueraded as buffers. - * - * \author Jose Fonseca - */ - - -#include "pipe/internal/p_winsys_screen.h" -#include "util/u_memory.h" - -#include "pb_buffer.h" -#include "pb_winsys.h" - - -/** - * User buffers are special buffers that initially reference memory - * held by the user but which may if necessary copy that memory into - * device memory behind the scenes, for submission to hardware. - * - * These are particularly useful when the referenced data is never - * submitted to hardware at all, in the particular case of software - * vertex processing. - */ -struct pb_user_buffer -{ - struct pb_buffer base; - void *data; -}; - - -extern const struct pb_vtbl pb_user_buffer_vtbl; - - -static INLINE struct pb_user_buffer * -pb_user_buffer(struct pb_buffer *buf) -{ - assert(buf); - assert(buf->vtbl == &pb_user_buffer_vtbl); - return (struct pb_user_buffer *)buf; -} - - -static void -pb_user_buffer_destroy(struct pb_buffer *buf) -{ - assert(buf); - FREE(buf); -} - - -static void * -pb_user_buffer_map(struct pb_buffer *buf, - unsigned flags) -{ - return pb_user_buffer(buf)->data; -} - - -static void -pb_user_buffer_unmap(struct pb_buffer *buf) -{ - /* No-op */ -} - - -static enum pipe_error -pb_user_buffer_validate(struct pb_buffer *buf, - struct pb_validate *vl, - unsigned flags) -{ - assert(0); - return PIPE_ERROR; -} - - -static void -pb_user_buffer_fence(struct pb_buffer *buf, - struct pipe_fence_handle *fence) -{ - assert(0); -} - - -static void -pb_user_buffer_get_base_buffer(struct pb_buffer *buf, - struct pb_buffer **base_buf, - unsigned *offset) -{ - *base_buf = buf; - *offset = 0; -} - - -const struct pb_vtbl -pb_user_buffer_vtbl = { - pb_user_buffer_destroy, - pb_user_buffer_map, - pb_user_buffer_unmap, - pb_user_buffer_validate, - pb_user_buffer_fence, - pb_user_buffer_get_base_buffer -}; - - -struct pipe_buffer * -pb_winsys_user_buffer_create(struct pipe_winsys *winsys, - void *data, - unsigned bytes) -{ - struct pb_user_buffer *buf = CALLOC_STRUCT(pb_user_buffer); - - if(!buf) - return NULL; - - buf->base.base.refcount = 1; - buf->base.base.size = bytes; - buf->base.base.alignment = 0; - buf->base.base.usage = 0; - - buf->base.vtbl = &pb_user_buffer_vtbl; - buf->data = data; - - return &buf->base.base; -} - - -void * -pb_winsys_buffer_map(struct pipe_winsys *winsys, - struct pipe_buffer *buf, - unsigned flags) -{ - (void)winsys; - return pb_map(pb_buffer(buf), flags); -} - - -void -pb_winsys_buffer_unmap(struct pipe_winsys *winsys, - struct pipe_buffer *buf) -{ - (void)winsys; - pb_unmap(pb_buffer(buf)); -} - - -void -pb_winsys_buffer_destroy(struct pipe_winsys *winsys, - struct pipe_buffer *buf) -{ - (void)winsys; - pb_destroy(pb_buffer(buf)); -} - - -void -pb_init_winsys(struct pipe_winsys *winsys) -{ - winsys->user_buffer_create = pb_winsys_user_buffer_create; - winsys->buffer_map = pb_winsys_buffer_map; - winsys->buffer_unmap = pb_winsys_buffer_unmap; - winsys->buffer_destroy = pb_winsys_buffer_destroy; -} diff --git a/src/gallium/auxiliary/pipebuffer/pb_winsys.h b/src/gallium/auxiliary/pipebuffer/pb_winsys.h deleted file mode 100644 index 7cf6f8e3f0..0000000000 --- a/src/gallium/auxiliary/pipebuffer/pb_winsys.h +++ /dev/null @@ -1,79 +0,0 @@ -/************************************************************************** - * - * Copyright 2009 VMWare, Inc. - * All Rights Reserved. - * - * Permission is hereby granted, free of charge, to any person obtaining a - * copy of this software and associated documentation files (the - * "Software"), to deal in the Software without restriction, including - * without limitation the rights to use, copy, modify, merge, publish, - * distribute, sub license, and/or sell copies of the Software, and to - * permit persons to whom the Software is furnished to do so, subject to - * the following conditions: - * - * The above copyright notice and this permission notice (including the - * next paragraph) shall be included in all copies or substantial portions - * of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS - * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. - * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR - * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, - * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE - * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - * - **************************************************************************/ - -/** - * @file - * Drop-in replacements for winsys buffer callbacks. - * - * The requirement to use this functions is that all pipe_buffers must be in - * fact pb_buffers. - * - * @author Jose Fonseca - */ - -#ifndef PB_WINSYS_H_ -#define PB_WINSYS_H_ - - -#include "pipe/p_compiler.h" -#include "pipe/p_debug.h" -#include "pipe/p_state.h" -#include "pipe/p_inlines.h" - - -#ifdef __cplusplus -extern "C" { -#endif - - -struct pipe_buffer * -pb_winsys_user_buffer_create(struct pipe_winsys *winsys, - void *data, - unsigned bytes); - -void * -pb_winsys_buffer_map(struct pipe_winsys *winsys, - struct pipe_buffer *buf, - unsigned flags); - -void -pb_winsys_buffer_unmap(struct pipe_winsys *winsys, - struct pipe_buffer *buf); - -void -pb_winsys_buffer_destroy(struct pipe_winsys *winsys, - struct pipe_buffer *buf); - -void -pb_init_winsys(struct pipe_winsys *winsys); - - -#ifdef __cplusplus -} -#endif - -#endif /*PB_WINSYS_H_*/ -- cgit v1.2.3 From 64e525eab9e7c60ee18bc75e6f269783533d5edd Mon Sep 17 00:00:00 2001 From: José Fonseca Date: Sun, 1 Feb 2009 10:31:25 +0000 Subject: util: List new file in sconscript. --- src/gallium/auxiliary/util/SConscript | 1 + 1 file changed, 1 insertion(+) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/SConscript b/src/gallium/auxiliary/util/SConscript index 5df932c0f7..35f683fb8e 100644 --- a/src/gallium/auxiliary/util/SConscript +++ b/src/gallium/auxiliary/util/SConscript @@ -24,6 +24,7 @@ util = env.ConvenienceLibrary( 'u_tile.c', 'u_time.c', 'u_timed_winsys.c', + 'u_simple_screen.c', ]) auxiliaries.insert(0, util) -- cgit v1.2.3 From 5069bfed29bcee2c89c36c74c6d65d388eb7792e Mon Sep 17 00:00:00 2001 From: Zack Rusin Date: Mon, 2 Feb 2009 23:47:16 -0500 Subject: gallium: remove pipe_buffer from surfaces this change disassociates, at least from the driver perspective, the surface from buffer. surfaces are technically now views on the textures so make it so by hiding the buffer in the internals of textures. --- src/gallium/auxiliary/util/u_gen_mipmap.c | 36 +++++++++++----------- src/gallium/drivers/i915simple/i915_screen.c | 6 ++-- src/gallium/drivers/i915simple/i915_state_emit.c | 22 ++++++++----- src/gallium/drivers/i915simple/i915_surface.c | 9 ++++-- src/gallium/drivers/i915simple/i915_texture.c | 2 -- src/gallium/drivers/i965simple/brw_misc_state.c | 4 +-- src/gallium/drivers/i965simple/brw_surface.c | 9 ++++-- src/gallium/drivers/i965simple/brw_tex_layout.c | 1 - .../drivers/i965simple/brw_wm_surface_state.c | 3 +- src/gallium/drivers/nv04/nv04_miptree.c | 2 -- src/gallium/drivers/nv04/nv04_screen.c | 9 +++--- src/gallium/drivers/nv04/nv04_state_emit.c | 9 ++++-- src/gallium/drivers/nv10/nv10_miptree.c | 1 - src/gallium/drivers/nv10/nv10_screen.c | 6 ++-- src/gallium/drivers/nv10/nv10_state_emit.c | 7 +++-- src/gallium/drivers/nv20/nv20_miptree.c | 2 -- src/gallium/drivers/nv20/nv20_screen.c | 6 ++-- src/gallium/drivers/nv20/nv20_state_emit.c | 7 +++-- src/gallium/drivers/nv30/nv30_miptree.c | 2 -- src/gallium/drivers/nv30/nv30_screen.c | 6 ++-- src/gallium/drivers/nv30/nv30_state_fb.c | 16 ++++++---- src/gallium/drivers/nv40/nv40_miptree.c | 2 -- src/gallium/drivers/nv40/nv40_screen.c | 9 ++++-- src/gallium/drivers/nv40/nv40_state_fb.c | 27 ++++++++++------ src/gallium/drivers/nv50/nv50_context.h | 7 +++++ src/gallium/drivers/nv50/nv50_miptree.c | 6 +--- src/gallium/drivers/nv50/nv50_program.c | 2 +- src/gallium/drivers/nv50/nv50_state_validate.c | 8 ++--- src/gallium/drivers/nv50/nv50_surface.c | 4 +-- src/gallium/drivers/softpipe/sp_texture.c | 21 ++++++++----- src/gallium/drivers/softpipe/sp_tile_cache.c | 2 +- src/gallium/drivers/trace/tr_state.c | 1 - src/gallium/include/pipe/p_state.h | 1 - src/gallium/winsys/egl_xlib/egl_xlib.c | 5 +-- src/gallium/winsys/xlib/Makefile | 4 +-- src/gallium/winsys/xlib/xlib_brw_screen.c | 13 ++++++-- src/gallium/winsys/xlib/xlib_cell.c | 4 ++- src/gallium/winsys/xlib/xlib_softpipe.c | 8 +++-- src/mesa/state_tracker/st_cb_fbo.c | 1 - src/mesa/state_tracker/st_gen_mipmap.c | 12 ++++---- 40 files changed, 177 insertions(+), 125 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c index 301a58ed7b..2b4cdab6cf 100644 --- a/src/gallium/auxiliary/util/u_gen_mipmap.c +++ b/src/gallium/auxiliary/util/u_gen_mipmap.c @@ -1125,19 +1125,19 @@ make_1d_mipmap(struct gen_mipmap_state *ctx, dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, PIPE_BUFFER_USAGE_CPU_WRITE); - srcMap = ((ubyte *) pipe_buffer_map(screen, srcSurf->buffer, - PIPE_BUFFER_USAGE_CPU_READ) + srcMap = ((ubyte *) pipe_surface_map(srcSurf, + PIPE_BUFFER_USAGE_CPU_READ) + srcSurf->offset); - dstMap = ((ubyte *) pipe_buffer_map(screen, dstSurf->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE) + dstMap = ((ubyte *) pipe_surface_map(dstSurf, + PIPE_BUFFER_USAGE_CPU_WRITE) + dstSurf->offset); reduce_1d(pt->format, srcSurf->width, srcMap, dstSurf->width, dstMap); - pipe_buffer_unmap(screen, srcSurf->buffer); - pipe_buffer_unmap(screen, dstSurf->buffer); + pipe_surface_unmap(srcSurf); + pipe_surface_unmap(dstSurf); pipe_surface_reference(&srcSurf, NULL); pipe_surface_reference(&dstSurf, NULL); @@ -1168,11 +1168,11 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, PIPE_BUFFER_USAGE_CPU_WRITE); - srcMap = ((ubyte *) pipe_buffer_map(screen, srcSurf->buffer, - PIPE_BUFFER_USAGE_CPU_READ) + srcMap = ((ubyte *) pipe_surface_map(srcSurf, + PIPE_BUFFER_USAGE_CPU_READ) + srcSurf->offset); - dstMap = ((ubyte *) pipe_buffer_map(screen, dstSurf->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE) + dstMap = ((ubyte *) pipe_surface_map(dstSurf, + PIPE_BUFFER_USAGE_CPU_WRITE) + dstSurf->offset); reduce_2d(pt->format, @@ -1181,8 +1181,8 @@ make_2d_mipmap(struct gen_mipmap_state *ctx, dstSurf->width, dstSurf->height, dstSurf->stride, dstMap); - pipe_buffer_unmap(screen, srcSurf->buffer); - pipe_buffer_unmap(screen, dstSurf->buffer); + pipe_surface_unmap(srcSurf); + pipe_surface_unmap(dstSurf); pipe_surface_reference(&srcSurf, NULL); pipe_surface_reference(&dstSurf, NULL); @@ -1212,11 +1212,11 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, PIPE_BUFFER_USAGE_CPU_WRITE); - srcMap = ((ubyte *) pipe_buffer_map(screen, srcSurf->buffer, - PIPE_BUFFER_USAGE_CPU_READ) + srcMap = ((ubyte *) pipe_surface_map(srcSurf, + PIPE_BUFFER_USAGE_CPU_READ) + srcSurf->offset); - dstMap = ((ubyte *) pipe_buffer_map(screen, dstSurf->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE) + dstMap = ((ubyte *) pipe_surface_map(dstSurf, + PIPE_BUFFER_USAGE_CPU_WRITE) + dstSurf->offset); #if 0 @@ -1229,8 +1229,8 @@ make_3d_mipmap(struct gen_mipmap_state *ctx, (void) reduce_3d; #endif - pipe_buffer_unmap(screen, srcSurf->buffer); - pipe_buffer_unmap(screen, dstSurf->buffer); + pipe_surface_unmap(srcSurf); + pipe_surface_unmap(dstSurf); pipe_surface_reference(&srcSurf, NULL); pipe_surface_reference(&dstSurf, NULL); diff --git a/src/gallium/drivers/i915simple/i915_screen.c b/src/gallium/drivers/i915simple/i915_screen.c index 5bb127f3d5..39e48105b3 100644 --- a/src/gallium/drivers/i915simple/i915_screen.c +++ b/src/gallium/drivers/i915simple/i915_screen.c @@ -209,7 +209,8 @@ i915_surface_map( struct pipe_screen *screen, struct pipe_surface *surface, unsigned flags ) { - char *map = pipe_buffer_map( screen, surface->buffer, flags ); + struct i915_texture *tex = (struct i915_texture *)surface->texture; + char *map = pipe_buffer_map( screen, tex->buffer, flags ); if (map == NULL) return NULL; @@ -228,7 +229,8 @@ static void i915_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { - pipe_buffer_unmap( screen, surface->buffer ); + struct i915_texture *tex = (struct i915_texture *)surface->texture; + pipe_buffer_unmap( screen, tex->buffer ); } diff --git a/src/gallium/drivers/i915simple/i915_state_emit.c b/src/gallium/drivers/i915simple/i915_state_emit.c index 9bd6f92323..6558cf1c3e 100644 --- a/src/gallium/drivers/i915simple/i915_state_emit.c +++ b/src/gallium/drivers/i915simple/i915_state_emit.c @@ -213,18 +213,22 @@ i915_emit_hardware_state(struct i915_context *i915 ) if (cbuf_surface) { unsigned cpitch = cbuf_surface->stride; unsigned ctile = BUF_3D_USE_FENCE; - if (cbuf_surface->texture && - ((struct i915_texture*)(cbuf_surface->texture))->tiled) { + struct i915_texture *tex = (struct i915_texture *) + cbuf_surface->texture; + struct pipe_buffer *buffer = tex->buffer; + assert(tex); + + if (tex && tex->tiled) { ctile = BUF_3D_TILED_SURFACE; } OUT_BATCH(_3DSTATE_BUF_INFO_CMD); - OUT_BATCH(BUF_3D_ID_COLOR_BACK | + OUT_BATCH(BUF_3D_ID_COLOR_BACK | BUF_3D_PITCH(cpitch) | /* pitch in bytes */ ctile); - OUT_RELOC(cbuf_surface->buffer, + OUT_RELOC(tex->buffer, I915_BUFFER_ACCESS_WRITE, cbuf_surface->offset); } @@ -234,8 +238,12 @@ i915_emit_hardware_state(struct i915_context *i915 ) if (depth_surface) { unsigned zpitch = depth_surface->stride; unsigned ztile = BUF_3D_USE_FENCE; - if (depth_surface->texture && - ((struct i915_texture*)(depth_surface->texture))->tiled) { + struct i915_texture *tex = (struct i915_texture *) + depth_surface->texture; + struct pipe_buffer *buffer = tex->buffer; + assert(tex); + + if (tex && tex->tiled) { ztile = BUF_3D_TILED_SURFACE; } @@ -245,7 +253,7 @@ i915_emit_hardware_state(struct i915_context *i915 ) BUF_3D_PITCH(zpitch) | /* pitch in bytes */ ztile); - OUT_RELOC(depth_surface->buffer, + OUT_RELOC(tex->buffer, I915_BUFFER_ACCESS_WRITE, depth_surface->offset); } diff --git a/src/gallium/drivers/i915simple/i915_surface.c b/src/gallium/drivers/i915simple/i915_surface.c index 5ffdb76682..94e2deaf61 100644 --- a/src/gallium/drivers/i915simple/i915_surface.c +++ b/src/gallium/drivers/i915simple/i915_surface.c @@ -74,13 +74,15 @@ i915_surface_copy(struct pipe_context *pipe, pipe->screen->surface_unmap(pipe->screen, dst); } else { + struct i915_texture *dst_tex = (struct i915_texture *)dst->texture; + struct i915_texture *src_tex = (struct i915_texture *)src->texture; assert(dst->block.width == 1); assert(dst->block.height == 1); i915_copy_blit( i915_context(pipe), do_flip, dst->block.size, - (unsigned short) src->stride, src->buffer, src->offset, - (unsigned short) dst->stride, dst->buffer, dst->offset, + (unsigned short) src->stride, src_tex->buffer, src->offset, + (unsigned short) dst->stride, dst_tex->buffer, dst->offset, (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height ); } } @@ -102,12 +104,13 @@ i915_surface_fill(struct pipe_context *pipe, pipe->screen->surface_unmap(pipe->screen, dst); } else { + struct i915_texture *tex = (struct i915_texture *)dst->texture; assert(dst->block.width == 1); assert(dst->block.height == 1); i915_fill_blit( i915_context(pipe), dst->block.size, (unsigned short) dst->stride, - dst->buffer, dst->offset, + tex->buffer, dst->offset, (short) dstx, (short) dsty, (short) width, (short) height, value ); diff --git a/src/gallium/drivers/i915simple/i915_texture.c b/src/gallium/drivers/i915simple/i915_texture.c index 803ef3a187..b2ca3a2286 100644 --- a/src/gallium/drivers/i915simple/i915_texture.c +++ b/src/gallium/drivers/i915simple/i915_texture.c @@ -683,7 +683,6 @@ i915_get_tex_surface(struct pipe_screen *screen, if (ps) { ps->refcount = 1; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(screen, &ps->buffer, tex->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; @@ -755,7 +754,6 @@ i915_tex_surface_release(struct pipe_screen *screen, } pipe_texture_reference(&surf->texture, NULL); - pipe_buffer_reference(screen, &surf->buffer, NULL); FREE(surf); } diff --git a/src/gallium/drivers/i965simple/brw_misc_state.c b/src/gallium/drivers/i965simple/brw_misc_state.c index be812c5da9..99ff4403a5 100644 --- a/src/gallium/drivers/i965simple/brw_misc_state.c +++ b/src/gallium/drivers/i965simple/brw_misc_state.c @@ -223,7 +223,7 @@ static void upload_depthbuffer(struct brw_context *brw) OUT_BATCH(0); } else { unsigned int format; - + struct brw_texture *tex = (struct brw_texture *)depth_surface->texture; assert(depth_surface->block.width == 1); assert(depth_surface->block.height == 1); switch (depth_surface->block.size) { @@ -246,7 +246,7 @@ static void upload_depthbuffer(struct brw_context *brw) (BRW_TILEWALK_YMAJOR << 26) | // (depth_surface->region->tiled << 27) | (BRW_SURFACE_2D << 29)); - OUT_RELOC(depth_surface->buffer, + OUT_RELOC(tex->buffer, PIPE_BUFFER_USAGE_GPU_READ | PIPE_BUFFER_USAGE_GPU_WRITE, 0); OUT_BATCH((BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1) | ((depth_surface->stride/depth_surface->block.size - 1) << 6) | diff --git a/src/gallium/drivers/i965simple/brw_surface.c b/src/gallium/drivers/i965simple/brw_surface.c index 3159eba2fd..0a95dce194 100644 --- a/src/gallium/drivers/i965simple/brw_surface.c +++ b/src/gallium/drivers/i965simple/brw_surface.c @@ -74,13 +74,15 @@ brw_surface_copy(struct pipe_context *pipe, pipe->screen->surface_unmap(pipe->screen, dst); } else { + struct brw_texture *dst_tex = (struct brw_texture *)dst->texture; + struct brw_texture *src_tex = (struct brw_texture *)src->texture; assert(dst->block.width == 1); assert(dst->block.height == 1); brw_copy_blit(brw_context(pipe), do_flip, dst->block.size, - (short) src->stride/src->block.size, src->buffer, src->offset, FALSE, - (short) dst->stride/dst->block.size, dst->buffer, dst->offset, FALSE, + (short) src->stride/src->block.size, src_tex->buffer, src->offset, FALSE, + (short) dst->stride/dst->block.size, dst_tex->buffer, dst->offset, FALSE, (short) srcx, (short) srcy, (short) dstx, (short) dsty, (short) width, (short) height, PIPE_LOGICOP_COPY); } @@ -103,12 +105,13 @@ brw_surface_fill(struct pipe_context *pipe, pipe->screen->surface_unmap(pipe->screen, dst); } else { + struct brw_texture *tex = (struct brw_texture *)dst->texture; assert(dst->block.width == 1); assert(dst->block.height == 1); brw_fill_blit(brw_context(pipe), dst->block.size, (short) dst->stride/dst->block.size, - dst->buffer, dst->offset, FALSE, + tex->buffer, dst->offset, FALSE, (short) dstx, (short) dsty, (short) width, (short) height, value); diff --git a/src/gallium/drivers/i965simple/brw_tex_layout.c b/src/gallium/drivers/i965simple/brw_tex_layout.c index 6af0d5cf4b..448229ed4e 100644 --- a/src/gallium/drivers/i965simple/brw_tex_layout.c +++ b/src/gallium/drivers/i965simple/brw_tex_layout.c @@ -367,7 +367,6 @@ brw_get_tex_surface_screen(struct pipe_screen *screen, if (ps) { ps->refcount = 1; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(screen, &ps->buffer, tex->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; diff --git a/src/gallium/drivers/i965simple/brw_wm_surface_state.c b/src/gallium/drivers/i965simple/brw_wm_surface_state.c index 1a326f9918..1bab5bfdb3 100644 --- a/src/gallium/drivers/i965simple/brw_wm_surface_state.c +++ b/src/gallium/drivers/i965simple/brw_wm_surface_state.c @@ -193,6 +193,7 @@ static void upload_wm_surfaces(struct brw_context *brw ) /* BRW_NEW_FRAMEBUFFER */ struct pipe_surface *pipe_surface = brw->attribs.FrameBuffer.cbufs[0];/*fixme*/ + struct brw_texture *tex = (struct brw_texture *)pipe_surface->texture; memset(&surf, 0, sizeof(surf)); @@ -204,7 +205,7 @@ static void upload_wm_surfaces(struct brw_context *brw ) surf.ss0.surface_type = BRW_SURFACE_2D; - surf.ss1.base_addr = brw_buffer_offset( brw, pipe_surface->buffer ); + surf.ss1.base_addr = brw_buffer_offset( brw, tex->buffer ); surf.ss2.width = pipe_surface->width - 1; surf.ss2.height = pipe_surface->height - 1; diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c index 01cb8ecbf3..0575dc0afc 100644 --- a/src/gallium/drivers/nv04/nv04_miptree.c +++ b/src/gallium/drivers/nv04/nv04_miptree.c @@ -107,7 +107,6 @@ nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, if (!ps) return NULL; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(pscreen, &ps->buffer, nv04mt->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; @@ -138,7 +137,6 @@ nv04_miptree_surface_del(struct pipe_screen *pscreen, return; pipe_texture_reference(&ps->texture, NULL); - pipe_buffer_reference(pscreen->winsys, &ps->buffer, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv04/nv04_screen.c b/src/gallium/drivers/nv04/nv04_screen.c index 2ae6784077..e5e3d4772a 100644 --- a/src/gallium/drivers/nv04/nv04_screen.c +++ b/src/gallium/drivers/nv04/nv04_screen.c @@ -1,4 +1,5 @@ #include "pipe/p_screen.h" +#include "pipe/p_inlines.h" #include "util/u_simple_screen.h" #include "nv04_context.h" @@ -122,10 +123,10 @@ static void * nv04_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, unsigned flags ) { - struct pipe_winsys *ws = screen->winsys; void *map; + struct nv04_miptree *nv04mt = (struct nv04_miptree *)surface->texture; - map = ws->buffer_map(ws, surface->buffer, flags); + map = pipe_buffer_map(screen, nv04mt->buffer, flags); if (!map) return NULL; @@ -135,9 +136,9 @@ nv04_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, static void nv04_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { - struct pipe_winsys *ws = screen->winsys; + struct nv04_miptree *nv04mt = (struct nv04_miptree *)surface->texture; - ws->buffer_unmap(ws, surface->buffer); + pipe_buffer_unmap(screen, nv04mt->buffer); } static void diff --git a/src/gallium/drivers/nv04/nv04_state_emit.c b/src/gallium/drivers/nv04/nv04_state_emit.c index 26491758a0..bd8ef1adbf 100644 --- a/src/gallium/drivers/nv04/nv04_state_emit.c +++ b/src/gallium/drivers/nv04/nv04_state_emit.c @@ -96,6 +96,7 @@ static void nv04_state_emit_framebuffer(struct nv04_context* nv04) struct pipe_surface *rt, *zeta; uint32_t rt_format, w, h; int colour_format = 0, zeta_format = 0; + struct nv04_miptree *nv04mt = 0; w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; @@ -129,14 +130,16 @@ static void nv04_state_emit_framebuffer(struct nv04_context* nv04) BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_FORMAT, 1); OUT_RING(rt_format); - + + nv04mt = (struct nv04_miptree *)rt->texture; /* FIXME pitches have to be aligned ! */ BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_PITCH, 2); OUT_RING(rt->stride|(zeta->stride<<16)); - OUT_RELOCl(rt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); if (fb->zsbuf) { + nv04mt = (struct nv04_miptree *)zeta->texture; BEGIN_RING(context_surfaces_3d, NV04_CONTEXT_SURFACES_3D_OFFSET_ZETA, 1); - OUT_RELOCl(zeta->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); + OUT_RELOCl(nv04mt->buffer, 0, NOUVEAU_BO_VRAM | NOUVEAU_BO_WR); } } diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c index f8c021261b..909278213e 100644 --- a/src/gallium/drivers/nv10/nv10_miptree.c +++ b/src/gallium/drivers/nv10/nv10_miptree.c @@ -114,7 +114,6 @@ nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, if (!ps) return NULL; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(screen, &ps->buffer, nv10mt->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; diff --git a/src/gallium/drivers/nv10/nv10_screen.c b/src/gallium/drivers/nv10/nv10_screen.c index 12516fd71e..2f945a193c 100644 --- a/src/gallium/drivers/nv10/nv10_screen.c +++ b/src/gallium/drivers/nv10/nv10_screen.c @@ -122,8 +122,9 @@ nv10_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, { struct pipe_winsys *ws = screen->winsys; void *map; + struct nv10_miptree *nv10mt = (struct nv10_miptree *)surface->texture; - map = ws->buffer_map(ws, surface->buffer, flags); + map = ws->buffer_map(ws, nv10mt->buffer, flags); if (!map) return NULL; @@ -134,8 +135,9 @@ static void nv10_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { struct pipe_winsys *ws = screen->winsys; + struct nv10_miptree *nv10mt = (struct nv10_miptree *)surface->texture; - ws->buffer_unmap(ws, surface->buffer); + ws->buffer_unmap(ws, nv10mt->buffer); } static void diff --git a/src/gallium/drivers/nv10/nv10_state_emit.c b/src/gallium/drivers/nv10/nv10_state_emit.c index 46c7e1d753..5dec618b93 100644 --- a/src/gallium/drivers/nv10/nv10_state_emit.c +++ b/src/gallium/drivers/nv10/nv10_state_emit.c @@ -106,6 +106,7 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) struct pipe_surface *rt, *zeta = NULL; uint32_t rt_format, w, h; int colour_format = 0, zeta_format = 0; + struct nv10_miptree *nv10mt = 0; w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; @@ -147,11 +148,13 @@ static void nv10_state_emit_framebuffer(struct nv10_context* nv10) OUT_RING (rt->stride | (rt->stride << 16)); } - nv10->rt[0] = rt->buffer; + nv10mt = (struct nv10_miptree *)rt->texture; + nv10->rt[0] = nv10mt->buffer; if (zeta_format) { - nv10->zeta = zeta->buffer; + nv10mt = (struct nv10_miptree *)zeta->texture; + nv10->zeta = nv10mt->buffer; } BEGIN_RING(celsius, NV10TCL_RT_HORIZ, 3); diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c index d2038c391d..8e4cc80902 100644 --- a/src/gallium/drivers/nv20/nv20_miptree.c +++ b/src/gallium/drivers/nv20/nv20_miptree.c @@ -106,7 +106,6 @@ nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt, if (!ps) return NULL; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(screen, &ps->buffer, nv20mt->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; @@ -141,7 +140,6 @@ nv20_miptree_surface_release(struct pipe_screen *pscreen, return; pipe_texture_reference(&ps->texture, NULL); - pipe_buffer_reference(pscreen, &ps->buffer, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv20/nv20_screen.c b/src/gallium/drivers/nv20/nv20_screen.c index f09b364b8d..c9171fa178 100644 --- a/src/gallium/drivers/nv20/nv20_screen.c +++ b/src/gallium/drivers/nv20/nv20_screen.c @@ -122,8 +122,9 @@ nv20_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, { struct pipe_winsys *ws = screen->winsys; void *map; + struct nv20_miptree *nv20mt = (struct nv20_miptree *)surface->texture; - map = ws->buffer_map(ws, surface->buffer, flags); + map = ws->buffer_map(ws, nv20mt->buffer, flags); if (!map) return NULL; @@ -134,8 +135,9 @@ static void nv20_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { struct pipe_winsys *ws = screen->winsys; + struct nv20_miptree *nv20mt = (struct nv20_miptree *)surface->texture; - ws->buffer_unmap(ws, surface->buffer); + ws->buffer_unmap(ws, nv20mt->buffer); } static void diff --git a/src/gallium/drivers/nv20/nv20_state_emit.c b/src/gallium/drivers/nv20/nv20_state_emit.c index ea20078a50..0f4df9ca31 100644 --- a/src/gallium/drivers/nv20/nv20_state_emit.c +++ b/src/gallium/drivers/nv20/nv20_state_emit.c @@ -112,6 +112,7 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) struct pipe_surface *rt, *zeta = NULL; uint32_t rt_format, w, h; int colour_format = 0, zeta_format = 0; + struct nv20_miptree *nv20mt = 0; w = fb->cbufs[0]->width; h = fb->cbufs[0]->height; @@ -153,11 +154,13 @@ static void nv20_state_emit_framebuffer(struct nv20_context* nv20) OUT_RING (rt->stride | (rt->stride << 16)); } - nv20->rt[0] = rt->buffer; + nv20mt = (struct nv20_miptree *)rt->texture; + nv20->rt[0] = nv20mt->buffer; if (zeta_format) { - nv20->zeta = zeta->buffer; + nv20mt = (struct nv20_miptree *)zeta->texture; + nv20->zeta = nv20mt->buffer; } BEGIN_RING(kelvin, NV20TCL_RT_HORIZ, 3); diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c index 79baac714c..c55756971b 100644 --- a/src/gallium/drivers/nv30/nv30_miptree.c +++ b/src/gallium/drivers/nv30/nv30_miptree.c @@ -143,7 +143,6 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, if (!ps) return NULL; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(pscreen, &ps->buffer, nv30mt->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; @@ -181,7 +180,6 @@ nv30_miptree_surface_del(struct pipe_screen *pscreen, return; pipe_texture_reference(&ps->texture, NULL); - pipe_buffer_reference(pscreen, &ps->buffer, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c index 0f10d914ad..9738436dc4 100644 --- a/src/gallium/drivers/nv30/nv30_screen.c +++ b/src/gallium/drivers/nv30/nv30_screen.c @@ -134,6 +134,7 @@ nv30_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, struct pipe_winsys *ws = screen->winsys; struct pipe_surface *surface_to_map; void *map; + struct nv30_miptree *nv30mt = (struct nv30_miptree *)surface->texture; if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { struct nv30_miptree *mt = (struct nv30_miptree *)surface->texture; @@ -162,7 +163,7 @@ nv30_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, assert(surface_to_map); - map = ws->buffer_map(ws, surface_to_map->buffer, flags); + map = ws->buffer_map(ws, nv30mt->buffer, flags); if (!map) return NULL; @@ -174,6 +175,7 @@ nv30_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { struct pipe_winsys *ws = screen->winsys; struct pipe_surface *surface_to_unmap; + struct nv30_miptree *nv30mt = (struct nv30_miptree *)surface->texture; /* TODO: Copy from shadow just before push buffer is flushed instead. There are probably some programs that map/unmap excessively @@ -190,7 +192,7 @@ nv30_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) assert(surface_to_unmap); - ws->buffer_unmap(ws, surface_to_unmap->buffer); + ws->buffer_unmap(ws, nv30mt->buffer); if (surface_to_unmap != surface) { struct nv30_screen *nvscreen = nv30_screen(screen); diff --git a/src/gallium/drivers/nv30/nv30_state_fb.c b/src/gallium/drivers/nv30/nv30_state_fb.c index 8536acc570..77368cb205 100644 --- a/src/gallium/drivers/nv30/nv30_state_fb.c +++ b/src/gallium/drivers/nv30/nv30_state_fb.c @@ -12,6 +12,7 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) unsigned rt_flags = NOUVEAU_BO_RDWR | NOUVEAU_BO_VRAM; unsigned w = fb->width; unsigned h = fb->height; + struct nv30_miptree *nv30mt; rt_enable = 0; for (i = 0; i < fb->nr_cbufs; i++) { @@ -77,34 +78,37 @@ nv30_state_framebuffer_validate(struct nv30_context *nv30) pitch |= (pitch << 16); } + nv30mt = (struct nv30_miptree *)rt[0]->texture; so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR0, 1); - so_reloc (so, rt[0]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + so_reloc (so, nv30mt->buffer, 0, rt_flags | NOUVEAU_BO_OR, nv30->nvws->channel->vram->handle, nv30->nvws->channel->gart->handle); so_method(so, nv30->screen->rankine, NV34TCL_COLOR0_PITCH, 2); so_data (so, pitch); - so_reloc (so, rt[0]->buffer, rt[0]->offset, rt_flags | + so_reloc (so, nv30mt->buffer, rt[0]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); } if (rt_enable & NV34TCL_RT_ENABLE_COLOR1) { + nv30mt = (struct nv30_miptree *)rt[1]->texture; so_method(so, nv30->screen->rankine, NV34TCL_DMA_COLOR1, 1); - so_reloc (so, rt[1]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + so_reloc (so, nv30mt->buffer, 0, rt_flags | NOUVEAU_BO_OR, nv30->nvws->channel->vram->handle, nv30->nvws->channel->gart->handle); so_method(so, nv30->screen->rankine, NV34TCL_COLOR1_OFFSET, 2); - so_reloc (so, rt[1]->buffer, rt[1]->offset, rt_flags | + so_reloc (so, nv30mt->buffer, rt[1]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); so_data (so, rt[1]->stride); } if (zeta_format) { + nv30mt = (struct nv30_miptree *)zeta->texture; so_method(so, nv30->screen->rankine, NV34TCL_DMA_ZETA, 1); - so_reloc (so, zeta->buffer, 0, rt_flags | NOUVEAU_BO_OR, + so_reloc (so, nv30mt->buffer, 0, rt_flags | NOUVEAU_BO_OR, nv30->nvws->channel->vram->handle, nv30->nvws->channel->gart->handle); so_method(so, nv30->screen->rankine, NV34TCL_ZETA_OFFSET, 1); - so_reloc (so, zeta->buffer, zeta->offset, rt_flags | + so_reloc (so, nv30mt->buffer, zeta->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); /* TODO: allocate LMA depth buffer */ } diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c index ba912ddcbb..b1fba11d2f 100644 --- a/src/gallium/drivers/nv40/nv40_miptree.c +++ b/src/gallium/drivers/nv40/nv40_miptree.c @@ -144,7 +144,6 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, if (!ps) return NULL; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(pscreen, &ps->buffer, mt->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; @@ -182,7 +181,6 @@ nv40_miptree_surface_del(struct pipe_screen *pscreen, return; pipe_texture_reference(&ps->texture, NULL); - pipe_buffer_reference(pscreen, &ps->buffer, NULL); FREE(ps); } diff --git a/src/gallium/drivers/nv40/nv40_screen.c b/src/gallium/drivers/nv40/nv40_screen.c index 46fe133d71..41d342d27d 100644 --- a/src/gallium/drivers/nv40/nv40_screen.c +++ b/src/gallium/drivers/nv40/nv40_screen.c @@ -143,6 +143,7 @@ nv40_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, struct pipe_winsys *ws = screen->winsys; struct pipe_surface *surface_to_map; void *map; + struct nv40_miptree *mt; if (!(surface->texture->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR)) { struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture; @@ -170,8 +171,8 @@ nv40_surface_map(struct pipe_screen *screen, struct pipe_surface *surface, surface_to_map = surface; assert(surface_to_map); - - map = ws->buffer_map(ws, surface_to_map->buffer, flags); + mt = (struct nv40_miptree *)surface_to_map->texture; + map = ws->buffer_map(ws, mt->buffer, flags); if (!map) return NULL; @@ -183,6 +184,7 @@ nv40_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { struct pipe_winsys *ws = screen->winsys; struct pipe_surface *surface_to_unmap; + struct nv40_miptree *mt; /* TODO: Copy from shadow just before push buffer is flushed instead. There are probably some programs that map/unmap excessively @@ -199,7 +201,8 @@ nv40_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) assert(surface_to_unmap); - ws->buffer_unmap(ws, surface_to_unmap->buffer); + mt = (struct nv40_miptree *)surface_to_unmap->texture; + ws->buffer_unmap(ws, mt->buffer); if (surface_to_unmap != surface) { struct nv40_screen *nvscreen = nv40_screen(screen); diff --git a/src/gallium/drivers/nv40/nv40_state_fb.c b/src/gallium/drivers/nv40/nv40_state_fb.c index a2e09e18a4..454abad31f 100644 --- a/src/gallium/drivers/nv40/nv40_state_fb.c +++ b/src/gallium/drivers/nv40/nv40_state_fb.c @@ -1,6 +1,13 @@ #include "nv40_context.h" #include "nouveau/nouveau_util.h" +static struct pipe_buffer * +nv40_surface_buffer(struct pipe_surface *surface) +{ + struct nv40_miptree *mt = (struct nv40_miptree *)surface->texture; + return mt->buffer; +} + static boolean nv40_state_framebuffer_validate(struct nv40_context *nv40) { @@ -71,33 +78,33 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) if (rt_enable & NV40TCL_RT_ENABLE_COLOR0) { so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR0, 1); - so_reloc (so, rt[0]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + so_reloc (so, nv40_surface_buffer(rt[0]), 0, rt_flags | NOUVEAU_BO_OR, nv40->nvws->channel->vram->handle, nv40->nvws->channel->gart->handle); so_method(so, nv40->screen->curie, NV40TCL_COLOR0_PITCH, 2); so_data (so, rt[0]->stride); - so_reloc (so, rt[0]->buffer, rt[0]->offset, rt_flags | + so_reloc (so, nv40_surface_buffer(rt[0]), rt[0]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); } if (rt_enable & NV40TCL_RT_ENABLE_COLOR1) { so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR1, 1); - so_reloc (so, rt[1]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + so_reloc (so, nv40_surface_buffer(rt[1]), 0, rt_flags | NOUVEAU_BO_OR, nv40->nvws->channel->vram->handle, nv40->nvws->channel->gart->handle); so_method(so, nv40->screen->curie, NV40TCL_COLOR1_OFFSET, 2); - so_reloc (so, rt[1]->buffer, rt[1]->offset, rt_flags | + so_reloc (so, nv40_surface_buffer(rt[1]), rt[1]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); so_data (so, rt[1]->stride); } if (rt_enable & NV40TCL_RT_ENABLE_COLOR2) { so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR2, 1); - so_reloc (so, rt[2]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + so_reloc (so, nv40_surface_buffer(rt[2]), 0, rt_flags | NOUVEAU_BO_OR, nv40->nvws->channel->vram->handle, nv40->nvws->channel->gart->handle); so_method(so, nv40->screen->curie, NV40TCL_COLOR2_OFFSET, 1); - so_reloc (so, rt[2]->buffer, rt[2]->offset, rt_flags | + so_reloc (so, nv40_surface_buffer(rt[2]), rt[2]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); so_method(so, nv40->screen->curie, NV40TCL_COLOR2_PITCH, 1); so_data (so, rt[2]->stride); @@ -105,11 +112,11 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) if (rt_enable & NV40TCL_RT_ENABLE_COLOR3) { so_method(so, nv40->screen->curie, NV40TCL_DMA_COLOR3, 1); - so_reloc (so, rt[3]->buffer, 0, rt_flags | NOUVEAU_BO_OR, + so_reloc (so, nv40_surface_buffer(rt[3]), 0, rt_flags | NOUVEAU_BO_OR, nv40->nvws->channel->vram->handle, nv40->nvws->channel->gart->handle); so_method(so, nv40->screen->curie, NV40TCL_COLOR3_OFFSET, 1); - so_reloc (so, rt[3]->buffer, rt[3]->offset, rt_flags | + so_reloc (so, nv40_surface_buffer(rt[3]), rt[3]->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); so_method(so, nv40->screen->curie, NV40TCL_COLOR3_PITCH, 1); so_data (so, rt[3]->stride); @@ -117,11 +124,11 @@ nv40_state_framebuffer_validate(struct nv40_context *nv40) if (zeta_format) { so_method(so, nv40->screen->curie, NV40TCL_DMA_ZETA, 1); - so_reloc (so, zeta->buffer, 0, rt_flags | NOUVEAU_BO_OR, + so_reloc (so, nv40_surface_buffer(zeta), 0, rt_flags | NOUVEAU_BO_OR, nv40->nvws->channel->vram->handle, nv40->nvws->channel->gart->handle); so_method(so, nv40->screen->curie, NV40TCL_ZETA_OFFSET, 1); - so_reloc (so, zeta->buffer, zeta->offset, rt_flags | + so_reloc (so, nv40_surface_buffer(zeta), zeta->offset, rt_flags | NOUVEAU_BO_LOW, 0, 0); so_method(so, nv40->screen->curie, NV40TCL_ZETA_PITCH, 1); so_data (so, zeta->stride); diff --git a/src/gallium/drivers/nv50/nv50_context.h b/src/gallium/drivers/nv50/nv50_context.h index 061a4c064b..6c9e18429a 100644 --- a/src/gallium/drivers/nv50/nv50_context.h +++ b/src/gallium/drivers/nv50/nv50_context.h @@ -99,6 +99,13 @@ nv50_surface(struct pipe_surface *pt) return (struct nv50_surface *)pt; } +static INLINE struct pipe_buffer * +nv50_surface_buffer(struct pipe_surface *surface) +{ + struct nv50_miptree *mt = (struct nv50_miptree *)surface->texture; + return mt->buffer; +} + struct nv50_state { unsigned dirty; diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c index 7770fcc3f2..c6e65c9816 100644 --- a/src/gallium/drivers/nv50/nv50_miptree.c +++ b/src/gallium/drivers/nv50/nv50_miptree.c @@ -179,7 +179,7 @@ nv50_miptree_sync(struct pipe_screen *pscreen, struct nv50_miptree *mt, } /* The reverse of the above */ -void +static void nv50_miptree_sync_cpu(struct pipe_screen *pscreen, struct nv50_miptree *mt, unsigned level, unsigned image) { @@ -232,7 +232,6 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, if (!ps) return NULL; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(pscreen, &ps->buffer, mt->buffer); ps->format = pt->format; ps->width = pt->width[level]; ps->height = pt->height[level]; @@ -253,7 +252,6 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ps->offset = 0; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(pscreen, &ps->buffer, lvl->image[img]); if (flags & PIPE_BUFFER_USAGE_CPU_WRITE) mark_dirty(lvl->image_dirty_cpu, img); @@ -262,7 +260,6 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt, ps->offset = lvl->image_offset[img]; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(pscreen, &ps->buffer, mt->buffer); if (flags & PIPE_BUFFER_USAGE_GPU_WRITE) mark_dirty(lvl->image_dirty_gpu, img); @@ -282,7 +279,6 @@ nv50_miptree_surface_del(struct pipe_screen *pscreen, if (--ps->refcount <= 0) { pipe_texture_reference(&ps->texture, NULL); - pipe_buffer_reference(pscreen, &ps->buffer, NULL); FREE(s); } } diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c index 7686f746eb..b902c8cf53 100644 --- a/src/gallium/drivers/nv50/nv50_program.c +++ b/src/gallium/drivers/nv50/nv50_program.c @@ -970,7 +970,7 @@ nv50_program_tx_insn(struct nv50_pc *pc, const union tgsi_full_token *tok) } for (i = 0; i < inst->Instruction.NumSrcRegs; i++) { - struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i]; + const struct tgsi_full_src_register *fs = &inst->FullSrcRegisters[i]; if (fs->SrcRegister.File == TGSI_FILE_SAMPLER) unit = fs->SrcRegister.Index; diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c index 4dc4c04493..602d76ac74 100644 --- a/src/gallium/drivers/nv50/nv50_state_validate.c +++ b/src/gallium/drivers/nv50/nv50_state_validate.c @@ -46,9 +46,9 @@ nv50_state_validate_fb(struct nv50_context *nv50) so_data (so, fb->cbufs[i]->height); so_method(so, tesla, NV50TCL_RT_ADDRESS_HIGH(i), 5); - so_reloc (so, fb->cbufs[i]->buffer, fb->cbufs[i]->offset, + so_reloc (so, nv50_surface_buffer(fb->cbufs[i]), fb->cbufs[i]->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, fb->cbufs[i]->buffer, fb->cbufs[i]->offset, + so_reloc (so, nv50_surface_buffer(fb->cbufs[i]), fb->cbufs[i]->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW, 0, 0); switch (fb->cbufs[i]->format) { case PIPE_FORMAT_A8R8G8B8_UNORM: @@ -81,9 +81,9 @@ nv50_state_validate_fb(struct nv50_context *nv50) } so_method(so, tesla, NV50TCL_ZETA_ADDRESS_HIGH, 5); - so_reloc (so, fb->zsbuf->buffer, fb->zsbuf->offset, + so_reloc (so, nv50_surface_buffer(fb->zsbuf), fb->zsbuf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_HIGH, 0, 0); - so_reloc (so, fb->zsbuf->buffer, fb->zsbuf->offset, + so_reloc (so, nv50_surface_buffer(fb->zsbuf), fb->zsbuf->offset, NOUVEAU_BO_VRAM | NOUVEAU_BO_LOW, 0, 0); switch (fb->zsbuf->format) { case PIPE_FORMAT_Z24S8_UNORM: diff --git a/src/gallium/drivers/nv50/nv50_surface.c b/src/gallium/drivers/nv50/nv50_surface.c index ed6602ba36..8ebbc84817 100644 --- a/src/gallium/drivers/nv50/nv50_surface.c +++ b/src/gallium/drivers/nv50/nv50_surface.c @@ -65,7 +65,7 @@ nv50_surface_map(struct pipe_screen *screen, struct pipe_surface *ps, { struct pipe_winsys *ws = screen->winsys; - return ws->buffer_map(ws, ps->buffer, flags); + return ws->buffer_map(ws, nv50_surface_buffer(ps), flags); } static void @@ -73,7 +73,7 @@ nv50_surface_unmap(struct pipe_screen *pscreen, struct pipe_surface *ps) { struct pipe_winsys *ws = pscreen->winsys; - ws->buffer_unmap(ws, ps->buffer); + ws->buffer_unmap(ws, nv50_surface_buffer(ps)); } void diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c index 5952378152..7af8398193 100644 --- a/src/gallium/drivers/softpipe/sp_texture.c +++ b/src/gallium/drivers/softpipe/sp_texture.c @@ -211,11 +211,9 @@ softpipe_get_tex_surface(struct pipe_screen *screen, assert(level <= pt->last_level); ps = CALLOC_STRUCT(pipe_surface); - ps->refcount = 1; if (ps) { - assert(ps->refcount); + ps->refcount = 1; pipe_texture_reference(&ps->texture, pt); - pipe_buffer_reference(screen, &ps->buffer, spt->buffer); ps->format = pt->format; ps->block = pt->block; ps->width = pt->width[level]; @@ -225,7 +223,7 @@ softpipe_get_tex_surface(struct pipe_screen *screen, ps->stride = spt->stride[level]; ps->offset = spt->level_offset[level]; ps->usage = usage; - + /* Because we are softpipe, anything that the state tracker * thought was going to be done with the GPU will actually get * done with the CPU. Let's adjust the flags to take that into @@ -274,8 +272,7 @@ softpipe_tex_surface_release(struct pipe_screen *screen, */ assert ((*s)->texture); if (--surf->refcount == 0) { - pipe_texture_reference(&surf->texture, NULL); - pipe_buffer_reference(screen, &surf->buffer, NULL); + pipe_texture_reference(&surf->texture, NULL); FREE(surf); } *s = NULL; @@ -288,13 +285,16 @@ softpipe_surface_map( struct pipe_screen *screen, unsigned flags ) { ubyte *map; + struct softpipe_texture *spt; if (flags & ~surface->usage) { assert(0); return NULL; } - map = pipe_buffer_map( screen, surface->buffer, flags ); + assert(surface->texture); + spt = softpipe_texture(surface->texture); + map = pipe_buffer_map(screen, spt->buffer, flags); if (map == NULL) return NULL; @@ -318,7 +318,12 @@ static void softpipe_surface_unmap(struct pipe_screen *screen, struct pipe_surface *surface) { - pipe_buffer_unmap( screen, surface->buffer ); + struct softpipe_texture *spt; + + assert(surface->texture); + spt = softpipe_texture(surface->texture); + + pipe_buffer_unmap( screen, spt->buffer ); } diff --git a/src/gallium/drivers/softpipe/sp_tile_cache.c b/src/gallium/drivers/softpipe/sp_tile_cache.c index 78b0efa46d..ab76009375 100644 --- a/src/gallium/drivers/softpipe/sp_tile_cache.c +++ b/src/gallium/drivers/softpipe/sp_tile_cache.c @@ -369,7 +369,7 @@ sp_flush_tile_cache(struct softpipe_context *softpipe, struct pipe_surface *ps = tc->surface; int inuse = 0, pos; - if (ps && ps->buffer) { + if (ps) { /* caching a drawing surface */ for (pos = 0; pos < NUM_ENTRIES; pos++) { struct softpipe_cached_tile *tile = tc->entries + pos; diff --git a/src/gallium/drivers/trace/tr_state.c b/src/gallium/drivers/trace/tr_state.c index b23ccc1a3d..524f2d6194 100644 --- a/src/gallium/drivers/trace/tr_state.c +++ b/src/gallium/drivers/trace/tr_state.c @@ -397,7 +397,6 @@ void trace_dump_surface(const struct pipe_surface *state) trace_dump_struct_begin("pipe_surface"); - trace_dump_member(ptr, state, buffer); trace_dump_member(format, state, format); trace_dump_member(uint, state, status); trace_dump_member(uint, state, clear_value); diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h index 13fa9ba848..dd0dfac238 100644 --- a/src/gallium/include/pipe/p_state.h +++ b/src/gallium/include/pipe/p_state.h @@ -276,7 +276,6 @@ struct pipe_sampler_state */ struct pipe_surface { - struct pipe_buffer *buffer; /**< surface's buffer/memory */ enum pipe_format format; /**< PIPE_FORMAT_x */ unsigned status; /**< PIPE_SURFACE_STATUS_x */ unsigned clear_value; /**< XXX may be temporary */ diff --git a/src/gallium/winsys/egl_xlib/egl_xlib.c b/src/gallium/winsys/egl_xlib/egl_xlib.c index 4876339107..c6b0e3d8c5 100644 --- a/src/gallium/winsys/egl_xlib/egl_xlib.c +++ b/src/gallium/winsys/egl_xlib/egl_xlib.c @@ -41,6 +41,7 @@ #include "pipe/internal/p_winsys_screen.h" #include "util/u_memory.h" #include "softpipe/sp_winsys.h" +#include "softpipe/sp_texture.h" #include "eglconfig.h" #include "eglconfigutil.h" @@ -292,7 +293,7 @@ display_surface(struct pipe_winsys *pws, assert(ximage->format); assert(ximage->bitmap_unit); - data = pws->buffer_map(pws, psurf->buffer, 0); + data = pws->buffer_map(pws, softpipe_texture(psurf->texture)->buffer, 0); /* update XImage's fields */ ximage->data = data; @@ -308,7 +309,7 @@ display_surface(struct pipe_winsys *pws, ximage->data = NULL; XDestroyImage(ximage); - pws->buffer_unmap(pws, psurf->buffer); + pws->buffer_unmap(pws, softpipe_texture(psurf->texture)->buffer); } diff --git a/src/gallium/winsys/xlib/Makefile b/src/gallium/winsys/xlib/Makefile index 5b71e6dc38..8c2892d49b 100644 --- a/src/gallium/winsys/xlib/Makefile +++ b/src/gallium/winsys/xlib/Makefile @@ -22,11 +22,11 @@ INCLUDE_DIRS = \ -I$(TOP)/src/gallium/state_trackers/glx/xlib \ -I$(TOP)/src/gallium/auxiliary -DEFINES = \ +DEFINES += \ -DGALLIUM_SOFTPIPE \ - -DGALLIUM_CELL \ -DGALLIUM_TRACE \ -DGALLIUM_BRW +#-DGALLIUM_CELL will be defined by the config */ XLIB_WINSYS_SOURCES = \ xlib.c \ diff --git a/src/gallium/winsys/xlib/xlib_brw_screen.c b/src/gallium/winsys/xlib/xlib_brw_screen.c index 5344c502ef..51740a9af6 100644 --- a/src/gallium/winsys/xlib/xlib_brw_screen.c +++ b/src/gallium/winsys/xlib/xlib_brw_screen.c @@ -42,12 +42,19 @@ #include "util/u_memory.h" #include "i965simple/brw_winsys.h" #include "i965simple/brw_screen.h" +#include "i965simple/brw_context.h" + #include "xlib_brw_aub.h" #include "xlib_brw.h" #include "xlib.h" - +static struct pipe_buffer * +buffer_from_surface(struct pipe_surface *surface) +{ + struct brw_texture *texture = (struct brw_texture *)surface; + return texture->buffer; +} struct aub_buffer { char *data; @@ -226,7 +233,7 @@ aub_flush_frontbuffer( struct pipe_winsys *winsys, // struct aub_pipe_winsys *iws = aub_pipe_winsys(winsys); brw_aub_dump_bmp( global_winsys->aubfile, surface, - aub_bo(surface->buffer)->offset ); + aub_bo(buffer_from_surface(surface))->offset ); } @@ -449,7 +456,7 @@ xlib_brw_display_surface(struct xmesa_buffer *b, { brw_aub_dump_bmp( global_winsys->aubfile, surf, - aub_bo(surf->buffer)->offset ); + aub_bo(buffer_from_surface(surf))->offset ); } diff --git a/src/gallium/winsys/xlib/xlib_cell.c b/src/gallium/winsys/xlib/xlib_cell.c index bf69593c5c..c87564f4dc 100644 --- a/src/gallium/winsys/xlib/xlib_cell.c +++ b/src/gallium/winsys/xlib/xlib_cell.c @@ -51,6 +51,7 @@ #include "cell/ppu/cell_context.h" #include "cell/ppu/cell_screen.h" #include "cell/ppu/cell_winsys.h" +#include "cell/ppu/cell_texture.h" /** @@ -164,7 +165,8 @@ static void xlib_cell_display_surface(struct xmesa_buffer *b, struct pipe_surface *surf) { XImage *ximage; - struct xm_buffer *xm_buf = xm_buffer(surf->buffer); + struct xm_buffer *xm_buf = xm_buffer( + cell_texture(surf->texture)->buffer); const uint tilesPerRow = (surf->width + TILE_SIZE - 1) / TILE_SIZE; uint x, y; diff --git a/src/gallium/winsys/xlib/xlib_softpipe.c b/src/gallium/winsys/xlib/xlib_softpipe.c index 01d24584e2..586e1dfca5 100644 --- a/src/gallium/winsys/xlib/xlib_softpipe.c +++ b/src/gallium/winsys/xlib/xlib_softpipe.c @@ -45,6 +45,7 @@ #include "util/u_math.h" #include "util/u_memory.h" #include "softpipe/sp_winsys.h" +#include "softpipe/sp_texture.h" #include "xlib.h" @@ -58,7 +59,7 @@ struct xm_buffer boolean userBuffer; /** Is this a user-space buffer? */ void *data; void *mapped; - + XImage *tempImage; int shm; XShmSegmentInfo shminfo; @@ -225,11 +226,12 @@ xm_buffer_destroy(struct pipe_winsys *pws, * by the XMesaBuffer. */ static void -xlib_softpipe_display_surface(struct xmesa_buffer *b, +xlib_softpipe_display_surface(struct xmesa_buffer *b, struct pipe_surface *surf) { XImage *ximage; - struct xm_buffer *xm_buf = xm_buffer(surf->buffer); + struct xm_buffer *xm_buf = xm_buffer( + softpipe_texture(surf->texture)->buffer); static boolean no_swap = 0; static boolean firsttime = 1; diff --git a/src/mesa/state_tracker/st_cb_fbo.c b/src/mesa/state_tracker/st_cb_fbo.c index d18946de7d..963ac902d2 100644 --- a/src/mesa/state_tracker/st_cb_fbo.c +++ b/src/mesa/state_tracker/st_cb_fbo.c @@ -171,7 +171,6 @@ st_renderbuffer_alloc_storage(GLcontext * ctx, struct gl_renderbuffer *rb, surface_usage ); assert(strb->surface->texture); - assert(strb->surface->buffer); assert(strb->surface->format); assert(strb->surface->block.size); assert(strb->surface->block.width); diff --git a/src/mesa/state_tracker/st_gen_mipmap.c b/src/mesa/state_tracker/st_gen_mipmap.c index 9e5c35072a..6a3455aaba 100644 --- a/src/mesa/state_tracker/st_gen_mipmap.c +++ b/src/mesa/state_tracker/st_gen_mipmap.c @@ -128,11 +128,11 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, dstSurf = screen->get_tex_surface(screen, pt, face, dstLevel, zslice, PIPE_BUFFER_USAGE_CPU_WRITE); - srcData = (ubyte *) pipe_buffer_map(pipe->screen, srcSurf->buffer, - PIPE_BUFFER_USAGE_CPU_READ) + srcData = (ubyte *) pipe_surface_map(srcSurf, + PIPE_BUFFER_USAGE_CPU_READ) + srcSurf->offset; - dstData = (ubyte *) pipe_buffer_map(pipe->screen, dstSurf->buffer, - PIPE_BUFFER_USAGE_CPU_WRITE) + dstData = (ubyte *) pipe_surface_map(dstSurf, + PIPE_BUFFER_USAGE_CPU_WRITE) + dstSurf->offset; _mesa_generate_mipmap_level(target, datatype, comps, @@ -144,8 +144,8 @@ fallback_generate_mipmap(GLcontext *ctx, GLenum target, dstData, dstSurf->stride); /* stride in bytes */ - pipe_buffer_unmap(pipe->screen, srcSurf->buffer); - pipe_buffer_unmap(pipe->screen, dstSurf->buffer); + pipe_surface_unmap(srcSurf); + pipe_surface_unmap(dstSurf); pipe_surface_reference(&srcSurf, NULL); pipe_surface_reference(&dstSurf, NULL); -- cgit v1.2.3 From 3120894c6d33a26cda642246344e8945db200ac2 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Thu, 5 Feb 2009 14:44:17 +0000 Subject: gallium: add new aux lib for index list translations Could this be the ultimate index list translating utility? Maybe, but it doesn't yet include support for splitting primitives. Unlike previous attempts, this captures all possible combinations of API and hardware provoking vertex, supports generated list reuse and various other tricks. Relies on python-generated code. --- configs/default | 2 +- src/gallium/SConscript | 1 + src/gallium/auxiliary/indices/Makefile | 16 + src/gallium/auxiliary/indices/SConscript | 17 + src/gallium/auxiliary/indices/u_indices.c | 253 ++ src/gallium/auxiliary/indices/u_indices.h | 83 + src/gallium/auxiliary/indices/u_indices_gen.c | 5129 ++++++++++++++++++++++++ src/gallium/auxiliary/indices/u_indices_gen.py | 318 ++ src/gallium/auxiliary/indices/u_indices_priv.h | 43 + 9 files changed, 5861 insertions(+), 1 deletion(-) create mode 100644 src/gallium/auxiliary/indices/Makefile create mode 100644 src/gallium/auxiliary/indices/SConscript create mode 100644 src/gallium/auxiliary/indices/u_indices.c create mode 100644 src/gallium/auxiliary/indices/u_indices.h create mode 100644 src/gallium/auxiliary/indices/u_indices_gen.c create mode 100644 src/gallium/auxiliary/indices/u_indices_gen.py create mode 100644 src/gallium/auxiliary/indices/u_indices_priv.h (limited to 'src/gallium/auxiliary') diff --git a/configs/default b/configs/default index 40f3a2a02d..13bda58f18 100644 --- a/configs/default +++ b/configs/default @@ -89,7 +89,7 @@ PROGRAM_DIRS = demos redbook samples glsl xdemos EGL_DRIVERS_DIRS = demo # Gallium directories and -GALLIUM_AUXILIARY_DIRS = draw translate cso_cache pipebuffer tgsi sct rtasm util +GALLIUM_AUXILIARY_DIRS = draw translate cso_cache pipebuffer tgsi sct rtasm util indices GALLIUM_AUXILIARIES = $(foreach DIR,$(GALLIUM_AUXILIARY_DIRS),$(TOP)/src/gallium/auxiliary/$(DIR)/lib$(DIR).a) GALLIUM_DRIVER_DIRS = softpipe i915simple i965simple nv04 nv10 nv20 nv30 nv40 nv50 failover trace GALLIUM_DRIVERS = $(foreach DIR,$(GALLIUM_DRIVER_DIRS),$(TOP)/src/gallium/drivers/$(DIR)/lib$(DIR).a) diff --git a/src/gallium/SConscript b/src/gallium/SConscript index 85baf51a7f..0c632ac2b8 100644 --- a/src/gallium/SConscript +++ b/src/gallium/SConscript @@ -21,6 +21,7 @@ SConscript([ 'auxiliary/translate/SConscript', 'auxiliary/draw/SConscript', 'auxiliary/pipebuffer/SConscript', + 'auxiliary/indices/SConscript', ]) for driver in env['drivers']: diff --git a/src/gallium/auxiliary/indices/Makefile b/src/gallium/auxiliary/indices/Makefile new file mode 100644 index 0000000000..8fa61d265e --- /dev/null +++ b/src/gallium/auxiliary/indices/Makefile @@ -0,0 +1,16 @@ +TOP = ../../../.. +include $(TOP)/configs/current + +LIBNAME = indices + +C_SOURCES = \ + u_indices_gen.c + +include ../../Makefile.template + +u_indices_gen.c: u_indices_gen.py + python $< > $@ + + +symlinks: + diff --git a/src/gallium/auxiliary/indices/SConscript b/src/gallium/auxiliary/indices/SConscript new file mode 100644 index 0000000000..65a43a9f64 --- /dev/null +++ b/src/gallium/auxiliary/indices/SConscript @@ -0,0 +1,17 @@ +Import('*') + +env.CodeGenerate( + target = 'u_indices_gen.c', + script = 'u_indices_gen.py', + source = [], + command = 'python $SCRIPT > $TARGET' +) + +indices = env.ConvenienceLibrary( + target = 'indices', + source = [ +# 'u_indices.c', + 'u_indices_gen.c', + ]) + +auxiliaries.insert(0, indices) diff --git a/src/gallium/auxiliary/indices/u_indices.c b/src/gallium/auxiliary/indices/u_indices.c new file mode 100644 index 0000000000..0cf7d88653 --- /dev/null +++ b/src/gallium/auxiliary/indices/u_indices.c @@ -0,0 +1,253 @@ +/* + * Copyright 2009 VMware, Inc. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * on the rights to use, copy, modify, merge, publish, distribute, sub + * license, and/or sell copies of the Software, and to permit persons to whom + * the Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * VMWARE AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +#include "u_indices.h" +#include "u_indices_priv.h" + +static void translate_memcpy_ushort( const void *in, + unsigned nr, + void *out ) +{ + memcpy(out, in, nr*sizeof(short)); +} + +static void translate_memcpy_uint( const void *in, + unsigned nr, + void *out ) +{ + memcpy(out, in, nr*sizeof(int)); +} + + +int u_index_translator( unsigned hw_mask, + unsigned prim, + unsigned in_index_size, + unsigned nr, + unsigned in_pv, + unsigned out_pv, + unsigned *out_prim, + unsigned *out_index_size, + unsigned *out_nr, + u_translate_func *out_translate ) +{ + unsigned in_idx; + unsigned out_idx; + int ret = U_TRANSLATE_NORMAL; + + u_index_init(); + + in_idx = in_size_idx(in_index_size); + *out_index_size = (in_index_size == 4) ? 4 : 2; + out_idx = out_size_idx(*out_index_size); + + if ((hw_mask & (1< 0xfffe) ? 4 : 2; + out_idx = out_size_idx(*out_index_size); + + if ((hw_mask & (1< Date: Thu, 5 Feb 2009 16:04:13 +0000 Subject: indices: quad fixes --- src/gallium/auxiliary/indices/u_indices_gen.c | 256 ++++++++++++------------- src/gallium/auxiliary/indices/u_indices_gen.py | 9 +- 2 files changed, 133 insertions(+), 132 deletions(-) (limited to 'src/gallium/auxiliary') diff --git a/src/gallium/auxiliary/indices/u_indices_gen.c b/src/gallium/auxiliary/indices/u_indices_gen.c index 00b99804f2..01953c3244 100644 --- a/src/gallium/auxiliary/indices/u_indices_gen.c +++ b/src/gallium/auxiliary/indices/u_indices_gen.c @@ -176,11 +176,11 @@ static void generate_quadstrip_ushort_first2first( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (ushort)(i+0); - (out+j+0)[1] = (ushort)(i+1); + (out+j+0)[0] = (ushort)(i+2); + (out+j+0)[1] = (ushort)(i+0); (out+j+0)[2] = (ushort)(i+3); - (out+j+3)[0] = (ushort)(i+1); - (out+j+3)[1] = (ushort)(i+2); + (out+j+3)[0] = (ushort)(i+0); + (out+j+3)[1] = (ushort)(i+1); (out+j+3)[2] = (ushort)(i+3); } } @@ -309,12 +309,12 @@ static void generate_quadstrip_ushort_first2last( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (ushort)(i+1); + (out+j+0)[0] = (ushort)(i+0); (out+j+0)[1] = (ushort)(i+3); - (out+j+0)[2] = (ushort)(i+0); - (out+j+3)[0] = (ushort)(i+2); + (out+j+0)[2] = (ushort)(i+2); + (out+j+3)[0] = (ushort)(i+1); (out+j+3)[1] = (ushort)(i+3); - (out+j+3)[2] = (ushort)(i+1); + (out+j+3)[2] = (ushort)(i+0); } } static void generate_polygon_ushort_first2last( @@ -443,11 +443,11 @@ static void generate_quadstrip_ushort_last2first( (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { (out+j+0)[0] = (ushort)(i+3); - (out+j+0)[1] = (ushort)(i+0); - (out+j+0)[2] = (ushort)(i+1); + (out+j+0)[1] = (ushort)(i+2); + (out+j+0)[2] = (ushort)(i+0); (out+j+3)[0] = (ushort)(i+3); - (out+j+3)[1] = (ushort)(i+1); - (out+j+3)[2] = (ushort)(i+2); + (out+j+3)[1] = (ushort)(i+0); + (out+j+3)[2] = (ushort)(i+1); } } static void generate_polygon_ushort_last2first( @@ -575,11 +575,11 @@ static void generate_quadstrip_ushort_last2last( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (ushort)(i+0); - (out+j+0)[1] = (ushort)(i+1); + (out+j+0)[0] = (ushort)(i+2); + (out+j+0)[1] = (ushort)(i+0); (out+j+0)[2] = (ushort)(i+3); - (out+j+3)[0] = (ushort)(i+1); - (out+j+3)[1] = (ushort)(i+2); + (out+j+3)[0] = (ushort)(i+0); + (out+j+3)[1] = (ushort)(i+1); (out+j+3)[2] = (ushort)(i+3); } } @@ -708,11 +708,11 @@ static void generate_quadstrip_uint_first2first( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (uint)(i+0); - (out+j+0)[1] = (uint)(i+1); + (out+j+0)[0] = (uint)(i+2); + (out+j+0)[1] = (uint)(i+0); (out+j+0)[2] = (uint)(i+3); - (out+j+3)[0] = (uint)(i+1); - (out+j+3)[1] = (uint)(i+2); + (out+j+3)[0] = (uint)(i+0); + (out+j+3)[1] = (uint)(i+1); (out+j+3)[2] = (uint)(i+3); } } @@ -841,12 +841,12 @@ static void generate_quadstrip_uint_first2last( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (uint)(i+1); + (out+j+0)[0] = (uint)(i+0); (out+j+0)[1] = (uint)(i+3); - (out+j+0)[2] = (uint)(i+0); - (out+j+3)[0] = (uint)(i+2); + (out+j+0)[2] = (uint)(i+2); + (out+j+3)[0] = (uint)(i+1); (out+j+3)[1] = (uint)(i+3); - (out+j+3)[2] = (uint)(i+1); + (out+j+3)[2] = (uint)(i+0); } } static void generate_polygon_uint_first2last( @@ -975,11 +975,11 @@ static void generate_quadstrip_uint_last2first( (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { (out+j+0)[0] = (uint)(i+3); - (out+j+0)[1] = (uint)(i+0); - (out+j+0)[2] = (uint)(i+1); + (out+j+0)[1] = (uint)(i+2); + (out+j+0)[2] = (uint)(i+0); (out+j+3)[0] = (uint)(i+3); - (out+j+3)[1] = (uint)(i+1); - (out+j+3)[2] = (uint)(i+2); + (out+j+3)[1] = (uint)(i+0); + (out+j+3)[2] = (uint)(i+1); } } static void generate_polygon_uint_last2first( @@ -1107,11 +1107,11 @@ static void generate_quadstrip_uint_last2last( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (uint)(i+0); - (out+j+0)[1] = (uint)(i+1); + (out+j+0)[0] = (uint)(i+2); + (out+j+0)[1] = (uint)(i+0); (out+j+0)[2] = (uint)(i+3); - (out+j+3)[0] = (uint)(i+1); - (out+j+3)[1] = (uint)(i+2); + (out+j+3)[0] = (uint)(i+0); + (out+j+3)[1] = (uint)(i+1); (out+j+3)[2] = (uint)(i+3); } } @@ -1258,11 +1258,11 @@ static void translate_quadstrip_ubyte2ushort_first2first( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (ushort)in[i+0]; - (out+j+0)[1] = (ushort)in[i+1]; + (out+j+0)[0] = (ushort)in[i+2]; + (out+j+0)[1] = (ushort)in[i+0]; (out+j+0)[2] = (ushort)in[i+3]; - (out+j+3)[0] = (ushort)in[i+1]; - (out+j+3)[1] = (ushort)in[i+2]; + (out+j+3)[0] = (ushort)in[i+0]; + (out+j+3)[1] = (ushort)in[i+1]; (out+j+3)[2] = (ushort)in[i+3]; } } @@ -1411,12 +1411,12 @@ static void translate_quadstrip_ubyte2ushort_first2last( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (ushort)in[i+1]; + (out+j+0)[0] = (ushort)in[i+0]; (out+j+0)[1] = (ushort)in[i+3]; - (out+j+0)[2] = (ushort)in[i+0]; - (out+j+3)[0] = (ushort)in[i+2]; + (out+j+0)[2] = (ushort)in[i+2]; + (out+j+3)[0] = (ushort)in[i+1]; (out+j+3)[1] = (ushort)in[i+3]; - (out+j+3)[2] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+0]; } } static void translate_polygon_ubyte2ushort_first2last( @@ -1565,11 +1565,11 @@ static void translate_quadstrip_ubyte2ushort_last2first( (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { (out+j+0)[0] = (ushort)in[i+3]; - (out+j+0)[1] = (ushort)in[i+0]; - (out+j+0)[2] = (ushort)in[i+1]; + (out+j+0)[1] = (ushort)in[i+2]; + (out+j+0)[2] = (ushort)in[i+0]; (out+j+3)[0] = (ushort)in[i+3]; - (out+j+3)[1] = (ushort)in[i+1]; - (out+j+3)[2] = (ushort)in[i+2]; + (out+j+3)[1] = (ushort)in[i+0]; + (out+j+3)[2] = (ushort)in[i+1]; } } static void translate_polygon_ubyte2ushort_last2first( @@ -1717,11 +1717,11 @@ static void translate_quadstrip_ubyte2ushort_last2last( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (ushort)in[i+0]; - (out+j+0)[1] = (ushort)in[i+1]; + (out+j+0)[0] = (ushort)in[i+2]; + (out+j+0)[1] = (ushort)in[i+0]; (out+j+0)[2] = (ushort)in[i+3]; - (out+j+3)[0] = (ushort)in[i+1]; - (out+j+3)[1] = (ushort)in[i+2]; + (out+j+3)[0] = (ushort)in[i+0]; + (out+j+3)[1] = (ushort)in[i+1]; (out+j+3)[2] = (ushort)in[i+3]; } } @@ -1870,11 +1870,11 @@ static void translate_quadstrip_ubyte2uint_first2first( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (uint)in[i+0]; - (out+j+0)[1] = (uint)in[i+1]; + (out+j+0)[0] = (uint)in[i+2]; + (out+j+0)[1] = (uint)in[i+0]; (out+j+0)[2] = (uint)in[i+3]; - (out+j+3)[0] = (uint)in[i+1]; - (out+j+3)[1] = (uint)in[i+2]; + (out+j+3)[0] = (uint)in[i+0]; + (out+j+3)[1] = (uint)in[i+1]; (out+j+3)[2] = (uint)in[i+3]; } } @@ -2023,12 +2023,12 @@ static void translate_quadstrip_ubyte2uint_first2last( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (uint)in[i+1]; + (out+j+0)[0] = (uint)in[i+0]; (out+j+0)[1] = (uint)in[i+3]; - (out+j+0)[2] = (uint)in[i+0]; - (out+j+3)[0] = (uint)in[i+2]; + (out+j+0)[2] = (uint)in[i+2]; + (out+j+3)[0] = (uint)in[i+1]; (out+j+3)[1] = (uint)in[i+3]; - (out+j+3)[2] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+0]; } } static void translate_polygon_ubyte2uint_first2last( @@ -2177,11 +2177,11 @@ static void translate_quadstrip_ubyte2uint_last2first( (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { (out+j+0)[0] = (uint)in[i+3]; - (out+j+0)[1] = (uint)in[i+0]; - (out+j+0)[2] = (uint)in[i+1]; + (out+j+0)[1] = (uint)in[i+2]; + (out+j+0)[2] = (uint)in[i+0]; (out+j+3)[0] = (uint)in[i+3]; - (out+j+3)[1] = (uint)in[i+1]; - (out+j+3)[2] = (uint)in[i+2]; + (out+j+3)[1] = (uint)in[i+0]; + (out+j+3)[2] = (uint)in[i+1]; } } static void translate_polygon_ubyte2uint_last2first( @@ -2329,11 +2329,11 @@ static void translate_quadstrip_ubyte2uint_last2last( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (uint)in[i+0]; - (out+j+0)[1] = (uint)in[i+1]; + (out+j+0)[0] = (uint)in[i+2]; + (out+j+0)[1] = (uint)in[i+0]; (out+j+0)[2] = (uint)in[i+3]; - (out+j+3)[0] = (uint)in[i+1]; - (out+j+3)[1] = (uint)in[i+2]; + (out+j+3)[0] = (uint)in[i+0]; + (out+j+3)[1] = (uint)in[i+1]; (out+j+3)[2] = (uint)in[i+3]; } } @@ -2482,11 +2482,11 @@ static void translate_quadstrip_ushort2ushort_first2first( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (ushort)in[i+0]; - (out+j+0)[1] = (ushort)in[i+1]; + (out+j+0)[0] = (ushort)in[i+2]; + (out+j+0)[1] = (ushort)in[i+0]; (out+j+0)[2] = (ushort)in[i+3]; - (out+j+3)[0] = (ushort)in[i+1]; - (out+j+3)[1] = (ushort)in[i+2]; + (out+j+3)[0] = (ushort)in[i+0]; + (out+j+3)[1] = (ushort)in[i+1]; (out+j+3)[2] = (ushort)in[i+3]; } } @@ -2635,12 +2635,12 @@ static void translate_quadstrip_ushort2ushort_first2last( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (ushort)in[i+1]; + (out+j+0)[0] = (ushort)in[i+0]; (out+j+0)[1] = (ushort)in[i+3]; - (out+j+0)[2] = (ushort)in[i+0]; - (out+j+3)[0] = (ushort)in[i+2]; + (out+j+0)[2] = (ushort)in[i+2]; + (out+j+3)[0] = (ushort)in[i+1]; (out+j+3)[1] = (ushort)in[i+3]; - (out+j+3)[2] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+0]; } } static void translate_polygon_ushort2ushort_first2last( @@ -2789,11 +2789,11 @@ static void translate_quadstrip_ushort2ushort_last2first( (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { (out+j+0)[0] = (ushort)in[i+3]; - (out+j+0)[1] = (ushort)in[i+0]; - (out+j+0)[2] = (ushort)in[i+1]; + (out+j+0)[1] = (ushort)in[i+2]; + (out+j+0)[2] = (ushort)in[i+0]; (out+j+3)[0] = (ushort)in[i+3]; - (out+j+3)[1] = (ushort)in[i+1]; - (out+j+3)[2] = (ushort)in[i+2]; + (out+j+3)[1] = (ushort)in[i+0]; + (out+j+3)[2] = (ushort)in[i+1]; } } static void translate_polygon_ushort2ushort_last2first( @@ -2941,11 +2941,11 @@ static void translate_quadstrip_ushort2ushort_last2last( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (ushort)in[i+0]; - (out+j+0)[1] = (ushort)in[i+1]; + (out+j+0)[0] = (ushort)in[i+2]; + (out+j+0)[1] = (ushort)in[i+0]; (out+j+0)[2] = (ushort)in[i+3]; - (out+j+3)[0] = (ushort)in[i+1]; - (out+j+3)[1] = (ushort)in[i+2]; + (out+j+3)[0] = (ushort)in[i+0]; + (out+j+3)[1] = (ushort)in[i+1]; (out+j+3)[2] = (ushort)in[i+3]; } } @@ -3094,11 +3094,11 @@ static void translate_quadstrip_ushort2uint_first2first( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (uint)in[i+0]; - (out+j+0)[1] = (uint)in[i+1]; + (out+j+0)[0] = (uint)in[i+2]; + (out+j+0)[1] = (uint)in[i+0]; (out+j+0)[2] = (uint)in[i+3]; - (out+j+3)[0] = (uint)in[i+1]; - (out+j+3)[1] = (uint)in[i+2]; + (out+j+3)[0] = (uint)in[i+0]; + (out+j+3)[1] = (uint)in[i+1]; (out+j+3)[2] = (uint)in[i+3]; } } @@ -3247,12 +3247,12 @@ static void translate_quadstrip_ushort2uint_first2last( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (uint)in[i+1]; + (out+j+0)[0] = (uint)in[i+0]; (out+j+0)[1] = (uint)in[i+3]; - (out+j+0)[2] = (uint)in[i+0]; - (out+j+3)[0] = (uint)in[i+2]; + (out+j+0)[2] = (uint)in[i+2]; + (out+j+3)[0] = (uint)in[i+1]; (out+j+3)[1] = (uint)in[i+3]; - (out+j+3)[2] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+0]; } } static void translate_polygon_ushort2uint_first2last( @@ -3401,11 +3401,11 @@ static void translate_quadstrip_ushort2uint_last2first( (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { (out+j+0)[0] = (uint)in[i+3]; - (out+j+0)[1] = (uint)in[i+0]; - (out+j+0)[2] = (uint)in[i+1]; + (out+j+0)[1] = (uint)in[i+2]; + (out+j+0)[2] = (uint)in[i+0]; (out+j+3)[0] = (uint)in[i+3]; - (out+j+3)[1] = (uint)in[i+1]; - (out+j+3)[2] = (uint)in[i+2]; + (out+j+3)[1] = (uint)in[i+0]; + (out+j+3)[2] = (uint)in[i+1]; } } static void translate_polygon_ushort2uint_last2first( @@ -3553,11 +3553,11 @@ static void translate_quadstrip_ushort2uint_last2last( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (uint)in[i+0]; - (out+j+0)[1] = (uint)in[i+1]; + (out+j+0)[0] = (uint)in[i+2]; + (out+j+0)[1] = (uint)in[i+0]; (out+j+0)[2] = (uint)in[i+3]; - (out+j+3)[0] = (uint)in[i+1]; - (out+j+3)[1] = (uint)in[i+2]; + (out+j+3)[0] = (uint)in[i+0]; + (out+j+3)[1] = (uint)in[i+1]; (out+j+3)[2] = (uint)in[i+3]; } } @@ -3706,11 +3706,11 @@ static void translate_quadstrip_uint2ushort_first2first( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (ushort)in[i+0]; - (out+j+0)[1] = (ushort)in[i+1]; + (out+j+0)[0] = (ushort)in[i+2]; + (out+j+0)[1] = (ushort)in[i+0]; (out+j+0)[2] = (ushort)in[i+3]; - (out+j+3)[0] = (ushort)in[i+1]; - (out+j+3)[1] = (ushort)in[i+2]; + (out+j+3)[0] = (ushort)in[i+0]; + (out+j+3)[1] = (ushort)in[i+1]; (out+j+3)[2] = (ushort)in[i+3]; } } @@ -3859,12 +3859,12 @@ static void translate_quadstrip_uint2ushort_first2last( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (ushort)in[i+1]; + (out+j+0)[0] = (ushort)in[i+0]; (out+j+0)[1] = (ushort)in[i+3]; - (out+j+0)[2] = (ushort)in[i+0]; - (out+j+3)[0] = (ushort)in[i+2]; + (out+j+0)[2] = (ushort)in[i+2]; + (out+j+3)[0] = (ushort)in[i+1]; (out+j+3)[1] = (ushort)in[i+3]; - (out+j+3)[2] = (ushort)in[i+1]; + (out+j+3)[2] = (ushort)in[i+0]; } } static void translate_polygon_uint2ushort_first2last( @@ -4013,11 +4013,11 @@ static void translate_quadstrip_uint2ushort_last2first( (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { (out+j+0)[0] = (ushort)in[i+3]; - (out+j+0)[1] = (ushort)in[i+0]; - (out+j+0)[2] = (ushort)in[i+1]; + (out+j+0)[1] = (ushort)in[i+2]; + (out+j+0)[2] = (ushort)in[i+0]; (out+j+3)[0] = (ushort)in[i+3]; - (out+j+3)[1] = (ushort)in[i+1]; - (out+j+3)[2] = (ushort)in[i+2]; + (out+j+3)[1] = (ushort)in[i+0]; + (out+j+3)[2] = (ushort)in[i+1]; } } static void translate_polygon_uint2ushort_last2first( @@ -4165,11 +4165,11 @@ static void translate_quadstrip_uint2ushort_last2last( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (ushort)in[i+0]; - (out+j+0)[1] = (ushort)in[i+1]; + (out+j+0)[0] = (ushort)in[i+2]; + (out+j+0)[1] = (ushort)in[i+0]; (out+j+0)[2] = (ushort)in[i+3]; - (out+j+3)[0] = (ushort)in[i+1]; - (out+j+3)[1] = (ushort)in[i+2]; + (out+j+3)[0] = (ushort)in[i+0]; + (out+j+3)[1] = (ushort)in[i+1]; (out+j+3)[2] = (ushort)in[i+3]; } } @@ -4318,11 +4318,11 @@ static void translate_quadstrip_uint2uint_first2first( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (uint)in[i+0]; - (out+j+0)[1] = (uint)in[i+1]; + (out+j+0)[0] = (uint)in[i+2]; + (out+j+0)[1] = (uint)in[i+0]; (out+j+0)[2] = (uint)in[i+3]; - (out+j+3)[0] = (uint)in[i+1]; - (out+j+3)[1] = (uint)in[i+2]; + (out+j+3)[0] = (uint)in[i+0]; + (out+j+3)[1] = (uint)in[i+1]; (out+j+3)[2] = (uint)in[i+3]; } } @@ -4471,12 +4471,12 @@ static void translate_quadstrip_uint2uint_first2last( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (uint)in[i+1]; + (out+j+0)[0] = (uint)in[i+0]; (out+j+0)[1] = (uint)in[i+3]; - (out+j+0)[2] = (uint)in[i+0]; - (out+j+3)[0] = (uint)in[i+2]; + (out+j+0)[2] = (uint)in[i+2]; + (out+j+3)[0] = (uint)in[i+1]; (out+j+3)[1] = (uint)in[i+3]; - (out+j+3)[2] = (uint)in[i+1]; + (out+j+3)[2] = (uint)in[i+0]; } } static void translate_polygon_uint2uint_first2last( @@ -4625,11 +4625,11 @@ static void translate_quadstrip_uint2uint_last2first( (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { (out+j+0)[0] = (uint)in[i+3]; - (out+j+0)[1] = (uint)in[i+0]; - (out+j+0)[2] = (uint)in[i+1]; + (out+j+0)[1] = (uint)in[i+2]; + (out+j+0)[2] = (uint)in[i+0]; (out+j+3)[0] = (uint)in[i+3]; - (out+j+3)[1] = (uint)in[i+1]; - (out+j+3)[2] = (uint)in[i+2]; + (out+j+3)[1] = (uint)in[i+0]; + (out+j+3)[2] = (uint)in[i+1]; } } static void translate_polygon_uint2uint_last2first( @@ -4777,11 +4777,11 @@ static void translate_quadstrip_uint2uint_last2last( unsigned i, j; (void)j; for (j = i = 0; j < nr; j+=6, i+=2) { - (out+j+0)[0] = (uint)in[i+0]; - (out+j+0)[1] = (uint)in[i+1]; + (out+j+0)[0] = (uint)in[i+2]; + (out+j+0)[1] = (uint)in[i+0]; (out+j+0)[2] = (uint)in[i+3]; - (out+j+3)[0] = (uint)in[i+1]; - (out+j+3)[1] = (uint)in[i+2]; + (out+j+3)[0] = (uint)in[i+0]; + (out+j+3)[1] = (uint)in[i+1]; (out+j+3)[2] = (uint)in[i+3]; } } diff --git a/src/gallium/auxiliary/indices/u_indices_gen.py b/src/gallium/auxiliary/indices/u_indices_gen.py index 397eea306b..40b047bb58 100644 --- a/src/gallium/auxiliary/indices/u_indices_gen.py +++ b/src/gallium/auxiliary/indices/u_indices_gen.py @@ -139,6 +139,9 @@ def do_tri( intype, outtype, ptr, v0, v1, v2, inpv, outpv ): else: tri( intype, outtype, ptr, v2, v0, v1 ) +def do_quad( intype, outtype, ptr, v0, v1, v2, v3, inpv, outpv ): + do_tri( intype, outtype, ptr+'+0', v0, v1, v3, inpv, outpv ); + do_tri( intype, outtype, ptr+'+3', v1, v2, v3, inpv, outpv ); def name(intype, outtype, inpv, outpv, prim): if intype == GENERATE: @@ -234,8 +237,7 @@ def polygon(intype, outtype, inpv, outpv): def quads(intype, outtype, inpv, outpv): preamble(intype, outtype, inpv, outpv, prim='quads') print ' for (j = i = 0; j < nr; j+=6, i+=4) { ' - do_tri( intype, outtype, 'out+j+0', 'i+0', 'i+1', 'i+3', inpv, outpv ); - do_tri( intype, outtype, 'out+j+3', 'i+1', 'i+2', 'i+3', inpv, outpv ); + do_quad( intype, outtype, 'out+j', 'i+0', 'i+1', 'i+2', 'i+3', inpv, outpv ); print ' }' postamble() @@ -243,8 +245,7 @@ def quads(intype, outtype, inpv, outpv): def quadstrip(intype, outtype, inpv, outpv): preamble(intype, outtype, inpv, outpv, prim='quadstrip') print ' for (j = i = 0; j < nr; j+=6, i+=2) { ' - do_tri( intype, outtype, 'out+j+0', 'i+0', 'i+1', 'i+3', inpv, outpv ); - do_tri( intype, outtype, 'out+j+3', 'i+1', 'i+2', 'i+3', inpv, outpv ); + do_quad( intype, outtype, 'out+j', 'i+2', 'i+0', 'i+1', 'i+3', inpv, outpv ); print ' }' postamble() -- cgit v1.2.3